CSV

Module

PDFHighlights.Internal.CSV — Module

This module contains functions that can only be applied to CSV files.

Functions

PDFHighlights.Internal.CSV._check — Method

_check(csv::String) -> Nothing

Check the structural integrity of the CSV file (see the Throws section below for the possible exceptions).

Arguments

csv::String: absolute or relative path to the CSV file

Throws

IncorrectHeader: the specified file has an incorrect header
LastElementIsNotAnInteger: the last element in the line is not an integer
NotFiveColumns: the row does not contain elements for 5 columns

Example

using PDFHighlights
HEADER = PDFHighlights.Internal.Constants.HEADER

_file, io = mktemp()
println(io, HEADER)
flush(io)
file = _file * ".csv"
mv(_file, file)

PDFHighlights.Internal.CSV._check(file)

source

PDFHighlights.Internal.CSV._get_authors_from_CSV — Method

_get_authors_from_CSV(csv::String) -> Vector{String}

Extract the values of the Author column from the CSV file.

Arguments

csv::String: absolute or relative path to the CSV file

Returns

Vector{String}: the authors

Throws

Exceptions from: get_all

Example

using PDFHighlights
HEADER = PDFHighlights.Internal.Constants.HEADER

_file, io = mktemp()
println(io, HEADER, '\n', ",,Susanna Kaysen,,1")
flush(io)
file = _file * ".csv"
mv(_file, file)

PDFHighlights.Internal.CSV._get_authors_from_CSV(file) == ["Susanna Kaysen"]

source

PDFHighlights.Internal.CSV._get_highlights_from_CSV — Method

_get_highlights_from_CSV(csv::String) -> Vector{String}

Extract the values of the Highlight column from the CSV file.

Arguments

csv::String: absolute or relative path to the CSV file

Returns

Vector{String}: the highlights

Throws

Exceptions from: get_all

Example

using PDFHighlights
HEADER = PDFHighlights.Internal.Constants.HEADER

_file, io = mktemp()
println(io, HEADER, '\n', "The world didn't stop spinning,,,,1")
flush(io)
file = _file * ".csv"
mv(_file, file)

PDFHighlights.Internal.CSV._get_highlights_from_CSV(file) ==
["The world didn't stop spinning"]

source

PDFHighlights.Internal.CSV._get_titles_from_CSV — Method

_get_titles_from_CSV(csv::String) -> Vector{String}

Extract the values of the Title column from the CSV file.

Arguments

csv::String: absolute or relative path to the CSV file

Returns

Vector{String}: the titles

Throws

Exceptions from: get_all

Example

using PDFHighlights
HEADER = PDFHighlights.Internal.Constants.HEADER

_file, io = mktemp()
println(io, HEADER, '\n', ",\"Girl, Interrupted\",,,1")
flush(io)
file = _file * ".csv"
mv(_file, file)

PDFHighlights.Internal.CSV._get_titles_from_CSV(file) == ["Girl, Interrupted"]

source

PDFHighlights.Internal.CSV.get_all — Method

get_all(csv::String) -> Tuple{
    Vector{String},
    Vector{String},
    Vector{String},
    Vector{String},
    Vector{Int32},
}

Extract the values of all columns from the CSV file.

Arguments

csv::String: absolute or relative path to the CSV file

Returns

Tuple{Vector{String}, Vector{String}, Vector{String}, Vector{String}, Vector{Int32}}: the highlights, titles, authors, notes, and locations

Throws

IntegrityCheckFailed: another exception was thrown while checking the integrity of the table
Exceptions from: initialize

Example

using PDFHighlights
HEADER = PDFHighlights.Internal.Constants.HEADER

_file, io = mktemp()
row = string(
    "The world didn't stop spinning,",
    "\"Girl, Interrupted\",",
    "Susanna Kaysen,",
    "Journal,",
    "5722",
)
println(io, HEADER, '\n', row)
flush(io)
file = _file * ".csv"
mv(_file, file)

get_all(file) == (
    ["The world didn't stop spinning"],
    ["Girl, Interrupted"],
    ["Susanna Kaysen"],
    ["Journal"],
    [5722],
)

source

PDFHighlights.Internal.CSV.get_locations — Method

get_locations(csv::String) -> Vector{Int32}

Extract the values of the Location column from the CSV file.

Arguments

csv::String: absolute or relative path to the CSV file

Returns

Vector{Int32}: the locations

Throws

Exceptions from: get_all

Example

using PDFHighlights
HEADER = PDFHighlights.Internal.Constants.HEADER

_file, io = mktemp()
println(io, HEADER, '\n', ",,,,5722")
flush(io)
file = _file * ".csv"
mv(_file, file)

get_locations(file) == Int32[5722]

source

PDFHighlights.Internal.CSV.get_notes — Method

get_notes(csv::String) -> Vector{String}

Extract the values of the Note column from the CSV file.

Arguments

csv::String: absolute or relative path to the CSV file

Returns

Vector{String}: the notes

Throws

Exceptions from: get_all

Example

using PDFHighlights
HEADER = PDFHighlights.Internal.Constants.HEADER

_file, io = mktemp()
println(io, HEADER, '\n', ",,,Journal,")
flush(io)
file = _file * ".csv"
mv(_file, file)

get_notes(file) == ["Journal"]

source

PDFHighlights.Internal.CSV.import_highlights — Method

import_highlights(csv::String, target::String; quiet::Bool=false) -> Nothing

Import the highlights (and related data) into a CSV file from a PDF file or a directory with PDF files.

Arguments

csv::String: absolute or relative path to the CSV file
target::String: a PDF file or a directory with PDF files

Keywords

quiet::Bool=false: if true, don't print to standard output

Throws

IntegrityCheckFailed: another exception was thrown while checking the integrity of the table
Exceptions from: initialize

Example

using PDFHighlights
using Suppressor

path_to_pdf_dir = joinpath(pathof(PDFHighlights) |> dirname |> dirname, "test", "pdf")
path_to_pdf = joinpath(path_to_pdf_dir, "TestPDF.pdf")

_file, io = mktemp()
file = _file * ".csv"
mv(_file, file)

(@capture_out(import_highlights(file, path_to_pdf)) ==
"""

    CSV: "$(basename(file))"
    PDF: "TestPDF.pdf"
    Highlights (found / added): 7 / 7

""") |> println

(@capture_out(import_highlights(file, path_to_pdf_dir)) ==
"""

    CSV: "$(basename(file))"
    Directory: "pdf"
    Highlights (found / added): 7 / 0

""") |> println

@capture_out(import_highlights(file, path_to_pdf; quiet=true)) == ""

source

PDFHighlights.Internal.CSV.initialize — Method

initialize(csv::String) -> Nothing

If the file at the csv path does not exist, create it and write the header; if it exists but is empty, do the same; if it exists and is not empty, check its structural correctness.

Arguments

csv::String: absolute or relative path to the CSV file

Throws

NotCSV: the specified path does not end in .csv
Exceptions from: _check

Example

using PDFHighlights
HEADER = PDFHighlights.Internal.Constants.HEADER

_file, _ = mktemp()
file = _file * ".csv"
mv(_file, file)

initialize(file)

open(file, "r") do io
    readlines(io) == [HEADER]
end

source

Macros

PDFHighlights.Internal.CSV.@extract — Macro

@extract(array::Symbol) -> Expr

Get a piece of the line (a highlight, an author, a title, etc.).

Arguments

array::Symbol: the name of the array to which this piece refers

Returns

Expr: the code extracting the piece and putting it into the corresponding array

Example

using PDFHighlights
using SyntaxTree

(@macroexpand(PDFHighlights.Internal.CSV.@extract(highlights)) |> linefilter! ==
quote
    if current_comma_index == 1
        highlights[line_index] = ""
    else
        piece = line[1:(current_comma_index - 1)]
        if piece == "\"\""
            highlights[line_index] = ""
        elseif startswith(piece, "\"")
            highlights[line_index] = chop(piece; head = 1, tail = 1)
        else
            highlights[line_index] = piece
        end
    end
end |> linefilter!) |> println

(@macroexpand(PDFHighlights.Internal.CSV.@extract(locations)) |> linefilter! ==
quote
    if current_comma_index == lastindex(line)
        locations[line_index] = 0
    else
        locations[line_index] = parse(
            Int32,
            line[(current_comma_index + 1):end],
        )
    end
end |> linefilter!) |> println

array = :titles

@macroexpand(PDFHighlights.Internal.CSV.@extract(titles)) |> linefilter! ==
quote
    if current_comma_index == previous_comma_index + 1
        $(array)[line_index] = ""
    else
        piece = line[(previous_comma_index + 1):(current_comma_index - 1)]
        if piece == "\"\""
            $(array)[line_index] = ""
        elseif startswith(piece, "\"")
            $(array)[line_index] = chop(piece; head = 1, tail = 1)
        else
            $(array)[line_index] = piece
        end
    end
end |> linefilter!

source