CSV
Module
PDFHighlights.Internal.CSV
— Module
This module contains functions that can only be applied to CSV files.
Functions
PDFHighlights.Internal.CSV._check
— Method
_check(csv::String) -> Nothing
Check the structural integrity of the CSV file (see the exceptions list).
Arguments
csv::String
: absolute or relative path to the CSV file
Throws
IncorrectHeader
: the specified file has an incorrect header
LastElementIsNotAnInteger
: the last element in the line is not an integer
NotFiveColumns
: the row does not represent elements for 5 columns
Example
using PDFHighlights
HEADER = PDFHighlights.Internal.Constants.HEADER
_file, io = mktemp()
println(io, HEADER)
flush(io)
file = _file * ".csv"
mv(_file, file)
PDFHighlights.Internal.CSV._check(file)
PDFHighlights.Internal.CSV._get_authors_from_CSV
— Method
_get_authors_from_CSV(csv::String) -> Vector{String}
Extract the values of the Author column from the CSV file.
Arguments
csv::String
: absolute or relative path to the CSV file
Returns
Vector{String}
: the authors
Throws
- Exceptions from:
get_all
Example
using PDFHighlights
HEADER = PDFHighlights.Internal.Constants.HEADER
_file, io = mktemp()
println(io, HEADER, '\n', ",,Susanna Kaysen,,1")
flush(io)
file = _file * ".csv"
mv(_file, file)
PDFHighlights.Internal.CSV._get_authors_from_CSV(file) == ["Susanna Kaysen"]
PDFHighlights.Internal.CSV._get_highlights_from_CSV
— Method
_get_highlights_from_CSV(csv::String) -> Vector{String}
Extract the values of the Highlight column from the CSV file.
Arguments
csv::String
: absolute or relative path to the CSV file
Returns
Vector{String}
: the highlights
Throws
- Exceptions from:
get_all
Example
using PDFHighlights
HEADER = PDFHighlights.Internal.Constants.HEADER
_file, io = mktemp()
println(io, HEADER, '\n', "The world didn't stop spinning,,,,1")
flush(io)
file = _file * ".csv"
mv(_file, file)
PDFHighlights.Internal.CSV._get_highlights_from_CSV(file) ==
["The world didn't stop spinning"]
PDFHighlights.Internal.CSV._get_titles_from_CSV
— Method
_get_titles_from_CSV(csv::String) -> Vector{String}
Extract the values of the Title column from the CSV file.
Arguments
csv::String
: absolute or relative path to the CSV file
Returns
Vector{String}
: the titles
Throws
- Exceptions from:
get_all
Example
using PDFHighlights
HEADER = PDFHighlights.Internal.Constants.HEADER
_file, io = mktemp()
println(io, HEADER, '\n', ",\"Girl, Interrupted\",,,1")
flush(io)
file = _file * ".csv"
mv(_file, file)
PDFHighlights.Internal.CSV._get_titles_from_CSV(file) == ["Girl, Interrupted"]
PDFHighlights.Internal.CSV.get_all
— Method
get_all(csv::String) -> Tuple{
Vector{String},
Vector{String},
Vector{String},
Vector{String},
Vector{Int32},
}
Extract the values of all columns from the CSV file.
Arguments
csv::String
: absolute or relative path to the CSV file
Returns
Tuple{Vector{String}, Vector{String}, Vector{String}, Vector{String}, Vector{Int32}}
: the highlights, titles, authors, notes, and locations
Throws
IntegrityCheckFailed
: another exception was thrown while checking the integrity of the table
- Exceptions from:
initialize
Example
using PDFHighlights
HEADER = PDFHighlights.Internal.Constants.HEADER
_file, io = mktemp()
row = string(
"The world didn't stop spinning,",
"\"Girl, Interrupted\",",
"Susanna Kaysen,",
"Journal,",
"5722",
)
println(io, HEADER, '\n', row)
flush(io)
file = _file * ".csv"
mv(_file, file)
get_all(file) == (
["The world didn't stop spinning"],
["Girl, Interrupted"],
["Susanna Kaysen"],
["Journal"],
[5722],
)
PDFHighlights.Internal.CSV.get_locations
— Method
get_locations(csv::String) -> Vector{Int32}
Extract the values of the Location column from the CSV file.
Arguments
csv::String
: absolute or relative path to the CSV file
Returns
Vector{Int32}
: the locations
Throws
- Exceptions from:
get_all
Example
using PDFHighlights
HEADER = PDFHighlights.Internal.Constants.HEADER
_file, io = mktemp()
println(io, HEADER, '\n', ",,,,5722")
flush(io)
file = _file * ".csv"
mv(_file, file)
get_locations(file) == Int32[5722]
PDFHighlights.Internal.CSV.get_notes
— Method
get_notes(csv::String) -> Vector{String}
Extract the values of the Note column from the CSV file.
Arguments
csv::String
: absolute or relative path to the CSV file
Returns
Vector{String}
: the notes
Throws
- Exceptions from:
get_all
Example
using PDFHighlights
HEADER = PDFHighlights.Internal.Constants.HEADER
_file, io = mktemp()
println(io, HEADER, '\n', ",,,Journal,")
flush(io)
file = _file * ".csv"
mv(_file, file)
get_notes(file) == ["Journal"]
PDFHighlights.Internal.CSV.import_highlights
— Method
import_highlights(csv::String, target::String; quiet::Bool=false) -> Nothing
Import the highlights (and related data) into a CSV file from a PDF file or directory with PDF files.
Arguments
csv::String
: absolute or relative path to the CSV file
target::String
: a PDF file or a directory with PDF files
Keywords
quiet::Bool=false
: if true, don't print to standard output
Throws
IntegrityCheckFailed
: another exception was thrown while checking the integrity of the table
- Exceptions from:
initialize
Example
using PDFHighlights
using Suppressor
path_to_pdf_dir = joinpath(pathof(PDFHighlights) |> dirname |> dirname, "test", "pdf")
path_to_pdf = joinpath(path_to_pdf_dir, "TestPDF.pdf")
_file, io = mktemp()
file = _file * ".csv"
mv(_file, file)
(@capture_out(import_highlights(file, path_to_pdf)) ==
"""
CSV: "$(basename(file))"
PDF: "TestPDF.pdf"
Highlights (found / added): 7 / 7
""") |> println
(@capture_out(import_highlights(file, path_to_pdf_dir)) ==
"""
CSV: "$(basename(file))"
Directory: "pdf"
Highlights (found / added): 7 / 0
""") |> println
@capture_out(import_highlights(file, path_to_pdf; quiet=true)) == ""
PDFHighlights.Internal.CSV.initialize
— Method
initialize(csv::String) -> Nothing
If the file at the csv path does not exist, create it and write the header; if it exists but is empty, do the same; if it exists and is not empty, check its structural correctness.
Arguments
csv::String
: absolute or relative path to the CSV file
Throws
Example
using PDFHighlights
HEADER = PDFHighlights.Internal.Constants.HEADER
_file, _ = mktemp()
file = _file * ".csv"
mv(_file, file)
initialize(file)
open(file, "r") do io
readlines(io) == [HEADER]
end
Macros
PDFHighlights.Internal.CSV.@extract
— Macro
@extract(array::Symbol) -> Expr
Get a piece of a line (a highlight, an author, a title, etc.).
Arguments
array::Symbol
: the name of the array to which this piece refers
Returns
Expr
: the code extracting the piece and putting it into the corresponding array
Example
using PDFHighlights
using SyntaxTree
(@macroexpand(PDFHighlights.Internal.CSV.@extract(highlights)) |> linefilter! ==
quote
if current_comma_index == 1
highlights[line_index] = ""
else
piece = line[1:(current_comma_index - 1)]
if piece == "\"\""
highlights[line_index] = ""
elseif startswith(piece, "\"")
highlights[line_index] = chop(piece; head = 1, tail = 1)
else
highlights[line_index] = piece
end
end
end |> linefilter!) |> println
(@macroexpand(PDFHighlights.Internal.CSV.@extract(locations)) |> linefilter! ==
quote
if current_comma_index == lastindex(line)
locations[line_index] = 0
else
locations[line_index] = parse(
Int32,
line[(current_comma_index + 1):end],
)
end
end |> linefilter!) |> println
array = :titles
@macroexpand(PDFHighlights.Internal.CSV.@extract(titles)) |> linefilter! ==
quote
if current_comma_index == previous_comma_index + 1
$(array)[line_index] = ""
else
piece = line[(previous_comma_index + 1):(current_comma_index - 1)]
if piece == "\"\""
$(array)[line_index] = ""
elseif startswith(piece, "\"")
$(array)[line_index] = chop(piece; head = 1, tail = 1)
else
$(array)[line_index] = piece
end
end
end |> linefilter!