527 lines
17 KiB
Racket
527 lines
17 KiB
Racket
#lang at-exp racket/base
|
||
(require racket/function
|
||
racket/match
|
||
racket/list
|
||
racket/string)
|
||
|
||
(struct bibdb (raw bibs))
|
||
|
||
(define (bibtex-parse ip)
|
||
(define STRING-DB (make-hash))
|
||
(define ENTRY-DB (make-hash))
|
||
|
||
(define (perror ip sym fmt . args)
|
||
(define loc (call-with-values (λ () (port-next-location ip)) list))
|
||
(apply error sym (string-append fmt " @ line ~a column ~a byte ~a") (append args loc)))
|
||
|
||
(define (read-while pred ip)
|
||
(list->string
|
||
(let loop ()
|
||
(match (peek-char ip)
|
||
[(and (? char?) (? pred))
|
||
(cons (read-char ip)
|
||
(loop))]
|
||
[_
|
||
empty]))))
|
||
|
||
(define (read-until pred ip)
|
||
(read-while (negate pred) ip))
|
||
|
||
(define (slurp-whitespace ip)
|
||
(read-while (λ (c) (and (char? c) (char-whitespace? c))) ip))
|
||
|
||
(define (read-entries ip)
|
||
(slurp-whitespace ip)
|
||
(match (read-char ip)
|
||
[#\%
|
||
(read-line ip)
|
||
(read-entries ip)]
|
||
[#\@
|
||
(read-entry ip)
|
||
(read-entries ip)]
|
||
[(? eof-object?)
|
||
(void)]
|
||
[c
|
||
;; All other characters are comments.
|
||
(read-entries ip)]))
|
||
|
||
(define (read-entry ip)
|
||
(match (read-until (λ (c) (or (char=? c #\{)
|
||
(char=? c #\()))
|
||
ip)
|
||
[(app string-foldcase "string")
|
||
(slurp-whitespace ip)
|
||
(match (read-char ip)
|
||
[#\{
|
||
(void)]
|
||
[c
|
||
(perror ip 'read-entry "Parsing entry, expected {, got ~v" c)])
|
||
(define tag (read-tag ip))
|
||
(slurp-whitespace ip)
|
||
(match (read-char ip)
|
||
[#\=
|
||
(slurp-whitespace ip)
|
||
(define string (read-value ip))
|
||
(slurp-whitespace ip)
|
||
(match (read-char ip)
|
||
[#\}
|
||
(hash-set! STRING-DB tag string)]
|
||
[c
|
||
(perror ip 'read-entry "Parsing string, expected }, got ~v; tag is ~v; string is ~v" c tag string)])]
|
||
[c
|
||
(perror ip 'read-entry "Parsing string, expected =, got ~v; tag is ~v" c tag)])]
|
||
[(or (app string-foldcase "comment")
|
||
(app string-foldcase "preamble"))
|
||
(read-char ip)
|
||
(let loop ()
|
||
(read-until (λ (c) (or (char=? c #\{) (char=? c #\}))) ip)
|
||
(match (read-char ip)
|
||
[#\{
|
||
(loop) (loop)]
|
||
[#\}
|
||
(void)]))]
|
||
[typ
|
||
(read-char ip)
|
||
(slurp-whitespace ip)
|
||
(define label (string-foldcase (read-until (λ (c) (char=? c #\,)) ip)))
|
||
(read-char ip)
|
||
(define alist
|
||
(let loop ()
|
||
(slurp-whitespace ip)
|
||
(define atag (read-tag ip))
|
||
(cond
|
||
[(string=? "" atag)
|
||
(read-char ip)
|
||
(hash)]
|
||
[else
|
||
(slurp-whitespace ip)
|
||
(match (read-char ip)
|
||
[#\=
|
||
(slurp-whitespace ip)
|
||
(define aval (read-value ip))
|
||
(slurp-whitespace ip)
|
||
(match (read-char ip)
|
||
[#\,
|
||
(hash-set (loop) atag aval)]
|
||
[#\}
|
||
(hash atag aval)]
|
||
[c
|
||
(perror ip 'read-entry "Parsing entry, expected , or }, got ~v; label is ~v; atag is ~v; aval is ~v" c label atag aval)])]
|
||
[c
|
||
(perror ip 'read-entry "Parsing entry tag, expected =, got ~v; label is ~v; atag is ~v" c label atag)])])))
|
||
(hash-set! ENTRY-DB label
|
||
(hash-set alist 'type (string-foldcase typ)))]))
|
||
|
||
(define (read-tag ip)
|
||
(slurp-whitespace ip)
|
||
(string-foldcase
|
||
(read-until
|
||
(λ (c) (or (char-whitespace? c)
|
||
(char=? c #\=)
|
||
(char=? c #\{)
|
||
(char=? c #\})))
|
||
ip)))
|
||
|
||
(define (read-braced-value ip)
|
||
(read-char ip)
|
||
(let loop ()
|
||
(define first-part (read-until (λ (c) (or (char=? c #\{) (char=? c #\})))
|
||
ip))
|
||
(match (peek-char ip)
|
||
[#\{
|
||
(string-append first-part (read-value ip) (loop))]
|
||
[#\}
|
||
(read-char ip)
|
||
first-part])))
|
||
|
||
(define (read-value ip)
|
||
(slurp-whitespace ip)
|
||
(define first-part (read-value-single ip))
|
||
(slurp-whitespace ip)
|
||
(match (peek-char ip)
|
||
[#\#
|
||
(read-char ip)
|
||
(string-append first-part (read-value ip))]
|
||
[_
|
||
first-part]))
|
||
|
||
(define (read-value-single ip)
|
||
(slurp-whitespace ip)
|
||
(match (peek-char ip)
|
||
[#\{
|
||
(read-braced-value ip)]
|
||
[#\"
|
||
(read-char ip)
|
||
(let loop ()
|
||
(define first-part (read-until (λ (c) (or (char=? c #\{) (char=? c #\")))
|
||
ip))
|
||
(match (peek-char ip)
|
||
[#\{
|
||
(string-append first-part (read-braced-value ip) (loop))]
|
||
[#\"
|
||
(read-char ip)
|
||
first-part]))]
|
||
[(? char-numeric?)
|
||
(read-while char-numeric? ip)]
|
||
[(? char-alphabetic?)
|
||
(define string-tag (read-until (λ (c) (or (char-whitespace? c)
|
||
(char=? c #\,)))
|
||
ip))
|
||
(hash-ref STRING-DB string-tag
|
||
(λ () string-tag))]
|
||
[c
|
||
(perror ip 'read-value "Parsing value, expected {, got ~v" c)]))
|
||
|
||
(read-entries ip)
|
||
|
||
(bibdb ENTRY-DB (make-hash)))
|
||
|
||
(define (path->bibdb pth)
|
||
(define bibdb
|
||
(with-input-from-file
|
||
pth
|
||
(λ ()
|
||
(port-count-lines! (current-input-port))
|
||
(bibtex-parse (current-input-port)))))
|
||
bibdb)
|
||
|
||
(require scriblib/autobib
|
||
scribble/manual)
|
||
|
||
(define-syntax-rule
|
||
(define-bibtex-cite bib-pth
|
||
~cite-id citet-id generate-bibliography-id . options)
|
||
(begin
|
||
(define-cite autobib-cite autobib-citet generate-bibliography-id . options)
|
||
(define-bibtex-cite* bib-pth
|
||
autobib-cite autobib-citet
|
||
~cite-id citet-id)))
|
||
|
||
(define ((make-citer bibtex-db citer) f . r)
|
||
(apply citer
|
||
(filter-map
|
||
(λ (key)
|
||
(and (not (string=? "\n" key))
|
||
(generate-bib bibtex-db key)))
|
||
(append-map (curry regexp-split #px"\\s+")
|
||
(cons f r)))))
|
||
|
||
(define-syntax-rule
|
||
(define-bibtex-cite* bib-pth
|
||
autobib-cite autobib-citet
|
||
~cite-id citet-id)
|
||
(begin
|
||
(define bibtex-db (path->bibdb bib-pth))
|
||
(define ~cite-id (make-citer bibtex-db autobib-cite))
|
||
(define citet-id (make-citer bibtex-db autobib-citet))))
|
||
|
||
;; Seems a little redundant to convert latex escapes into unicode only to
|
||
;; convert them back into latex, but we need to sort authors so we can't
|
||
;; leave them as literal-chars.
|
||
(define (latex-to-unicode str)
|
||
; This is probably defined somewhere...
|
||
; NOTE: Incomplete. Please file PR if you need more.
|
||
(define converts
|
||
'(("\\'\\i" . "ı́")
|
||
("\\\"u" . "ü")
|
||
("\\\"o" . "ö")
|
||
("\\\"i" . "ï")
|
||
("\\'i" . "í")
|
||
("\\i" . "ı")
|
||
("\\'a" . "á")
|
||
("\\'A" . "Á")
|
||
("\\~a" . "ã")
|
||
("\\`a" . "À")
|
||
("\\~A" . "Ã")))
|
||
(for/fold ([str str])
|
||
([p converts])
|
||
(string-replace str (car p) (cdr p))))
|
||
|
||
(define (parse-author as)
|
||
(and as
|
||
(apply authors
|
||
(for/list ([a (in-list (regexp-split #px"\\s+and\\s+" as))])
|
||
(define (trim s)
|
||
(string-trim (regexp-replace #px"\\s+" s " ")))
|
||
(match (latex-to-unicode a)
|
||
[(pregexp #px"^(.*),(.*),(.*)$" (list _ two suffix one))
|
||
(author-name (trim one) (trim two) #:suffix (trim suffix))]
|
||
[(pregexp #px"^(.*),(.*)$" (list _ two one))
|
||
(author-name (string-trim one) (string-trim two))]
|
||
[(pregexp #px"^(.*?)\\s+(\\p{Ll}[^\\s]*(\\s+\\p{Ll}[^\\s]*)*)\\s+(.*)$" (list _ one von-like _ two))
|
||
(author-name (string-trim one)
|
||
(string-append (string-trim von-like) " " (string-trim two)))]
|
||
[space-separated
|
||
(match (regexp-split #px"\\s+" space-separated)
|
||
[(list one) (org-author-name one)]
|
||
[(list one two) (author-name one two)]
|
||
[(list-rest first rest)
|
||
(author-name (apply string-append (add-between (cons first (drop-right rest 1))
|
||
" "))
|
||
(last rest))])])))))
|
||
|
||
(module+ test
|
||
(require rackunit)
|
||
|
||
;; use this as a predicate to hack around lack of
|
||
;; ability to use equal? on author element structs;
|
||
;; unfortunately, it ony compares the composed strings
|
||
(define (print-as-equal-string? a b)
|
||
(equal? (format "~s" a)
|
||
(format "~s" b)))
|
||
|
||
(check
|
||
print-as-equal-string?
|
||
(parse-author "James Earl Jones")
|
||
(authors
|
||
(author-name "James Earl" "Jones")))
|
||
|
||
(check
|
||
print-as-equal-string?
|
||
(parse-author "Tim Robbins and Morgan Freeman")
|
||
(authors (author-name "Tim" "Robbins")
|
||
(author-name "Morgan" "Freeman")))
|
||
|
||
(check
|
||
print-as-equal-string?
|
||
(parse-author "Edward L. Deci and Robert J. Vallerand and Luc G. Pelletier and Richard M. Ryan")
|
||
(authors (author-name "Edward L." "Deci")
|
||
(author-name "Robert J." "Vallerand")
|
||
(author-name "Luc G." "Pelletier")
|
||
(author-name "Richard M." "Ryan")))
|
||
|
||
(check
|
||
print-as-equal-string?
|
||
(parse-author "Lst, Fst")
|
||
(authors
|
||
(author-name "Fst" "Lst")))
|
||
|
||
(check
|
||
print-as-equal-string?
|
||
(parse-author "Lst,Fst")
|
||
(authors
|
||
(author-name "Fst" "Lst")))
|
||
|
||
(check
|
||
print-as-equal-string?
|
||
(parse-author "James, Earl Jones")
|
||
(authors
|
||
(author-name "Earl Jones" "James")))
|
||
|
||
(check
|
||
print-as-equal-string?
|
||
(parse-author "James,Earl Jones")
|
||
(authors
|
||
(author-name "Earl Jones" "James")))
|
||
|
||
(check
|
||
print-as-equal-string?
|
||
(parse-author "LstA LstB, Fst")
|
||
(authors
|
||
(author-name "Fst" "LstA LstB")))
|
||
|
||
(check
|
||
print-as-equal-string?
|
||
(parse-author "LstA LstB,Fst")
|
||
(authors
|
||
(author-name "Fst" "LstA LstB")))
|
||
|
||
(check
|
||
print-as-equal-string?
|
||
(parse-author "LstA LstB, FstA FstB")
|
||
(authors
|
||
(author-name "FstA FstB" "LstA LstB")))
|
||
|
||
(check
|
||
print-as-equal-string?
|
||
(parse-author "LstA LstB,FstA FstB")
|
||
(authors
|
||
(author-name "FstA FstB" "LstA LstB")))
|
||
|
||
(check
|
||
print-as-equal-string?
|
||
(parse-author "James, Jr, Earl Jones")
|
||
(authors
|
||
(author-name "Earl Jones" "James" #:suffix "Jr")))
|
||
|
||
(check
|
||
print-as-equal-string?
|
||
(parse-author "James,Jr, Earl Jones")
|
||
(authors
|
||
(author-name "Earl Jones" "James" #:suffix "Jr")))
|
||
|
||
(check
|
||
print-as-equal-string?
|
||
(parse-author "James, Jr,Earl Jones")
|
||
(authors
|
||
(author-name "Earl Jones" "James" #:suffix "Jr")))
|
||
|
||
(check
|
||
print-as-equal-string?
|
||
(parse-author "James,Jr,Earl Jones")
|
||
(authors
|
||
(author-name "Earl Jones" "James" #:suffix "Jr")))
|
||
|
||
(check
|
||
print-as-equal-string?
|
||
(parse-author "James, III, Earl Jones")
|
||
(authors
|
||
(author-name "Earl Jones" "James" #:suffix "III")))
|
||
|
||
(check
|
||
print-as-equal-string?
|
||
(parse-author "James,III, Earl Jones")
|
||
(authors
|
||
(author-name "Earl Jones" "James" #:suffix "III")))
|
||
|
||
(check
|
||
print-as-equal-string?
|
||
(parse-author "James, III,Earl Jones")
|
||
(authors
|
||
(author-name "Earl Jones" "James" #:suffix "III")))
|
||
|
||
(check
|
||
print-as-equal-string?
|
||
(parse-author "James,III,Earl Jones")
|
||
(authors
|
||
(author-name "Earl Jones" "James" #:suffix "III")))
|
||
|
||
(check
|
||
print-as-equal-string?
|
||
(parse-author "James Jack von Earl Jones")
|
||
(authors
|
||
(author-name "James Jack" "von Earl Jones")))
|
||
|
||
(check
|
||
print-as-equal-string?
|
||
(parse-author "James Jack de la Earl Jones")
|
||
(authors
|
||
(author-name "James Jack" "de la Earl Jones")))
|
||
|
||
(check
|
||
print-as-equal-string?
|
||
(parse-author "James Jack van der Earl Jones")
|
||
(authors
|
||
(author-name "James Jack" "van der Earl Jones")))
|
||
|
||
(check
|
||
print-as-equal-string?
|
||
(parse-author "James Jack von de la Earl Jones")
|
||
(authors
|
||
(author-name "James Jack" "von de la Earl Jones")))
|
||
|
||
(check
|
||
print-as-equal-string?
|
||
(parse-author "James Jack di Earl Jones")
|
||
(authors
|
||
(author-name "James Jack" "di Earl Jones")))
|
||
|
||
(check
|
||
print-as-equal-string?
|
||
(parse-author "First fOn bER Last")
|
||
(authors
|
||
(author-name "First" "fOn bER Last")))
|
||
|
||
(check
|
||
print-as-equal-string?
|
||
(parse-author "Deci, Edward L. and Robert J. Vallerand and Pelletier, Luc G. and Ryan, Jr, Richard M.")
|
||
(authors (author-name "Edward L." "Deci")
|
||
(author-name "Robert J." "Vallerand")
|
||
(author-name "Luc G." "Pelletier")
|
||
(author-name "Richard M." "Ryan" #:suffix "Jr")))
|
||
|
||
(check
|
||
print-as-equal-string?
|
||
(parse-author "Foo anderson") ;; Should not be parsed as the two authors "Foo" & "erson"
|
||
(authors
|
||
(author-name "Foo" "anderson"))))
|
||
|
||
(define (parse-pages ps)
|
||
(match ps
|
||
[(regexp #rx"^([0-9]+)\\-+([0-9]+)$" (list _ f l))
|
||
(list f l)]
|
||
[#f
|
||
#f]
|
||
[_
|
||
(error 'parse-pages "Invalid page format ~e" ps)]))
|
||
|
||
(require scribble/core)
|
||
(define (support-escapes s)
|
||
(elem #:style (make-style #f '(exact-chars)) s))
|
||
|
||
(define (generate-bib db key)
|
||
(match-define (bibdb raw bibs) db)
|
||
(hash-ref! bibs (string-foldcase key)
|
||
(λ ()
|
||
(define the-raw (hash-ref raw (string-foldcase key) (λ () (error 'bibtex "Unknown citation ~e" key))))
|
||
(define (raw-attr a [def #f])
|
||
(hash-ref the-raw a def))
|
||
(define (raw-attr* a)
|
||
(hash-ref the-raw a
|
||
(λ () (error 'bibtex "Key ~a is missing attribute ~a, has ~a"
|
||
key a the-raw))))
|
||
(match (raw-attr 'type)
|
||
["misc"
|
||
(make-bib #:title (support-escapes (raw-attr "title"))
|
||
#:author (parse-author (raw-attr "author"))
|
||
#:date (raw-attr "year")
|
||
#:url (raw-attr "url"))]
|
||
["book"
|
||
(make-bib #:title (support-escapes (raw-attr "title"))
|
||
#:author (parse-author (raw-attr "author"))
|
||
#:date (raw-attr "year")
|
||
#:is-book? #t
|
||
#:url (raw-attr "url"))]
|
||
["article"
|
||
(make-bib #:title (support-escapes (raw-attr "title"))
|
||
#:author (parse-author (raw-attr "author"))
|
||
#:date (raw-attr "year")
|
||
#:location (journal-location (raw-attr* "journal")
|
||
#:pages (parse-pages (raw-attr "pages"))
|
||
#:number (raw-attr "number")
|
||
#:volume (raw-attr "volume"))
|
||
#:url (raw-attr "url"))]
|
||
["inproceedings"
|
||
(make-bib #:title (support-escapes (raw-attr "title"))
|
||
#:author (parse-author (raw-attr "author"))
|
||
#:date (raw-attr "year")
|
||
#:location (proceedings-location (raw-attr "booktitle"))
|
||
#:url (raw-attr "url"))]
|
||
["webpage"
|
||
(make-bib #:title (support-escapes (raw-attr "title"))
|
||
#:author (parse-author (raw-attr "author"))
|
||
#:date (raw-attr "year")
|
||
#:url (raw-attr "url"))]
|
||
["mastersthesis"
|
||
(make-bib #:title (support-escapes (raw-attr "title"))
|
||
#:author (parse-author (raw-attr "author"))
|
||
#:date (raw-attr "year")
|
||
#:location (raw-attr "school")
|
||
#:url (raw-attr "url"))]
|
||
["phdthesis"
|
||
(make-bib #:title (support-escapes (raw-attr "title"))
|
||
#:author (parse-author (raw-attr "author"))
|
||
#:date (raw-attr "year")
|
||
#:location (dissertation-location #:institution (raw-attr "school")
|
||
#:degree "PhD")
|
||
#:url (raw-attr "url"))]
|
||
["techreport"
|
||
(make-bib #:title (support-escapes (raw-attr "title"))
|
||
#:author (parse-author (raw-attr "author"))
|
||
#:date (raw-attr "year")
|
||
#:location
|
||
(match* ((raw-attr "institution") (raw-attr "number"))
|
||
[(#f #f) @elem{}]
|
||
[(l #f) @elem{@|l|}]
|
||
[(#f n) @elem{@|n|}]
|
||
[(l n) @elem{@|l|, @|n|}])
|
||
#:url (raw-attr "url"))]
|
||
[_
|
||
(make-bib #:title (format "~v" the-raw))]))))
|
||
|
||
(provide (struct-out bibdb)
|
||
path->bibdb
|
||
bibtex-parse
|
||
define-bibtex-cite
|
||
define-bibtex-cite*)
|