#lang at-exp racket/base (require racket/function racket/match racket/list racket/string) (struct bibdb (raw bibs)) (define (bibtex-parse ip) (define STRING-DB (make-hash)) (define ENTRY-DB (make-hash)) (define (perror ip sym fmt . args) (define loc (call-with-values (λ () (port-next-location ip)) list)) (apply error sym (string-append fmt " @ line ~a column ~a byte ~a") (append args loc))) (define (read-while pred ip) (list->string (let loop () (match (peek-char ip) [(and (? char?) (? pred)) (cons (read-char ip) (loop))] [_ empty])))) (define (read-until pred ip) (read-while (negate pred) ip)) (define (slurp-whitespace ip) (read-while (λ (c) (and (char? c) (char-whitespace? c))) ip)) (define (read-entries ip) (slurp-whitespace ip) (match (read-char ip) [#\% (read-line ip) (read-entries ip)] [#\@ (read-entry ip) (read-entries ip)] [(? eof-object?) (void)] [c ;; All other characters are comments. (read-entries ip)])) (define (read-entry ip) (match (read-until (λ (c) (or (char=? c #\{) (char=? c #\())) ip) [(app string-foldcase "string") (slurp-whitespace ip) (match (read-char ip) [#\{ (void)] [c (perror ip 'read-entry "Parsing entry, expected {, got ~v" c)]) (define tag (read-tag ip)) (slurp-whitespace ip) (match (read-char ip) [#\= (slurp-whitespace ip) (define string (read-value ip)) (slurp-whitespace ip) (match (read-char ip) [#\} (hash-set! STRING-DB tag string)] [c (perror ip 'read-entry "Parsing string, expected }, got ~v; tag is ~v; string is ~v" c tag string)])] [c (perror ip 'read-entry "Parsing string, expected =, got ~v; tag is ~v" c tag)])] [(or (app string-foldcase "comment") (app string-foldcase "preamble")) (read-char ip) (let loop () (read-until (λ (c) (or (char=? c #\{) (char=? c #\}))) ip) (match (read-char ip) [#\{ (loop) (loop)] [#\} (void)]))] [typ (read-char ip) (slurp-whitespace ip) (define label (string-foldcase (read-until (λ (c) (char=? c #\,)) ip))) (read-char ip) (define alist (let loop () (slurp-whitespace ip) (define atag (read-tag ip)) (cond [(string=? "" atag) (read-char ip) (hash)] [else (slurp-whitespace ip) (match (read-char ip) [#\= (slurp-whitespace ip) (define aval (read-value ip)) (slurp-whitespace ip) (match (read-char ip) [#\, (hash-set (loop) atag aval)] [#\} (hash atag aval)] [c (perror ip 'read-entry "Parsing entry, expected , or }, got ~v; label is ~v; atag is ~v; aval is ~v" c label atag aval)])] [c (perror ip 'read-entry "Parsing entry tag, expected =, got ~v; label is ~v; atag is ~v" c label atag)])]))) (hash-set! ENTRY-DB label (hash-set alist 'type (string-foldcase typ)))])) (define (read-tag ip) (slurp-whitespace ip) (string-foldcase (read-until (λ (c) (or (char-whitespace? c) (char=? c #\=) (char=? c #\{) (char=? c #\}))) ip))) (define (read-braced-value ip) (read-char ip) (let loop () (define first-part (read-until (λ (c) (or (char=? c #\{) (char=? c #\}))) ip)) (match (peek-char ip) [#\{ (string-append first-part (read-value ip) (loop))] [#\} (read-char ip) first-part]))) (define (read-value ip) (slurp-whitespace ip) (define first-part (read-value-single ip)) (slurp-whitespace ip) (match (peek-char ip) [#\# (read-char ip) (string-append first-part (read-value ip))] [_ first-part])) (define (read-value-single ip) (slurp-whitespace ip) (match (peek-char ip) [#\{ (read-braced-value ip)] [#\" (read-char ip) (let loop () (define first-part (read-until (λ (c) (or (char=? c #\{) (char=? c #\"))) ip)) (match (peek-char ip) [#\{ (string-append first-part (read-braced-value ip) (loop))] [#\" (read-char ip) first-part]))] [(? char-numeric?) (read-while char-numeric? ip)] [(? char-alphabetic?) (define string-tag (read-until (λ (c) (or (char-whitespace? c) (char=? c #\,))) ip)) (hash-ref STRING-DB string-tag (λ () string-tag))] [c (perror ip 'read-value "Parsing value, expected {, got ~v" c)])) (read-entries ip) (bibdb ENTRY-DB (make-hash))) (define (path->bibdb pth) (define bibdb (with-input-from-file pth (λ () (port-count-lines! (current-input-port)) (bibtex-parse (current-input-port))))) bibdb) (require scriblib/autobib scribble/manual) (define-syntax-rule (define-bibtex-cite bib-pth ~cite-id citet-id generate-bibliography-id . options) (begin (define-cite autobib-cite autobib-citet generate-bibliography-id . options) (define-bibtex-cite* bib-pth autobib-cite autobib-citet ~cite-id citet-id))) (define ((make-citer bibtex-db citer) f . r) (apply citer (filter-map (λ (key) (and (not (string=? "\n" key)) (generate-bib bibtex-db key))) (append-map (curry regexp-split #px"\\s+") (cons f r))))) (define-syntax-rule (define-bibtex-cite* bib-pth autobib-cite autobib-citet ~cite-id citet-id) (begin (define bibtex-db (path->bibdb bib-pth)) (define ~cite-id (make-citer bibtex-db autobib-cite)) (define citet-id (make-citer bibtex-db autobib-citet)))) ;; Seems a little redundant to convert latex escapes into unicode only to ;; convert them back into latex, but we need to sort authors so we can't ;; leave them as literal-chars. (define (latex-to-unicode str) ; This is probably defined somewhere... ; NOTE: Incomplete. Please file PR if you need more. (define converts '(("\\'\\i" . "ı́") ("\\\"u" . "ü") ("\\\"o" . "ö") ("\\\"i" . "ï") ("\\'i" . "í") ("\\i" . "ı") ("\\'a" . "á") ("\\'A" . "Á") ("\\~a" . "ã") ("\\`a" . "À") ("\\~A" . "Ã"))) (for/fold ([str str]) ([p converts]) (string-replace str (car p) (cdr p)))) (define (parse-author as) (and as (apply authors (for/list ([a (in-list (regexp-split #px"\\s+and\\s+" as))]) (define (trim s) (string-trim (regexp-replace #px"\\s+" s " "))) (match (latex-to-unicode a) [(pregexp #px"^(.*),(.*),(.*)$" (list _ two suffix one)) (author-name (trim one) (trim two) #:suffix (trim suffix))] [(pregexp #px"^(.*),(.*)$" (list _ two one)) (author-name (string-trim one) (string-trim two))] [(pregexp #px"^(.*?)\\s+(\\p{Ll}[^\\s]*(\\s+\\p{Ll}[^\\s]*)*)\\s+(.*)$" (list _ one von-like _ two)) (author-name (string-trim one) (string-append (string-trim von-like) " " (string-trim two)))] [space-separated (match (regexp-split #px"\\s+" space-separated) [(list one) (org-author-name one)] [(list one two) (author-name one two)] [(list-rest first rest) (author-name (apply string-append (add-between (cons first (drop-right rest 1)) " ")) (last rest))])]))))) (module+ test (require rackunit) ;; use this as a predicate to hack around lack of ;; ability to use equal? on author element structs; ;; unfortunately, it ony compares the composed strings (define (print-as-equal-string? a b) (equal? (format "~s" a) (format "~s" b))) (check print-as-equal-string? (parse-author "James Earl Jones") (authors (author-name "James Earl" "Jones"))) (check print-as-equal-string? (parse-author "Tim Robbins and Morgan Freeman") (authors (author-name "Tim" "Robbins") (author-name "Morgan" "Freeman"))) (check print-as-equal-string? (parse-author "Edward L. Deci and Robert J. Vallerand and Luc G. Pelletier and Richard M. Ryan") (authors (author-name "Edward L." "Deci") (author-name "Robert J." "Vallerand") (author-name "Luc G." "Pelletier") (author-name "Richard M." "Ryan"))) (check print-as-equal-string? (parse-author "Lst, Fst") (authors (author-name "Fst" "Lst"))) (check print-as-equal-string? (parse-author "Lst,Fst") (authors (author-name "Fst" "Lst"))) (check print-as-equal-string? (parse-author "James, Earl Jones") (authors (author-name "Earl Jones" "James"))) (check print-as-equal-string? (parse-author "James,Earl Jones") (authors (author-name "Earl Jones" "James"))) (check print-as-equal-string? (parse-author "LstA LstB, Fst") (authors (author-name "Fst" "LstA LstB"))) (check print-as-equal-string? (parse-author "LstA LstB,Fst") (authors (author-name "Fst" "LstA LstB"))) (check print-as-equal-string? (parse-author "LstA LstB, FstA FstB") (authors (author-name "FstA FstB" "LstA LstB"))) (check print-as-equal-string? (parse-author "LstA LstB,FstA FstB") (authors (author-name "FstA FstB" "LstA LstB"))) (check print-as-equal-string? (parse-author "James, Jr, Earl Jones") (authors (author-name "Earl Jones" "James" #:suffix "Jr"))) (check print-as-equal-string? (parse-author "James,Jr, Earl Jones") (authors (author-name "Earl Jones" "James" #:suffix "Jr"))) (check print-as-equal-string? (parse-author "James, Jr,Earl Jones") (authors (author-name "Earl Jones" "James" #:suffix "Jr"))) (check print-as-equal-string? (parse-author "James,Jr,Earl Jones") (authors (author-name "Earl Jones" "James" #:suffix "Jr"))) (check print-as-equal-string? (parse-author "James, III, Earl Jones") (authors (author-name "Earl Jones" "James" #:suffix "III"))) (check print-as-equal-string? (parse-author "James,III, Earl Jones") (authors (author-name "Earl Jones" "James" #:suffix "III"))) (check print-as-equal-string? (parse-author "James, III,Earl Jones") (authors (author-name "Earl Jones" "James" #:suffix "III"))) (check print-as-equal-string? (parse-author "James,III,Earl Jones") (authors (author-name "Earl Jones" "James" #:suffix "III"))) (check print-as-equal-string? (parse-author "James Jack von Earl Jones") (authors (author-name "James Jack" "von Earl Jones"))) (check print-as-equal-string? (parse-author "James Jack de la Earl Jones") (authors (author-name "James Jack" "de la Earl Jones"))) (check print-as-equal-string? (parse-author "James Jack van der Earl Jones") (authors (author-name "James Jack" "van der Earl Jones"))) (check print-as-equal-string? (parse-author "James Jack von de la Earl Jones") (authors (author-name "James Jack" "von de la Earl Jones"))) (check print-as-equal-string? (parse-author "James Jack di Earl Jones") (authors (author-name "James Jack" "di Earl Jones"))) (check print-as-equal-string? (parse-author "First fOn bER Last") (authors (author-name "First" "fOn bER Last"))) (check print-as-equal-string? (parse-author "Deci, Edward L. and Robert J. Vallerand and Pelletier, Luc G. and Ryan, Jr, Richard M.") (authors (author-name "Edward L." "Deci") (author-name "Robert J." "Vallerand") (author-name "Luc G." "Pelletier") (author-name "Richard M." "Ryan" #:suffix "Jr"))) (check print-as-equal-string? (parse-author "Foo anderson") ;; Should not be parsed as the two authors "Foo" & "erson" (authors (author-name "Foo" "anderson")))) (define (parse-pages ps) (match ps [(regexp #rx"^([0-9]+)\\-+([0-9]+)$" (list _ f l)) (list f l)] [#f #f] [_ (error 'parse-pages "Invalid page format ~e" ps)])) (require scribble/core) (define (support-escapes s) (elem #:style (make-style #f '(exact-chars)) s)) (define (generate-bib db key) (match-define (bibdb raw bibs) db) (hash-ref! bibs (string-foldcase key) (λ () (define the-raw (hash-ref raw (string-foldcase key) (λ () (error 'bibtex "Unknown citation ~e" key)))) (define (raw-attr a [def #f]) (hash-ref the-raw a def)) (define (raw-attr* a) (hash-ref the-raw a (λ () (error 'bibtex "Key ~a is missing attribute ~a, has ~a" key a the-raw)))) (match (raw-attr 'type) ["misc" (make-bib #:title (support-escapes (raw-attr "title")) #:author (parse-author (raw-attr "author")) #:date (raw-attr "year") #:url (raw-attr "url"))] ["book" (make-bib #:title (support-escapes (raw-attr "title")) #:author (parse-author (raw-attr "author")) #:date (raw-attr "year") #:is-book? #t #:url (raw-attr "url"))] ["article" (make-bib #:title (support-escapes (raw-attr "title")) #:author (parse-author (raw-attr "author")) #:date (raw-attr "year") #:location (journal-location (raw-attr* "journal") #:pages (parse-pages (raw-attr "pages")) #:number (raw-attr "number") #:volume (raw-attr "volume")) #:url (raw-attr "url"))] ["inproceedings" (make-bib #:title (support-escapes (raw-attr "title")) #:author (parse-author (raw-attr "author")) #:date (raw-attr "year") #:location (proceedings-location (raw-attr "booktitle")) #:url (raw-attr "url"))] ["webpage" (make-bib #:title (support-escapes (raw-attr "title")) #:author (parse-author (raw-attr "author")) #:date (raw-attr "year") #:url (raw-attr "url"))] ["mastersthesis" (make-bib #:title (support-escapes (raw-attr "title")) #:author (parse-author (raw-attr "author")) #:date (raw-attr "year") #:location (raw-attr "school") #:url (raw-attr "url"))] ["phdthesis" (make-bib #:title (support-escapes (raw-attr "title")) #:author (parse-author (raw-attr "author")) #:date (raw-attr "year") #:location (dissertation-location #:institution (raw-attr "school") #:degree "PhD") #:url (raw-attr "url"))] ["techreport" (make-bib #:title (support-escapes (raw-attr "title")) #:author (parse-author (raw-attr "author")) #:date (raw-attr "year") #:location (match* ((raw-attr "institution") (raw-attr "number")) [(#f #f) @elem{}] [(l #f) @elem{@|l|}] [(#f n) @elem{@|n|}] [(l n) @elem{@|l|, @|n|}]) #:url (raw-attr "url"))] [_ (make-bib #:title (format "~v" the-raw))])))) (provide (struct-out bibdb) path->bibdb bibtex-parse define-bibtex-cite define-bibtex-cite*)