diff --git a/collects/scriblib/bibtex.rkt b/collects/scriblib/bibtex.rkt index 44399d02..01ca4dbe 100644 --- a/collects/scriblib/bibtex.rkt +++ b/collects/scriblib/bibtex.rkt @@ -12,23 +12,23 @@ (define (perror ip sym fmt . args) (define loc (call-with-values (λ () (port-next-location ip)) list)) (apply error sym (string-append fmt " @ line ~a column ~a byte ~a") (append args loc))) - + (define (read-while pred ip) (list->string (let loop () (match (peek-char ip) - [(? pred) + [(and (? char?) (? pred)) (cons (read-char ip) (loop))] [_ empty])))) - + (define (read-until pred ip) (read-while (negate pred) ip)) - + (define (slurp-whitespace ip) (read-while (λ (c) (and (char? c) (char-whitespace? c))) ip)) - + (define (read-entries ip) (slurp-whitespace ip) (match (read-char ip) @@ -41,10 +41,13 @@ [(? eof-object?) (void)] [c - (perror ip 'read-entries "Expected % or @, got ~v" c)])) - + ;; All other characters are comments. + (read-entries ip)])) + (define (read-entry ip) - (match (read-until (λ (c) (char=? c #\{)) ip) + (match (read-until (λ (c) (or (char=? c #\{) + (char=? c #\())) + ip) [(app string-downcase "string") (slurp-whitespace ip) (match (read-char ip) @@ -84,56 +87,91 @@ (let loop () (slurp-whitespace ip) (define atag (read-tag ip)) - (slurp-whitespace ip) - (match (read-char ip) - [#\= + (cond + [(string=? "" atag) + (read-char ip) + (hash)] + [else (slurp-whitespace ip) - (define aval (read-value ip)) (match (read-char ip) - [#\, - (hash-set (loop) atag aval)] - [#\} - (hash atag aval)] + [#\= + (slurp-whitespace ip) + (define aval (read-value ip)) + (slurp-whitespace ip) + (match (read-char ip) + [#\, + (hash-set (loop) atag aval)] + [#\} + (hash atag aval)] + [c + (perror ip 'read-entry "Parsing entry, expected , or }, got ~v; label is ~v; atag is ~v; aval is ~v" c label atag aval)])] [c - (perror ip 'read-entry "Parsing entry, expected , or }, got ~v; label is ~v; atag is ~v; aval is ~v" c label atag aval)])] - [c - (perror ip 'read-entry "Parsing entry tag, expected =, got ~v; label is ~v; atag is ~v" c label atag)]))) + (perror ip 'read-entry "Parsing entry tag, expected =, got ~v; label is ~v; atag is ~v" c label atag)])]))) (hash-set! ENTRY-DB label (hash-set alist 'type typ))])) - + (define (read-tag ip) (slurp-whitespace ip) - (string-downcase - (read-until + (string-downcase + (read-until (λ (c) (or (char-whitespace? c) - (char=? c #\=))) + (char=? c #\=) + (char=? c #\{) + (char=? c #\}))) ip))) - + + (define (read-braced-value ip) + (read-char ip) + (let loop () + (define first-part (read-until (λ (c) (or (char=? c #\{) (char=? c #\}))) + ip)) + (match (peek-char ip) + [#\{ + (string-append first-part (read-value ip) (loop))] + [#\} + (read-char ip) + first-part]))) + (define (read-value ip) + (slurp-whitespace ip) + (define first-part (read-value-single ip)) + (slurp-whitespace ip) + (match (peek-char ip) + [#\# + (read-char ip) + (string-append first-part (read-value ip))] + [_ + first-part])) + + (define (read-value-single ip) (slurp-whitespace ip) (match (peek-char ip) [#\{ + (read-braced-value ip)] + [#\" (read-char ip) (let loop () - (define first-part (read-until (λ (c) (or (char=? c #\{) (char=? c #\}))) + (define first-part (read-until (λ (c) (or (char=? c #\{) (char=? c #\"))) ip)) (match (peek-char ip) [#\{ - (string-append first-part (read-value ip) (loop))] - [#\} + (string-append first-part (read-braced-value ip) (loop))] + [#\" (read-char ip) first-part]))] [(? char-numeric?) (read-while char-numeric? ip)] [(? char-alphabetic?) - (define string-tag (read-until (λ (c) (char=? c #\,)) ip)) + (define string-tag (read-until (λ (c) (or (char-whitespace? c) + (char=? c #\,))) + ip)) (hash-ref STRING-DB string-tag (λ () string-tag))] [c (perror ip 'read-value "Parsing value, expected {, got ~v" c)])) - + (read-entries ip) - + (bibdb ENTRY-DB (make-hash))) (define (path->bibdb pth) @@ -155,11 +193,11 @@ (define bibtex-db (path->bibdb bib-pth)) (define-cite autobib-cite autobib-citet generate-bibliography-id) (define ((make-citer citer) f . r) - (apply citer - (filter-map + (apply citer + (filter-map (λ (key) - (and (not (string=? "\n" key)) - (generate-bib bibtex-db key))) + (and (not (string=? "\n" key)) + (generate-bib bibtex-db key))) (append-map (curry regexp-split #rx" +") (cons f r))))) (define ~cite-id (make-citer autobib-cite)) @@ -189,7 +227,7 @@ (define (raw-attr a [def #f]) (hash-ref the-raw a def)) (define (raw-attr* a) - (hash-ref the-raw a + (hash-ref the-raw a (λ () (error 'bibtex "Key ~a is missing attribute ~a, has ~a" key a the-raw)))) (match (raw-attr 'type) @@ -228,14 +266,17 @@ (make-bib #:title (raw-attr "title") #:author (parse-author (raw-attr "author")) #:date (raw-attr "year") - #:location + #:location (match* ((raw-attr "institution") (raw-attr "number")) - [(#f #f) @elem{}] - [(l #f) @elem{@|l|}] - [(#f n) @elem{@|n|}] - [(l n) @elem{@|l|, @|n|}]) + [(#f #f) @elem{}] + [(l #f) @elem{@|l|}] + [(#f n) @elem{@|n|}] + [(l n) @elem{@|l|, @|n|}]) #:url (raw-attr "url"))] [_ (make-bib #:title (format "~v" the-raw))])))) -(provide define-bibtex-cite) +(provide (struct-out bibdb) + path->bibdb + bibtex-parse + define-bibtex-cite) diff --git a/collects/scriblib/scribblings/bibtex.scrbl b/collects/scriblib/scribblings/bibtex.scrbl index a09d9c00..6e660f76 100644 --- a/collects/scriblib/scribblings/bibtex.scrbl +++ b/collects/scriblib/scribblings/bibtex.scrbl @@ -18,3 +18,18 @@ Uses @racket[define-cite] from @racketmodname[scriblib/autobib], but augments th Each string is broken along spaces into citations keys that are looked up in the BibTeX database and turned into @racket[bib?] structures. } + +@defstruct*[bibdb ([raw (hash/c string? (hash/c string? string?))] + [bibs (hash/c string? bib?)])]{ + Represents a BibTeX database. The @racket[_raw] hash table maps the labels in the file to hash tables of the attributes and their values. The @racket[_bibs] hash table maps the same labels to Scribble data-structures representing the same information. + } + +@defproc[(path->bibdb [path path-string?]) + bibdb?]{ + Parses a path into a BibTeX database. + } + +@defproc[(bibtex-parse [ip input-port?]) + bibdb?]{ + Parses an input port into a BibTeX database. + } diff --git a/collects/tests/scriblib/bibtex.rkt b/collects/tests/scriblib/bibtex.rkt index 2df35c22..f916bc27 100644 --- a/collects/tests/scriblib/bibtex.rkt +++ b/collects/tests/scriblib/bibtex.rkt @@ -6,6 +6,15 @@ (define-runtime-path example.bib "example.bib") (test + (let () + (define example (path->bibdb example.bib)) + (define raw (bibdb-raw example)) + + (test + (hash-ref (hash-ref raw "sweig42") "month") => "march" + (hash-ref (hash-ref raw "sweig42a") "month") => "1~mar" + (hash-ref (hash-ref raw "sweig42b") "month") => "1~march" + (hash-ref (hash-ref raw "sweig42c") "month") => "1~marcha")) (let () (define-bibtex-cite example.bib ~cite-id citet-id generate-bibliography-id) diff --git a/collects/tests/scriblib/example.bib b/collects/tests/scriblib/example.bib index 2c269379..81408e36 100644 --- a/collects/tests/scriblib/example.bib +++ b/collects/tests/scriblib/example.bib @@ -309,3 +309,91 @@ Year = {1987}, Bdsk-Url-1 = {http://doi.acm.org/10.1145/41625.41654}} +@article{hochreiter_long_1997, + author = {Hochreiter, S. and Schmidhuber, J.}, + title = {Long {Short-Term} Memory}, + volume = {9}, + number = {8}, + journal = {Neural Computation}, + year = {1997}, + pages = {1735--1780} +} + +@Article{py03, + author = {Xavier D\'ecoret}, + title = "PyBiTex", + year = 2003 +} + +@Article{key03, + title = "A {bunch {of} braces {in}} title" +} + +@Article{key01, + author = "Simon {"}the {saint"} Templar", +} + +@Article{key01, + title = "The history of @ sign" +} + +Some {{comments} with unbalanced braces +....and a "commented" entry... + +Book{landru21, + author = {Landru, Henri D\'esir\'e}, + title = {A hundred recipes for you wife}, + publisher = {Culinary Expert Series}, + year = 1921 +} + +..some other comments..before a valid entry... + +@Book{steward03, + author = { Martha Steward }, + title = {Cooking behind bars}, + publisher = {Culinary Expert Series}, + year = 2003 +} + +...and finally an entry commented by the use of the special @Comment entry type. + +@Comment{steward03, + author = {Martha Steward}, + title = {Cooking behind bars}, + publisher = {Culinary Expert Series}, + year = 2003 +} + +@Comment{ + @Book{steward03, + author = {Martha Steward}, + title = {Cooking behind bars}, + publisher = {Culinary Expert Series}, + year = 2003 + } +} + +@String{mar = "march"} + +@Book{sweig42, + Author = { Stefan Sweig }, + title = { The impossible book }, + publisher = { Dead Poet Society}, + year = 1942, + month = mar +} + +@Book{sweig42a, + month = "1~mar" +} + +@Book{sweig42b, + month = "1~" # mar +} + +@Book{sweig42c, + month = "1~" # mar # "a" +} + +@preamble{"This bibliography was generated on \today"}