original commit: eb55bc9a86acd4169ee29a66ac1b8abcd1ef1a62
This commit is contained in:
Jay McCarthy 2011-04-21 13:44:38 -06:00
parent 1d0f2af4f0
commit f423da24e0

View File

@ -1,5 +1,6 @@
#lang racket/base #lang racket/base
(require racket/function (require racket/function
racket/match
racket/list) racket/list)
(define-syntax-rule (define-syntax-rule
@ -11,85 +12,97 @@
(define this-cite (define this-cite
(curry cite bibtex-db)))) (curry cite bibtex-db))))
(require parser-tools/lex (define (bibtex-parse ip)
parser-tools/yacc (define (read-while pred ip)
(prefix-in : parser-tools/lex-sre)) (match (peek-char ip)
[(? pred)
(read-char ip)
(read-until pred ip)]
[_
(void)]))
(define (read-until pred ip)
(list->string
(let loop ()
(match (peek-char ip)
[(? pred)
(cons (read-char ip) (loop))]
[_
empty]))))
(define-empty-tokens toks (AT LBRACE RBRACE COMMA EQUALS HASH EOF)) (define (slurp-whitespace ip)
(define-tokens rtoks (NUM TAG STR)) (read-while char-whitespace? ip))
(define bibtex-lex
(lexer-src-pos
["@" (token-AT)]
["{" (token-LBRACE)]
["}" (token-RBRACE)]
["," (token-COMMA)]
["=" (token-EQUALS)]
["#" (token-HASH)]
[(:: "%" (complement (:: any-string #\newline any-string)))
(return-without-pos (bibtex-lex input-port))]
[(:+ (char-range "0" "9"))
(token-NUM (string->number lexeme))]
[(:: alphabetic (complement (:: any-string (:or whitespace (char-set "{},")) any-string)))
(token-TAG (string-downcase lexeme))]
#;[(:: #\" (complement (:: any-string #\" any-string)) #\")
(token-STR (substring lexeme 1 (sub1 (string-length lexeme))))]
[(eof) (token-EOF)]
[any-char (return-without-pos (bibtex-lex input-port))]))
(define bibtex-parse (define (read-entries ip)
(parser (slurp-whitespace ip)
(src-pos) (match (read-char ip)
(tokens toks rtoks) [#\%
(end EOF) (read-line ip)
(start db) (read-entries ip)]
(error [#\@
(lambda (tok-ok? tok-name tok-value start-pos end-pos) (cons (read-entry ip)
(error 'bibtex-parse (read-entries ip))]
"Received ~a token ~a(~s) at ~a:~a-~a:~a" [c
(if tok-ok? "valid" "invalid") (error 'read-entries "Expected % or @, got ~a" c)]))
tok-name tok-value
(position-line start-pos) (position-col start-pos) (define (read-entry ip)
(position-line end-pos) (position-col end-pos)))) (match (peek-string 6 0 ip)
(grammar [(app string-downcase "string")
(db [() empty] (read-string 6 ip)
[(entry db) (cons $1 $2)]) (slurp-whitespace ip)
(entry (match (read-char ip)
[(AT TAG LBRACE elems RBRACE) [#\{
(vector $2 $4)]) (slurp-whitespace ip)
(elems (define tag (read-tag ip))
[() empty] (printf "tag ~a\n" tag)
[(elem) (list $1)] (slurp-whitespace ip)
[(elem COMMA elems) (cons $1 $3)]) (match (read-char ip)
(elem [#\=
[(TAG EQUALS val) (slurp-whitespace ip)
(vector $1 $3)] (define string (read-value ip))
[(TAG) (printf "string (~a,~a)\n" tag string)
$1] (slurp-whitespace ip)
[(NUM) (match (read-char ip)
(number->string $1)]) [#\}
(val (cons tag string)]
[(NUM) $1] [c
[(STR) $1] (error 'read-entry "Parsing string, expected }, got ~a" c)])]
[(TAG) $1] [c
[(LBRACE bvals RBRACE) (error 'read-entry "Parsing string, expected =, got ~a" c)])]
$2] [c
[(val HASH val) (error 'read-entry "Parsing string, expected {, got ~a" c)])]))
(cons $1 $3)])
(bval (define (read-tag ip)
[(val) $1] (slurp-whitespace ip)
[(COMMA) ","]) (read-until char-whitespace? ip))
(bvals
[() empty] (define (read-value ip)
[(bval bvals) (cons $1 $2)])))) (slurp-whitespace ip)
(match (peek-char ip)
[#\{
(read-char ip)
(define first-part (read-until (λ (c) (or (char=? c #\{) (char=? c #\}))) ip))
(match (peek-char ip)
[#\{
(printf "Inner read: ~a\n" first-part)
(string-append first-part (read-value ip))]
[#\}
(read-char ip)
first-part])]
[c
(error 'read-value "Parsing value, expected {, got ~a" c)]))
(with-handlers
([exn? (λ (x)
(printf "~a\n" (read-string 100 ip))
(raise x))])
(read-entries ip)))
(define (path->bibdb pth) (define (path->bibdb pth)
(printf "~a\n" (printf "~a\n"
(with-input-from-file (with-input-from-file
pth pth
(λ () (λ ()
(port-count-lines! (current-input-port)) (bibtex-parse (current-input-port)))))
(bibtex-parse
(λ () (bibtex-lex (current-input-port)))))))
(error 'path->bibdb pth) (error 'path->bibdb pth)
#f) #f)