Progress
original commit: eb55bc9a86acd4169ee29a66ac1b8abcd1ef1a62
This commit is contained in:
parent
1d0f2af4f0
commit
f423da24e0
|
@ -1,5 +1,6 @@
|
||||||
#lang racket/base
|
#lang racket/base
|
||||||
(require racket/function
|
(require racket/function
|
||||||
|
racket/match
|
||||||
racket/list)
|
racket/list)
|
||||||
|
|
||||||
(define-syntax-rule
|
(define-syntax-rule
|
||||||
|
@ -11,85 +12,97 @@
|
||||||
(define this-cite
|
(define this-cite
|
||||||
(curry cite bibtex-db))))
|
(curry cite bibtex-db))))
|
||||||
|
|
||||||
(require parser-tools/lex
|
(define (bibtex-parse ip)
|
||||||
parser-tools/yacc
|
(define (read-while pred ip)
|
||||||
(prefix-in : parser-tools/lex-sre))
|
(match (peek-char ip)
|
||||||
|
[(? pred)
|
||||||
|
(read-char ip)
|
||||||
|
(read-until pred ip)]
|
||||||
|
[_
|
||||||
|
(void)]))
|
||||||
|
(define (read-until pred ip)
|
||||||
|
(list->string
|
||||||
|
(let loop ()
|
||||||
|
(match (peek-char ip)
|
||||||
|
[(? pred)
|
||||||
|
(cons (read-char ip) (loop))]
|
||||||
|
[_
|
||||||
|
empty]))))
|
||||||
|
|
||||||
(define-empty-tokens toks (AT LBRACE RBRACE COMMA EQUALS HASH EOF))
|
(define (slurp-whitespace ip)
|
||||||
(define-tokens rtoks (NUM TAG STR))
|
(read-while char-whitespace? ip))
|
||||||
(define bibtex-lex
|
|
||||||
(lexer-src-pos
|
|
||||||
["@" (token-AT)]
|
|
||||||
["{" (token-LBRACE)]
|
|
||||||
["}" (token-RBRACE)]
|
|
||||||
["," (token-COMMA)]
|
|
||||||
["=" (token-EQUALS)]
|
|
||||||
["#" (token-HASH)]
|
|
||||||
[(:: "%" (complement (:: any-string #\newline any-string)))
|
|
||||||
(return-without-pos (bibtex-lex input-port))]
|
|
||||||
[(:+ (char-range "0" "9"))
|
|
||||||
(token-NUM (string->number lexeme))]
|
|
||||||
[(:: alphabetic (complement (:: any-string (:or whitespace (char-set "{},")) any-string)))
|
|
||||||
(token-TAG (string-downcase lexeme))]
|
|
||||||
#;[(:: #\" (complement (:: any-string #\" any-string)) #\")
|
|
||||||
(token-STR (substring lexeme 1 (sub1 (string-length lexeme))))]
|
|
||||||
[(eof) (token-EOF)]
|
|
||||||
[any-char (return-without-pos (bibtex-lex input-port))]))
|
|
||||||
|
|
||||||
(define bibtex-parse
|
(define (read-entries ip)
|
||||||
(parser
|
(slurp-whitespace ip)
|
||||||
(src-pos)
|
(match (read-char ip)
|
||||||
(tokens toks rtoks)
|
[#\%
|
||||||
(end EOF)
|
(read-line ip)
|
||||||
(start db)
|
(read-entries ip)]
|
||||||
(error
|
[#\@
|
||||||
(lambda (tok-ok? tok-name tok-value start-pos end-pos)
|
(cons (read-entry ip)
|
||||||
(error 'bibtex-parse
|
(read-entries ip))]
|
||||||
"Received ~a token ~a(~s) at ~a:~a-~a:~a"
|
[c
|
||||||
(if tok-ok? "valid" "invalid")
|
(error 'read-entries "Expected % or @, got ~a" c)]))
|
||||||
tok-name tok-value
|
|
||||||
(position-line start-pos) (position-col start-pos)
|
(define (read-entry ip)
|
||||||
(position-line end-pos) (position-col end-pos))))
|
(match (peek-string 6 0 ip)
|
||||||
(grammar
|
[(app string-downcase "string")
|
||||||
(db [() empty]
|
(read-string 6 ip)
|
||||||
[(entry db) (cons $1 $2)])
|
(slurp-whitespace ip)
|
||||||
(entry
|
(match (read-char ip)
|
||||||
[(AT TAG LBRACE elems RBRACE)
|
[#\{
|
||||||
(vector $2 $4)])
|
(slurp-whitespace ip)
|
||||||
(elems
|
(define tag (read-tag ip))
|
||||||
[() empty]
|
(printf "tag ~a\n" tag)
|
||||||
[(elem) (list $1)]
|
(slurp-whitespace ip)
|
||||||
[(elem COMMA elems) (cons $1 $3)])
|
(match (read-char ip)
|
||||||
(elem
|
[#\=
|
||||||
[(TAG EQUALS val)
|
(slurp-whitespace ip)
|
||||||
(vector $1 $3)]
|
(define string (read-value ip))
|
||||||
[(TAG)
|
(printf "string (~a,~a)\n" tag string)
|
||||||
$1]
|
(slurp-whitespace ip)
|
||||||
[(NUM)
|
(match (read-char ip)
|
||||||
(number->string $1)])
|
[#\}
|
||||||
(val
|
(cons tag string)]
|
||||||
[(NUM) $1]
|
[c
|
||||||
[(STR) $1]
|
(error 'read-entry "Parsing string, expected }, got ~a" c)])]
|
||||||
[(TAG) $1]
|
[c
|
||||||
[(LBRACE bvals RBRACE)
|
(error 'read-entry "Parsing string, expected =, got ~a" c)])]
|
||||||
$2]
|
[c
|
||||||
[(val HASH val)
|
(error 'read-entry "Parsing string, expected {, got ~a" c)])]))
|
||||||
(cons $1 $3)])
|
|
||||||
(bval
|
(define (read-tag ip)
|
||||||
[(val) $1]
|
(slurp-whitespace ip)
|
||||||
[(COMMA) ","])
|
(read-until char-whitespace? ip))
|
||||||
(bvals
|
|
||||||
[() empty]
|
(define (read-value ip)
|
||||||
[(bval bvals) (cons $1 $2)]))))
|
(slurp-whitespace ip)
|
||||||
|
(match (peek-char ip)
|
||||||
|
[#\{
|
||||||
|
(read-char ip)
|
||||||
|
(define first-part (read-until (λ (c) (or (char=? c #\{) (char=? c #\}))) ip))
|
||||||
|
(match (peek-char ip)
|
||||||
|
[#\{
|
||||||
|
(printf "Inner read: ~a\n" first-part)
|
||||||
|
(string-append first-part (read-value ip))]
|
||||||
|
[#\}
|
||||||
|
(read-char ip)
|
||||||
|
first-part])]
|
||||||
|
[c
|
||||||
|
(error 'read-value "Parsing value, expected {, got ~a" c)]))
|
||||||
|
|
||||||
|
(with-handlers
|
||||||
|
([exn? (λ (x)
|
||||||
|
(printf "~a\n" (read-string 100 ip))
|
||||||
|
(raise x))])
|
||||||
|
(read-entries ip)))
|
||||||
|
|
||||||
(define (path->bibdb pth)
|
(define (path->bibdb pth)
|
||||||
(printf "~a\n"
|
(printf "~a\n"
|
||||||
(with-input-from-file
|
(with-input-from-file
|
||||||
pth
|
pth
|
||||||
(λ ()
|
(λ ()
|
||||||
(port-count-lines! (current-input-port))
|
(bibtex-parse (current-input-port)))))
|
||||||
(bibtex-parse
|
|
||||||
(λ () (bibtex-lex (current-input-port)))))))
|
|
||||||
(error 'path->bibdb pth)
|
(error 'path->bibdb pth)
|
||||||
#f)
|
#f)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user