From 1f72a6db65b4695cefcf57b735e3f96220b8d551 Mon Sep 17 00:00:00 2001 From: Kathy Gray Date: Tue, 8 Apr 2008 20:33:26 +0000 Subject: [PATCH] Adding controlled laxy data structures for parser svn: r9205 --- .../examples/combinator-example.ss | 4 +- .../private-combinator/combinator-parser.scm | 48 +++++++- .../private-combinator/combinator.scm | 107 +++++++++++++++--- .../private-combinator/errors.scm | 2 +- .../private-combinator/structs.scm | 100 +++++++++------- 5 files changed, 197 insertions(+), 64 deletions(-) diff --git a/collects/combinator-parser/examples/combinator-example.ss b/collects/combinator-parser/examples/combinator-example.ss index 862d697e83..7fadcb66ab 100644 --- a/collects/combinator-parser/examples/combinator-example.ss +++ b/collects/combinator-parser/examples/combinator-example.ss @@ -1,6 +1,6 @@ -(module combinator-example mzscheme +(module combinator-example scheme/base -(require mzlib/unit +(require scheme/unit parser-tools/lex combinator-parser/combinator-unit) diff --git a/collects/combinator-parser/private-combinator/combinator-parser.scm b/collects/combinator-parser/private-combinator/combinator-parser.scm index 381396bb31..3face3c4bd 100644 --- a/collects/combinator-parser/private-combinator/combinator-parser.scm +++ b/collects/combinator-parser/private-combinator/combinator-parser.scm @@ -41,8 +41,54 @@ (position-token-end-pos (car (res-rest result))))))] [(res? result) (fail-type->message (res-msg result))] + [(lazy-opts? result) + #;(printf "lazy-opts ~a~n" result) + (let* ([finished? (lambda (o) + (cond [(res? o) + (and (not (null? (res-a o))) + (null? (res-rest o)))] + [(repeat-res? o) + (eq? (repeat-res-stop o) 'out-of-input)] + [else #f]))] + [possible-errors + (lambda (matches) + (filter res-possible-error + (map (lambda (a) (if (repeat-res? a) (repeat-res-a a) a)) + matches)))] + [result-a + (lambda (res) + (cond + [(res? res) (res-a res)] + [(and (repeat-res? res) + (res? (repeat-res-a res))) + (res-a (repeat-res-a res))] + [else + (error 'parser-internal-errorcl (format "~a" res))]))]) + (let loop ([matched (lazy-opts-matches result)]) + (cond + [(and (pair? matched) (finished? (car matched))) (result-a (car matched))] + [(pair? matched) (loop (cdr matched))] + [(and matched (finished? matched)) (result-a matched)] + [(or (null? matched) matched) + (loop ((if (lazy-choice? result) next-choice next-opt) result))] + [else + (let ([p-errors (possible-errors (lazy-opts-matches result))]) + (cond + [(pair? p-errors) + (let ([fails (cons (lazy-opts-errors result) + (map res-possible-error p-errors))]) + (fail-type->message + (make-options-fail (rank-choice (map fail-type-chance fails)) + #f + (if (lazy-choice? result) + (lazy-choice-name result) "program") + (rank-choice (map fail-type-used fails)) + (rank-choice (map fail-type-may-use fails)) + fails)))] + [(null? p-errors) + (fail-type->message (lazy-opts-errors result))]))])))] [(or (choice-res? result) (pair? result)) - #;(printf "choice-res or pair? ~a~n" (choice-res? result)) + (printf "choice-res or pair? ~a~n" result) (let* ([options (if (choice-res? result) (choice-res-matches result) result)] [finished-options (filter (lambda (o) (cond [(res? o) diff --git a/collects/combinator-parser/private-combinator/combinator.scm b/collects/combinator-parser/private-combinator/combinator.scm index e4cc37b962..d21e8ad018 100644 --- a/collects/combinator-parser/private-combinator/combinator.scm +++ b/collects/combinator-parser/private-combinator/combinator.scm @@ -60,9 +60,7 @@ build)]) (opt-lambda (input [last-src (list 1 0 1 0)] [alts 1]) - #;(printf "terminal ~a~n" name) - #;(printf "input ~a~n" (pair? input)) - #;(printf "input ~a~n" (null? input)) + (printf "terminal ~a~n" name) #;(cond [(eq? input return-name) (printf "dummy given~n")] @@ -130,7 +128,7 @@ [my-error (sequence-error-gen name sequence-length)] [my-walker (seq-walker id-position name my-error)]) (opt-lambda (input [last-src (list 1 0 1 0)] [alts 1]) - #;(unless (eq? input return-name) (printf "seq ~a~n" name)) + (unless (eq? input return-name) (printf "seq ~a~n" name)) (cond [(eq? input return-name) name] [(weak-map-get memo-table input #f) @@ -244,17 +242,52 @@ (next-call (repeat-res-a fst) fst fst (res-msg (repeat-res-a fst)) #f (res-first-tok (repeat-res-a fst)) alts)] + [(or (lazy-choice? fst) (lazy-opts? fst)) + #;(printf "lazy res: ~a ~a ~a~n" fst seq-name (length seen)) + (let* ([opt-r (make-lazy-opts null + (make-options-fail 0 last-src seq-name 0 0 null) + null null)] + [next-c (lambda (res) + (cond + [(res? res) + #;(printf "lazy-choice-res, res ~a ~a~n" seq-name (length seen)) + (next-call res fst res (lazy-choice-name fst) + (and id-spot? (res-id res)) + (res-first-tok res) alts)] + [(repeat-res? res) + #;(printf "lazy- choice-res, repeat-res ~a ~a ~a~n" + (res? (repeat-res-a res)) seq-name (length seen)) + (next-call (repeat-res-a res) res (repeat-res-a res) + (res-msg (repeat-res-a res)) #f + (res-first-tok (repeat-res-a res)) + alts)] + [else (error 'parser-internal-errora (format "~a" res))]))] + [parsed-options (map (lambda (res) (lambda () (next-c res))) + (lazy-opts-matches fst))] + [unparsed-options + (map + (lambda (thunked) + (lambda () + (let ([res (next-choice fst)]) + (if res + (next-c res) + (begin (set-lazy-opts-thunks! opt-r null) #f))))) + (lazy-opts-thunks fst))]) + (set-lazy-opts-thunks! opt-r (append parsed-options unparsed-options)) + (if (next-opt opt-r) + opt-r + (lazy-opts-errors opt-r))) + ] [(or (choice-res? fst) (pair? fst)) - #;(printf "choice-res or pair: ~a ~a ~a~n" - (choice-res? fst) - seq-name (length seen)) + #;(printf "choice-res: ~a ~a ~a~n" fst seq-name (length seen)) (let*-values ([(lst name curr) - (if (choice-res? fst) - (values (choice-res-matches fst) - (lambda (_) (choice-res-name fst)) - (lambda (_) fst)) - (values fst res-msg (lambda (x) x)))] + (cond + [(choice-res? fst) + (values (choice-res-matches fst) + (lambda (_) (choice-res-name fst)) + (lambda (_) fst))] + [else (values fst res-msg (lambda (x) x))])] [(new-alts) (+ alts (length lst))] [(rsts) (map (lambda (res) @@ -277,7 +310,8 @@ #;(printf "case ~a ~a, choice case: intermediate results are ~a~n" seq-name (length seen) lst) (cond - [(null? correct-rsts) + [(and (null? correct-rsts) (or (not (lazy-choice? fst)) + (null? (lazy-opts-thunks fst)))) #;(printf "correct-rsts null for ~a ~a ~n" seq-name (length seen)) (let ([fails (map @@ -293,6 +327,9 @@ seq-name (rank-choice (map fail-type-used fails)) (rank-choice (map fail-type-may-use fails)) fails)))] + [(and (null? correct-rsts) (lazy-choice? fst) (not (null? (lazy-opts-thunks fst)))) + (let loop ([next-res (next-choice fst)]) + (when next-res (loop (next-choice fst))))] [else correct-rsts]))] [else (error 'here3)]))])))]) walker)) @@ -517,6 +554,7 @@ (define (repeat-greedy sub) (letrec ([repeat-name (lambda () (string-append "any number of " (sub return-name)))] [memo-table (make-weak-map)] + [inner-memo-table (make-weak-map)] [process-rest (lambda (curr-ans rest-ans) (cond @@ -567,11 +605,10 @@ #;(printf "length of curr-input for ~a ~a~n" repeat-name (length curr-input)) #;(printf "curr-input ~a~n" (map position-token-token curr-input)) (cond + [(weak-map-get inner-memo-table curr-input #f)(weak-map-get inner-memo-table curr-input)] [(null? curr-input) #;(printf "out of input for ~a~n" repeat-name) (make-repeat-res (make-res null null (repeat-name) "" 0 #f #f) 'out-of-input)] - #;[(weak-map-get memo-table curr-input #f) - (weak-map-get memo-table curr-input)] [else (let ([this-res (sub curr-input curr-src)]) #;(printf "Repeat of ~a called it's repeated entity ~n" (repeat-name)) @@ -592,7 +629,7 @@ (fail-type-chance (res-msg this-res))) (let ([fail (make-repeat-res (make-res null curr-input (repeat-name) "" 0 #f #f) (res-msg this-res))]) - #;(weak-map-put! memo-table curr-input fail) + (weak-map-put! inner-memo-table curr-input fail) fail)] [(repeat-res? this-res) #;(printf "repeat-res case of ~a~n" repeat-name) @@ -625,12 +662,12 @@ ans)])))) ;choice: [list [[list 'a ] -> result]] name -> result - (define (choice opt-list name) + (define (choice2 opt-list name) (let ([memo-table (make-weak-map)] [num-choices (length opt-list)] [choice-names (lambda () (map (lambda (o) (o return-name)) opt-list))]) (opt-lambda (input [last-src (list 0 0 0 0)] [alts 1]) - #;(unless (eq? input return-name) (printf "choice ~a~n" name)) + (unless (eq? input return-name) (printf "choice ~a~n" name)) #;(printf "possible options are ~a~n" choice-names) (let ([sub-opts (sub1 (+ alts num-choices))]) (cond @@ -676,6 +713,40 @@ #;(printf "errors were ~a~n" errors) (weak-map-put! memo-table input ans) ans)]))))) + ;choice: [list [[list 'a ] -> result]] name -> result + (define (choice opt-list name) + (let ([memo-table (make-weak-map)] + [num-choices (length opt-list)] + [choice-names (lambda () (map (lambda (o) (o return-name)) opt-list))]) + (opt-lambda (input [last-src (list 0 0 0 0)] [alts 1]) + (unless (eq? input return-name) (printf "choice ~a~n" name)) + #;(printf "possible options are ~a~n" choice-names) + (let ([sub-opts (sub1 (+ alts num-choices))]) + (cond + [(weak-map-get memo-table input #f) (weak-map-get memo-table input)] + [(eq? input return-name) name] + [else + (let* ([options (map (lambda (term) (lambda () (term input last-src sub-opts))) opt-list)] + [initial-fail (make-choice-fail 0 + (if (or (null? input) (not (position-token? (car input)))) + last-src + (update-src-end last-src + (position-token-end-pos (car input)))) + name + 0 + 0 + num-choices + (choice-names) + (null? input) + null)] + [initial-ans (make-lazy-choice null initial-fail options name)] + [ans + (if (next-choice initial-ans) + initial-ans + (fail-res input (lazy-opts-errors initial-ans)))]) + (printf "choice ~a is returning options were ~a, answer is ~a ~n" name (choice-names) ans) + (weak-map-put! memo-table input ans) ans)]))))) + (define (flatten lst) (cond [(pair? lst) diff --git a/collects/combinator-parser/private-combinator/errors.scm b/collects/combinator-parser/private-combinator/errors.scm index eca4ec8b22..6d71265272 100644 --- a/collects/combinator-parser/private-combinator/errors.scm +++ b/collects/combinator-parser/private-combinator/errors.scm @@ -19,7 +19,7 @@ (let* ([name (fail-type-name fail-type)] [a (a/an name)] [msg (lambda (m) (make-err m (fail-type-src fail-type)))]) - #;(printf "fail-type->message ~a~n" fail-type) + (printf "fail-type->message ~a~n" fail-type) (cond [(terminal-fail? fail-type) (collapse-message diff --git a/collects/combinator-parser/private-combinator/structs.scm b/collects/combinator-parser/private-combinator/structs.scm index 8543d1ed12..7f1d7a9b5e 100644 --- a/collects/combinator-parser/private-combinator/structs.scm +++ b/collects/combinator-parser/private-combinator/structs.scm @@ -1,6 +1,9 @@ (module structs scheme/base - (provide (all-defined-out) #;(except-out make-fail-type)) + (provide (except-out (all-defined-out) + set-choice-fail-messages! + set-lazy-opts-matches!) + ) (require parser-tools/lex) @@ -15,61 +18,74 @@ (position-offset start)))) ;(make-fail-type float fail-src string int int) - (define-struct fail-type (chance src name used may-use) #:transparent) + (define-struct fail-type (chance src name used may-use) #:transparent #:mutable) ;(make-terminal-fail float fail-src string symbol 'a) (define-struct (terminal-fail fail-type) (kind found)) ;(make-sequence-fail float fail-src string symbol (list string) string 'a boolean string) (define-struct (sequence-fail fail-type) (id kind correct expected found repeat? last-seen)) ;(make-choice-fail float fail-src string int (list string) (list fail-type) boolean) - (define-struct (choice-fail fail-type) (options names ended? messages) #:transparent) + (define-struct (choice-fail fail-type) (options names ended? (messages #:mutable)) #:transparent) ;(make-options-fail float #f #f (list fail-type)) - (define-struct (options-fail fail-type) (opts)) - - #;(define (!!!-fail fail) - (let*-values ([(chance src name used may-use) - (if (fail-type? fail) - (values (!!! (fail-type-chance fail)) - (!!! (fail-type-src fail)) - (!!! (fail-type-name fail)) - (!!! (fail-type-used fail)) - (!!! (fail-type-may-use fail))) - (values #f #f #f #f #f))]) - (cond - [(terminal-fail? fail) - (make-terminal-fail chance src name used may-use - (!!! (terminal-fail-kind fail)) - (!!! (terminal-fail-found fail)))] - [(sequence-fail? fail) - (make-sequence-fail chance src name used may-use - (!!! (sequence-fail-id fail)) - (!!! (sequence-fail-kind fail)) - (!!! (sequence-fail-correct fail)) - (!!! (sequence-fail-expected fail)) - (!!!-fail (sequence-fail-found fail)) - (!!! (sequence-fail-repeat? fail)) - (!!! (sequence-fail-last-seen fail)))] - [(choice-fail? fail) - (make-choice-fail chance src name used may-use - (!!! (choice-fail-options fail)) - (!!! (choice-fail-names fail)) - (!!! (choice-fail-ended? fail)) - (map !!!-fail (!!! (choice-fail-messages fail))))] - [(options-fail? fail) - (make-options-fail chance src name used may-use - (map !!!-fail (!!! (options-fail-opts fail))))] - [else (!!! fail)]))) - - - + (define-struct (options-fail fail-type) ((opts #:mutable))) ;result = res | choice-res | repeat-res | (listof (U res choice-res)) ;(make-res (U #f (listof 'b)) (listof 'a) (U string fail-type) (U string 'a) int) [U #f fail-type] token (define-struct res (a rest msg id used possible-error first-tok) #:transparent) - ;make-choice-res string (listof res fail-type) + ;make-choice-res string (listof res) fail-type) (define-struct choice-res (name matches errors) #:transparent) ;(make-repeat-res answer (U symbol fail-type)) (define-struct repeat-res (a stop) #:transparent) + ;(make-lazy-opts (listof res) fail-type (listof (_ => res))) + (define-struct lazy-opts ((matches #:mutable) errors (thunks #:mutable)) #:transparent) + ;(make-lazy-choice (listof res) fail-type (listof (_ -> res)) string) + (define-struct (lazy-choice lazy-opts) (name) #:transparent) + + (define (update-choice-errors failc mss) + (set-choice-fail-messages! failc (cons mss (choice-fail-messages failc))) + (set-fail-type-chance! failc (max (fail-type-chance failc) (fail-type-chance mss))) + (set-fail-type-used! failc (max (fail-type-used failc) (fail-type-used mss))) + (set-fail-type-may-use! failc (max (fail-type-may-use failc) (fail-type-may-use mss)))) + (define (update-opt-errors failc mss) + (set-options-fail-opts! failc (cons mss (options-fail-opts failc))) + (set-fail-type-chance! failc (max (fail-type-chance failc) (fail-type-chance mss))) + (set-fail-type-used! failc (max (fail-type-used failc) (fail-type-used mss))) + (set-fail-type-may-use! failc (max (fail-type-may-use failc) (fail-type-may-use mss)))) + + (define (make-force thunks set-thunks matches set-matches update-errors errors) + (letrec ([next + (lambda (lc) + (printf "next-opt ~a~n" lc) + (cond + [(null? (thunks lc)) #f] + [else + (let ([curr-res ((car (thunks lc)))]) + (set-thunks lc (cdr (thunks lc))) + (and curr-res + (cond + [(or (and (res? curr-res) (res-a curr-res)) + (repeat-res? curr-res) + (choice-res? curr-res) + (lazy-opts? curr-res) + (and (lazy-choice? curr-res) (not (null? (lazy-opts-matches curr-res))))) + (set-matches lc (cons curr-res (matches lc))) + curr-res] + [else + (update-errors (errors lc) + (cond + [(res? curr-res) (res-msg curr-res)] + [(lazy-choice? curr-res) (lazy-opts-errors curr-res)])) + (next lc)])))]))]) + next)) + + (define next-choice + (make-force lazy-opts-thunks set-lazy-opts-thunks! + lazy-opts-matches set-lazy-opts-matches! + update-choice-errors lazy-opts-errors)) + (define next-opt + (make-force lazy-opts-thunks set-lazy-opts-thunks! + lazy-opts-matches set-lazy-opts-matches! + update-opt-errors lazy-opts-errors)) (define (fail-res rst msg) (make-res #f rst msg "" 0 #f #f))