racket/collects/combinator-parser/private-combinator/combinator.scm
Kathy Gray fb40b02b42 Moved the combinator parser out of parser-tools to alleviate the problem the
dependence on lazy had for the mzscheme build

svn: r5728
2007-03-04 23:30:43 +00:00

427 lines
24 KiB
Scheme

(module combinator (lib "lazy.ss" "lazy")
(require (lib "unit.ss")
(only (lib "etc.ss") opt-lambda))
(require "structs.scm"
"parser-sigs.ss"
(lib "lex.ss" "parser-tools"))
(provide (all-defined))
(define-unit combinators@
(import error-format-parameters^ ranking-parameters^ language-dictionary^)
(export combinator-parser-forms^)
(define return-name "dummy")
;terminal: ('a -> bool 'a -> 'b string) -> ( (list 'a) -> res )
(define terminal
(opt-lambda (pred build name [spell? #f] [case? #f] [class? #f])
(let* ([fail-str (string-append "failed " name)]
[t-name
(lambda (t) (if src? (token-name (position-token-token t)) (token-name t)))]
[t-val
(lambda (t) (if src? (token-value (position-token-token t)) (token-value t)))]
[spell? (if spell? spell?
(lambda (token)
(when (position-token? token) (set! token (position-token-token token)))
(and (token-value token)
(misspelled name (token-value token)))))]
[case? (if case? case?
(lambda (token)
(when (position-token? token) (set! token (position-token-token token)))
(and (token-value token)
(misscap name (token-value token)))))]
[class? (if class? class?
(lambda (token)
(when (position-token? token) (set! token (position-token-token token)))
(missclass name (token-name token))))]
[make-fail
(lambda (c n k i u)
(make-terminal-fail c (if src?
(make-src-lst (position-token-start-pos i)
(position-token-end-pos i))
null)
n 0 u k (if src? (position-token-token i) i)))]
[value (lambda (t) (or (t-val t) name))]
[builder
(if src?
(lambda (token) (build (position-token-token token)
(position-token-start-pos token)
(position-token-end-pos token)))
build)])
(opt-lambda (input [last-src (list 0 0 0 0)] [alts 1])
#;(!!! (printf "terminal ~a~n" name))
(cond
[(eq? input return-name) name]
[(null? input)
(make-terminal-fail null last-src .4 0 0 'end #f)]
[(pred (if src? (position-token-token (car input)) (car input)))
(make-res (list (builder (car input))) (cdr input)
name (value (car input)) 1 #f (car input))]
[else
#;(printf "Incorrect input for ~a : ~a miscase? ~a misspell? ~a ~n" name
(cond
[(and (position-token? (car input))
(token-value (position-token-token (car input))))
(token-value (position-token-token (car input)))]
[(position-token? (car input))
(token-name (position-token-token (car input)))]
[else (car input)])
(case? (car input))
(spell? (car input)))
(fail-res (cdr input)
(let-values ([(chance kind may-use)
(cond
[(case? (car input)) (values 9/10 'misscase 1)]
[(spell? (car input))
(values 4/5 'misspell 1)]
[(class? (car input)) (values 2/5 'missclass 1)]
[else (values 1/5 'wrong 0)])])
(make-fail chance name kind (car input) may-use)))])))))
;seq: ( (list ((list 'a) -> res)) ((list 'b) -> 'c) string -> ((list 'a) -> result)
(define seq
(opt-lambda (sub-list build name [id-position 0])
(let* ([sequence-length (length sub-list)]
[memo-table (make-hash-table 'weak)]
[prev (lambda (x)
(cond [(eq? x return-name) "default previous"]
[else (fail-res null null)]))]
[builder
(lambda (r)
(cond
[(res? r)
(make-res (list (build (res-a r))) (res-rest r)
name (res-id r) (res-used r)
(res-possible-error r)
(res-first-tok r))]
[(repeat-res? r)
(make-res (list (build (res-a (repeat-res-a r))))
(res-rest (repeat-res-a r))
name (res-id (repeat-res-a r))
(res-used (repeat-res-a r))
(repeat-res-stop r)
(res-first-tok (repeat-res-a r)))]))]
[my-error (sequence-error-gen name sequence-length)]
[my-walker (seq-walker id-position name my-error)])
(opt-lambda (input [alts 1] [last-src (list 0 0 0 0)])
#;(printf "seq ~a~n" name)
(cond
[(eq? input return-name) name]
[(hash-table-get memo-table input #f) (hash-table-get memo-table input)]
[(null? sub-list)
(builder (make-res null input name #f 0 #f #f))]
[else
(let* ([pre-build-ans (my-walker sub-list input prev #f #f null 0 alts last-src)]
[ans
(cond
[(and (res? pre-build-ans) (res-a pre-build-ans)) (builder pre-build-ans)]
[(pair? pre-build-ans) (map builder pre-build-ans)]
[else pre-build-ans])])
(hash-table-put! memo-table input ans)
#;(printf "sequence ~a returning ~n" name)
ans)])))))
;seq-walker: int string error-gen -> [(list parser) (list alpha) parser result (U bool string) (list string) int int -> result
(define (seq-walker id-position seq-name build-error)
(letrec ([next-res
(lambda (a id used tok rst)
(cond
[(res? rst)
(make-res (append a (res-a rst)) (res-rest rst)
seq-name (or id (res-id rst))
(+ used (res-used rst)) (res-possible-error rst) tok)]
[(repeat-res? rst)
(make-res (append a (res-a (repeat-res-a rst)))
(res-rest (repeat-res-a rst)) seq-name
(or id (res-id (repeat-res-a rst)))
(+ used (res-used (repeat-res-a rst)))
(repeat-res-stop rst) tok)]))]
[walker
(lambda (subs input previous? look-back curr-id seen used alts last-src)
(let* ([next-preds (cdr subs)]
[curr-pred (car subs)]
[id-spot? (= id-position (add1 (length seen)))]
[next-call
(lambda (old-result curr curr-name new-id tok alts)
(let* ([old-answer (res-a old-result)]
[rest (res-rest old-result)]
[old-used (res-used old-result)]
[rsts (walker next-preds rest curr-pred curr
(or new-id curr-id) (cons curr-name seen)
(+ old-used used) alts
(make-src-lst (res-first-tok old-result)))])
(cond
[(and (res? rsts) (res-a rsts))
(next-res old-answer new-id old-used tok rsts)]
[(res? rsts) (fail-res rest (res-msg rsts))]
[(pair? rsts)
(map (lambda (rst) (next-res old-answer new-id old-used tok rst))
rsts)])))])
(cond
[(null? next-preds)
(build-error (curr-pred input last-src)
(previous? input) (previous? return-name)
look-back used curr-id seen alts last-src)]
[else
#;(printf "seq-walker called: else case~n")
(let ([fst (curr-pred input last-src)])
(cond
[(res? fst)
(cond
[(res-a fst) (next-call fst fst (res-msg fst) (and id-spot? (res-id fst))
(res-first-tok fst) alts)]
[else
(build-error fst (previous? input) (previous? return-name)
look-back used curr-id seen alts last-src)])]
[(repeat-res? fst) (next-call (repeat-res-a fst) fst
(res-msg (repeat-res-a fst)) #f
(res-first-tok (repeat-res-a fst)) alts)]
[(or (choice-res? fst) (pair? fst))
#;(printf "choice-res or pair: ~a ~a ~n"
(if (choice-res? fst) (map res-rest (choice-res-matches fst)) fst)
(if (choice-res? fst) (map res-a (choice-res-matches fst)) fst))
(let*-values
([(lst name curr)
(if (choice-res? fst)
(values (choice-res-matches fst)
(lambda (_) (choice-res-name fst))
(lambda (_) fst))
(values fst res-msg (lambda (x) x)))]
[(new-alts) (+ alts (length lst))]
[(rsts)
(map (lambda (res)
(cond
[(res? res)
(next-call res (curr res) (name res) (and id-spot? (res-id res))
(res-first-tok res) new-alts)]
[(repeat-res? res)
(next-call (repeat-res-a res) res
(res-msg (repeat-res-a res)) #f
(res-first-tok (repeat-res-a res)) new-alts)])) lst)]
[(correct-rsts) (correct-list rsts)])
#;(printf "correct-rsts ~a~n" (map res-a correct-rsts))
#;(printf "rsts: ~a~n" (map res-a rsts))
(cond
[(null? correct-rsts)
(let ([fails (map (lambda (rst)
(res-msg
(build-error rst (previous? input) (previous? return-name)
look-back used curr-id seen alts last-src)))
rsts)])
(fail-res input
(make-options-fail
(rank-choice (map fail-type-chance fails)) #f seq-name
(rank-choice (map fail-type-used fails))
(rank-choice (map fail-type-may-use fails)) fails)))]
[else correct-rsts]))]))])))])
walker))
;get-fail-info: fail-type -> (values symbol 'a 'b)
(define (get-fail-info fail)
(cond
[(terminal-fail? fail)
(values (terminal-fail-kind fail)
(fail-type-name fail)
(terminal-fail-found fail))]
[(sequence-fail? fail)
(values 'sub-seq (sequence-fail-expected fail) fail)]
[(choice-fail? fail) (values 'choice null fail)]
[(options-fail? fail) (values 'options null fail)]))
;update-src: symbol src-list src-list token -> src-list
(define (update-src error-kind src prev-src tok)
(and src?
(case error-kind
[(choice options) prev-src]
[(sub-seq misscase misspell end) src]
[(missclass wrong) (update-src-start src (position-token-start-pos tok))])))
;build-sequence-error: result boolean result string int [U #f string] [listof string] int int -> result
(define (sequence-error-gen name len)
(lambda (old-res prev prev-name look-back used id seen alts last-src)
(cond
[(and (pair? old-res) (null? (cdr old-res))) (car old-res)]
[(repeat-res? old-res)
(cond
[(fail-type? (repeat-res-stop old-res))
(let ([res (repeat-res-a old-res)])
(make-res (res-a res) (res-rest res) (res-msg res) (res-id res) (res-used res)
(repeat-res-stop old-res) (res-first-tok res)))]
[else (repeat-res-a old-res)])]
[(or (and (res? old-res) (res-a old-res))
(choice-res? old-res)
(pair? old-res)) old-res]
[else
;There actually was an error
(fail-res (res-rest old-res)
(let*-values ([(fail)
(cond
[(and (repeat-res? look-back)
(fail-type? (repeat-res-stop look-back))
(> (fail-type-chance (repeat-res-stop look-back))
(fail-type-chance (res-msg old-res))))
(repeat-res-stop look-back)]
[else (res-msg old-res)])]
[(kind expected found) (get-fail-info fail)]
[(new-src) (update-src kind
(fail-type-src fail)
last-src
(res-first-tok old-res))]
[(seen-len) (length seen)]
[(updated-len) (+ (- used seen-len) len)])
#;(printf "sequence ~a failed.~n seen ~a~n" name (reverse seen))
#;(when (repeat-res? look-back)
(printf "look-back ~a : ~a vs ~a : ~a > ~a~n"
(repeat-res-stop look-back)
(and (fail-type? (repeat-res-stop look-back)) (fail-type-name (repeat-res-stop look-back)))
(fail-type-name (res-msg old-res))
(and (fail-type? (repeat-res-stop look-back)) (fail-type-chance (repeat-res-stop look-back)))
(fail-type-chance (res-msg old-res))))
#;(printf "old-probability ~a new probability ~a~n"
(cond
[(= 0 used) (fail-type-chance fail)]
[else (+ (* (fail-type-chance fail) (/ 1 updated-len))
(/ used updated-len))])
(compute-chance len seen-len used alts (fail-type-chance fail)))
(make-sequence-fail
(cond
[(= 0 used) (fail-type-chance fail)]
[else (compute-chance len seen-len used alts (fail-type-chance fail))])
(fail-type-src fail)
name used
(+ used (fail-type-may-use fail))
id kind (reverse seen) expected found (and (res? prev) (res-a prev) (res-msg prev))
prev-name)))])))
(define (compute-chance expected-length seen-length used-toks num-alts sub-chance)
(let* ([revised-expectation (+ (- used-toks seen-length) expected-length)]
[probability-with-sub (* (/ (add1 used-toks) revised-expectation) (/ 1 num-alts))]
[probability-without-sub (* (/ used-toks revised-expectation) (/ 1 num-alts))]
[expected-sub probability-with-sub]
[expected-no-sub probability-without-sub]
[probability (/ (* expected-sub sub-chance) (+ (* expected-sub sub-chance)
(* expected-no-sub (- 1 sub-chance))))])
#;(printf "compute-chance: args ~a ~a ~a ~a ~a~n"
expected-length seen-length used-toks num-alts sub-chance)
#;(printf "compute-chance: intermediate values: ~a ~a ~a ~a ~a~n"
revised-expectation probability-with-sub probability-without-sub expected-sub expected-no-sub)
probability))
;repeat: (list 'a) -> result -> (list 'a) -> result
(define (repeat sub)
(letrec ([repeat-name (string-append "any number of " (sub return-name))]
[memo-table (make-hash-table 'weak)]
[process-rest
(lambda (curr-ans rest-ans)
(cond
[(repeat-res? rest-ans)
(let ([a (res-a curr-ans)]
[rest (repeat-res-a rest-ans)])
(make-repeat-res
(cond
[(res? rest)
(make-res (append a (res-a rest)) (res-rest rest) repeat-name ""
(+ (res-used curr-ans) (res-used rest)) (res-possible-error rest)
(res-first-tok curr-ans))]
[(and (pair? rest) (null? (cdr rest)))
(make-res (append a (res-a (car rest))) (res-rest (car rest)) repeat-name ""
(+ (res-used curr-ans) (res-used (car rest)))
(res-possible-error (car rest))
(res-first-tok curr-ans))]
[(pair? rest)
(correct-list
(map (lambda (rs)
(make-res (append a (res-a rs)) (res-rest rs) repeat-name ""
(+ (res-used curr-ans) (res-used rs))
(res-possible-error rs)
(res-first-tok curr-ans)))
rest))])
(repeat-res-stop rest-ans)))]
[(pair? rest-ans)
(map (lambda (r) (process-rest curr-ans r)) rest-ans)]))])
(opt-lambda (input [last-src (list 0 0 0 0)] [alts 1])
(cond
[(eq? input return-name) repeat-name]
[(hash-table-get memo-table input #f) (hash-table-get memo-table input)]
[else
(let ([ans
(let loop ([curr-input input])
(cond
[(null? curr-input)
(make-repeat-res (make-res null null repeat-name "" 0 #f #f) 'out-of-input)]
[else
(let ([this-res (sub curr-input last-src)])
#;(printf "Repeat of ~a called it's repeated entity: ~a~n" repeat-name this-res)
(cond
[(and (res? this-res) (res-a this-res))
(process-rest this-res (loop (res-rest this-res)))]
[(res? this-res)
(make-repeat-res (make-res null curr-input repeat-name "" 0 #f #f) (res-msg this-res))]
[(or (choice-res? this-res) (pair? this-res))
(map (lambda (match) (process-rest match (loop (res-rest match))))
(if (choice-res? this-res) (choice-res-matches this-res) this-res))]))]))])
(hash-table-put! memo-table input ans)
#;(printf "repeat of ~a ended with ans ~a~n" repeat-name ans)
ans)]))))
;choice: [list [[list 'a ] -> result]] name -> result
(define (choice opt-list name)
(let ([memo-table (make-hash-table 'weak)]
[num-choices (length opt-list)]
[choice-names (map (lambda (o) (o return-name)) opt-list)])
(opt-lambda (input [last-src (list 0 0 0 0)] [alts 1])
#;(printf "choice ~a~n" name)
(let ([sub-opts (sub1 (+ alts num-choices))])
(cond
[(hash-table-get memo-table input #f) (hash-table-get memo-table input)]
[(eq? input return-name) name]
[else
(let* ([options (map (lambda (term) (term input last-src sub-opts)) opt-list)]
[fails (map res-msg options)]
[corrects (correct-list options)]
[ans
(cond
[(null? corrects)
(fail-res input
(make-choice-fail (rank-choice (map fail-type-chance fails)) #f name
(rank-choice (map fail-type-used fails))
(rank-choice (map fail-type-may-use fails))
num-choices choice-names fails))]
[(null? (cdr corrects)) (car corrects)]
[else (make-choice-res name corrects)])])
#;(printf "choice ~a is returning ~a options were ~a ~n" name ans choice-names)
(hash-table-put! memo-table input ans) ans)])))))
;correct-list: (list result) -> (list result)
(define (correct-list subs)
(cond
[(pair? subs)
(cond
[(and (res? (car subs)) (res-a (car subs)))
(cons (car subs) (correct-list (cdr subs)))]
[(choice-res? (car subs))
(append (choice-res-matches (car subs)) (correct-list (cdr subs)))]
[(repeat-res? (car subs))
(correct-list (cons (repeat-res-a (car subs)) (cdr subs)))]
[(pair? (car subs))
(append (car subs) (correct-list (cdr subs)))]
[else (correct-list (cdr subs))])]
[(null? subs) null]))
(define (update-src-start src new-start)
(list (position-line new-start)
(position-col new-start)
(position-offset new-start)
(+ (- (third src) (position-offset new-start))
(fourth src))))
(define (update-src-end src new-end)
(list (first src) (second src) (third src)
(- (position-offset new-end) (third src))))
)
)