(module combinator (lib "lazy.ss" "lazy") (require (lib "unit.ss") (only (lib "etc.ss") opt-lambda)) (require "structs.scm" "parser-sigs.ss" (lib "lex.ss" "parser-tools")) (provide (all-defined)) (define-unit combinators@ (import error-format-parameters^ ranking-parameters^ language-dictionary^) (export combinator-parser-forms^) (define return-name "dummy") ;terminal: ('a -> bool 'a -> 'b string) -> ( (list 'a) -> res ) (define terminal (opt-lambda (pred build name [spell? #f] [case? #f] [class? #f]) (let* ([fail-str (string-append "failed " name)] [t-name (lambda (t) (if src? (token-name (position-token-token t)) (token-name t)))] [t-val (lambda (t) (if src? (token-value (position-token-token t)) (token-value t)))] [spell? (if spell? spell? (lambda (token) (when (position-token? token) (set! token (position-token-token token))) (and (token-value token) (misspelled name (token-value token)))))] [case? (if case? case? (lambda (token) (when (position-token? token) (set! token (position-token-token token))) (and (token-value token) (misscap name (token-value token)))))] [class? (if class? class? (lambda (token) (when (position-token? token) (set! token (position-token-token token))) (missclass name (token-name token))))] [make-fail (lambda (c n k i u) (make-terminal-fail c (if src? (make-src-lst (position-token-start-pos i) (position-token-end-pos i)) null) n 0 u k (if src? (position-token-token i) i)))] [value (lambda (t) (or (t-val t) name))] [builder (if src? (lambda (token) (build (position-token-token token) (position-token-start-pos token) (position-token-end-pos token))) build)]) (opt-lambda (input [last-src (list 0 0 0 0)] [alts 1]) #;(!!! (printf "terminal ~a~n" name)) (cond [(eq? input return-name) name] [(null? input) (make-terminal-fail null last-src .4 0 0 'end #f)] [(pred (if src? (position-token-token (car input)) (car input))) (make-res (list (builder (car input))) (cdr input) name (value (car input)) 1 #f (car input))] [else #;(printf "Incorrect input for ~a : ~a miscase? ~a misspell? ~a ~n" name (cond [(and (position-token? (car input)) (token-value (position-token-token (car input)))) (token-value (position-token-token (car input)))] [(position-token? (car input)) (token-name (position-token-token (car input)))] [else (car input)]) (case? (car input)) (spell? (car input))) (fail-res (cdr input) (let-values ([(chance kind may-use) (cond [(case? (car input)) (values 9/10 'misscase 1)] [(spell? (car input)) (values 4/5 'misspell 1)] [(class? (car input)) (values 2/5 'missclass 1)] [else (values 1/5 'wrong 0)])]) (make-fail chance name kind (car input) may-use)))]))))) ;seq: ( (list ((list 'a) -> res)) ((list 'b) -> 'c) string -> ((list 'a) -> result) (define seq (opt-lambda (sub-list build name [id-position 0]) (let* ([sequence-length (length sub-list)] [memo-table (make-hash-table 'weak)] [prev (lambda (x) (cond [(eq? x return-name) "default previous"] [else (fail-res null null)]))] [builder (lambda (r) (cond [(res? r) (make-res (list (build (res-a r))) (res-rest r) name (res-id r) (res-used r) (res-possible-error r) (res-first-tok r))] [(repeat-res? r) (make-res (list (build (res-a (repeat-res-a r)))) (res-rest (repeat-res-a r)) name (res-id (repeat-res-a r)) (res-used (repeat-res-a r)) (repeat-res-stop r) (res-first-tok (repeat-res-a r)))]))] [my-error (sequence-error-gen name sequence-length)] [my-walker (seq-walker id-position name my-error)]) (opt-lambda (input [alts 1] [last-src (list 0 0 0 0)]) #;(printf "seq ~a~n" name) (cond [(eq? input return-name) name] [(hash-table-get memo-table input #f) (hash-table-get memo-table input)] [(null? sub-list) (builder (make-res null input name #f 0 #f #f))] [else (let* ([pre-build-ans (my-walker sub-list input prev #f #f null 0 alts last-src)] [ans (cond [(and (res? pre-build-ans) (res-a pre-build-ans)) (builder pre-build-ans)] [(pair? pre-build-ans) (map builder pre-build-ans)] [else pre-build-ans])]) (hash-table-put! memo-table input ans) #;(printf "sequence ~a returning ~n" name) ans)]))))) ;seq-walker: int string error-gen -> [(list parser) (list alpha) parser result (U bool string) (list string) int int -> result (define (seq-walker id-position seq-name build-error) (letrec ([next-res (lambda (a id used tok rst) (cond [(res? rst) (make-res (append a (res-a rst)) (res-rest rst) seq-name (or id (res-id rst)) (+ used (res-used rst)) (res-possible-error rst) tok)] [(repeat-res? rst) (make-res (append a (res-a (repeat-res-a rst))) (res-rest (repeat-res-a rst)) seq-name (or id (res-id (repeat-res-a rst))) (+ used (res-used (repeat-res-a rst))) (repeat-res-stop rst) tok)]))] [walker (lambda (subs input previous? look-back curr-id seen used alts last-src) (let* ([next-preds (cdr subs)] [curr-pred (car subs)] [id-spot? (= id-position (add1 (length seen)))] [next-call (lambda (old-result curr curr-name new-id tok alts) (let* ([old-answer (res-a old-result)] [rest (res-rest old-result)] [old-used (res-used old-result)] [rsts (walker next-preds rest curr-pred curr (or new-id curr-id) (cons curr-name seen) (+ old-used used) alts (make-src-lst (res-first-tok old-result)))]) (cond [(and (res? rsts) (res-a rsts)) (next-res old-answer new-id old-used tok rsts)] [(res? rsts) (fail-res rest (res-msg rsts))] [(pair? rsts) (map (lambda (rst) (next-res old-answer new-id old-used tok rst)) rsts)])))]) (cond [(null? next-preds) (build-error (curr-pred input last-src) (previous? input) (previous? return-name) look-back used curr-id seen alts last-src)] [else #;(printf "seq-walker called: else case~n") (let ([fst (curr-pred input last-src)]) (cond [(res? fst) (cond [(res-a fst) (next-call fst fst (res-msg fst) (and id-spot? (res-id fst)) (res-first-tok fst) alts)] [else (build-error fst (previous? input) (previous? return-name) look-back used curr-id seen alts last-src)])] [(repeat-res? fst) (next-call (repeat-res-a fst) fst (res-msg (repeat-res-a fst)) #f (res-first-tok (repeat-res-a fst)) alts)] [(or (choice-res? fst) (pair? fst)) #;(printf "choice-res or pair: ~a ~a ~n" (if (choice-res? fst) (map res-rest (choice-res-matches fst)) fst) (if (choice-res? fst) (map res-a (choice-res-matches fst)) fst)) (let*-values ([(lst name curr) (if (choice-res? fst) (values (choice-res-matches fst) (lambda (_) (choice-res-name fst)) (lambda (_) fst)) (values fst res-msg (lambda (x) x)))] [(new-alts) (+ alts (length lst))] [(rsts) (map (lambda (res) (cond [(res? res) (next-call res (curr res) (name res) (and id-spot? (res-id res)) (res-first-tok res) new-alts)] [(repeat-res? res) (next-call (repeat-res-a res) res (res-msg (repeat-res-a res)) #f (res-first-tok (repeat-res-a res)) new-alts)])) lst)] [(correct-rsts) (correct-list rsts)]) #;(printf "correct-rsts ~a~n" (map res-a correct-rsts)) #;(printf "rsts: ~a~n" (map res-a rsts)) (cond [(null? correct-rsts) (let ([fails (map (lambda (rst) (res-msg (build-error rst (previous? input) (previous? return-name) look-back used curr-id seen alts last-src))) rsts)]) (fail-res input (make-options-fail (rank-choice (map fail-type-chance fails)) #f seq-name (rank-choice (map fail-type-used fails)) (rank-choice (map fail-type-may-use fails)) fails)))] [else correct-rsts]))]))])))]) walker)) ;get-fail-info: fail-type -> (values symbol 'a 'b) (define (get-fail-info fail) (cond [(terminal-fail? fail) (values (terminal-fail-kind fail) (fail-type-name fail) (terminal-fail-found fail))] [(sequence-fail? fail) (values 'sub-seq (sequence-fail-expected fail) fail)] [(choice-fail? fail) (values 'choice null fail)] [(options-fail? fail) (values 'options null fail)])) ;update-src: symbol src-list src-list token -> src-list (define (update-src error-kind src prev-src tok) (and src? (case error-kind [(choice options) prev-src] [(sub-seq misscase misspell end) src] [(missclass wrong) (update-src-start src (position-token-start-pos tok))]))) ;build-sequence-error: result boolean result string int [U #f string] [listof string] int int -> result (define (sequence-error-gen name len) (lambda (old-res prev prev-name look-back used id seen alts last-src) (cond [(and (pair? old-res) (null? (cdr old-res))) (car old-res)] [(repeat-res? old-res) (cond [(fail-type? (repeat-res-stop old-res)) (let ([res (repeat-res-a old-res)]) (make-res (res-a res) (res-rest res) (res-msg res) (res-id res) (res-used res) (repeat-res-stop old-res) (res-first-tok res)))] [else (repeat-res-a old-res)])] [(or (and (res? old-res) (res-a old-res)) (choice-res? old-res) (pair? old-res)) old-res] [else ;There actually was an error (fail-res (res-rest old-res) (let*-values ([(fail) (cond [(and (repeat-res? look-back) (fail-type? (repeat-res-stop look-back)) (> (fail-type-chance (repeat-res-stop look-back)) (fail-type-chance (res-msg old-res)))) (repeat-res-stop look-back)] [else (res-msg old-res)])] [(kind expected found) (get-fail-info fail)] [(new-src) (update-src kind (fail-type-src fail) last-src (res-first-tok old-res))] [(seen-len) (length seen)] [(updated-len) (+ (- used seen-len) len)]) #;(printf "sequence ~a failed.~n seen ~a~n" name (reverse seen)) #;(when (repeat-res? look-back) (printf "look-back ~a : ~a vs ~a : ~a > ~a~n" (repeat-res-stop look-back) (and (fail-type? (repeat-res-stop look-back)) (fail-type-name (repeat-res-stop look-back))) (fail-type-name (res-msg old-res)) (and (fail-type? (repeat-res-stop look-back)) (fail-type-chance (repeat-res-stop look-back))) (fail-type-chance (res-msg old-res)))) #;(printf "old-probability ~a new probability ~a~n" (cond [(= 0 used) (fail-type-chance fail)] [else (+ (* (fail-type-chance fail) (/ 1 updated-len)) (/ used updated-len))]) (compute-chance len seen-len used alts (fail-type-chance fail))) (make-sequence-fail (cond [(= 0 used) (fail-type-chance fail)] [else (compute-chance len seen-len used alts (fail-type-chance fail))]) (fail-type-src fail) name used (+ used (fail-type-may-use fail)) id kind (reverse seen) expected found (and (res? prev) (res-a prev) (res-msg prev)) prev-name)))]))) (define (compute-chance expected-length seen-length used-toks num-alts sub-chance) (let* ([revised-expectation (+ (- used-toks seen-length) expected-length)] [probability-with-sub (* (/ (add1 used-toks) revised-expectation) (/ 1 num-alts))] [probability-without-sub (* (/ used-toks revised-expectation) (/ 1 num-alts))] [expected-sub probability-with-sub] [expected-no-sub probability-without-sub] [probability (/ (* expected-sub sub-chance) (+ (* expected-sub sub-chance) (* expected-no-sub (- 1 sub-chance))))]) #;(printf "compute-chance: args ~a ~a ~a ~a ~a~n" expected-length seen-length used-toks num-alts sub-chance) #;(printf "compute-chance: intermediate values: ~a ~a ~a ~a ~a~n" revised-expectation probability-with-sub probability-without-sub expected-sub expected-no-sub) probability)) ;repeat: (list 'a) -> result -> (list 'a) -> result (define (repeat sub) (letrec ([repeat-name (string-append "any number of " (sub return-name))] [memo-table (make-hash-table 'weak)] [process-rest (lambda (curr-ans rest-ans) (cond [(repeat-res? rest-ans) (let ([a (res-a curr-ans)] [rest (repeat-res-a rest-ans)]) (make-repeat-res (cond [(res? rest) (make-res (append a (res-a rest)) (res-rest rest) repeat-name "" (+ (res-used curr-ans) (res-used rest)) (res-possible-error rest) (res-first-tok curr-ans))] [(and (pair? rest) (null? (cdr rest))) (make-res (append a (res-a (car rest))) (res-rest (car rest)) repeat-name "" (+ (res-used curr-ans) (res-used (car rest))) (res-possible-error (car rest)) (res-first-tok curr-ans))] [(pair? rest) (correct-list (map (lambda (rs) (make-res (append a (res-a rs)) (res-rest rs) repeat-name "" (+ (res-used curr-ans) (res-used rs)) (res-possible-error rs) (res-first-tok curr-ans))) rest))]) (repeat-res-stop rest-ans)))] [(pair? rest-ans) (map (lambda (r) (process-rest curr-ans r)) rest-ans)]))]) (opt-lambda (input [last-src (list 0 0 0 0)] [alts 1]) (cond [(eq? input return-name) repeat-name] [(hash-table-get memo-table input #f) (hash-table-get memo-table input)] [else (let ([ans (let loop ([curr-input input]) (cond [(null? curr-input) (make-repeat-res (make-res null null repeat-name "" 0 #f #f) 'out-of-input)] [else (let ([this-res (sub curr-input last-src)]) #;(printf "Repeat of ~a called it's repeated entity: ~a~n" repeat-name this-res) (cond [(and (res? this-res) (res-a this-res)) (process-rest this-res (loop (res-rest this-res)))] [(res? this-res) (make-repeat-res (make-res null curr-input repeat-name "" 0 #f #f) (res-msg this-res))] [(or (choice-res? this-res) (pair? this-res)) (map (lambda (match) (process-rest match (loop (res-rest match)))) (if (choice-res? this-res) (choice-res-matches this-res) this-res))]))]))]) (hash-table-put! memo-table input ans) #;(printf "repeat of ~a ended with ans ~a~n" repeat-name ans) ans)])))) ;choice: [list [[list 'a ] -> result]] name -> result (define (choice opt-list name) (let ([memo-table (make-hash-table 'weak)] [num-choices (length opt-list)] [choice-names (map (lambda (o) (o return-name)) opt-list)]) (opt-lambda (input [last-src (list 0 0 0 0)] [alts 1]) #;(printf "choice ~a~n" name) (let ([sub-opts (sub1 (+ alts num-choices))]) (cond [(hash-table-get memo-table input #f) (hash-table-get memo-table input)] [(eq? input return-name) name] [else (let* ([options (map (lambda (term) (term input last-src sub-opts)) opt-list)] [fails (map res-msg options)] [corrects (correct-list options)] [ans (cond [(null? corrects) (fail-res input (make-choice-fail (rank-choice (map fail-type-chance fails)) #f name (rank-choice (map fail-type-used fails)) (rank-choice (map fail-type-may-use fails)) num-choices choice-names fails))] [(null? (cdr corrects)) (car corrects)] [else (make-choice-res name corrects)])]) #;(printf "choice ~a is returning ~a options were ~a ~n" name ans choice-names) (hash-table-put! memo-table input ans) ans)]))))) ;correct-list: (list result) -> (list result) (define (correct-list subs) (cond [(pair? subs) (cond [(and (res? (car subs)) (res-a (car subs))) (cons (car subs) (correct-list (cdr subs)))] [(choice-res? (car subs)) (append (choice-res-matches (car subs)) (correct-list (cdr subs)))] [(repeat-res? (car subs)) (correct-list (cons (repeat-res-a (car subs)) (cdr subs)))] [(pair? (car subs)) (append (car subs) (correct-list (cdr subs)))] [else (correct-list (cdr subs))])] [(null? subs) null])) (define (update-src-start src new-start) (list (position-line new-start) (position-col new-start) (position-offset new-start) (+ (- (third src) (position-offset new-start)) (fourth src)))) (define (update-src-end src new-end) (list (first src) (second src) (third src) (- (position-offset new-end) (third src)))) ) )