From 72e2049f96df86887a8485f15a284dc74e25fabd Mon Sep 17 00:00:00 2001 From: Kathy Gray Date: Mon, 30 Mar 2009 22:44:06 +0000 Subject: [PATCH] First step in implementing updated probability model --- modification to P(error) svn: r14368 --- .../private-combinator/combinator-parser.scm | 11 ++- .../private-combinator/combinator.scm | 94 ++++++++++++------- .../private-combinator/parser-sigs.ss | 2 +- .../private-combinator/structs.scm | 27 ++++++ 4 files changed, 96 insertions(+), 38 deletions(-) diff --git a/collects/combinator-parser/private-combinator/combinator-parser.scm b/collects/combinator-parser/private-combinator/combinator-parser.scm index 044b2d26b9..02200db11d 100644 --- a/collects/combinator-parser/private-combinator/combinator-parser.scm +++ b/collects/combinator-parser/private-combinator/combinator-parser.scm @@ -181,7 +181,7 @@ [else out])))) ) - (define-unit rank-defaults@ + #;(define-unit rank-defaults@ (import) (export ranking-parameters^) (define (rank-choice choices) (apply max choices)) @@ -189,6 +189,15 @@ (rank-misspell rank-caps rank-class rank-wrong rank-end) (values 4/5 9/10 2/5 1/5 2/5))) + (define-unit rank-defaults@ + (import) + (export ranking-parameters^) + (define (rank-choice choices) (apply max choices)) + (define-values + (rank-misspell rank-caps rank-class rank-wrong rank-end rank-repeat) + (values 16/71 18/71 8/71 4/71 8/71 17/71))) + + (define-unit out-struct@ (import) (export out^) diff --git a/collects/combinator-parser/private-combinator/combinator.scm b/collects/combinator-parser/private-combinator/combinator.scm index 4f93ad3004..688c2e8044 100644 --- a/collects/combinator-parser/private-combinator/combinator.scm +++ b/collects/combinator-parser/private-combinator/combinator.scm @@ -15,6 +15,7 @@ (export combinator-parser-forms^) (define return-name "dummy") + (define terminal-occurs "unique-eq") (define (make-weak-map) (make-weak-hasheq))   @@ -34,7 +35,8 @@ ;terminal: ('a -> bool 'a -> 'b string) -> ( (list 'a) -> res ) (define terminal (opt-lambda (pred build name [spell? #f] [case? #f] [class? #f]) - (let* ([fail-str (string-append "failed " name)] + (let* ([memo-table (make-weak-map)] + [fail-str (string-append "failed " name)] [t-name (if src? (lambda (t) (token-name (position-token-token t))) token-name)] [t-val (if src? (lambda (t) (token-value (position-token-token t))) token-value)] [spell? (or spell? @@ -62,49 +64,53 @@ (opt-lambda (input [last-src (list 1 0 1 0)] [alts 1]) #;(printf "terminal ~a~n" name) #;(cond - [(eq? input return-name) - (printf "dummy given~n")] - [(null? input) (printf "null given~n")] + [(eq? input return-name) (printf "name requested~n")] + [(null? input) (printf "null input~n")] [else (let ([token (position-token-token (car input))]) - (printf "Look at token ~a~n" token) - (printf "calling pred: ~a~n" (pred token)))]) + (printf "Token given ~a, match? ~a~n" token (pred token)))]) (cond [(eq? input return-name) name] - [(null? input) - (fail-res null (make-terminal-fail rank-end last-src name 0 0 'end #f))] - [else - (let* ([curr-input (car input)] - [token (if src? (position-token-token curr-input) curr-input)]) - (cond - [(pred token) - (make-res (list (builder curr-input)) - (cdr input) name - (value curr-input) 1 #f curr-input)] - [else - #;(printf "Incorrect input for ~a : ~a miscase? ~a misspell? ~a ~n" name - (cond - [(token-value token) - (token-value token)] - [else (token-name token)]) - (case? curr-input) - (spell? curr-input)) - (fail-res (cdr input) - (let-values ([(chance kind may-use) - (cond - [(case? curr-input) (values rank-caps 'misscase 1)] - [(> (spell? curr-input) 3/5) - (values (* rank-misspell - (spell? curr-input)) 'misspell 1)] - [(class? curr-input) (values rank-class 'missclass 1)] - [else (values rank-wrong 'wrong 0)])]) - (make-fail chance name kind curr-input may-use)))]))]))))) - + [(eq? input terminal-occurs) (list (make-occurs name 1))] + [(weak-map-get memo-table input #f) (weak-map-get memo-table input)] + [else + (let ([result + (cond + [(null? input) + (fail-res null (make-terminal-fail rank-end last-src name 0 0 'end #f))] + [else + (let* ([curr-input (car input)] + [token (if src? (position-token-token curr-input) curr-input)]) + (cond + [(pred token) + (make-res (list (builder curr-input)) + (cdr input) name + (value curr-input) 1 #f curr-input)] + [else + #;(printf "Incorrect input for ~a : ~a miscase? ~a misspell? ~a ~n" name + (cond + [(token-value token) (token-value token)] + [else (token-name token)]) + (case? curr-input) + (spell? curr-input)) + (fail-res (cdr input) + (let-values ([(chance kind may-use) + (cond + [(case? curr-input) (values rank-caps 'misscase 1)] + [(> (spell? curr-input) 3/5) + (values (* rank-misspell + (spell? curr-input)) 'misspell 1)] + [(class? curr-input) (values rank-class 'missclass 1)] + [else (values rank-wrong 'wrong 0)])]) + (make-fail chance name kind curr-input may-use)))]))])]) + (weak-map-put! memo-table input result))]))))) + ;seq: ( (list ((list 'a) -> res)) ((list 'b) -> 'c) string -> ((list 'a) -> result) (define seq (opt-lambda (sub-list build name [id-position 0]) (let* ([sequence-length (length sub-list)] [memo-table (make-weak-map)] + [terminal-counts #f] [prev (lambda (x) (cond [(eq? x return-name) "default previous"] [else (fail-res null null)]))] @@ -131,6 +137,13 @@ #;(unless (eq? input return-name) (printf "seq ~a~n" name)) (cond [(eq? input return-name) name] + [(eq? input terminal-occurs) + (or terminal-counts + (begin + (set! terminal-counts 'counting) + (set! terminal-counts + (consolidate-count (map (lambda (symbol) (symbol terminal-occurs)) sub-list))) + terminal-counts))] [(weak-map-get memo-table input #f) (weak-map-get memo-table input)] [(null? sub-list) @@ -659,6 +672,7 @@ (opt-lambda (input [start-src (list 1 0 1 0)] [alts 1]) (cond [(eq? input return-name) (repeat-name)] + [(eq? input terminal-occurs) (sub terminal-occurs)] [(weak-map-get memo-table input #f)(weak-map-get memo-table input)] [else (let ([ans @@ -739,6 +753,7 @@ ;choice: [list [[list 'a ] -> result]] name -> result (define (choice opt-list name) (let ([memo-table (make-weak-map)] + [terminal-counts #f] [num-choices (length opt-list)] [choice-names (lambda () (map (lambda (o) (o return-name)) opt-list))]) (opt-lambda (input [last-src (list 0 0 0 0)] [alts 1]) @@ -746,8 +761,15 @@ #;(printf "possible options are ~a~n" (choice-names)) (let ([sub-opts (sub1 (+ alts num-choices))]) (cond - [(weak-map-get memo-table input #f) (weak-map-get memo-table input)] [(eq? input return-name) name] + [(eq? input terminal-occurs) + (or terminal-counts + (begin + (set! terminal-counts 'counting) + (set! terminal-counts + (consolidate-count (map (lambda (symbol) (symbol terminal-occurs)) opt-list))) + terminal-counts))] + [(weak-map-get memo-table input #f) (weak-map-get memo-table input)] [else #;(printf "choice ~a~n" name) #;(printf "possible options are ~a~n" (choice-names)) diff --git a/collects/combinator-parser/private-combinator/parser-sigs.ss b/collects/combinator-parser/private-combinator/parser-sigs.ss index f1a4afbecc..174d5047b9 100644 --- a/collects/combinator-parser/private-combinator/parser-sigs.ss +++ b/collects/combinator-parser/private-combinator/parser-sigs.ss @@ -186,7 +186,7 @@ (src? input-type show-options max-depth max-choice-depth)) (define-signature ranking-parameters^ - (rank-misspell rank-caps rank-class rank-wrong rank-end rank-choice)) + (rank-misspell rank-caps rank-class rank-wrong rank-end rank-choice rank-repeat)) (define-signature updating-rank^ (blamed-terminal failed-last-parse)) diff --git a/collects/combinator-parser/private-combinator/structs.scm b/collects/combinator-parser/private-combinator/structs.scm index b1ad940e1a..040c883ff8 100644 --- a/collects/combinator-parser/private-combinator/structs.scm +++ b/collects/combinator-parser/private-combinator/structs.scm @@ -37,6 +37,33 @@ (define-struct lazy-opts ((matches #:mutable) errors (thunks #:mutable)) #:transparent) ;(make-lazy-choice (listof res) fail-type (listof (_ -> res)) string) (define-struct (lazy-choice lazy-opts) (name) #:transparent) + + ;(make-count string int) + (define-struct occurs (terminal count)) + + (define (consolidate-count cts) + (cond + [(null? cts) cts] + [(eq? 'counting (car cts)) (consolidate-count cts)] + [(pair? (car cts)) (consolidate-count (append (car cts) (cdr cts)))] + [else + (let-values ([(front back) (augment-count (car cts) (cdr cts))]) + (cons front (consolidate-count back)))])) + (define (augment-count count rst) + (cond + [(null? rst) (values count rst)] + [(eq? 'counting (car rst)) (augment-count count (cdr rst))] + [(pair? (car rst)) (augment-count count (append (car rst) (cdr rst)))] + [else + (let-values ([(current back) (augment-count count (cdr rst))]) + (cond + [(equal? (occurs-terminal count) (occurs-terminal (car rst))) + (values (make-occurs (occurs-terminal count) (+ (occurs-count count) + (occurs-count current) + (occurs-count (car rst)))) + back)] + [else (values current (cons (car rst) back))]))])) + ;parse-build = answer | none ;(make-answer 'b)