diff --git a/collects/combinator-parser/doc.txt b/collects/combinator-parser/doc.txt index 6b2fc65466..907ca6efd2 100644 --- a/collects/combinator-parser/doc.txt +++ b/collects/combinator-parser/doc.txt @@ -124,8 +124,9 @@ The _language-format-parameters^_ requires two names input->output-name: token -> string - translates tokens into strings The _language-dictionary^_ requires three names - misspelled: string string -> boolean - - check the spelling of the second arg against the first + misspelled: string string -> number - + check the spelling of the second arg against the first, return a number + that is the probability that the second is a misspelling of the first misscap: string string -> boolean - check the capitalization of the second arg against the first missclass: string string -> boolean - diff --git a/collects/combinator-parser/private-combinator/combinator-parser.scm b/collects/combinator-parser/private-combinator/combinator-parser.scm index 17fe71e32a..4c2412d02b 100644 --- a/collects/combinator-parser/private-combinator/combinator-parser.scm +++ b/collects/combinator-parser/private-combinator/combinator-parser.scm @@ -77,10 +77,13 @@ [else (!!! out)])))) ) - (define-unit rank-max@ + (define-unit rank-defaults@ (import) (export ranking-parameters^) - (define (rank-choice choices) (apply max choices))) + (define (rank-choice choices) (apply max choices)) + (define-values + (rank-misspell rank-caps rank-class rank-wrong rank-end) + (4/5 9/10 2/5 1/5 2/5))) (define-unit out-struct@ (import) @@ -90,7 +93,7 @@ (define-compound-unit/infer combinator-parser@ (import error-format-parameters^ language-format-parameters^ language-dictionary^) (export combinator-parser-forms^ parser^ out^) - (link out-struct@ main-parser@ rank-max@ error-formatting@ combinators@)) + (link out-struct@ main-parser@ rank-defaults@ error-formatting@ combinators@)) (define-unit/new-import-export combinator-parser-tools@ (import error-format-parameters^ language-format-parameters^ language-dictionary^) diff --git a/collects/combinator-parser/private-combinator/combinator.scm b/collects/combinator-parser/private-combinator/combinator.scm index 327c32b645..ee61af84e9 100644 --- a/collects/combinator-parser/private-combinator/combinator.scm +++ b/collects/combinator-parser/private-combinator/combinator.scm @@ -57,7 +57,7 @@ (cond [(eq? input return-name) name] [(null? input) - (fail-res null (make-terminal-fail .4 last-src name 0 0 'end #f))] + (fail-res null (make-terminal-fail rank-end last-src name 0 0 'end #f))] [(pred (if src? (position-token-token (car input)) (car input))) (make-res (list (builder (car input))) (cdr input) name (value (car input)) 1 #f (car input))] @@ -75,11 +75,11 @@ (fail-res (cdr input) (let-values ([(chance kind may-use) (cond - [(case? (car input)) (values 9/10 'misscase 1)] - [(spell? (car input)) - (values 4/5 'misspell 1)] - [(class? (car input)) (values 2/5 'missclass 1)] - [else (values 1/5 'wrong 0)])]) + [(case? (car input)) (values rank-caps 'misscase 1)] + [(> (spell? (car input)) 3/5) + (values (* rank-misspell (spell? (car input))) 'misspell 1)] + [(class? (car input)) (values rank-class 'missclass 1)] + [else (values rank-wrong 'wrong 0)])]) (make-fail chance name kind (car input) may-use)))]))))) ;seq: ( (list ((list 'a) -> res)) ((list 'b) -> 'c) string -> ((list 'a) -> result) @@ -167,7 +167,7 @@ (cond [(null? next-preds) (build-error (curr-pred input last-src) - (previous? input) (previous? return-name) + (previous? input) (previous? return-name) #f look-back used curr-id seen alts last-src)] [else #;(printf "seq-walker called: else case, ~a case of ~a~n" @@ -180,8 +180,8 @@ [(res-a fst) (next-call fst fst (res-msg fst) (and id-spot? (res-id fst)) (res-first-tok fst) alts)] [else - (build-error fst (previous? input) (previous? return-name) - look-back used curr-id seen alts last-src)])] + (build-error fst (previous? input) (previous? return-name) + (car next-preds) look-back used curr-id seen alts last-src)])] [(repeat-res? fst) (next-call (repeat-res-a fst) fst (res-msg (repeat-res-a fst)) #f (res-first-tok (repeat-res-a fst)) alts)] @@ -215,7 +215,7 @@ (let ([fails (map (lambda (rst) (res-msg (build-error rst (previous? input) (previous? return-name) - look-back used curr-id seen alts last-src))) + (car next-preds) look-back used curr-id seen alts last-src))) rsts)]) (fail-res input (make-options-fail @@ -250,7 +250,7 @@ ;build-sequence-error: result boolean result string int [U #f string] [listof string] int int -> result (define (sequence-error-gen name len) - (lambda (old-res prev prev-name look-back used id seen alts last-src) + (lambda (old-res prev prev-name next-pred look-back used id seen alts last-src) (cond [(and (pair? old-res) (null? (cdr old-res))) (car old-res)] [(repeat-res? old-res) @@ -274,6 +274,14 @@ (fail-type-chance (res-msg old-res)))) (repeat-res-stop look-back)] [else (res-msg old-res)])] + [(next-ok?) + (and (= (fail-type-may-use fail) 1) + next-pred + (next-pred (cdr (res-rest old-res))))] + [(next-used) + (if (and next-ok? (res? next-ok?) (res-a next-ok?)) + (res-used next-ok?) + 0)] [(kind expected found) (get-fail-info fail)] [(new-src) (update-src kind (fail-type-src fail) @@ -301,7 +309,7 @@ [else (compute-chance len seen-len used alts (fail-type-chance fail))]) (fail-type-src fail) name used - (+ used (fail-type-may-use fail)) + (+ used (fail-type-may-use fail) next-used) id kind (reverse seen) expected found (and (res? prev) (res-a prev) (res-msg prev)) prev-name)))]))) diff --git a/collects/combinator-parser/private-combinator/errors.scm b/collects/combinator-parser/private-combinator/errors.scm index 017cf872da..4d0c2d579f 100644 --- a/collects/combinator-parser/private-combinator/errors.scm +++ b/collects/combinator-parser/private-combinator/errors.scm @@ -85,7 +85,8 @@ message-to-date)] [(sub-seq choice) (fail-type->message (sequence-fail-found fail-type) - (add-to-message (msg (format "An error occured in ~a.~n" id-name)) name message-to-date))] + (add-to-message (msg (format "An error occured in ~a.~n" id-name)) + name (sequence-fail-id fail-type) message-to-date))] [(options) (let ([sorted-opts (sort (options-fail-opts (sequence-fail-found fail-type)) (lambda (a b) (>= (fail-type-chance a) (fail-type-chance b))))]) @@ -93,7 +94,7 @@ (add-to-message (msg (format "There is an error in this ~a after ~a, it is likely you intended a(n) ~a here.~n" id-name (car (reverse show-sequence)) (fail-type-name (car sorted-opts)))) - name message-to-date)))]))] + name (sequence-fail-id fail-type) message-to-date)))]))] [(options-fail? fail-type) #;(printf "selecting for options on ~a~n" name) (let* ([winners (select-errors (options-fail-opts fail-type))] @@ -113,7 +114,7 @@ name (if (equal? top-name name) "" (format ", it is likely you intended ~a ~a here" (a/an top-name) top-name)))])) - name message-to-date)))] + name #f message-to-date)))] [(choice-fail? fail-type) #;(printf "selecting for ~a~n" name) (let* ([winners (select-errors (choice-fail-messages fail-type))] @@ -146,7 +147,7 @@ (if (equal? name top-name) "" (format ", it is likely that you intended ~a ~a here" (a/an top-name) top-name)) (if show-options " To see all options click here." ""))])) - name message-to-date)))]))) + name #f message-to-date)))]))) (define (chance-used a) (* (fail-type-chance a) (fail-type-used a))) (define (chance-may-use a) (* (fail-type-chance a) (fail-type-may-use a))) @@ -235,14 +236,18 @@ (remove-dups (cdr l) n)] [else (cons (car l) (remove-dups (cdr l) n))])) - (define-struct ms (who say)) + (define-struct ms (who id? say)) ;add-to-message: err string (list err) -> (list err) - (define (add-to-message msg name rest) - (let ([next (make-ms name msg)]) + (define (add-to-message msg name id? rest) + (let ([next (make-ms name id? msg)] + [curr-len (length rest)]) (cond [(null? rest) (list next)] [(equal? (ms-who (car rest)) name) (cons next (cdr rest))] + [(and id? (ms-id? (car rest)) (< curr-len max-depth)) (cons next rest)] + [(and id? (ms-id? (car rest))) (cons next (first-n (sub1 max-depth) rest))] + [id? (add-to-message msg name id? (cdr rest))] [(< (length rest) max-depth) (cons next rest)] [else (cons next (first-n (sub1 max-depth) rest))]))) diff --git a/collects/combinator-parser/private-combinator/parser-sigs.ss b/collects/combinator-parser/private-combinator/parser-sigs.ss index 532168a41b..0e23688998 100644 --- a/collects/combinator-parser/private-combinator/parser-sigs.ss +++ b/collects/combinator-parser/private-combinator/parser-sigs.ss @@ -20,25 +20,6 @@ (format "token-~a" (syntax-e e))))) (syntax->list #'(elt ...)))))])) - (define-for-syntax (insert-name stx name) - (let loop ([term stx] - [pos 0] - [id-pos 0] - [terms null]) - (syntax-case term (sequence choose ^) - [((sequence a b) . rest) - (loop (syntax rest) (add1 pos) id-pos - (cons (quasisyntax (sequence a b #,name)) terms))] - [((choose a) . rest) - (loop (syntax rest) (add1 pos) id-pos - (cons (quasisyntax (choose a #,name)) terms))] - [((^ a) . rest) - (loop (syntax (a . rest)) - pos (add1 pos) terms)] - [(a . rest) - (loop (syntax rest) (add1 pos) id-pos (cons (syntax a) terms))] - [() (list (reverse terms) id-pos)]))) - (define-signature language-dictionary^ (misspelled misscap missclass)) (define-signature combinator-parser-forms^ @@ -138,35 +119,48 @@ (lambda (token) #f) (lambda (token) #f))) ...))))])))) - (define-syntaxes (sequence) - (values - (lambda (stx) - (syntax-case stx (^) - [(_ (term ...) proc) - (syntax - (seq (list term ...) proc (symbol->string (gensym 'seq))))] - [(_ terms proc name) - (let ([new-terms (insert-name (syntax terms) (syntax name))]) - (with-syntax (((term ...) (car new-terms)) - (id-pos (cadr new-terms))) - (syntax (seq (list term ...) proc name id-pos))))])))) - - (define-syntaxes(choose) - (values - (lambda (stx) - (syntax-case stx () - [(_ (term ...)) - (syntax - (choice (list term ...) (symbol->string (gensym 'choice))))] - [(_ terms name) - (with-syntax (((term ...) [car (insert-name (syntax terms) (syntax name))])) + (define-syntaxes (sequence choose ^) + (let ([insert-name + (lambda (stx name) + (let loop ([term stx] + [pos 0] + [id-pos 0] + [terms null]) + (syntax-case term (sequence choose ^) + [((sequence a b) . rest) + (loop (syntax rest) (add1 pos) id-pos + (cons (quasisyntax (sequence a b #,name)) terms))] + [((choose a) . rest) + (loop (syntax rest) (add1 pos) id-pos + (cons (quasisyntax (choose a #,name)) terms))] + [((^ a) . rest) + (loop (syntax (a . rest)) + pos (add1 pos) terms)] + [(a . rest) + (loop (syntax rest) (add1 pos) id-pos (cons (syntax a) terms))] + [() (list (reverse terms) id-pos)])))]) + (values + (lambda (stx) + (syntax-case stx (^) + [(_ (term ...) proc) (syntax - (choice (list term ...) name)))])))) - - (define-syntaxes (^) - (values - (syntax-rules () - [(_ f) f]))) + (seq (list term ...) proc (symbol->string (gensym 'seq))))] + [(_ terms proc name) + (let ([new-terms (insert-name (syntax terms) (syntax name))]) + (with-syntax (((term ...) (car new-terms)) + (id-pos (cadr new-terms))) + (syntax (seq (list term ...) proc name id-pos))))])) + (lambda (stx) + (syntax-case stx () + [(_ (term ...)) + (syntax + (choice (list term ...) (symbol->string (gensym 'choice))))] + [(_ terms name) + (with-syntax (((term ...) [car (insert-name (syntax terms) (syntax name))])) + (syntax + (choice (list term ...) name)))])) + (syntax-rules () + [(_ f) f])))) (define-syntaxes (eta) (values (syntax-rules () @@ -182,7 +176,11 @@ (define-signature error-format-parameters^ (src? input-type show-options max-depth max-choice-depth)) - (define-signature ranking-parameters^ (rank-choice)) + (define-signature ranking-parameters^ + (rank-misspell rank-caps rank-class rank-wrong rank-end rank-choice)) + + (define-signature updating-rank^ + (blamed-terminal failed-last-parse)) (define-signature error^ (fail-type->message))