From e70e0d3867c35910c51de8ed6a66327c9d44f26b Mon Sep 17 00:00:00 2001 From: Ben Greenman Date: Mon, 27 Jun 2016 01:14:23 -0400 Subject: [PATCH] [regexp] stop using cast --- test/regexp-fail.rkt | 10 +++--- test/regexp-pass.rkt | 4 +-- trivial/private/parameters.rkt | 2 +- trivial/private/raco-command.rkt | 6 ++++ trivial/private/regexp.rkt | 52 +++++++++++++------------------- 5 files changed, 34 insertions(+), 40 deletions(-) diff --git a/test/regexp-fail.rkt b/test/regexp-fail.rkt index 25b904a..4aa2d9f 100644 --- a/test/regexp-fail.rkt +++ b/test/regexp-fail.rkt @@ -14,7 +14,7 @@ (ann (regexp-match: "hi" "hi") (U #f (List String String String))) (ann (regexp-match: #rx"(h)(i)" "hi") - (U #f (List String))) + (U #f (List String String))) (ann (regexp-match: #px"(?<=h)(?=i)" "hi") (U #f (List String String String))) ;;bg; ill-typed in untyped Racket @@ -39,7 +39,7 @@ (U #f (List String String))) ;; --- can't handle starred groups (ann (regexp-match: "(a)*(b)" "b") - (U #f (List String String))) + (U #f (List String String String))) ) (test-compile-error @@ -52,12 +52,12 @@ (and m (string=? (car m) "lang")))) ;; ---- is raising a type error, which is GOOD, but throwing during test - ;;; -- return type assumed to be String, but really is Bytes - ;;; (ugly, but at least we catch it statically) + ;; -- return type assumed to be String, but really is Bytes + ;; (ugly, but at least we catch it statically) ;(with-input-from-file "test/regexp-fail.rkt" ; (lambda () ; (define m (regexp-match: #rx"lang" (current-input-port))) - ; (and m (bytes=? #;string=? (car m) #"lang")))) + ; (and m (string=? (car m) #"lang")))) ) ;; 2016-06-13 : these really should be errors, just no-opts diff --git a/test/regexp-pass.rkt b/test/regexp-pass.rkt index 4e25382..a0a9061 100644 --- a/test/regexp-pass.rkt +++ b/test/regexp-pass.rkt @@ -13,8 +13,6 @@ ;; -- regexps, from the world - (let ([str - (let () ;; -- from klocker? anyway the unicode will segfault `unsafe-string-ref` (check-equal? (ann (regexp-match: #rx"⇒" "yolo") (U #f (List String))) @@ -364,7 +362,7 @@ '("jan 1970" "jan" #f "1970")) ) - ;; -- (? = 0 groups + ;; -- (? = 0 groups ...) (check-equal? (ann (regexp-match: "^(?:[a-z]*/)*([a-z]+)$" "/usr/local/bin/mzscheme") diff --git a/trivial/private/parameters.rkt b/trivial/private/parameters.rkt index ef01dc3..37bab4c 100644 --- a/trivial/private/parameters.rkt +++ b/trivial/private/parameters.rkt @@ -9,7 +9,7 @@ ;; ============================================================================= (: *TRIVIAL-LOG* (Parameterof Boolean)) -(define *TRIVIAL-LOG* (make-parameter #f)) +(define *TRIVIAL-LOG* (make-parameter #t)) (: *STOP-LIST* (Parameterof (Listof Identifier))) (define *STOP-LIST* (make-parameter '())) diff --git a/trivial/private/raco-command.rkt b/trivial/private/raco-command.rkt index 3746af0..568f696 100644 --- a/trivial/private/raco-command.rkt +++ b/trivial/private/raco-command.rkt @@ -20,6 +20,7 @@ (only-in racket/list last) (only-in racket/string string-split string-prefix? string-contains?) (only-in racket/system process) + trivial/private/parameters racket/path syntax/modread ) @@ -76,6 +77,10 @@ ([(k v) (in-hash H)]) (values (cons (cons k v) acc) (max pad-to (string-length (symbol->string k)))))) +(define (assert-log-enabled) + (unless (*TRIVIAL-LOG*) + (raise-user-error 'trivial "Cannot collect data, need to set parameter *TRIVIAL-LOG* in module 'trivial/private/parameters'"))) + (define (remove-compiled ps) (define c-dir (build-path (or (path-only ps) (current-directory)) "compiled")) (define fname (path-replace-extension (file-name-from-path ps) "_rkt.zo")) @@ -105,6 +110,7 @@ (values H H++))) (define (collect-and-summarize fname) + (assert-log-enabled) (remove-compiled fname) (define cmd (format "raco make ~a" fname)) (define-values (in out pid err check-status) (apply values (process cmd))) diff --git a/trivial/private/regexp.rkt b/trivial/private/regexp.rkt index 19183a0..74899c5 100644 --- a/trivial/private/regexp.rkt +++ b/trivial/private/regexp.rkt @@ -51,8 +51,7 @@ str)) ;; Dispatch for counting groups - ;; On success, return (Pairof Type (Listof Boolean)) - ;; - type is probably the return type of matches + ;; On success, return (Listof Boolean) ;; - booleans indicating "always succeeds" (#t) and "may fail" (#f) (define (parse-groups v-stx) (define v (quoted-stx-value? v-stx)) @@ -66,12 +65,10 @@ [else #f])) (define (parse-groups/string str #:src stx) - (let ([ng (parse-groups/untyped str #:src stx)]) - (and ng (cons 'String ng)))) + (parse-groups/untyped str #:src stx)) (define (parse-groups/bytes b #:src stx) - (let ([ng (parse-groups/untyped (~a b) #:src stx)]) - (and ng (cons 'Bytes ng)))) + (parse-groups/untyped (~a b) #:src stx)) (define (parse-groups/regexp rx #:src stx) (parse-groups/string (~a rx) #:src stx)) @@ -124,6 +121,7 @@ #:when (not (has-?-before ivl ?-pos*))) (and (not (has-unguarded-pipe-before-or-after ivl paren-ivl* pipe-pos*)) + (not (has-*-after ivl str)) (not (has-?-after ivl ?-pos*))))) (define (has-?-before ivl ?-pos*) @@ -136,6 +134,11 @@ (for/or ([?pos (in-list ?-pos*)]) (= pos-after ?pos))) + (define (has-*-after ivl str) + (let ([i (+ 1 (cdr ivl))]) + (and (< i (string-length str)) + (eq? #\* (string-ref str i))))) + (define (has-unguarded-pipe-before-or-after ivl paren-ivl* pipe-pos*) (define other-paren-ivl* (for/list ([ivl2 (in-list paren-ivl*)] @@ -234,17 +237,6 @@ (and (< (car ivl) i) (< i (cdr ivl)))) - (define (infer-return-type pattern-sym arg-stx) - (if (and - (or (eq? pattern-sym 'String) - (eq? pattern-sym 'Regexp)) - (or (syntax-parse arg-stx - ((x:str arg* ...) #t) - ((x arg* ...) #:when (bytes? (syntax-e #'x)) #f) - ;; TODO ;; ((x arg* ...) #:when (port? (syntax-e #'x)) #f) - (_ #t)))) - 'String - 'Bytes)) ) ;; ----------------------------------------------------------------------------- @@ -272,22 +264,20 @@ (define-syntax regexp-match: (make-alias #'regexp-match (lambda (stx) (syntax-parse stx [(_ pat:pattern/groups arg* ...) - #:with (type-sym . capture?*) - (syntax/loc stx pat.evidence) - #:with return-type - (format-id stx "~a" (infer-return-type (syntax-e #'type-sym) #'(arg* ...))) - #:with (exact-group-type* ...) - (let ([stx-never-fail (syntax/loc stx return-type)] - [stx-may-fail (syntax/loc stx (U #f return-type))]) - (for/list ([c-stx (in-list (syntax-e #'capture?*))]) - (if (syntax-e c-stx) stx-never-fail stx-may-fail))) - (syntax/loc stx + #:with capture?* (syntax/loc stx pat.evidence) + (quasisyntax/loc stx (let ([maybe-match (regexp-match pat.expanded arg* ...)]) (if maybe-match - (cast ;; -- use `ann` to validate return type assumption & `cast` to remove #f - ;; 2016-06-13: ideally we should be typechecking `arg` instead of guessing - (ann maybe-match (Pairof return-type (Listof (U #f return-type)))) - (List return-type exact-group-type* ...)) + ;; -- Use `(or ... error)` to force guaranteed-capture groups. + (let ([rxm-error (lambda (i) (raise-user-error 'regexp-match: "Possible bug: expected group ~a to capture based on rx pattern '~a', but capture failed.\n Please report to 'http://github.com/bennn/trivial/issues' and use Racket's regexp-match in the meantime." i pat.expanded))]) + (list (car maybe-match) + #,@(for/list ([capture?-stx (in-list (syntax-e #'capture?*))] + [i (in-naturals 1)]) + (if (syntax-e capture?-stx) + (quasisyntax/loc stx + (or (list-ref maybe-match '#,i) (rxm-error '#,i))) + (quasisyntax/loc stx + (list-ref maybe-match '#,i)))))) #f)))] [_ #f]))))