[regexp] stop using cast

This commit is contained in:
Ben Greenman 2016-06-27 01:14:23 -04:00
parent d2a4b87d4d
commit e70e0d3867
5 changed files with 34 additions and 40 deletions

View File

@ -14,7 +14,7 @@
(ann (regexp-match: "hi" "hi")
(U #f (List String String String)))
(ann (regexp-match: #rx"(h)(i)" "hi")
(U #f (List String)))
(U #f (List String String)))
(ann (regexp-match: #px"(?<=h)(?=i)" "hi")
(U #f (List String String String)))
;;bg; ill-typed in untyped Racket
@ -39,7 +39,7 @@
(U #f (List String String)))
;; --- can't handle starred groups
(ann (regexp-match: "(a)*(b)" "b")
(U #f (List String String)))
(U #f (List String String String)))
)
(test-compile-error
@ -52,12 +52,12 @@
(and m (string=? (car m) "lang"))))
;; ---- is raising a type error, which is GOOD, but throwing during test
;;; -- return type assumed to be String, but really is Bytes
;;; (ugly, but at least we catch it statically)
;; -- return type assumed to be String, but really is Bytes
;; (ugly, but at least we catch it statically)
;(with-input-from-file "test/regexp-fail.rkt"
; (lambda ()
; (define m (regexp-match: #rx"lang" (current-input-port)))
; (and m (bytes=? #;string=? (car m) #"lang"))))
; (and m (string=? (car m) #"lang"))))
)
;; 2016-06-13 : these really should be errors, just no-opts

View File

@ -13,8 +13,6 @@
;; -- regexps, from the world
(let ([str
(let () ;; -- from klocker? anyway the unicode will segfault `unsafe-string-ref`
(check-equal?
(ann (regexp-match: #rx"⇒" "yolo") (U #f (List String)))
@ -364,7 +362,7 @@
'("jan 1970" "jan" #f "1970"))
)
;; -- (? = 0 groups
;; -- (? = 0 groups ...)
(check-equal?
(ann
(regexp-match: "^(?:[a-z]*/)*([a-z]+)$" "/usr/local/bin/mzscheme")

View File

@ -9,7 +9,7 @@
;; =============================================================================
(: *TRIVIAL-LOG* (Parameterof Boolean))
(define *TRIVIAL-LOG* (make-parameter #f))
(define *TRIVIAL-LOG* (make-parameter #t))
(: *STOP-LIST* (Parameterof (Listof Identifier)))
(define *STOP-LIST* (make-parameter '()))

View File

@ -20,6 +20,7 @@
(only-in racket/list last)
(only-in racket/string string-split string-prefix? string-contains?)
(only-in racket/system process)
trivial/private/parameters
racket/path
syntax/modread
)
@ -76,6 +77,10 @@
([(k v) (in-hash H)])
(values (cons (cons k v) acc) (max pad-to (string-length (symbol->string k))))))
(define (assert-log-enabled)
(unless (*TRIVIAL-LOG*)
(raise-user-error 'trivial "Cannot collect data, need to set parameter *TRIVIAL-LOG* in module 'trivial/private/parameters'")))
(define (remove-compiled ps)
(define c-dir (build-path (or (path-only ps) (current-directory)) "compiled"))
(define fname (path-replace-extension (file-name-from-path ps) "_rkt.zo"))
@ -105,6 +110,7 @@
(values H H++)))
(define (collect-and-summarize fname)
(assert-log-enabled)
(remove-compiled fname)
(define cmd (format "raco make ~a" fname))
(define-values (in out pid err check-status) (apply values (process cmd)))

View File

@ -51,8 +51,7 @@
str))
;; Dispatch for counting groups
;; On success, return (Pairof Type (Listof Boolean))
;; - type is probably the return type of matches
;; On success, return (Listof Boolean)
;; - booleans indicating "always succeeds" (#t) and "may fail" (#f)
(define (parse-groups v-stx)
(define v (quoted-stx-value? v-stx))
@ -66,12 +65,10 @@
[else #f]))
(define (parse-groups/string str #:src stx)
(let ([ng (parse-groups/untyped str #:src stx)])
(and ng (cons 'String ng))))
(parse-groups/untyped str #:src stx))
(define (parse-groups/bytes b #:src stx)
(let ([ng (parse-groups/untyped (~a b) #:src stx)])
(and ng (cons 'Bytes ng))))
(parse-groups/untyped (~a b) #:src stx))
(define (parse-groups/regexp rx #:src stx)
(parse-groups/string (~a rx) #:src stx))
@ -124,6 +121,7 @@
#:when (not (has-?-before ivl ?-pos*)))
(and
(not (has-unguarded-pipe-before-or-after ivl paren-ivl* pipe-pos*))
(not (has-*-after ivl str))
(not (has-?-after ivl ?-pos*)))))
(define (has-?-before ivl ?-pos*)
@ -136,6 +134,11 @@
(for/or ([?pos (in-list ?-pos*)])
(= pos-after ?pos)))
(define (has-*-after ivl str)
(let ([i (+ 1 (cdr ivl))])
(and (< i (string-length str))
(eq? #\* (string-ref str i)))))
(define (has-unguarded-pipe-before-or-after ivl paren-ivl* pipe-pos*)
(define other-paren-ivl*
(for/list ([ivl2 (in-list paren-ivl*)]
@ -234,17 +237,6 @@
(and (< (car ivl) i)
(< i (cdr ivl))))
(define (infer-return-type pattern-sym arg-stx)
(if (and
(or (eq? pattern-sym 'String)
(eq? pattern-sym 'Regexp))
(or (syntax-parse arg-stx
((x:str arg* ...) #t)
((x arg* ...) #:when (bytes? (syntax-e #'x)) #f)
;; TODO ;; ((x arg* ...) #:when (port? (syntax-e #'x)) #f)
(_ #t))))
'String
'Bytes))
)
;; -----------------------------------------------------------------------------
@ -272,22 +264,20 @@
(define-syntax regexp-match: (make-alias #'regexp-match
(lambda (stx) (syntax-parse stx
[(_ pat:pattern/groups arg* ...)
#:with (type-sym . capture?*)
(syntax/loc stx pat.evidence)
#:with return-type
(format-id stx "~a" (infer-return-type (syntax-e #'type-sym) #'(arg* ...)))
#:with (exact-group-type* ...)
(let ([stx-never-fail (syntax/loc stx return-type)]
[stx-may-fail (syntax/loc stx (U #f return-type))])
(for/list ([c-stx (in-list (syntax-e #'capture?*))])
(if (syntax-e c-stx) stx-never-fail stx-may-fail)))
(syntax/loc stx
#:with capture?* (syntax/loc stx pat.evidence)
(quasisyntax/loc stx
(let ([maybe-match (regexp-match pat.expanded arg* ...)])
(if maybe-match
(cast ;; -- use `ann` to validate return type assumption & `cast` to remove #f
;; 2016-06-13: ideally we should be typechecking `arg` instead of guessing
(ann maybe-match (Pairof return-type (Listof (U #f return-type))))
(List return-type exact-group-type* ...))
;; -- Use `(or ... error)` to force guaranteed-capture groups.
(let ([rxm-error (lambda (i) (raise-user-error 'regexp-match: "Possible bug: expected group ~a to capture based on rx pattern '~a', but capture failed.\n Please report to 'http://github.com/bennn/trivial/issues' and use Racket's regexp-match in the meantime." i pat.expanded))])
(list (car maybe-match)
#,@(for/list ([capture?-stx (in-list (syntax-e #'capture?*))]
[i (in-naturals 1)])
(if (syntax-e capture?-stx)
(quasisyntax/loc stx
(or (list-ref maybe-match '#,i) (rxm-error '#,i)))
(quasisyntax/loc stx
(list-ref maybe-match '#,i))))))
#f)))]
[_ #f]))))