diff --git a/private/common.rkt b/private/common.rkt index 56d2b36..99cf88d 100644 --- a/private/common.rkt +++ b/private/common.rkt @@ -1,6 +1,7 @@ #lang racket/base ;; Common helper functions +;; TODO make-set!-transformer (provide @@ -40,10 +41,10 @@ #:attributes (evidence expanded) (pattern e #:with e+ (expand-expr #'e) - #:with p+ (p? (syntax/loc #'e e+)) + #:with p+ (p? #'e+) #:when (syntax-e #'p+) - #:attr evidence (syntax/loc #'e p+) - #:attr expanded (syntax/loc #'e e+)))) + #:attr evidence #'p+ + #:attr expanded #'e+))) (define (expand-expr stx) (local-expand stx 'expression '())) diff --git a/private/regexp.rkt b/private/regexp.rkt new file mode 100644 index 0000000..b96e712 --- /dev/null +++ b/private/regexp.rkt @@ -0,0 +1,148 @@ +#lang typed/racket/base + +;; Stronger types for regular expression matching. + +(provide + regexp: + pregexp: + byte-regexp: + byte-pregexp: + define-regexp: + let-regexp: + + regexp-match: +) + +(require + (for-syntax + (only-in racket/syntax format-id) + typed/racket/base + (only-in racket/format ~a) + syntax/parse + trivial/private/common)) + +;; ============================================================================= + +(begin-for-syntax + (define errloc-key 'regexp-match:) + + (define (group-error str reason) + (raise-argument-error + errloc-key + (format "Invalid regexp pattern (unmatched ~a)" reason) + str)) + + ;; Dispatch for counting groups + (define (parse-groups v-stx) + (define v (quoted-stx-value? v-stx)) + (cond + [(string? v) (parse-groups/string v #:src v-stx)] + [(regexp? v) (parse-groups/regexp v #:src v-stx)] + [(pregexp? v) (parse-groups/pregexp v #:src v-stx)] + [(bytes? v) (parse-groups/bytes v #:src v-stx)] + [(byte-regexp? v) (parse-groups/byte-regexp v #:src v-stx)] + [(byte-pregexp? v) (parse-groups/byte-pregexp v #:src v-stx)] + [else #f])) + + ;; Count the number of matched parentheses in a regexp pattern. + ;; Raise an exception if there are unmatched parens. + (define (parse-groups/untyped str #:src stx) + (define last-index (- (string-length str) 1)) + (let loop ([i 0] [in-paren '()] [num-groups 0]) + (if (> i last-index) + (if (null? in-paren) + num-groups + (group-error str (format "'(' at index ~a" (car in-paren)))) + (case (string-ref str i) + [(#\() + ;; Watch for (? patterns + (if (and (< i last-index) + (eq? #\? (string-ref str (+ i 1)))) + (loop (+ i 2) (cons #f in-paren) num-groups) + (loop (+ i 1) (cons i in-paren) num-groups))] + [(#\)) + (cond + [(null? in-paren) + (group-error str (format "')' at index ~a" i))] + [(eq? #f (car in-paren)) + ;; Matched closing paren, but does not count as a group + (loop (+ i 1) (cdr in-paren) num-groups)] + [else + (loop (+ i 1) (cdr in-paren) (+ 1 num-groups))])] + [(#\\) + (if (and (< i last-index) + (eq? #\\ (string-ref str (+ i 1)))) + (loop (+ i 3) in-paren num-groups) + (loop (+ i 2) in-paren num-groups))] + [(#\|) + ;; Nope! Can't handle pipes + #f] + [else + (loop (+ i 1) in-paren num-groups)])))) + + (define (parse-groups/string str #:src stx) + (let ([ng (parse-groups/untyped str #:src stx)]) + (and ng (cons ng 'String)))) + + (define (parse-groups/bytes b #:src stx) + (let ([ng (parse-groups/untyped (~a b) #:src stx)]) + (and ng (cons ng 'Bytes)))) + + (define (parse-groups/regexp rx #:src stx) + (parse-groups/string (~a rx) #:src stx)) + + (define parse-groups/pregexp + parse-groups/regexp) + + (define (parse-groups/byte-regexp bx #:src stx) + (parse-groups/bytes (~a bx) #:src stx)) + + (define parse-groups/byte-pregexp + parse-groups/byte-regexp) + + (define-values (num-groups-key rx? def-rx let-rx) + (make-value-property 'rx:groups parse-groups)) + (define-syntax-class/predicate pattern/groups rx?) +) + +;; ----------------------------------------------------------------------------- + +(define-syntax (define-matcher* stx) + (syntax-parse stx + [(_ f*:id ...) + #:with (f+* ...) (for/list ([f (in-list (syntax-e #'(f* ...)))]) + (format-id stx "~a:" (syntax-e f))) + #`(begin + (define-syntax f+* (make-alias #'f* + (lambda (stx) (syntax-parse stx + [(_ pat:pattern/groups) + (syntax-property + (syntax/loc stx (f* pat.expanded)) + num-groups-key + #'pat.evidence)] + [_ #f])))) ...)])) + +(define-matcher* regexp pregexp byte-regexp byte-pregexp) + +(define-syntax define-regexp: (make-keyword-alias 'define def-rx)) +(define-syntax let-regexp: (make-keyword-alias 'let let-rx)) + +(define-syntax regexp-match: (make-alias #'regexp-match + (lambda (stx) (syntax-parse stx + [(_ pat:pattern/groups arg* ...) + #:with (num-groups . type-sym) (syntax/loc stx pat.evidence) + ;; TODO keep source location in type-sym, stop using format-id + ;; (Is it really that bad?) + #:with type (format-id stx "~a" (syntax-e #'type-sym)) + #:with (index* ...) (for/list ([i (in-range (syntax-e #'num-groups))]) i) + (syntax/loc stx + (let ([maybe-match (regexp-match pat.expanded arg* ...)]) + (if maybe-match + (let ([m : (Listof (Option type)) maybe-match]) + (list (car maybe-match) + (begin (set! m (cdr m)) + (or (car m) (error 'regexp-match: (format "Internal error at result index ~a, try using Racket's regexp-match" 'index*)))) + ...)) + #f)))] + [_ #f])))) + diff --git a/regexp.rkt b/regexp.rkt index 3dafc37..de12faf 100644 --- a/regexp.rkt +++ b/regexp.rkt @@ -1,15 +1,12 @@ #lang typed/racket/base ;; Stronger types for regular expression matching. -;; -;; TODO use syntax-class to abstract over local-expands / check num-groups -;; TODO groups can be #f when using | ... any other way? (provide regexp: define-regexp: let-regexp: - pregexp: define-pregexp: let-pregexp: - byte-regexp: define-byte-regexp: let-byte-regexp: - byte-pregexp: define-byte-pregexp: let-byte-pregexp: + pregexp: + byte-regexp: + byte-pregexp: ;; Expression and definition forms that try checking their argument patterns. ;; If check succeeds, will remember the number of pattern groups ;; for calls to `regexp-match:`. @@ -23,179 +20,4 @@ ;; Will raise a compile-time exception if the pattern contains unmatched groups. ) -(require (for-syntax - typed/racket/base - (only-in racket/format ~a) - (only-in racket/syntax format-id) - syntax/id-table - syntax/parse - syntax/stx - trivial/private/common -)) - -;; ============================================================================= - -(define-for-syntax num-groups-key 'regexp-match:num-groups) -(define-for-syntax errloc-key 'regexp-match:) -(define-for-syntax id+num-groups (make-free-id-table)) - -(begin-for-syntax (define-syntax-class pattern/groups - #:attributes (expanded num-groups type) - (pattern e - #:with e+ (expand-expr #'e) - #:with (g . t) (count-groups #'e+) - #:when (syntax-e #'g) - #:attr expanded #'e+ - #:attr num-groups #'g - #:attr type #'t))) - -;; ----------------------------------------------------------------------------- - -;; (define-matcher f) -;; Expand to two forms: -;; - (f: arg) -;; - (define-f: id arg) -;; The first is for statically-checked patterns in expressions, -;; the second is for patterns in definitions. -(define-syntax define-matcher - (syntax-parser - [(_ f:id) - #:with f: (format-id #'f "~a:" (syntax-e #'f)) - #:with let-f: (format-id #'f "let-~a:" (syntax-e #'f)) - #:with define-f: (format-id #'f "define-~a:" (syntax-e #'f)) - #'(begin - ;; For expressions, (regexp: val) - (define-syntax f: - (syntax-parser - [g:id - (syntax/loc #'g f)] - [(_ pat:pattern/groups) - (syntax-property #'(f pat.expanded) - num-groups-key - (cons (syntax-e #'pat.num-groups) #'pat.type))] - [(_ arg* (... ...)) - #'(f arg* (... ...))])) - ;; For lets, (let-regexp: ([id val]) ...) - (define-syntax let-f: - (syntax-parser - [(_ ([name*:id pat*:pattern/groups] (... ...)) e* (... ...)) - #'(let ([name* pat*.expanded] (... ...)) - (let-syntax ([name* (make-rename-transformer - (syntax-property #'name* num-groups-key - (cons 'pat*.num-groups #'pat*.type)))] (... ...)) - e* (... ...)))] - [(_ arg* (... ...)) - #'(let arg* (... ...))])) - ;; For definitions, (define-regexp: id val) - (define-syntax define-f: - (syntax-parser - [(_ name:id pat:pattern/groups) - (free-id-table-set! id+num-groups - #'name - (cons (syntax-e #'pat.num-groups) #'pat.type)) - #'(define name pat.expanded)] - [(_ arg* (... ...)) - #'(define arg* (... ...))]))) ])) - -(define-matcher regexp) -(define-matcher pregexp) -(define-matcher byte-regexp) -(define-matcher byte-pregexp) - -(define-syntax regexp-match: - (syntax-parser - [(f pat:pattern/groups arg* ...) - #:with (index* ...) (for/list ([i (in-range (syntax-e #'pat.num-groups))]) i) - #'(let ([maybe-match (regexp-match pat.expanded arg* ...)]) - (if maybe-match - (let ([m : (Listof (Option pat.type)) maybe-match]) - (list (car maybe-match) - (begin (set! m (cdr m)) - (or (car m) (error 'regexp-match: (format "Internal error at result index ~a, try using Racket's regexp-match" 'index*)))) - ...)) - #f))] - [f:id - (syntax/loc #'f regexp-match)] - [(f arg* ...) - (syntax/loc #'f (regexp-match arg* ...))])) - -;; ----------------------------------------------------------------------------- - -(define-for-syntax (group-error str reason) - (raise-argument-error - errloc-key - (format "Invalid regexp pattern (unmatched ~a)" reason) - str)) - -;; Dispatch for counting groups -(define-for-syntax (count-groups v-stx) - (cond - [(syntax-property v-stx num-groups-key) - => (lambda (x) x)] - [(identifier? v-stx) - (free-id-table-ref id+num-groups v-stx #f)] - [(quoted-stx-value? v-stx) - => (lambda (v) - (cond - [(string? v) (count-groups/string v #:src v-stx)] - [(regexp? v) (count-groups/regexp v #:src v-stx)] - [(pregexp? v) (count-groups/pregexp v #:src v-stx)] - [(bytes? v) (count-groups/bytes v #:src v-stx)] - [(byte-regexp? v) (count-groups/byte-regexp v #:src v-stx)] - [(byte-pregexp? v) (count-groups/byte-pregexp v #:src v-stx)] - [else #f]))] - [else #f])) - -;; Count the number of matched parentheses in a regexp pattern. -;; Raise an exception if there are unmatched parens. -(define-for-syntax (count-groups/untyped str #:src stx) - (define last-index (- (string-length str) 1)) - (let loop ([i 0] [in-paren '()] [num-groups 0]) - (if (> i last-index) - (if (null? in-paren) - num-groups - (group-error str (format "'(' at index ~a" (car in-paren)))) - (case (string-ref str i) - [(#\() - ;; Watch for (? patterns - (if (and (< i last-index) - (eq? #\? (string-ref str (+ i 1)))) - (loop (+ i 2) (cons #f in-paren) num-groups) - (loop (+ i 1) (cons i in-paren) num-groups))] - [(#\)) - (cond - [(null? in-paren) - (group-error str (format "')' at index ~a" i))] - [(eq? #f (car in-paren)) - ;; Matched closing paren, but does not count as a group - (loop (+ i 1) (cdr in-paren) num-groups)] - [else - (loop (+ i 1) (cdr in-paren) (+ 1 num-groups))])] - [(#\\) - (if (and (< i last-index) - (eq? #\\ (string-ref str (+ i 1)))) - (loop (+ i 3) in-paren num-groups) - (loop (+ i 2) in-paren num-groups))] - [(#\|) - ;; Nope! Can't handle pipes - #f] - [else - (loop (+ i 1) in-paren num-groups)])))) - -(define-for-syntax (count-groups/string str #:src stx) - (cons (count-groups/untyped str #:src stx) (syntax/loc stx String))) - -(define-for-syntax (count-groups/bytes b #:src stx) - (cons (count-groups/untyped (~a b) #:src stx) (syntax/loc stx Bytes))) - -(define-for-syntax (count-groups/regexp rx #:src stx) - (count-groups/string (~a rx) #:src stx)) - -(define-for-syntax count-groups/pregexp - count-groups/regexp) - -(define-for-syntax (count-groups/byte-regexp bx #:src stx) - (count-groups/bytes (~a bx) #:src stx)) - -(define-for-syntax count-groups/byte-pregexp - count-groups/byte-regexp) +(require trivial/private/regexp) diff --git a/regexp/no-colon.rkt b/regexp/no-colon.rkt index 3523719..0370810 100644 --- a/regexp/no-colon.rkt +++ b/regexp/no-colon.rkt @@ -9,10 +9,4 @@ [byte-regexp: byte-regexp] [byte-pregexp: byte-pregexp] [let-regexp: let-regexp] - [let-pregexp: let-pregexp] - [let-byte-regexp: let-byte-regexp] - [let-byte-pregexp: let-byte-pregexp] - [define-regexp: define-regexp] - [define-pregexp: define-pregexp] - [define-byte-regexp: define-byte-regexp] - [define-byte-pregexp: define-byte-pregexp])) + [define-regexp: define-regexp])) diff --git a/test/regexp-pass.rkt b/test/regexp-pass.rkt index c9e165a..4c1b13c 100644 --- a/test/regexp-pass.rkt +++ b/test/regexp-pass.rkt @@ -221,23 +221,6 @@ (U #f (Listof (U #f String)))) '("hellooo" "ll" "ooo")) - ;; -- let-pregexp: - (check-equal? - (ann - (let-pregexp: ([rx #px"he(l*)(o*)"]) - (regexp-match: rx "helllooo")) - (U #f (List String String String))) - '("helllooo" "lll" "ooo")) - - ;; -- define-pregexp: - (check-equal? - (ann - (let () - (define-pregexp: rx #px"he(l*)(o*)") - (regexp-match: rx "helllooo")) - (U #f (List String String String))) - '("helllooo" "lll" "ooo")) - (check-equal? (ann (let () @@ -245,37 +228,12 @@ (U #f (List String String String))) '("hellooo" "ll" "ooo")) - (check-equal? - (ann - (let () - (define-pregexp: rx (pregexp: "he(l*)(o*)")) - (regexp-match: rx "hellooo")) - (U #f (List String String String))) - '("hellooo" "ll" "ooo")) - (check-equal? (ann (regexp-match: #rx#"he(l*)(o*)" #"helllooo") (U #f (List Bytes Bytes Bytes))) '(#"helllooo" #"lll" #"ooo")) - ;; -- let-byte-regexp: - (check-equal? - (ann - (let-byte-regexp: ([rx #rx#"he(l*)(o*)"]) - (regexp-match: rx #"helllooo")) - (U #f (List Bytes Bytes Bytes))) - '(#"helllooo" #"lll" #"ooo")) - - ;; -- define-byte-regexp: - (check-equal? - (ann - (let () - (define-byte-regexp: rx #rx#"he(l*)(o*)") - (regexp-match: rx #"helllooo")) - (U #f (List Bytes Bytes Bytes))) - '(#"helllooo" #"lll" #"ooo")) - (check-equal? (ann (let () @@ -283,37 +241,12 @@ (U #f (List Bytes Bytes Bytes))) '(#"hellooo" #"ll" #"ooo")) - (check-equal? - (ann - (let () - (define-byte-regexp: rx (byte-regexp: #"he(l*)(o*)")) - (regexp-match: rx "hellooo")) - (U #f (List Bytes Bytes Bytes))) - '(#"hellooo" #"ll" #"ooo")) - (check-equal? (ann (regexp-match: #px#"he(l*)(o*)" "helllooo") (U #f (List Bytes Bytes Bytes))) '(#"helllooo" #"lll" #"ooo")) - ;; -- let-byte-pregexp: - (check-equal? - (ann - (let-byte-pregexp: ([rx #px#"he(l*)(o*)"]) - (regexp-match: rx "helllooo")) - (U #f (List Bytes Bytes Bytes))) - '(#"helllooo" #"lll" #"ooo")) - - ;; -- define-byte-pregexp: - (check-equal? - (ann - (let () - (define-byte-pregexp: rx #px#"he(l*)(o*)") - (regexp-match: rx "helllooo")) - (U #f (List Bytes Bytes Bytes))) - '(#"helllooo" #"lll" #"ooo")) - (check-equal? (ann (let () @@ -321,14 +254,6 @@ (U #f (List Bytes Bytes Bytes))) '(#"hellooo" #"ll" #"ooo")) - (check-equal? - (ann - (let () - (define-byte-pregexp: rx (byte-pregexp: #"he(l*)(o*)")) - (regexp-match: rx "hellooo")) - (U #f (List Bytes Bytes Bytes))) - '(#"hellooo" #"ll" #"ooo")) - ;; -- special cases / miscellaneous ;; --- Can't handle |, yet