From b6ad6f56463a53845d4fc80054cd4f8a7c61a745 Mon Sep 17 00:00:00 2001
From: ben <benjaminlgreenman@gmail.com>
Date: Wed, 9 Mar 2016 11:33:52 -0500
Subject: [PATCH] [regexp] cleaner impl

---
 private/common.rkt   |   7 +-
 private/regexp.rkt   | 148 ++++++++++++++++++++++++++++++++++
 regexp.rkt           | 186 +------------------------------------------
 regexp/no-colon.rkt  |   8 +-
 test/regexp-pass.rkt |  75 -----------------
 5 files changed, 157 insertions(+), 267 deletions(-)
 create mode 100644 private/regexp.rkt

diff --git a/private/common.rkt b/private/common.rkt
index 56d2b36..99cf88d 100644
--- a/private/common.rkt
+++ b/private/common.rkt
@@ -1,6 +1,7 @@
 #lang racket/base
 
 ;; Common helper functions
+;; TODO make-set!-transformer
 
 (provide
 
@@ -40,10 +41,10 @@
    #:attributes (evidence expanded)
    (pattern e
     #:with e+ (expand-expr #'e)
-    #:with p+ (p? (syntax/loc #'e e+))
+    #:with p+ (p? #'e+)
     #:when (syntax-e #'p+)
-    #:attr evidence (syntax/loc #'e p+)
-    #:attr expanded (syntax/loc #'e e+))))
+    #:attr evidence #'p+
+    #:attr expanded #'e+)))
 
 (define (expand-expr stx)
   (local-expand stx 'expression '()))
diff --git a/private/regexp.rkt b/private/regexp.rkt
new file mode 100644
index 0000000..b96e712
--- /dev/null
+++ b/private/regexp.rkt
@@ -0,0 +1,148 @@
+#lang typed/racket/base
+
+;; Stronger types for regular expression matching.
+
+(provide
+  regexp:
+  pregexp:
+  byte-regexp:
+  byte-pregexp:
+  define-regexp:
+  let-regexp:
+
+  regexp-match:
+)
+
+(require
+  (for-syntax
+    (only-in racket/syntax format-id)
+    typed/racket/base
+    (only-in racket/format ~a)
+    syntax/parse
+    trivial/private/common))
+
+;; =============================================================================
+
+(begin-for-syntax
+  (define errloc-key     'regexp-match:)
+
+  (define (group-error str reason)
+    (raise-argument-error
+      errloc-key
+      (format "Invalid regexp pattern (unmatched ~a)" reason)
+      str))
+
+  ;; Dispatch for counting groups
+  (define (parse-groups v-stx)
+    (define v (quoted-stx-value? v-stx))
+    (cond
+      [(string? v)        (parse-groups/string v #:src v-stx)]
+      [(regexp? v)        (parse-groups/regexp v #:src v-stx)]
+      [(pregexp? v)       (parse-groups/pregexp v #:src v-stx)]
+      [(bytes? v)         (parse-groups/bytes v #:src v-stx)]
+      [(byte-regexp? v)   (parse-groups/byte-regexp v #:src v-stx)]
+      [(byte-pregexp? v)  (parse-groups/byte-pregexp v #:src v-stx)]
+      [else               #f]))
+
+  ;; Count the number of matched parentheses in a regexp pattern.
+  ;; Raise an exception if there are unmatched parens.
+  (define (parse-groups/untyped str #:src stx)
+    (define last-index (- (string-length str) 1))
+    (let loop ([i 0] [in-paren '()] [num-groups 0])
+      (if (> i last-index)
+        (if (null? in-paren)
+          num-groups
+          (group-error str (format "'(' at index ~a" (car in-paren))))
+        (case (string-ref str i)
+         [(#\()
+          ;; Watch for (? patterns
+          (if (and (< i last-index)
+                   (eq? #\? (string-ref str (+ i 1))))
+            (loop (+ i 2) (cons #f in-paren) num-groups)
+            (loop (+ i 1) (cons i in-paren) num-groups))]
+         [(#\))
+          (cond
+           [(null? in-paren)
+            (group-error str (format "')' at index ~a" i))]
+           [(eq? #f (car in-paren))
+            ;; Matched closing paren, but does not count as a group
+            (loop (+ i 1) (cdr in-paren) num-groups)]
+           [else
+            (loop (+ i 1) (cdr in-paren) (+ 1 num-groups))])]
+         [(#\\)
+          (if (and (< i last-index)
+                   (eq? #\\ (string-ref str (+ i 1))))
+            (loop (+ i 3) in-paren num-groups)
+            (loop (+ i 2) in-paren num-groups))]
+         [(#\|)
+          ;; Nope! Can't handle pipes
+          #f]
+         [else
+          (loop (+ i 1) in-paren num-groups)]))))
+
+  (define (parse-groups/string str #:src stx)
+    (let ([ng (parse-groups/untyped str #:src stx)])
+      (and ng (cons ng 'String))))
+
+  (define (parse-groups/bytes b #:src stx)
+    (let ([ng (parse-groups/untyped (~a b) #:src stx)])
+      (and ng (cons ng 'Bytes))))
+
+  (define (parse-groups/regexp rx #:src stx)
+    (parse-groups/string (~a rx) #:src stx))
+
+  (define parse-groups/pregexp
+    parse-groups/regexp)
+
+  (define (parse-groups/byte-regexp bx #:src stx)
+    (parse-groups/bytes (~a bx) #:src stx))
+
+  (define parse-groups/byte-pregexp
+    parse-groups/byte-regexp)
+
+  (define-values (num-groups-key rx? def-rx let-rx)
+    (make-value-property 'rx:groups parse-groups))
+  (define-syntax-class/predicate pattern/groups rx?)
+)
+
+;; -----------------------------------------------------------------------------
+
+(define-syntax (define-matcher* stx)
+  (syntax-parse stx
+   [(_ f*:id ...)
+    #:with (f+* ...) (for/list ([f (in-list (syntax-e #'(f* ...)))])
+                       (format-id stx "~a:" (syntax-e f)))
+    #`(begin
+        (define-syntax f+* (make-alias #'f*
+          (lambda (stx) (syntax-parse stx
+           [(_ pat:pattern/groups)
+            (syntax-property
+              (syntax/loc stx (f* pat.expanded))
+              num-groups-key
+              #'pat.evidence)]
+           [_ #f])))) ...)]))
+
+(define-matcher* regexp pregexp byte-regexp byte-pregexp)
+
+(define-syntax define-regexp: (make-keyword-alias 'define def-rx))
+(define-syntax let-regexp: (make-keyword-alias 'let let-rx))
+
+(define-syntax regexp-match: (make-alias #'regexp-match
+  (lambda (stx) (syntax-parse stx
+   [(_ pat:pattern/groups arg* ...)
+    #:with (num-groups . type-sym) (syntax/loc stx pat.evidence)
+    ;; TODO keep source location in type-sym, stop using format-id
+    ;;  (Is it really that bad?)
+    #:with type (format-id stx "~a" (syntax-e #'type-sym))
+    #:with (index* ...) (for/list ([i (in-range (syntax-e #'num-groups))]) i)
+    (syntax/loc stx
+      (let ([maybe-match (regexp-match pat.expanded arg* ...)])
+        (if maybe-match
+          (let ([m : (Listof (Option type)) maybe-match])
+            (list (car maybe-match)
+                  (begin (set! m (cdr m))
+                         (or (car m) (error 'regexp-match: (format "Internal error at result index ~a, try using Racket's regexp-match" 'index*))))
+                  ...))
+          #f)))]
+   [_ #f]))))
+
diff --git a/regexp.rkt b/regexp.rkt
index 3dafc37..de12faf 100644
--- a/regexp.rkt
+++ b/regexp.rkt
@@ -1,15 +1,12 @@
 #lang typed/racket/base
 
 ;; Stronger types for regular expression matching.
-;;
-;; TODO use syntax-class to abstract over local-expands / check num-groups
-;; TODO groups can be #f when using | ... any other way?
 
 (provide
   regexp:       define-regexp:       let-regexp:
-  pregexp:      define-pregexp:      let-pregexp:
-  byte-regexp:  define-byte-regexp:  let-byte-regexp:
-  byte-pregexp: define-byte-pregexp: let-byte-pregexp:
+  pregexp:
+  byte-regexp:
+  byte-pregexp:
   ;; Expression and definition forms that try checking their argument patterns.
   ;; If check succeeds, will remember the number of pattern groups
   ;; for calls to `regexp-match:`.
@@ -23,179 +20,4 @@
   ;; Will raise a compile-time exception if the pattern contains unmatched groups.
 )
 
-(require (for-syntax
-  typed/racket/base
-  (only-in racket/format ~a)
-  (only-in racket/syntax format-id)
-  syntax/id-table
-  syntax/parse
-  syntax/stx
-  trivial/private/common
-))
-
-;; =============================================================================
-
-(define-for-syntax num-groups-key 'regexp-match:num-groups)
-(define-for-syntax errloc-key     'regexp-match:)
-(define-for-syntax id+num-groups (make-free-id-table))
-
-(begin-for-syntax (define-syntax-class pattern/groups
-  #:attributes (expanded num-groups type)
-  (pattern e
-    #:with e+ (expand-expr #'e)
-    #:with (g . t) (count-groups #'e+)
-    #:when (syntax-e #'g)
-    #:attr expanded #'e+
-    #:attr num-groups #'g
-    #:attr type #'t)))
-
-;; -----------------------------------------------------------------------------
-
-;; (define-matcher f)
-;; Expand to two forms:
-;; - (f: arg)
-;; - (define-f: id arg)
-;; The first is for statically-checked patterns in expressions,
-;;  the second is for patterns in definitions.
-(define-syntax define-matcher
-  (syntax-parser
-   [(_ f:id)
-    #:with f: (format-id #'f "~a:" (syntax-e #'f))
-    #:with let-f: (format-id #'f "let-~a:" (syntax-e #'f))
-    #:with define-f: (format-id #'f "define-~a:" (syntax-e #'f))
-    #'(begin
-        ;; For expressions, (regexp: val)
-        (define-syntax f:
-          (syntax-parser
-           [g:id
-            (syntax/loc #'g f)]
-           [(_ pat:pattern/groups)
-            (syntax-property #'(f pat.expanded)
-              num-groups-key
-              (cons (syntax-e #'pat.num-groups) #'pat.type))]
-           [(_ arg* (... ...))
-            #'(f arg* (... ...))]))
-        ;; For lets, (let-regexp: ([id val]) ...)
-        (define-syntax let-f:
-          (syntax-parser
-           [(_ ([name*:id pat*:pattern/groups] (... ...)) e* (... ...))
-            #'(let ([name* pat*.expanded] (... ...))
-                (let-syntax ([name* (make-rename-transformer
-                                      (syntax-property #'name* num-groups-key
-                                        (cons 'pat*.num-groups #'pat*.type)))] (... ...))
-                  e* (... ...)))]
-           [(_ arg* (... ...))
-            #'(let arg* (... ...))]))
-        ;; For definitions, (define-regexp: id val)
-        (define-syntax define-f:
-          (syntax-parser
-           [(_ name:id pat:pattern/groups)
-            (free-id-table-set! id+num-groups
-                                #'name
-                                (cons (syntax-e #'pat.num-groups) #'pat.type))
-            #'(define name pat.expanded)]
-           [(_ arg* (... ...))
-            #'(define arg* (... ...))]))) ]))
-
-(define-matcher regexp)
-(define-matcher pregexp)
-(define-matcher byte-regexp)
-(define-matcher byte-pregexp)
-
-(define-syntax regexp-match:
-  (syntax-parser
-   [(f pat:pattern/groups arg* ...)
-    #:with (index* ...) (for/list ([i (in-range (syntax-e #'pat.num-groups))]) i)
-    #'(let ([maybe-match (regexp-match pat.expanded arg* ...)])
-        (if maybe-match
-          (let ([m : (Listof (Option pat.type)) maybe-match])
-            (list (car maybe-match)
-                  (begin (set! m (cdr m))
-                         (or (car m) (error 'regexp-match: (format "Internal error at result index ~a, try using Racket's regexp-match" 'index*))))
-                  ...))
-          #f))]
-   [f:id
-    (syntax/loc #'f regexp-match)]
-   [(f arg* ...)
-    (syntax/loc #'f (regexp-match arg* ...))]))
-
-;; -----------------------------------------------------------------------------
-
-(define-for-syntax (group-error str reason)
-  (raise-argument-error
-    errloc-key
-    (format "Invalid regexp pattern (unmatched ~a)" reason)
-    str))
-
-;; Dispatch for counting groups
-(define-for-syntax (count-groups v-stx)
-  (cond
-    [(syntax-property v-stx num-groups-key)
-     => (lambda (x) x)]
-    [(identifier? v-stx)
-     (free-id-table-ref id+num-groups v-stx #f)]
-    [(quoted-stx-value? v-stx)
-     => (lambda (v)
-     (cond
-       [(string? v)        (count-groups/string v #:src v-stx)]
-       [(regexp? v)        (count-groups/regexp v #:src v-stx)]
-       [(pregexp? v)       (count-groups/pregexp v #:src v-stx)]
-       [(bytes? v)         (count-groups/bytes v #:src v-stx)]
-       [(byte-regexp? v)   (count-groups/byte-regexp v #:src v-stx)]
-       [(byte-pregexp? v)  (count-groups/byte-pregexp v #:src v-stx)]
-       [else               #f]))]
-    [else  #f]))
-
-;; Count the number of matched parentheses in a regexp pattern.
-;; Raise an exception if there are unmatched parens.
-(define-for-syntax (count-groups/untyped str #:src stx)
-  (define last-index (- (string-length str) 1))
-  (let loop ([i 0] [in-paren '()] [num-groups 0])
-    (if (> i last-index)
-      (if (null? in-paren)
-        num-groups
-        (group-error str (format "'(' at index ~a" (car in-paren))))
-      (case (string-ref str i)
-       [(#\()
-        ;; Watch for (? patterns
-        (if (and (< i last-index)
-                 (eq? #\? (string-ref str (+ i 1))))
-          (loop (+ i 2) (cons #f in-paren) num-groups)
-          (loop (+ i 1) (cons i in-paren) num-groups))]
-       [(#\))
-        (cond
-         [(null? in-paren)
-          (group-error str (format "')' at index ~a" i))]
-         [(eq? #f (car in-paren))
-          ;; Matched closing paren, but does not count as a group
-          (loop (+ i 1) (cdr in-paren) num-groups)]
-         [else
-          (loop (+ i 1) (cdr in-paren) (+ 1 num-groups))])]
-       [(#\\)
-        (if (and (< i last-index)
-                 (eq? #\\ (string-ref str (+ i 1))))
-          (loop (+ i 3) in-paren num-groups)
-          (loop (+ i 2) in-paren num-groups))]
-       [(#\|)
-        ;; Nope! Can't handle pipes
-        #f]
-       [else
-        (loop (+ i 1) in-paren num-groups)]))))
-
-(define-for-syntax (count-groups/string str #:src stx)
-  (cons (count-groups/untyped str #:src stx) (syntax/loc stx String)))
-
-(define-for-syntax (count-groups/bytes b #:src stx)
-  (cons (count-groups/untyped (~a b) #:src stx) (syntax/loc stx Bytes)))
-
-(define-for-syntax (count-groups/regexp rx #:src stx)
-  (count-groups/string (~a rx) #:src stx))
-
-(define-for-syntax count-groups/pregexp
-  count-groups/regexp)
-
-(define-for-syntax (count-groups/byte-regexp bx #:src stx)
-  (count-groups/bytes (~a bx) #:src stx))
-
-(define-for-syntax count-groups/byte-pregexp
-  count-groups/byte-regexp)
+(require trivial/private/regexp)
diff --git a/regexp/no-colon.rkt b/regexp/no-colon.rkt
index 3523719..0370810 100644
--- a/regexp/no-colon.rkt
+++ b/regexp/no-colon.rkt
@@ -9,10 +9,4 @@
   [byte-regexp: byte-regexp]
   [byte-pregexp: byte-pregexp]
   [let-regexp: let-regexp]
-  [let-pregexp: let-pregexp]
-  [let-byte-regexp: let-byte-regexp]
-  [let-byte-pregexp: let-byte-pregexp]
-  [define-regexp: define-regexp]
-  [define-pregexp: define-pregexp]
-  [define-byte-regexp: define-byte-regexp]
-  [define-byte-pregexp: define-byte-pregexp]))
+  [define-regexp: define-regexp]))
diff --git a/test/regexp-pass.rkt b/test/regexp-pass.rkt
index c9e165a..4c1b13c 100644
--- a/test/regexp-pass.rkt
+++ b/test/regexp-pass.rkt
@@ -221,23 +221,6 @@
       (U #f (Listof (U #f String))))
     '("hellooo" "ll" "ooo"))
 
-  ;; -- let-pregexp:
-  (check-equal?
-    (ann
-      (let-pregexp: ([rx #px"he(l*)(o*)"])
-        (regexp-match: rx "helllooo"))
-      (U #f (List String String String)))
-    '("helllooo" "lll" "ooo"))
-
-  ;; -- define-pregexp:
-  (check-equal?
-    (ann
-      (let ()
-        (define-pregexp: rx #px"he(l*)(o*)")
-        (regexp-match: rx "helllooo"))
-      (U #f (List String String String)))
-    '("helllooo" "lll" "ooo"))
-
   (check-equal?
     (ann
       (let ()
@@ -245,37 +228,12 @@
       (U #f (List String String String)))
     '("hellooo" "ll" "ooo"))
 
-  (check-equal?
-    (ann
-      (let ()
-        (define-pregexp: rx (pregexp: "he(l*)(o*)"))
-        (regexp-match: rx "hellooo"))
-      (U #f (List String String String)))
-    '("hellooo" "ll" "ooo"))
-
   (check-equal?
     (ann
       (regexp-match: #rx#"he(l*)(o*)" #"helllooo")
       (U #f (List Bytes Bytes Bytes)))
     '(#"helllooo" #"lll" #"ooo"))
 
-  ;; -- let-byte-regexp:
-  (check-equal?
-    (ann
-      (let-byte-regexp: ([rx #rx#"he(l*)(o*)"])
-        (regexp-match: rx #"helllooo"))
-      (U #f (List Bytes Bytes Bytes)))
-    '(#"helllooo" #"lll" #"ooo"))
-
-  ;; -- define-byte-regexp:
-  (check-equal?
-    (ann
-      (let ()
-        (define-byte-regexp: rx #rx#"he(l*)(o*)")
-        (regexp-match: rx #"helllooo"))
-      (U #f (List Bytes Bytes Bytes)))
-    '(#"helllooo" #"lll" #"ooo"))
-
   (check-equal?
     (ann
       (let ()
@@ -283,37 +241,12 @@
       (U #f (List Bytes Bytes Bytes)))
     '(#"hellooo" #"ll" #"ooo"))
 
-  (check-equal?
-    (ann
-      (let ()
-        (define-byte-regexp: rx (byte-regexp: #"he(l*)(o*)"))
-        (regexp-match: rx "hellooo"))
-      (U #f (List Bytes Bytes Bytes)))
-    '(#"hellooo" #"ll" #"ooo"))
-
   (check-equal?
     (ann
       (regexp-match: #px#"he(l*)(o*)" "helllooo")
       (U #f (List Bytes Bytes Bytes)))
     '(#"helllooo" #"lll" #"ooo"))
 
-  ;; -- let-byte-pregexp:
-  (check-equal?
-    (ann
-      (let-byte-pregexp: ([rx #px#"he(l*)(o*)"])
-        (regexp-match: rx "helllooo"))
-      (U #f (List Bytes Bytes Bytes)))
-    '(#"helllooo" #"lll" #"ooo"))
-
-  ;; -- define-byte-pregexp:
-  (check-equal?
-    (ann
-      (let ()
-        (define-byte-pregexp: rx #px#"he(l*)(o*)")
-        (regexp-match: rx "helllooo"))
-      (U #f (List Bytes Bytes Bytes)))
-    '(#"helllooo" #"lll" #"ooo"))
-
   (check-equal?
     (ann
       (let ()
@@ -321,14 +254,6 @@
       (U #f (List Bytes Bytes Bytes)))
     '(#"hellooo" #"ll" #"ooo"))
 
-  (check-equal?
-    (ann
-      (let ()
-        (define-byte-pregexp: rx (byte-pregexp: #"he(l*)(o*)"))
-        (regexp-match: rx "hellooo"))
-      (U #f (List Bytes Bytes Bytes)))
-    '(#"hellooo" #"ll" #"ooo"))
-
   ;; -- special cases / miscellaneous
 
   ;; --- Can't handle |, yet