[regexp] cleaner impl
This commit is contained in:
parent
f39a7dbb43
commit
b6ad6f5646
|
@ -1,6 +1,7 @@
|
|||
#lang racket/base
|
||||
|
||||
;; Common helper functions
|
||||
;; TODO make-set!-transformer
|
||||
|
||||
(provide
|
||||
|
||||
|
@ -40,10 +41,10 @@
|
|||
#:attributes (evidence expanded)
|
||||
(pattern e
|
||||
#:with e+ (expand-expr #'e)
|
||||
#:with p+ (p? (syntax/loc #'e e+))
|
||||
#:with p+ (p? #'e+)
|
||||
#:when (syntax-e #'p+)
|
||||
#:attr evidence (syntax/loc #'e p+)
|
||||
#:attr expanded (syntax/loc #'e e+))))
|
||||
#:attr evidence #'p+
|
||||
#:attr expanded #'e+)))
|
||||
|
||||
(define (expand-expr stx)
|
||||
(local-expand stx 'expression '()))
|
||||
|
|
148
private/regexp.rkt
Normal file
148
private/regexp.rkt
Normal file
|
@ -0,0 +1,148 @@
|
|||
#lang typed/racket/base
|
||||
|
||||
;; Stronger types for regular expression matching.
|
||||
|
||||
(provide
|
||||
regexp:
|
||||
pregexp:
|
||||
byte-regexp:
|
||||
byte-pregexp:
|
||||
define-regexp:
|
||||
let-regexp:
|
||||
|
||||
regexp-match:
|
||||
)
|
||||
|
||||
(require
|
||||
(for-syntax
|
||||
(only-in racket/syntax format-id)
|
||||
typed/racket/base
|
||||
(only-in racket/format ~a)
|
||||
syntax/parse
|
||||
trivial/private/common))
|
||||
|
||||
;; =============================================================================
|
||||
|
||||
(begin-for-syntax
|
||||
(define errloc-key 'regexp-match:)
|
||||
|
||||
(define (group-error str reason)
|
||||
(raise-argument-error
|
||||
errloc-key
|
||||
(format "Invalid regexp pattern (unmatched ~a)" reason)
|
||||
str))
|
||||
|
||||
;; Dispatch for counting groups
|
||||
(define (parse-groups v-stx)
|
||||
(define v (quoted-stx-value? v-stx))
|
||||
(cond
|
||||
[(string? v) (parse-groups/string v #:src v-stx)]
|
||||
[(regexp? v) (parse-groups/regexp v #:src v-stx)]
|
||||
[(pregexp? v) (parse-groups/pregexp v #:src v-stx)]
|
||||
[(bytes? v) (parse-groups/bytes v #:src v-stx)]
|
||||
[(byte-regexp? v) (parse-groups/byte-regexp v #:src v-stx)]
|
||||
[(byte-pregexp? v) (parse-groups/byte-pregexp v #:src v-stx)]
|
||||
[else #f]))
|
||||
|
||||
;; Count the number of matched parentheses in a regexp pattern.
|
||||
;; Raise an exception if there are unmatched parens.
|
||||
(define (parse-groups/untyped str #:src stx)
|
||||
(define last-index (- (string-length str) 1))
|
||||
(let loop ([i 0] [in-paren '()] [num-groups 0])
|
||||
(if (> i last-index)
|
||||
(if (null? in-paren)
|
||||
num-groups
|
||||
(group-error str (format "'(' at index ~a" (car in-paren))))
|
||||
(case (string-ref str i)
|
||||
[(#\()
|
||||
;; Watch for (? patterns
|
||||
(if (and (< i last-index)
|
||||
(eq? #\? (string-ref str (+ i 1))))
|
||||
(loop (+ i 2) (cons #f in-paren) num-groups)
|
||||
(loop (+ i 1) (cons i in-paren) num-groups))]
|
||||
[(#\))
|
||||
(cond
|
||||
[(null? in-paren)
|
||||
(group-error str (format "')' at index ~a" i))]
|
||||
[(eq? #f (car in-paren))
|
||||
;; Matched closing paren, but does not count as a group
|
||||
(loop (+ i 1) (cdr in-paren) num-groups)]
|
||||
[else
|
||||
(loop (+ i 1) (cdr in-paren) (+ 1 num-groups))])]
|
||||
[(#\\)
|
||||
(if (and (< i last-index)
|
||||
(eq? #\\ (string-ref str (+ i 1))))
|
||||
(loop (+ i 3) in-paren num-groups)
|
||||
(loop (+ i 2) in-paren num-groups))]
|
||||
[(#\|)
|
||||
;; Nope! Can't handle pipes
|
||||
#f]
|
||||
[else
|
||||
(loop (+ i 1) in-paren num-groups)]))))
|
||||
|
||||
(define (parse-groups/string str #:src stx)
|
||||
(let ([ng (parse-groups/untyped str #:src stx)])
|
||||
(and ng (cons ng 'String))))
|
||||
|
||||
(define (parse-groups/bytes b #:src stx)
|
||||
(let ([ng (parse-groups/untyped (~a b) #:src stx)])
|
||||
(and ng (cons ng 'Bytes))))
|
||||
|
||||
(define (parse-groups/regexp rx #:src stx)
|
||||
(parse-groups/string (~a rx) #:src stx))
|
||||
|
||||
(define parse-groups/pregexp
|
||||
parse-groups/regexp)
|
||||
|
||||
(define (parse-groups/byte-regexp bx #:src stx)
|
||||
(parse-groups/bytes (~a bx) #:src stx))
|
||||
|
||||
(define parse-groups/byte-pregexp
|
||||
parse-groups/byte-regexp)
|
||||
|
||||
(define-values (num-groups-key rx? def-rx let-rx)
|
||||
(make-value-property 'rx:groups parse-groups))
|
||||
(define-syntax-class/predicate pattern/groups rx?)
|
||||
)
|
||||
|
||||
;; -----------------------------------------------------------------------------
|
||||
|
||||
(define-syntax (define-matcher* stx)
|
||||
(syntax-parse stx
|
||||
[(_ f*:id ...)
|
||||
#:with (f+* ...) (for/list ([f (in-list (syntax-e #'(f* ...)))])
|
||||
(format-id stx "~a:" (syntax-e f)))
|
||||
#`(begin
|
||||
(define-syntax f+* (make-alias #'f*
|
||||
(lambda (stx) (syntax-parse stx
|
||||
[(_ pat:pattern/groups)
|
||||
(syntax-property
|
||||
(syntax/loc stx (f* pat.expanded))
|
||||
num-groups-key
|
||||
#'pat.evidence)]
|
||||
[_ #f])))) ...)]))
|
||||
|
||||
(define-matcher* regexp pregexp byte-regexp byte-pregexp)
|
||||
|
||||
(define-syntax define-regexp: (make-keyword-alias 'define def-rx))
|
||||
(define-syntax let-regexp: (make-keyword-alias 'let let-rx))
|
||||
|
||||
(define-syntax regexp-match: (make-alias #'regexp-match
|
||||
(lambda (stx) (syntax-parse stx
|
||||
[(_ pat:pattern/groups arg* ...)
|
||||
#:with (num-groups . type-sym) (syntax/loc stx pat.evidence)
|
||||
;; TODO keep source location in type-sym, stop using format-id
|
||||
;; (Is it really that bad?)
|
||||
#:with type (format-id stx "~a" (syntax-e #'type-sym))
|
||||
#:with (index* ...) (for/list ([i (in-range (syntax-e #'num-groups))]) i)
|
||||
(syntax/loc stx
|
||||
(let ([maybe-match (regexp-match pat.expanded arg* ...)])
|
||||
(if maybe-match
|
||||
(let ([m : (Listof (Option type)) maybe-match])
|
||||
(list (car maybe-match)
|
||||
(begin (set! m (cdr m))
|
||||
(or (car m) (error 'regexp-match: (format "Internal error at result index ~a, try using Racket's regexp-match" 'index*))))
|
||||
...))
|
||||
#f)))]
|
||||
[_ #f]))))
|
||||
|
186
regexp.rkt
186
regexp.rkt
|
@ -1,15 +1,12 @@
|
|||
#lang typed/racket/base
|
||||
|
||||
;; Stronger types for regular expression matching.
|
||||
;;
|
||||
;; TODO use syntax-class to abstract over local-expands / check num-groups
|
||||
;; TODO groups can be #f when using | ... any other way?
|
||||
|
||||
(provide
|
||||
regexp: define-regexp: let-regexp:
|
||||
pregexp: define-pregexp: let-pregexp:
|
||||
byte-regexp: define-byte-regexp: let-byte-regexp:
|
||||
byte-pregexp: define-byte-pregexp: let-byte-pregexp:
|
||||
pregexp:
|
||||
byte-regexp:
|
||||
byte-pregexp:
|
||||
;; Expression and definition forms that try checking their argument patterns.
|
||||
;; If check succeeds, will remember the number of pattern groups
|
||||
;; for calls to `regexp-match:`.
|
||||
|
@ -23,179 +20,4 @@
|
|||
;; Will raise a compile-time exception if the pattern contains unmatched groups.
|
||||
)
|
||||
|
||||
(require (for-syntax
|
||||
typed/racket/base
|
||||
(only-in racket/format ~a)
|
||||
(only-in racket/syntax format-id)
|
||||
syntax/id-table
|
||||
syntax/parse
|
||||
syntax/stx
|
||||
trivial/private/common
|
||||
))
|
||||
|
||||
;; =============================================================================
|
||||
|
||||
(define-for-syntax num-groups-key 'regexp-match:num-groups)
|
||||
(define-for-syntax errloc-key 'regexp-match:)
|
||||
(define-for-syntax id+num-groups (make-free-id-table))
|
||||
|
||||
(begin-for-syntax (define-syntax-class pattern/groups
|
||||
#:attributes (expanded num-groups type)
|
||||
(pattern e
|
||||
#:with e+ (expand-expr #'e)
|
||||
#:with (g . t) (count-groups #'e+)
|
||||
#:when (syntax-e #'g)
|
||||
#:attr expanded #'e+
|
||||
#:attr num-groups #'g
|
||||
#:attr type #'t)))
|
||||
|
||||
;; -----------------------------------------------------------------------------
|
||||
|
||||
;; (define-matcher f)
|
||||
;; Expand to two forms:
|
||||
;; - (f: arg)
|
||||
;; - (define-f: id arg)
|
||||
;; The first is for statically-checked patterns in expressions,
|
||||
;; the second is for patterns in definitions.
|
||||
(define-syntax define-matcher
|
||||
(syntax-parser
|
||||
[(_ f:id)
|
||||
#:with f: (format-id #'f "~a:" (syntax-e #'f))
|
||||
#:with let-f: (format-id #'f "let-~a:" (syntax-e #'f))
|
||||
#:with define-f: (format-id #'f "define-~a:" (syntax-e #'f))
|
||||
#'(begin
|
||||
;; For expressions, (regexp: val)
|
||||
(define-syntax f:
|
||||
(syntax-parser
|
||||
[g:id
|
||||
(syntax/loc #'g f)]
|
||||
[(_ pat:pattern/groups)
|
||||
(syntax-property #'(f pat.expanded)
|
||||
num-groups-key
|
||||
(cons (syntax-e #'pat.num-groups) #'pat.type))]
|
||||
[(_ arg* (... ...))
|
||||
#'(f arg* (... ...))]))
|
||||
;; For lets, (let-regexp: ([id val]) ...)
|
||||
(define-syntax let-f:
|
||||
(syntax-parser
|
||||
[(_ ([name*:id pat*:pattern/groups] (... ...)) e* (... ...))
|
||||
#'(let ([name* pat*.expanded] (... ...))
|
||||
(let-syntax ([name* (make-rename-transformer
|
||||
(syntax-property #'name* num-groups-key
|
||||
(cons 'pat*.num-groups #'pat*.type)))] (... ...))
|
||||
e* (... ...)))]
|
||||
[(_ arg* (... ...))
|
||||
#'(let arg* (... ...))]))
|
||||
;; For definitions, (define-regexp: id val)
|
||||
(define-syntax define-f:
|
||||
(syntax-parser
|
||||
[(_ name:id pat:pattern/groups)
|
||||
(free-id-table-set! id+num-groups
|
||||
#'name
|
||||
(cons (syntax-e #'pat.num-groups) #'pat.type))
|
||||
#'(define name pat.expanded)]
|
||||
[(_ arg* (... ...))
|
||||
#'(define arg* (... ...))]))) ]))
|
||||
|
||||
(define-matcher regexp)
|
||||
(define-matcher pregexp)
|
||||
(define-matcher byte-regexp)
|
||||
(define-matcher byte-pregexp)
|
||||
|
||||
(define-syntax regexp-match:
|
||||
(syntax-parser
|
||||
[(f pat:pattern/groups arg* ...)
|
||||
#:with (index* ...) (for/list ([i (in-range (syntax-e #'pat.num-groups))]) i)
|
||||
#'(let ([maybe-match (regexp-match pat.expanded arg* ...)])
|
||||
(if maybe-match
|
||||
(let ([m : (Listof (Option pat.type)) maybe-match])
|
||||
(list (car maybe-match)
|
||||
(begin (set! m (cdr m))
|
||||
(or (car m) (error 'regexp-match: (format "Internal error at result index ~a, try using Racket's regexp-match" 'index*))))
|
||||
...))
|
||||
#f))]
|
||||
[f:id
|
||||
(syntax/loc #'f regexp-match)]
|
||||
[(f arg* ...)
|
||||
(syntax/loc #'f (regexp-match arg* ...))]))
|
||||
|
||||
;; -----------------------------------------------------------------------------
|
||||
|
||||
(define-for-syntax (group-error str reason)
|
||||
(raise-argument-error
|
||||
errloc-key
|
||||
(format "Invalid regexp pattern (unmatched ~a)" reason)
|
||||
str))
|
||||
|
||||
;; Dispatch for counting groups
|
||||
(define-for-syntax (count-groups v-stx)
|
||||
(cond
|
||||
[(syntax-property v-stx num-groups-key)
|
||||
=> (lambda (x) x)]
|
||||
[(identifier? v-stx)
|
||||
(free-id-table-ref id+num-groups v-stx #f)]
|
||||
[(quoted-stx-value? v-stx)
|
||||
=> (lambda (v)
|
||||
(cond
|
||||
[(string? v) (count-groups/string v #:src v-stx)]
|
||||
[(regexp? v) (count-groups/regexp v #:src v-stx)]
|
||||
[(pregexp? v) (count-groups/pregexp v #:src v-stx)]
|
||||
[(bytes? v) (count-groups/bytes v #:src v-stx)]
|
||||
[(byte-regexp? v) (count-groups/byte-regexp v #:src v-stx)]
|
||||
[(byte-pregexp? v) (count-groups/byte-pregexp v #:src v-stx)]
|
||||
[else #f]))]
|
||||
[else #f]))
|
||||
|
||||
;; Count the number of matched parentheses in a regexp pattern.
|
||||
;; Raise an exception if there are unmatched parens.
|
||||
(define-for-syntax (count-groups/untyped str #:src stx)
|
||||
(define last-index (- (string-length str) 1))
|
||||
(let loop ([i 0] [in-paren '()] [num-groups 0])
|
||||
(if (> i last-index)
|
||||
(if (null? in-paren)
|
||||
num-groups
|
||||
(group-error str (format "'(' at index ~a" (car in-paren))))
|
||||
(case (string-ref str i)
|
||||
[(#\()
|
||||
;; Watch for (? patterns
|
||||
(if (and (< i last-index)
|
||||
(eq? #\? (string-ref str (+ i 1))))
|
||||
(loop (+ i 2) (cons #f in-paren) num-groups)
|
||||
(loop (+ i 1) (cons i in-paren) num-groups))]
|
||||
[(#\))
|
||||
(cond
|
||||
[(null? in-paren)
|
||||
(group-error str (format "')' at index ~a" i))]
|
||||
[(eq? #f (car in-paren))
|
||||
;; Matched closing paren, but does not count as a group
|
||||
(loop (+ i 1) (cdr in-paren) num-groups)]
|
||||
[else
|
||||
(loop (+ i 1) (cdr in-paren) (+ 1 num-groups))])]
|
||||
[(#\\)
|
||||
(if (and (< i last-index)
|
||||
(eq? #\\ (string-ref str (+ i 1))))
|
||||
(loop (+ i 3) in-paren num-groups)
|
||||
(loop (+ i 2) in-paren num-groups))]
|
||||
[(#\|)
|
||||
;; Nope! Can't handle pipes
|
||||
#f]
|
||||
[else
|
||||
(loop (+ i 1) in-paren num-groups)]))))
|
||||
|
||||
(define-for-syntax (count-groups/string str #:src stx)
|
||||
(cons (count-groups/untyped str #:src stx) (syntax/loc stx String)))
|
||||
|
||||
(define-for-syntax (count-groups/bytes b #:src stx)
|
||||
(cons (count-groups/untyped (~a b) #:src stx) (syntax/loc stx Bytes)))
|
||||
|
||||
(define-for-syntax (count-groups/regexp rx #:src stx)
|
||||
(count-groups/string (~a rx) #:src stx))
|
||||
|
||||
(define-for-syntax count-groups/pregexp
|
||||
count-groups/regexp)
|
||||
|
||||
(define-for-syntax (count-groups/byte-regexp bx #:src stx)
|
||||
(count-groups/bytes (~a bx) #:src stx))
|
||||
|
||||
(define-for-syntax count-groups/byte-pregexp
|
||||
count-groups/byte-regexp)
|
||||
(require trivial/private/regexp)
|
||||
|
|
|
@ -9,10 +9,4 @@
|
|||
[byte-regexp: byte-regexp]
|
||||
[byte-pregexp: byte-pregexp]
|
||||
[let-regexp: let-regexp]
|
||||
[let-pregexp: let-pregexp]
|
||||
[let-byte-regexp: let-byte-regexp]
|
||||
[let-byte-pregexp: let-byte-pregexp]
|
||||
[define-regexp: define-regexp]
|
||||
[define-pregexp: define-pregexp]
|
||||
[define-byte-regexp: define-byte-regexp]
|
||||
[define-byte-pregexp: define-byte-pregexp]))
|
||||
[define-regexp: define-regexp]))
|
||||
|
|
|
@ -221,23 +221,6 @@
|
|||
(U #f (Listof (U #f String))))
|
||||
'("hellooo" "ll" "ooo"))
|
||||
|
||||
;; -- let-pregexp:
|
||||
(check-equal?
|
||||
(ann
|
||||
(let-pregexp: ([rx #px"he(l*)(o*)"])
|
||||
(regexp-match: rx "helllooo"))
|
||||
(U #f (List String String String)))
|
||||
'("helllooo" "lll" "ooo"))
|
||||
|
||||
;; -- define-pregexp:
|
||||
(check-equal?
|
||||
(ann
|
||||
(let ()
|
||||
(define-pregexp: rx #px"he(l*)(o*)")
|
||||
(regexp-match: rx "helllooo"))
|
||||
(U #f (List String String String)))
|
||||
'("helllooo" "lll" "ooo"))
|
||||
|
||||
(check-equal?
|
||||
(ann
|
||||
(let ()
|
||||
|
@ -245,37 +228,12 @@
|
|||
(U #f (List String String String)))
|
||||
'("hellooo" "ll" "ooo"))
|
||||
|
||||
(check-equal?
|
||||
(ann
|
||||
(let ()
|
||||
(define-pregexp: rx (pregexp: "he(l*)(o*)"))
|
||||
(regexp-match: rx "hellooo"))
|
||||
(U #f (List String String String)))
|
||||
'("hellooo" "ll" "ooo"))
|
||||
|
||||
(check-equal?
|
||||
(ann
|
||||
(regexp-match: #rx#"he(l*)(o*)" #"helllooo")
|
||||
(U #f (List Bytes Bytes Bytes)))
|
||||
'(#"helllooo" #"lll" #"ooo"))
|
||||
|
||||
;; -- let-byte-regexp:
|
||||
(check-equal?
|
||||
(ann
|
||||
(let-byte-regexp: ([rx #rx#"he(l*)(o*)"])
|
||||
(regexp-match: rx #"helllooo"))
|
||||
(U #f (List Bytes Bytes Bytes)))
|
||||
'(#"helllooo" #"lll" #"ooo"))
|
||||
|
||||
;; -- define-byte-regexp:
|
||||
(check-equal?
|
||||
(ann
|
||||
(let ()
|
||||
(define-byte-regexp: rx #rx#"he(l*)(o*)")
|
||||
(regexp-match: rx #"helllooo"))
|
||||
(U #f (List Bytes Bytes Bytes)))
|
||||
'(#"helllooo" #"lll" #"ooo"))
|
||||
|
||||
(check-equal?
|
||||
(ann
|
||||
(let ()
|
||||
|
@ -283,37 +241,12 @@
|
|||
(U #f (List Bytes Bytes Bytes)))
|
||||
'(#"hellooo" #"ll" #"ooo"))
|
||||
|
||||
(check-equal?
|
||||
(ann
|
||||
(let ()
|
||||
(define-byte-regexp: rx (byte-regexp: #"he(l*)(o*)"))
|
||||
(regexp-match: rx "hellooo"))
|
||||
(U #f (List Bytes Bytes Bytes)))
|
||||
'(#"hellooo" #"ll" #"ooo"))
|
||||
|
||||
(check-equal?
|
||||
(ann
|
||||
(regexp-match: #px#"he(l*)(o*)" "helllooo")
|
||||
(U #f (List Bytes Bytes Bytes)))
|
||||
'(#"helllooo" #"lll" #"ooo"))
|
||||
|
||||
;; -- let-byte-pregexp:
|
||||
(check-equal?
|
||||
(ann
|
||||
(let-byte-pregexp: ([rx #px#"he(l*)(o*)"])
|
||||
(regexp-match: rx "helllooo"))
|
||||
(U #f (List Bytes Bytes Bytes)))
|
||||
'(#"helllooo" #"lll" #"ooo"))
|
||||
|
||||
;; -- define-byte-pregexp:
|
||||
(check-equal?
|
||||
(ann
|
||||
(let ()
|
||||
(define-byte-pregexp: rx #px#"he(l*)(o*)")
|
||||
(regexp-match: rx "helllooo"))
|
||||
(U #f (List Bytes Bytes Bytes)))
|
||||
'(#"helllooo" #"lll" #"ooo"))
|
||||
|
||||
(check-equal?
|
||||
(ann
|
||||
(let ()
|
||||
|
@ -321,14 +254,6 @@
|
|||
(U #f (List Bytes Bytes Bytes)))
|
||||
'(#"hellooo" #"ll" #"ooo"))
|
||||
|
||||
(check-equal?
|
||||
(ann
|
||||
(let ()
|
||||
(define-byte-pregexp: rx (byte-pregexp: #"he(l*)(o*)"))
|
||||
(regexp-match: rx "hellooo"))
|
||||
(U #f (List Bytes Bytes Bytes)))
|
||||
'(#"hellooo" #"ll" #"ooo"))
|
||||
|
||||
;; -- special cases / miscellaneous
|
||||
|
||||
;; --- Can't handle |, yet
|
||||
|
|
Loading…
Reference in New Issue
Block a user