[private] rx: handle []] and [^]] patterns

This commit is contained in:
Ben Greenman 2016-06-12 15:05:19 -04:00
parent 4bcdc32fec
commit 945938aee9
2 changed files with 39 additions and 13 deletions

View File

@ -13,6 +13,18 @@
;; -- regexps, from the world ;; -- regexps, from the world
(let ([l "dont care"]) ;; from `morse-code-table.rkt`
(check-equal?
(ann
(regexp-match: #rx"[]] [^]]" l)
(U #f (List String)))
#f)
(check-equal?
(ann
(regexp-match: #px"^\\| \\[\\[[^]]*\\]\\] \\[([^]]*)\\] \\|\\| '''([^']*)'''" l)
(U #f (List String String String)))
#f))
(let ([str "1cm"]) ;; from html-render.rkt (let ([str "1cm"]) ;; from html-render.rkt
(check-equal? (check-equal?
(ann (regexp-match: #rx"^([+-]?[0-9]*\\.?([0-9]+)?)(em|ex|px|in|cm|mm|pt|pc|%|)$" str) (ann (regexp-match: #rx"^([+-]?[0-9]*\\.?([0-9]+)?)(em|ex|px|in|cm|mm|pt|pc|%|)$" str)

View File

@ -92,12 +92,14 @@
;; (-> String #:src Syntax (Listof Boolean)) ;; (-> String #:src Syntax (Listof Boolean))
(define (parse-groups/untyped str #:src stx) (define (parse-groups/untyped str #:src stx)
(define pos** (define char->pos*
(unescaped-pos* str '(#\[ #\] #\( #\) #\| #\?))) (let ([H (unescaped-pos* str '(#\[ #\] #\( #\) #\| #\?))])
(lambda (c)
(hash-ref H c (lambda () (raise-user-error 'parse-groups "No position data for '~a' character" c))))))
;; -- check that [] are matched ;; -- check that [] are matched
(define brack-ivl* (define brack-ivl*
(let* ([l-brack-pos* (car pos**)] (let* ([l-brack-pos* (char->pos* #\[)]
[r-brack-pos* (cadr pos**)] [r-brack-pos* (char->pos* #\])]
[r (pair-up l-brack-pos* r-brack-pos*)]) [r (pair-up l-brack-pos* r-brack-pos*)])
;; ?? okay for brackets to nest? ;; ?? okay for brackets to nest?
(if (list? r) (if (list? r)
@ -106,8 +108,9 @@
(group-error stx str (format "'~a' at index ~a" brack-char r)))))) (group-error stx str (format "'~a' at index ~a" brack-char r))))))
;; -- ignore characters between a pair of brackets ;; -- ignore characters between a pair of brackets
(define-values (l-paren-pos* r-paren-pos* pipe-pos* ?-pos*) (define-values (l-paren-pos* r-paren-pos* pipe-pos* ?-pos*)
(apply values (for/list ([pos* (in-list (cddr pos**))]) (apply values
(ivl-remove* brack-ivl* pos*)))) (for/list ([c (in-list '(#\( #\) #\| #\?))])
(ivl-remove* brack-ivl* (char->pos* c)))))
;; -- check that () are matched ;; -- check that () are matched
(define paren-ivl* (define paren-ivl*
(let ([r (pair-up l-paren-pos* r-paren-pos*)]) (let ([r (pair-up l-paren-pos* r-paren-pos*)])
@ -146,8 +149,10 @@
(define (unescaped-pos* str c*) (define (unescaped-pos* str c*)
(define L (string-length str)) (define L (string-length str))
(define escaped? (box #f)) (define escaped? (box #f))
(map reverse (define (have-char-at-index? c i hist)
(for/fold ([hist (for/list ([c (in-list c*)]) '())]) (memv i (hash-ref hist c)))
(define h-rev
(for/fold ([hist (for/hasheq ([c (in-list c*)]) (values c '()))])
([i (in-range L)]) ([i (in-range L)])
(define char (unsafe-string-ref str i)) (define char (unsafe-string-ref str i))
(cond (cond
@ -158,12 +163,21 @@
[(eq? #\\ char) [(eq? #\\ char)
(set-box! escaped? #t) (set-box! escaped? #t)
hist] hist]
;; --- special case for singleton <rng>,
;; documented at `http://docs.racket-lang.org/reference/regexp.html`
[(and (eq? #\] char)
(or (have-char-at-index? #\[ (- i 1) hist) ;; []] pattern
(and (have-char-at-index? #\[ (- i 2) hist)
(eq? #\^ (string-ref str (- i 1)))))) ;; [^]] pattern
hist]
[else [else
(for/list ([h (in-list hist)] (let ([i* (hash-ref hist char #f)])
[c (in-list c*)]) (if i*
(if (eq? c char) (hash-set hist char (cons i i*))
(cons i h) hist))])))
h))])))) ;; -- reverse all saved lists
(for/hasheq ([(c i*) (in-hash h-rev)])
(values c (reverse i*))))
;; (define-type Ivl (Pairof Natural Natural)) ;; (define-type Ivl (Pairof Natural Natural))