rewrite glob->regexp to deal with char ranges properly
svn: r3151 original commit: 8607cacc089786209ce2f7214b1b80294205fcef
This commit is contained in:
parent
c979c1ab87
commit
af91af3534
|
@ -405,60 +405,133 @@
|
|||
(string-utf-8-length s)))
|
||||
(= (cdar m) (string-length s)))))))
|
||||
|
||||
(define default-glob-case-sens
|
||||
(not (memq (system-type) '(windows macos macosx))))
|
||||
(define glob->regexp
|
||||
(opt-lambda (glob [case-sens? default-glob-case-sens])
|
||||
(define len (string-length glob))
|
||||
(define range #f)
|
||||
(define init-*? #f)
|
||||
(define case-sens
|
||||
(if case-sens?
|
||||
values
|
||||
(lambda (c)
|
||||
(if (char? c) ; do this only for char inputs -- see below
|
||||
(let* ([def-case-sens (not (memq (system-type) '(windows macos macosx)))]
|
||||
[quoted-chars (string->list ".|+^$()[]")]
|
||||
[error
|
||||
(lambda xs (apply error 'glob->regexp xs))]
|
||||
[->case-insens
|
||||
(lambda (c)
|
||||
(let ([c1 (char-upcase c)] [c2 (char-downcase c)])
|
||||
(if (char=? c1 c2) c (list #\[ c1 c2 #\])))
|
||||
c))))
|
||||
(let loop ([res '()] [i 0])
|
||||
(define (next x) (loop (cons x res) (add1 i)))
|
||||
(if (= i len)
|
||||
(begin
|
||||
(when range
|
||||
(error 'glob->regexp "unterminated range in glob: ~e" glob))
|
||||
(let loop ([left res] [res (if init-*? '() '(#\$))])
|
||||
(if (null? left)
|
||||
(regexp (list->string
|
||||
(cons #\^ (if init-*?
|
||||
`(#\( #\? #\: #\[ #\^ #\. #\] #\. #\*
|
||||
,@res #\| ,@res #\) #\$)
|
||||
res))))
|
||||
;; doing this only for single chars, which means that
|
||||
;; backslash-quoted char is left alone
|
||||
(let ([c (case-sens (car left))])
|
||||
(loop (cdr left)
|
||||
(if (char? c)
|
||||
(cons (case-sens c) res)
|
||||
(append c res)))))))
|
||||
(let ([c (string-ref glob i)])
|
||||
(if range
|
||||
(begin (set! range
|
||||
(case range
|
||||
[(0) (case c ((#\^) 1) (else 2))]
|
||||
[(1) 2]
|
||||
[else (case c ((#\]) #f) (else 2))]))
|
||||
(next c))
|
||||
(case c
|
||||
[(#\\) (set! i (add1 i))
|
||||
(if (< i len)
|
||||
(next (list #\\ (string-ref glob i)))
|
||||
(error 'glob->regexp "glob ends in backslash: ~e" glob))]
|
||||
[(#\*) (if (eq? 0 i)
|
||||
(begin (set! init-*? #t) (next '()))
|
||||
(next '(#\. #\*)))]
|
||||
[(#\?) (next (if (eq? 0 i) '(#\[ #\^ #\. #\]) #\.))]
|
||||
[(#\[) (set! range 0) (next #\[)]
|
||||
[(#\. #\| #\+ #\^ #\$ #\( #\) #\]) (next (list #\\ c))]
|
||||
[else (next c)])))))))
|
||||
(if (char=? c1 c2) c (list #\[ c1 c2 #\]))))]
|
||||
[no-dot* (string->list "(?:[^.].*|)")]
|
||||
[no-dot+ (string->list "(?:[^.].*)")]
|
||||
[no-dot1 (string->list "[^.]")]
|
||||
[ndot (char->integer #\.)]
|
||||
[dot+1 (integer->char (add1 ndot))]
|
||||
;; don't use dot-1, because it's "-"
|
||||
[dot-2 (integer->char (- ndot 2))])
|
||||
(define (range:add-dot l) ; add it to the end, before a possible last "-"
|
||||
(if (member l '(() (#\-)))
|
||||
(cons #\. l)
|
||||
(cons (car l) (range:add-dot (cdr l)))))
|
||||
(define (range:del-dot l) ; remove "."s, take care of ranges too
|
||||
;; problem: when we remove a "." from the middle of a range, we need to
|
||||
;; add "-" at the end
|
||||
(let loop ([l l] [-? #f])
|
||||
(cond [(and -? (null? l)) '(#\-)] ; add a "-"
|
||||
[(and -? (null? (cdr l)))
|
||||
(if (char=? #\- (car l)) l (list (car l) #\-))]
|
||||
[(null? l) '()]
|
||||
[(and (pair? l) (pair? (cdr l)) (pair? (cddr l))
|
||||
(char=? #\- (cadr l)))
|
||||
(let* ([lo (car l)] [nlo (char->integer lo)]
|
||||
[hi (caddr l)] [nhi (char->integer hi)]
|
||||
[rest (lambda (-?) (loop (cdddr l) -?))])
|
||||
(cond [(= nlo ndot nhi) (rest -?)]
|
||||
[(= nlo ndot) (list* dot+1 #\- hi (rest -?))]
|
||||
[(= nhi ndot) (list* lo #\- dot-2 (rest #t))]
|
||||
[(<= nlo ndot nhi)
|
||||
(list* lo #\- dot-2 dot+1 #\- hi (rest #t))]
|
||||
[else (list* lo #\- hi (rest -?))]))]
|
||||
[(char=? #\. (car l)) (loop (cdr l) -?)]
|
||||
[else (cons (car l) (loop (cdr l) -?))])))
|
||||
(define (hide-dots l)
|
||||
(let* ([1st (and (pair? l) (car l))]
|
||||
[2nd (and 1st (pair? (cdr l)) (cadr l))])
|
||||
(cond [(or (not 1st) (char? 1st)) l]
|
||||
;; "?---" --> "[^.]---"
|
||||
[(eq? 'char 1st) (cons no-dot1 (cdr l))]
|
||||
[(eq? 'any 1st)
|
||||
(cond ;; "*" --> "(?:[^.].*)"
|
||||
[(not 2nd) no-dot+]
|
||||
;; "*.---" --> "(?:[^.].*).---"
|
||||
[(or (eq? #\. 2nd) (equal? '(quote #\.) 2nd))
|
||||
(cons no-dot+ (cdr l))]
|
||||
;; "*---" --> "(?:[^.].*|)---"
|
||||
[(char? 2nd) (cons no-dot* (cdr l))]
|
||||
;; "*?---" --> "?*---"
|
||||
[(eq? 'char 2nd) (hide-dots (list* 'char 'any (cddr l)))]
|
||||
;; "**---" --> "*---"
|
||||
[(eq? 'any 2nd) (hide-dots (cons 'any (cddr l)))]
|
||||
;; same as non-.-char
|
||||
[(eq? 'quote (car 2nd)) (cons no-dot* (cdr l))]
|
||||
;; "*[---]---" --> "(?:[^.].*|)[--*--]---"
|
||||
[(eq? 'range (car 2nd))
|
||||
(list* no-dot*
|
||||
(list* (car 2nd) (cadr 2nd)
|
||||
((if (cadr 2nd)
|
||||
range:add-dot range:del-dot)
|
||||
(cddr 2nd)))
|
||||
(cddr l))])]
|
||||
[(eq? 'quote (caar l)) l]
|
||||
[(eq? 'range (caar l))
|
||||
(list* (list* (car 1st) (cadr 1st)
|
||||
((if (cadr 1st) range:add-dot range:del-dot)
|
||||
(cddr 1st)))
|
||||
(cdr l))]
|
||||
[else (error "internal error")])))
|
||||
(opt-lambda (glob [hide-dots? #t] [case-sens? def-case-sens])
|
||||
(let ([len (string-length glob)]
|
||||
[->case (if case-sens? values ->case-insens)])
|
||||
(let loop ([r '()] [i 0])
|
||||
(if (< i len)
|
||||
(let ([c (string-ref glob i)])
|
||||
(case c
|
||||
[(#\\)
|
||||
(let ([i (add1 i)])
|
||||
(if (< i len)
|
||||
(loop (cons `(quote ,(string-ref glob i)) r) (add1 i))
|
||||
(error "glob ends in backslash: ~e" glob)))]
|
||||
[(#\*) (loop (cons 'any r) (add1 i))]
|
||||
[(#\?) (loop (cons 'char r) (add1 i))]
|
||||
[(#\[)
|
||||
(let* ([i (add1 i)]
|
||||
[not? (and (< i len)
|
||||
(char=? #\^ (string-ref glob i)))])
|
||||
(let loop2 ([chars '()] [i (if not? (add1 i) i)])
|
||||
(if (= i len)
|
||||
(error "unterminated range in glob: ~e" glob)
|
||||
(let ([c (string-ref glob i)])
|
||||
(if (and (char=? #\] c) (pair? chars))
|
||||
(loop (cons `(range ,not? ,@(reverse! chars)) r)
|
||||
(add1 i))
|
||||
(loop2 (cons c chars) (add1 i)))))))]
|
||||
[else (loop (cons c r) (add1 i))]))
|
||||
(let* ([r (reverse! r)]
|
||||
[r (if hide-dots? (hide-dots r) r)])
|
||||
(regexp
|
||||
(list->string
|
||||
(apply append
|
||||
(map (lambda (c)
|
||||
(cond [(char? c)
|
||||
(if (memq c quoted-chars)
|
||||
;; doing this only for single chars,
|
||||
;; which means that backslash-quoted
|
||||
;; chars are left alone
|
||||
`(#\\ ,c) (list (->case c)))]
|
||||
[(eq? 'any c) '(#\. #\*)]
|
||||
[(eq? 'char c) '(#\.)]
|
||||
;; results of dot tweaking: no alphabetics
|
||||
[(char? (car c)) c]
|
||||
;; note: no ->case here
|
||||
[(eq? 'quote (car c)) `(#\\ ,(cadr c))]
|
||||
[(eq? 'range (car c))
|
||||
(append '(#\[)
|
||||
(if (cadr c) '(#\^) '())
|
||||
(cddr c)
|
||||
'(#\]))]
|
||||
[else (error "internal error")]))
|
||||
r)))))))))))
|
||||
|
||||
)
|
||||
|
|
Loading…
Reference in New Issue
Block a user