Some reformating. The only real change is using `write-bytes' instead

of a `display' in one place.
This commit is contained in:
Eli Barzilay 2011-06-04 16:32:36 -04:00
parent 3ecdd3bc59
commit 5754312120

View File

@ -15,22 +15,21 @@
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define (real->decimal-string n [digits 2]) (define (real->decimal-string n [digits 2])
(unless (exact-nonnegative-integer? digits) (unless (exact-nonnegative-integer? digits)
(raise-type-error 'real->decimal-string "exact-nonnegative-integer" digits)) (raise-type-error 'real->decimal-string "exact-nonnegative-integer"
digits))
(let* ([e (expt 10 digits)] (let* ([e (expt 10 digits)]
[num (round (abs (* e (inexact->exact n))))]) [num (round (abs (* e (inexact->exact n))))])
(format "~a~a.~a" (format
(if (or (negative? n) "~a~a.~a"
(equal? n -0.0)) (if (or (negative? n) (equal? n -0.0)) "-" "")
"-" (quotient num e)
"") (if (zero? digits)
(quotient num e) ""
(if (zero? digits) (let ([s (number->string (remainder num e))])
"" (if (= (string-length s) digits)
(let ([s (number->string (remainder num e))]) s
(if (= (string-length s) digits) (string-append (make-string (- digits (string-length s)) #\0)
s s)))))))
(string-append (make-string (- digits (string-length s)) #\0)
s)))))))
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Regexp utilities ;; Regexp utilities
@ -87,39 +86,38 @@
(or (hash-ref t key #f) (or (hash-ref t key #f)
(let ([rx* (run-tweak)]) (hash-set! t key rx*) rx*)))))) (let ([rx* (run-tweak)]) (hash-set! t key rx*) rx*))))))
(define (regexp-try-match pattern input-port [start-k 0] [end-k #f] [out #f] [prefix #""]) (define (regexp-try-match pattern input-port [start-k 0] [end-k #f] [out #f]
[prefix #""])
(unless (input-port? input-port) (unless (input-port? input-port)
(raise-type-error 'regexp-try-match (raise-type-error 'regexp-try-match "input port" input-port))
"input port" input-port))
(unless (or (not out) (output-port? out)) (unless (or (not out) (output-port? out))
(raise-type-error 'regexp-try-match (raise-type-error 'regexp-try-match "output port or #f" out))
"output port or #f" out)) (let ([m (regexp-match-peek-positions pattern input-port start-k end-k #f
(let ([m (regexp-match-peek-positions pattern input-port start-k end-k #f prefix)]) prefix)])
(and m (and m
;; What happens if someone swipes our bytes before we can get them? ;; What happens if someone swipes our bytes before we can get them?
(let ([drop (caar m)]) (let ([drop (caar m)])
;; drop prefix before match: ;; drop prefix before match:
(let ([s (read-bytes drop input-port)]) (let ([s (read-bytes drop input-port)])
(when out (when out (write-bytes s out)))
(display s out)))
;; Get the matching part, and shift matching indices ;; Get the matching part, and shift matching indices
(let ([s (read-bytes (- (cdar m) drop) input-port)]) (let ([s (read-bytes (- (cdar m) drop) input-port)])
(cons s (cons s
(map (lambda (p) (map (lambda (p)
(and p (subbytes s (- (car p) drop) (- (cdr p) drop)))) (and p (subbytes s (- (car p) drop)
(- (cdr p) drop))))
(cdr m)))))))) (cdr m))))))))
;; Helper macro for the regexp functions below, with some utilities. ;; Helper macro for the regexp functions below, with some utilities.
(define (bstring-length s) (define (bstring-length s)
(if (bytes? s) (bytes-length s) (string-length s))) (if (bytes? s) (bytes-length s) (string-length s)))
(define no-empty-edge-matches (define no-empty-edge-matches
(make-regexp-tweaker (lambda (rx n) (make-regexp-tweaker
(if (bytes? rx) (lambda (rx n)
(bytes-append #"(?:" (if (bytes? rx)
rx (bytes-append #"(?:" rx #")(?<="
#")(?<=" (make-bytes n (char->integer #\.)) #")") (make-bytes n (char->integer #\.)) #")")
(format "(?:~a)(?<=~a)" (format "(?:~a)(?<=~a)" rx (make-bytes n (char->integer #\.)))))))
rx (make-bytes n (char->integer #\.)))))))
(define-syntax-rule (regexp-loop (define-syntax-rule (regexp-loop
name loop start end pattern string name loop start end pattern string
ipre ipre
@ -127,21 +125,22 @@
port-success-k port-success-choose port-failure-k port-success-k port-success-choose port-failure-k
need-leftover? peek?) need-leftover? peek?)
(let* ([string (if (path? string) (let* ([string (if (path? string)
(if (or (string? pattern) (regexp? pattern)) (if (or (string? pattern) (regexp? pattern))
(path->string string) (path->string string)
(path->bytes string)) (path->bytes string))
string)] string)]
[len (cond [(string? string) (string-length string)] [len (cond [(string? string) (string-length string)]
[(bytes? string) (bytes-length string)] [(bytes? string) (bytes-length string)]
[else #f])] [else #f])]
[orig-rx (cond [(bytes? pattern) (byte-regexp pattern)] [orig-rx
[(string? pattern) (regexp pattern)] (cond [(bytes? pattern) (byte-regexp pattern)]
[(regexp? pattern) pattern] [(string? pattern) (regexp pattern)]
[(byte-regexp? pattern) pattern] [(regexp? pattern) pattern]
[else [(byte-regexp? pattern) pattern]
(raise-type-error 'name [else (raise-type-error
"regexp, byte regexp, string, or byte string" 'name
pattern)])] "regexp, byte regexp, string, or byte string"
pattern)])]
[max-lookbehind (regexp-max-lookbehind orig-rx)]) [max-lookbehind (regexp-max-lookbehind orig-rx)])
(if peek? (if peek?
(unless (input-port? string) (unless (input-port? string)
@ -172,292 +171,297 @@
end)) end))
(reverse (reverse
(let loop ([acc '()] [start start] [end end] [ipre ipre] [0-ok? #t]) (let loop ([acc '()] [start start] [end end] [ipre ipre] [0-ok? #t])
(let* ([rx (if 0-ok? (let ([rx (if 0-ok?
orig-rx orig-rx
(no-empty-edge-matches orig-rx (add1 (bytes-length ipre))))]) (no-empty-edge-matches orig-rx
(add1 (bytes-length ipre))))])
(if (and port-success-choose (input-port? string)) (if (and port-success-choose (input-port? string))
;; Input port match, get string ;; Input port match, get string
(let* ([_ (when (positive? start) (let* ([_ (when (positive? start)
;; Skip start chars: ;; Skip start chars:
(let ([s (make-bytes 4096)]) (let ([s (make-bytes 4096)])
(let loop ([n 0]) (let loop ([n 0])
(unless (= n start) (unless (= n start)
(let ([m (read-bytes-avail! (let ([m (read-bytes-avail!
s string 0 (min (- start n) 4096))]) s string 0 (min (- start n) 4096))])
(unless (eof-object? m) (loop (+ n m))))))))] (unless (eof-object? m) (loop (+ n m))))))))]
[discarded/leftovers (if need-leftover? #f 0)] [discarded/leftovers (if need-leftover? #f 0)]
[spitout (if need-leftover? [spitout (if need-leftover?
(open-output-bytes) (open-output-bytes)
(make-output-port (make-output-port
'counter always-evt 'counter always-evt
(lambda (s start end flush? breakable?) (lambda (s start end flush? breakable?)
(let ([c (- end start)]) (let ([c (- end start)])
(set! discarded/leftovers (set! discarded/leftovers
(+ c discarded/leftovers)) (+ c discarded/leftovers))
c)) c))
void))] void))]
[end (and end (- end start))]) [end (and end (- end start))])
(let-values ([(ms ipre) (regexp-match/end rx (let-values ([(ms ipre)
string 0 end spitout ipre (regexp-match/end rx string 0 end spitout ipre
max-lookbehind)]) max-lookbehind)])
(let* ([m (and ms (car ms))] (let* ([m (and ms (car ms))]
[discarded/leftovers (if need-leftover? [discarded/leftovers (if need-leftover?
(get-output-bytes spitout) (get-output-bytes spitout)
discarded/leftovers)] discarded/leftovers)]
[skipped (if need-leftover? [skipped (if need-leftover?
(bstring-length discarded/leftovers) (bstring-length discarded/leftovers)
discarded/leftovers)] discarded/leftovers)]
[got (and m (bstring-length m))] [got (and m (bstring-length m))]
[end (and end m [end (and end m (- end skipped got))])
(- end skipped got))]) (if m
(if m (let ([0-ok? (not (zero? got))])
(let ([0-ok? (not (zero? got))]) (loop (port-success-choose m discarded/leftovers ms acc)
(loop (port-success-choose m discarded/leftovers ms acc) 0 end ipre 0-ok?))
0 end ipre 0-ok?)) (port-failure-k acc discarded/leftovers)))))
(port-failure-k acc discarded/leftovers)))))
;; String/port match, get positions
;; String/port match, get positions (let-values ([(m ipre)
(let-values ([(m ipre) (if peek?
(if peek? (regexp-match-peek-positions/end
(regexp-match-peek-positions/end rx rx string start end #f ipre
string start end #f ipre max-lookbehind)
max-lookbehind) (regexp-match-positions/end
(regexp-match-positions/end rx rx string start end #f ipre
string start end #f ipre max-lookbehind))])
max-lookbehind))]) (if (not m)
(if (not m) (failure-k acc start end)
(failure-k acc start end) (let* ([mstart (caar m)]
(let* ([mstart (caar m)] [mend (cdar m)]
[mend (cdar m)] [0-ok? (not (= mstart mend))])
[0-ok? (not (= mstart mend))]) (if port-success-k
(if port-success-k (port-success-k
(port-success-k (lambda (acc new-start new-end)
(lambda (acc new-start new-end) (loop acc new-start new-end ipre 0-ok?))
(loop acc new-start new-end ipre 0-ok?)) acc start end mstart mend)
acc start end mstart mend) (loop (success-choose start mstart mend m acc)
(loop (success-choose start mstart mend m acc) mend end ipre 0-ok?)))))))))))
mend end ipre 0-ok?)))))))))))
;; Returns all the positions at which the pattern matched.
(define (regexp-match-positions* pattern string [start 0] [end #f] [ipre #""])
(regexp-loop
regexp-match-positions* loop start end
pattern string
ipre
;; success-choose:
(lambda (start mstart mend ms acc) (cons (cons mstart mend) acc))
;; failure-k:
(lambda (acc start end) acc)
;; port-success-k: need to shift index of rest as reading; cannot
;; do a tail call without adding another state variable to the
;; regexp loop, so this remains inefficient
(and (input-port? string)
(lambda (loop acc start end mstart mend)
(append (map (lambda (p)
(cons (+ mend (car p)) (+ mend (cdr p))))
(loop '() 0 (and end (- end mend))))
(cons (cons mstart mend) acc))))
;; other port functions: use string case
#f
#f
#f
#f))
;; Returns all the positions at which the pattern matched. ;; Returns all the positions at which the pattern matched.
(define (regexp-match-peek-positions* pattern string [start 0] [end #f] [ipre #""]) (define (regexp-match-positions* pattern string [start 0] [end #f] [ipre #""])
(regexp-loop (regexp-loop regexp-match-positions* loop start end pattern string ipre
regexp-match-peek-positions* loop start end ;; success-choose:
pattern string (lambda (start mstart mend ms acc) (cons (cons mstart mend) acc))
ipre ;; failure-k:
;; success-choose: (lambda (acc start end) acc)
(lambda (start mstart mend ms acc) (cons (cons mstart mend) acc)) ;; port-success-k: need to shift index of rest as reading; cannot
;; failure-k: ;; do a tail call without adding another state variable to the
(lambda (acc start end) acc) ;; regexp loop, so this remains inefficient
;; port functions: use string case (and (input-port? string)
#f (lambda (loop acc start end mstart mend)
#f (append (map (lambda (p)
#f (cons (+ mend (car p)) (+ mend (cdr p))))
#f (loop '() 0 (and end (- end mend))))
#t)) (cons (cons mstart mend) acc))))
;; other port functions: use string case
#f
#f
#f
#f))
;; Returns all the positions at which the pattern matched.
(define (regexp-match-peek-positions* pattern string [start 0] [end #f]
[ipre #""])
(regexp-loop regexp-match-peek-positions* loop start end
pattern string ipre
;; success-choose:
(lambda (start mstart mend ms acc) (cons (cons mstart mend) acc))
;; failure-k:
(lambda (acc start end) acc)
;; port functions: use string case
#f
#f
#f
#f
#t))
;; Splits a string into a list by removing any piece which matches ;; Splits a string into a list by removing any piece which matches
;; the pattern. ;; the pattern.
(define (regexp-split pattern string [start 0] [end #f] [ipre #""]) (define (regexp-split pattern string [start 0] [end #f] [ipre #""])
(define buf (if (and (string? string) (or (byte-regexp? pattern) (define buf
(bytes? pattern))) (if (and (string? string) (or (byte-regexp? pattern) (bytes? pattern)))
(string->bytes/utf-8 string (char->integer #\?)) (string->bytes/utf-8 string (char->integer #\?))
string)) string))
(define sub (if (or (bytes? buf) (and (path? string) (define sub
(or (bytes? pattern) (if (or (bytes? buf)
(byte-regexp? pattern)))) (and (path? string)
subbytes (or (bytes? pattern) (byte-regexp? pattern))))
substring)) subbytes
substring))
(regexp-loop regexp-split loop start end pattern buf ipre (regexp-loop regexp-split loop start end pattern buf ipre
;; success-choose: ;; success-choose:
(lambda (start mstart mend ms acc) (cons (sub buf start mstart) acc)) (lambda (start mstart mend ms acc) (cons (sub buf start mstart) acc))
;; failure-k: ;; failure-k:
(lambda (acc start end) (lambda (acc start end)
(cons (if end (sub buf start end) (sub buf start)) acc)) (cons (if end (sub buf start end) (sub buf start)) acc))
;; port-success-k: ;; port-success-k:
#f #f
;; port-success-choose: ;; port-success-choose:
(lambda (match-string leftovers ms acc) (cons leftovers acc)) (lambda (match-string leftovers ms acc) (cons leftovers acc))
;; port-failure-k: ;; port-failure-k:
(lambda (acc leftover) (if leftover (cons leftover acc) acc)) (lambda (acc leftover) (if leftover (cons leftover acc) acc))
#t #t
#f)) #f))
;; Like splitting, but insert a replacement between matches ;; Like splitting, but insert a replacement between matches
(define -regexp-replace* (define -regexp-replace*
(let ([regexp-replace* (let ([regexp-replace*
(lambda (pattern string orig-replacement [ipre #""]) (lambda (pattern string orig-replacement [ipre #""])
(define buf (if (and (string? string) (or (byte-regexp? pattern) (define buf
(bytes? pattern))) (if (and (string? string)
(string->bytes/utf-8 string (char->integer #\?)) (or (byte-regexp? pattern) (bytes? pattern)))
string)) (string->bytes/utf-8 string (char->integer #\?))
string))
(define sub (if (bytes? buf) subbytes substring)) (define sub (if (bytes? buf) subbytes substring))
(define start 0) (define start 0)
(define end #f) (define end #f)
(define needs-string? (define needs-string?
(and (or (string? pattern) (regexp? pattern)) (and (or (string? pattern) (regexp? pattern)) (string? string)))
(string? string)))
(define replacement (define replacement
(if (and (not needs-string?) (string? orig-replacement)) (if (and (not needs-string?) (string? orig-replacement))
(string->bytes/utf-8 orig-replacement) (string->bytes/utf-8 orig-replacement)
orig-replacement)) orig-replacement))
(define (check proc args) (define (check proc args)
(let ([v (apply proc args)]) (let ([v (apply proc args)])
(unless (if needs-string? (unless (if needs-string? (string? v) (bytes? v))
(string? v) (raise-mismatch-error
(bytes? v)) '|regexp-replace* (calling given filter procedure)|
(raise-mismatch-error '|regexp-replace* (calling given filter procedure)| (if needs-string?
(if needs-string? "expected a string result: "
"expected a string result: " "expected a byte string result: ")
"expected a byte string result: ") v))
v))
v)) v))
(define need-replac? (and (not (procedure? replacement)) (define need-replac?
(regexp-match? #rx#"[\\&]" replacement))) (and (not (procedure? replacement))
(regexp-match? #rx#"[\\&]" replacement)))
(define (replac ms str) (define (replac ms str)
(if need-replac? (if need-replac?
((if (string? str) bytes->string/utf-8 values) ((if (string? str) bytes->string/utf-8 values)
(apply (apply
bytes-append bytes-append
(let ([str (if (string? str) (string->bytes/utf-8 str) str)] (let ([str (if (string? str) (string->bytes/utf-8 str) str)]
[get-match (lambda (n) [get-match
(if (n . < . (length ms)) (lambda (n)
(let* ([p (list-ref ms n)] (if (n . < . (length ms))
[s (if (pair? p) (let* ([p (list-ref ms n)]
(sub buf (car p) (cdr p)) [s (if (pair? p)
p)]) (sub buf (car p) (cdr p))
(if (string? s) p)])
(string->bytes/utf-8 s) (if (string? s) (string->bytes/utf-8 s) s))
s)) #""))])
#""))]) (let loop ([pos 0])
(let loop ([pos 0]) (let ([m (regexp-match-positions #rx#"[\\&]" str pos)])
(let ([m (regexp-match-positions #rx#"[\\&]" str pos)]) (if m
(if m (cons (subbytes str pos (caar m))
(cons (subbytes str pos (caar m)) (cond
(cond [(equal? (char->integer #\&)
[(equal? (char->integer #\&) (bytes-ref str (caar m))) (bytes-ref str (caar m)))
(cons (get-match 0) (loop (cdar m)))] (cons (get-match 0) (loop (cdar m)))]
[(= (cdar m) (bytes-length str)) [(= (cdar m) (bytes-length str))
;; \ with no following character ;; \ with no following character
(list (get-match 0))] (list (get-match 0))]
[(let ([next (bytes-ref str (cdar m))]) [(let ([next (bytes-ref str (cdar m))])
(or (and (equal? (char->integer #\&) next) (or (and (equal? (char->integer #\&) next)
#"&") #"&")
(and (equal? (char->integer #\\) next) (and (equal? (char->integer #\\) next)
#"\\"))) #"\\")))
=> (lambda (s) => (lambda (s)
(cons s (loop (add1 (cdar m)))))] (cons s (loop (add1 (cdar m)))))]
[else [else
(let ([n (regexp-match #rx#"^[0-9]+" str (cdar m))]) (let ([n (regexp-match #rx#"^[0-9]+" str
(if n (cdar m))])
(cons (get-match (string->number (bytes->string/utf-8 (car n)))) (if n
(loop (+ (cdar m) (bytes-length (car n))))) (cons (get-match (string->number
(cons (get-match 0) (bytes->string/utf-8
(loop (cdar m)))))])) (car n))))
(list (subbytes str pos)))))))) (loop (+ (cdar m)
str)) (bytes-length (car n)))))
(cons (get-match 0)
(loop (cdar m)))))]))
(list (subbytes str pos))))))))
str))
(when (or (string? pattern) (bytes? pattern) (when (or (string? pattern) (bytes? pattern)
(regexp? pattern) (byte-regexp? pattern)) (regexp? pattern) (byte-regexp? pattern))
(unless (or (string? string) (unless (or (string? string)
(bytes? string)) (bytes? string))
(raise-type-error 'regexp-replace* "string or byte string" string)) (raise-type-error 'regexp-replace* "string or byte string"
string))
(unless (or (string? replacement) (unless (or (string? replacement)
(bytes? replacement) (bytes? replacement)
(procedure? replacement)) (procedure? replacement))
(raise-type-error 'regexp-replace* "string, byte string, or procedure" (raise-type-error 'regexp-replace*
"string, byte string, or procedure"
replacement)) replacement))
(when (and needs-string? (bytes? replacement)) (when (and needs-string? (bytes? replacement))
(raise-mismatch-error 'regexp-replace* (raise-mismatch-error
"cannot replace a string with a byte string: " 'regexp-replace*
replacement))) "cannot replace a string with a byte string: "
replacement)))
(apply (apply
(if (bytes? buf) bytes-append string-append) (if (bytes? buf) bytes-append string-append)
(regexp-loop (regexp-loop regexp-replace* loop start end pattern buf ipre
regexp-replace* loop start end pattern buf ipre ;; success-choose:
;; success-choose: (lambda (start mstart mend ms acc)
(lambda (start mstart mend ms acc) (list* (if (procedure? replacement) (list* (if (procedure? replacement)
(check (check
replacement replacement
(for/list ([m ms]) (for/list ([m ms])
(and m (sub buf (car m) (cdr m))))) (and m (sub buf (car m) (cdr m)))))
(replac ms replacement)) (replac ms replacement))
(sub buf start mstart) (sub buf start mstart)
acc)) acc))
;; failure-k: ;; failure-k:
(lambda (acc start end) (lambda (acc start end)
(cons (if end (sub buf start end) (sub buf start)) acc)) (cons (if end (sub buf start end) (sub buf start)) acc))
;; port-success-k: ;; port-success-k:
#f #f
;; port-success-choose: ;; port-success-choose:
#f #f
;; port-failure-k: ;; port-failure-k:
#f #f
#t #t
#f)))]) #f)))])
regexp-replace*)) regexp-replace*))
;; Returns all the matches for the pattern in the string. ;; Returns all the matches for the pattern in the string.
(define (regexp-match* pattern string [start 0] [end #f] [ipre #""]) (define (regexp-match* pattern string [start 0] [end #f] [ipre #""])
(define buf (if (and (string? string) (or (byte-regexp? pattern) (define buf
(bytes? pattern))) (if (and (string? string) (or (byte-regexp? pattern) (bytes? pattern)))
(string->bytes/utf-8 string (char->integer #\?)) (string->bytes/utf-8 string (char->integer #\?))
string)) string))
(define sub (if (or (bytes? buf) (and (path? string) (define sub
(or (bytes? pattern) (if (or (bytes? buf)
(byte-regexp? pattern)))) (and (path? string)
subbytes (or (bytes? pattern) (byte-regexp? pattern))))
substring)) subbytes
substring))
(regexp-loop regexp-match* loop start end pattern buf ipre (regexp-loop regexp-match* loop start end pattern buf ipre
;; success-choose: ;; success-choose:
(lambda (start mstart mend ms acc) (cons (sub buf mstart mend) acc)) (lambda (start mstart mend ms acc) (cons (sub buf mstart mend) acc))
;; failure-k: ;; failure-k:
(lambda (acc start end) acc) (lambda (acc start end) acc)
;; port-success-k: ;; port-success-k:
#f #f
;; port-success-choose: ;; port-success-choose:
(lambda (match-string leftovers ms acc) (cons match-string acc)) (lambda (match-string leftovers ms acc) (cons match-string acc))
;; port-failure-k: ;; port-failure-k:
(lambda (acc leftover) acc) (lambda (acc leftover) acc)
#f #f
#f)) #f))
(define (regexp-match-exact? p s) (define (regexp-match-exact? p s)
(let ([m (regexp-match-positions p s)]) (let ([m (regexp-match-positions p s)])
(and m (zero? (caar m)) (and m (zero? (caar m))
(= (cdar m) (= (cdar m)
(cond [(bytes? s) (bytes-length s)] (cond [(bytes? s) (bytes-length s)]
[(or (byte-regexp? p) (bytes? p)) [(or (byte-regexp? p) (bytes? p))
(if (path? s) (if (path? s)
(bytes-length (path->bytes s)) (bytes-length (path->bytes s))
(string-utf-8-length s))] (string-utf-8-length s))]
[else [else
(if (path? s) (if (path? s)
(string-length (path->string s)) (string-length (path->string s))
(string-length s))]))))) (string-length s))])))))
) )