reduce memory use in net/uri-codec

Eliminated use of lists and non-tail recursion, used
string ports and loops instead.
This commit is contained in:
Ryan Culpepper 2015-05-12 12:07:31 -04:00
parent e6113d1c3c
commit 9049737270

View File

@ -159,33 +159,46 @@ See more in PR8831.
(and (= 1 (string-length s)) (and (= 1 (string-length s))
(eq? char (string-ref s 0)))))) (eq? char (string-ref s 0))))))
str str
(string-append* (for/list ([byte (in-bytes (string->bytes/utf-8 str))]) (let ([out (open-output-string)])
(if (< byte ascii-size) (for ([byte (in-bytes (string->bytes/utf-8 str))])
(vector-ref table byte) (cond [(< byte ascii-size)
(number->hex-string byte)))))) (write-string (vector-ref table byte) out)]
[else
(write-string (number->hex-string byte) out)]))
(get-output-string out))))
;; vector string -> string ;; vector string -> string
(define (decode table str) (define (decode table str)
(define max-ascii (integer->char ascii-size)) (define max-ascii (integer->char ascii-size))
;; internal-decode : list -> listof[byte] (define in (open-input-string str))
(define (internal-decode l) (define out (open-output-bytes))
(if (null? l) '() (let loop ()
(let* ([c (car l)] [l (cdr l)] (define c (read-char in))
[hex (and (equal? #\% c) (pair? l) (pair? (cdr l)) (unless (eof-object? c)
(string->number (string (car l) (cadr l)) 16))] (cond [(eqv? c #\%)
[rest (internal-decode (if hex (cddr l) l))]) (define hex (read-string 2 in))
(cond [hex (cons hex rest)] (define hex-n (and (string? hex) (string->number hex 16)))
[(char<? c max-ascii) (cons (vector-ref table (char->integer c)) (cond [(exact-nonnegative-integer? hex-n) ;; not negative, fractional
rest)] ;; Note: write as byte to support multi-byte Unicode chars
;; This should probably error, but strings to be decoded might (write-byte hex-n out)]
;; come from misbehaving sources; maybe it's better to add some [else
;; parameter for a permissive mode; one source of such bad URLs ;; Pass through failed %-escapes as-is, for compatibility with
;; is user-defined strings where the string is entered directly ;; previous version of code.
;; and not properly encoded -- similar justification to (write-char #\% out)
;; browsers accepting unencoded chars in manually entered URLs. (when (string? hex)
[else (append (bytes->list (string->bytes/utf-8 (string c))) (write-string hex out))])]
rest)])))) [(char<? c max-ascii)
(bytes->string/utf-8 (apply bytes (internal-decode (string->list str))))) (write-char (integer->char (vector-ref table (char->integer c))) out)]
[else
;; This should probably error, but strings to be decoded might
;; come from misbehaving sources; maybe it's better to add some
;; parameter for a permissive mode; one source of such bad URLs
;; is user-defined strings where the string is entered directly
;; and not properly encoded -- similar justification to
;; browsers accepting unencoded chars in manually entered URLs.
(write-char c out)])
(loop)))
(get-output-string out))
;; Utility for defining codecs ;; Utility for defining codecs
(define-syntax-rule (define-codecs [encoder decoder mapping] ...) (define-syntax-rule (define-codecs [encoder decoder mapping] ...)