Changing decode-string so it avoids allocation and interns the individual string components.
original commit: 2f4671235c322f3450b0633cd866f18b95f58f13
This commit is contained in:
parent
1efd01584e
commit
db64a47dd7
|
@ -92,20 +92,28 @@
|
||||||
[s (regexp-replace* #rx" $" s "")])
|
[s (regexp-replace* #rx" $" s "")])
|
||||||
(datum-intern-literal s)))
|
(datum-intern-literal s)))
|
||||||
|
|
||||||
|
|
||||||
(define (decode-string s)
|
(define (decode-string s)
|
||||||
(let loop ([l '((#rx"---" mdash)
|
(define pattern #rx"(---|--|``|''|')")
|
||||||
(#rx"--" ndash)
|
(let loop ([start 0])
|
||||||
(#rx"``" ldquo)
|
(cond
|
||||||
(#rx"''" rdquo)
|
[(regexp-match-positions pattern s start)
|
||||||
(#rx"'" rsquo))])
|
=> (lambda (m)
|
||||||
(cond [(null? l) (list s)]
|
(define the-match (substring s (caar m) (cdar m)))
|
||||||
[(regexp-match-positions (caar l) s)
|
(list* (datum-intern-literal (substring s start (caar m)))
|
||||||
=> (lambda (m)
|
(cond
|
||||||
(datum-intern-literal
|
[(string=? the-match "---") 'mdash]
|
||||||
(append (decode-string (substring s 0 (caar m)))
|
[(string=? the-match "--") 'ndash]
|
||||||
(cdar l)
|
[(string=? the-match "``") 'ldquo]
|
||||||
(decode-string (substring s (cdar m))))))]
|
[(string=? the-match "''") 'rdquo]
|
||||||
[else (loop (cdr l))])))
|
[(string=? the-match "'") 'rsquo])
|
||||||
|
(loop (cdar m))))]
|
||||||
|
;; Common case: nothing to decode, so don't copy strings.
|
||||||
|
[(= start 0)
|
||||||
|
(list (datum-intern-literal s))]
|
||||||
|
[else
|
||||||
|
(list (datum-intern-literal (substring s start)))])))
|
||||||
|
|
||||||
|
|
||||||
(define (line-break? v)
|
(define (line-break? v)
|
||||||
(equal? v "\n"))
|
(equal? v "\n"))
|
||||||
|
|
Loading…
Reference in New Issue
Block a user