Removing optimizations from sgml-reader that do not work for UTF-8

This commit is contained in:
Jay McCarthy 2013-01-18 06:14:40 -07:00
parent ecd43f7579
commit 2fc1f84f5b

View File

@ -315,19 +315,24 @@
(list->string data))))
|#
;; lex-name* : Input-port -> String
;; Consumes characters from `in` for as long as the next character satisfies
;; `name-char?` and returns the consumed characters as a string. The first
;; character that fails the test is only peeked, so it remains on the port.
;; Works one character at a time (peek-char/read-char) rather than on bytes.
;; NOTE(review): relies on `name-char?` (defined elsewhere in this file)
;; returning #f for eof so the scan stops at end of input -- confirm.
(define (lex-name* in)
  (define collected (open-output-string))
  (let scan ([next (peek-char in)])
    (cond
      [(name-char? next)
       ;; Commit the peeked character: drop it from the port, keep a copy.
       (read-char in)
       (display next collected)
       (scan (peek-char in))]
      [else (get-output-string collected)])))
;; lex-name : Input-port -> Symbol
;; Reads a name from `in` and returns it as a lowercased symbol.
;; The characters are gathered by `lex-name*`, which scans the port one
;; character at a time; the earlier byte-regexp scan
;; (#rx"^[a-zA-Z_:0-9&.-]*") only recognised ASCII name characters and so
;; stopped at the first non-ASCII character of a name.
;; An input with no leading name characters yields the empty symbol.
(define (lex-name in)
  (string->symbol
   (string-downcase
    (lex-name* in))))
;; lex-name/case-sensitive : Input-port -> Symbol
;; Reads a name from `in` and returns it as a symbol, preserving the case of
;; the characters exactly as they appear in the input.
;; The characters are gathered by `lex-name*`, which scans the port one
;; character at a time; the earlier byte-regexp scan
;; (#rx"^[a-zA-Z_:0-9&.-]*") only recognised ASCII name characters and so
;; stopped at the first non-ASCII character of a name.
(define (lex-name/case-sensitive in)
  (string->symbol
   (lex-name* in)))
#|
(define (lex-name in)
(string->symbol