diff --git a/collects/tests/xml/test.rkt b/collects/tests/xml/test.rkt
index b5465ca204..0df3754318 100644
--- a/collects/tests/xml/test.rkt
+++ b/collects/tests/xml/test.rkt
@@ -324,6 +324,22 @@ END
"
"
"read-xml: parse-error: expected root element - received #")
+ (test-read-xml/element
+ ""
+ '(make-element
+ (make-source (make-location 1 0 1) (make-location 1 43 44))
+ 'title
+ (list)
+ (list (make-cdata (make-source (make-location 1 7 8) (make-location 1 35 36)) ""))))
+
+ (test-read-xml/element
+ ""
+ '(make-element
+ (make-source (make-location 1 0 1) (make-location 1 28 29))
+ 'title
+ (list)
+ (list (make-cdata (make-source (make-location 1 7 8) (make-location 1 20 21)) ""))))
+
; XXX need more read-xml/element tests
)
diff --git a/collects/xml/private/reader.rkt b/collects/xml/private/reader.rkt
index f0766c73d8..7aa63cfe1c 100644
--- a/collects/xml/private/reader.rkt
+++ b/collects/xml/private/reader.rkt
@@ -384,35 +384,49 @@
;; uses Knuth-Morris-Pratt from
;; Introduction to Algorithms, Cormen, Leiserson, and Rivest, pages 869-876
;; discards stop from input
-(define (gen-read-until-string stop)
- (let* ([len (string-length stop)]
- [prefix (make-vector len 0)]
- [fall-back
- (lambda (k c)
- (let ([k (let loop ([k k])
- (cond
- [(and (> k 0) (not (eq? (string-ref stop k) c)))
- (loop (vector-ref prefix (sub1 k)))]
- [else k]))])
- (if (eq? (string-ref stop k) c)
- (add1 k)
- k)))])
- (let init ([k 0] [q 1])
- (when (< q len)
- (let ([k (fall-back k (string-ref stop q))])
- (vector-set! prefix q k)
- (init k (add1 q)))))
- ;; (vector-ref prefix x) = the longest suffix that matches a prefix of stop
- (lambda (in pos)
- (list->string
- (let/ec out
- (let loop ([matched 0] [out out])
- (let* ([c (non-eof read-char in pos)]
- [matched (fall-back matched c)])
- (cond
- [(= matched len) (out null)]
- [(zero? matched) (cons c (let/ec out (loop matched out)))]
- [else (cons c (loop matched out))]))))))))
+;; ---
+;; Modified by Jay to look more like the version on Wikipedia after discovering a bug when parsing CDATA
+;; The use of the hasheq table and the purely numeric code trades hash efficiency for stack/ec capture efficiency
+(struct hash-string (port pos ht))
+(define (hash-string-ref hs k)
+ (match-define (hash-string port pos ht) hs)
+ (hash-ref! ht k (lambda () (non-eof read-char port pos))))
+
+(define (gen-read-until-string W)
+ (define Wlen (string-length W))
+ (define T (make-vector Wlen #f))
+ (vector-set! T 0 -1)
+ (vector-set! T 1 0)
+ (let kmp-table ([pos 2] [cnd 0])
+ (when (pos . < . Wlen)
+ (cond
+ [(char=? (string-ref W (sub1 pos)) (string-ref W cnd))
+ (vector-set! T pos (add1 cnd))
+ (kmp-table (add1 pos) (add1 cnd))]
+ [(cnd . > . 0)
+ (kmp-table pos (vector-ref T cnd))]
+ [(zero? cnd)
+ (vector-set! T pos 0)
+ (kmp-table (add1 pos) 0)])))
+ (lambda (S-as-port S-pos)
+ (define S (hash-string S-as-port S-pos (make-hasheq)))
+ (define W-starts-at
+ (let kmp-search ([m 0] [i 0])
+ (if (char=? (string-ref W i) (hash-string-ref S (+ m i)))
+ (let ([i (add1 i)])
+ (if (= i Wlen)
+ m
+ (kmp-search m i)))
+ (let* ([Ti (vector-ref T i)]
+ [m (+ m i (* -1 Ti))])
+ (if (Ti . > . -1)
+ (let ([i Ti])
+ (kmp-search m i))
+ (let ([i 0])
+ (kmp-search m i)))))))
+ (list->string
+ (for/list ([i (in-range 0 W-starts-at)])
+ (hash-string-ref S i)))))
;; "-->" makes more sense, but "--" follows the spec.
(define lex-comment-contents (gen-read-until-string "--"))
@@ -460,4 +474,4 @@
(define (format-source loc)
(if (location? loc)
(format "~a.~a/~a" (location-line loc) (location-char loc) (location-offset loc))
- (format "~a" loc)))
+ (format "~a" loc)))
\ No newline at end of file