diff --git a/racket/src/regexp/demo.rkt b/racket/src/regexp/demo.rkt index 3089217198..2f4a525c4e 100644 --- a/racket/src/regexp/demo.rkt +++ b/racket/src/regexp/demo.rkt @@ -71,6 +71,8 @@ '((3 . 4))) (test (rx:regexp-match-positions "(?m:^.\n)" "a\nb\nc\n" 2 6 #f #"\n") '((2 . 4))) +(test (rx:regexp-match-positions "(?:(?m:^$))(?<=..)" "ge \n TLambda-tc\n\n ;; (extend Γ o Γx-s\n extend\n\n ;;" 29 #f #f #"\n") + '((46 . 46))) (test (regexp-replace* "-" "zero-or-more?" "_") "zero_or_more?") diff --git a/racket/src/regexp/match/extract.rkt b/racket/src/regexp/match/extract.rkt index 7d7aedf58e..a80bd3ab36 100644 --- a/racket/src/regexp/match/extract.rkt +++ b/racket/src/regexp/match/extract.rkt @@ -59,11 +59,11 @@ (bytes->string/utf-8 bstr-in #\? (- (car p) delta) (- (cdr p) delta)))) null))) -(define (byte-index->string-index str pos) +(define (byte-index->string-index str start-pos pos) ;; We assume that pos is on a code-point boundary in the ;; UTF-8 encoding of str. Find out how many code points ;; are before the index. - (let loop ([lo-pos 0] [lo 0] [hi (min (string-length str) + (let loop ([lo-pos 0] [lo 0] [hi (min (- (string-length str) start-pos) (* pos 6))]) (cond [(= lo hi) lo] @@ -71,7 +71,7 @@ (if (= lo-pos pos) lo hi)] [else (define mid (quotient (+ lo hi) 2)) - (define len (string-utf-8-length str lo mid)) + (define len (string-utf-8-length str (+ start-pos lo) (+ start-pos mid))) (define mid-pos (+ lo-pos len)) (cond [(= mid-pos pos) mid] diff --git a/racket/src/regexp/match/main.rkt b/racket/src/regexp/match/main.rkt index 56315f4a83..776c2c0f6b 100644 --- a/racket/src/regexp/match/main.rkt +++ b/racket/src/regexp/match/main.rkt @@ -349,7 +349,7 @@ ;; boundary, and everything from `ms-pos` to `ms-end` must ;; still be in `lb-in`. So, find `ms-pos` in the original ;; string, and take it from there. - (define ms-str-pos (byte-index->string-index in (- ms-pos start-pos))) + (define ms-str-pos (byte-index->string-index in start-offset (- ms-pos start-pos))) (define delta (lazy-bytes-discarded-count lb-in)) (byte-positions->string-positions bstr ms-pos me-pos state #:start-index (- ms-pos delta)