regexp: repair extracting positions from string match

This commit is contained in:
Matthew Flatt 2019-01-22 09:52:07 -07:00
parent 82d8184ca9
commit 3b99688275
3 changed files with 6 additions and 4 deletions

View File

@ -71,6 +71,8 @@
'((3 . 4)))
(test (rx:regexp-match-positions "(?m:^.\n)" "a\nb\nc\n" 2 6 #f #"\n")
'((2 . 4)))
(test (rx:regexp-match-positions "(?:(?m:^$))(?<=..)" "ge \n TLambda-tc\n\n ;; (extend Γ o Γx-s\n extend\n\n ;;" 29 #f #f #"\n")
'((46 . 46)))
(test (regexp-replace* "-" "zero-or-more?" "_")
"zero_or_more?")

View File

@ -59,11 +59,11 @@
(bytes->string/utf-8 bstr-in #\? (- (car p) delta) (- (cdr p) delta))))
null)))
(define (byte-index->string-index str pos)
(define (byte-index->string-index str start-pos pos)
;; We assume that pos is a byte offset, on a code-point boundary,
;; into the UTF-8 encoding of str starting at character start-pos.
;; Find out how many code points lie between start-pos and that offset.
(let loop ([lo-pos 0] [lo 0] [hi (min (string-length str)
(let loop ([lo-pos 0] [lo 0] [hi (min (- (string-length str) start-pos)
(* pos 6))])
(cond
[(= lo hi) lo]
@ -71,7 +71,7 @@
(if (= lo-pos pos) lo hi)]
[else
(define mid (quotient (+ lo hi) 2))
(define len (string-utf-8-length str lo mid))
(define len (string-utf-8-length str (+ start-pos lo) (+ start-pos mid)))
(define mid-pos (+ lo-pos len))
(cond
[(= mid-pos pos) mid]

View File

@ -349,7 +349,7 @@
;; boundary, and everything from `ms-pos` to `ms-end` must
;; still be in `lb-in`. So, find `ms-pos` in the original
;; string, and take it from there.
(define ms-str-pos (byte-index->string-index in (- ms-pos start-pos)))
(define ms-str-pos (byte-index->string-index in start-offset (- ms-pos start-pos)))
(define delta (lazy-bytes-discarded-count lb-in))
(byte-positions->string-positions bstr ms-pos me-pos state
#:start-index (- ms-pos delta)