regexp: repair extracting positions from string match
This commit is contained in:
parent
82d8184ca9
commit
3b99688275
|
@ -71,6 +71,8 @@
|
|||
'((3 . 4)))
|
||||
(test (rx:regexp-match-positions "(?m:^.\n)" "a\nb\nc\n" 2 6 #f #"\n")
|
||||
'((2 . 4)))
|
||||
(test (rx:regexp-match-positions "(?:(?m:^$))(?<=..)" "ge \n TLambda-tc\n\n ;; (extend Γ o Γx-s\n extend\n\n ;;" 29 #f #f #"\n")
|
||||
'((46 . 46)))
|
||||
|
||||
(test (regexp-replace* "-" "zero-or-more?" "_")
|
||||
"zero_or_more?")
|
||||
|
|
|
@ -59,11 +59,11 @@
|
|||
(bytes->string/utf-8 bstr-in #\? (- (car p) delta) (- (cdr p) delta))))
|
||||
null)))
|
||||
|
||||
(define (byte-index->string-index str pos)
|
||||
(define (byte-index->string-index str start-pos pos)
|
||||
;; We assume that pos is on a code-point boundary in the
|
||||
;; UTF-8 encoding of str. Find out how many code points
|
||||
;; are before the index, counting from `start-pos` in str.
|
||||
(let loop ([lo-pos 0] [lo 0] [hi (min (string-length str)
|
||||
(let loop ([lo-pos 0] [lo 0] [hi (min (- (string-length str) start-pos)
|
||||
(* pos 6))])
|
||||
(cond
|
||||
[(= lo hi) lo]
|
||||
|
@ -71,7 +71,7 @@
|
|||
(if (= lo-pos pos) lo hi)]
|
||||
[else
|
||||
(define mid (quotient (+ lo hi) 2))
|
||||
(define len (string-utf-8-length str lo mid))
|
||||
(define len (string-utf-8-length str (+ start-pos lo) (+ start-pos mid)))
|
||||
(define mid-pos (+ lo-pos len))
|
||||
(cond
|
||||
[(= mid-pos pos) mid]
|
||||
|
|
|
@ -349,7 +349,7 @@
|
|||
;; boundary, and everything from `ms-pos` to `ms-end` must
|
||||
;; still be in `lb-in`. So, find `ms-pos` in the original
|
||||
;; string, and take it from there.
|
||||
(define ms-str-pos (byte-index->string-index in (- ms-pos start-pos)))
|
||||
(define ms-str-pos (byte-index->string-index in start-offset (- ms-pos start-pos)))
|
||||
(define delta (lazy-bytes-discarded-count lb-in))
|
||||
(byte-positions->string-positions bstr ms-pos me-pos state
|
||||
#:start-index (- ms-pos delta)
|
||||
|
|
Loading…
Reference in New Issue
Block a user