regexp: repair extracting positions from string match
This commit is contained in:
parent
82d8184ca9
commit
3b99688275
|
@ -71,6 +71,8 @@
|
||||||
'((3 . 4)))
|
'((3 . 4)))
|
||||||
(test (rx:regexp-match-positions "(?m:^.\n)" "a\nb\nc\n" 2 6 #f #"\n")
|
(test (rx:regexp-match-positions "(?m:^.\n)" "a\nb\nc\n" 2 6 #f #"\n")
|
||||||
'((2 . 4)))
|
'((2 . 4)))
|
||||||
|
(test (rx:regexp-match-positions "(?:(?m:^$))(?<=..)" "ge \n TLambda-tc\n\n ;; (extend Γ o Γx-s\n extend\n\n ;;" 29 #f #f #"\n")
|
||||||
|
'((46 . 46)))
|
||||||
|
|
||||||
(test (regexp-replace* "-" "zero-or-more?" "_")
|
(test (regexp-replace* "-" "zero-or-more?" "_")
|
||||||
"zero_or_more?")
|
"zero_or_more?")
|
||||||
|
|
|
@ -59,11 +59,11 @@
|
||||||
(bytes->string/utf-8 bstr-in #\? (- (car p) delta) (- (cdr p) delta))))
|
(bytes->string/utf-8 bstr-in #\? (- (car p) delta) (- (cdr p) delta))))
|
||||||
null)))
|
null)))
|
||||||
|
|
||||||
(define (byte-index->string-index str pos)
|
(define (byte-index->string-index str start-pos pos)
|
||||||
;; We assume that pos is on a code-point boundary in the
|
;; We assume that pos is on a code-point boundary in the
|
||||||
;; UTF-8 encoding of str. Find out how many code points
|
;; UTF-8 encoding of str. Find out how many code points
|
||||||
;; are before the index.
|
;; are before the index.
|
||||||
(let loop ([lo-pos 0] [lo 0] [hi (min (string-length str)
|
(let loop ([lo-pos 0] [lo 0] [hi (min (- (string-length str) start-pos)
|
||||||
(* pos 6))])
|
(* pos 6))])
|
||||||
(cond
|
(cond
|
||||||
[(= lo hi) lo]
|
[(= lo hi) lo]
|
||||||
|
@ -71,7 +71,7 @@
|
||||||
(if (= lo-pos pos) lo hi)]
|
(if (= lo-pos pos) lo hi)]
|
||||||
[else
|
[else
|
||||||
(define mid (quotient (+ lo hi) 2))
|
(define mid (quotient (+ lo hi) 2))
|
||||||
(define len (string-utf-8-length str lo mid))
|
(define len (string-utf-8-length str (+ start-pos lo) (+ start-pos mid)))
|
||||||
(define mid-pos (+ lo-pos len))
|
(define mid-pos (+ lo-pos len))
|
||||||
(cond
|
(cond
|
||||||
[(= mid-pos pos) mid]
|
[(= mid-pos pos) mid]
|
||||||
|
|
|
@ -349,7 +349,7 @@
|
||||||
;; boundary, and everything from `ms-pos` to `ms-end` must
|
;; boundary, and everything from `ms-pos` to `ms-end` must
|
||||||
;; still be in `lb-in`. So, find `ms-pos` in the original
|
;; still be in `lb-in`. So, find `ms-pos` in the original
|
||||||
;; string, and take it from there.
|
;; string, and take it from there.
|
||||||
(define ms-str-pos (byte-index->string-index in (- ms-pos start-pos)))
|
(define ms-str-pos (byte-index->string-index in start-offset (- ms-pos start-pos)))
|
||||||
(define delta (lazy-bytes-discarded-count lb-in))
|
(define delta (lazy-bytes-discarded-count lb-in))
|
||||||
(byte-positions->string-positions bstr ms-pos me-pos state
|
(byte-positions->string-positions bstr ms-pos me-pos state
|
||||||
#:start-index (- ms-pos delta)
|
#:start-index (- ms-pos delta)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user