regexp: repair extracting positions from string match

This commit is contained in:
Matthew Flatt 2019-01-22 09:52:07 -07:00
parent 82d8184ca9
commit 3b99688275
3 changed files with 6 additions and 4 deletions

View File

@@ -71,6 +71,8 @@
'((3 . 4))) '((3 . 4)))
(test (rx:regexp-match-positions "(?m:^.\n)" "a\nb\nc\n" 2 6 #f #"\n") (test (rx:regexp-match-positions "(?m:^.\n)" "a\nb\nc\n" 2 6 #f #"\n")
'((2 . 4))) '((2 . 4)))
(test (rx:regexp-match-positions "(?:(?m:^$))(?<=..)" "ge \n TLambda-tc\n\n ;; (extend Γ o Γx-s\n extend\n\n ;;" 29 #f #f #"\n")
'((46 . 46)))
(test (regexp-replace* "-" "zero-or-more?" "_") (test (regexp-replace* "-" "zero-or-more?" "_")
"zero_or_more?") "zero_or_more?")

View File

@@ -59,11 +59,11 @@
(bytes->string/utf-8 bstr-in #\? (- (car p) delta) (- (cdr p) delta)))) (bytes->string/utf-8 bstr-in #\? (- (car p) delta) (- (cdr p) delta))))
null))) null)))
(define (byte-index->string-index str pos) (define (byte-index->string-index str start-pos pos)
;; We assume that pos is on a code-point boundary in the ;; We assume that pos is on a code-point boundary in the
;; UTF-8 encoding of str. Find out how many code points ;; UTF-8 encoding of str. Find out how many code points
;; are before the index. ;; are before the index.
(let loop ([lo-pos 0] [lo 0] [hi (min (string-length str) (let loop ([lo-pos 0] [lo 0] [hi (min (- (string-length str) start-pos)
(* pos 6))]) (* pos 6))])
(cond (cond
[(= lo hi) lo] [(= lo hi) lo]
@@ -71,7 +71,7 @@
(if (= lo-pos pos) lo hi)] (if (= lo-pos pos) lo hi)]
[else [else
(define mid (quotient (+ lo hi) 2)) (define mid (quotient (+ lo hi) 2))
(define len (string-utf-8-length str lo mid)) (define len (string-utf-8-length str (+ start-pos lo) (+ start-pos mid)))
(define mid-pos (+ lo-pos len)) (define mid-pos (+ lo-pos len))
(cond (cond
[(= mid-pos pos) mid] [(= mid-pos pos) mid]

View File

@@ -349,7 +349,7 @@
;; boundary, and everything from `ms-pos` to `ms-end` must ;; boundary, and everything from `ms-pos` to `ms-end` must
;; still be in `lb-in`. So, find `ms-pos` in the original ;; still be in `lb-in`. So, find `ms-pos` in the original
;; string, and take it from there. ;; string, and take it from there.
(define ms-str-pos (byte-index->string-index in (- ms-pos start-pos))) (define ms-str-pos (byte-index->string-index in start-offset (- ms-pos start-pos)))
(define delta (lazy-bytes-discarded-count lb-in)) (define delta (lazy-bytes-discarded-count lb-in))
(byte-positions->string-positions bstr ms-pos me-pos state (byte-positions->string-positions bstr ms-pos me-pos state
#:start-index (- ms-pos delta) #:start-index (- ms-pos delta)