simplify and extend the find-string method in text%
This implementation runs about 10% faster than the old when searching for "λ" in (collection-file-path "unit.rkt" "drracket" "private"). It is copied directly from the search search algorithm from Wikipedia called Knuth-Morris-Pratt, but with a fancier version of get-char that cooperates with text%'s snip interface for more efficient linear scans of characters. It also generalizes the previous one by supporting searching into embedded text% objects inside editor-snip% objects related to PR 14688 related to PR 14687 original commit: 06b9b4f0a2af26fdf0cb519f225521b8ca8362c6
This commit is contained in:
parent
5066731b25
commit
290b5c6893
|
@ -668,6 +668,27 @@ If @racket[case-sensitive?] is @racket[#f], then an uppercase and lowercase
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@defmethod[(find-string-embedded [str string?]
|
||||||
|
[direction (or/c 'forward 'backward) 'forward]
|
||||||
|
[start (or/c exact-nonnegative-integer? 'start) 'start]
|
||||||
|
[end (or/c exact-nonnegative-integer? 'eof) 'eof]
|
||||||
|
[get-start? any/c #t]
|
||||||
|
[case-sensitive? any/c #t])
|
||||||
|
(or/c exact-nonnegative-integer?
|
||||||
|
#f
|
||||||
|
(cons/c
|
||||||
|
(is-a?/c editor<%>)
|
||||||
|
(flat-rec-contract
|
||||||
|
nested-editor-search-result
|
||||||
|
(or/c (cons/c (is-a?/c editor<%>)
|
||||||
|
nested-editor-search-result)
|
||||||
|
exact-nonnegative-integer?))))]{
|
||||||
|
Like @method[text% find-string], but also searches in embedded editors,
|
||||||
|
returning a series of cons pairs whose @racket[car] positions
|
||||||
|
are the editors on the path to the editor where the search
|
||||||
|
string occurred and whose final @racket[cdr] position is the
|
||||||
|
search result position.
|
||||||
|
}
|
||||||
|
|
||||||
@defmethod[(find-string-all [str string?]
|
@defmethod[(find-string-all [str string?]
|
||||||
[direction (or/c 'forward 'backward) 'forward]
|
[direction (or/c 'forward 'backward) 'forward]
|
||||||
|
@ -683,6 +704,24 @@ Finds all occurrences of a string using @method[text% find-string]. If
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@defmethod[(find-string-embedded-all [str string?]
|
||||||
|
[direction (or/c 'forward 'backward) 'forward]
|
||||||
|
[start (or/c exact-nonnegative-integer? 'start) 'start]
|
||||||
|
[end (or/c exact-nonnegative-integer? 'eof) 'eof]
|
||||||
|
[get-start? any/c #t]
|
||||||
|
[case-sensitive any/c #t])
|
||||||
|
(listof (or/c exact-nonnegative-integer?
|
||||||
|
(cons/c
|
||||||
|
(is-a?/c editor<%>)
|
||||||
|
(flat-rec-contract
|
||||||
|
nested-editor-search-result
|
||||||
|
(or/c (cons/c (is-a?/c editor<%>)
|
||||||
|
nested-editor-search-result)
|
||||||
|
(listof exact-nonnegative-integer?))))))]{
|
||||||
|
Like @method[text% find-string-embedded], but also searches in embedded
|
||||||
|
editors, returning search results a list of the editors that contain
|
||||||
|
the matches.
|
||||||
|
}
|
||||||
|
|
||||||
@defmethod[(find-wordbreak [start (or/c (box/c exact-nonnegative-integer?) #f)]
|
@defmethod[(find-wordbreak [start (or/c (box/c exact-nonnegative-integer?) #f)]
|
||||||
[end (or/c (box/c exact-nonnegative-integer?) #f)]
|
[end (or/c (box/c exact-nonnegative-integer?) #f)]
|
||||||
|
|
|
@ -19,13 +19,17 @@
|
||||||
"keymap.rkt"
|
"keymap.rkt"
|
||||||
(only-in "cycle.rkt"
|
(only-in "cycle.rkt"
|
||||||
printer-dc%
|
printer-dc%
|
||||||
set-text%!)
|
set-text%!
|
||||||
|
editor-snip%)
|
||||||
"wordbreak.rkt"
|
"wordbreak.rkt"
|
||||||
"stream.rkt"
|
"stream.rkt"
|
||||||
"wx.rkt")
|
"wx.rkt")
|
||||||
|
|
||||||
(provide text%
|
(provide text%
|
||||||
add-text-keymap-functions)
|
add-text-keymap-functions
|
||||||
|
|
||||||
|
;; for the test suite
|
||||||
|
do-find-string-all)
|
||||||
|
|
||||||
;; ----------------------------------------
|
;; ----------------------------------------
|
||||||
|
|
||||||
|
@ -68,6 +72,8 @@
|
||||||
|
|
||||||
(define in-delayed-refresh (make-parameter #f))
|
(define in-delayed-refresh (make-parameter #f))
|
||||||
|
|
||||||
|
(define-local-member-name do-find-string-all do-find-string)
|
||||||
|
|
||||||
(defclass text% editor%
|
(defclass text% editor%
|
||||||
(inherit-field s-admin
|
(inherit-field s-admin
|
||||||
s-offscreen
|
s-offscreen
|
||||||
|
@ -3491,9 +3497,9 @@
|
||||||
[(make-alts exact-nonnegative-integer? (symbol-in eof)) [end 'eof]]
|
[(make-alts exact-nonnegative-integer? (symbol-in eof)) [end 'eof]]
|
||||||
[any? [bos? #t]]
|
[any? [bos? #t]]
|
||||||
[any? [case-sens? #t]])
|
[any? [case-sens? #t]])
|
||||||
(if (not (check-recalc #f #f))
|
(if (check-recalc #f #f)
|
||||||
#f
|
(do-find-string-all str direction start end #t bos? case-sens? #f)
|
||||||
(do-find-string-all str direction start end #t bos? case-sens?)))
|
#f))
|
||||||
|
|
||||||
(def/public (find-string-all [string? str]
|
(def/public (find-string-all [string? str]
|
||||||
[(symbol-in forward backward) [direction 'forward]]
|
[(symbol-in forward backward) [direction 'forward]]
|
||||||
|
@ -3501,9 +3507,29 @@
|
||||||
[(make-alts exact-nonnegative-integer? (symbol-in eof)) [end 'eof]]
|
[(make-alts exact-nonnegative-integer? (symbol-in eof)) [end 'eof]]
|
||||||
[any? [bos? #t]]
|
[any? [bos? #t]]
|
||||||
[any? [case-sens? #t]])
|
[any? [case-sens? #t]])
|
||||||
(if (not (check-recalc #f #f))
|
(if (check-recalc #f #f)
|
||||||
null
|
(do-find-string-all str direction start end #f bos? case-sens? #f)
|
||||||
(reverse (do-find-string-all str direction start end #f bos? case-sens?))))
|
null))
|
||||||
|
|
||||||
|
(def/public (find-string-embedded [string? str]
|
||||||
|
[(symbol-in forward backward) [direction 'forward]]
|
||||||
|
[(make-alts exact-nonnegative-integer? (symbol-in start)) [start 'start]]
|
||||||
|
[(make-alts exact-nonnegative-integer? (symbol-in eof)) [end 'eof]]
|
||||||
|
[any? [bos? #t]]
|
||||||
|
[any? [case-sens? #t]])
|
||||||
|
(if (check-recalc #f #f)
|
||||||
|
(do-find-string-all str direction start end #t bos? case-sens? #t)
|
||||||
|
#f))
|
||||||
|
|
||||||
|
(def/public (find-string-embedded-all [string? str]
|
||||||
|
[(symbol-in forward backward) [direction 'forward]]
|
||||||
|
[(make-alts exact-nonnegative-integer? (symbol-in start)) [start 'start]]
|
||||||
|
[(make-alts exact-nonnegative-integer? (symbol-in eof)) [end 'eof]]
|
||||||
|
[any? [bos? #t]]
|
||||||
|
[any? [case-sens? #t]])
|
||||||
|
(if (check-recalc #f #f)
|
||||||
|
(do-find-string-all str direction start end #f bos? case-sens? #t)
|
||||||
|
null))
|
||||||
|
|
||||||
(def/public (find-newline [(symbol-in forward backward) [direction 'forward]]
|
(def/public (find-newline [(symbol-in forward backward) [direction 'forward]]
|
||||||
[(make-alts exact-nonnegative-integer? (symbol-in start)) [start 'start]]
|
[(make-alts exact-nonnegative-integer? (symbol-in start)) [start 'start]]
|
||||||
|
@ -3526,160 +3552,206 @@
|
||||||
#f
|
#f
|
||||||
pos))))
|
pos))))
|
||||||
|
|
||||||
(define/private (do-find-string-all str direction
|
;; this is only public for the test suite
|
||||||
start end
|
; (do-find-string-all is bound by define-local-member-name)
|
||||||
just-one?
|
(define/public (do-find-string-all _word direction
|
||||||
bos?
|
_start _end
|
||||||
case-sens?)
|
just-one? beginning-of-match? case-sens? recur-inside?)
|
||||||
|
(define end
|
||||||
|
(cond
|
||||||
|
[(equal? _end 'eof) (last-position)]
|
||||||
|
[else _end]))
|
||||||
|
(define start
|
||||||
|
(cond
|
||||||
|
[(equal? _start 'start)
|
||||||
|
(get-start-position)]
|
||||||
|
[else _start]))
|
||||||
|
(define forward? (equal? direction 'forward))
|
||||||
|
(define word
|
||||||
|
(cond
|
||||||
|
[forward? _word]
|
||||||
|
[else
|
||||||
|
(define l (string-length _word))
|
||||||
|
(define s (make-string l))
|
||||||
|
(for ([i (in-range (string-length _word))])
|
||||||
|
(string-set! s i (string-ref _word (- l i 1))))
|
||||||
|
s]))
|
||||||
|
|
||||||
(let ([start (min (if (symbol? start)
|
(do-find-string word start end
|
||||||
startpos
|
just-one? case-sens? forward? recur-inside? beginning-of-match?))
|
||||||
start)
|
|
||||||
len)]
|
|
||||||
[end (min (if (symbol? end)
|
|
||||||
(if (eq? direction 'forward)
|
|
||||||
len
|
|
||||||
0)
|
|
||||||
end)
|
|
||||||
len)])
|
|
||||||
(let ([total-count
|
|
||||||
(if (eq? direction 'backward)
|
|
||||||
(- start end)
|
|
||||||
(- end start))])
|
|
||||||
(if (or (negative? total-count)
|
|
||||||
(string=? str ""))
|
|
||||||
(if just-one? #f null)
|
|
||||||
|
|
||||||
(let ([slen (string-length str)]
|
(define/private (convert-result m word forward? beginning-of-match?)
|
||||||
[str (if case-sens?
|
(cond
|
||||||
str
|
[forward?
|
||||||
(string-foldcase str))])
|
(if beginning-of-match?
|
||||||
(let-values ([(snip s-pos) (find-snip/pos start (if (eq? direction 'forward) 'after 'before))])
|
m
|
||||||
|
(+ m (string-length word)))]
|
||||||
|
[else
|
||||||
|
(define len (last-position))
|
||||||
|
(if beginning-of-match?
|
||||||
|
(- len m)
|
||||||
|
(- len m (string-length word)))]))
|
||||||
|
|
||||||
(if (not snip)
|
;; this uses the Knuth-Morris-Pratt string search algorithm, according to
|
||||||
(if just-one? #f null)
|
;; wikipedia: http://en.wikipedia.org/wiki/Knuth–Morris–Pratt_algorithm
|
||||||
|
;; this is a define-local-member-name to support the recur-inside? functionality
|
||||||
|
(define/public (do-find-string _word _start _end
|
||||||
|
just-one? case-sens? forward? recur-inside? beginning-of-match?)
|
||||||
|
(define word (if case-sens?
|
||||||
|
_word
|
||||||
|
(string-downcase _word)))
|
||||||
|
(define latest-snip-str #f)
|
||||||
|
(define latest-snip-len #f)
|
||||||
|
(define latest-snip-position #f)
|
||||||
|
(define latest-snip #f)
|
||||||
|
(define last-pos (last-position))
|
||||||
|
|
||||||
;; Knuth-Bendix
|
(define start (if forward? _start (- last-pos _start)))
|
||||||
|
(define end (if forward? _end (- last-pos _end)))
|
||||||
|
|
||||||
(let-values ([(offset shorten sbase beyond sgoal direction)
|
;; the algorithm may consider the same position
|
||||||
(if (eq? direction 'forward)
|
;; multiple times, so we track which positions that
|
||||||
(values (- start s-pos) 0 0 -1 slen 1)
|
;; have embedded editors that are already considered.
|
||||||
(values 0 (- (+ s-pos (snip->count snip)) start) (- slen 1) slen -1 -1))]
|
(define embedded-editors-considered (make-hash))
|
||||||
[(smap) (make-vector slen 0)])
|
|
||||||
|
|
||||||
;; initialize smap:
|
(define (get-char _i)
|
||||||
(vector-set! smap sbase beyond)
|
(define i (if forward? _i (- last-pos _i 1)))
|
||||||
(let loop ([s beyond]
|
(cond
|
||||||
[i (+ sbase direction)])
|
[(and latest-snip-str
|
||||||
(unless (= i sgoal)
|
(< -1
|
||||||
(let iloop ([s s])
|
(- i latest-snip-position)
|
||||||
(if (and (not (= beyond s))
|
latest-snip-len))
|
||||||
(not (char=? (string-ref str (+ s direction)) (string-ref str i))))
|
(string-ref latest-snip-str (- i latest-snip-position))]
|
||||||
(iloop (vector-ref smap s))
|
[else
|
||||||
(let ([s (if (char=? (string-ref str (+ s direction))
|
(define-values (guess-snip guess-snip-position guess-snip-len)
|
||||||
(string-ref str i))
|
(cond
|
||||||
(+ s direction)
|
[(not latest-snip)
|
||||||
s)])
|
(define fst (find-first-snip))
|
||||||
(vector-set! smap i s)
|
(values fst (and fst 0) (and fst (send fst get-count)))]
|
||||||
(loop s (+ i direction)))))))
|
[forward?
|
||||||
(define text "")
|
(define next (send latest-snip next))
|
||||||
(let a-loop ([s beyond]
|
(values next
|
||||||
[s-pos s-pos]
|
(and next (+ latest-snip-position latest-snip-len))
|
||||||
[snip snip]
|
(and next (send next get-count)))]
|
||||||
[total-count total-count]
|
[else
|
||||||
[offset offset]
|
(define prev (send latest-snip previous))
|
||||||
[shorten shorten]
|
(define pc (and prev (send prev get-count)))
|
||||||
[results null])
|
(values prev
|
||||||
(if (and snip (positive? total-count))
|
(and prev (- latest-snip-position pc))
|
||||||
(let*-values ([(need) (- (snip->count snip) shorten offset)]
|
pc)]))
|
||||||
[(need offset)
|
(cond
|
||||||
(if (need . > . total-count)
|
[(and guess-snip
|
||||||
(if (direction . < . 0)
|
(<= guess-snip-position i)
|
||||||
(values total-count (+ offset (- need total-count)))
|
(< i (+ guess-snip-position guess-snip-len)))
|
||||||
(values total-count offset))
|
(set! latest-snip guess-snip)
|
||||||
(values need offset))]
|
(set! latest-snip-position guess-snip-position)
|
||||||
[(total-count) (- total-count need)])
|
(set! latest-snip-len guess-snip-len)]
|
||||||
|
[else
|
||||||
|
(define b (box #f))
|
||||||
|
(set! latest-snip (find-snip i 'after-or-none b))
|
||||||
|
(when latest-snip
|
||||||
|
(set! latest-snip-position (unbox b))
|
||||||
|
(set! latest-snip-len (send latest-snip get-count)))])
|
||||||
|
|
||||||
(let b-loop ([checked 0]
|
(when (or (not latest-snip-str)
|
||||||
[need need]
|
(< (string-length latest-snip-str)
|
||||||
[results results])
|
latest-snip-len))
|
||||||
(let* ([thistime (min need 255)]
|
(set! latest-snip-str (make-string latest-snip-len)))
|
||||||
[need (- need thistime)]
|
(send latest-snip get-text! latest-snip-str 0 latest-snip-len 0)
|
||||||
[thisoffset (+ offset (if (direction . < . 0) need checked))]
|
(unless case-sens?
|
||||||
[wl? write-locked?]
|
(for ([c (in-range latest-snip-len)])
|
||||||
[fl? flow-locked?])
|
(string-set! latest-snip-str c (char-downcase (string-ref latest-snip-str c)))))
|
||||||
(when (< (string-length text) (send snip get-count))
|
(cond
|
||||||
(set! text (make-string (send snip get-count))))
|
[(and recur-inside?
|
||||||
(set! write-locked? #t)
|
(is-a? latest-snip editor-snip%))
|
||||||
(set! flow-locked? #t)
|
(cond
|
||||||
(send snip get-text! text thisoffset thistime 0)
|
[(hash-ref embedded-editors-considered i #f) #f]
|
||||||
(set! write-locked? wl?)
|
[else
|
||||||
(set! flow-locked? fl?)
|
(hash-set! embedded-editors-considered i #t)
|
||||||
|
(let loop ([snip latest-snip])
|
||||||
(let c-loop ([i (if (direction . > . 0) 0 (- thistime 1))]
|
(define ed (send snip get-editor))
|
||||||
[n thistime]
|
(cond
|
||||||
[s s]
|
[(is-a? ed text%)
|
||||||
[results results])
|
(define lp (send ed last-position))
|
||||||
(if (zero? n)
|
(define result
|
||||||
(if (positive? need)
|
(send ed do-find-string _word
|
||||||
|
(if forward? 0 lp) (if forward? lp 0)
|
||||||
(b-loop (add1 checked)
|
just-one? case-sens? forward? recur-inside? beginning-of-match?))
|
||||||
need
|
(and result (not (null? result)) (cons ed result))]
|
||||||
results)
|
[(not ed) #f]
|
||||||
|
[else
|
||||||
(let* ([s-pos (if (direction . > . 0)
|
(define inner-result
|
||||||
(+ s-pos (snip->count snip))
|
(let inner-loop ([inner-snip (send ed find-first-snip)])
|
||||||
s-pos)]
|
(cond
|
||||||
[snip (if (direction . > . 0)
|
[(is-a? inner-snip editor-snip%)
|
||||||
(snip->next snip)
|
(define this-one (loop inner-snip))
|
||||||
(snip->prev snip))]
|
|
||||||
[s-pos (if (and snip (direction . < . 0))
|
|
||||||
(- s-pos (snip->count snip))
|
|
||||||
s-pos)])
|
|
||||||
(a-loop s
|
|
||||||
s-pos
|
|
||||||
snip
|
|
||||||
total-count
|
|
||||||
0
|
|
||||||
0
|
|
||||||
results)))
|
|
||||||
|
|
||||||
(let* ([n (sub1 n)]
|
|
||||||
[c (string-ref text i)]
|
|
||||||
[c (if case-sens? c (char-foldcase c))]
|
|
||||||
[s (let loop ([s s])
|
|
||||||
(if (and (not (= beyond s))
|
|
||||||
(not (char=? (string-ref str (+ s direction)) c)))
|
|
||||||
(loop (vector-ref smap s))
|
|
||||||
s))])
|
|
||||||
(if (char=? (string-ref str (+ s direction)) c)
|
|
||||||
(let ([s (+ s direction)])
|
|
||||||
(if (= (+ s direction) sgoal)
|
|
||||||
(let* ([p (+ s-pos i thisoffset)]
|
|
||||||
[p (if bos?
|
|
||||||
(if (direction . < . 0)
|
|
||||||
(+ p slen)
|
|
||||||
(- p (- slen 1)))
|
|
||||||
(if (direction . > . 0)
|
|
||||||
(add1 p)
|
|
||||||
p))])
|
|
||||||
(if just-one?
|
(if just-one?
|
||||||
p ;; <------ single result returned here
|
(or this-one
|
||||||
(c-loop (+ i direction)
|
(inner-loop (send inner-snip next)))
|
||||||
n
|
(if this-one
|
||||||
beyond
|
(cons this-one
|
||||||
(cons p results))))
|
(inner-loop (send inner-snip next)))
|
||||||
(c-loop (+ i direction)
|
(inner-loop (send inner-snip next))))]
|
||||||
n
|
[(not inner-snip) (if just-one? #f '())]
|
||||||
s
|
[else (inner-loop (send inner-snip next))])))
|
||||||
results)))
|
(and inner-result
|
||||||
(c-loop (+ i direction)
|
(pair? inner-result)
|
||||||
n
|
(cons ed inner-result))]))])]
|
||||||
s
|
[else
|
||||||
results))))))))
|
(string-ref latest-snip-str (- i latest-snip-position))])]))
|
||||||
|
|
||||||
|
(define t (build-table word))
|
||||||
|
(define word-len-minus-one (- (string-length word) 1))
|
||||||
|
|
||||||
|
(let loop ([m start]
|
||||||
|
[i 0])
|
||||||
|
(define m-plus-i (+ m i))
|
||||||
|
(cond
|
||||||
|
[(< m-plus-i end)
|
||||||
|
(define the-char (get-char m-plus-i))
|
||||||
|
(cond
|
||||||
|
[(pair? the-char)
|
||||||
|
;; found an embedded editor with a search result; transmit it
|
||||||
(if just-one?
|
(if just-one?
|
||||||
#f
|
the-char
|
||||||
results)))))))))))
|
(cons the-char (loop (+ m 1) 0)))]
|
||||||
|
[(and (char? the-char) (char=? (string-ref word i) the-char))
|
||||||
|
(cond
|
||||||
|
[(= i word-len-minus-one)
|
||||||
|
(if just-one?
|
||||||
|
(convert-result m word forward? beginning-of-match?)
|
||||||
|
(cons (convert-result m word forward? beginning-of-match?)
|
||||||
|
(loop (+ m 1) 0)))]
|
||||||
|
[else
|
||||||
|
(loop m (+ i 1))])]
|
||||||
|
[else
|
||||||
|
(define t-i (vector-ref t i))
|
||||||
|
(cond
|
||||||
|
[t-i
|
||||||
|
(loop (- m-plus-i t-i) t-i)]
|
||||||
|
[else
|
||||||
|
(loop (+ m 1) 0)])])]
|
||||||
|
[else
|
||||||
|
(if just-one? #f '())])))
|
||||||
|
|
||||||
|
(define/private (build-table word)
|
||||||
|
(define t (make-vector (string-length word) #f))
|
||||||
|
(when ((string-length word) . > . 1)
|
||||||
|
(vector-set! t 1 0)
|
||||||
|
(let loop ([pos 2]
|
||||||
|
[cnd 0])
|
||||||
|
(when (< pos (string-length word))
|
||||||
|
(cond
|
||||||
|
[(char=? (string-ref word (- pos 1))
|
||||||
|
(string-ref word cnd))
|
||||||
|
(vector-set! t pos (+ cnd 1))
|
||||||
|
(loop (+ pos 1) (+ cnd 1))]
|
||||||
|
[(> cnd 0)
|
||||||
|
(loop pos (vector-ref t cnd))]
|
||||||
|
[else
|
||||||
|
(vector-set! t pos 0)
|
||||||
|
(loop (+ pos 1) cnd)]))))
|
||||||
|
t)
|
||||||
|
|
||||||
;; ----------------------------------------
|
;; ----------------------------------------
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user