simplify and extend the find-string method in text%

This implementation runs about 10% faster than the old when searching
for "λ" in (collection-file-path "unit.rkt" "drracket" "private"). It
is copied directly from the search search algorithm from Wikipedia
called Knuth-Morris-Pratt, but with a fancier version of get-char that
cooperates with text%'s snip interface for more efficient linear
scans of characters.

It also generalizes the previous one by supporting searching into
embedded text% objects inside editor-snip% objects

related to PR 14688
related to PR 14687

original commit: 06b9b4f0a2af26fdf0cb519f225521b8ca8362c6
This commit is contained in:
Robby Findler 2014-10-25 21:45:35 -05:00
parent 5066731b25
commit 290b5c6893
2 changed files with 274 additions and 163 deletions

View File

@ -668,6 +668,27 @@ If @racket[case-sensitive?] is @racket[#f], then an uppercase and lowercase
} }
@defmethod[(find-string-embedded [str string?]
[direction (or/c 'forward 'backward) 'forward]
[start (or/c exact-nonnegative-integer? 'start) 'start]
[end (or/c exact-nonnegative-integer? 'eof) 'eof]
[get-start? any/c #t]
[case-sensitive? any/c #t])
(or/c exact-nonnegative-integer?
#f
(cons/c
(is-a?/c editor<%>)
(flat-rec-contract
nested-editor-search-result
(or/c (cons/c (is-a?/c editor<%>)
nested-editor-search-result)
exact-nonnegative-integer?))))]{
Like @method[text% find-string], but also searches in embedded editors,
returning a series of cons pairs whose @racket[car] positions
are the editors on the path to the editor where the search
string occurred and whose final @racket[cdr] position is the
search result position.
}
@defmethod[(find-string-all [str string?] @defmethod[(find-string-all [str string?]
[direction (or/c 'forward 'backward) 'forward] [direction (or/c 'forward 'backward) 'forward]
@ -683,6 +704,24 @@ Finds all occurrences of a string using @method[text% find-string]. If
} }
@defmethod[(find-string-embedded-all [str string?]
[direction (or/c 'forward 'backward) 'forward]
[start (or/c exact-nonnegative-integer? 'start) 'start]
[end (or/c exact-nonnegative-integer? 'eof) 'eof]
[get-start? any/c #t]
[case-sensitive any/c #t])
(listof (or/c exact-nonnegative-integer?
(cons/c
(is-a?/c editor<%>)
(flat-rec-contract
nested-editor-search-result
(or/c (cons/c (is-a?/c editor<%>)
nested-editor-search-result)
(listof exact-nonnegative-integer?))))))]{
Like @method[text% find-string-embedded], but also searches in embedded
editors, returning search results a list of the editors that contain
the matches.
}
@defmethod[(find-wordbreak [start (or/c (box/c exact-nonnegative-integer?) #f)] @defmethod[(find-wordbreak [start (or/c (box/c exact-nonnegative-integer?) #f)]
[end (or/c (box/c exact-nonnegative-integer?) #f)] [end (or/c (box/c exact-nonnegative-integer?) #f)]

View File

@ -19,13 +19,17 @@
"keymap.rkt" "keymap.rkt"
(only-in "cycle.rkt" (only-in "cycle.rkt"
printer-dc% printer-dc%
set-text%!) set-text%!
editor-snip%)
"wordbreak.rkt" "wordbreak.rkt"
"stream.rkt" "stream.rkt"
"wx.rkt") "wx.rkt")
(provide text% (provide text%
add-text-keymap-functions) add-text-keymap-functions
;; for the test suite
do-find-string-all)
;; ---------------------------------------- ;; ----------------------------------------
@ -68,6 +72,8 @@
(define in-delayed-refresh (make-parameter #f)) (define in-delayed-refresh (make-parameter #f))
(define-local-member-name do-find-string-all do-find-string)
(defclass text% editor% (defclass text% editor%
(inherit-field s-admin (inherit-field s-admin
s-offscreen s-offscreen
@ -3491,9 +3497,9 @@
[(make-alts exact-nonnegative-integer? (symbol-in eof)) [end 'eof]] [(make-alts exact-nonnegative-integer? (symbol-in eof)) [end 'eof]]
[any? [bos? #t]] [any? [bos? #t]]
[any? [case-sens? #t]]) [any? [case-sens? #t]])
(if (not (check-recalc #f #f)) (if (check-recalc #f #f)
#f (do-find-string-all str direction start end #t bos? case-sens? #f)
(do-find-string-all str direction start end #t bos? case-sens?))) #f))
(def/public (find-string-all [string? str] (def/public (find-string-all [string? str]
[(symbol-in forward backward) [direction 'forward]] [(symbol-in forward backward) [direction 'forward]]
@ -3501,9 +3507,29 @@
[(make-alts exact-nonnegative-integer? (symbol-in eof)) [end 'eof]] [(make-alts exact-nonnegative-integer? (symbol-in eof)) [end 'eof]]
[any? [bos? #t]] [any? [bos? #t]]
[any? [case-sens? #t]]) [any? [case-sens? #t]])
(if (not (check-recalc #f #f)) (if (check-recalc #f #f)
null (do-find-string-all str direction start end #f bos? case-sens? #f)
(reverse (do-find-string-all str direction start end #f bos? case-sens?)))) null))
(def/public (find-string-embedded [string? str]
[(symbol-in forward backward) [direction 'forward]]
[(make-alts exact-nonnegative-integer? (symbol-in start)) [start 'start]]
[(make-alts exact-nonnegative-integer? (symbol-in eof)) [end 'eof]]
[any? [bos? #t]]
[any? [case-sens? #t]])
(if (check-recalc #f #f)
(do-find-string-all str direction start end #t bos? case-sens? #t)
#f))
(def/public (find-string-embedded-all [string? str]
[(symbol-in forward backward) [direction 'forward]]
[(make-alts exact-nonnegative-integer? (symbol-in start)) [start 'start]]
[(make-alts exact-nonnegative-integer? (symbol-in eof)) [end 'eof]]
[any? [bos? #t]]
[any? [case-sens? #t]])
(if (check-recalc #f #f)
(do-find-string-all str direction start end #f bos? case-sens? #t)
null))
(def/public (find-newline [(symbol-in forward backward) [direction 'forward]] (def/public (find-newline [(symbol-in forward backward) [direction 'forward]]
[(make-alts exact-nonnegative-integer? (symbol-in start)) [start 'start]] [(make-alts exact-nonnegative-integer? (symbol-in start)) [start 'start]]
@ -3526,160 +3552,206 @@
#f #f
pos)))) pos))))
(define/private (do-find-string-all str direction ;; this is only public for the test suite
start end ; (do-find-string-all is bound by define-local-member-name)
just-one? (define/public (do-find-string-all _word direction
bos? _start _end
case-sens?) just-one? beginning-of-match? case-sens? recur-inside?)
(define end
(cond
[(equal? _end 'eof) (last-position)]
[else _end]))
(define start
(cond
[(equal? _start 'start)
(get-start-position)]
[else _start]))
(define forward? (equal? direction 'forward))
(define word
(cond
[forward? _word]
[else
(define l (string-length _word))
(define s (make-string l))
(for ([i (in-range (string-length _word))])
(string-set! s i (string-ref _word (- l i 1))))
s]))
(let ([start (min (if (symbol? start) (do-find-string word start end
startpos just-one? case-sens? forward? recur-inside? beginning-of-match?))
start)
len)]
[end (min (if (symbol? end)
(if (eq? direction 'forward)
len
0)
end)
len)])
(let ([total-count
(if (eq? direction 'backward)
(- start end)
(- end start))])
(if (or (negative? total-count)
(string=? str ""))
(if just-one? #f null)
(let ([slen (string-length str)] (define/private (convert-result m word forward? beginning-of-match?)
[str (if case-sens? (cond
str [forward?
(string-foldcase str))]) (if beginning-of-match?
(let-values ([(snip s-pos) (find-snip/pos start (if (eq? direction 'forward) 'after 'before))]) m
(+ m (string-length word)))]
[else
(define len (last-position))
(if beginning-of-match?
(- len m)
(- len m (string-length word)))]))
(if (not snip) ;; this uses the Knuth-Morris-Pratt string search algorithm, according to
(if just-one? #f null) ;; wikipedia: http://en.wikipedia.org/wiki/KnuthMorrisPratt_algorithm
;; this is a define-local-member-name to support the recur-inside? functionality
(define/public (do-find-string _word _start _end
just-one? case-sens? forward? recur-inside? beginning-of-match?)
(define word (if case-sens?
_word
(string-downcase _word)))
(define latest-snip-str #f)
(define latest-snip-len #f)
(define latest-snip-position #f)
(define latest-snip #f)
(define last-pos (last-position))
;; Knuth-Bendix (define start (if forward? _start (- last-pos _start)))
(define end (if forward? _end (- last-pos _end)))
(let-values ([(offset shorten sbase beyond sgoal direction) ;; the algorithm may consider the same position
(if (eq? direction 'forward) ;; multiple times, so we track which positions that
(values (- start s-pos) 0 0 -1 slen 1) ;; have embedded editors that are already considered.
(values 0 (- (+ s-pos (snip->count snip)) start) (- slen 1) slen -1 -1))] (define embedded-editors-considered (make-hash))
[(smap) (make-vector slen 0)])
;; initialize smap: (define (get-char _i)
(vector-set! smap sbase beyond) (define i (if forward? _i (- last-pos _i 1)))
(let loop ([s beyond] (cond
[i (+ sbase direction)]) [(and latest-snip-str
(unless (= i sgoal) (< -1
(let iloop ([s s]) (- i latest-snip-position)
(if (and (not (= beyond s)) latest-snip-len))
(not (char=? (string-ref str (+ s direction)) (string-ref str i)))) (string-ref latest-snip-str (- i latest-snip-position))]
(iloop (vector-ref smap s)) [else
(let ([s (if (char=? (string-ref str (+ s direction)) (define-values (guess-snip guess-snip-position guess-snip-len)
(string-ref str i)) (cond
(+ s direction) [(not latest-snip)
s)]) (define fst (find-first-snip))
(vector-set! smap i s) (values fst (and fst 0) (and fst (send fst get-count)))]
(loop s (+ i direction))))))) [forward?
(define text "") (define next (send latest-snip next))
(let a-loop ([s beyond] (values next
[s-pos s-pos] (and next (+ latest-snip-position latest-snip-len))
[snip snip] (and next (send next get-count)))]
[total-count total-count] [else
[offset offset] (define prev (send latest-snip previous))
[shorten shorten] (define pc (and prev (send prev get-count)))
[results null]) (values prev
(if (and snip (positive? total-count)) (and prev (- latest-snip-position pc))
(let*-values ([(need) (- (snip->count snip) shorten offset)] pc)]))
[(need offset) (cond
(if (need . > . total-count) [(and guess-snip
(if (direction . < . 0) (<= guess-snip-position i)
(values total-count (+ offset (- need total-count))) (< i (+ guess-snip-position guess-snip-len)))
(values total-count offset)) (set! latest-snip guess-snip)
(values need offset))] (set! latest-snip-position guess-snip-position)
[(total-count) (- total-count need)]) (set! latest-snip-len guess-snip-len)]
[else
(define b (box #f))
(set! latest-snip (find-snip i 'after-or-none b))
(when latest-snip
(set! latest-snip-position (unbox b))
(set! latest-snip-len (send latest-snip get-count)))])
(let b-loop ([checked 0] (when (or (not latest-snip-str)
[need need] (< (string-length latest-snip-str)
[results results]) latest-snip-len))
(let* ([thistime (min need 255)] (set! latest-snip-str (make-string latest-snip-len)))
[need (- need thistime)] (send latest-snip get-text! latest-snip-str 0 latest-snip-len 0)
[thisoffset (+ offset (if (direction . < . 0) need checked))] (unless case-sens?
[wl? write-locked?] (for ([c (in-range latest-snip-len)])
[fl? flow-locked?]) (string-set! latest-snip-str c (char-downcase (string-ref latest-snip-str c)))))
(when (< (string-length text) (send snip get-count)) (cond
(set! text (make-string (send snip get-count)))) [(and recur-inside?
(set! write-locked? #t) (is-a? latest-snip editor-snip%))
(set! flow-locked? #t) (cond
(send snip get-text! text thisoffset thistime 0) [(hash-ref embedded-editors-considered i #f) #f]
(set! write-locked? wl?) [else
(set! flow-locked? fl?) (hash-set! embedded-editors-considered i #t)
(let loop ([snip latest-snip])
(let c-loop ([i (if (direction . > . 0) 0 (- thistime 1))] (define ed (send snip get-editor))
[n thistime] (cond
[s s] [(is-a? ed text%)
[results results]) (define lp (send ed last-position))
(if (zero? n) (define result
(if (positive? need) (send ed do-find-string _word
(if forward? 0 lp) (if forward? lp 0)
(b-loop (add1 checked) just-one? case-sens? forward? recur-inside? beginning-of-match?))
need (and result (not (null? result)) (cons ed result))]
results) [(not ed) #f]
[else
(let* ([s-pos (if (direction . > . 0) (define inner-result
(+ s-pos (snip->count snip)) (let inner-loop ([inner-snip (send ed find-first-snip)])
s-pos)] (cond
[snip (if (direction . > . 0) [(is-a? inner-snip editor-snip%)
(snip->next snip) (define this-one (loop inner-snip))
(snip->prev snip))]
[s-pos (if (and snip (direction . < . 0))
(- s-pos (snip->count snip))
s-pos)])
(a-loop s
s-pos
snip
total-count
0
0
results)))
(let* ([n (sub1 n)]
[c (string-ref text i)]
[c (if case-sens? c (char-foldcase c))]
[s (let loop ([s s])
(if (and (not (= beyond s))
(not (char=? (string-ref str (+ s direction)) c)))
(loop (vector-ref smap s))
s))])
(if (char=? (string-ref str (+ s direction)) c)
(let ([s (+ s direction)])
(if (= (+ s direction) sgoal)
(let* ([p (+ s-pos i thisoffset)]
[p (if bos?
(if (direction . < . 0)
(+ p slen)
(- p (- slen 1)))
(if (direction . > . 0)
(add1 p)
p))])
(if just-one? (if just-one?
p ;; <------ single result returned here (or this-one
(c-loop (+ i direction) (inner-loop (send inner-snip next)))
n (if this-one
beyond (cons this-one
(cons p results)))) (inner-loop (send inner-snip next)))
(c-loop (+ i direction) (inner-loop (send inner-snip next))))]
n [(not inner-snip) (if just-one? #f '())]
s [else (inner-loop (send inner-snip next))])))
results))) (and inner-result
(c-loop (+ i direction) (pair? inner-result)
n (cons ed inner-result))]))])]
s [else
results)))))))) (string-ref latest-snip-str (- i latest-snip-position))])]))
(define t (build-table word))
(define word-len-minus-one (- (string-length word) 1))
(let loop ([m start]
[i 0])
(define m-plus-i (+ m i))
(cond
[(< m-plus-i end)
(define the-char (get-char m-plus-i))
(cond
[(pair? the-char)
;; found an embedded editor with a search result; transmit it
(if just-one? (if just-one?
#f the-char
results))))))))))) (cons the-char (loop (+ m 1) 0)))]
[(and (char? the-char) (char=? (string-ref word i) the-char))
(cond
[(= i word-len-minus-one)
(if just-one?
(convert-result m word forward? beginning-of-match?)
(cons (convert-result m word forward? beginning-of-match?)
(loop (+ m 1) 0)))]
[else
(loop m (+ i 1))])]
[else
(define t-i (vector-ref t i))
(cond
[t-i
(loop (- m-plus-i t-i) t-i)]
[else
(loop (+ m 1) 0)])])]
[else
(if just-one? #f '())])))
(define/private (build-table word)
(define t (make-vector (string-length word) #f))
(when ((string-length word) . > . 1)
(vector-set! t 1 0)
(let loop ([pos 2]
[cnd 0])
(when (< pos (string-length word))
(cond
[(char=? (string-ref word (- pos 1))
(string-ref word cnd))
(vector-set! t pos (+ cnd 1))
(loop (+ pos 1) (+ cnd 1))]
[(> cnd 0)
(loop pos (vector-ref t cnd))]
[else
(vector-set! t pos 0)
(loop (+ pos 1) cnd)]))))
t)
;; ---------------------------------------- ;; ----------------------------------------