simplify and extend the find-string method in text%

This implementation runs about 10% faster than the old when searching
for "λ" in (collection-file-path "unit.rkt" "drracket" "private"). It
is copied directly from the search search algorithm from Wikipedia
called Knuth-Morris-Pratt, but with a fancier version of get-char that
cooperates with text%'s snip interface for more efficient linear
scans of characters.

It also generalizes the previous one by supporting searching into
embedded text% objects inside editor-snip% objects

related to PR 14688
related to PR 14687
This commit is contained in:
Robby Findler 2014-10-25 21:45:35 -05:00
parent 28bc3ebeac
commit 06b9b4f0a2
3 changed files with 416 additions and 163 deletions

View File

@ -668,7 +668,28 @@ If @racket[case-sensitive?] is @racket[#f], then an uppercase and lowercase
} }
@defmethod[(find-string-embedded [str string?]
[direction (or/c 'forward 'backward) 'forward]
[start (or/c exact-nonnegative-integer? 'start) 'start]
[end (or/c exact-nonnegative-integer? 'eof) 'eof]
[get-start? any/c #t]
[case-sensitive? any/c #t])
(or/c exact-nonnegative-integer?
#f
(cons/c
(is-a?/c editor<%>)
(flat-rec-contract
nested-editor-search-result
(or/c (cons/c (is-a?/c editor<%>)
nested-editor-search-result)
exact-nonnegative-integer?))))]{
Like @method[text% find-string], but also searches in embedded editors,
returning a series of cons pairs whose @racket[car] positions
are the editors on the path to the editor where the search
string occurred and whose final @racket[cdr] position is the
search result position.
}
@defmethod[(find-string-all [str string?] @defmethod[(find-string-all [str string?]
[direction (or/c 'forward 'backward) 'forward] [direction (or/c 'forward 'backward) 'forward]
[start (or/c exact-nonnegative-integer? 'start) 'start] [start (or/c exact-nonnegative-integer? 'start) 'start]
@ -683,6 +704,24 @@ Finds all occurrences of a string using @method[text% find-string]. If
} }
@defmethod[(find-string-embedded-all [str string?]
[direction (or/c 'forward 'backward) 'forward]
[start (or/c exact-nonnegative-integer? 'start) 'start]
[end (or/c exact-nonnegative-integer? 'eof) 'eof]
[get-start? any/c #t]
[case-sensitive any/c #t])
(listof (or/c exact-nonnegative-integer?
(cons/c
(is-a?/c editor<%>)
(flat-rec-contract
nested-editor-search-result
(or/c (cons/c (is-a?/c editor<%>)
nested-editor-search-result)
(listof exact-nonnegative-integer?))))))]{
Like @method[text% find-string-embedded], but also searches in embedded
editors, returning search results a list of the editors that contain
the matches.
}
@defmethod[(find-wordbreak [start (or/c (box/c exact-nonnegative-integer?) #f)] @defmethod[(find-wordbreak [start (or/c (box/c exact-nonnegative-integer?) #f)]
[end (or/c (box/c exact-nonnegative-integer?) #f)] [end (or/c (box/c exact-nonnegative-integer?) #f)]

View File

@ -0,0 +1,142 @@
#lang racket/base
(module test racket/base
(require rackunit
racket/gui/base
(only-in "text.rkt" do-find-string-all)
racket/class)
(define (txt s)
(define t (new text%))
(send t insert s)
(send t set-position 0 0)
t)
(define (kmp-search txt str all?)
(send txt do-find-string-all str 'forward 0 (send txt last-position) (not all?) #t #t #f))
(check-equal? (kmp-search (txt "x") "x" #f) 0)
(check-equal? (kmp-search (txt "yx") "x" #f) 1)
(check-equal? (kmp-search (txt "yx") "yx" #f) 0)
(check-equal? (kmp-search (txt "zyx") "yx" #f) 1)
(check-equal? (kmp-search (txt "yyx") "yx" #f) 1)
(check-equal? (kmp-search (txt "qqq") "yx" #f) #f)
(check-equal? (kmp-search (txt "ABC ABCDAB ABCDABCDABDE") "ABCDABD" #f) 15)
(check-equal? (kmp-search (txt "xxxx") "y" #t) '())
(check-equal? (kmp-search (txt "xxxx") "x" #t) '(0 1 2 3))
(check-equal? (kmp-search (txt "xyxy") "x" #t) '(0 2))
(check-equal? (kmp-search (txt " x\n ") "x" #t) '(1))
(check-equal? (kmp-search (txt "") "x" #t) '())
(check-equal? (send (txt " x\n ") do-find-string-all "X" 'forward 0 'eof #f #t #f #f)
'(1))
(check-equal? (send (txt "xXxXxX") do-find-string-all "x" 'forward 0 'eof #f #t #f #f)
'(0 1 2 3 4 5))
(check-equal? (send (txt "xXxXxX") do-find-string-all "x" 'forward 2 4 #f #t #f #f)
'(2 3))
(check-equal? (send (txt "xyxyxyxyxyx") do-find-string-all "xy" 'forward 2 5 #f #t #t #f)
'(2))
(check-equal? (send (txt "abcdabcdabcd") do-find-string-all "abcd" 'forward 0 'eof #f #f #t #f)
'(4 8 12))
(check-equal? (send (txt "qqabcdabcdabcd") do-find-string-all "abcd" 'forward 0 'eof #t #f #t #f)
6)
(check-equal? (send (txt "qqabcdabcdabcd") do-find-string-all "abcd" 'forward 0 'eof #t #t #t #f)
2)
(check-equal? (send (txt "abcdabcdabcd") do-find-string-all "abcd" 'backward 12 0 #f #t #t #f)
'(12 8 4))
(check-equal? (send (txt "abcdabcdabcd") do-find-string-all "abcd" 'backward 12 0 #f #f #t #f)
'(8 4 0))
(check-equal? (send (txt "abcd\nabcdabcd") do-find-string-all "abcd" 'backward 12 0 #f #t #t #f)
'(9 4))
(check-equal? (send (txt "abcd\nabcdabcd") do-find-string-all "abcd" 'backward 13 0 #f #t #t #f)
'(13 9 4))
(check-equal? (send (txt "abcdabcd\nabcd") do-find-string-all "abcd" 'backward 12 0 #f #t #t #f)
'(8 4))
(check-equal? (send (txt "abcdabcd\nabcd") do-find-string-all "abcd" 'backward 13 0 #f #t #t #f)
'(13 8 4))
(check-equal? (send (txt "abcdabcd\nabcd") do-find-string-all "abcd" 'backward 8 0 #f #t #t #f)
'(8 4))
(check-equal? (send (txt "abcdabcd\nabcd") do-find-string-all "abcd" 'forward 4 13 #f #t #t #f)
'(4 9))
(check-equal? (send (txt "xyz") do-find-string-all "xyz" 'backward 3 0 #t #f #t #f)
0)
(check-equal? (send (txt "xyz") do-find-string-all "xyz" 'backward 3 0 #t #t #t #f)
3)
(let ([t1 (new text%)]
[t2 (new text%)])
(send t1 insert "abc")
(send t1 insert (new editor-snip% [editor t2]))
(send t1 insert "abc")
(send t2 insert "abc")
(check-equal? (send t1 do-find-string-all "abc" 'forward 0 (send t1 last-position) #f #t #t #t)
(list 0 (list t2 0) 4)))
(let ([t1 (new text%)]
[t2 (new text%)])
(send t1 insert "abc")
(send t1 insert (new editor-snip% [editor t2]))
(send t1 insert "abc")
(send t2 insert "abc")
(check-equal? (send t1 do-find-string-all "abc" 'backward (send t1 last-position) 0 #f #t #t #t)
(list 7 (list t2 3) 3)))
(let ([t1 (new text%)]
[t2 (new text%)])
(send t1 insert "abc")
(send t1 insert (new editor-snip% [editor t2]))
(send t1 insert "abcd")
(send t2 insert "abc")
(check-equal? (send t1 do-find-string-all "abcd" 'forward 0 (send t1 last-position) #t #t #t #t)
4))
(let ([t1 (new text%)]
[t2 (new text%)])
(send t1 insert "abc")
(send t1 insert (new editor-snip% [editor t2]))
(send t1 insert "abc")
(send t2 insert "abcd")
(check-equal? (send t1 do-find-string-all "abcd" 'forward 0 (send t1 last-position) #t #t #t #t)
(cons t2 0)))
(let ([t1 (new text%)]
[t2 (new text%)]
[pb (new pasteboard%)])
(send t1 insert "abc")
(send t1 insert (new editor-snip% [editor pb]))
(send pb insert (new editor-snip% [editor t2]))
(send t1 insert "abc")
(send t2 insert "abcd")
(check-equal? (send t1 do-find-string-all "abcd" 'forward 0 (send t1 last-position) #t #t #t #t)
(list* pb t2 0)))
(let ([t1 (new text%)]
[t2 (new text%)]
[t3 (new text%)]
[pb (new pasteboard%)])
(send t1 insert "abc")
(send t1 insert (new editor-snip% [editor pb]))
(send pb insert (new editor-snip% [editor t2]))
(send pb insert (new editor-snip% [editor t3]))
(send t1 insert "abc")
(send t2 insert "abcd")
(send t3 insert "abcd")
(check-equal? (send t1 do-find-string-all "abcd" 'forward 0 (send t1 last-position) #f #t #t #t)
(list (list pb (list t2 0) (list t3 0)))))
(let ([t1 (new text%)])
(send t1 insert "abc")
(define es (new editor-snip%))
(send t1 insert es)
(send t1 insert "abc")
(check-equal? (send t1 do-find-string-all "abcd" 'forward 0 (send t1 last-position) #f #t #t #t)
'()))
(let ([t1 (new text%)]
[pb (new pasteboard%)])
(send t1 insert "abc")
(send t1 insert (new editor-snip% [editor pb]))
(send t1 insert "abc")
(send pb insert (new editor-snip%))
(send pb insert (new editor-snip%))
(check-equal? (send t1 do-find-string-all "abcd" 'forward 0 (send t1 last-position) #f #t #t #t)
'()))
(check-equal? (send (txt "aaa") find-string-all "a") '(0 1 2)))

View File

@ -19,13 +19,17 @@
"keymap.rkt" "keymap.rkt"
(only-in "cycle.rkt" (only-in "cycle.rkt"
printer-dc% printer-dc%
set-text%!) set-text%!
editor-snip%)
"wordbreak.rkt" "wordbreak.rkt"
"stream.rkt" "stream.rkt"
"wx.rkt") "wx.rkt")
(provide text% (provide text%
add-text-keymap-functions) add-text-keymap-functions
;; for the test suite
do-find-string-all)
;; ---------------------------------------- ;; ----------------------------------------
@ -68,6 +72,8 @@
(define in-delayed-refresh (make-parameter #f)) (define in-delayed-refresh (make-parameter #f))
(define-local-member-name do-find-string-all do-find-string)
(defclass text% editor% (defclass text% editor%
(inherit-field s-admin (inherit-field s-admin
s-offscreen s-offscreen
@ -3491,9 +3497,9 @@
[(make-alts exact-nonnegative-integer? (symbol-in eof)) [end 'eof]] [(make-alts exact-nonnegative-integer? (symbol-in eof)) [end 'eof]]
[any? [bos? #t]] [any? [bos? #t]]
[any? [case-sens? #t]]) [any? [case-sens? #t]])
(if (not (check-recalc #f #f)) (if (check-recalc #f #f)
#f (do-find-string-all str direction start end #t bos? case-sens? #f)
(do-find-string-all str direction start end #t bos? case-sens?))) #f))
(def/public (find-string-all [string? str] (def/public (find-string-all [string? str]
[(symbol-in forward backward) [direction 'forward]] [(symbol-in forward backward) [direction 'forward]]
@ -3501,9 +3507,29 @@
[(make-alts exact-nonnegative-integer? (symbol-in eof)) [end 'eof]] [(make-alts exact-nonnegative-integer? (symbol-in eof)) [end 'eof]]
[any? [bos? #t]] [any? [bos? #t]]
[any? [case-sens? #t]]) [any? [case-sens? #t]])
(if (not (check-recalc #f #f)) (if (check-recalc #f #f)
null (do-find-string-all str direction start end #f bos? case-sens? #f)
(reverse (do-find-string-all str direction start end #f bos? case-sens?)))) null))
(def/public (find-string-embedded [string? str]
[(symbol-in forward backward) [direction 'forward]]
[(make-alts exact-nonnegative-integer? (symbol-in start)) [start 'start]]
[(make-alts exact-nonnegative-integer? (symbol-in eof)) [end 'eof]]
[any? [bos? #t]]
[any? [case-sens? #t]])
(if (check-recalc #f #f)
(do-find-string-all str direction start end #t bos? case-sens? #t)
#f))
(def/public (find-string-embedded-all [string? str]
[(symbol-in forward backward) [direction 'forward]]
[(make-alts exact-nonnegative-integer? (symbol-in start)) [start 'start]]
[(make-alts exact-nonnegative-integer? (symbol-in eof)) [end 'eof]]
[any? [bos? #t]]
[any? [case-sens? #t]])
(if (check-recalc #f #f)
(do-find-string-all str direction start end #f bos? case-sens? #t)
null))
(def/public (find-newline [(symbol-in forward backward) [direction 'forward]] (def/public (find-newline [(symbol-in forward backward) [direction 'forward]]
[(make-alts exact-nonnegative-integer? (symbol-in start)) [start 'start]] [(make-alts exact-nonnegative-integer? (symbol-in start)) [start 'start]]
@ -3526,160 +3552,206 @@
#f #f
pos)))) pos))))
(define/private (do-find-string-all str direction ;; this is only public for the test suite
start end ; (do-find-string-all is bound by define-local-member-name)
just-one? (define/public (do-find-string-all _word direction
bos? _start _end
case-sens?) just-one? beginning-of-match? case-sens? recur-inside?)
(define end
(let ([start (min (if (symbol? start) (cond
startpos [(equal? _end 'eof) (last-position)]
start) [else _end]))
len)] (define start
[end (min (if (symbol? end) (cond
(if (eq? direction 'forward) [(equal? _start 'start)
len (get-start-position)]
0) [else _start]))
end) (define forward? (equal? direction 'forward))
len)]) (define word
(let ([total-count (cond
(if (eq? direction 'backward) [forward? _word]
(- start end) [else
(- end start))]) (define l (string-length _word))
(if (or (negative? total-count) (define s (make-string l))
(string=? str "")) (for ([i (in-range (string-length _word))])
(if just-one? #f null) (string-set! s i (string-ref _word (- l i 1))))
s]))
(let ([slen (string-length str)]
[str (if case-sens? (do-find-string word start end
str just-one? case-sens? forward? recur-inside? beginning-of-match?))
(string-foldcase str))])
(let-values ([(snip s-pos) (find-snip/pos start (if (eq? direction 'forward) 'after 'before))]) (define/private (convert-result m word forward? beginning-of-match?)
(cond
(if (not snip) [forward?
(if just-one? #f null) (if beginning-of-match?
m
;; Knuth-Bendix (+ m (string-length word)))]
[else
(let-values ([(offset shorten sbase beyond sgoal direction) (define len (last-position))
(if (eq? direction 'forward) (if beginning-of-match?
(values (- start s-pos) 0 0 -1 slen 1) (- len m)
(values 0 (- (+ s-pos (snip->count snip)) start) (- slen 1) slen -1 -1))] (- len m (string-length word)))]))
[(smap) (make-vector slen 0)])
;; this uses the Knuth-Morris-Pratt string search algorithm, according to
;; initialize smap: ;; wikipedia: http://en.wikipedia.org/wiki/KnuthMorrisPratt_algorithm
(vector-set! smap sbase beyond) ;; this is a define-local-member-name to support the recur-inside? functionality
(let loop ([s beyond] (define/public (do-find-string _word _start _end
[i (+ sbase direction)]) just-one? case-sens? forward? recur-inside? beginning-of-match?)
(unless (= i sgoal) (define word (if case-sens?
(let iloop ([s s]) _word
(if (and (not (= beyond s)) (string-downcase _word)))
(not (char=? (string-ref str (+ s direction)) (string-ref str i)))) (define latest-snip-str #f)
(iloop (vector-ref smap s)) (define latest-snip-len #f)
(let ([s (if (char=? (string-ref str (+ s direction)) (define latest-snip-position #f)
(string-ref str i)) (define latest-snip #f)
(+ s direction) (define last-pos (last-position))
s)])
(vector-set! smap i s) (define start (if forward? _start (- last-pos _start)))
(loop s (+ i direction))))))) (define end (if forward? _end (- last-pos _end)))
(define text "")
(let a-loop ([s beyond] ;; the algorithm may consider the same position
[s-pos s-pos] ;; multiple times, so we track which positions that
[snip snip] ;; have embedded editors that are already considered.
[total-count total-count] (define embedded-editors-considered (make-hash))
[offset offset]
[shorten shorten] (define (get-char _i)
[results null]) (define i (if forward? _i (- last-pos _i 1)))
(if (and snip (positive? total-count)) (cond
(let*-values ([(need) (- (snip->count snip) shorten offset)] [(and latest-snip-str
[(need offset) (< -1
(if (need . > . total-count) (- i latest-snip-position)
(if (direction . < . 0) latest-snip-len))
(values total-count (+ offset (- need total-count))) (string-ref latest-snip-str (- i latest-snip-position))]
(values total-count offset)) [else
(values need offset))] (define-values (guess-snip guess-snip-position guess-snip-len)
[(total-count) (- total-count need)]) (cond
[(not latest-snip)
(let b-loop ([checked 0] (define fst (find-first-snip))
[need need] (values fst (and fst 0) (and fst (send fst get-count)))]
[results results]) [forward?
(let* ([thistime (min need 255)] (define next (send latest-snip next))
[need (- need thistime)] (values next
[thisoffset (+ offset (if (direction . < . 0) need checked))] (and next (+ latest-snip-position latest-snip-len))
[wl? write-locked?] (and next (send next get-count)))]
[fl? flow-locked?]) [else
(when (< (string-length text) (send snip get-count)) (define prev (send latest-snip previous))
(set! text (make-string (send snip get-count)))) (define pc (and prev (send prev get-count)))
(set! write-locked? #t) (values prev
(set! flow-locked? #t) (and prev (- latest-snip-position pc))
(send snip get-text! text thisoffset thistime 0) pc)]))
(set! write-locked? wl?) (cond
(set! flow-locked? fl?) [(and guess-snip
(<= guess-snip-position i)
(let c-loop ([i (if (direction . > . 0) 0 (- thistime 1))] (< i (+ guess-snip-position guess-snip-len)))
[n thistime] (set! latest-snip guess-snip)
[s s] (set! latest-snip-position guess-snip-position)
[results results]) (set! latest-snip-len guess-snip-len)]
(if (zero? n) [else
(if (positive? need) (define b (box #f))
(set! latest-snip (find-snip i 'after-or-none b))
(b-loop (add1 checked) (when latest-snip
need (set! latest-snip-position (unbox b))
results) (set! latest-snip-len (send latest-snip get-count)))])
(let* ([s-pos (if (direction . > . 0) (when (or (not latest-snip-str)
(+ s-pos (snip->count snip)) (< (string-length latest-snip-str)
s-pos)] latest-snip-len))
[snip (if (direction . > . 0) (set! latest-snip-str (make-string latest-snip-len)))
(snip->next snip) (send latest-snip get-text! latest-snip-str 0 latest-snip-len 0)
(snip->prev snip))] (unless case-sens?
[s-pos (if (and snip (direction . < . 0)) (for ([c (in-range latest-snip-len)])
(- s-pos (snip->count snip)) (string-set! latest-snip-str c (char-downcase (string-ref latest-snip-str c)))))
s-pos)]) (cond
(a-loop s [(and recur-inside?
s-pos (is-a? latest-snip editor-snip%))
snip (cond
total-count [(hash-ref embedded-editors-considered i #f) #f]
0 [else
0 (hash-set! embedded-editors-considered i #t)
results))) (let loop ([snip latest-snip])
(define ed (send snip get-editor))
(let* ([n (sub1 n)] (cond
[c (string-ref text i)] [(is-a? ed text%)
[c (if case-sens? c (char-foldcase c))] (define lp (send ed last-position))
[s (let loop ([s s]) (define result
(if (and (not (= beyond s)) (send ed do-find-string _word
(not (char=? (string-ref str (+ s direction)) c))) (if forward? 0 lp) (if forward? lp 0)
(loop (vector-ref smap s)) just-one? case-sens? forward? recur-inside? beginning-of-match?))
s))]) (and result (not (null? result)) (cons ed result))]
(if (char=? (string-ref str (+ s direction)) c) [(not ed) #f]
(let ([s (+ s direction)]) [else
(if (= (+ s direction) sgoal) (define inner-result
(let* ([p (+ s-pos i thisoffset)] (let inner-loop ([inner-snip (send ed find-first-snip)])
[p (if bos? (cond
(if (direction . < . 0) [(is-a? inner-snip editor-snip%)
(+ p slen) (define this-one (loop inner-snip))
(- p (- slen 1))) (if just-one?
(if (direction . > . 0) (or this-one
(add1 p) (inner-loop (send inner-snip next)))
p))]) (if this-one
(if just-one? (cons this-one
p ;; <------ single result returned here (inner-loop (send inner-snip next)))
(c-loop (+ i direction) (inner-loop (send inner-snip next))))]
n [(not inner-snip) (if just-one? #f '())]
beyond [else (inner-loop (send inner-snip next))])))
(cons p results)))) (and inner-result
(c-loop (+ i direction) (pair? inner-result)
n (cons ed inner-result))]))])]
s [else
results))) (string-ref latest-snip-str (- i latest-snip-position))])]))
(c-loop (+ i direction)
n (define t (build-table word))
s (define word-len-minus-one (- (string-length word) 1))
results))))))))
(if just-one? (let loop ([m start]
#f [i 0])
results))))))))))) (define m-plus-i (+ m i))
(cond
[(< m-plus-i end)
(define the-char (get-char m-plus-i))
(cond
[(pair? the-char)
;; found an embedded editor with a search result; transmit it
(if just-one?
the-char
(cons the-char (loop (+ m 1) 0)))]
[(and (char? the-char) (char=? (string-ref word i) the-char))
(cond
[(= i word-len-minus-one)
(if just-one?
(convert-result m word forward? beginning-of-match?)
(cons (convert-result m word forward? beginning-of-match?)
(loop (+ m 1) 0)))]
[else
(loop m (+ i 1))])]
[else
(define t-i (vector-ref t i))
(cond
[t-i
(loop (- m-plus-i t-i) t-i)]
[else
(loop (+ m 1) 0)])])]
[else
(if just-one? #f '())])))
(define/private (build-table word)
(define t (make-vector (string-length word) #f))
(when ((string-length word) . > . 1)
(vector-set! t 1 0)
(let loop ([pos 2]
[cnd 0])
(when (< pos (string-length word))
(cond
[(char=? (string-ref word (- pos 1))
(string-ref word cnd))
(vector-set! t pos (+ cnd 1))
(loop (+ pos 1) (+ cnd 1))]
[(> cnd 0)
(loop pos (vector-ref t cnd))]
[else
(vector-set! t pos 0)
(loop (+ pos 1) cnd)]))))
t)
;; ---------------------------------------- ;; ----------------------------------------