adjust the splay tree implementation so that it creates fewer cons

cells when searching in the tree

Two tricks: represent lists of nodes as improper lists so singleton
lists don't allocate a cons and pass around two accumulators that
correspond to the hd & tl of a path, instead of cons'ing that up
into a list and them immediately taking it apart again

measurement: when starting up drracket with
collects/drracket/private/unit.rkt and then waiting for the colorer to
finish, and then inserting an open quote right before the first open
quote in the file (and waiting again for the colorer to finish)
creates 249000 cons cells before this change and 116000 after this
change

After a little more work, I'm pretty much convinced that this was
the wrong approach and that the splaying implementation should just
change to not allocate the paths into lists at all, thus removing
the other 116k cons cells. (I plan to get to this another day;
it should not be difficult now that I roughly understand how these
things work.)

I also looked into top-down splaying and found these notes to
be illuminating:

  http://digital.cs.usu.edu/~allan/DS/Notes/Ch22.pdf

They essentially convinced me that we cannot use top-down splaying
here, since the "reassembling" stage requires moving some arbitrary,
unexplored subtree from a right-child to a left-child, and thus the
left-subtree-length cannot be updated properly.
This commit is contained in:
Robby Findler 2012-04-04 09:08:44 -05:00
parent a984f68a46
commit aa8b280f1c

View File

@ -12,46 +12,56 @@
;; ----- The algorithmic implementation of the splay tree for a buffer ------ ;; ----- The algorithmic implementation of the splay tree for a buffer ------
;; search-max: tree * tree list -> tree ;; search-max: tree * tree list -> tree
(define (search-max node path) (define (search-max node path) (search-dir node path node-right))
(cond
((not node)
(end-search path))
(else
(search-max (node-right node) (cons node path)))))
;; search-min: tree * tree list -> tree ;; search-min: tree * tree list -> tree
(define (search-min node path) (define (search-min node path) (search-dir node path node-left))
(define (search-dir node path direction)
(cond (cond
((not node) [(not node) #f]
(end-search path)) [else
(else (let loop ([node (direction node)]
(search-min (node-left node) (cons node path))))) [path-hd node]
[path-tl path]
[direction direction])
(cond
[(not node) (bottom-up-splay path-hd path-tl)]
[else
(loop (direction node) node (i-cons path-hd path-tl) direction)]))]))
;; internal-search: tree * NAT * NAT * tree list -> tree ;; internal-search: tree * NAT * NAT * tree list -> tree
;; key-position is the position in the buffer we are looking for ;; key-position is the position in the buffer we are looking for
;; offset is the offset for the whole subtree of node in the buffer. ;; offset is the offset for the whole subtree of node in the buffer.
;; path is the path back to the root ;; path is the path back to the root
(define (internal-search node key-position offset path) (define (internal-search node key-position)
(cond (cond
((not node) [(not node) #f]
(end-search path)) [else
(else (define-values (first-node first-offset) (internal-direction node key-position 0))
(let* ((node-start (+ offset (node-left-subtree-length node))) (let loop ([node first-node]
(node-end (+ node-start (node-token-length node)))) [key-position key-position]
[offset first-offset]
[path-hd node]
[path-tl '()])
(cond (cond
((< key-position node-start) [(not node)
(internal-search (node-left node) key-position offset (cons node path))) (bottom-up-splay path-hd path-tl)]
((>= key-position node-end) [else
(internal-search (node-right node) key-position node-end (cons node path))) (define-values (next-node next-offset) (internal-direction node key-position offset))
(else (loop next-node key-position next-offset node (i-cons path-hd path-tl))]))]))
(bottom-up-splay node path)))))))
;; end-search: tree list -> tree (define (internal-direction node key-position offset)
(define (end-search path) (define node-start (+ offset (node-left-subtree-length node)))
(define node-end (+ node-start (node-token-length node)))
(cond (cond
((null? path) #f) [(< key-position node-start)
(else (bottom-up-splay (car path) (cdr path))))) (values (node-left node) offset)]
[(>= key-position node-end)
(values (node-right node) node-end)]
[else
(values #f #f)]))
;; update-subtree-length-left-rotate: tree * tree -> ;; update-subtree-length-left-rotate: tree * tree ->
(define (update-subtree-length-left-rotate self parent) (define (update-subtree-length-left-rotate self parent)
(set-node-left-subtree-length! parent (set-node-left-subtree-length! parent
@ -68,66 +78,67 @@
;; bottom-up-splay: tree * tree list -> tree ;; bottom-up-splay: tree * tree list -> tree
(define (bottom-up-splay self path) (define (bottom-up-splay self path)
(cond (let loop ([self self] [path path])
((null? path) self) ;; node is root already (cond
((null? (cdr path)) ;; node's parent is root ((null? path) self) ;; node is root already
(let ((parent (car path))) ((null? (i-cdr path)) ;; node's parent is root
(cond (let ((parent (i-car path)))
((eq? self (node-left parent)) (cond
(set-node-left! parent (node-right self)) ((eq? self (node-left parent))
(set-node-right! self parent) (set-node-left! parent (node-right self))
(update-subtree-length-left-rotate self parent)) (set-node-right! self parent)
(else (update-subtree-length-left-rotate self parent))
(set-node-right! parent (node-left self)) (else
(set-node-left! self parent) (set-node-right! parent (node-left self))
(update-subtree-length-right-rotate self parent)))) (set-node-left! self parent)
self) (update-subtree-length-right-rotate self parent))))
(else self)
(let ((grand (cadr path)) (else
(parent (car path))) (let ((grand (i-cadr path))
(cond (parent (i-car path)))
((eq? self (node-left parent)) (cond
(cond ((eq? self (node-left parent))
((eq? parent (node-left grand)) (cond
(set-node-left! grand (node-right parent)) ((eq? parent (node-left grand))
(set-node-right! parent grand) (set-node-left! grand (node-right parent))
(set-node-left! parent (node-right self)) (set-node-right! parent grand)
(set-node-right! self parent) (set-node-left! parent (node-right self))
(update-subtree-length-left-rotate parent grand) (set-node-right! self parent)
(update-subtree-length-left-rotate self parent)) (update-subtree-length-left-rotate parent grand)
(else (update-subtree-length-left-rotate self parent))
(set-node-right! grand (node-left self)) (else
(set-node-left! self grand) (set-node-right! grand (node-left self))
(set-node-left! parent (node-right self)) (set-node-left! self grand)
(set-node-right! self parent) (set-node-left! parent (node-right self))
(update-subtree-length-left-rotate self parent) (set-node-right! self parent)
(update-subtree-length-right-rotate self grand)))) (update-subtree-length-left-rotate self parent)
(else (update-subtree-length-right-rotate self grand))))
(cond (else
((eq? parent (node-right grand)) (cond
(set-node-right! grand (node-left parent)) ((eq? parent (node-right grand))
(set-node-left! parent grand) (set-node-right! grand (node-left parent))
(set-node-right! parent (node-left self)) (set-node-left! parent grand)
(set-node-left! self parent) (set-node-right! parent (node-left self))
(update-subtree-length-right-rotate parent grand) (set-node-left! self parent)
(update-subtree-length-right-rotate self parent)) (update-subtree-length-right-rotate parent grand)
(else (update-subtree-length-right-rotate self parent))
(set-node-left! grand (node-right self)) (else
(set-node-right! self grand) (set-node-left! grand (node-right self))
(set-node-right! parent (node-left self)) (set-node-right! self grand)
(set-node-left! self parent) (set-node-right! parent (node-left self))
(set-node-left-subtree-length! grand (set-node-left! self parent)
(- (node-left-subtree-length grand) (set-node-left-subtree-length! grand
(node-left-subtree-length parent) (- (node-left-subtree-length grand)
(node-token-length parent) (node-left-subtree-length parent)
(node-left-subtree-length self) (node-token-length parent)
(node-token-length self))) (node-left-subtree-length self)
(update-subtree-length-right-rotate self parent))))) (node-token-length self)))
(unless (null? (cddr path)) (update-subtree-length-right-rotate self parent)))))
(if (eq? grand (node-left (caddr path))) (unless (null? (i-cddr path))
(set-node-left! (caddr path) self) (if (eq? grand (node-left (i-caddr path)))
(set-node-right! (caddr path) self))) (set-node-left! (i-caddr path) self)
(bottom-up-splay self (cddr path)))))) (set-node-right! (i-caddr path) self)))
(loop self (i-cddr path)))))))
(define (size node acc) (define (size node acc)
(cond (cond
@ -159,6 +170,14 @@
(f node-start (node-token-length node) (node-token-data node)) (f node-start (node-token-length node) (node-token-data node))
(do-splay-tree-for-each f (node-right node) (+ node-start (node-token-length node)))))) (do-splay-tree-for-each f (node-right node) (+ node-start (node-token-length node))))))
;; represent paths as improper lists of nodes
(define (i-cons hd tl) (if (null? tl) hd (cons hd tl)))
(define (i-car pr) (if (node? pr) pr (car pr)))
(define (i-cdr pr) (if (node? pr) '() (cdr pr)))
(define (i-cadr pr) (i-car (i-cdr pr)))
(define (i-cddr pr) (i-cdr (i-cdr pr)))
(define (i-caddr pr) (i-car (i-cddr pr)))
;; --------------------- The interface to the splay tree -------------------- ;; --------------------- The interface to the splay tree --------------------
(define-local-member-name set-root) (define-local-member-name set-root)
@ -221,7 +240,7 @@
;; Moves the node at key-position to the root ;; Moves the node at key-position to the root
(define/public (search! key-position) (define/public (search! key-position)
(when root (when root
(set! root (internal-search root key-position 0 null)))) (set! root (internal-search root key-position))))
;; search-max!: -> ;; search-max!: ->
;; moves the maximum node to the root ;; moves the maximum node to the root