From 3863e63ef9abd200c16e44ab1980f06c19748397 Mon Sep 17 00:00:00 2001 From: Bob Burger Date: Thu, 7 Dec 2017 17:17:03 -0500 Subject: [PATCH 1/3] added support for Visual Studio 2017.15.5 original commit: 33eaccf5d0105186d66faa76e8463bab9369bf1a --- LOG | 4 +++- wininstall/locate-vcredist.bat | 5 +++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/LOG b/LOG index a2c1e94447..b1abeb3710 100644 --- a/LOG +++ b/LOG @@ -755,4 +755,6 @@ - fix signature of bytevector-[u/s]16-native-set! primdata.ss - fix enumerate signature - primdata.ss \ No newline at end of file + primdata.ss +- added support for Visual Studio 2017.15.5 + wininstall/locate-vcredist.bat diff --git a/wininstall/locate-vcredist.bat b/wininstall/locate-vcredist.bat index 3dabab8b6d..c31aeba68b 100755 --- a/wininstall/locate-vcredist.bat +++ b/wininstall/locate-vcredist.bat @@ -17,6 +17,11 @@ @SET "Path32=%VCINSTALLDIR%Redist\MSVC\14.11.25325\MergeModules\Microsoft_VC141_CRT_x86.msm" @SET "Path64=%VCINSTALLDIR%Redist\MSVC\14.11.25325\MergeModules\Microsoft_VC141_CRT_x64.msm" ) + + @IF EXIST "%VCINSTALLDIR%Redist\MSVC\14.12.25810" ( + @SET "Path32=%VCINSTALLDIR%Redist\MSVC\14.12.25810\MergeModules\Microsoft_VC141_CRT_x86.msm" + @SET "Path64=%VCINSTALLDIR%Redist\MSVC\14.12.25810\MergeModules\Microsoft_VC141_CRT_x64.msm" + ) ) @DEL vcredist.wxs >nul 2>&1 From f1b9fc95da10578ef640df384bc0468c06939f61 Mon Sep 17 00:00:00 2001 From: Andy Keep Date: Tue, 12 Dec 2017 08:43:01 -0500 Subject: [PATCH 2/3] Fixing output of substring-fill! and vector-fill! - fixed substring-fill! and vector-fill! to return void, reflecting the documented return value of unspecified value. Also changes substring-fill! to use define-who instead of repeating 'substring-fill! in all the error messages. 5_4.ss, 5_6.ss original commit: 3f65788b5422693f3648a9e2fe575f464eb31ccd --- LOG | 6 ++++++ s/5_4.ss | 10 +++++----- s/5_6.ss | 2 +- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/LOG b/LOG index b1abeb3710..fc9f64c523 100644 --- a/LOG +++ b/LOG @@ -758,3 +758,9 @@ primdata.ss - added support for Visual Studio 2017.15.5 wininstall/locate-vcredist.bat +- fixed substring-fill! and vector-fill! to return void, reflecting the + documented return value of unspecified value. Also changes substring-fill! + to use define-who instead of repeating 'substring-fill! in all the error + messages. + 5_4.ss, 5_6.ss + diff --git a/s/5_4.ss b/s/5_4.ss index 0e46fab705..b92e339a02 100644 --- a/s/5_4.ss +++ b/s/5_4.ss @@ -120,19 +120,19 @@ ($string-set-immutable! v2) v2)]))) -(define substring-fill! +(define-who substring-fill! (lambda (s m n c) (unless (mutable-string? s) - ($oops 'substring-fill! "~s is not a mutable string" s)) + ($oops who "~s is not a mutable string" s)) (unless (char? c) - ($oops 'substring-fill! "~s is not a character" c)) + ($oops who "~s is not a character" c)) (let ([k (string-length s)]) (unless (and (fixnum? m) (fixnum? n) (fx<= 0 m n k)) - ($oops 'substring-fill! + ($oops who "~s and ~s are not valid start/end indices for ~s" m n s)) (do ([i m (fx+ i 1)]) - ((fx= i n) s) + ((fx= i n)) (string-set! s i c))))) (set! string-for-each diff --git a/s/5_6.ss b/s/5_6.ss index 4f5fde18d4..53cff82e79 100644 --- a/s/5_6.ss +++ b/s/5_6.ss @@ -85,7 +85,7 @@ (unless (mutable-vector? v) ($oops who "~s is not a mutable vector" v)) (let ([n (vector-length v)]) (do ([i 0 (fx+ i 1)]) - ((fx= i n) v) + ((fx= i n)) (vector-set! v i obj))))) (set! fxvector->list From 3ba909f3c679ffecf02af91f01f13b3931dbf004 Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Wed, 20 Dec 2017 07:26:47 -0700 Subject: [PATCH 3/3] avoid quadratics in call-live information Using a tree representation enables sharing to avoid a quadratic-sized compiled form and intermediate quadtraic-time/space representations for a program like this one, where there are N calls each with an average of N/2 live variables: (define vars (let loop ([i 10000]) (cond [(zero? i) '()] [else (cons (gensym) (loop (sub1 i)))]))) (time (begin (compile `(lambda ,vars ,@(map (lambda (v) `(,v)) vars))) (void))) Keeping the variables in tree form (since they're already collected that way) and memoizing reduces on the tree allows sharing to be constructed and preserved. The tree approach persists even to the runtime mask for live variables. original commit: 35942accb14d1226189605548a9e05ca95e3f0b6 --- LOG | 6 +- c/gc.c | 58 +++++- s/Mf-base | 4 +- s/arm32.ss | 2 +- s/cmacros.ss | 2 +- s/cpnanopass.ss | 446 ++++++++++++++-------------------------------- s/inspect.ss | 80 ++++++--- s/np-languages.ss | 6 +- s/ppc32.ss | 2 +- s/primdata.ss | 4 + s/tree.ss | 303 +++++++++++++++++++++++++++++++ s/types.ss | 6 +- s/x86.ss | 2 +- s/x86_64.ss | 2 +- 14 files changed, 564 insertions(+), 359 deletions(-) create mode 100644 s/tree.ss diff --git a/LOG b/LOG index fc9f64c523..1258713ca7 100644 --- a/LOG +++ b/LOG @@ -763,4 +763,8 @@ to use define-who instead of repeating 'substring-fill! in all the error messages. 5_4.ss, 5_6.ss - +- keep call-live in tree form, which allows more efficient computation + of live-pointer masks when many live variables span many calls + cpnanopass.ss, inspect.ss, np-languages.ss, primdata.ss, + tree.ss (added), types.ss, + x86_64.ss, cmacros.ss, gc.c, s/Mf-base diff --git a/c/gc.c b/c/gc.c index 4d2970f523..ba559b0ab4 100644 --- a/c/gc.c +++ b/c/gc.c @@ -43,6 +43,7 @@ static void sweep_port PROTO((ptr p)); static void sweep_thread PROTO((ptr p)); static void sweep_continuation PROTO((ptr p)); static void sweep_stack PROTO((uptr base, uptr size, uptr ret)); +static void sweep_live_tree PROTO((uptr range, ptr tree, ptr *pp)); static void sweep_record PROTO((ptr x)); static IGEN sweep_dirty_record PROTO((ptr x)); static void sweep_code_object PROTO((ptr tc, ptr co)); @@ -1528,7 +1529,7 @@ static void sweep_continuation(p) ptr p; { /* assumes stack has already been copied to newspace */ static void sweep_stack(base, fp, ret) uptr base, fp, ret; { ptr *pp; iptr oldret; - ptr num; + ptr livemask; while (fp != base) { if (fp < base) @@ -1540,33 +1541,74 @@ static void sweep_stack(base, fp, ret) uptr base, fp, ret; { ret = (iptr)(*pp); relocate_return_addr(pp) - num = ENTRYLIVEMASK(oldret); - if (Sfixnump(num)) { - uptr mask = UNFIX(num); + livemask = ENTRYLIVEMASK(oldret); + if (Sfixnump(livemask)) { + uptr mask = UNFIX(livemask); while (mask != 0) { pp += 1; if (mask & 0x0001) relocate(pp) mask >>= 1; } - } else { + } else if (Spairp(livemask)) { + /* A tree: (range . tree). The tree must be shallow enough that + recursion in `sweep_tree_live` is ok. */ + relocate(&ENTRYLIVEMASK(oldret)) + livemask = ENTRYLIVEMASK(oldret); + + relocate(&INITCDR(livemask)) + + sweep_live_tree(UNFIX(Scar(livemask)), Scdr(livemask), pp); + } else if (Sbignump(livemask)) { + /* As of the addition of the above tree form, we + don't expect bignums to be used as a mask anymore, + but allow them for now. */ iptr index; relocate(&ENTRYLIVEMASK(oldret)) - num = ENTRYLIVEMASK(oldret); - index = BIGLEN(num); + livemask = ENTRYLIVEMASK(oldret); + + index = BIGLEN(livemask); while (index-- != 0) { INT bits = bigit_bits; - bigit mask = BIGIT(num,index); + bigit mask = BIGIT(livemask,index); while (bits-- > 0) { pp += 1; if (mask & 1) relocate(pp) mask >>= 1; } } + } else { + S_error_abort("sweep_stack(gc): unreocgnized mask format"); } } } +static void sweep_live_tree(range, tree, pp) uptr range; ptr tree, *pp; { + /* A tree is either a fixnum or a pair of two trees, with + half of the range on the left and the rest on the right */ + if (Sfixnump(tree)) { + uptr mask = UNFIX(tree); + while (mask != 0) { + pp += 1; + if (mask & 0x0001) relocate(pp) + mask >>= 1; + } + } else if (tree == Strue) { + while (range-- > 0) { + pp += 1; + relocate(pp) + } + } else { + uptr split = range >> 1; + + relocate(&INITCAR(tree)) + relocate(&INITCDR(tree)) + + sweep_live_tree(split, Scar(tree), pp); + sweep_live_tree(range - split, Scdr(tree), pp + split); + } +} + static void sweep_record(x) ptr x; { ptr *pp; ptr num; ptr rtd; diff --git a/s/Mf-base b/s/Mf-base index ba99af842c..65f2d86dcf 100644 --- a/s/Mf-base +++ b/s/Mf-base @@ -146,7 +146,7 @@ macroobj =\ allsrc =\ ${basesrc} ${compilersrc} cmacros.ss ${archincludes} setup.ss debug.ss priminfo.ss primdata.ss layout.ss\ base-lang.ss expand-lang.ss primref.ss types.ss io-types.ss fasl-helpers.ss hashtable-types.ss\ - np-languages.ss + np-languages.ss tree.ss # doit uses a different Scheme process to compile each target doit: ${PetiteBoot} ${SchemeBoot} ${Cheader} ${Cequates} @@ -476,7 +476,7 @@ ${asm} ${obj} mkheader.so: ${macroobj} nanopass.so base-lang.ss expand-lang.ss p primvars.so setup.so mkheader.so env.so: cmacros.so priminfo.so primref.ss setup.so: debug.ss -${patchobj}: ${macroobj} nanopass.so base-lang.ss expand-lang.ss primref.ss types.ss io-types.ss fasl-helpers.ss hashtable-types.ss env.ss +${patchobj}: ${macroobj} nanopass.so base-lang.ss expand-lang.ss primref.ss types.ss tree.ss io-types.ss fasl-helpers.ss hashtable-types.ss env.ss cpnanopass.$m cpnanopass.patch: nanopass.so np-languages.ss ${archincludes} 5_4.$m: ../unicode/unicode-char-cases.ss ../unicode/unicode-charinfo.ss diff --git a/s/arm32.ss b/s/arm32.ss index 9710b6becd..30074b8923 100644 --- a/s/arm32.ss +++ b/s/arm32.ss @@ -2262,7 +2262,7 @@ (if (target-fixnum? lpm) `(long . ,(fix lpm)) `(abs 0 (object ,lpm))) - (aop-cons* `(asm livemask: ,(format "~b" lpm)) + (aop-cons* `(asm livemask: ,(if (number? lpm) (format "~b" lpm) (format "~s" lpm))) '(code-top-link) (aop-cons* `(asm code-top-link) `(long . ,fs) diff --git a/s/cmacros.ss b/s/cmacros.ss index 822d50d59f..edd55c12ed 100644 --- a/s/cmacros.ss +++ b/s/cmacros.ss @@ -1448,7 +1448,7 @@ [ptr link])) (define-primitive-structure-disps rp-header typemod - ([ptr livemask] + ([ptr livemask] ; a fixnum or (cons size tree) [uptr toplink] [iptr frame-size] [uptr mv-return-address])) diff --git a/s/cpnanopass.ss b/s/cpnanopass.ss index 8287a0b002..8bb967f721 100644 --- a/s/cpnanopass.ss +++ b/s/cpnanopass.ss @@ -250,236 +250,8 @@ (bytevector-u16-native-ref bv n)) count)))))))) - (module (empty-tree full-tree tree-extract tree-for-each tree-fold-left tree-bit-set? tree-bit-set tree-bit-unset tree-bit-count tree-same? tree-merge) - ; tree -> fixnum | (tree-node tree tree) - ; 0 represents any tree or subtree with no bits set, and a tree or subtree - ; with no bits set is always 0 - (define empty-tree 0) - - ; any tree or subtree with all bits set - (define full-tree #t) - - (define (full-fixnum size) (fxsrl (most-positive-fixnum) (fx- (fx- (fixnum-width) 1) size))) - - (define compute-split - (lambda (size) - (fxsrl size 1) - ; 2015/03/15 rkd: tried the following under the theory that we'd allocate - ; fewer nodes. for example, say fixmun-width is 30 and size is 80. if we - ; split 40/40 we create two nodes under the current node. if instead we - ; split 29/51 we create just one node and one fixnum under the current - ; node. this worked as planned; however, it reduced the number of nodes - ; created by only 3.3% on the x86 and made compile times slightly worse. - #;(if (fx<= size (fx* (fx- (fixnum-width) 1) 3)) (fx- (fixnum-width) 1) (fxsrl size 1)))) - - (meta-cond - [(fx= (optimize-level) 3) - (module (make-tree-node tree-node? tree-node-left tree-node-right) - (define make-tree-node cons) - (define tree-node? pair?) - (define tree-node-left car) - (define tree-node-right cdr))] - [else - (module (make-tree-node tree-node? tree-node-left tree-node-right) - (define-record-type tree-node - (nongenerative) - (sealed #t) - (fields left right) - (protocol - (lambda (new) - (lambda (left right) - (new left right))))) - (record-writer (record-type-descriptor tree-node) - (lambda (r p wr) - (define tree-node->s-exp - (lambda (tn) - (with-virgin-quasiquote - (let ([left (tree-node-left tn)] [right (tree-node-right tn)]) - `(tree-node - ,(if (tree-node? left) (tree-node->s-exp left) left) - ,(if (tree-node? right) (tree-node->s-exp right) right)))))) - (wr (tree-node->s-exp r) p))))]) - - (define tree-extract ; assumes empty-tree is 0 - (lambda (st size v) - (let extract ([st st] [size size] [offset 0] [x* '()]) - (cond - [(fixnum? st) - (do ([st st (fxsrl st 1)] - [offset offset (fx+ offset 1)] - [x* x* (if (fxodd? st) (cons (vector-ref v offset) x*) x*)]) - ((fx= st 0) x*))] - [(eq? st full-tree) - (do ([size size (fx- size 1)] - [offset offset (fx+ offset 1)] - [x* x* (cons (vector-ref v offset) x*)]) - ((fx= size 0) x*))] - [else - (let ([split (compute-split size)]) - (extract (tree-node-right st) (fx- size split) (fx+ offset split) - (extract (tree-node-left st) split offset x*)))])))) - - (define tree-for-each ; assumes empty-tree is 0 - (lambda (st size start end action) - (let f ([st st] [size size] [start start] [end end] [offset 0]) - (cond - [(fixnum? st) - (unless (eq? st empty-tree) - (do ([st (fxbit-field st start end) (fxsrl st 1)] [offset (fx+ offset start) (fx+ offset 1)]) - ((fx= st 0)) - (when (fxodd? st) (action offset))))] - [(eq? st full-tree) - (do ([start start (fx+ start 1)] [offset offset (fx+ offset 1)]) - ((fx= start end)) - (action offset))] - [else - (let ([split (compute-split size)]) - (when (fx< start split) - (f (tree-node-left st) split start (fxmin end split) offset)) - (when (fx> end split) - (f (tree-node-right st) (fx- size split) (fxmax (fx- start split) 0) (fx- end split) (fx+ offset split))))])))) - - (define tree-fold-left ; assumes empty-tree is 0 - (lambda (proc size init st) - (let f ([st st] [size size] [offset 0] [init init]) - (cond - [(fixnum? st) - (do ([st st (fxsrl st 1)] - [offset offset (fx+ offset 1)] - [init init (if (fxodd? st) (proc init offset) init)]) - ((fx= st 0) init))] - [(eq? st full-tree) - (do ([size size (fx- size 1)] - [offset offset (fx+ offset 1)] - [init init (proc init offset)]) - ((fx= size 0) init))] - [else - (let ([split (compute-split size)]) - (f (tree-node-left st) split offset - (f (tree-node-right st) (fx- size split) (fx+ offset split) init)))])))) - - (define tree-bit-set? ; assumes empty-tree is 0 - (lambda (st size bit) - (let loop ([st st] [size size] [bit bit]) - (cond - [(fixnum? st) - (and (not (eqv? st empty-tree)) - ; fxlogbit? is unnecessarily general, so roll our own - (fxlogtest st (fxsll 1 bit)))] - [(eq? st full-tree) #t] - [else - (let ([split (compute-split size)]) - (if (fx< bit split) - (loop (tree-node-left st) split bit) - (loop (tree-node-right st) (fx- size split) (fx- bit split))))])))) - - (define tree-bit-set ; assumes empty-tree is 0 - (lambda (st size bit) - ; set bit in tree. result is eq? to tr if result is same as tr. - (cond - [(eq? st full-tree) st] - [(fx< size (fixnum-width)) - (let ([st (fxlogbit1 bit st)]) - (if (fx= st (full-fixnum size)) - full-tree - st))] - [else - (let ([split (compute-split size)]) - (if (eqv? st empty-tree) - (if (fx< bit split) - (make-tree-node (tree-bit-set empty-tree split bit) empty-tree) - (make-tree-node empty-tree (tree-bit-set empty-tree (fx- size split) (fx- bit split)))) - (let ([lst (tree-node-left st)] [rst (tree-node-right st)]) - (if (fx< bit split) - (let ([new-lst (tree-bit-set lst split bit)]) - (if (eq? new-lst lst) - st - (if (and (eq? new-lst full-tree) (eq? rst full-tree)) - full-tree - (make-tree-node new-lst rst)))) - (let ([new-rst (tree-bit-set rst (fx- size split) (fx- bit split))]) - (if (eq? new-rst rst) - st - (if (and (eq? lst full-tree) (eq? new-rst full-tree)) - full-tree - (make-tree-node lst new-rst))))))))]))) - - (define tree-bit-unset ; assumes empty-tree is 0 - (lambda (st size bit) - ; reset bit in tree. result is eq? to tr if result is same as tr. - (cond - [(fixnum? st) - (if (eqv? st empty-tree) - empty-tree - (fxlogbit0 bit st))] - [(eq? st full-tree) - (if (fx< size (fixnum-width)) - (fxlogbit0 bit (full-fixnum size)) - (let ([split (compute-split size)]) - (if (fx< bit split) - (make-tree-node (tree-bit-unset full-tree split bit) full-tree) - (make-tree-node full-tree (tree-bit-unset full-tree (fx- size split) (fx- bit split))))))] - [else - (let ([split (compute-split size)] [lst (tree-node-left st)] [rst (tree-node-right st)]) - (if (fx< bit split) - (let ([new-lst (tree-bit-unset lst split bit)]) - (if (eq? new-lst lst) - st - (if (and (eq? new-lst empty-tree) (eq? rst empty-tree)) - empty-tree - (make-tree-node new-lst rst)))) - (let ([new-rst (tree-bit-unset rst (fx- size split) (fx- bit split))]) - (if (eq? new-rst rst) - st - (if (and (eq? lst empty-tree) (eq? new-rst empty-tree)) - empty-tree - (make-tree-node lst new-rst))))))]))) - - (define tree-bit-count ; assumes empty-tree is 0 - (lambda (st size) - (cond - [(fixnum? st) (fxbit-count st)] - [(eq? st full-tree) size] - [else - (let ([split (compute-split size)]) - (fx+ - (tree-bit-count (tree-node-left st) split) - (tree-bit-count (tree-node-right st) (fx- size split))))]))) - - (define tree-same? ; assumes empty-tree is 0 - (lambda (st1 st2) - (or (eq? st1 st2) ; assuming fixnums and full trees are eq-comparable - (and (tree-node? st1) - (tree-node? st2) - (tree-same? (tree-node-left st1) (tree-node-left st2)) - (tree-same? (tree-node-right st1) (tree-node-right st2)))))) - - (define tree-merge - ; merge tr1 and tr2. result is eq? to tr1 if result is same as tr1. - (lambda (st1 st2 size) - (cond - [(or (eq? st1 st2) (eq? st2 empty-tree)) st1] - [(eq? st1 empty-tree) st2] - [(or (eq? st1 full-tree) (eq? st2 full-tree)) full-tree] - [(fixnum? st1) - (safe-assert (fixnum? st2)) - (let ([st (fxlogor st1 st2)]) - (if (fx= st (full-fixnum size)) - full-tree - st))] - [else - (let ([lst1 (tree-node-left st1)] - [rst1 (tree-node-right st1)] - [lst2 (tree-node-left st2)] - [rst2 (tree-node-right st2)]) - (let ([split (compute-split size)]) - (let ([l (tree-merge lst1 lst2 split)] [r (tree-merge rst1 rst2 (fx- size split))]) - (cond - [(and (eq? l lst1) (eq? r rst1)) st1] - [(and (eq? l lst2) (eq? r rst2)) st2] - [(and (eq? l full-tree) (eq? r full-tree)) full-tree] - [else (make-tree-node l r)]))))])))) - + (include "tree.ss") + (define-syntax tc-disp (lambda (x) (syntax-case x () @@ -764,7 +536,7 @@ (define-record-type ctrpi ; compile-time version of rp-info (nongenerative) (sealed #t) - (fields label src sexpr mask)) + (fields label src sexpr mask)) ; mask is like a livemask: an integer or (cons size tree) (define-threaded next-lambda-seqno) @@ -809,7 +581,7 @@ nfv* nfv** (mutable weight) - (mutable call-live*) + (mutable call-live*) ; a tree (mutable frame-words) (mutable local-save*)) (protocol @@ -14281,24 +14053,37 @@ (define-who record-call-live! (lambda (block* varvec) - (for-each - (lambda (block) - (when (newframe-block? block) - (let ([newframe-info (newframe-block-info block)]) - (let ([call-live* (get-live-vars (newframe-block-live-call block) (vector-length varvec) varvec)]) - (for-each - (lambda (x) - (define fixnum (lambda (x) (if (fixnum? x) x (most-positive-fixnum)))) - (when (uvar? x) - (uvar-spilled! x #t) - (unless (block-pariah? block) - (uvar-save-weight-set! x - (fixnum - (+ (uvar-save-weight x) - (* (info-newframe-weight newframe-info) 2))))))) - call-live*) - (info-newframe-call-live*-set! newframe-info call-live*))))) - block*))) + (let ([spill-and-get-non-poison + (make-memoized-tree-reduce (vector-length varvec) + ;; handle one var from tree: + (lambda (v offset) + (let ([x (vector-ref varvec offset)]) + (cond + [(uvar? x) + (uvar-spilled! x #t) + (if (uvar-poison? x) + v + (cons x v))] + [else v]))) + ;; merge results from two trees: + append + '())]) + (for-each + (lambda (block) + (when (newframe-block? block) + (let ([newframe-info (newframe-block-info block)]) + (let ([non-poison-live* (spill-and-get-non-poison (newframe-block-live-call block))]) + (unless (block-pariah? block) + (for-each + (lambda (x) + (define fixnum (lambda (x) (if (fixnum? x) x (most-positive-fixnum)))) + (uvar-save-weight-set! x + (fixnum + (+ (uvar-save-weight x) + (* (info-newframe-weight newframe-info) 2))))) + non-poison-live*)) + (info-newframe-call-live*-set! newframe-info (newframe-block-live-call block)))))) + block*)))) ; maintain move sets as (var . weight) lists, sorted by weight (largest first) ; 2014/06/26: allx move set size averages .79 elements with a max of 12, so no @@ -14322,6 +14107,8 @@ (values move (cons move2 move*))))))))) cons)))))) + (define poison-spillable-threshold 1000) ; NB: parameter? + (define-who identify-poison! (lambda (kspillable varvec live-size block*) (define kpoison 0) @@ -14353,6 +14140,31 @@ (unless (or (fx= stride 16) (< (* (fx- kspillable kpoison) (fx* stride 2)) 1000000)) (refine (fxsrl skip 1) skip))))))) + (define (make-get-non-poison varvec live-size kspillable keep-nonspillable?) + (cond + [(fx> kspillable poison-spillable-threshold) + ;; It's probably worth pruning trees to non-poison when + ;; iterating through spillables. This pays off when there + ;; are enough iterations through spillables for non-poison + ;; spillables and unspillables: + (make-memoized-tree-reduce live-size + (lambda (non-poison-out y-offset) + (cond + [(< y-offset kspillable) + (let ([y (vector-ref varvec y-offset)]) + (if (uvar-poison? y) + non-poison-out + (tree-bit-set non-poison-out live-size y-offset)))] + [keep-nonspillable? + (tree-bit-set non-poison-out live-size y-offset)] + [else + non-poison-out])) + (lambda (t1 t2) (tree-merge t1 t2 live-size)) + empty-tree)] + [else + ;; Probably not worthwhile: + (lambda (out) out)])) + (define-who do-spillable-conflict! (lambda (kspillable kfv varvec live-size block*) (define remove-var (make-remove-var live-size)) @@ -14361,6 +14173,8 @@ (when (var-index x2) ($add-move! x1 x2 2) ($add-move! x2 x1 2)))) + (define get-non-poison + (make-get-non-poison varvec live-size kspillable #f)) (define add-conflict! (lambda (x out) ; invariants: @@ -14373,10 +14187,11 @@ (lambda (y-offset) ; frame y -> poison spillable x (conflict-bit-set! (var-spillable-conflict* (vector-ref varvec y-offset)) x-offset))) - (let ([cset (var-spillable-conflict* x)]) + (let ([cset (var-spillable-conflict* x)] + [non-poison-out (get-non-poison out)]) (if (fx< x-offset kspillable) (begin - (tree-for-each out live-size 0 kspillable + (tree-for-each non-poison-out live-size 0 kspillable (lambda (y-offset) (let ([y (vector-ref varvec y-offset)]) (unless (uvar-poison? y) @@ -14394,7 +14209,7 @@ (lambda (y-offset) ; frame x -> poison or non-poison spillable y (conflict-bit-set! cset y-offset))) - (tree-for-each out live-size 0 kspillable + (tree-for-each non-poison-out live-size 0 kspillable (lambda (y-offset) (unless (uvar-poison? (vector-ref varvec y-offset)) ; register x -> non-poison spillable y @@ -14563,11 +14378,57 @@ (definitions (define remove-var (make-remove-var live-size)) (define find-max-fv - (lambda (call-live*) - (fold-left - (lambda (call-max-fv x) - (fxmax (fv-offset (if (uvar? x) (uvar-location x) x)) call-max-fv)) - -1 call-live*))) + (make-memoized-tree-reduce (vector-length varvec) + (lambda (call-max-fv offset) + (let ([x (vector-ref varvec offset)]) + (fxmax (fv-offset (if (uvar? x) (uvar-location x) x)) call-max-fv))) + fxmax -1)) + (define add-to-live-pointer-tree + (lambda (lpt live) + (define (add-fv fv lpt) + (let ([offset (fv-offset fv)]) + (if (fx= offset 0) ; no bit for fv0 + lpt + (tree-bit-set lpt live-size (fx- offset 1))))) + (cond + [(fv? live) (add-fv live lpt)] + [(eq? (uvar-type live) 'ptr) (add-fv (uvar-location live) lpt)] + [else lpt]))) + (define build-live-pointer-tree-from-tree + (make-memoized-tree-reduce (vector-length varvec) + (lambda (lpt offset) + (add-to-live-pointer-tree lpt (vector-ref varvec offset))) + (lambda (t1 t2) (tree-merge t1 t2 live-size)) + empty-tree)) + (define build-live-pointer-tree-from-list + (lambda (live*) (fold-left add-to-live-pointer-tree empty-tree live*))) + (define build-inspector-tree + (cond + [(info-lambda-ctci lambda-info) => + (lambda (ctci) + (make-memoized-tree-reduce (vector-length varvec) + (lambda (tree offset lpm) + (let ([x (vector-ref varvec offset)]) + (cond + [(and (uvar? x) (uvar-iii x)) => + (lambda (index) + (safe-assert + (let ([name.offset (vector-ref (ctci-live ctci) index)]) + ($livemask-member? lpm (fx- (cdr name.offset) 1)))) + (tree-bit-set tree live-size index))] + [else tree]))) + (lambda (t1 t2) (tree-merge t1 t2 live-size)) + empty-tree))] + [else #f])) + (define extract-local-save + (make-memoized-tree-reduce (vector-length varvec) + (lambda (var* offset) + (let ([x (vector-ref varvec offset)]) + (if (and (uvar? x) (uvar-local-save? x)) + (cons x var*) + var*))) + append + '())) (define cool? (lambda (base nfv*) (let loop ([nfv* nfv*] [offset base]) @@ -14596,41 +14457,6 @@ (for-each (lambda (nfv*) (set-offsets! nfv* arg-base)) nfv**) base) (loop (fx+ base 1)))))))) - (define build-mask - (lambda (index*) - (define bucket-width (if (fx> (fixnum-width) 32) 32 16)) - (let* ([nbits (fx+ (fold-left (lambda (m index) (fxmax m index)) -1 index*) 1)] - [nbuckets (fxdiv (fx+ nbits (fx- bucket-width 1)) bucket-width)] - [buckets (make-fxvector nbuckets 0)]) - (for-each - (lambda (index) - (let-values ([(i j) (fxdiv-and-mod index bucket-width)]) - (fxvector-set! buckets i (fxlogbit1 j (fxvector-ref buckets i))))) - index*) - (let f ([base 0] [len nbuckets]) - (if (fx< len 2) - (if (fx= len 0) - 0 - (fxvector-ref buckets base)) - (let ([half (fxsrl len 1)]) - (logor - (bitwise-arithmetic-shift-left (f (fx+ base half) (fx- len half)) (fx* half bucket-width)) - (f base half)))))))) - (define build-live-pointer-mask - (lambda (live*) - (build-mask - (fold-left - (lambda (index* live) - (define (cons-fv fv index*) - (let ([offset (fv-offset fv)]) - (if (fx= offset 0) ; no bit for fv0 - index* - (cons (fx- offset 1) index*)))) - (cond - [(fv? live) (cons-fv live index*)] - [(eq? (uvar-type live) 'ptr) (cons-fv (uvar-location live) index*)] - [else index*])) - '() live*)))) (define (process-info-newframe! info) (unless (info-newframe-frame-words info) (let ([call-live* (info-newframe-call-live* info)]) @@ -14638,26 +14464,14 @@ (let ([cnfv* (info-newframe-cnfv* info)]) (fx+ (assign-new-frame! cnfv* (cons (info-newframe-nfv* info) (info-newframe-nfv** info)) call-live*) (length cnfv*)))) - (info-newframe-local-save*-set! info - (filter (lambda (x) (and (uvar? x) (uvar-local-save? x))) call-live*))))) + (info-newframe-local-save*-set! info (extract-local-save call-live*))))) (define record-inspector-info! (lambda (src sexpr rpl call-live* lpm) (safe-assert (if call-live* rpl (not rpl))) (cond [(and call-live* (info-lambda-ctci lambda-info)) => (lambda (ctci) - (let ([mask (build-mask - (fold-left - (lambda (i* x) - (cond - [(and (uvar? x) (uvar-iii x)) => - (lambda (index) - (safe-assert - (let ([name.offset (vector-ref (ctci-live ctci) index)]) - (logbit? (fx- (cdr name.offset) 1) lpm))) - (cons index i*))] - [else i*])) - '() call-live*))]) + (let ([mask ($make-livemask live-size (build-inspector-tree call-live* lpm))]) (when (or src sexpr (not (eqv? mask 0))) (ctci-rpi*-set! ctci (cons (make-ctrpi rpl src sexpr mask) (ctci-rpi* ctci))))))])))) (Pred : Pred (ir) -> Pred ()) @@ -14668,10 +14482,14 @@ (foldable-Effect : Effect (ir new-effect*) -> * (new-effect*) [(return-point ,info ,rpl ,mrvl (,cnfv* ...)) (process-info-newframe! info) - (let ([lpm (build-live-pointer-mask (append cnfv* (info-newframe-call-live* info)))]) + (let ([lpm ($make-livemask + live-size + (tree-merge (build-live-pointer-tree-from-list cnfv*) + (build-live-pointer-tree-from-tree (info-newframe-call-live* info)) + live-size))]) (record-inspector-info! (info-newframe-src info) (info-newframe-sexpr info) rpl (info-newframe-call-live* info) lpm) (with-output-language (L15b Effect) - (safe-assert (< -1 lpm (ash 1 (fx- (info-newframe-frame-words info) 1)))) + (safe-assert (<= ($livemask-size lpm) (fx- (info-newframe-frame-words info) 1))) (cons `(rp-header ,mrvl ,(fx* (info-newframe-frame-words info) (constant ptr-bytes)) ,lpm) new-effect*)))] [(remove-frame ,live-info ,info) (process-info-newframe! info) @@ -15118,6 +14936,8 @@ (define-who do-unspillable-conflict! (lambda (kfv kspillable varvec live-size kunspillable unvarvec block*) (define remove-var (make-remove-var live-size)) + (define get-non-poison + (make-get-non-poison varvec live-size kspillable #t)) (define unspillable? (lambda (x) (and (uvar? x) (uvar-unspillable? x)))) @@ -15203,7 +15023,7 @@ (Effect* (cdr e*) (nanopass-case (L15d Effect) (car e*) [(set! ,live-info ,x ,rhs) - (let ([spillable-live (live-info-live live-info)]) + (let ([spillable-live (get-non-poison (live-info-live live-info))]) (if (unspillable? x) (let ([unspillable* (remq x unspillable*)]) (safe-assert (uvar-seen? x)) @@ -15620,11 +15440,11 @@ (RApass unparse-L15a do-live-analysis! live-size entry-block*) ; this is worth enabling from time to time... #;(check-entry-live! (info-lambda-name info) live-size varvec entry-block*) - ; rerun intra-block live analysis and record (fv v reg v spillable) x spillable conflicts - (RApass unparse-L15a record-call-live! block* varvec) ;; NB: we could just use (vector-length varvec) to get live-size - (when (fx> kspillable 1000) ; NB: parameter? + (when (fx> kspillable poison-spillable-threshold) (RApass unparse-L15a identify-poison! kspillable varvec live-size block*)) + (RApass unparse-L15a record-call-live! block* varvec) + ; rerun intra-block live analysis and record (fv v reg v spillable) x spillable conflicts (RApass unparse-L15a do-spillable-conflict! kspillable kfv varvec live-size block*) #;(show-conflicts (info-lambda-name info) varvec '#()) ; find frame homes for call-live variables; adds new fv x spillable conflicts diff --git a/s/inspect.ss b/s/inspect.ss index 242c41fb0f..a80989f0e1 100644 --- a/s/inspect.ss +++ b/s/inspect.ss @@ -1744,6 +1744,29 @@ ) +(let () + (include "tree.ss") + (set! $livemask? + (lambda (v) + ;; intended to be constant-time, so just check plausible: + (or (integer? v) (and (pair? v) (integer? (car v)))))) + (set! $make-livemask + (lambda (size t) + (let ([t (or (tree-simplify-for-readonly t size) t)]) + (if (integer? t) + t + (cons size t))))) + (set! $livemask-size + (lambda (livemask) + (if (number? livemask) + (integer-length livemask) + (tree-bit-length (cdr livemask) (car livemask))))) + (set! $livemask-member? + (lambda (livemask index) + (if (number? livemask) + (logbit? index livemask) + (tree-bit-set? (cdr livemask) (car livemask) index))))) + (define inspect/object (lambda (x) (define compute-size @@ -2165,7 +2188,7 @@ (values (source-file-descriptor-name sfd) fp))] [(path line char) (values path line char)])) (values)))) - + (define-who make-continuation-object (lambda (x pos) (include "types.ss") @@ -2192,19 +2215,20 @@ (let ([cookie '(chocolate . chip)]) (let ([vals (make-vector len cookie)] [vars (make-vector len '())] [live (code-info-live info)]) ; fill vals based on live-pointer mask - (let f ([i 1] [lpm lpm]) + (let f ([i 1]) (unless (>= i len) - (when (odd? lpm) + (when ($livemask-member? lpm (fx- i 1)) (vector-set! vals (fx1- i) ($continuation-stack-ref x i))) - (f (fx1+ i) (ash lpm -1)))) + (f (fx1+ i)))) ; fill vars based on code-info variable mask - (let f ([i 0] [mask (rp-info-mask rpi)]) - (unless (eqv? mask 0) - (when (odd? mask) - (let ([p (vector-ref live i)]) - (let ([index (fx1- (cdr p))]) - (vector-set! vars index (cons (car p) (vector-ref vars index)))))) - (f (+ i 1) (ash mask -1)))) + (let ([mask (rp-info-mask rpi)]) + (let f ([i 0]) + (unless (fx= i ($livemask-size mask)) + (when ($livemask-member? mask i) + (let ([p (vector-ref live i)]) + (let ([index (fx1- (cdr p))]) + (vector-set! vars index (cons (car p) (vector-ref vars index)))))) + (f (+ i 1))))) ; create return vector (with-values (let f ([i 0] [count 0] [cp #f] [cpvar* '()]) @@ -2255,13 +2279,13 @@ (real-make-continuation-object x (rp-info-src rpi) (rp-info-sexpr rpi) cp v frame-count pos))))))] [else (let ([v (list->vector - (let f ([i 1] [lpm lpm]) + (let f ([i 1]) (cond [(>= i len) '()] - [(odd? lpm) + [($livemask-member? lpm (fx- i 1)) (cons (make-variable-object ($continuation-stack-ref x i) #f) - (f (fx1+ i) (ash lpm -1)))] - [else (f (fx1+ i) (ash lpm -1))])))]) + (f (fx1+ i)))] + [else (f (fx1+ i))])))]) (real-make-continuation-object x #f #f #f v (vector-length v) pos))])))) (define real-make-continuation-object @@ -2502,9 +2526,9 @@ ($split-continuation x 0) ; not following RA slot at base of the frame, but this should always hold dounderflow, ; which will be in the static generation and therefore ignored anyway after compact heap - (let ([len ($continuation-stack-length x)]) + (let ([len ($continuation-stack-length x)] + [lpm ($continuation-return-livemask x)]) (let loop ([i 1] - [lpm ($continuation-return-livemask x)] [size (fx+ (constant size-continuation) (align (fx* len (constant ptr-bytes))) (compute-size ($continuation-return-code x)) @@ -2513,7 +2537,7 @@ (compute-size ($continuation-winders x)))]) (if (fx>= i len) size - (loop (fx+ i 1) (ash lpm -1) (if (odd? lpm) (fx+ size (compute-size ($continuation-stack-ref x i))) size))))))) + (loop (fx+ i 1) (if ($livemask-member? lpm (fx- i 1)) (fx+ size (compute-size ($continuation-stack-ref x i))) size))))))) (let ([n ($closure-length x)]) (do ([i 0 (fx+ i 1)] [size (fx+ (align (fx+ (constant header-size-closure) (fx* n (constant ptr-bytes)))) (compute-size ($closure-code x))) @@ -2665,12 +2689,13 @@ (compute-composition! ($closure-code x)) (compute-composition! ($continuation-link x)) (compute-composition! ($continuation-winders x)) - (let ([len ($continuation-stack-length x)]) + (let ([len ($continuation-stack-length x)] + [lpm ($continuation-return-livemask x)]) (incr! stack (align (fx* len (constant ptr-bytes)))) - (let loop ([i 1] [lpm ($continuation-return-livemask x)]) + (let loop ([i 1]) (unless (fx>= i len) - (when (odd? lpm) (compute-composition! ($continuation-stack-ref x i))) - (loop (fx+ i 1) (ash lpm -1))))))) + (when ($livemask-member? lpm (fx- i 1)) (compute-composition! ($continuation-stack-ref x i))) + (loop (fx+ i 1))))))) (begin (compute-composition! ($closure-code x)) (let ([n ($closure-length x)]) @@ -2800,13 +2825,14 @@ ($split-continuation x 0) ; not following RA slot at base of the frame, but this should always hold dounderflow, ; which will be in the static generation and therefore ignored anyway after compact heap - (let ([len ($continuation-stack-length x)]) - (let loop ([i 1] [lpm ($continuation-return-livemask x)]) + (let ([len ($continuation-stack-length x)] + [lpm ($continuation-return-livemask x)]) + (let loop ([i 1]) (if (fx>= i len) (construct-proc ($continuation-return-code x) ($closure-code x) ($continuation-link x) ($continuation-winders x) next-proc) - (if (odd? lpm) - (construct-proc ($continuation-stack-ref x i) (loop (fx+ i 1) (ash lpm -1))) - (loop (fx+ i 1) (ash lpm -1)))))))) + (if ($livemask-member? lpm (fx- i 1)) + (construct-proc ($continuation-stack-ref x i) (loop (fx+ i 1))) + (loop (fx+ i 1)))))))) (construct-proc ($closure-code x) (let ([n ($closure-length x)]) (let f ([i 0]) diff --git a/s/np-languages.ss b/s/np-languages.ss index c6e9de1cb9..dfb1dd03f9 100644 --- a/s/np-languages.ss +++ b/s/np-languages.ss @@ -778,6 +778,8 @@ (lambda (x) (and (integer? x) (exact? x)))) + (define livemask? $livemask?) + ; calling conventions are imposed; clauses no longer have formals (they are ; now locals set by arguments from argument registers and frame); calls no ; longer have arguments; case-lambda is resposible for dispatching to correct @@ -793,7 +795,7 @@ (pred-primitive (pred-prim)) (value-primitive (value-prim)) (immediate (imm fs)) - (exact-integer (lpm)) + (livemask (lpm)) (info (info)) (maybe-label (mrvl)) (label (l rpl)) @@ -921,7 +923,7 @@ (pred-primitive (pred-prim)) (value-primitive (value-prim)) (immediate (imm fs)) - (exact-integer (lpm)) + (livemask (lpm)) (live-info (live-info)) (info (info)) (label (l rpl)) diff --git a/s/ppc32.ss b/s/ppc32.ss index 4bcbe8f8f9..b12c529eb5 100644 --- a/s/ppc32.ss +++ b/s/ppc32.ss @@ -2061,7 +2061,7 @@ (if (target-fixnum? lpm) `(long . ,(fix lpm)) `(abs 0 (object ,lpm))) - (aop-cons* `(asm livemask: ,(format "~b" lpm)) + (aop-cons* `(asm livemask: ,(if (number? lpm) (format "~b" lpm) (format "~s" lpm))) '(code-top-link) (aop-cons* `(asm code-top-link) `(long . ,fs) diff --git a/s/primdata.ss b/s/primdata.ss index 6eb18d228d..df7de5dff9 100644 --- a/s/primdata.ss +++ b/s/primdata.ss @@ -2024,6 +2024,9 @@ ($library-requirements-options [flags]) ($library-search [flags]) ($list-length [flags]) + ($livemask? [flags]) + ($livemask-size [flags]) + ($livemask-member? [flags]) ($load-library [flags]) ($locate-source [flags]) ($logand [flags]) @@ -2058,6 +2061,7 @@ ($make-fptr [flags pure mifoldable discard true]) ($make-graph-env [flags]) ($make-library-requirements-options [flags pure discard true]) + ($make-livemask [flags]) ($make-object-finder [flags]) ($make-promise [flags alloc]) ($make-read [flags]) diff --git a/s/tree.ss b/s/tree.ss new file mode 100644 index 0000000000..099724d2cf --- /dev/null +++ b/s/tree.ss @@ -0,0 +1,303 @@ +(module (empty-tree full-tree tree-extract tree-for-each tree-fold-left tree-bit-set? tree-bit-set tree-bit-unset + tree-bit-count tree-bit-length tree-simplify-for-readonly tree-same? tree-merge + make-memoized-tree-reduce) + ; tree -> fixnum | (tree-node tree tree) | #t + ; 0 represents any tree or subtree with no bits set, and a tree or subtree + ; with no bits set is always 0. + ; #t represents a subtree with all bits set + (define empty-tree 0) + + ; any tree or subtree with all bits set + (define full-tree #t) + + (define (full-fixnum size) (fxsrl (most-positive-fixnum) (fx- (fx- (fixnum-width) 1) size))) + + (define compute-split + (lambda (size) + (fxsrl size 1) + ; 2015/03/15 rkd: tried the following under the theory that we'd allocate + ; fewer nodes. for example, say fixmun-width is 30 and size is 80. if we + ; split 40/40 we create two nodes under the current node. if instead we + ; split 29/51 we create just one node and one fixnum under the current + ; node. this worked as planned; however, it reduced the number of nodes + ; created by only 3.3% on the x86 and made compile times slightly worse. + #;(if (fx<= size (fx* (fx- (fixnum-width) 1) 3)) (fx- (fixnum-width) 1) (fxsrl size 1)))) + + ;; A tree is represented with pairs so that it can serve directly as + ;; a livemask when paired with its sized, as long as the tree + ;; doesn't have any `full-tree` parts + (module (make-tree-node tree-node? tree-node-left tree-node-right) + (define make-tree-node cons) + (define tree-node? pair?) + (define tree-node-left car) + (define tree-node-right cdr)) + + (define tree-extract ; assumes empty-tree is 0 + (lambda (st size v) + (let extract ([st st] [size size] [offset 0] [x* '()]) + (cond + [(fixnum? st) + (do ([st st (fxsrl st 1)] + [offset offset (fx+ offset 1)] + [x* x* (if (fxodd? st) (cons (vector-ref v offset) x*) x*)]) + ((fx= st 0) x*))] + [(eq? st full-tree) + (do ([size size (fx- size 1)] + [offset offset (fx+ offset 1)] + [x* x* (cons (vector-ref v offset) x*)]) + ((fx= size 0) x*))] + [else + (let ([split (compute-split size)]) + (extract (tree-node-right st) (fx- size split) (fx+ offset split) + (extract (tree-node-left st) split offset x*)))])))) + + (define tree-for-each ; assumes empty-tree is 0 + (lambda (st size start end action) + (let f ([st st] [size size] [start start] [end end] [offset 0]) + (cond + [(fixnum? st) + (unless (eq? st empty-tree) + (do ([st (fxbit-field st start end) (fxsrl st 1)] [offset (fx+ offset start) (fx+ offset 1)]) + ((fx= st 0)) + (when (fxodd? st) (action offset))))] + [(eq? st full-tree) + (do ([start start (fx+ start 1)] [offset offset (fx+ offset 1)]) + ((fx= start end)) + (action offset))] + [else + (let ([split (compute-split size)]) + (when (fx< start split) + (f (tree-node-left st) split start (fxmin end split) offset)) + (when (fx> end split) + (f (tree-node-right st) (fx- size split) (fxmax (fx- start split) 0) (fx- end split) (fx+ offset split))))])))) + + (define tree-fold-left ; assumes empty-tree is 0 + (lambda (proc size init st) + (let f ([st st] [size size] [offset 0] [init init]) + (cond + [(fixnum? st) + (do ([st st (fxsrl st 1)] + [offset offset (fx+ offset 1)] + [init init (if (fxodd? st) (proc init offset) init)]) + ((fx= st 0) init))] + [(eq? st full-tree) + (do ([size size (fx- size 1)] + [offset offset (fx+ offset 1)] + [init init (proc init offset)]) + ((fx= size 0) init))] + [else + (let ([split (compute-split size)]) + (f (tree-node-left st) split offset + (f (tree-node-right st) (fx- size split) (fx+ offset split) init)))])))) + + (define tree-bit-set? ; assumes empty-tree is 0 + (lambda (st size bit) + (let loop ([st st] [size size] [bit bit]) + (cond + [(fixnum? st) + (and (not (eqv? st empty-tree)) + ; fxlogbit? is unnecessarily general, so roll our own + (fxlogtest st (fxsll 1 bit)))] + [(eq? st full-tree) #t] + [else + (let ([split (compute-split size)]) + (if (fx< bit split) + (loop (tree-node-left st) split bit) + (loop (tree-node-right st) (fx- size split) (fx- bit split))))])))) + + (define tree-bit-set ; assumes empty-tree is 0 + (lambda (st size bit) + ; set bit in tree. result is eq? to tr if result is same as tr. + (cond + [(eq? st full-tree) st] + [(fx< size (fixnum-width)) + (let ([st (fxlogbit1 bit st)]) + (if (fx= st (full-fixnum size)) + full-tree + st))] + [else + (let ([split (compute-split size)]) + (if (eqv? st empty-tree) + (if (fx< bit split) + (make-tree-node (tree-bit-set empty-tree split bit) empty-tree) + (make-tree-node empty-tree (tree-bit-set empty-tree (fx- size split) (fx- bit split)))) + (let ([lst (tree-node-left st)] [rst (tree-node-right st)]) + (if (fx< bit split) + (let ([new-lst (tree-bit-set lst split bit)]) + (if (eq? new-lst lst) + st + (if (and (eq? new-lst full-tree) (eq? rst full-tree)) + full-tree + (make-tree-node new-lst rst)))) + (let ([new-rst (tree-bit-set rst (fx- size split) (fx- bit split))]) + (if (eq? new-rst rst) + st + (if (and (eq? lst full-tree) (eq? new-rst full-tree)) + full-tree + (make-tree-node lst new-rst))))))))]))) + + (define tree-bit-unset ; assumes empty-tree is 0 + (lambda (st size bit) + ; reset bit in tree. result is eq? to tr if result is same as tr. + (cond + [(fixnum? st) + (if (eqv? st empty-tree) + empty-tree + (fxlogbit0 bit st))] + [(eq? st full-tree) + (if (fx< size (fixnum-width)) + (fxlogbit0 bit (full-fixnum size)) + (let ([split (compute-split size)]) + (if (fx< bit split) + (make-tree-node (tree-bit-unset full-tree split bit) full-tree) + (make-tree-node full-tree (tree-bit-unset full-tree (fx- size split) (fx- bit split))))))] + [else + (let ([split (compute-split size)] [lst (tree-node-left st)] [rst (tree-node-right st)]) + (if (fx< bit split) + (let ([new-lst (tree-bit-unset lst split bit)]) + (if (eq? new-lst lst) + st + (if (and (eq? new-lst empty-tree) (eq? rst empty-tree)) + empty-tree + (make-tree-node new-lst rst)))) + (let ([new-rst (tree-bit-unset rst (fx- size split) (fx- bit split))]) + (if (eq? new-rst rst) + st + (if (and (eq? lst empty-tree) (eq? new-rst empty-tree)) + empty-tree + (make-tree-node lst new-rst))))))]))) + + (define tree-bit-count ; assumes empty-tree is 0 + (lambda (st size) + (cond + [(fixnum? st) (fxbit-count st)] + [(eq? st full-tree) size] + [else + (let ([split (compute-split size)]) + (fx+ + (tree-bit-count (tree-node-left st) split) + (tree-bit-count (tree-node-right st) (fx- size split))))]))) + + (define tree-bit-length + (lambda (st size) + (cond + [(fixnum? st) (integer-length st)] + [(eq? st full-tree) size] + [else + (let ([split (compute-split size)]) + (let ([rlen (tree-bit-length (tree-node-right st) (fx- size split))]) + (if (fx= 0 rlen) + (tree-bit-length (tree-node-left st) split) + (fx+ split rlen))))]))) + + ;; If a tree has only a leftmost fixnum, then we can + ;; reperesent it for `tree-bit-set?` as just the fixnum. + (define tree-simplify-for-readonly + (lambda (st size) + (cond + [(fixnum? st) st] + [(eq? st full-tree) + (and (fx< size (fixnum-width)) + (full-fixnum size))] + [else + (and (eq? (tree-node-right st) empty-tree) + (tree-simplify-for-readonly (tree-node-left st) (compute-split size)))]))) + + (define tree-same? ; assumes empty-tree is 0 + (lambda (st1 st2) + (or (eq? st1 st2) ; assuming fixnums and full trees are eq-comparable + (and (tree-node? st1) + (tree-node? st2) + (tree-same? (tree-node-left st1) (tree-node-left st2)) + (tree-same? (tree-node-right st1) (tree-node-right st2)))))) + + (define tree-merge + ; merge tr1 and tr2. result is eq? to tr1 if result is same as tr1. + (lambda (st1 st2 size) + (cond + [(or (eq? st1 st2) (eq? st2 empty-tree)) st1] + [(eq? st1 empty-tree) st2] + [(or (eq? st1 full-tree) (eq? st2 full-tree)) full-tree] + [(fixnum? st1) + (safe-assert (fixnum? st2)) + (let ([st (fxlogor st1 st2)]) + (if (fx= st (full-fixnum size)) + full-tree + st))] + [else + (let ([lst1 (tree-node-left st1)] + [rst1 (tree-node-right st1)] + [lst2 (tree-node-left st2)] + [rst2 (tree-node-right st2)]) + (let ([split (compute-split size)]) + (let ([l (tree-merge lst1 lst2 split)] [r (tree-merge rst1 rst2 (fx- size split))]) + (cond + [(and (eq? l lst1) (eq? r rst1)) st1] + [(and (eq? l lst2) (eq? r rst2)) st2] + [(and (eq? l full-tree) (eq? r full-tree)) full-tree] + [else (make-tree-node l r)]))))]))) + + (define make-memoized-tree-reduce + ;; A function produced by `make-memoized-tree-reduce` is meant to + ;; be applied to different trees where the same bit in different + ;; trees means the same thing. Memoizing reduces over multiple + ;; trees is useful when the tree are likely to share nodes. + ;; Memoizing is approximate, applied only when it seems like to be + ;; worthwhile. Memoizing also relies on results being non-#f. + (lambda (total-size leaf-proc merge-proc init) + (cond + [(< total-size (* 8 (fixnum-width))) + ;; Memoizing probably isn't worthwhile + (case-lambda + [(st) + (tree-fold-left leaf-proc total-size init st)] + [(st . extra-leaf-args) + (tree-fold-left (lambda (init offset) + (apply leaf-proc init offset extra-leaf-args)) + total-size init st)])] + [else + ;; Memoizing could be worthwhile... + (let ([cache (make-eq-hashtable)] ; maps st to a result + [full-cache (make-eqv-hashtable)] ; maps offset+size to a result for full trees + [apply-leaf-proc (lambda (init offset extra-leaf-args) + (if (null? extra-leaf-args) + (leaf-proc init offset) + (apply leaf-proc init offset extra-leaf-args)))]) + (lambda (st . extra-leaf-args) + (let f ([st st] [size total-size] [offset 0]) + (cond + [(fixnum? st) + ;; No memoizing at fixnum leaves, since it seems unlikely + ;; to be useful + (do ([st st (fxsrl st 1)] + [offset offset (fx+ offset 1)] + [init init (if (fxodd? st) (apply-leaf-proc init offset extra-leaf-args) init)]) + ((fx= st 0) init))] + [(eq? st full-tree) + ;; Memoizing at full subtrees uses offset and size + ;; (combined into one number) to identity the subtree. + (let ([key (+ offset (* total-size size))]) + (cond + [(hashtable-ref full-cache key #f) + => (lambda (v) v)] + [else + (let ([v (do ([size size (fx- size 1)] + [offset offset (fx+ offset 1)] + [init init (apply-leaf-proc init offset extra-leaf-args)]) + ((fx= size 0) init))]) + (hashtable-set! full-cache key v) + v)]))] + [else + ;; We're relying on a fresh `cons`es to repersent different parts + ;; of a tree, even if the parts have the same local content. So, + ;; `eq?` identifies a subtree. + (let ([cell (eq-hashtable-cell cache st #f)] + [key (+ offset (* total-size size))]) + (cond + [(cdr cell) => (lambda (v) v)] + [else + (let ([v (let ([split (compute-split size)]) + (merge-proc (f (tree-node-left st) split offset) + (f (tree-node-right st) (fx- size split) (fx+ offset split))))]) + (set-cdr! cell v) + v)]))]))))])))) diff --git a/s/types.ss b/s/types.ss index 30d0c7323d..460e0aa027 100644 --- a/s/types.ss +++ b/s/types.ss @@ -59,7 +59,11 @@ (sealed #t)) (define-record-type rp-info - (fields (immutable offset) (immutable src) (immutable sexpr) (immutable mask)) + (fields + (immutable offset) + (immutable src) + (immutable sexpr) + (immutable mask)) ; an integer or (cons size tree) (nongenerative #{rp-info gr886ae7iuw4wt9ft4vxym-1}) (sealed #t)) diff --git a/s/x86.ss b/s/x86.ss index 28962e4893..2812bea586 100644 --- a/s/x86.ss +++ b/s/x86.ss @@ -2195,7 +2195,7 @@ (if (target-fixnum? lpm) `(long . ,(fix lpm)) `(abs 0 (object ,lpm))) - (aop-cons* `(asm livemask: ,(format "~b" lpm)) + (aop-cons* `(asm livemask: ,(if (number? lpm) (format "~b" lpm) (format "~s" lpm))) '(code-top-link) (aop-cons* `(asm code-top-link) `(long . ,fs) diff --git a/s/x86_64.ss b/s/x86_64.ss index 3162df302a..d2d35ed7b3 100644 --- a/s/x86_64.ss +++ b/s/x86_64.ss @@ -2377,7 +2377,7 @@ (if (target-fixnum? lpm) `(,size . ,(fix lpm)) `(abs 0 (object ,lpm))) - (aop-cons* `(asm livemask: ,(format "~b" lpm)) + (aop-cons* `(asm livemask: ,(if (number? lpm) (format "~b" lpm) (format "~s" lpm))) '(code-top-link) (aop-cons* `(asm code-top-link) `(,size . ,fs)