From aaaa5fefa1d932e4c407de4fc9f49f136b945c87 Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Thu, 3 Jan 2019 08:33:20 -0700 Subject: [PATCH] add `compute-size-increments` Also adds `get-initial-thread`, since threa values are useful with `compute-size[-increments]`. Changes the compiler to inline `weak-pair?` and `ephemeron-pair?`, since that provides better performance for `compute-size-increments`. original commit: 57d0cc13f8e932972cba3837b4f54e9c86786091 --- LOG | 5 + c/prim.c | 1 + c/types.h | 2 + csug/debug.stex | 62 +++++++++ csug/threads.stex | 12 +- mats/misc.ms | 102 ++++++++++++++ mats/thread.ms | 3 + release_notes/release_notes.stex | 9 ++ s/Mf-base | 3 +- s/bitset.ss | 73 +++++++++++ s/cmacros.ss | 45 +++++++ s/cpnanopass.ss | 96 ++++++++++++++ s/inspect.ss | 219 ++++++++++++++++++++++++------- s/primdata.ss | 8 ++ s/prims.ss | 27 +++- 15 files changed, 614 insertions(+), 53 deletions(-) create mode 100644 s/bitset.ss diff --git a/LOG b/LOG index e5aae978a0..1441cb6a5d 100644 --- a/LOG +++ b/LOG @@ -1027,3 +1027,8 @@ cpnanopass.ss, x86_64.ss, x86.ss, foreign2.c, foreign.ms - added initialization of seginfo sorted and trigger_ephemerons fields. segment.c +- added `compute-size-increments` and `get-initial-thread`, and + adjust compiler to inline `weak-pair?` and `ephemeron-pair?` + inspect.ss, bitset.ss, cmacros.ss, prims.ss, primdata.ss, Mf-base, + prim.c, types.h, misc.ms, thread.ms, debug.stex, threads.stex, + release_notes.stex diff --git a/c/prim.c b/c/prim.c index 85856e3be1..f6f3b1f7b6 100644 --- a/c/prim.c +++ b/c/prim.c @@ -139,6 +139,7 @@ static void create_c_entry_vector() { install_c_entry(CENTRY_Sreturn, proc2ptr(S_return)); install_c_entry(CENTRY_Scall_one_result, proc2ptr(S_call_one_result)); install_c_entry(CENTRY_Scall_any_results, proc2ptr(S_call_any_results)); + install_c_entry(CENTRY_segment_info, proc2ptr(S_segment_info)); for (i = 0; i < c_entry_vector_size; i++) { #ifndef PTHREADS diff --git a/c/types.h b/c/types.h index 872dc17671..0560860f28 100644 --- a/c/types.h +++ b/c/types.h @@ -115,6 +115,8 @@ typedef int IFASLCODE; /* fasl type codes */ #define ptr_align(size) (((size)+byte_alignment-1) & ~(byte_alignment-1)) +/* The inlined implementation of primitives like `weak-pair?` + rely on the first two fields of `seginfo`: */ typedef struct _seginfo { unsigned char space; /* space the segment is in */ unsigned char generation; /* generation the segment is in */ diff --git a/csug/debug.stex b/csug/debug.stex index b12b1c3e7c..b00d99885c 100644 --- a/csug/debug.stex +++ b/csug/debug.stex @@ -1452,6 +1452,14 @@ of all objects in the heap is sufficient, \index{\scheme{object-counts}}\scheme{object-counts} is potentially much more efficient. +The procedure \scheme{compute-size-increments} is similar to +\scheme{compute-size} mapped over a list of elements, but it reports a +size for each later element without including the size of objects +reachable from ealier elements, and it treats weak and ephemeron pairs +differently. For a large number of reachable objects, +\scheme{compute-size-increments} uses much less memory than +\scheme{compute-size}. + These procedures treat immediate objects such as fixnums, booleans, and characters as zero-count, zero-byte leaves. @@ -1495,6 +1503,11 @@ If \var{generation} is not supplied, it defaults to the value of \var{object} and anything reachable from \var{object} in any generation less than or equal to \var{generation}. Immediate values such as fixnums, booleans, and characters have zero size. +Size computation for a thread is limited when the thread is still active, +since its full continuation cannot be inspected in that case, but the +full continuation is inspected if the thread is inactive (such as during +a garbage collection rendezvous when a different thread is selected by +the rendezvous). The following examples are valid for machines with 32-bit pointers. @@ -1569,3 +1582,52 @@ The following examples are valid for machines with 32-bit pointers. 0) ;=> ((pair 1 . 8) (# 1 . 8)) \endschemedisplay + +\entryheader +\formdef{compute-size-increments}{\categoryprocedure}{(compute-size-increments \var{list})} +\formdef{compute-size-increments}{\categoryprocedure}{(compute-size-increments \var{list} \var{generation})} +\returns a list as described below +\listlibraries +\endentryheader + +\var{list} must be a list, but each element can be any object. +\var{generation} must be a fixnum between 0 and the value of +\scheme{collect-maximum-generation}, inclusive, or the symbol +\scheme{static}. +If \var{generation} is not supplied, it defaults to the value of +\scheme{collect-maximum-generation}. + +\scheme{compute-size-increments} is like mapping \scheme{compute-size} +over \var{list}, except that any object reachable from an earlier +element of \var{list} is not treated as reachable by a later element +of \var{list}. In addition, each immediate element of \var{list} is +not treated as reachable by earlier elements of \var{list}---although +other values reachable from later elements of \var{list} may be +considered reachable from earlier elements. + +Unlike \var{compute-size}, \scheme{compute-size-increments} does not +consider the \scheme{car} of a weak pair reachable from the weak pair. +It also does not consider the \scheme{car} or \scheme{cdr} of an +ephemeron pair to be reachable from the ephemeron pair, unless the +\scheme{car} is already determined to be reachable (perhaps from an +earlier element of \var{list}); if the \scheme{car} of an ephemeron +pair is discovered to be reachable later (perhaps from a later element +of \var{list}), then the \scheme{cdr} of the ephemeron pair is +considered to be reachable from the \scheme{car}, which has the effect +of charging the memory of the \scheme{cdr} to the same element of +\var{list} as the memory of the \scheme{car}. + +The following examples are valid for machines with 32-bit pointers. + +\schemedisplay +(compute-size-increments (list 0)) ;=> (0) +(compute-size-increments (list (cons 0 0))) ;=> (8) +(compute-size-increments (list (cons 0 0) (cons 0 0))) ;=> (8 8) +(compute-size-increments (let ([p (cons 0 0)]) + (list p p))) ;=> (8 0) +(compute-size-increments (let ([p (cons 0 0)]) + (list (cons 1 p) (cons 1 p)))) ;=> (16 8) +(compute-size-increments (let* ([p (cons 0 0)] + [e (ephemeron-cons p (cons 0 0))]) + (list e p))) ;=> (8 16) +\endschemedisplay diff --git a/csug/threads.stex b/csug/threads.stex index a098280758..195dce82cf 100644 --- a/csug/threads.stex +++ b/csug/threads.stex @@ -90,13 +90,22 @@ synchronized across processors. a thread object. Nothing can be done with the thread object returned by -\scheme{fork-thread}, other than to print it. +\scheme{fork-thread}, other than to print it or use it with inspection +functions such as \scheme{compute-size}. Threads created by foreign code using some means other than \scheme{fork-thread} must call \scheme{Sactivate_thread} (Section~\ref{SECTFOREIGNCLIB}) before touching any Scheme data or calling any Scheme procedures. +%---------------------------------------------------------------------------- +\entryheader +\formdef{get-initial-thread}{\categoryprocedure}{(get-initial-thread)} +\returns a thread object for the initial thread +\listlibraries +\endentryheader + + %---------------------------------------------------------------------------- \entryheader \formdef{thread?}{\categoryprocedure}{(thread? \var{obj})} @@ -117,7 +126,6 @@ relationship to the process id returned by \index{\scheme{get-process-id}}\scheme{get-process-id}, which is the same in all threads. - \section{Mutexes} %---------------------------------------------------------------------------- diff --git a/mats/misc.ms b/mats/misc.ms index 215e5b3c0a..e5a3f6b951 100644 --- a/mats/misc.ms +++ b/mats/misc.ms @@ -1049,6 +1049,108 @@ (fixnum? (let ([k (call/1cc (lambda (k) k))]) (collect) (compute-size k))) ) +(mat compute-size-increments + (error? (compute-size-increments 'not-a-list)) + (error? (compute-size-increments 0)) + (error? (compute-size-increments (list 0) -1)) + (error? (compute-size-increments (list 0) "static")) + (error? (compute-size-increments (list 0) '())) + (begin + (define pair-size (compute-size (cons 1 2))) + #t) + (equal? (list pair-size pair-size) + (compute-size-increments (list (cons 1 2) (cons 3 4)))) + (equal? (list (* 3 pair-size) pair-size) + (let ([l (list 1 2)]) + (compute-size-increments (list (cons 3 l) (cons 4 l))))) + (equal? (list pair-size) + (compute-size-increments (list (weak-cons (make-bytevector 100) #f)))) + (let* ([x (make-bytevector 100)] + [ls (list (lambda () x) x)]) + (equal? (compute-size-increments ls) + (reverse (compute-size-increments (reverse ls))))) + ;; Ephemeron(s) found before key: + (equal? (list pair-size (* 2 pair-size)) + (compute-size-increments (let* ([p (cons 0 0)] + [e (ephemeron-cons p (cons 0 0))]) + (list e p)))) + (equal? (list pair-size (* 3 pair-size)) + (let* ([v (cons 1 2)] + [e (ephemeron-cons v (cons 3 4))]) + (compute-size-increments (list e (cons v #f))))) + (equal? (list (* 4 pair-size) (* 4 pair-size)) + (let* ([v (cons 1 2)] + [e* (list (ephemeron-cons v (cons 3 4)) + (ephemeron-cons v (cons 5 6)))]) + (compute-size-increments (list e* (cons v #f))))) + ;; Key found before ephemeron(s): + (equal? (list (* 2 pair-size) (* 2 pair-size)) + (let* ([v (cons 1 2)] + [e (ephemeron-cons v (cons 3 4))]) + (compute-size-increments (list (cons v #f) e)))) + (equal? (list (* 2 pair-size) (* 6 pair-size)) + (let* ([v (cons 1 2)] + [e* (list (ephemeron-cons v (cons 3 4)) + (ephemeron-cons v (cons 5 6)))]) + (compute-size-increments (list (cons v #f) e*)))) + ;; This call will encounter many kinds of objects, just to make + ;; sure it doesn't fail: + (list? (compute-size-increments (list (call/cc values)) 'static)) + ;; Check that a deactivated thread's continuation can be traversed + ;; for `compute-size-increments`: + (or (not (threaded?)) + (let* ([ready (box #f)] + [saved (box #f)] + [m (make-mutex)] + [N 1000000] + [pause-until (lambda (check) + (let loop () + (unless (check) + (sleep (make-time 'time-duration 10000 0)) + (loop))))] + [th (fork-thread + (lambda () + (let ([bstr (make-bytevector N)]) + (box-cas! ready #f 'go) + ;; Block so that thread becomes deactivated + (mutex-acquire m) + (mutex-release m) + ;; bstr is retained in the thread's continuation until here + (set-box! saved (bytevector-u8-ref bstr 0)) + (pause-until (lambda () (box-cas! ready 'finish 'done))) + ;; Block so that thread becomes deactivated, again + (mutex-acquire m) + (mutex-release m))))]) + (mutex-acquire m) + ;; Wait for thread to start + (pause-until (lambda () (eq? 'go (unbox ready)))) + ;; Wait for thread to become inactive, blocked on the mutex + (pause-until (lambda () (= 1 (#%$top-level-value '$active-threads)))) + ;; Get thread's size, which should include bstr + (let ([pre-sizes (compute-size-increments (list th))]) + (mutex-release m) + ;; Wait for bytevector to be discarded in the thread + (pause-until (lambda () (unbox saved))) + (mutex-acquire m) + (set-box! ready 'finish) + ;; Wait for thread to become inactive again + (pause-until (lambda () (= 1 (#%$top-level-value '$active-threads)))) + ;; Get thread's size, which should'nt include bstr + (let ([post-sizes (compute-size-increments (list th))]) + (mutex-release m) + ;; Wait for thread to exit + (let () + (define $threads (foreign-procedure "(cs)threads" () scheme-object)) + (pause-until (lambda () (= 1 (length ($threads)))))) + ;; Make sure `compute-size-increments` doesn't crash on a + ;; terminated thread: + (compute-size-increments (list th)) + ;; Main result: detected size of `bstr` in the thread + ;; while it was part of the continuation + (and (> (car pre-sizes) N) + (< (car post-sizes) N)))))) +) + (mat compute-composition (error? (compute-composition 0 -1)) (error? (compute-composition 0 "static")) diff --git a/mats/thread.ms b/mats/thread.ms index 3f1a4a9fbb..c6523c5e42 100644 --- a/mats/thread.ms +++ b/mats/thread.ms @@ -89,6 +89,7 @@ (not (thread-condition? m)) (not (mutex? 'mutex)) (not (thread-condition? 'condition)))) + (thread? (get-initial-thread)) (begin (define $threads (foreign-procedure "(cs)threads" () scheme-object)) (define $fib (lambda (x) (if (< x 2) 1 (+ ($fib (- x 1)) ($fib (- x 2)))))) @@ -98,6 +99,8 @@ (lambda () (sleep t)))) (define $thread-check (lambda () + (unless (memq (get-initial-thread) ($threads)) + (errorf #f "initial thread is missing from list")) (let loop ([n 100] [nt (length ($threads))]) (cond [(<= nt $nthreads) diff --git a/release_notes/release_notes.stex b/release_notes/release_notes.stex index 289d6d1eb5..7753e8b165 100644 --- a/release_notes/release_notes.stex +++ b/release_notes/release_notes.stex @@ -58,6 +58,15 @@ Online versions of both books can be found at %----------------------------------------------------------------------------- \section{Functionality Changes}\label{section:functionality} +\subsection{Counting reachable objects from multiple sources (9.5.1)} + +The new procedure \scheme{compute-size-increments} is like +\scheme{compute-size}, but it reports sizes for only newly reached +objects for later elements of a given list. It also treats weak and +ephemeron pairs in a way that assigns sizes to references that cause a +weakly held value to be retained, as opposed to assigning the size to +the pair. + \subsection{Procedure source location without inspector information (9.5.1)} When \scheme{generate-inspector-information} is set to \scheme{#f} and diff --git a/s/Mf-base b/s/Mf-base index 30427e8089..e35cdb1202 100644 --- a/s/Mf-base +++ b/s/Mf-base @@ -149,7 +149,7 @@ macroobj =\ allsrc =\ ${basesrc} ${compilersrc} cmacros.ss ${archincludes} setup.ss debug.ss priminfo.ss primdata.ss layout.ss\ base-lang.ss expand-lang.ss primref.ss types.ss io-types.ss fasl-helpers.ss hashtable-types.ss\ - np-languages.ss + np-languages.ss bitset.ss # doit uses a different Scheme process to compile each target doit: ${PetiteBoot} ${SchemeBoot} ${Cheader} ${Cequates} @@ -497,6 +497,7 @@ setup.so: debug.ss ${patchobj}: ${macroobj} nanopass.so base-lang.ss expand-lang.ss primref.ss types.ss io-types.ss fasl-helpers.ss hashtable-types.ss env.ss cpnanopass.$m cpnanopass.patch: nanopass.so np-languages.ss ${archincludes} 5_4.$m: ../unicode/unicode-char-cases.ss ../unicode/unicode-charinfo.ss +inspect.$m: bitset.ss ${Cheader}: mkheader.so ${macroobj} nanopass.so base-lang.ss expand-lang.ss primref.ss types.ss io-types.ss fasl-helpers.ss hashtable-types.ss (if [ -r ${Cheader} ]; then mv -f ${Cheader} ${Cheader}.bak; fi) diff --git a/s/bitset.ss b/s/bitset.ss new file mode 100644 index 0000000000..3da4ae7bf1 --- /dev/null +++ b/s/bitset.ss @@ -0,0 +1,73 @@ +;; The eq-bitset implementation assumes that the addresses of an +;; object won't change, so only use an eq-bitset while interrupts are +;; disabled from start to end of the eq-bitset use. The eq-bitset +;; implementation also assumes that `$fxaddress` is a perfect hash for +;; a non-immediate object. + +(define (make-eq-bitset) + (make-vector (fxsll 1 (constant eq-bitset-l1-bits)) #f)) + +(constant-case eq-bitset-l2-bits + [(0) + (define (get-l1 bm n) + (let* ([l1 (fxsrl n (constant eq-bitset-l1-shift))] + [bm1 (vector-ref bm l1)]) + (or bm1 + (let ([bm1 (make-fxvector (fxsll 1 (constant eq-bitset-l4-bits)) 0)]) + (vector-set! bm l1 bm1) + bm1)))) + (define (get-l2 bm1 n) bm1) + (define (get-l3 bm1 n) bm1)] + [else + (define (get-l1 bm n) + (let* ([l1 (fxsrl n (constant eq-bitset-l1-shift))] + [bm1 (vector-ref bm l1)]) + (or bm1 + (let ([bm1 (make-vector (fxsll 1 (constant eq-bitset-l2-bits)) #f)]) + (vector-set! bm l1 bm1) + bm1)))) + + (define (get-l2 bm1 n) + (let* ([l2 (fxand (fxsrl n (constant eq-bitset-l2-shift)) + (constant eq-bitset-l2-mask))] + [bm2 (vector-ref bm1 l2)]) + (or bm2 + (let ([bm2 (make-vector (fxsll 1 (constant eq-bitset-l3-bits)) #f)]) + (vector-set! bm1 l2 bm2) + bm2)))) + + (define (get-l3 bm2 n) + (let* ([l3 (fxand (fxsrl n (constant eq-bitset-l3-shift)) + (constant eq-bitset-l3-mask))] + [bm3 (vector-ref bm2 l3)]) + (or bm3 + (let ([bm3 (make-fxvector (fxsll 1 (constant eq-bitset-l4-bits)) 0)]) + (vector-set! bm2 l3 bm3) + bm3))))]) + +(define (get-l4-index n) + (fxand (fxsrl n (constant eq-bitset-l4-shift)) (constant eq-bitset-l4-mask))) + +(define (get-lo-index n) + (fxand (fxsrl n (constant eq-bitset-discard-bits)) + (constant eq-bitset-lo-mask))) + +(define (eq-bitset-member? bm p) + (let* ([n ($fxaddress p)] + [a (get-l3 (get-l2 (get-l1 bm n) n) n)] + [i (get-l4-index n)]) + (fxbit-set? (fxvector-ref a i) (get-lo-index n)))) + +(define (eq-bitset-add! bm p) + (let* ([n ($fxaddress p)] + [a (get-l3 (get-l2 (get-l1 bm n) n) n)] + [i (get-l4-index n)]) + (fxvector-set! a i (fxior (fxvector-ref a i) + (fxsll 1 (get-lo-index n)))))) + +(define (eq-bitset-remove! bm p) + (let* ([n ($fxaddress p)] + [a (get-l3 (get-l2 (get-l1 bm n) n) n)] + [i (get-l4-index n)]) + (fxvector-set! a i (fxand (fxvector-ref a i) + (fxnot (fxsll 1 (get-lo-index n))))))) diff --git a/s/cmacros.ss b/s/cmacros.ss index bd188e982a..1f8d6d7e5e 100644 --- a/s/cmacros.ss +++ b/s/cmacros.ss @@ -1750,6 +1750,50 @@ (syntax-rules () ((_ x) (let ((t x)) (and (pair? t) (symbol? (car t))))))) +;;; bitset constants + +;; For a bitset ranging over all fixnum values, use an array of ... +;; array of fixnums, where the lo bits of a key fixnum are used to +;; index a bit within one bitset fixnum. +(define-constant eq-bitset-lo-bits (fx- (integer-length (constant fixnum-bits)) 1)) + +;; Using `$fxaddress` discards typemod bits, but we may be able to +;; discard additional bits due to allocation alignment: +(define-constant eq-bitset-discard-bits (fx- (log2 (constant byte-alignment)) + (log2 (constant typemod)))) + +(constant-case ptr-bits + [(64) + ;; Break fixnum into 5 levels: [l1:14] [l2:14] [l3:14] [l4:14-discard] [lo:5] + (define-constant eq-bitset-l1-bits 14) + (define-constant eq-bitset-l2-bits 14) + (define-constant eq-bitset-l3-bits 14)] + [(32) + ;; Break fixnum into 3 levels: [l1:13] [l4:13-discard] [lo:4] + (define-constant eq-bitset-l1-bits 13) + (define-constant eq-bitset-l2-bits 0) + (define-constant eq-bitset-l3-bits 0)]) + +(define-constant eq-bitset-l4-bits (fx- (constant fixnum-bits) + (constant eq-bitset-l1-bits) + (constant eq-bitset-l2-bits) + (constant eq-bitset-l3-bits) + (constant eq-bitset-lo-bits) + (constant eq-bitset-discard-bits))) + +(define-constant eq-bitset-l1-shift (fx- (constant fixnum-bits) (constant eq-bitset-l1-bits))) +(define-constant eq-bitset-l2-shift (fx- (constant fixnum-bits) (constant eq-bitset-l1-bits) + (constant eq-bitset-l2-bits))) +(define-constant eq-bitset-l3-shift (fx- (constant fixnum-bits) (constant eq-bitset-l1-bits) + (constant eq-bitset-l2-bits) (constant eq-bitset-l3-bits))) +(define-constant eq-bitset-l4-shift (fx+ (constant eq-bitset-lo-bits) + (constant eq-bitset-discard-bits))) + +(define-constant eq-bitset-l2-mask (fx- (fxsll 1 (constant eq-bitset-l2-bits)) 1)) +(define-constant eq-bitset-l3-mask (fx- (fxsll 1 (constant eq-bitset-l3-bits)) 1)) +(define-constant eq-bitset-l4-mask (fx- (fxsll 1 (constant eq-bitset-l4-bits)) 1)) +(define-constant eq-bitset-lo-mask (fx- (fxsll 1 (constant eq-bitset-lo-bits)) 1)) + ;;; heap/stack mangement constants (define-constant collect-interrupt-index 1) @@ -2645,5 +2689,6 @@ Sreturn Scall-one-result Scall-any-results + segment-info )) ) diff --git a/s/cpnanopass.ss b/s/cpnanopass.ss index 310054441b..419139aede 100644 --- a/s/cpnanopass.ss +++ b/s/cpnanopass.ss @@ -5019,6 +5019,102 @@ (inline-accessor port-name port-name-disp) (inline-accessor $thread-tc thread-tc-disp) ) + (let () + (define (build-maybe-seginfo e) + (let ([ptr (make-assigned-tmp 'ptr)] + [seginfo (make-assigned-tmp 'seginfo)]) + (define (build-level-3 seginfo k) + (constant-case segment-table-levels + [(3) + (let ([s3 (make-assigned-tmp 's3)]) + `(let ([,s3 ,(%mref ,seginfo + ,(%inline sll ,(%inline srl ,ptr (immediate ,(+ (constant segment-t1-bits) + (constant segment-t2-bits)))) + (immediate ,(constant log2-ptr-bytes))) + ,0)]) + (if ,(%inline eq? ,s3 (immediate 0)) + (immediate 0) + ,(k s3))))] + [else (k seginfo)])) + (define (build-level-2 s3 k) + (constant-case segment-table-levels + [(2 3) + (let ([s2 (make-assigned-tmp 's2)]) + `(let ([,s2 ,(%mref ,s3 ,(%inline logand + ,(%inline srl ,ptr (immediate ,(fx- (constant segment-t1-bits) + (constant log2-ptr-bytes)))) + (immediate ,(fxsll (fx- (fxsll 1 (constant segment-t2-bits)) 1) + (constant log2-ptr-bytes)))) + 0)]) + (if ,(%inline eq? ,s2 (immediate 0)) + (immediate 0) + ,(k s2))))] + [else (k s3)])) + `(let ([,ptr ,(%inline srl ,(%inline + ,e (immediate ,(fx- (constant typemod) 1))) + (immediate ,(constant segment-offset-bits)))]) + (let ([,seginfo (literal ,(make-info-literal #f 'entry (lookup-c-entry segment-info) 0))]) + ,(build-level-3 seginfo + (lambda (s3) + (build-level-2 s3 + (lambda (s2) + (%mref ,s2 ,(%inline sll ,(%inline logand ,ptr + (immediate ,(fx- (fxsll 1 (constant segment-t1-bits)) 1))) + (immediate ,(constant log2-ptr-bytes))) + 0))))))))) + (define (build-space-test e space) + `(if ,(%type-check mask-fixnum type-fixnum ,e) + ,(%constant sfalse) + (if ,(%type-check mask-immediate type-immediate ,e) + ,(%constant sfalse) + ,(let ([s-e (build-maybe-seginfo e)] + [si (make-assigned-tmp 'si)]) + `(let ([,si ,s-e]) + (if ,(%inline eq? ,si (immediate 0)) + ,(%constant sfalse) + ,(let ([s `(inline ,(make-info-load 'unsigned-8 #f) ,%load ,si ,%zero (immediate 0))]) + (%inline eq? (immediate ,space) + ,(%inline logand ,s (immediate ,(fxnot (constant space-locked)))))))))))) + + (define-inline 2 $maybe-seginfo + [(e) + (bind #t (e) + `(if ,(%type-check mask-fixnum type-fixnum ,e) + ,(%constant sfalse) + (if ,(%type-check mask-immediate type-immediate ,e) + ,(%constant sfalse) + ,(let ([s-e (build-maybe-seginfo e)] + [si (make-assigned-tmp 'si)]) + `(let ([,si ,s-e]) + (if ,(%inline eq? ,si (immediate 0)) + ,(%constant sfalse) + ,si))))))]) + ;; Generation is first unsigned char in `seginfo` as defined in "types.h" + (define-inline 2 $seginfo-generation + [(e) + (bind #f (e) (build-object-ref #f 'unsigned-8 e %zero 1))]) + ;; Space is second unsigned char in `seginfo` as defined in "types.h" + (define-inline 2 $seginfo-space + [(e) + (bind #f (e) + (%inline logand ,(build-object-ref #f 'unsigned-8 e %zero 0) + (immediate ,(fxnot (fix (constant space-locked))))))]) + + (define-inline 2 $generation + [(e) + (bind #t (e) + `(if ,(%type-check mask-fixnum type-fixnum ,e) + ,(%constant sfalse) + ,(let ([s-e (build-maybe-seginfo e)] + [si (make-assigned-tmp 'si)]) + `(let ([,si ,s-e]) + (if ,(%inline eq? ,si (immediate 0)) + ,(%constant sfalse) + ,(build-object-ref #f 'unsigned-8 si %zero 1))))))]) + (define-inline 2 weak-pair? + [(e) (bind #t (e) (build-space-test e (constant space-weakpair)))]) + (define-inline 2 ephemeron-pair? + [(e) (bind #t (e) (build-space-test e (constant space-ephemeron)))])) + (define-inline 2 unbox [(e) (bind #t (e) diff --git a/s/inspect.ss b/s/inspect.ss index bd54c87116..089b69fc06 100644 --- a/s/inspect.ss +++ b/s/inspect.ss @@ -2389,7 +2389,6 @@ (let () (define rtd-size (csv7:record-field-accessor #!base-rtd 'size)) (define rtd-flds (csv7:record-field-accessor #!base-rtd 'flds)) - (define $generation (foreign-procedure "(cs)generation" (ptr) ptr)) (define $get-code-obj (foreign-procedure "(cs)get_code_obj" (int ptr iptr iptr) ptr)) (define $code-reloc-size (lambda (x) @@ -2427,41 +2426,134 @@ (define align (lambda (n) (fxlogand (fx+ n (fx- (constant byte-alignment) 1)) (fx- (constant byte-alignment))))) + (include "bitset.ss") - (set-who! $compute-size - (rec $compute-size + (define (thread->stack-objects thread) + (with-tc-mutex + (let ([tc ($thread-tc thread)]) + (cond + [(eqv? tc 0) + ;; Thread terminated + '()] + [(zero? ($object-ref 'integer-32 tc (constant tc-active-disp))) + ;; Inactive, so we can traverse it while holding the tc mutex + (let ([stack ($object-ref 'scheme-object tc (constant tc-scheme-stack-disp))]) + (let loop ([frame ($object-ref 'scheme-object tc (constant tc-sfp-disp))] [x* '()]) + (cond + [(fx= frame stack) + x*] + [else + (let* ([ret ($object-ref 'scheme-object frame 0)] + [size ($object-ref 'scheme-object ret (constant return-address-frame-size-disp))] + [livemask ($object-ref 'scheme-object ret (constant return-address-livemask-disp))] + [next-frame (fx- frame size)]) + (let frame-loop ([p (fx+ next-frame 1)] [livemask livemask] [x* x*]) + (if (eqv? livemask 0) + (loop next-frame x*) + (frame-loop (fx+ p 1) + (bitwise-arithmetic-shift-right livemask 1) + (if (bitwise-bit-set? livemask 0) + (cons ($object-ref 'scheme-object p 0) x*) + x*)))))])))] + [else + ;; Can't inspect active thread + '()])))) + + (define (thread->objects thread) + ;; Get immediate content while holding the tc mutex to be sure + ;; that the thread doesn't terminate while getting its content + (with-tc-mutex + (let ([tc ($thread-tc thread)]) + (cond + [(eqv? tc 0) + ;; Thread terminated + '()] + [else + (map (lambda (disp) ($object-ref 'scheme-object tc disp)) + tc-ptr-offsets)])))) + + ;; call with interrupts disabled if not `single-inspect-mode?` + (set-who! $compute-size-increments + (rec $compute-size-increments (case-lambda - [(x maxgen) ($compute-size x maxgen (make-eq-hashtable))] - [(x maxgen size-ht) - (define cookie (cons 'date 'nut)) ; recreate on each call to $compute-size + [(x* maxgen) ($compute-size-increments x* maxgen #f (make-eq-bitset))] + [(x* maxgen single-inspect-mode? size-ht-or-bitset) + (define ephemeron-triggers #f) + (define ephemeron-triggers-bitset #f) + (define ephemeron-non-keys (and (not single-inspect-mode?) (make-eq-hashtable))) + (define cookie (and single-inspect-mode? + (cons 'date 'nut))) ; recreate on each call to $compute-size-increments (define compute-size (lambda (x) - (if (or ($immediate? x) - (let ([g ($generation x)]) - (or (not g) (fx> g maxgen)))) - 0 - (let ([a (eq-hashtable-cell size-ht x #f)]) + (let ([si ($maybe-seginfo x)]) + (cond + [(or (not si) + (fx> ($seginfo-generation si) maxgen)) + 0] + [single-inspect-mode? + (let ([a (eq-hashtable-cell size-ht-or-bitset x #f)]) (cond - [(cdr a) => - (lambda (p) - ; if we find our cookie, return 0 to avoid counting shared structure twice. - ; otherwise, (car p) must be a cookie from an earlier call to $compute-size, - ; so return the recorded size - (if (eq? (car p) cookie) - 0 - (begin - (set-car! p cookie) - (cdr p))))] - [else - (let ([p (cons cookie 0)]) - (set-cdr! a p) - (let ([size (really-compute-size x)]) - (set-cdr! p size) - size))]))))) + [(cdr a) => + (lambda (p) + ; if we find our cookie, return 0 to avoid counting shared structure twice. + ; otherwise, (car p) must be a cookie from an earlier call to $compute-size, + ; so return the recorded size + (if (eq? (car p) cookie) + 0 + (begin + (set-car! p cookie) + (cdr p))))] + [else + (let ([p (cons cookie 0)]) + (set-cdr! a p) + (let ([size (really-compute-size x si)]) + (set-cdr! p size) + size))]))] + [else + (cond + [(eq-bitset-member? size-ht-or-bitset x) 0] + [else + (eq-bitset-add! size-ht-or-bitset x) + (let ([size (really-compute-size x si)]) + (let ([ds (and ephemeron-triggers-bitset + (eq-bitset-member? ephemeron-triggers-bitset x) + (eq-hashtable-ref ephemeron-triggers x #f))]) + (cond + [ds + (eq-hashtable-delete! ephemeron-triggers x) + (fold-left (lambda (size d) (fx+ size (compute-size d))) + size + ds)] + [else size])))])])))) (define really-compute-size - (lambda (x) + (lambda (x si) (cond - [(pair? x) (fx+ (constant size-pair) (compute-size (car x)) (compute-size (cdr x)))] + [(pair? x) + (let ([space ($seginfo-space si)]) + (cond + [(and (eqv? space (constant space-weakpair)) + (not single-inspect-mode?)) + (fx+ (constant size-pair) (compute-size (cdr x)))] + [(and (eqv? space (constant space-ephemeron)) + (not single-inspect-mode?) + (let ([a (car x)]) + (not (or ($immediate? a) + (let ([g ($generation a)]) + (or (not g) (fx> g maxgen))) + (and (eq-bitset-member? size-ht-or-bitset a) + (not (eq-hashtable-ref ephemeron-non-keys a #f))))))) + (let ([d (cdr x)]) + (unless ($immediate? d) + (unless ephemeron-triggers-bitset + (set! ephemeron-triggers-bitset (make-eq-bitset)) + (set! ephemeron-triggers (make-eq-hashtable))) + (let ([v (car x)]) + (eq-bitset-add! ephemeron-triggers-bitset v) + (let ([a (eq-hashtable-cell ephemeron-triggers v '())]) + (set-cdr! a (cons d (cdr a))))))) + (constant size-pair)] + [else + (fx+ (constant size-pair) (compute-size (car x)) (compute-size (cdr x)))]))] [(symbol? x) (fx+ (constant size-symbol) (compute-size (#3%$top-level-value x)) @@ -2553,12 +2645,13 @@ (compute-size ($port-info x)) (compute-size (port-name x)))] [(thread? x) - (let ([tc ($object-ref 'scheme-object x (constant thread-tc-disp))]) - (fold-left - (lambda (size disp) - (fx+ size (compute-size ($object-ref 'scheme-object tc disp)))) - (constant size-thread) - tc-ptr-offsets))] + (fx+ (fold-left (lambda (size x) + (fx+ size (compute-size x))) + (constant size-thread) + (thread->objects x)) + (fold-left (lambda (size x) (fx+ size (compute-size x))) + 0 + (thread->stack-objects x)))] [($tlc? x) (fx+ (constant size-tlc) (compute-size ($tlc-ht x)) @@ -2566,9 +2659,35 @@ (compute-size ($tlc-next x)))] [($rtd-counts? x) (constant size-rtd-counts)] [else ($oops who "missing case for ~s" x)]))) - ; ensure size-ht isn't counted in the size of any object - (eq-hashtable-set! size-ht size-ht (cons cookie 0)) - (compute-size x)]))) + (cond + [single-inspect-mode? + ; ensure size-ht isn't counted in the size of any object + (eq-hashtable-set! size-ht-or-bitset size-ht-or-bitset (cons cookie 0)) + (map compute-size x*)] + [else + ; ensure bitset isn't counted in the size of any object + (eq-bitset-add! size-ht-or-bitset size-ht-or-bitset) + ;; Stop at each element of `x` when getting results for other elements, + ;; but don't treat later elements as already-reached ephemeron keys: + (for-each (lambda (x) + (eq-bitset-add! size-ht-or-bitset x) + (eq-hashtable-set! ephemeron-non-keys x #t)) + x*) + ;; Traverse `x*` in order: + (let loop ([x* x*]) + (cond + [(null? x*) '()] + [else + (let ([x (car x*)]) + (eq-bitset-remove! size-ht-or-bitset x) + (eq-hashtable-delete! ephemeron-non-keys x) + (cons (compute-size x) + (loop (cdr x*))))]))])]))) + + (set-who! $compute-size + (case-lambda + [(x maxgen) (car ($compute-size-increments (list x) maxgen #t (make-eq-hashtable)))] + [(x maxgen size-ht) (car ($compute-size-increments (list x) maxgen #t size-ht))])) (set-who! $compute-composition (lambda (x maxgen) @@ -2708,8 +2827,8 @@ (compute-composition! (port-name x))] [(thread? x) (incr! thread (constant size-thread)) - (let ([tc ($object-ref 'scheme-object x (constant thread-tc-disp))]) - (for-each (lambda (disp) (compute-composition! ($object-ref 'scheme-object tc disp))) tc-ptr-offsets))] + (for-each compute-composition! (thread->objects x)) + (for-each compute-composition! (thread->stack-objects x))] [($tlc? x) (incr! tlc (constant size-tlc)) (compute-composition! ($tlc-ht x)) @@ -2838,10 +2957,12 @@ (if (input-port? x) (construct-proc (port-input-buffer x) (th)) (th))))] [(thread? x) (let ([tc ($object-ref 'scheme-object x (constant thread-tc-disp))]) - (let f ([disp-list tc-ptr-offsets]) - (if (null? disp-list) - next-proc - (construct-proc ($object-ref 'scheme-object tc (car disp-list)) (f (cdr tc-ptr-offsets))))))] + (if (eqv? tc 0) + next-proc + (let f ([disp-list tc-ptr-offsets]) + (if (null? disp-list) + next-proc + (construct-proc ($object-ref 'scheme-object tc (car disp-list)) (f (cdr tc-ptr-offsets)))))))] [($tlc? x) (construct-proc ($tlc-ht x) ($tlc-keyval x) ($tlc-next x) next-proc)] [else ($oops who "missing case for ~s" x)])]) ; check if this node is what we're looking for @@ -2874,6 +2995,16 @@ [(x) ($compute-size x (collect-maximum-generation))] [(x g) ($compute-size x (filter-generation who g))])) + (set-who! compute-size-increments + (rec compute-size-increments + (case-lambda + [(x*) (compute-size-increments x* (collect-maximum-generation))] + [(x* g) + (unless (list? x*) ($oops who "~s is not a list" x*)) + (let ([g (filter-generation who g)]) + (with-interrupts-disabled + ($compute-size-increments x* g)))]))) + (set-who! compute-composition (case-lambda [(x) ($compute-composition x (collect-maximum-generation))] diff --git a/s/primdata.ss b/s/primdata.ss index 244c957bd3..6477f7519b 100644 --- a/s/primdata.ss +++ b/s/primdata.ss @@ -893,6 +893,7 @@ (define-symbol-flags* ([libraries] [flags primitive proc]) ; constant parameters (directory-separator [sig [() -> (char)]] [flags pure unrestricted true cp02]) + (get-initial-thread [sig [() -> (boolean)]] [flags pure unrestricted true]) (get-process-id [sig [() -> (uint)]] [flags pure unrestricted]) (get-thread-id [sig [() -> (uint)]] [flags discard unrestricted]) (machine-type [sig [() -> (symbol)]] [flags pure unrestricted true cp02]) @@ -1213,6 +1214,7 @@ (compile-whole-library [sig [(string string) -> (void)]] [flags]) (compute-composition [sig [(ptr) -> (list)] [(ptr sub-ufixnum) -> (list)]] [flags alloc]) (compute-size [sig [(ptr) -> (uint)] [(ptr sub-ufixnum) -> (uint)]] [flags alloc]) + (compute-size-increments [sig [(list) -> (list)] [(list sub-ufixnum) -> (list)]] [flags alloc]) (condition-broadcast [feature pthreads] [sig [(condition-object) -> (void)]] [flags true]) (condition-continuation [sig [(continuation-condition) -> (ptr)]] [flags pure mifoldable discard]) (condition-signal [feature pthreads] [sig [(condition-object) -> (void)]] [flags true]) @@ -1756,6 +1758,7 @@ ($compound-condition? [flags pure unrestricted mifoldable discard]) ($compute-composition [flags]) ($compute-size [flags]) + ($compute-size-increments [flags]) ($constituent? [flags]) ($constituent-ports [flags]) ($continuation? [flags]) @@ -1997,6 +2000,7 @@ ($fxvector-set-immutable! #;[sig [(fxvector) -> (ptr)]] [flags true]) ($gc-cpu-time [flags true]) ($gc-real-time [flags true]) + ($generation [flags]) ($gensym->pretty-name [flags]) ($guard [flags]) ($hand-coded [flags]) @@ -2096,6 +2100,7 @@ ($map [flags]) ($mark-invoked! [flags]) ($maybe-compile-file [flags]) + ($maybe-seginfo [flags]) ($noexpand? [flags]) ($np-boot-code [flags]) ($np-compile [flags]) @@ -2162,6 +2167,8 @@ ($sc-put-property! [flags]) ($script [flags]) ($sealed-record? [flags pure mifoldable discard]) + ($seginfo-generation [flags]) + ($seginfo-space [flags]) ($set-code-byte! [flags]) ($set-code-long! [flags]) ($set-code-quad! [flags]) @@ -2213,6 +2220,7 @@ ($tc-field [flags]) ($tc [flags]) ($thread-list [flags]) + ($thread-stack-token [flags]) ($thread-tc [flags]) ($tlc? [flags pure unrestricted mifoldable discard]) ($tlc-ht [flags mifoldable discard]) diff --git a/s/prims.ss b/s/prims.ss index a56fde1174..8cb141b64d 100644 --- a/s/prims.ss +++ b/s/prims.ss @@ -59,9 +59,7 @@ scheme-object)) (define weak-pair? - (foreign-procedure "(cs)s_weak_pairp" - (scheme-object) - scheme-object)) + (lambda (p) (weak-pair? p))) (define ephemeron-cons (foreign-procedure "(cs)s_ephemeron_cons" @@ -69,9 +67,7 @@ scheme-object)) (define ephemeron-pair? - (foreign-procedure "(cs)s_ephemeron_pairp" - (scheme-object) - scheme-object)) + (lambda (p) (ephemeron-pair? p))) (define $split-continuation (foreign-procedure "(cs)single_continuation" @@ -1483,6 +1479,7 @@ (define $close-resurrected-mutexes&conditions) (define $tc-mutex) (define $collect-cond) +(define get-initial-thread) (let () ; scheme-object's below are mutex and condition addresses, which are ; assumed to be at least ptr aligned and therefore look like fixnums @@ -1499,6 +1496,7 @@ (define cw (foreign-procedure "(cs)condition_wait" (scheme-object scheme-object scheme-object) boolean)) (define cb (foreign-procedure "(cs)condition_broadcast" (scheme-object) void)) (define cs (foreign-procedure "(cs)condition_signal" (scheme-object) void)) +(define ts (foreign-procedure "(cs)threads" () scheme-object)) (define-record-type (condition $make-condition $condition?) (fields (mutable addr $condition-addr $condition-addr-set!)) @@ -1631,6 +1629,10 @@ (set! $tc-mutex ($make-mutex ($raw-tc-mutex))) (set! $collect-cond ($make-condition ($raw-collect-cond))) + +(set! get-initial-thread + (let ([thread (car (ts))]) + (lambda () thread))) )) (let () @@ -1721,6 +1723,19 @@ (define-tlc-parameter $tlc-next $set-tlc-next!) ) +(define $generation + (lambda (x) + ($generation x))) +(define $maybe-seginfo + (lambda (x) + ($maybe-seginfo x))) +(define $seginfo-generation + (lambda (x) + ($seginfo-generation x))) +(define $seginfo-space + (lambda (x) + ($seginfo-space x))) + (define ($fxaddress x) (#3%$fxaddress x)) (define $logand