Chez Scheme: move write-barrier fixnum guard outside write fence
On an Apple M1, an example like (let ([v (make-vector 1000000)]) (for* ([_ (in-range 100)] [i (in-range 1000000)]) (vector-set! v i (+ 1 i)))) would generate tightly interleaved fence and store operations, which seems to make the processor unhappy so that the code run 40x slower than it should. A key part of the example is that `(+ 1 i)` defeats cp0-level inference that the result will be a fixnum. A dynamic fixnum test avoids adding the update to a remembered set as part of the write barrier, but the memory fence needed for ARM must be before the store, while the fixnum test was after the store. This change bundles the write fence and remember-set update under a `fixnum?` guard, so they happen to gether or not at all --- at the small cost of generating the store instruction(s) in two branches. In the example above, neither the fence nor remember-set update happen, but changing `(+ 1 i)` to `(quote x)` triggers both, and performance is still ok.
This commit is contained in:
parent
0a945cd5f7
commit
41623f3027
|
@ -4097,22 +4097,26 @@
|
|||
,(%inline sll ,e
|
||||
(immediate ,(fx- (constant char-data-offset) (constant fixnum-offset))))
|
||||
,(%constant type-char))))
|
||||
(define need-store-fence?
|
||||
(if-feature pthreads
|
||||
(constant-case architecture
|
||||
[(arm32 arm64) #t]
|
||||
[else #f])
|
||||
#f))
|
||||
(define add-store-fence
|
||||
;; A store--store fence should be good enough for safety on a platform that
|
||||
;; orders load dependencies (which is anything except Alpha)
|
||||
(lambda (e)
|
||||
(if-feature pthreads
|
||||
(constant-case architecture
|
||||
[(arm32 arm64) `(seq ,(%inline store-store-fence) ,e)]
|
||||
[else e])
|
||||
e)))
|
||||
(if need-store-fence?
|
||||
`(seq ,(%inline store-store-fence) ,e)
|
||||
e)))
|
||||
(define build-dirty-store
|
||||
(case-lambda
|
||||
[(base offset e) (build-dirty-store base %zero offset e)]
|
||||
[(base index offset e) (build-dirty-store base index offset e
|
||||
(lambda (base index offset e) `(set! ,(%mref ,base ,index ,offset) ,e))
|
||||
(lambda (s r) (add-store-fence `(seq ,s ,r))))]
|
||||
[(base index offset e build-assign build-barrier-seq)
|
||||
(lambda (s r) `(seq ,s ,r)))]
|
||||
[(base index offset e build-assign build-remember-seq)
|
||||
(nanopass-case (L7 Expr) e
|
||||
[(call ,info ,mdcl ,pr ,e)
|
||||
(guard (eq? (primref-name pr) '$immediate))
|
||||
|
@ -4135,17 +4139,30 @@
|
|||
(bind #f ([e e])
|
||||
; eval a second so the address is not live across any calls
|
||||
(bind #t ([a a])
|
||||
(build-barrier-seq
|
||||
(build-assign a %zero 0 e)
|
||||
(%inline remember ,a))))
|
||||
(add-store-fence
|
||||
(build-remember-seq
|
||||
(build-assign a %zero 0 e)
|
||||
(%inline remember ,a)))))
|
||||
(bind #t ([e e])
|
||||
; eval a second so the address is not live across any calls
|
||||
(bind #t ([a a])
|
||||
(build-barrier-seq
|
||||
(build-assign a %zero 0 e)
|
||||
`(if ,(%type-check mask-fixnum type-fixnum ,e)
|
||||
,(%constant svoid)
|
||||
,(%inline remember ,a))))))))])]))
|
||||
(if need-store-fence?
|
||||
;; Fence needs to be before store, so duplicate
|
||||
;; store instruction to lift out fixnum check; this
|
||||
;; appears to be worthwhile on the Apple M1 to avoid
|
||||
;; tighly interleaved writes and fences
|
||||
`(if ,(%type-check mask-fixnum type-fixnum ,e)
|
||||
,(build-assign a %zero 0 e)
|
||||
,(add-store-fence
|
||||
(build-remember-seq
|
||||
(build-assign a %zero 0 e)
|
||||
(%inline remember ,a))))
|
||||
;; Generate one copy of store instruction
|
||||
(build-remember-seq
|
||||
(build-assign a %zero 0 e)
|
||||
`(if ,(%type-check mask-fixnum type-fixnum ,e)
|
||||
,(%constant svoid)
|
||||
,(%inline remember ,a)))))))))])]))
|
||||
(define make-build-cas
|
||||
(lambda (old-v)
|
||||
(lambda (base index offset v)
|
||||
|
@ -4154,10 +4171,9 @@
|
|||
(inline ,(make-info-condition-code 'eq? #f #t) ,%condition-code)))))
|
||||
(define build-cas-seq
|
||||
(lambda (cas remember)
|
||||
(add-store-fence
|
||||
`(if ,cas
|
||||
(seq ,remember ,(%constant strue))
|
||||
,(%constant sfalse)))))
|
||||
`(if ,cas
|
||||
(seq ,remember ,(%constant strue))
|
||||
,(%constant sfalse))))
|
||||
(define build-$record
|
||||
(lambda (tag args)
|
||||
(bind #f (tag)
|
||||
|
|
Loading…
Reference in New Issue
Block a user