make installers: fixes for parallel mode and timeouts

The `parallel` form in a site configuration was working only
with leaf machines; now, it should be fixed for `sequential`
nested under `parallel`.

Also, fix timeout handling and reporting of failures.
(cherry picked from commit 266e4ab119)
This commit is contained in:
Matthew Flatt 2013-12-18 11:34:22 -07:00 committed by Ryan Culpepper
parent 9d6e629a2a
commit d08ff9c217

View File

@ -22,6 +22,7 @@
(define default-release? #f) (define default-release? #f)
(define default-clean? #f) (define default-clean? #f)
(define dry-run #f)
(define snapshot-install-name "snapshot") (define snapshot-install-name "snapshot")
@ -35,6 +36,12 @@
(set! default-release? #t)] (set! default-release? #t)]
[("--clean") "Erase client directories before building" [("--clean") "Erase client directories before building"
(set! default-clean? #t)] (set! default-clean? #t)]
[("--dry-run") mode
("Don't actually use the clients;"
" <mode> can be `ok', `fail', `error', `stuck', or `frozen'")
(unless (member mode '("ok" "fail" "error" "stuck" "frozen"))
(raise-user-error 'drive-clients "bad dry-run mode: ~a" mode))
(set! dry-run (string->symbol mode))]
#:args (config-file config-mode #:args (config-file config-mode
server server-port server-hosts pkgs doc-search server server-port server-hosts pkgs doc-search
dist-name dist-base dist-dir) dist-name dist-base dist-dir)
@ -105,7 +112,14 @@
(map (lambda (p) (if (path? p) (path->string p) p)) (map (lambda (p) (if (path? p) (path->string p) p))
(cons exe args)))) (cons exe args))))
(flush-output) (flush-output)
(apply system* exe args)) (case dry-run
[(ok) #t]
[(fail) #f]
[(error) (error "error")]
[(stuck) (semaphore-wait (make-semaphore))]
[(frozen) (break-enabled #f) (semaphore-wait (make-semaphore))]
[else
(apply system* exe args)]))
(define (system*/string . args) (define (system*/string . args)
(define s (open-output-string)) (define s (open-output-string))
@ -168,26 +182,28 @@
(when vbox (when vbox
(printf "Starting VirtualBox machine ~s\n" vbox) (printf "Starting VirtualBox machine ~s\n" vbox)
(flush-output) (flush-output)
(case (vbox-state vbox) (unless dry-run
[(running) (void)] (case (vbox-state vbox)
[(paused) (vbox-control vbox "resume")] [(running) (void)]
[(off saved) (call-with-vbox-lock [(paused) (vbox-control vbox "resume")]
(lambda () [(off saved) (call-with-vbox-lock
(check-count) (lambda ()
(vbox-start vbox)))]) (check-count)
(unless (eq? (vbox-state vbox) 'running) (vbox-start vbox)))])
(error 'start-client "could not get virtual machine started: ~s" (client-name c)))) (unless (eq? (vbox-state vbox) 'running)
;; pause a little to let the VM get networkign ready, etc. (error 'start-client "could not get virtual machine started: ~s" (client-name c)))
(sleep 3)) ;; pause a little to let the VM get networking ready, etc.
(sleep 3))))
(define (stop-client c) (define (stop-client c)
(define vbox (get-opt c '#:vbox)) (define vbox (get-opt c '#:vbox))
(when vbox (when vbox
(printf "Stopping VirtualBox machine ~s\n" vbox) (printf "Stopping VirtualBox machine ~s\n" vbox)
(flush-output) (flush-output)
(vbox-control vbox "savestate") (unless dry-run
(unless (eq? (vbox-state vbox) 'saved) (vbox-control vbox "savestate")
(error 'start-client "virtual machine isn't in the expected saved state: ~s" c)))) (unless (eq? (vbox-state vbox) 'saved)
(error 'start-client "virtual machine isn't in the expected saved state: ~s" c)))))
;; ---------------------------------------- ;; ----------------------------------------
@ -410,59 +426,76 @@
;; ---------------------------------------- ;; ----------------------------------------
(define stop? #f) (define stop? #f)
(define failures null)
(define failures-sema (make-semaphore 1))
(define (limit-and-report-failure c timeout-factor thunk) (define failures (make-hasheq))
(unless stop? (define (record-failure name)
(define cust (make-custodian)) ;; relies on atomicity of `eq?'-based hash table:
(define timeout (or (get-opt c '#:timeout) (hash-set! failures (string->symbol name) #t))
(* 30 60)))
(define orig-thread (current-thread))
(define timeout? #f)
(begin0
(parameterize ([current-custodian cust])
(thread (lambda ()
(sleep (* timeout-factor timeout))
(eprintf "timeout for ~s\n" (client-name c))
;; try nice interrupt, first:
(set! timeout? #t)
(break-thread orig-thread)
(sleep 1)
;; force quit:
(custodian-shutdown-all cust)))
(with-handlers ([exn? (lambda (exn)
(when (exn:break? exn)
(unless timeout?
(set! stop? #t)))
(log-error "~a failed..." (client-name c))
(log-error (exn-message exn))
#f)])
(thunk)))
(custodian-shutdown-all cust))))
(define (client-thread c sequential? thunk) (define (limit-and-report-failure c timeout-factor
shutdown report-fail
thunk)
(define cust (make-custodian))
(define timeout (or (get-opt c '#:timeout)
(* 30 60)))
(define orig-thread (current-thread))
(define timeout? #f)
(begin0
(parameterize ([current-custodian cust])
(thread (lambda ()
(sleep (* timeout-factor timeout))
(eprintf "timeout for ~s\n" (client-name c))
;; try nice interrupt, first:
(set! timeout? #t)
(break-thread orig-thread)
(sleep 1)
;; force quit:
(report-fail)
(shutdown)))
(with-handlers ([exn? (lambda (exn)
(when (exn:break? exn)
;; This is useful only when everything is
;; sequential, which is the only time that
;; we'll get break events that aren't timeouts:
(unless timeout?
(set! stop? #t)))
(log-error "~a failed..." (client-name c))
(log-error (exn-message exn))
(report-fail)
#f)])
(thunk)))
(custodian-shutdown-all cust)))
(define (client-thread c all-seq? proc)
(unless stop? (unless stop?
(define log-dir (build-path "build" "log")) (define log-dir (build-path "build" "log"))
(define log-file (build-path log-dir (client-name c))) (define log-file (build-path log-dir (client-name c)))
(make-directory* log-dir) (make-directory* log-dir)
(printf "Logging build: ~a\n" log-file) (printf "Logging build: ~a\n" log-file)
(flush-output) (flush-output)
(define (go) (define cust (make-custodian))
(define (go shutdown)
(define p (open-output-file log-file (define p (open-output-file log-file
#:exists 'truncate/replace)) #:exists 'truncate/replace))
(file-stream-buffer-mode p 'line) (file-stream-buffer-mode p 'line)
(define (report-fail)
(record-failure (client-name c))
(printf "Build FAILED for ~s\n" (client-name c)))
(unless (parameterize ([current-output-port p] (unless (parameterize ([current-output-port p]
[current-error-port p]) [current-error-port p])
(thunk)) (proc shutdown report-fail))
(call-with-semaphore (report-fail))
failures-sema (display-time))
(lambda ()
(set! failures (cons (client-name c) failures))))
(printf "Build FAILED for ~s\n" (client-name c))))
(cond (cond
[sequential? (go) (thread void)] [all-seq?
[else (thread go)]))) (go (lambda () (exit 1)))
(thread void)]
[else
(parameterize ([current-custodian cust])
(thread
(lambda ()
(go (lambda ()
(custodian-shutdown-all cust))))))])))
;; ---------------------------------------- ;; ----------------------------------------
@ -470,56 +503,64 @@
(display-time) (display-time)
(void (void
(let loop ([config config] (sync
[mode 'sequential] (let loop ([config config]
[opts (hasheq)]) [all-seq? #t] ; Ctl-C handling is better if nothing is in parallel
(unless stop? [opts (hasheq)])
(case (site-config-tag config) (cond
[(parallel sequential) [stop? (thread void)]
(define new-opts (merge-options opts config)) [else
(define ts (case (site-config-tag config)
(map (lambda (c) (loop c [(parallel)
(site-config-tag config) (define new-opts (merge-options opts config))
new-opts)) (define ts
(get-content config))) (map (lambda (c) (loop c #f new-opts))
(define (wait) (get-content config)))
(for ([t (in-list ts)]) (thread
(sync t))) (lambda ()
(cond (for ([t (in-list ts)])
[(eq? mode 'sequential) (wait) (thread void)] (sync t))))]
[else (thread wait)])] [(sequential)
[else (define new-opts (merge-options opts config))
(define c (merge-options opts config)) (define (go)
(client-thread (for-each (lambda (c) (sync (loop c all-seq? new-opts)))
c (get-content config)))
(eq? mode 'sequential) (if all-seq?
(lambda () (begin (go) (thread void))
(limit-and-report-failure (thread go))]
c 2 [else
(lambda () (define c (merge-options opts config))
(sleep (get-opt c '#:pause-before 0)) (client-thread
;; start client, if a VM: c
(start-client c (or (get-opt c '#:max-vm) 1)) all-seq?
;; catch failure in build step proper, so we (lambda (shutdown report-fail)
;; can more likely stop the client: (limit-and-report-failure
(begin0 c 2 shutdown report-fail
(limit-and-report-failure (lambda ()
c 1 (sleep (get-opt c '#:pause-before 0))
(lambda () (client-build c))) ;; start client, if a VM:
;; stop client, if a VM: (start-client c (or (get-opt c '#:max-vm) 1))
(stop-client c) ;; catch failure in build step proper, so we
(sleep (get-opt c '#:pause-after 0)))))))])))) ;; can more likely stop the client:
(begin0
(limit-and-report-failure
c 1 shutdown report-fail
(lambda () (client-build c)))
;; stop client, if a VM:
(stop-client c)
(sleep (get-opt c '#:pause-after 0)))))))])]))))
(display-time) (display-time)
(define end-seconds (current-seconds)) (define end-seconds (current-seconds))
(let ([opts (merge-options (hasheq) config)]) (unless stop?
(let ([to-email (get-opt opts '#:email-to null)]) (let ([opts (merge-options (hasheq) config)])
(unless (null? to-email) (let ([to-email (get-opt opts '#:email-to null)])
(printf "Sending report to ~a\n" (apply ~a to-email #:separator ", ")) (unless (null? to-email)
(send-email to-email (lambda (key def) (printf "Sending report to ~a\n" (apply ~a to-email #:separator ", "))
(get-opt opts key def)) (send-email to-email (lambda (key def)
(get-opt opts '#:build-stamp (current-stamp)) (get-opt opts key def))
start-seconds end-seconds (get-opt opts '#:build-stamp (current-stamp))
failures) start-seconds end-seconds
(display-time)))) (hash-map failures (lambda (k v) (symbol->string k))))
(display-time)))))