From 8c1b5db81553b54b35e753441efd0465426707a3 Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Tue, 20 Nov 2012 14:41:06 -0700 Subject: [PATCH] raco setup: build database mapping doc tags to "out.sxref"s The `xref' produced by `setup/xref' uses the database to delay loading "out.sxref"s, which cuts 64-bit DrRacket's initial footprint by around 50MB (i.e., about 20%). --- collects/scribble/base-render.rkt | 19 ++- collects/scribble/core.rkt | 12 +- collects/scribble/html-render.rkt | 5 +- collects/scribble/xref.rkt | 17 ++- collects/scribblings/scribble/core.scrbl | 4 +- collects/scribblings/scribble/renderer.scrbl | 16 ++- collects/scribblings/scribble/xref.scrbl | 8 ++ collects/setup/doc-db.rkt | 128 +++++++++++++++++++ collects/setup/scribble.rkt | 32 +++-- collects/setup/xref.rkt | 48 +++++-- 10 files changed, 253 insertions(+), 36 deletions(-) create mode 100644 collects/setup/doc-db.rkt diff --git a/collects/scribble/base-render.rkt b/collects/scribble/base-render.rkt index cbc6da6cb0..e09318dbf6 100644 --- a/collects/scribble/base-render.rkt +++ b/collects/scribble/base-render.rkt @@ -288,7 +288,11 @@ (define/public (transfer-info ci src-ci) (let ([in-ht (collect-info-ext-ht ci)]) (for ([(k v) (collect-info-ext-ht src-ci)]) - (hash-set! in-ht k v)))) + (hash-set! in-ht k v))) + (set-demand-chain-demands! + (collect-info-ext-demand ci) + (cons (collect-info-ext-demand src-ci) + (demand-chain-demands (collect-info-ext-demand ci))))) ;; ---------------------------------------- ;; document-order traversal @@ -386,10 +390,11 @@ ;; ---------------------------------------- ;; global-info collection - (define/public (collect ds fns fp) + (define/public (collect ds fns fp [demand (lambda (key ci) #f)]) (let ([ci (make-collect-info fp (make-hash) (make-hash) + (make-demand-chain (list demand)) (make-hasheq) (make-hasheq) null @@ -407,6 +412,7 @@ (collect-info-fp ci) (make-hash) (collect-info-ext-ht ci) + (collect-info-ext-demand ci) (collect-info-parts ci) (collect-info-tags ci) (if (part-tag-prefix d) @@ -911,3 +917,12 @@ ;; ---------------------------------------- (super-new))) + + +;; ---------------------------------------- + +(define-struct demand-chain ([demands #:mutable]) + #:property prop:procedure (lambda (self key ci) + (for/or ([demand (in-list (demand-chain-demands self))]) + (demand key ci)))) + diff --git a/collects/scribble/core.rkt b/collects/scribble/core.rkt index 966e560653..109ccd43bd 100644 --- a/collects/scribble/core.rkt +++ b/collects/scribble/core.rkt @@ -6,7 +6,7 @@ ;; ---------------------------------------- -(define-struct collect-info (fp ht ext-ht parts tags gen-prefix relatives parents) #:transparent) +(define-struct collect-info (fp ht ext-ht ext-demand parts tags gen-prefix relatives parents) #:transparent) (define-struct resolve-info (ci delays undef searches) #:transparent) (define (part-collected-info part ri) @@ -34,8 +34,14 @@ (collected-info-parent (part-collected-info part ri)) ri key)] [else - (values (hash-ref (collect-info-ext-ht (resolve-info-ci ri)) key #f) - #t)])))) + (define ci (resolve-info-ci ri)) + (define (try-ext) + (hash-ref (collect-info-ext-ht ci) key #f)) + (values + (or (try-ext) + (and ((collect-info-ext-demand ci) key ci) + (try-ext))) + #t)])))) (define (resolve-get/ext? part ri key) (resolve-get/ext?* part ri key #f)) diff --git a/collects/scribble/html-render.rkt b/collects/scribble/html-render.rkt index 016fb29d55..c97037b16d 100644 --- a/collects/scribble/html-render.rkt +++ b/collects/scribble/html-render.rkt @@ -1483,11 +1483,12 @@ (define/override (include-navigation?) #t) - (define/override (collect ds fns fp) + (define/override (collect ds fns fp [demand (lambda (key ci) #f)]) (super collect ds (map (lambda (fn) (build-path fn "index.html")) fns) - fp)) + fp + demand)) (define/override (current-part-whole-page? d) (collecting-whole-page)) diff --git a/collects/scribble/xref.rkt b/collects/scribble/xref.rkt index 4b53608b40..a6ed768d0f 100644 --- a/collects/scribble/xref.rkt +++ b/collects/scribble/xref.rkt @@ -34,16 +34,23 @@ (define-namespace-anchor here) (define (load-xref sources + #:demand-source [demand-source (lambda (key) #f)] #:render% [render% (html:render-mixin render%)] #:root [root-path #f]) (let* ([renderer (new render% [dest-dir (find-system-path 'temp-dir)])] [fp (send renderer traverse null null)] - [ci (send renderer collect null null fp)]) + [load-source (lambda (src ci) + (parameterize ([current-namespace + (namespace-anchor->empty-namespace here)]) + (let ([v (src)]) + (when v (send renderer deserialize-info v ci #:root root-path)))))] + [ci (send renderer collect null null fp + (lambda (key ci) + (define src (demand-source key)) + (and src + (load-source src ci))))]) (for ([src sources]) - (parameterize ([current-namespace - (namespace-anchor->empty-namespace here)]) - (let ([v (src)]) - (when v (send renderer deserialize-info v ci #:root root-path))))) + (load-source src ci)) (make-xrefs renderer (send renderer resolve null null ci)))) ;; ---------------------------------------- diff --git a/collects/scribblings/scribble/core.scrbl b/collects/scribblings/scribble/core.scrbl index 7ded2d58f6..0c03eb8c1b 100644 --- a/collects/scribblings/scribble/core.scrbl +++ b/collects/scribblings/scribble/core.scrbl @@ -1193,7 +1193,9 @@ Returns the width in characters of the given @tech{content}. Returns the width in characters of the given @tech{block}.} -@defstruct[collect-info ([ht any/c] [ext-ht any/c] [parts any/c] +@defstruct[collect-info ([ht any/c] [ext-ht any/c] + [ext-demand (tag? collect-info? . -> . any/c)] + [parts any/c] [tags any/c] [gen-prefix any/c] [relatives any/c] [parents (listof part?)])]{ diff --git a/collects/scribblings/scribble/renderer.scrbl b/collects/scribblings/scribble/renderer.scrbl index 4d058634e7..57a3b446d2 100644 --- a/collects/scribblings/scribble/renderer.scrbl +++ b/collects/scribblings/scribble/renderer.scrbl @@ -127,12 +127,22 @@ information on the @racket[dests] argument.} @defmethod[(collect [srcs (listof part?)] [dests (listof path-string?)] - [fp (and/c hash? immutable?)]) + [fp (and/c hash? immutable?)] + [demand (tag? collect-info? . -> . any/c) (lambda (_tag _ci) #f)]) collect-info?]{ Performs the @techlink{collect pass}. See @method[render<%> render] for -information on the @racket[dests] argument. The @racket[fp] argument -is a result from the @method[render<%> traverse] method.} +information on the @racket[dests] arguments. The @racket[fp] argument +is a result from the @method[render<%> traverse] method. + +The @racket[demand] argument supplies external tag mappings on demand. +When the @racket[collect-info] result is later used to find a mapping +for a tag and no mapping is already available, @racket[demand] is +called with the tag and the @racket[collect-info]. The @racket[demand] +function returns true to indicate when it adds information to the +@racket[collect-info] so that the lookup should be tried again; the +@racket[demand] function should return @racket[#f] if it does not +extend @racket[collect-info].} @defmethod[(resolve [srcs (listof part?)] [dests (listof path-string?)] diff --git a/collects/scribblings/scribble/xref.scrbl b/collects/scribblings/scribble/xref.scrbl index e5ce9e2dd1..9f7d0f08c5 100644 --- a/collects/scribblings/scribble/xref.scrbl +++ b/collects/scribblings/scribble/xref.scrbl @@ -20,6 +20,9 @@ by @racket[load-xref], @racket[#f] otherwise.} @defproc[(load-xref [sources (listof (-> any/c))] + [#:demand-source demand-source + (tag? -> (or/c (-> any/c) #f)) + (lambda (_tag) #f)] [#:render% using-render% (implementation?/c render<%>) (render-mixin render%)] [#:root root-path (or/c path-string? false/c) #f]) @@ -30,6 +33,11 @@ produce a serialized information obtained from @xmethod[render<%> serialize-info]. If a @racket[sources] element produces @racket[#f], its result is ignored. +The @racket[demand-source] function can effectively add a new source +to @racket[sources] in response to a search for information on the +given tag. The @racket[demand-source] function returns @racket[#f] +to indicate that no new sources satisfy the given tag. + Since the format of serialized information is specific to a rendering class, the optional @racket[using-render%] argument accepts the relevant class. It defaults to HTML rendering, partly because diff --git a/collects/setup/doc-db.rkt b/collects/setup/doc-db.rkt new file mode 100644 index 0000000000..ca260030eb --- /dev/null +++ b/collects/setup/doc-db.rkt @@ -0,0 +1,128 @@ +#lang racket/base +(require db + racket/format + "main-doc.rkt") + +(provide doc-db-available? + doc-db-record-provides + doc-db-key->path + doc-db-file->connection) + +(define (doc-db-available?) #t) + +(define (doc-db-file->connection db-file) + (sqlite3-connect #:database db-file)) + +(define select-pathid-vq + (virtual-statement "SELECT pathid FROM documented WHERE stag=$1")) +(define select-path-vq + (virtual-statement "SELECT atmain, path FROM pathids WHERE pathid=$1")) + +(define (doc-db-key->path db-file key) + (define db (if (connection? db-file) + db-file + (doc-db-file->connection db-file))) + + (define pathid + (call-with-transaction/retry + db + (lambda () + (define row (query-maybe-row db + select-pathid-vq + (~s key))) + (and row + (vector-ref row 0))))) + + (begin0 + (and pathid + (call-with-transaction/retry + db + (lambda () + (define row (query-maybe-row db + select-path-vq + pathid)) + (and row + (let ([path (read (open-input-bytes (vector-ref row 1)))]) + (if (equal? "y" (vector-ref row 0)) + (main-doc-relative->path (cons 'doc path)) + (bytes->path path))))))) + (unless (connection? db-file) + (disconnect db)))) + + +(define (doc-db-record-provides db-file provides filename) + (define filename* (path->main-doc-relative filename)) + (define filename-bytes (if (pair? filename*) + (string->bytes/utf-8 (~s (cdr filename*))) + (path->bytes filename*))) + + (define db (sqlite3-connect #:database db-file #:mode 'create)) + + ;; Make sure tables are present: + (call-with-transaction/retry + db + (lambda () + (when (null? + (query-rows db (~a "SELECT name FROM sqlite_master" + " WHERE type='table' AND name='documented'"))) + (query-exec db (~a "CREATE TABLE documented " + "(stag VARCHAR(256)," + " pathid SMALLINT," + " PRIMARY KEY (stag))"))))) + (call-with-transaction/retry + db + (lambda () + (when (null? + (query-rows db (~a "SELECT name FROM sqlite_master" + " WHERE type='table' AND name='pathids'"))) + (query-exec db (~a "CREATE TABLE pathids " + "(pathid SMALLINT," + " atmain CHAR(1)," + " path VARCHAR(1024)," + " PRIMARY KEY (pathid))"))))) + + (define pathid + (call-with-transaction/retry + db + (lambda () + (define id (query-maybe-row db (~a "SELECT pathid FROM pathids" + " WHERE atmain=$1 AND path=$2") + (if (pair? filename*) "y" "n") + filename-bytes)) + (cond + [(not id) + (define num (vector-ref (query-row db "SELECT COUNT(pathid) FROM pathids") 0)) + (query-exec db "INSERT INTO pathids VALUES ($1, $2, $3)" + (add1 num) + (if (pair? filename*) "y" "n") + filename-bytes) + (add1 num)] + [else (vector-ref id 0)])))) + + (call-with-transaction/retry + db + (lambda () + (for ([p (in-list provides)]) + (define stag (~s p)) + (query-exec db "DELETE FROM documented WHERE stag=$1" + stag) + (query-exec db "INSERT INTO documented VALUES ($1, $2)" + stag + pathid)))) + + (disconnect db)) + +(define (call-with-transaction/retry db thunk) + (let loop ([tries 0]) + (with-handlers ([(lambda (v) + (and (tries . < . 100) + (exn:fail? v) + (regexp-match #rx"the database file is locked" + (exn-message v)))) + (lambda (exn) + ;; Try again: + (sleep) + (loop (add1 tries)))]) + (call-with-transaction + db + thunk)))) diff --git a/collects/setup/scribble.rkt b/collects/setup/scribble.rkt index 038dcc1022..4c25f835e5 100644 --- a/collects/setup/scribble.rkt +++ b/collects/setup/scribble.rkt @@ -7,6 +7,7 @@ "main-collects.rkt" "main-doc.rkt" "parallel-do.rkt" + "doc-db.rkt" scheme/class scheme/list scheme/file @@ -512,11 +513,18 @@ [(equal? (list-ref stamp-data pos) (get-sha1 file)) stamp-time] [else t]))) +(define (find-db-file doc) + (build-path (if (main-doc? doc) + (find-doc-dir) + (find-user-doc-dir)) + "docindex.sqlite")) + (define ((get-doc-info only-dirs latex-dest auto-main? auto-user? with-record-error setup-printf workerid) doc) (let* ([info-out-file (sxref-path latex-dest doc "out.sxref")] [info-in-file (sxref-path latex-dest doc "in.sxref")] + [db-file (find-db-file doc)] [stamp-file (sxref-path latex-dest doc "stamp.sxref")] [out-file (build-path (doc-dest-dir doc) "index.html")] [src-zo (let-values ([(base name dir?) (split-path (doc-src-file doc))]) @@ -674,7 +682,7 @@ #f #f)]) (when need-out-write? - (render-time "xref-out" (write-out/info latex-dest info sci)) + (render-time "xref-out" (write-out/info latex-dest info sci db-file)) (set-info-need-out-write?! info #f)) (when (info-need-in-write? info) (render-time "xref-in" (write-in/info latex-dest info)) @@ -779,7 +787,8 @@ [undef (render-time "undefined" (send renderer get-external ri))] [in-delta? (not (equal? (any-order undef) (any-order ff-undef)))] [out-delta? (or (not (serialized=? sci ff-sci)) - (not (equal? (any-order defs) (any-order ff-provides))))]) + (not (equal? (any-order defs) (any-order ff-provides))))] + [db-file (find-db-file doc)]) (when (verbose) (printf " [~a~afor ~a]\n" (if in-delta? "New in " "") @@ -791,7 +800,7 @@ (when in-delta? (render-time "xref-in" (write-in latex-dest vers doc undef ff-deps-rel ff-searches ff-dep-docs))) (when out-delta? - (render-time "xref-out" (write-out latex-dest vers doc sci defs))) + (render-time "xref-out" (write-out latex-dest vers doc sci defs db-file))) (cleanup-dest-dir doc) (render-time @@ -836,21 +845,25 @@ (parameterize ([current-namespace p]) (call-in-nested-thread (lambda () (dynamic-require mod-path 'doc))))))) -(define (write- latex-dest vers doc name data) +(define (write- latex-dest vers doc name data prep!) (let* ([filename (sxref-path latex-dest doc name)]) + (prep! filename) (when (verbose) (printf " [Caching to disk ~a]\n" filename)) (make-directory*/ignore-exists-exn (doc-dest-dir doc)) (with-compile-output filename (lambda (out tmp-filename) (write-bytes (s-exp->fasl (append (list (list vers (doc-flags doc))) data)) out))))) -(define (write-out latex-dest vers doc sci provides) +(define (write-out latex-dest vers doc sci provides db-file) (write- latex-dest vers doc "out.sxref" (list sci - (serialize provides)))) + (serialize provides)) + (lambda (filename) + (unless latex-dest + (doc-db-record-provides db-file provides filename))))) -(define (write-out/info latex-dest info sci) - (write-out latex-dest (info-vers info) (info-doc info) sci (info-provides info))) +(define (write-out/info latex-dest info sci db-file) + (write-out latex-dest (info-vers info) (info-doc info) sci (info-provides info) db-file)) (define (write-in latex-dest vers doc undef rels searches dep-docs) (write- latex-dest vers doc "in.sxref" @@ -864,7 +877,8 @@ ;; for a place to reconstruct a suitable `doc' record. ;; It probably would be better to reconstruct the `doc' ;; record in a place from the path. - (serialize (map doc->rel-doc dep-docs))))) + (serialize (map doc->rel-doc dep-docs))) + void)) (define (write-in/info latex-dest info) (write-in latex-dest diff --git a/collects/setup/xref.rkt b/collects/setup/xref.rkt index e3cd70fe19..40f8ac6ec2 100644 --- a/collects/setup/xref.rkt +++ b/collects/setup/xref.rkt @@ -3,9 +3,11 @@ (require scribble/xref scheme/fasl scheme/path + racket/promise setup/dirs "getinfo.rkt" - "private/path-utils.rkt") + "private/path-utils.rkt" + "doc-db.rkt") (provide load-collections-xref) @@ -37,20 +39,44 @@ [p (and d (build-path d "out.sxref"))]) (and p (file-exists? p) p)))))) +(define (dest->source dest) + (lambda () + (with-handlers ([exn:fail? (lambda (exn) + (log-error + "warning: ~a" + (if (exn? exn) + (exn-message exn) + (format "~e" exn))) + #f)]) + (cadr (call-with-input-file* dest fasl->s-exp))))) + +(define (dir->connection dir) + (define p (build-path dir "docindex.sqlite")) + (and (file-exists? p) + (doc-db-file->connection p))) + +(define main-db (delay (dir->connection (find-doc-dir)))) +(define user-db (delay (dir->connection (find-user-doc-dir)))) + +(define (key->source key) + (define (try p) + (and p + (doc-db-key->path p key))) + (define dest (or (try (force main-db)) + (try (force user-db)))) + (and dest + (dest->source dest))) + (define (get-reader-thunks) - (map (lambda (dest) - (lambda () - (with-handlers ([exn:fail? (lambda (exn) - (eprintf "WARNING: ~a\n" - (if (exn? exn) - (exn-message exn) - (format "~e" exn))) - #f)]) - (cadr (call-with-input-file* dest fasl->s-exp))))) + (map dest->source (filter values (get-dests)))) (define (load-collections-xref [report-loading void]) (or cached-xref (begin (report-loading) - (set! cached-xref (load-xref (get-reader-thunks))) + (set! cached-xref + (if (doc-db-available?) + (load-xref null + #:demand-source key->source) + (load-xref (get-reader-thunks)))) cached-xref)))