add file/cache
In base, so it can be used by the package manager.
This commit is contained in:
parent
02bbccc200
commit
0ac601db66
76
pkgs/racket-pkgs/racket-doc/file/scribblings/cache.scrbl
Normal file
76
pkgs/racket-pkgs/racket-doc/file/scribblings/cache.scrbl
Normal file
|
@ -0,0 +1,76 @@
|
|||
#lang scribble/doc
|
||||
@(require "common.rkt"
|
||||
(for-label file/cache))
|
||||
|
||||
@title[#:tag "cache"]{Caching}
|
||||
|
||||
@defmodule[file/cache]{The @racketmodname[file/cache] library provides
|
||||
utilities for managing a local cache of files, such as downloaded
|
||||
files. The cache is safe for concurrent use across processes, since
|
||||
it uses filesystem locks, and it isolates clients from filesystem
|
||||
failures.}
|
||||
|
||||
@defproc[(cache-file [dest-file path-string?]
|
||||
[#:exists-ok? exists-ok? any/c #f]
|
||||
[key (not/c #f)]
|
||||
[cache-dir path-string?]
|
||||
[fetch (-> any)]
|
||||
[#:notify-cache-use notify-cache-use (string? . -> . any)
|
||||
void]
|
||||
[#:max-cache-files max-files real? 1024]
|
||||
[#:max-cache-size max-size real? (* 64 1024 1024)]
|
||||
[#:evict-before? evict-before? (hash? hash? . -> . boolean?)
|
||||
(lambda (a b)
|
||||
(< (hash-ref a 'modify-seconds)
|
||||
(hash-ref b 'modify-seconds)))]
|
||||
[#:log-error-string log-error-string (string? . -> . any)
|
||||
(lambda (s) (log-error s))]
|
||||
[#:log-debug-string log-debug-string (string? . -> . any)
|
||||
(lambda (s) (log-debug s))])
|
||||
void?]{
|
||||
|
||||
Looks for a file in @racket[cache-dir] previously cached with
|
||||
@racket[key], and copies it to @racket[dest-file] (which must not
|
||||
exist already, unless @racket[exists-ok?] is true) if a cached file
|
||||
is found. Otherwise, @racket[fetch] is called; if @racket[dest-file]
|
||||
exists after calling @racket[fetch], it is copied to @racket[cache-dir]
|
||||
and recorded with @racket[key]. When a cache entry is used,
|
||||
@racket[notify-cache-use] is called with the name of the cache file.
|
||||
|
||||
When a new file is cached, @racket[max-files] (as a file count) and
|
||||
@racket[max-size] (in bytes) determine whether any previously cached
|
||||
files should be evicted from the cache. If so, @racket[evict-before?]
|
||||
determines an order on existing cache entries for eviction; each
|
||||
argument to @racket[evict-before?] is a hash table with at least the
|
||||
following keys:
|
||||
|
||||
@itemlist[
|
||||
|
||||
@item{@racket['modify-seconds] --- the file's modification date}
|
||||
|
||||
@item{@racket['size] --- the file's size in bytes}
|
||||
|
||||
@item{@racket['key] --- the cache entry's key}
|
||||
|
||||
@item{@racket['name] --- the cache file's name}
|
||||
|
||||
]
|
||||
|
||||
The @racket[log-error-string] and @racket[log-debug-string] functions
|
||||
are used to record errors and debugging information.}
|
||||
|
||||
|
||||
@defproc[(cache-remove [key any/c]
|
||||
[cache-dir path-string?]
|
||||
[#:log-error-string log-error-string (string? . -> . any)
|
||||
(lambda (s) (log-error s))]
|
||||
[#:log-debug-string log-debug-string (string? . -> . any)
|
||||
(lambda (s) (log-debug s))])
|
||||
void?]{
|
||||
|
||||
Removes the cache entry matching @racket[key] (if any) from the cache
|
||||
in @racket[cache-dir], or removes all cached files if @racket[key] is
|
||||
@racket[#f].
|
||||
|
||||
The @racket[log-error-string] and @racket[log-debug-string] functions
|
||||
are used to record errors and debugging information.}
|
|
@ -1,7 +1,7 @@
|
|||
#lang scribble/doc
|
||||
@(require "common.rkt")
|
||||
|
||||
@title{File: Racket File Format Libraries}
|
||||
@title{File: Racket File and Format Libraries}
|
||||
|
||||
@table-of-contents[]
|
||||
|
||||
|
@ -18,6 +18,7 @@
|
|||
@include-section["gif.scrbl"]
|
||||
@include-section["ico.scrbl"]
|
||||
@include-section["resource.scrbl"]
|
||||
@include-section["cache.scrbl"]
|
||||
|
||||
@(bibliography
|
||||
(bib-entry #:key "Gervautz1990"
|
||||
|
|
139
pkgs/racket-pkgs/racket-test/tests/file/cache.rkt
Normal file
139
pkgs/racket-pkgs/racket-test/tests/file/cache.rkt
Normal file
|
@ -0,0 +1,139 @@
|
|||
#lang racket/base
|
||||
(require file/cache
|
||||
racket/file)
|
||||
|
||||
(define-logger cache)
|
||||
|
||||
|
||||
(define tmp-dir (make-temporary-file "rkttmp~a"
|
||||
'directory
|
||||
(find-system-path 'temp-dir)))
|
||||
|
||||
(define keep-in-cache '(3 7 13))
|
||||
(define complain-if-uncached? #f)
|
||||
(define concurrent? #f)
|
||||
(define nonread? #f)
|
||||
|
||||
(define (ok-error? s)
|
||||
(cond
|
||||
[concurrent?
|
||||
(regexp-match? #rx"could not acquire .* lock" s)]
|
||||
[nonread?
|
||||
(regexp-match? #rx"copy-file: cannot open source file" s)]
|
||||
[else #f]))
|
||||
|
||||
(define (get n #:result [result void]
|
||||
#:dir [dir-suffix ""])
|
||||
(define files-dir (build-path tmp-dir (format "files~a" dir-suffix)))
|
||||
(make-directory* files-dir)
|
||||
(define fn (build-path files-dir (format "~a" n)))
|
||||
(define content (make-bytes (* n 100) n))
|
||||
(when (file-exists? fn) (delete-file fn))
|
||||
(define got? #f)
|
||||
(define errored? #f)
|
||||
(cache-file fn
|
||||
n
|
||||
(build-path tmp-dir "cache")
|
||||
(lambda ()
|
||||
(call-with-output-file*
|
||||
fn
|
||||
(lambda (o)
|
||||
(write-bytes content o))))
|
||||
#:evict-before? (lambda (a b)
|
||||
(define a-keep? (member (hash-ref a 'key) keep-in-cache))
|
||||
(define b-keep? (member (hash-ref b 'key) keep-in-cache))
|
||||
(cond
|
||||
[(and (not a-keep?) b-keep?) #t]
|
||||
[(and (not b-keep?) a-keep?) #f]
|
||||
[else
|
||||
(< (hash-ref a 'modify-seconds)
|
||||
(hash-ref b 'modify-seconds))]))
|
||||
#:max-cache-size 10000
|
||||
#:log-debug-string (lambda (s) (log-cache-debug s))
|
||||
#:log-error-string (lambda (s)
|
||||
(if (ok-error? s)
|
||||
(set! errored? #t)
|
||||
(log-error s)))
|
||||
#:notify-cache-use (lambda (s)
|
||||
(set! got? #t)))
|
||||
(unless (= (file-size fn) (bytes-length content))
|
||||
(error 'test "wrong file size for ~a" fn))
|
||||
(unless (equal? content (file->bytes fn))
|
||||
(error 'test "wrong content for ~a" fn))
|
||||
(unless (or got? errored?)
|
||||
(when complain-if-uncached?
|
||||
(when (member n keep-in-cache)
|
||||
(error 'test "wasn't in cache: ~a" n))))
|
||||
(result got?))
|
||||
|
||||
;; --------------------------------------------------
|
||||
;; Test basic caching
|
||||
|
||||
(for ([i keep-in-cache]) (get i))
|
||||
(set! complain-if-uncached? #t)
|
||||
|
||||
(for ([i 20]) (get i))
|
||||
(for ([i (in-range 7 12)]) (get i))
|
||||
(for ([i (in-range 7 12)]) (get i))
|
||||
(for ([i (in-range 7 12)]) (get i))
|
||||
|
||||
(for ([i 20]) (get i))
|
||||
|
||||
(for ([i 100])
|
||||
(for ([i 20]) (get i)))
|
||||
|
||||
(cache-remove (car keep-in-cache)
|
||||
(build-path tmp-dir "cache"))
|
||||
(set! complain-if-uncached? #f)
|
||||
(when (get (car keep-in-cache) #:result values)
|
||||
(error 'test "should not have been in cache"))
|
||||
(set! complain-if-uncached? #t)
|
||||
(for ([i keep-in-cache]) (get i))
|
||||
|
||||
(cache-remove #f
|
||||
(build-path tmp-dir "cache"))
|
||||
(set! complain-if-uncached? #f)
|
||||
(for ([i keep-in-cache])
|
||||
(when (get i #:result values)
|
||||
(error 'test "should not have been in cache")))
|
||||
(set! complain-if-uncached? #t)
|
||||
(for ([i keep-in-cache]) (get i))
|
||||
|
||||
;; --------------------------------------------------
|
||||
;; Test concurrent use
|
||||
|
||||
(set! concurrent? #t)
|
||||
(for-each
|
||||
sync
|
||||
(for/list ([j 100])
|
||||
(thread (lambda ()
|
||||
(for ([i 20]) (get i #:dir j))))))
|
||||
(set! concurrent? #f)
|
||||
|
||||
;; --------------------------------------------------
|
||||
;; Test uncooperative filesystem
|
||||
|
||||
(define (all-file-perms perms)
|
||||
(define dir (build-path tmp-dir "cache"))
|
||||
(for ([f (directory-list dir)])
|
||||
(when (regexp-match? #rx"^cached" f)
|
||||
(file-or-directory-permissions (build-path dir f) perms))))
|
||||
|
||||
(set! nonread? #t)
|
||||
(for ([j 2])
|
||||
(for ([i 20])
|
||||
(get i)
|
||||
(all-file-perms 0)))
|
||||
|
||||
;; make files readable and writable again, and cache should recover:
|
||||
(all-file-perms #o777)
|
||||
(set! nonread? #f)
|
||||
(set! complain-if-uncached? #f)
|
||||
(for ([i 20]) (get i))
|
||||
|
||||
(set! complain-if-uncached? #t)
|
||||
(for ([i 20]) (get i))
|
||||
|
||||
;; --------------------------------------------------
|
||||
|
||||
(delete-directory/files tmp-dir)
|
265
racket/collects/file/cache.rkt
Normal file
265
racket/collects/file/cache.rkt
Normal file
|
@ -0,0 +1,265 @@
|
|||
#lang racket/base
|
||||
(require racket/file
|
||||
racket/path
|
||||
racket/contract/base)
|
||||
|
||||
(provide
|
||||
(contract-out [cache-file
|
||||
(->* (path-string?
|
||||
(not/c #f)
|
||||
path-string?
|
||||
(-> any))
|
||||
(#:log-error-string (string? . -> . any)
|
||||
#:log-debug-string (string? . -> . any)
|
||||
#:notify-cache-use (string? . -> . any)
|
||||
#:max-cache-files real?
|
||||
#:max-cache-size real?
|
||||
#:evict-before? (hash? hash? . -> . boolean?)
|
||||
#:exists-ok? any/c)
|
||||
void?)]
|
||||
[cache-remove
|
||||
(->* (any/c
|
||||
path-string?)
|
||||
(#:log-error-string (string? . -> . any)
|
||||
#:log-debug-string (string? . -> . any))
|
||||
void?)]))
|
||||
|
||||
(define (cache-file dest-file ; the file that `fetch` is supposed to put in place
|
||||
source-key ; use (or write) cache entry with this key
|
||||
cache-dir ; direct to hold cache files and database
|
||||
fetch ; the download operation for cache misses; writes `dest-file`
|
||||
#:log-error-string [log-error-string log-error-string]
|
||||
#:log-debug-string [log-debug-string log-debug-string]
|
||||
#:notify-cache-use [notify-cache-use void]
|
||||
#:max-cache-files [max-cache-files 1024]
|
||||
#:max-cache-size [max-cache-size (* 64 1024 1024)]
|
||||
#:evict-before? [evict-before? (lambda (a b)
|
||||
(< (hash-ref a 'modify-seconds)
|
||||
(hash-ref b 'modify-seconds)))]
|
||||
#:exists-ok? [exists-ok? #f])
|
||||
;; First phase, returns a thunk to continue (i.e., download or not):
|
||||
(define (try-read-cache)
|
||||
(call-with-cache-db/catch-exn-until-success-finishes
|
||||
cache-dir
|
||||
log-error-string
|
||||
'shared
|
||||
;; On success:
|
||||
(lambda (db cache-db-file)
|
||||
;; Lock is still held here to make sure a cache file isn't
|
||||
;; deleted and replaced with a different file while we're
|
||||
;; trying to copy it.
|
||||
(cond
|
||||
[(hash-ref db source-key #f)
|
||||
=> (lambda (fn)
|
||||
(notify-cache-use fn)
|
||||
(copy-file (build-path cache-dir fn) dest-file exists-ok?)
|
||||
;; no work in continuation:
|
||||
void)]
|
||||
[else fetch-and-continue]))
|
||||
;; On failure (exception is logged already):
|
||||
(lambda (exn) fetch-and-continue)))
|
||||
|
||||
;; Second phase (used when cache read fails):
|
||||
(define (fetch-and-continue)
|
||||
(fetch)
|
||||
(when (file-exists? dest-file)
|
||||
(try-write-cache)))
|
||||
|
||||
;; Third phase, when fetching seems to have worked:
|
||||
(define (try-write-cache)
|
||||
(call-with-cache-db/catch-exn-until-success-finishes
|
||||
cache-dir
|
||||
log-error-string
|
||||
'exclusive
|
||||
;; On success getting the current db:
|
||||
(lambda (db cache-db-file)
|
||||
;; Lock is still held here to make sure no one else
|
||||
;; tries to clean up while we copy a file into
|
||||
;; the cache.
|
||||
|
||||
;; We assume that the cost of this filesystem traversal
|
||||
;; is small compared to download costs:
|
||||
(define revised-db (limit-sizes cache-dir
|
||||
(clean-database db cache-dir
|
||||
log-debug-string)
|
||||
(sub1 max-cache-files)
|
||||
(- max-cache-size (file-size dest-file))
|
||||
evict-before?
|
||||
log-debug-string))
|
||||
|
||||
;; Copy file into cache and update cache db:
|
||||
(log-debug-string (format "caching for ~s" source-key))
|
||||
(define cache-file (make-temporary-file "cached~a" #f cache-dir))
|
||||
(copy-file dest-file cache-file #t)
|
||||
(write-db cache-db-file
|
||||
(hash-set revised-db
|
||||
source-key
|
||||
(path->string
|
||||
(file-name-from-path cache-file)))))
|
||||
;; On failure, we can just give up writing to the cache;
|
||||
;; the exception is already logged:
|
||||
void))
|
||||
|
||||
;; Start with first phase, and continue with its result:
|
||||
((try-read-cache)))
|
||||
|
||||
(define (cache-remove source-key ; #f => clear all
|
||||
cache-dir
|
||||
#:log-error-string [log-error-string log-error-string]
|
||||
#:log-debug-string [log-debug-string log-debug-string])
|
||||
(call-with-cache-db/catch-exn-until-success-finishes
|
||||
cache-dir
|
||||
log-error-string
|
||||
'exclusive
|
||||
;; success:
|
||||
(lambda (db cache-db-file)
|
||||
(write-db cache-db-file
|
||||
(if source-key
|
||||
(hash-remove db source-key)
|
||||
(hash))))
|
||||
;; failure:
|
||||
(lambda (exn) (raise exn))))
|
||||
|
||||
;; Extract cache-db, holding file lock (in `lock-mode`) while calling
|
||||
;; `success-handler` on the db. If the cache-db read fails, use an
|
||||
;; empty db. If any failure happens other than reading the db,
|
||||
;; including duing `success-handler`, then call `failure-handler` with
|
||||
;; the exception after logging it. Note that breaks are disabled when
|
||||
;; calling the failure handler, so it should return quickly.
|
||||
(define call-with-cache-db/catch-exn-until-success-finishes
|
||||
(lambda (cache-dir log-error-string lock-mode
|
||||
success-handler
|
||||
failure-handler)
|
||||
(define cache-db-file (build-path cache-dir "cache.rktd"))
|
||||
(define (succeed db)
|
||||
(success-handler db cache-db-file))
|
||||
(with-handlers ([exn:fail?
|
||||
(lambda (exn)
|
||||
(log-error-string (format "cache attempt failed: ~a"
|
||||
(exn-message exn)))
|
||||
(failure-handler exn))])
|
||||
(make-directory* cache-dir)
|
||||
(call-with-file-lock/timeout
|
||||
cache-db-file
|
||||
lock-mode
|
||||
(lambda ()
|
||||
(succeed (read-db cache-db-file log-error-string)))
|
||||
(lambda ()
|
||||
;; raise exception to be caught above:
|
||||
(error (format "could not acquire ~s lock" lock-mode)))))))
|
||||
|
||||
;; Called with read lock, and handles failure by returning
|
||||
;; an empty db:
|
||||
(define (read-db cache-db-file log-error-string)
|
||||
(if (not (file-exists? cache-db-file)) ; avoid logging error if the file does not exist
|
||||
(hash)
|
||||
(with-handlers ([exn:fail?
|
||||
(lambda (exn)
|
||||
(log-error-string (format "cache file read failure: ~a"
|
||||
(exn-message exn)))
|
||||
(hash))])
|
||||
(define db (call-with-input-file*
|
||||
cache-db-file
|
||||
(lambda (i)
|
||||
(call-with-default-reading-parameterization
|
||||
(lambda ()
|
||||
(read i))))))
|
||||
(cond
|
||||
[(and (hash? db)
|
||||
;; be picky about hash content:
|
||||
(for/and ([v (in-hash-values db)])
|
||||
(and (string? v)
|
||||
(regexp-match? #rx"[a-zA-Z0-9]*" v)
|
||||
((string-length v) . <= . 64))))
|
||||
db]
|
||||
[else
|
||||
(log-error-string "cache file read failure: ill-formed content")
|
||||
(hash)]))))
|
||||
|
||||
;; Called with write lock:
|
||||
(define (write-db cache-db-file db)
|
||||
(call-with-output-file*
|
||||
cache-db-file
|
||||
#:exists 'truncate/replace
|
||||
(lambda (o)
|
||||
(write db o)
|
||||
(newline o))))
|
||||
|
||||
;; Called with write lock; delete files not in
|
||||
;; db, and removes non-existent files from db:
|
||||
(define (clean-database db cache-dir log-debug-string)
|
||||
(define rev (for/hash ([(k v) (in-hash db)])
|
||||
(values v k)))
|
||||
;; Delete files not in db:
|
||||
(define exist
|
||||
(for/fold ([exist (hash)]) ([f (directory-list cache-dir)])
|
||||
(define s (path->string f))
|
||||
(cond
|
||||
[(hash-ref rev s #f)
|
||||
(hash-set exist s #t)]
|
||||
[(not (regexp-match? #rx"^cached" s))
|
||||
;; not a cache file, so leave it alone:
|
||||
exist]
|
||||
[else
|
||||
;; delete unrecorded cache file:
|
||||
(define p (build-path cache-dir f))
|
||||
(log-debug-string (format "removing file from cache: ~a" p))
|
||||
(delete-file p)
|
||||
exist])))
|
||||
;; Filter db by existing files:
|
||||
(for/hash ([(k v) (in-hash db)]
|
||||
#:when (or (hash-ref exist v #f)
|
||||
(begin
|
||||
(log-debug-string (format "lost cache file for entry: ~s" k))
|
||||
#f)))
|
||||
|
||||
(values k v)))
|
||||
|
||||
;; Called with the write lock, deletes oldest files as needed
|
||||
;; to meet count and size constraints, returns updated db:
|
||||
(define (limit-sizes cache-dir db
|
||||
max-count max-size evict-before?
|
||||
log-debug-string)
|
||||
(define files
|
||||
(for/list ([(k v) (in-hash db)])
|
||||
(define path (build-path cache-dir v))
|
||||
(hash 'name v
|
||||
'key k
|
||||
'size (file-size path)
|
||||
'modify-seconds (file-or-directory-modify-seconds path))))
|
||||
(define count (hash-count db))
|
||||
(define size (for/sum ([f (in-list files)])
|
||||
(hash-ref f 'size)))
|
||||
(cond
|
||||
[(and (count . < . max-count)
|
||||
(size . < . max-size))
|
||||
;; no need to remove anything
|
||||
db]
|
||||
[else
|
||||
;; Sort, so we can remove the oldest (or whatever `evict-before?`
|
||||
;; says should be evicted first):
|
||||
(define sorted (sort files evict-before?))
|
||||
(let loop ([sorted sorted]
|
||||
[db db]
|
||||
[count count]
|
||||
[size size])
|
||||
(cond
|
||||
[(or (null? sorted)
|
||||
(and (count . < . max-count)
|
||||
(size . < . max-size)))
|
||||
db]
|
||||
[else
|
||||
;; Delete first cached file in the eviction list:
|
||||
(define key (hash-ref (car sorted) 'key))
|
||||
(log-debug-string (format "deleting cached file for ~s" key))
|
||||
(delete-file (build-path cache-dir (hash-ref (car sorted) 'name)))
|
||||
(loop (cdr sorted)
|
||||
(hash-remove db key)
|
||||
(sub1 count)
|
||||
(- size (hash-ref (car sorted) 'size)))]))]))
|
||||
|
||||
;; Default logging functions
|
||||
(define (log-error-string s)
|
||||
(log-error s))
|
||||
(define (log-debug-string s)
|
||||
(log-debug s))
|
Loading…
Reference in New Issue
Block a user