add file/cache

In base, so it can be used by the package manager.
This commit is contained in:
Matthew Flatt 2013-11-06 09:40:51 -07:00
parent 02bbccc200
commit 0ac601db66
4 changed files with 482 additions and 1 deletions

View File

@ -0,0 +1,76 @@
#lang scribble/doc
@(require "common.rkt"
(for-label file/cache))
@title[#:tag "cache"]{Caching}
@defmodule[file/cache]{The @racketmodname[file/cache] library provides
utilities for managing a local cache of files, such as downloaded
files. The cache is safe for concurrent use across processes, since
it uses filesystem locks, and it isolates clients from filesystem
failures.}
@defproc[(cache-file [dest-file path-string?]
[#:exists-ok? exists-ok? any/c #f]
[key (not/c #f)]
[cache-dir path-string?]
[fetch (-> any)]
[#:notify-cache-use notify-cache-use (string? . -> . any)
void]
[#:max-cache-files max-files real? 1024]
[#:max-cache-size max-size real? (* 64 1024 1024)]
[#:evict-before? evict-before? (hash? hash? . -> . boolean?)
(lambda (a b)
(< (hash-ref a 'modify-seconds)
(hash-ref b 'modify-seconds)))]
[#:log-error-string log-error-string (string? . -> . any)
(lambda (s) (log-error s))]
[#:log-debug-string log-debug-string (string? . -> . any)
(lambda (s) (log-debug s))])
void?]{
Looks for a file in @racket[cache-dir] previously cached with
@racket[key], and copies it to @racket[dest-file] (which must not
exist already, unless @racket[exists-ok?] is true) if a cached file
is found. Otherwise, @racket[fetch] is called; if @racket[dest-file]
exists after calling @racket[fetch], it is copied to @racket[cache-dir]
and recorded with @racket[key]. When a cache entry is used,
@racket[notify-cache-use] is called with the name of the cache file.
When a new file is cached, @racket[max-files] (as a file count) and
@racket[max-size] (in bytes) determine whether any previously cached
files should be evicted from the cache. If so, @racket[evict-before?]
determines an order on existing cache entries for eviction; each
argument to @racket[evict-before?] is a hash table with at least the
following keys:
@itemlist[
@item{@racket['modify-seconds] --- the file's modification date}
@item{@racket['size] --- the file's size in bytes}
@item{@racket['key] --- the cache entry's key}
@item{@racket['name] --- the cache file's name}
]
The @racket[log-error-string] and @racket[log-debug-string] functions
are used to record errors and debugging information.}
@defproc[(cache-remove [key any/c]
[cache-dir path-string?]
[#:log-error-string log-error-string (string? . -> . any)
(lambda (s) (log-error s))]
[#:log-debug-string log-debug-string (string? . -> . any)
(lambda (s) (log-debug s))])
void?]{
Removes the cache entry matching @racket[key] (if any) from the cache
in @racket[cache-dir], or removes all cached files if @racket[key] is
@racket[#f].
The @racket[log-error-string] and @racket[log-debug-string] functions
are used to record errors and debugging information.}

View File

@ -1,7 +1,7 @@
#lang scribble/doc
@(require "common.rkt")
@title{File: Racket File Format Libraries}
@title{File: Racket File and Format Libraries}
@table-of-contents[]
@ -18,6 +18,7 @@
@include-section["gif.scrbl"]
@include-section["ico.scrbl"]
@include-section["resource.scrbl"]
@include-section["cache.scrbl"]
@(bibliography
(bib-entry #:key "Gervautz1990"

View File

@ -0,0 +1,139 @@
#lang racket/base
(require file/cache
racket/file)
(define-logger cache)
(define tmp-dir (make-temporary-file "rkttmp~a"
'directory
(find-system-path 'temp-dir)))
(define keep-in-cache '(3 7 13))
(define complain-if-uncached? #f)
(define concurrent? #f)
(define nonread? #f)
(define (ok-error? s)
(cond
[concurrent?
(regexp-match? #rx"could not acquire .* lock" s)]
[nonread?
(regexp-match? #rx"copy-file: cannot open source file" s)]
[else #f]))
(define (get n #:result [result void]
#:dir [dir-suffix ""])
(define files-dir (build-path tmp-dir (format "files~a" dir-suffix)))
(make-directory* files-dir)
(define fn (build-path files-dir (format "~a" n)))
(define content (make-bytes (* n 100) n))
(when (file-exists? fn) (delete-file fn))
(define got? #f)
(define errored? #f)
(cache-file fn
n
(build-path tmp-dir "cache")
(lambda ()
(call-with-output-file*
fn
(lambda (o)
(write-bytes content o))))
#:evict-before? (lambda (a b)
(define a-keep? (member (hash-ref a 'key) keep-in-cache))
(define b-keep? (member (hash-ref b 'key) keep-in-cache))
(cond
[(and (not a-keep?) b-keep?) #t]
[(and (not b-keep?) a-keep?) #f]
[else
(< (hash-ref a 'modify-seconds)
(hash-ref b 'modify-seconds))]))
#:max-cache-size 10000
#:log-debug-string (lambda (s) (log-cache-debug s))
#:log-error-string (lambda (s)
(if (ok-error? s)
(set! errored? #t)
(log-error s)))
#:notify-cache-use (lambda (s)
(set! got? #t)))
(unless (= (file-size fn) (bytes-length content))
(error 'test "wrong file size for ~a" fn))
(unless (equal? content (file->bytes fn))
(error 'test "wrong content for ~a" fn))
(unless (or got? errored?)
(when complain-if-uncached?
(when (member n keep-in-cache)
(error 'test "wasn't in cache: ~a" n))))
(result got?))
;; --------------------------------------------------
;; Test basic caching
(for ([i keep-in-cache]) (get i))
(set! complain-if-uncached? #t)
(for ([i 20]) (get i))
(for ([i (in-range 7 12)]) (get i))
(for ([i (in-range 7 12)]) (get i))
(for ([i (in-range 7 12)]) (get i))
(for ([i 20]) (get i))
(for ([i 100])
(for ([i 20]) (get i)))
(cache-remove (car keep-in-cache)
(build-path tmp-dir "cache"))
(set! complain-if-uncached? #f)
(when (get (car keep-in-cache) #:result values)
(error 'test "should not have been in cache"))
(set! complain-if-uncached? #t)
(for ([i keep-in-cache]) (get i))
(cache-remove #f
(build-path tmp-dir "cache"))
(set! complain-if-uncached? #f)
(for ([i keep-in-cache])
(when (get i #:result values)
(error 'test "should not have been in cache")))
(set! complain-if-uncached? #t)
(for ([i keep-in-cache]) (get i))
;; --------------------------------------------------
;; Test concurrent use
(set! concurrent? #t)
(for-each
sync
(for/list ([j 100])
(thread (lambda ()
(for ([i 20]) (get i #:dir j))))))
(set! concurrent? #f)
;; --------------------------------------------------
;; Test uncooperative filesystem
(define (all-file-perms perms)
(define dir (build-path tmp-dir "cache"))
(for ([f (directory-list dir)])
(when (regexp-match? #rx"^cached" f)
(file-or-directory-permissions (build-path dir f) perms))))
(set! nonread? #t)
(for ([j 2])
(for ([i 20])
(get i)
(all-file-perms 0)))
;; make files readable and writable again, and cache should recover:
(all-file-perms #o777)
(set! nonread? #f)
(set! complain-if-uncached? #f)
(for ([i 20]) (get i))
(set! complain-if-uncached? #t)
(for ([i 20]) (get i))
;; --------------------------------------------------
(delete-directory/files tmp-dir)

View File

@ -0,0 +1,265 @@
#lang racket/base
(require racket/file
racket/path
racket/contract/base)
(provide
(contract-out [cache-file
(->* (path-string?
(not/c #f)
path-string?
(-> any))
(#:log-error-string (string? . -> . any)
#:log-debug-string (string? . -> . any)
#:notify-cache-use (string? . -> . any)
#:max-cache-files real?
#:max-cache-size real?
#:evict-before? (hash? hash? . -> . boolean?)
#:exists-ok? any/c)
void?)]
[cache-remove
(->* (any/c
path-string?)
(#:log-error-string (string? . -> . any)
#:log-debug-string (string? . -> . any))
void?)]))
(define (cache-file dest-file ; the file that `fetch` is supposed to put in place
source-key ; use (or write) cache entry with this key
cache-dir ; direct to hold cache files and database
fetch ; the download operation for cache misses; writes `dest-file`
#:log-error-string [log-error-string log-error-string]
#:log-debug-string [log-debug-string log-debug-string]
#:notify-cache-use [notify-cache-use void]
#:max-cache-files [max-cache-files 1024]
#:max-cache-size [max-cache-size (* 64 1024 1024)]
#:evict-before? [evict-before? (lambda (a b)
(< (hash-ref a 'modify-seconds)
(hash-ref b 'modify-seconds)))]
#:exists-ok? [exists-ok? #f])
;; First phase, returns a thunk to continue (i.e., download or not):
(define (try-read-cache)
(call-with-cache-db/catch-exn-until-success-finishes
cache-dir
log-error-string
'shared
;; On success:
(lambda (db cache-db-file)
;; Lock is still held here to make sure a cache file isn't
;; deleted and replaced with a different file while we're
;; trying to copy it.
(cond
[(hash-ref db source-key #f)
=> (lambda (fn)
(notify-cache-use fn)
(copy-file (build-path cache-dir fn) dest-file exists-ok?)
;; no work in continuation:
void)]
[else fetch-and-continue]))
;; On failure (exception is logged already):
(lambda (exn) fetch-and-continue)))
;; Second phase (used when cache read fails):
(define (fetch-and-continue)
(fetch)
(when (file-exists? dest-file)
(try-write-cache)))
;; Third phase, when fetching seems to have worked:
(define (try-write-cache)
(call-with-cache-db/catch-exn-until-success-finishes
cache-dir
log-error-string
'exclusive
;; On success getting the current db:
(lambda (db cache-db-file)
;; Lock is still held here to make sure no one else
;; tries to clean up while we copy a file into
;; the cache.
;; We assume that the cost of this filesystem traversal
;; is small compared to download costs:
(define revised-db (limit-sizes cache-dir
(clean-database db cache-dir
log-debug-string)
(sub1 max-cache-files)
(- max-cache-size (file-size dest-file))
evict-before?
log-debug-string))
;; Copy file into cache and update cache db:
(log-debug-string (format "caching for ~s" source-key))
(define cache-file (make-temporary-file "cached~a" #f cache-dir))
(copy-file dest-file cache-file #t)
(write-db cache-db-file
(hash-set revised-db
source-key
(path->string
(file-name-from-path cache-file)))))
;; On failure, we can just give up writing to the cache;
;; the exception is already logged:
void))
;; Start with first phase, and continue with its result:
((try-read-cache)))
(define (cache-remove source-key ; #f => clear all
cache-dir
#:log-error-string [log-error-string log-error-string]
#:log-debug-string [log-debug-string log-debug-string])
(call-with-cache-db/catch-exn-until-success-finishes
cache-dir
log-error-string
'exclusive
;; success:
(lambda (db cache-db-file)
(write-db cache-db-file
(if source-key
(hash-remove db source-key)
(hash))))
;; failure:
(lambda (exn) (raise exn))))
;; Extract cache-db, holding file lock (in `lock-mode`) while calling
;; `success-handler` on the db. If the cache-db read fails, use an
;; empty db. If any failure happens other than reading the db,
;; including duing `success-handler`, then call `failure-handler` with
;; the exception after logging it. Note that breaks are disabled when
;; calling the failure handler, so it should return quickly.
(define call-with-cache-db/catch-exn-until-success-finishes
(lambda (cache-dir log-error-string lock-mode
success-handler
failure-handler)
(define cache-db-file (build-path cache-dir "cache.rktd"))
(define (succeed db)
(success-handler db cache-db-file))
(with-handlers ([exn:fail?
(lambda (exn)
(log-error-string (format "cache attempt failed: ~a"
(exn-message exn)))
(failure-handler exn))])
(make-directory* cache-dir)
(call-with-file-lock/timeout
cache-db-file
lock-mode
(lambda ()
(succeed (read-db cache-db-file log-error-string)))
(lambda ()
;; raise exception to be caught above:
(error (format "could not acquire ~s lock" lock-mode)))))))
;; Called with read lock, and handles failure by returning
;; an empty db:
(define (read-db cache-db-file log-error-string)
(if (not (file-exists? cache-db-file)) ; avoid logging error if the file does not exist
(hash)
(with-handlers ([exn:fail?
(lambda (exn)
(log-error-string (format "cache file read failure: ~a"
(exn-message exn)))
(hash))])
(define db (call-with-input-file*
cache-db-file
(lambda (i)
(call-with-default-reading-parameterization
(lambda ()
(read i))))))
(cond
[(and (hash? db)
;; be picky about hash content:
(for/and ([v (in-hash-values db)])
(and (string? v)
(regexp-match? #rx"[a-zA-Z0-9]*" v)
((string-length v) . <= . 64))))
db]
[else
(log-error-string "cache file read failure: ill-formed content")
(hash)]))))
;; Called with write lock:
(define (write-db cache-db-file db)
(call-with-output-file*
cache-db-file
#:exists 'truncate/replace
(lambda (o)
(write db o)
(newline o))))
;; Called with write lock; delete files not in
;; db, and removes non-existent files from db:
(define (clean-database db cache-dir log-debug-string)
(define rev (for/hash ([(k v) (in-hash db)])
(values v k)))
;; Delete files not in db:
(define exist
(for/fold ([exist (hash)]) ([f (directory-list cache-dir)])
(define s (path->string f))
(cond
[(hash-ref rev s #f)
(hash-set exist s #t)]
[(not (regexp-match? #rx"^cached" s))
;; not a cache file, so leave it alone:
exist]
[else
;; delete unrecorded cache file:
(define p (build-path cache-dir f))
(log-debug-string (format "removing file from cache: ~a" p))
(delete-file p)
exist])))
;; Filter db by existing files:
(for/hash ([(k v) (in-hash db)]
#:when (or (hash-ref exist v #f)
(begin
(log-debug-string (format "lost cache file for entry: ~s" k))
#f)))
(values k v)))
;; Called with the write lock, deletes oldest files as needed
;; to meet count and size constraints, returns updated db:
(define (limit-sizes cache-dir db
max-count max-size evict-before?
log-debug-string)
(define files
(for/list ([(k v) (in-hash db)])
(define path (build-path cache-dir v))
(hash 'name v
'key k
'size (file-size path)
'modify-seconds (file-or-directory-modify-seconds path))))
(define count (hash-count db))
(define size (for/sum ([f (in-list files)])
(hash-ref f 'size)))
(cond
[(and (count . < . max-count)
(size . < . max-size))
;; no need to remove anything
db]
[else
;; Sort, so we can remove the oldest (or whatever `evict-before?`
;; says should be evicted first):
(define sorted (sort files evict-before?))
(let loop ([sorted sorted]
[db db]
[count count]
[size size])
(cond
[(or (null? sorted)
(and (count . < . max-count)
(size . < . max-size)))
db]
[else
;; Delete first cached file in the eviction list:
(define key (hash-ref (car sorted) 'key))
(log-debug-string (format "deleting cached file for ~s" key))
(delete-file (build-path cache-dir (hash-ref (car sorted) 'name)))
(loop (cdr sorted)
(hash-remove db key)
(sub1 count)
(- size (hash-ref (car sorted) 'size)))]))]))
;; Default logging functions
(define (log-error-string s)
(log-error s))
(define (log-debug-string s)
(log-debug s))