db: various fixes and additions

- fix connection-pool for nested tx, fix race condition
 - ensure connected? always nonblocking
 - added and reorganized some doc sections
 - added grouping, contracts to in-query
 - added rows->dict
This commit is contained in:
Ryan Culpepper 2012-05-02 02:27:35 -06:00
parent aa0d8aaa33
commit 00fd18bc62
18 changed files with 1042 additions and 522 deletions

View File

@ -28,8 +28,9 @@
(but this could also be done by two locks: outer "ownership" lock
and inner "invariant-protecting" lock)
- audit code for break-safety, disable breaks as needed
- make implementation notes section of docs
- explain cursor impl (& rationale)
- explain nested tx impl
- invalidate statement cache on query error
- 2 call-with-transactions from separate threads can conflict

View File

@ -1,5 +1,10 @@
#lang racket/base
(require racket/contract/base)
(require (for-syntax racket/base
syntax/parse
syntax/parse/experimental/template)
racket/dict
syntax/location
racket/contract/base)
;; ============================================================
@ -74,6 +79,56 @@
(require "private/generic/functions.rkt")
(define fetch-size/c
(or/c exact-positive-integer? +inf.0))
(define grouping-field/c (or/c string? exact-nonnegative-integer?))
(define group/c (or/c grouping-field/c (vectorof grouping-field/c)))
(define grouping/c (or/c group/c (listof group/c)))
(define group-mode/c
(listof (or/c 'list 'preserve-null)))
(define in-query/c
(->* (connection? statement?)
(#:fetch fetch-size/c
#:group grouping/c
#:group-mode group-mode/c)
#:rest list?
sequence?))
(define here-mod-path (quote-module-path))
(define-syntax contracted-in-query
(make-provide/contract-transformer
(quote-syntax in-query/c)
(quote-syntax in-query)
(quote-syntax in-query)
(quote-syntax here-mod-path)))
(define-sequence-syntax in-query*
(lambda () #'contracted-in-query)
(lambda (stx)
(syntax-parse stx
[[(var ...) (~and form
(in-query (~or (~optional (~seq #:fetch fetch-size))
(~optional (~seq #:group grouping-fields))
(~optional (~seq #:group-mode group-mode))
(~between arg:expr 2 +inf.0))
...))]
#:declare fetch-size (expr/c #'fetch-size/c #:context #'form) #:role "fetch size argument"
#:declare grouping-fields (expr/c #'grouping/c #:context #'form) #:role "grouping fields argument"
#:declare group-mode (expr/c #'group-mode/c #:context #'form) #:role "group mode argument"
#:with (c stmt q-arg ...) #'(arg ...)
#:declare c (expr/c #'connection? #:context #'form) #:role "connection argument"
#:declare stmt (expr/c #'statement? #:context #'form) #:role "statement argument"
(template
[(var ...) (in-query-helper (length '(var ...)) c.c stmt.c q-arg ...
(?? (?@ #:fetch fetch-size.c))
(?? (?@ #:group grouping-fields.c))
(?? (?@ #:group-mode group-mode.c)))])]
[_ #f])))
(provide (rename-out [in-query* in-query]))
(provide/contract
@ -105,7 +160,8 @@
(->* (connection? statement?) () #:rest list? any)]
[query-rows
(->* (connection? statement?)
(#:group (or/c (vectorof string?) (listof (vectorof string?))))
(#:group grouping/c
#:group-mode group-mode/c)
#:rest list? (listof vector?))]
[query-list
(->* (connection? statement?) () #:rest list? list?)]
@ -165,9 +221,15 @@
[group-rows
(->* (rows-result?
#:group (or/c (vectorof string?) (listof (vectorof string?))))
(#:group-mode (listof (or/c 'preserve-null-rows 'list)))
rows-result?)])
#:group grouping/c)
(#:group-mode (listof (or/c 'list 'preserve-null #|deprecated:|# 'preserve-null-rows)))
rows-result?)]
[rows->dict
(->* (rows-result? #:key grouping/c #:value grouping/c)
(#:value-mode group-mode/c)
dict?)]
)
;; ============================================================

View File

@ -421,12 +421,33 @@
dprintf)
(super-new)
(field [max-cache-size 50])
;; Statement Cache
;; updated by prepare; potentially invalidated by query (via check/invalidate-cache)
(define pst-cache '#hash())
(field [pst-cache '#hash()]
[cache-mode 'in-transaction]
[cache-flush-next? #f] ;; flush cache on next query
[max-cache-size 20])
(define/private (use-cache?)
(and (not cache-flush-next?)
(case cache-mode
((always) #t)
((never) #f)
((in-transaction) (eq? (get-tx-status) #t)))))
(define/public (stmt-cache-ctl who mode)
(case mode
((get) cache-mode)
((flush) (begin (set! cache-flush-next? #t) cache-mode))
(else (unless (eq? mode cache-mode)
(call-with-lock who
(lambda ()
(set! cache-mode mode)
(set! cache-flush-next? #t)
cache-mode))))))
;; --
(define/public (get-cached-statement stmt)
(let ([cached-pst (hash-ref pst-cache stmt #f)])
@ -447,12 +468,6 @@
(dprintf " ** caching statement\n")
(set! pst-cache (hash-set pst-cache sql pst))))))
(define/private (use-cache?)
(case cache-statements
((always) #t)
((never) #f)
((in-transaction) (eq? (get-tx-status) #t))))
;; check/invalidate-cache : statement/pst -> hash/#f
;; Returns old cache on invalidation, or #f if stmt is safe.
;; May also return part of old cache (excluding pst) when cache gets too big.
@ -463,7 +478,11 @@
unsafe, because they're usually transactional SQL.
|#
(define (invalidate! except)
(dprintf " ** invalidating statement cache~a\n" (if except " (too big)" ""))
;; FIXME: smarter cache ejection (LRU?)
(dprintf " ** invalidating statement cache~a\n"
(cond [except " (too big)"]
[cache-flush-next? " (mode changed)"]
[else ""]))
(let ([cache pst-cache])
(set! pst-cache '#hash())
(cond [except
@ -471,7 +490,9 @@
(hash-remove cache (send except get-stmt))]
[else
cache])))
(cond [(statement-binding? x)
(cond [cache-flush-next?
(invalidate! #f)]
[(statement-binding? x)
(check/invalidate-cache (statement-binding-pst x))]
[(prepared-statement? x)
(let ([stmt-type (send x get-stmt-type)])

View File

@ -13,33 +13,49 @@
(class object%
;; other-evt : (-> evt)
;; generates other evt to sync on besides req-channel, eg timeouts
(init-field (other-evt (lambda () never-evt)))
(init-field (other-evt (lambda () never-evt))
(alt-enabled? (lambda () #t)))
(super-new)
(define req-channel (make-channel))
(define alt-req-channel (make-channel))
(define mthread
(thread/suspend-to-kill
(lambda ()
(let loop ()
(sync (wrap-evt req-channel (lambda (p) (p)))
(if (alt-enabled?)
(wrap-evt alt-req-channel (lambda (p) (p)))
never-evt)
(other-evt))
(loop)))))
(define/public (call proc)
(call* proc req-channel #f))
(define/public (alt-call-evt proc)
(call* proc alt-req-channel #t))
(define/private (call* proc chan as-evt?)
(thread-resume mthread (current-thread))
(let ([result #f]
[sema (make-semaphore 0)])
(channel-put req-channel
(lambda ()
(let* ([result #f]
[sema (make-semaphore 0)]
[proc (lambda ()
(set! result
(with-handlers ([(lambda (e) #t)
(lambda (e) (cons 'exn e))])
(cons 'values (call-with-values proc list))))
(semaphore-post sema)))
(semaphore-post sema))]
[handler
(lambda (_evt)
(semaphore-wait sema)
(case (car result)
((values) (apply values (cdr result)))
((exn) (raise (cdr result))))))))
((exn) (raise (cdr result)))))])
(if as-evt?
(wrap-evt (channel-put-evt chan proc) handler)
(begin (channel-put chan proc)
(handler #f)))))))
;; ----
@ -53,14 +69,20 @@
(init-private connection)
(define mgr (new manager%))
(define last-connected? #t)
(define-syntax-rule (define-forward (method arg ...) ...)
(begin
(define/public (method arg ...)
(send mgr call (lambda () (send connection method arg ...)))) ...))
(send mgr call (lambda ()
(begin0
(send connection method arg ...)
(set! last-connected? (send connection connected?))))))
...))
(define/public (connected?) last-connected?)
(define-forward
(connected?)
(disconnect)
(get-dbsystem)
(query fsym stmt cursor?)
@ -88,8 +110,7 @@
(define virtual-connection%
(class* object% (connection<%>)
(init-private connector ;; called from client thread
get-key ;; called from client thread
timeout)
get-key) ;; called from client thread
(super-new)
(define custodian (current-custodian))
@ -99,36 +120,17 @@
;; key=>conn : hasheq[key => connection]
(define key=>conn (make-hasheq))
;; alarms : hasheq[connection => evt] (alarm wrapped to return key)
(define alarms (make-hasheq))
(define/private (get key) ;; also refreshes alarm
(let ([c (hash-ref key=>conn key #f)])
(when c (hash-set! alarms c (fresh-alarm-for key)))
c))
(define/private (get key) ;; also called by client thread for connected?
(hash-ref key=>conn key #f))
(define/private (put! key c)
(hash-set! key=>conn key c)
(hash-set! alarms c (fresh-alarm-for key)))
(hash-set! key=>conn key c))
(define/private (fresh-alarm-for key)
(wrap-evt (alarm-evt (+ (current-inexact-milliseconds) timeout))
(lambda (a) key)))
(define/private (remove! key timeout?)
;; timeout? = if connection open, then wait longer
(let* ([c (hash-ref key=>conn key #f)]
[in-trans? (with-handlers ([exn:fail? (lambda (e) #f)])
(and c
(send c connected?)
(send c transaction-status 'virtual-connection)))])
(cond [(not c) (void)]
[(and timeout? in-trans?)
(hash-set! alarms c (fresh-alarm-for key))]
[else
(define/private (remove! key)
(let ([c (get key)])
(when c
(hash-remove! key=>conn key)
(hash-remove! alarms c)
(send c disconnect)])))
(send c disconnect))))
(define mgr
(new manager%
@ -137,16 +139,10 @@
(choice-evt
(let ([keys (hash-map key=>conn (lambda (k v) k))])
(handle-evt (apply choice-evt keys)
;; Assignment to key has expired: move to idle or disconnect.
;; Assignment to key has expired
(lambda (key)
(dbdebug "virtual-connection: key expiration: ~e" key)
(remove! key #f))))
(let ([alarm-evts (hash-map alarms (lambda (k v) v))])
(handle-evt (apply choice-evt alarm-evts)
;; Disconnect idle connection.
(lambda (key)
(dbdebug "virtual-connection: timeout")
(remove! key #t)))))))))
(remove! key)))))))))
;; == methods called in client thread ==
@ -159,12 +155,12 @@
(connector))])
(send mgr call
(lambda ()
(when c (remove! key #f))
(when c (remove! key))
(put! key c*)))
c*)]
[else
(when c ;; got a disconnected connection
(send mgr call (lambda () (remove! key #f))))
(send mgr call (lambda () (remove! key))))
#f])))
;; ----
@ -178,24 +174,27 @@
...))
(define-forward
(#f #f (connected?))
(#t '_ (get-dbsystem))
(#t '_ (query fsym stmt cursor?))
(#t '_ (fetch/cursor fsym stmt fetch-size))
(#t '_ (start-transaction fsym isolation cwt?))
(#f (void) (end-transaction fsym mode cwt?))
(#t '_ (transaction-status fsym))
(#f #f (transaction-status fsym))
(#t '_ (list-tables fsym schema)))
(define/public (get-base)
(get-connection #t))
(define/public (connected?)
(let ([c (get (get-key))])
(and c (send c connected?))))
(define/public (disconnect)
(let ([c (get-connection #f)]
[key (get-key)])
(when c
(send c disconnect)
(send mgr call (lambda () (remove! key #f)))))
(send mgr call (lambda () (remove! key)))))
(void))
(define/public (prepare fsym stmt close-on-exec?)
@ -210,8 +209,7 @@
;; ----
(define (virtual-connection connector
#:timeout [timeout +inf.0])
(define (virtual-connection connector)
(let ([connector
(cond [(connection-pool? connector)
(lambda () (connection-pool-lease connector))]
@ -219,8 +217,7 @@
[get-key (lambda () (thread-dead-evt (current-thread)))])
(new virtual-connection%
(connector connector)
(get-key (lambda () (thread-dead-evt (current-thread))))
(timeout (* 1000 timeout)))))
(get-key get-key))))
;; ========================================
@ -233,20 +230,15 @@
max-idle-connections)
(super-new)
;; max-connections is either in [1, 10000] or +inf.0,
;; if leave-evt is sema, then counter = (max-connections - assigned connections)
;; ie, includes idle connections
(define lease-evt
(if (= max-connections +inf.0)
always-evt
(make-semaphore max-connections)))
(define proxy-counter 1) ;; for debugging
(define actual-counter 1) ;; for debugging
(define actual=>number (make-weak-hasheq))
;; == methods called in manager thread ==
;; assigned-connections : nat
(define assigned-connections 0)
;; proxy=>evt : hasheq[proxy-connection => evt]
(define proxy=>evt (make-hasheq))
@ -266,7 +258,8 @@
proxy-number
(if take-idle? "idle" "new")
(hash-ref actual=>number raw-c "???"))
(hash-set! proxy=>evt c key)
(hash-set! proxy=>evt c (wrap-evt key (lambda (_e) c)))
(set! assigned-connections (add1 assigned-connections))
c))
(define/private (release* proxy raw-c why)
@ -279,11 +272,14 @@
(hash-remove! proxy=>evt proxy)
(when raw-c
(with-handlers ([exn:fail? void])
(send raw-c end-transaction 'connection-pool 'rollback))
(cond [(< (length idle-list) max-idle-connections)
;; If in tx, just disconnect (for simplicity; else must loop for nested txs)
(when (send raw-c transaction-status 'connection-pool)
(send raw-c disconnect)))
(cond [(and (< (length idle-list) max-idle-connections)
(send raw-c connected?))
(set! idle-list (cons raw-c idle-list))]
[else (send raw-c disconnect)])
(when (semaphore? lease-evt) (semaphore-post lease-evt))))
(set! assigned-connections (sub1 assigned-connections))))
(define/private (new-connection)
(let ([c (connector)]
@ -299,19 +295,18 @@
(new manager%
(other-evt
(lambda ()
(let ([evts (hash-map proxy=>evt (lambda (k v) (wrap-evt v (lambda (e) k))))])
(let ([evts (hash-values proxy=>evt)])
(handle-evt (apply choice-evt evts)
(lambda (proxy)
(release* proxy
(send proxy release-connection)
"release-evt"))))))))
"release-evt"))))))
(alt-enabled? (lambda () (< assigned-connections max-connections)))))
;; == methods called in client thread ==
(define/public (lease key)
(wrap-evt lease-evt
(lambda (_e)
(send mgr call (lambda () (lease* key))))))
(define/public (lease-evt key)
(send mgr alt-call-evt (lambda () (lease* key))))
(define/public (release proxy)
(let ([raw-c (send proxy release-connection)])
@ -350,8 +345,7 @@
(end-transaction fsym mode cwt?)
(list-tables fsym schema))
;; (define-forward define/override (connected?))
(define/override (connected?) (and connection #t))
(define/override (connected?) (and connection (send connection connected?)))
(define/public (disconnect)
(send pool release this))
@ -380,7 +374,7 @@
(cond [(thread? key) (thread-dead-evt key)]
[(custodian? key) (make-custodian-box key #t)]
[else key])]
[result (sync/timeout 0.1 (send pool lease key))])
[result (sync/timeout 0.1 (send pool lease-evt key))])
(unless result
(uerror 'connection-pool-lease
"cannot obtain connection; connection pool limit reached"))

View File

@ -146,9 +146,7 @@
[result (query/rows c 'query-rows sql #f)]
[result
(cond [(not (null? group-fields-list))
(group-rows-result* 'query-rows result group-fields-list
(not (memq 'preserve-null-rows group-mode))
(memq 'list group-mode))]
(group-rows-result* 'query-rows result group-fields-list group-mode)]
[else result])])
(rows-result-rows result)))
@ -204,34 +202,38 @@
;; ========================================
(define (in-query c stmt #:fetch [fetch-size +inf.0] . args)
(apply in-query-helper #f c stmt #:fetch fetch-size args))
(define-sequence-syntax in-query*
(lambda () #'in-query)
(lambda (stx)
(syntax-case stx ()
[[(var ...) (in-query c stmt arg ...)]
#'[(var ...)
(in-query-helper (length '(var ...)) c stmt arg ...)]]
[_ #f])))
(define (in-query c stmt
#:fetch [fetch-size +inf.0]
#:group [grouping-fields null]
#:group-mode [group-mode null]
. args)
(apply in-query-helper #f c stmt
#:fetch fetch-size
#:group grouping-fields
#:group-mode group-mode
args))
(define (in-query-helper vars c stmt
#:fetch [fetch-size +inf.0]
#:group [grouping-fields null]
#:group-mode [group-mode null]
. args)
;; Not protected by contract
(unless (connection? c)
(apply raise-type-error 'in-query "connection" 0 c stmt args))
(unless (statement? stmt)
(apply raise-type-error 'in-query "statement" 1 c stmt args))
(unless (or (exact-positive-integer? fetch-size) (eqv? fetch-size +inf.0))
(raise-type-error 'in-query "positive integer or +inf.0" fetch-size))
(let* ([check (or vars 'rows)]
(when (and (not (null? grouping-fields))
(< fetch-size +inf.0))
(error 'in-query "cannot apply grouping to cursor (finite fetch-size)"))
(let* ([check
;; If grouping, can't check expected arity.
;; FIXME: should check header includes named fields
(cond [(null? grouping-fields) (or vars 'rows)]
[else 'rows])]
[stmt (compose-statement 'in-query c stmt args check)])
(cond [(eqv? fetch-size +inf.0)
(in-list/vector->values
(rows-result-rows
(query/rows c 'in-query stmt vars)))]
(let ([result (query/rows c 'in-query stmt vars)])
(if (null? grouping-fields)
result
(group-rows-result* 'in-query result grouping-fields group-mode)))))]
[else
(let ([cursor (query/cursor c 'in-query stmt vars)])
(in-list-generator/vector->values
@ -333,62 +335,38 @@
;; ========================================
;; FIXME: add 'assume-sorted optimization option?
(define (group-rows result
#:group key-fields-list
#:group-mode [group-mode null])
(when (null? key-fields-list)
(error 'group-rows "expected at least one grouping field set"))
(group-rows-result* 'group-rows
result
key-fields-list
(not (memq 'preserve-null-rows group-mode))
(memq 'list group-mode)))
(group-rows-result* 'group-rows result key-fields-list group-mode))
(define (group-rows-result* fsym result key-fields-list invert-outer? as-list?)
(let* ([key-fields-list
(if (list? key-fields-list) key-fields-list (list key-fields-list))]
[total-fields (length (rows-result-headers result))]
[name-map
(for/hash ([header (in-list (rows-result-headers result))]
[i (in-naturals)]
#:when (assq 'name header))
(values (cdr (assq 'name header)) i))]
(define (group-rows-result* fsym result key-fields-list group-mode)
(let* ([invert-outer? (not (or (memq 'preserve-null group-mode)
;; old flag, deprecated:
(memq 'preserve-null-rows group-mode)))]
[as-list? (memq 'list group-mode)]
[headers (rows-result-headers result)]
[total-fields (length headers)]
[name-map (headers->name-map headers)]
[fields-used (make-vector total-fields #f)]
[key-indexes-list
(for/list ([key-fields (in-list key-fields-list)])
(for/vector ([key-field (in-vector key-fields)])
(let ([key-index
(cond [(string? key-field)
(hash-ref name-map key-field #f)]
[else key-field])])
(when (string? key-field)
(unless key-index
(error fsym "grouping field ~s not found" key-field)))
(when (exact-integer? key-field)
(unless (< key-index total-fields)
(error fsym "grouping index ~s out of range [0, ~a]"
key-index (sub1 total-fields))))
(when (vector-ref fields-used key-index)
(error fsym "grouping field ~s~a used multiple times"
key-field
(if (string? key-field)
(format " (index ~a)" key-index)
"")))
(vector-set! fields-used key-index #t)
key-index)))]
(group-list->indexes fsym name-map total-fields fields-used key-fields-list)]
[residual-length
(for/sum ([x (in-vector fields-used)])
(if x 0 1))])
(for/sum ([x (in-vector fields-used)]) (if x 0 1))])
(when (= residual-length 0)
(error fsym "cannot group by all fields"))
(when (and (> residual-length 1) as-list?)
(error fsym
"exactly one residual field expected for #:group-mode 'list, got ~a"
"expected exactly one residual field for #:group-mode 'list, got ~a"
residual-length))
(let* ([initial-projection
(for/vector #:length total-fields ([i (in-range total-fields)]) i)]
[headers
(group-headers (list->vector (rows-result-headers result))
(group-headers (list->vector headers)
initial-projection
key-indexes-list)]
[rows
@ -400,6 +378,46 @@
as-list?)])
(rows-result headers rows))))
(define (headers->name-map headers)
(for/hash ([header (in-list headers)]
[i (in-naturals)]
#:when (assq 'name header))
(values (cdr (assq 'name header)) i)))
(define (group-list->indexes fsym name-map total-fields fields-used key-fields-list)
(let ([key-fields-list (if (list? key-fields-list) key-fields-list (list key-fields-list))])
(for/list ([key-fields (in-list key-fields-list)])
(group->indexes fsym name-map total-fields fields-used key-fields))))
(define (group->indexes fsym name-map total-fields fields-used key-fields)
(let ([key-fields (if (vector? key-fields) key-fields (vector key-fields))])
(for/vector ([key-field (in-vector key-fields)])
(grouping-field->index fsym name-map total-fields fields-used key-field))))
(define (grouping-field->index fsym name-map total-fields fields-used key-field)
(let ([key-index
(cond [(string? key-field)
(hash-ref name-map key-field #f)]
[else key-field])])
(when (string? key-field)
(unless key-index
(error fsym "expected grouping field in ~s, got: ~e"
(sort (hash-keys name-map) string<?)
key-field)))
(when (exact-integer? key-field)
(unless (< key-index total-fields)
(error fsym "grouping index ~s out of range [0, ~a]"
key-index (sub1 total-fields))))
(when fields-used
(when (vector-ref fields-used key-index)
(error fsym "grouping field ~s~a used multiple times"
key-field
(if (string? key-field)
(format " (index ~a)" key-index)
"")))
(vector-set! fields-used key-index #t))
key-index))
(define (group-headers headers projection key-indexes-list)
(define (get-headers vec)
(for/list ([index (in-vector vec)])
@ -414,7 +432,7 @@
[residual-headers
(group-headers headers residual-projection (cdr key-indexes-list))])
(append (get-headers key-indexes)
(list `((grouped . ,residual-headers)))))]))
(list `((name . "grouped") (grouped . ,residual-headers)))))]))
(define (group-rows* fsym rows projection key-indexes-list invert-outer? as-list?)
;; projection is vector of indexes (actually projection and permutation)
@ -443,17 +461,14 @@
(define residual-projection
(vector-filter-not (lambda (index) (vector-member index key-indexes))
projection))
(define key-row-length (vector-length key-indexes))
(define (row->key-row row)
(for/vector #:length key-row-length
([i (in-vector key-indexes)])
(vector-ref row i)))
(define (residual-all-null? row)
(for/and ([i (in-vector residual-projection)])
(sql-null? (vector-ref row i))))
(let* ([key-table (make-hash)]
[r-keys
(for/fold ([r-keys null])
@ -476,3 +491,55 @@
invert-outer?
as-list?)])
(vector-append key (vector residuals))))))]))
;; ========================================
(define not-given (gensym 'not-given))
(define (rows->dict result
#:key key-field/s
#:value value-field/s
#:value-mode [value-mode null])
(let* ([who 'rows->dict]
[headers (rows-result-headers result)]
[total-fields (length headers)]
[name-map (headers->name-map headers)]
[preserve-null? (memq 'preserve-null value-mode)]
[value-list? (memq 'list value-mode)])
(define (make-project field/s)
(if (vector? field/s)
(let* ([indexes (group->indexes who name-map total-fields #f field/s)]
[indexes-length (vector-length indexes)])
(lambda (v)
(for/vector #:length indexes-length ([i (in-vector indexes)])
(vector-ref v i))))
(let ([index (grouping-field->index who name-map total-fields #f field/s)])
(lambda (v) (vector-ref v index)))))
(define get-key (make-project key-field/s))
(define get-value (make-project value-field/s))
(define ok-value?
(cond [preserve-null? (lambda (v) #t)]
[(vector? value-field/s)
(lambda (v) (not (for/or ([e (in-vector v)]) (sql-null? e))))]
[else (lambda (v) (not (sql-null? v)))]))
(for/fold ([table '#hash()]) ([row (in-list (if value-list?
(reverse (rows-result-rows result))
(rows-result-rows result)))])
(let* ([key (get-key row)]
[value (get-value row)]
[old-value (hash-ref table key (if value-list? '() not-given))])
(unless (or value-list?
(eq? (hash-ref table key not-given) not-given)
;; FIXME: okay to coalesce values if equal?
(equal? value old-value))
(error who "duplicate value for key: ~e; values are ~e and ~e"
key old-value value))
(if value-list?
(hash-set table key
(if (ok-value? value)
(cons value old-value)
;; If all-NULL value, still enter key => '() into dict
old-value))
(if (ok-value? value)
(hash-set table key value)
table))))))

View File

@ -29,7 +29,9 @@
;; connection<%>
(define connection<%>
(interface ()
;; connected? method must return promptly (eg, without acquiring lock)
connected? ;; -> boolean
disconnect ;; -> void
get-dbsystem ;; -> dbsystem<%>
query ;; symbol statement -> QueryResult

View File

@ -49,18 +49,20 @@
(define/private (call* method-name args need-connected?)
(cond [channel
(pchan-put channel (cons method-name args))
(match (pchan-get channel)
(let* ([response (pchan-get channel)]
[still-connected? (car response)])
(when (not still-connected?) (set! channel #f))
(match (cdr response)
[(cons 'values vals)
(apply values (for/list ([val (in-list vals)]) (sexpr->result val)))]
[(list 'error message)
(raise (make-exn:fail message (current-continuation-marks)))])]
(raise (make-exn:fail message (current-continuation-marks)))]))]
[need-connected?
(unless channel
(error/not-connected method-name))]
[else (void)]))
(define/override (connected?)
;; FIXME: can underlying connection disconnect w/o us knowing?
(and channel #t))
(define/public (disconnect)

View File

@ -68,8 +68,8 @@ where <connect-spec> ::= (list 'sqlite3 path/sym mode-sym delay-num limit-num)
Connection methods protocol
client -> server: (list '<method-name> arg ...)
server -> client: (or (list 'values result ...)
(list 'error string))
server -> client: (or (list boolean 'values result ...)
(list boolean 'error string))
|#
(define proxy-server%
@ -86,10 +86,12 @@ server -> client: (or (list 'values result ...)
(serve1)
(when connection (serve)))
(define/private (still-connected?) (and connection (send connection connected?)))
(define/private (serve1)
(with-handlers ([exn?
(lambda (e)
(pchan-put channel (list 'error (exn-message e))))])
(pchan-put channel (list (still-connected?) 'error (exn-message e))))])
(call-with-values
(lambda ()
(match (pchan-get channel)
@ -117,7 +119,7 @@ server -> client: (or (list 'values result ...)
(transaction-status w))]))
(lambda results
(let ([results (for/list ([result (in-list results)]) (result->sexpr result))])
(pchan-put channel (cons 'values results)))))))
(pchan-put channel (cons (still-connected?) (cons 'values results))))))))
(define/private (sexpr->statement x)
(match x

View File

@ -10,6 +10,12 @@
(define (tech/reference . pre-flows)
(apply tech #:doc '(lib "scribblings/reference/reference.scrbl") pre-flows))
(define (parheading . pre-flows)
(elem (apply bold pre-flows) (hspace 1)))
(define (wplink path . pre-flows)
(apply hyperlink (string-append "http://en.wikipedia.org/wiki/" path) pre-flows))
;; ----
(define the-eval (make-base-eval))

View File

@ -19,18 +19,8 @@ administrative functions for managing connections.
There are four kinds of base connection, and they are divided into two
groups: @deftech{wire-based connections} and @deftech{FFI-based
connections}. PostgreSQL and MySQL connections are wire-based, and
SQLite and ODBC connections are FFI-based.
Wire-based connections communicate using @tech/reference{ports}, which
do not cause other Racket threads to block. In contrast, an FFI call
causes all Racket threads to block until it completes, so FFI-based
connections can degrade the interactivity of a Racket program,
particularly if long-running queries are performed using the
connection. This problem can be avoided by creating the FFI-based
connection in a separate @tech/reference{place} using the
@racket[#:use-place] keyword argument. Such a connection will not
block all Racket threads during queries; the disadvantage is the cost
of creating and communicating with a separate @tech/reference{place}.
SQLite and ODBC connections are FFI-based. See also
@secref["ffi-concurrency"].
Base connections are made using the following functions.
@ -240,7 +230,7 @@ Base connections are made using the following functions.
If @racket[use-place] is true, the actual connection is created in
a distinct @tech/reference{place} for database connections and a
proxy is returned.
proxy is returned; see @secref["ffi-concurrency"].
If the connection cannot be made, an exception is raised.
@ -289,7 +279,7 @@ Base connections are made using the following functions.
If @racket[use-place] is true, the actual connection is created in
a distinct @tech/reference{place} for database connections and a
proxy is returned.
proxy is returned; see @secref["ffi-concurrency"].
If the connection cannot be made, an exception is raised.
}
@ -436,14 +426,13 @@ connection associated with the current thread, one is obtained by
calling @racket[connect]. An actual connection is disconnected when
its associated thread dies.
@;{or if @racket[timeout] seconds elapse since the actual connection was last used.}
Virtual connections are especially useful in contexts such as web
servlets, where each request is handled in a fresh thread. A single
global virtual connection can be defined, freeing each servlet request
from explicitly opening and closing its own connections. In
particular, a @tech{virtual connection} backed by a @tech{connection
pool} combines convenience with efficiency:
servlets (see @secref["intro-servlets"]), where each request is
handled in a fresh thread. A single global virtual connection can be
defined, freeing each servlet request from explicitly opening and
closing its own connections. In particular, a @tech{virtual
connection} backed by a @tech{connection pool} combines convenience
with efficiency:
@examples/results[
[(define the-connection

View File

@ -50,7 +50,7 @@ and Doug Orleans for contributions to @tt{spgsql}, the PostgreSQL-only
predecessor of this library. The SQLite support is based in part on
code from Jay McCarthy's @tt{sqlite} package.
@include-section["introduction.scrbl"]
@include-section["using-db.scrbl"]
@include-section["connect.scrbl"]
@include-section["query.scrbl"]
@include-section["sql-types.scrbl"]

View File

@ -1,290 +0,0 @@
#lang scribble/doc
@(require scribble/manual
scribble/eval
scribble/struct
racket/sandbox
"config.rkt"
(for-label db
web-server/lang/web))
@title[#:tag "introduction"]{Introduction}
This section introduces this library's basic features and discusses
how to build a database-backed web servlet.
@section[#:tag "intro-basic"]{Basic Features}
The following annotated program demonstrates how to connect to a
database and perform simple queries. Some of the SQL syntax used below
is PostgreSQL-specific, such as the syntax of query parameters
(@litchar{$1} rather than @litchar{?}).
@my-interaction[
[(require db)
(void)]
]
First we create a connection. Replace @racket[_user], @racket[_db],
and @racket[_password] below with the appropriate values for your
configuration (see @secref{creating-connections} for other connection examples):
@my-interaction[
[(define pgc
(postgresql-connect #:user _user
#:database _db
#:password _password))
(void)]
[pgc
(new connection%)]
]
Use @racket[query-exec] method to execute a SQL statement for effect.
@my-interaction[
[(query-exec pgc
"create temporary table the_numbers (n integer, d varchar(20))")
(void)]
[(query-exec pgc
"insert into the_numbers values (0, 'nothing')")
(void)]
[(query-exec pgc
"insert into the_numbers values (1, 'the loneliest number')")
(void)]
[(query-exec pgc
"insert into the_numbers values (2, 'company')")
(void)]
]
The @racket[query] function is a more general way to execute a
statement. It returns a structure encapsulating information about the
statement's execution. (But some of that information varies from
system to system and is subject to change.)
@my-interaction[
[(query pgc "insert into the_numbers values (3, 'a crowd')")
(simple-result '((command insert 0 1)))]
[(query pgc "select n, d from the_numbers where n % 2 = 0")
(rows-result
(list
'((name . "n") (typeid . 23))
'((name . "d") (typeid . 1043)))
'(#(0 "nothing") #(2 "company")))]
]
When the query is known to return rows and when the field
descriptions are not needed, it is more convenient to use the
@racket[query-rows] function.
@my-interaction[
[(query-rows pgc "select n, d from the_numbers where n % 2 = 0")
'(#(0 "nothing") #(2 "company"))]
]
Use @racket[query-row] for queries that are known to return exactly
one row.
@my-interaction[
[(query-row pgc "select * from the_numbers where n = 0")
(vector 0 "nothing")]
]
Similarly, use @racket[query-list] for queries that produce rows of
exactly one column.
@my-interaction[
[(query-list pgc "select d from the_numbers order by n")
(list "nothing" "the loneliest number" "company" "a crowd")]
]
When a query is known to return a single value (one row and one
column), use @racket[query-value].
@my-interaction[
[(query-value pgc "select count(*) from the_numbers")
4]
[(query-value pgc "select d from the_numbers where n = 5")
(error 'query-value
"query returned zero rows: ~s"
"select d from the_numbers where n = 5")]
]
When a query may return zero or one rows, as the last example, use
@racket[query-maybe-row] or @racket[query-maybe-value] instead.
@my-interaction[
[(query-maybe-value pgc "select d from the_numbers where n = 5")
(values #f)]
]
The @racket[in-query] function produces a sequence that can be used
with Racket's iteration forms:
@my-interaction[
[(for ([(n d) (in-query pgc "select * from the_numbers where n < 4")])
(printf "~a is ~a\n" n d))
(for-each (lambda (n d) (printf "~a: ~a\n" n d))
'(0 1 2 3)
'("nothing" "the loneliest number" "company" "a crowd"))]
[(for/fold ([sum 0]) ([n (in-query pgc "select n from the_numbers")])
(+ sum n))
(for/fold ([sum 0]) ([n (in-list '(0 1 2 3))])
(+ sum n))]
]
Errors in queries generally do not cause the connection to disconnect.
@my-interaction[
[(begin (with-handlers [(exn:fail?
(lambda (e) (printf "~a~n" (exn-message e))))]
(query-value pgc "select NoSuchField from NoSuchTable"))
(query-value pgc "select 'okay to proceed!'"))
(begin (display "query-value: relation \"nosuchtable\" does not exist (SQLSTATE 42P01)")
"okay to proceed!")]
]
Queries may contain parameters. The easiest way to execute a
parameterized query is to provide the parameters ``inline'' after the
SQL statement in the query function call.
@my-interaction[
[(query-value pgc
"select d from the_numbers where n = $1" 2)
"company"]
[(query-list pgc
"select n from the_numbers where n > $1 and n < $2" 0 3)
(list 1 2)]
]
Alternatively, a parameterized query may be prepared in advance and
executed later. @tech{Prepared statements} can be executed multiple
times with different parameter values.
@my-interaction[
[(define get-less-than-pst
(prepare pgc "select n from the_numbers where n < $1"))
(void)]
[(query-list pgc get-less-than-pst 1)
(list 0)]
[(query-list pgc (bind-prepared-statement get-less-than-pst '(2)))
(list 0 1)]
]
When a connection's work is done, it should be disconnected.
@my-interaction[
[(disconnect pgc)
(void)]
]
@;{============================================================}
@section[#:tag "intro-servlets"]{Databases and Web Servlets}
Using database connections in a web servlet is more complicated than
in a standalone program. A single servlet is potentially used to serve
many requests at once, each in a separate request-handling
thread. Furthermore, the use of @racket[send/suspend],
@racket[send/suspend/dispatch], etc means that there are many places
where a servlet may start and stop executing to service a request.
Why not use a single connection to handle all of a servlet's requests?
That is, create the connection with the servlet instance and never
disconnect it. Such a servlet would look something like the following:
@racketmod[
#:file "bad-servlet.rkt"
web-server
(require db)
(define db-conn (postgresql-connect ....))
(define (serve req)
.... db-conn ....)
]
The main problem with using one connection for all requests is that
multiple threads accessing the same connection are not properly
@hyperlink["http://en.wikipedia.org/wiki/Isolation_%28database_systems%29"]{isolated}. For
example, if two threads both attempt to start a new transaction, the
second one will fail, because the first thread has already put the
connection into an ``in transaction'' state. And if one thread is
accessing the connection within a transaction and another thread
issues a query, the second thread may see invalid data or even disrupt
the work of the first thread.
A secondary problem is performance. A connection can only perform a
single query at a time, whereas most database systems are capable of
concurrent query processing.
The proper way to use database connections in a servlet is to create a
connection for each request and disconnect it when the request
is handled. But since a request thread may start and stop executing in
many places (due to @racket[send/suspend], etc), inserting the code to
connect and disconnect at the proper places can be challenging and
messy.
A better solution is to use a @tech{virtual connection}, which creates
a request-specific (that is, thread-specific) ``actual connection'' by
need and disconnects it when the request is handled (that is, when the
thread terminates). Different request-handling threads using the same
virtual connection are assigned different actual connection, so the
threads are properly isolated.
@racketmod[
#:file "better-servlet.rkt"
web-server
(require db)
(define db-conn
(virtual-connection
(lambda () (postgresql-connect ....))))
(define (serve req)
.... db-conn ....)
]
This solution preserves the simplicity of the naive solution and fixes
the isolation problem but at the cost of creating many short-lived
database connections. That cost can be eliminated by using a
@tech{connection pool}:
@racketmod[
#:file "best-servlet.rkt"
web-server
(require db)
(define db-conn
(virtual-connection
(connection-pool
(lambda () (postgresql-connect ....)))))
(define (serve req)
.... db-conn ....)
]
By using a virtual connection backed by a connection pool, a servlet
can achieve simplicity, isolation, and performance.
@;{
TODO:
- talk about virtual statements, too
- show actual working servlet code
--
A prepared statement is tied to the connection used to create it;
attempting to use it with another connection results in an
error. Unfortunately, in some scenarios such as web servlets, the
lifetimes of connections are short or difficult to track, making
prepared statements inconvenient. In such cases, a better tool is the
@tech{virtual statement}, which prepares statements on demand and
caches them for future use with the same connection.
@my-interaction[
[(define get-less-than-pst
(virtual-statement "select n from the_numbers where n < $1"))
(void)]
[(code:line (query-list pgc1 get-less-than-pst 1) (code:comment "prepares statement for pgc1"))
(list 0)]
[(code:line (query-list pgc2 get-less-than-pst 2) (code:comment "prepares statement for pgc2"))
(list 0 1)]
[(code:line (query-list pgc1 get-less-than-pst 3) (code:comment "uses existing prep. stmt."))
(list 0 1 2)]
]
}

View File

@ -96,6 +96,21 @@ package in Red Hat.}
]
@section[#:tag "ffi-concurrency"]{FFI-Based Connections and Concurrency}
@tech{Wire-based connections} communicate using
@tech/reference{ports}, which do not cause other Racket threads to
block. In contrast, an FFI call causes all Racket threads to block
until it completes, so @tech{FFI-based connections} can degrade the
interactivity of a Racket program, particularly if long-running
queries are performed using the connection. This problem can be
avoided by creating the FFI-based connection in a separate
@tech/reference{place} using the @racket[#:use-place] keyword
argument. Such a connection will not block all Racket threads during
queries; the disadvantage is the cost of creating and communicating
with a separate @tech/reference{place}.
@section[#:tag "odbc-requirements"]{ODBC Requirements}
ODBC requires the appropriate driver manager native library as well as

View File

@ -5,7 +5,7 @@
racket/sandbox
"config.rkt"
"tabbing.rkt"
(for-label db db/util/geometry db/util/postgresql))
(for-label db db/util/geometry db/util/postgresql racket/dict))
@title[#:tag "query-api"]{Queries}
@ -19,7 +19,7 @@ raises an exception. Different query functions impose different
constraints on the query results and offer different mechanisms for
processing the results.
@bold{Errors} In most cases, a query error does not cause the
@parheading{Errors} In most cases, a query error does not cause the
connection to be disconnected. Specifically, the following kinds of
errors should never cause a connection to be disconnected:
@itemize[
@ -43,7 +43,7 @@ disconnected:
See @secref["transactions"] for information on how errors can affect
the transaction status.
@bold{Character encoding} This library is designed to interact with
@parheading{Character encoding} This library is designed to interact with
database systems using the UTF-8 character encoding. The connection
functions attempt to negotiate UTF-8 communication at the beginning of
every connection, but some systems also allow the character encoding
@ -53,13 +53,12 @@ and data might get corrupted in transmission. Avoid changing a
connection's character encoding. When possible, the connection will
observe the change and automatically disconnect with an error.
@bold{Synchronization} Connections are internally synchronized: it is
safe to perform concurrent queries on the same connection object from
different threads. Connections are not kill-safe: killing a thread
that is using a connection---or shutting down the connection's
managing custodian---may leave the connection locked, causing future
operations to block indefinitely. See @secref["kill-safe"] for a
way to make kill-safe connections.
@parheading{Synchronization} Connections are internally synchronized:
it is safe to use a connection from different threads
concurrently. Most connections are not kill-safe: killing a thread
that is using a connection may leave the connection locked, causing
future operations to block indefinitely. See also
@secref["kill-safe"].
@section{Statements}
@ -77,16 +76,14 @@ All query functions require both a connection and a
]
A SQL statement may contain parameter placeholders that stand for SQL
scalar values. The parameter values must be supplied when the
statement is executed; the parameterized statement and parameter
values are sent to the database back end, which combines them
correctly and safely.
scalar values; such statements are called @deftech{parameterized
queries}. The parameter values must be supplied when the statement is
executed; the parameterized statement and parameter values are sent to
the database back end, which combines them correctly and safely.
Use parameters instead of Racket string interpolation (eg,
@racket[format] or @racket[string-append]) to avoid
@hyperlink["http://xkcd.com/327/"]{SQL injection}, where a string
intended to represent a SQL scalar value is interpreted as---possibly
malicious---SQL code instead.
@secref["dbsec-sql-injection"].
The syntax of placeholders varies depending on the database
system. For example:
@ -140,11 +137,13 @@ The types of parameters and returned fields are described in
@defproc[(query-rows [connection connection?]
[stmt statement?]
[arg any/c] ...
[#:group grouping-fields
(or/c (vectorof string?) (listof (vectorof string?)))
[#:group groupings
(let* ([field/c (or/c string? exact-nonnegative-integer?)]
[grouping/c (or/c field/c (vectorof field/c))])
(or/c grouping/c (listof grouping/c)))
null]
[#:group-mode group-mode
(listof (or/c 'preserve-null-rows 'list))
(listof (or/c 'preserve-null 'list))
null])
(listof vector?)]{
@ -158,7 +157,7 @@ The types of parameters and returned fields are described in
(list (vector 17))]
]
If @racket[grouping-fields] is not empty, the result is the same as if
If @racket[groupings] is not empty, the result is the same as if
@racket[group-rows] had been called on the result rows.
}
@ -245,7 +244,15 @@ The types of parameters and returned fields are described in
@defproc[(in-query [connection connection?]
[stmt statement?]
[arg any/c] ...
[#:fetch fetch-size (or/c exact-positive-integer? +inf.0) +inf.0])
[#:fetch fetch-size (or/c exact-positive-integer? +inf.0) +inf.0]
[#:group groupings
(let* ([field/c (or/c string? exact-nonnegative-integer?)]
[grouping/c (or/c field/c (vectorof field/c))])
(or/c grouping/c (listof grouping/c)))
null]
[#:group-mode group-mode
(listof (or/c 'preserve-null 'list))
null])
sequence?]{
Executes a SQL query, which must produce rows, and returns a
@ -260,6 +267,11 @@ The types of parameters and returned fields are described in
open cursors; attempting to fetch more rows may fail. On PostgreSQL,
a cursor can be opened only within a transaction.
If @racket[groupings] is not empty, the result is the same as
if @racket[group-rows] had been called on the result rows. If
@racket[groupings] is not empty, then @racket[fetch-size] must
be @racket[+inf.0]; otherwise, an exception is raised.
@examples/results[
[(for/list ([n (in-query pgc "select n from the_numbers where n < 2")])
n)
@ -329,25 +341,29 @@ future version of this library (even new minor versions).
}
@defproc[(group-rows [result rows-result?]
[#:group grouping-fields
(or/c (vectorof string?) (listof (vectorof string?)))]
[#:group groupings
(let* ([field/c (or/c string? exact-nonnegative-integer?)]
[grouping/c (or/c field/c (vectorof field/c))])
(or/c grouping/c (listof grouping/c)))]
[#:group-mode group-mode
(listof (or/c 'preserve-null-rows 'list))
(listof (or/c 'preserve-null 'list))
null])
rows-result?]{
If @racket[grouping-fields] is a vector, the elements must be names of
If @racket[groupings] is a vector, the elements must be names of
fields in @racket[result], and @racket[result]'s rows are regrouped
using the given fields. Each grouped row contains N+1 fields; the
first N fields are the @racket[grouping-fields], and the final field
first N fields are the @racket[groupings], and the final field
is a list of ``residual rows'' over the rest of the fields. A residual
row of all NULLs is dropped (for convenient processing of @tt{OUTER
JOIN} results) unless @racket[group-mode] includes
@racket['preserve-null-rows]. If @racket[group-mode] contains
@racket['preserve-null]. If @racket[group-mode] contains
@racket['list], there must be exactly one residual field, and its
values are included without a vector wrapper (similar to
@racket[query-list]).
See also @secref["dbperf-n+1"].
@examples[#:eval the-eval
(define vehicles-result
(rows-result
@ -361,7 +377,9 @@ values are included without a vector wrapper (similar to
#:group '(#("type")))
]
The @racket[grouping-fields] argument may also be a list of vectors;
The grouped final column is given the name @racket["grouped"].
The @racket[groupings] argument may also be a list of vectors;
in that case, the grouping process is repeated for each set of
grouping fields. The grouping fields must be distinct.
@ -372,6 +390,38 @@ grouping fields. The grouping fields must be distinct.
]
}
@defproc[(rows->dict [result rows-result?]
[#:key key-field/s
(let ([field/c (or/c string? exact-nonnegative-integer?)])
(or/c field/c (vectorof field/c)))]
[#:value value-field/s
(let ([field/c (or/c string? exact-nonnegative-integer?)])
(or/c field/c (vectorof field/c)))]
[#:value-mode value-mode
(listof (or/c 'list 'preserve-null))
null])
dict?]{
Creates a dictionary mapping @racket[key-field/s] to
@racket[value-field/s]. If @racket[key-field/s] is a single field name
or index, the keys are the field values; if @racket[key-field/s] is a
vector, the keys are vectors of the field values. Likewise for
@racket[value-field/s].
If @racket[value-mode] contains @racket['list], a list of values is
accumulated for each key; otherwise, there must be at most one value
for each key. Values consisting of all @racket[sql-null?] values are
dropped unless @racket[value-mode] contains
@racket['preserve-null].
@examples[#:eval the-eval
(rows->dict vehicles-result
#:key "model" #:value '#("type" "maker"))
(rows->dict vehicles-result
#:key "maker" #:value "model" #:value-mode '(list))
]
}
@section{Prepared Statements}
@ -533,7 +583,7 @@ statement implicitly commits the current transaction. These statements
also must not be used within @tech{managed transactions}. (In
contrast, PostgreSQL and SQLite both support transactional DDL.)
@bold{Errors} Query errors may affect an open transaction in one of
@parheading{Errors} Query errors may affect an open transaction in one of
three ways:
@itemlist[#:style 'ordered
@item{the transaction remains open and unchanged}

View File

@ -0,0 +1,511 @@
#lang scribble/doc
@(require scribble/manual
scribble/eval
scribble/struct
racket/sandbox
"config.rkt"
(for-label db db/util/testing racket/dict web-server/lang/web))
@title[#:tag "using-db"]{Using Database Connections}
This section introduces this library's basic features and covers some
practical issues with database programming in general and with this
library in particular.
@section[#:tag "intro-basic"]{Introduction to Using Database Connections}
The following annotated program demonstrates how to connect to a
database and perform simple queries. Some of the SQL syntax used below
is PostgreSQL-specific, such as the syntax of query parameters
(@litchar{$1} rather than @litchar{?}).
@my-interaction[
[(require db)
(void)]
]
First we create a connection. Replace @racket[_user], @racket[_db],
and @racket[_password] below with the appropriate values for your
configuration (see @secref{creating-connections} for other connection examples):
@my-interaction[
[(define pgc
(postgresql-connect #:user _user
#:database _db
#:password _password))
(void)]
[pgc
(new connection%)]
]
Use @racket[query-exec] method to execute a SQL statement for effect.
@my-interaction[
[(query-exec pgc
"create temporary table the_numbers (n integer, d varchar(20))")
(void)]
[(query-exec pgc
"insert into the_numbers values (0, 'nothing')")
(void)]
[(query-exec pgc
"insert into the_numbers values (1, 'the loneliest number')")
(void)]
[(query-exec pgc
"insert into the_numbers values (2, 'company')")
(void)]
]
The @racket[query] function is a more general way to execute a
statement. It returns a structure encapsulating information about the
statement's execution. (But some of that information varies from
system to system and is subject to change.)
@my-interaction[
[(query pgc "insert into the_numbers values (3, 'a crowd')")
(simple-result '((command insert 0 1)))]
[(query pgc "select n, d from the_numbers where n % 2 = 0")
(rows-result
(list
'((name . "n") (typeid . 23))
'((name . "d") (typeid . 1043)))
'(#(0 "nothing") #(2 "company")))]
]
When the query is known to return rows and when the field
descriptions are not needed, it is more convenient to use the
@racket[query-rows] function.
@my-interaction[
[(query-rows pgc "select n, d from the_numbers where n % 2 = 0")
'(#(0 "nothing") #(2 "company"))]
]
Use @racket[query-row] for queries that are known to return exactly
one row.
@my-interaction[
[(query-row pgc "select * from the_numbers where n = 0")
(vector 0 "nothing")]
]
Similarly, use @racket[query-list] for queries that produce rows of
exactly one column.
@my-interaction[
[(query-list pgc "select d from the_numbers order by n")
(list "nothing" "the loneliest number" "company" "a crowd")]
]
When a query is known to return a single value (one row and one
column), use @racket[query-value].
@my-interaction[
[(query-value pgc "select count(*) from the_numbers")
4]
[(query-value pgc "select d from the_numbers where n = 5")
(error 'query-value
"query returned zero rows: ~s"
"select d from the_numbers where n = 5")]
]
When a query may return zero or one rows, as the last example, use
@racket[query-maybe-row] or @racket[query-maybe-value] instead.
@my-interaction[
[(query-maybe-value pgc "select d from the_numbers where n = 5")
(values #f)]
]
The @racket[in-query] function produces a sequence that can be used
with Racket's iteration forms:
@my-interaction[
[(for ([(n d) (in-query pgc "select * from the_numbers where n < 4")])
(printf "~a is ~a\n" n d))
(for-each (lambda (n d) (printf "~a: ~a\n" n d))
'(0 1 2 3)
'("nothing" "the loneliest number" "company" "a crowd"))]
[(for/fold ([sum 0]) ([n (in-query pgc "select n from the_numbers")])
(+ sum n))
(for/fold ([sum 0]) ([n (in-list '(0 1 2 3))])
(+ sum n))]
]
Errors in queries generally do not cause the connection to disconnect.
@my-interaction[
[(begin (with-handlers [(exn:fail?
(lambda (e)
(printf "~a~n" (exn-message e))))]
(query-value pgc "select NoSuchField from NoSuchTable"))
(query-value pgc "select 'okay to proceed!'"))
(begin (display "query-value: relation \"nosuchtable\" does not exist (SQLSTATE 42P01)")
"okay to proceed!")]
]
Queries may contain parameters. The easiest way to execute a
parameterized query is to provide the parameters ``inline'' after the
SQL statement in the query function call.
@my-interaction[
[(query-value pgc
"select d from the_numbers where n = $1" 2)
"company"]
[(query-list pgc
"select n from the_numbers where n > $1 and n < $2" 0 3)
(list 1 2)]
]
Alternatively, a parameterized query may be prepared in advance and
executed later. @tech{Prepared statements} can be executed multiple
times with different parameter values.
@my-interaction[
[(define get-less-than-pst
(prepare pgc "select n from the_numbers where n < $1"))
(void)]
[(query-list pgc get-less-than-pst 1)
(list 0)]
[(query-list pgc (bind-prepared-statement get-less-than-pst '(2)))
(list 0 1)]
]
When a connection's work is done, it should be disconnected.
@my-interaction[
[(disconnect pgc)
(void)]
]
@section[#:tag "dbsec"]{Database Security}
Database security requires both that the database back end be secured
against unauthorized use and that authorized clients are not tricked
or subverted into violating the database's security.
Securing database back ends is mostly beyond the scope of this
manual. In brief: choose sufficiently strong authentication methods
and keep credentials secure, and follow the
@wplink["Principle_of_least_privilege"]{principle of least privilege}:
create and use roles that have the minimum permissions needed.
The following is an incomplete list of security issues related to
database @emph{client} programming.
@;{Add section on db roles and ro/rw access?
eg, for servlet create two connections: one ro and one rw
(in pg/my/etc, backed by two roles; in sqlite, connection options) }
@subsection[#:tag "dbsec-sql-injection"]{SQL Injection}
@wplink["SQL_injection"]{SQL injection} happens when part of a SQL
statement that was intended as SQL literal data is instead interpreted
as SQL code---possibly @hyperlink["http://xkcd.com/327/"]{malicious}
SQL code.
Avoid dynamically creating SQL query strings by string concatenation
or interpolation (eg, with @racket[string-append] or
@racket[format]). In most cases, it is possible to use
@tech{parameterized queries} instead. For example, instead of this
@racketblock[
(code:comment "WRONG! DANGER!")
(query-exec c
(format "UPDATE users SET passwd='~a' WHERE user='~a'"
user new-passwd))
]
write one of the following instead (depending on SQL dialect):
@racketblock[
(code:comment "for PostgreSQL, SQLite")
(query-exec c "UPDATE users SET passwd=$1 WHERE user=$2" user new-passwd)
(code:comment "for MySQL, SQLite, ODBC")
(query-exec c "UPDATE users SET passwd=? WHERE user=?" user new-passwd)
]
The first form would choke on names like @racket["Patrick O'Connor"].
Worse, it would be susceptible to attack by malicious input like
@racket["me' OR user='root'"], which yields the following SQL
statement:
@(element 'tt "UPDATE users SET passwd='whatever' WHERE user='me' OR user='root'")
In contrast, using a @tech{parameterized query} causes the
parameterized SQL and its arguments to be submitted to the back end
separately; the back end then combines them safely.
Only SQL literal values can be replaced with parameter placeholders; a
SQL statement cannot be parameterized over a column name or a sort
order, for example. In such cases, constructing the query dynamically
may be the only feasible solution. But while the query construction
may be influenced by external input, it should never directly
incorporate external input without validation. That is, don't do the
following:
@racketblock[
(code:comment "WRONG! DANGER!")
(query-rows c
(format "SELECT name, ~a FROM contestants" column))
(query-list c
(format "SELECT name FROM contestants ORDER BY score ~a" direction))
]
Instead, select the inserted SQL from known good alternatives:
@racketblock[
(code:comment "BETTER")
(query-rows c
(format "SELECT name, ~a FROM contestants"
(cond [(member column '("wins" "losses")) column]
[else (error ....)])))
(query-list c
(format "SELECT name FROM contestants ORDER BY score ~a"
(if ascending? "ASC" "DESC")))
]
@;{ Discuss dynamic IN comparisons? }
@subsection[#:tag "dbsec-xss"]{Cross-site Scripting (XSS)}
@wplink["Cross-site_scripting"]{Cross-site scripting}---which should
probably be called ``HTML injection'' or ``markup injection''---is
when arbitrary text from an untrusted source is embedded without
escaping into an HTML page. The @emph{unstructured text from the
untrusted source} is reinterpreted as @emph{markup from the web
server}; if the reinterpreted markup contains embedded Javascript
code, it executes with the security privileges associated with the web
server's domain.
This issue has little to do with databases @emph{per se} except that
such text is often stored in a database. This issue is mitigated by
using structured markup representations like SXML or X-expressions
(xexprs), since they automatically escape ``markup'' characters found
in embedded text.
@;{============================================================}
@section[#:tag "dbperf"]{Database Performance}
Achieving good database performance mostly consists of good database
design and intelligent client behavior.
On the database design side, most important are wise use of indexes
and choosing appropriate data representations. As an example of the
latter, a regexp-based search using @tt{LIKE} will probably be slower
than a specialized
@hyperlink["http://www.postgresql.org/docs/9.0/static/textsearch.html"]{full-text
search} feature for large data sets. Consult your database back end's
manual for additional performance advice.
The following sections describe a few client-side aspects of
performance.
@subsection[#:tag "dbperf-n+1"]{The N+1 Selects Problem}
@;{ per comments on http://stackoverflow.com/questions/97197/what-is-the-n1-selects-problem
Is N+1 actually a problem?
ie, Is communication overhead with db back end worse than
grouping cost? Should measure to see. }
A common mistake is to fetch a large amount of data by running a query
to get a set of initial records and then running another query inside
a loop with an iteration for each of the initial records. This is
sometimes called the ``n+1 selects problem.'' For example:
@racketblock[
(for/list ([(name id) (in-query c "SELECT name, id FROM contestants")])
(define wins
(query-list c "SELECT contest FROM contests WHERE winner = $1" id))
(make-contestant-record name wins))
]
The same information can be retrieved in a single query by performing
a @tt{LEFT OUTER JOIN} and grouping the results:
@racketblock[
(for/list ([(name id wins)
(in-query c
(string-append "SELECT name, id, contest "
"FROM contestants LEFT OUTER JOIN contests "
"ON contestants.id = contests.winner")
#:group '(#("name" "id"))
#:group-mode '(list))])
(make-contestant-record name wins))
]
The one-query form will perform better when database communication has
high latency. On the other hand, it may duplicate the contents of the
non-key @tt{name} column, using more bandwidth. Another approach is to
perform two queries:
@racketblock[
(let ([id=>name
(rows->dict #:key "id" #:value "name"
(query c "SELECT id, name FROM contestants"))])
(for/list ([(id wins)
(in-query c
(string-append "SELECT id, contest "
"FROM contestants LEFT OUTER JOIN contests "
"ON contestants.id = contests.winner")
#:group '(#("id"))
#:group-mode '(list))])
(make-contestant-record (dict-ref id=>name id) wins)))
]
Compared with the one-query form, the two-query form requires
additional communication, but it avoids duplicating @tt{name} values
in the @tt{OUTER JOIN} results. If additional non-key @tt{contestant}
fields were to be retrieved, the bandwidth savings of this approach
would be even greater.
See also @secref["dbperf-testing"].
@subsection[#:tag "dbperf-update-tx"]{Updates and Transactions}
Using transactions can dramatically improve the performance of bulk
database operations, especially @tt{UPDATE} and @tt{INSERT}
statements. As an extreme example, on commodity hardware in 2012,
SQLite is capable of executing thousands of @tt{INSERT} statements per
second within a transaction, but it is capable of only dozens of
single-@tt{INSERT} transactions per second.
@subsection[#:tag "dbperf-pstcache"]{Statement Caching}
Connections cache implicitly prepared statements (that is, statements
given in string form directly to a query function). The effect of the
cache is to eliminate an extra round-trip to the server (to send the
statement and receive a prepared statement handle), leaving just a
single round-trip (to send parameters and receive results) per
execution.
Currently, prepared statements are only cached within a
transaction. The statement cache is flushed when entering or leaving a
transaction and whenever a DDL statement is executed.
@;{ virtual statements are mostly obsolete }
@subsection[#:tag "dbperf-testing"]{Testing Performance of Database-Backed Programs}
When testing the performance of database-backed programs, remember to
test them in environments with realistic latency and
bandwidth. High-latency environments may be roughly approximated with
the @racket[high-latency-connection] function, but there's no
substitute for the real thing.
@;{============================================================}
@section[#:tag "intro-servlets"]{Databases and Web Servlets}
Using database connections in a web servlet is more complicated than
in a standalone program. A single servlet potentially serves many
requests at once, each in a separate request-handling
thread. Furthermore, the use of @racket[send/suspend],
@racket[send/suspend/dispatch], etc means that there are many places
where a servlet may start and stop executing to service a request.
Why not use a single connection to handle all of a servlet's requests?
That is, create the connection with the servlet instance and never
disconnect it. Such a servlet would look something like the following:
@racketmod[
#:file "bad-servlet.rkt"
web-server
(require db)
(define db-conn (postgresql-connect ....))
(define (serve req)
.... db-conn ....)
]
The main problem with using one connection for all requests is that
multiple threads accessing the same connection are not properly
@wplink["Isolation_%28database_systems%29"]{isolated}. For example, if
one thread is accessing the connection within a transaction and
another thread issues a query, the second thread may see invalid data
or even disrupt the work of the first thread.
A secondary problem is performance. A connection can only perform a
single query at a time, whereas most database systems are capable of
concurrent query processing.
The proper way to use database connections in a servlet is to create a
connection for each request and disconnect it when the request has
been handled. But since a request thread may start and stop executing
in many places (due to @racket[send/suspend], etc), inserting the code
to connect and disconnect at the proper places can be challenging and
messy.
A better solution is to use a @tech{virtual connection}, which
automatically creates a request-specific (that is, thread-specific)
``actual connection'' by need and disconnects it when the request has
been handled (that is, when the thread terminates). Different
request-handling threads using the same virtual connection are
assigned different actual connections, so the requests are properly
isolated.
@racketmod[
#:file "better-servlet.rkt"
web-server
(require db)
(define db-conn
(virtual-connection
(lambda () (postgresql-connect ....))))
(define (serve req)
.... db-conn ....)
]
This solution preserves the simplicity of the naive solution and fixes
the isolation problem but at the cost of creating many short-lived
database connections. That cost can be eliminated by using a
@tech{connection pool}:
@racketmod[
#:file "best-servlet.rkt"
web-server
(require db)
(define db-conn
(virtual-connection
(connection-pool
(lambda () (postgresql-connect ....)))))
(define (serve req)
.... db-conn ....)
]
By using a virtual connection backed by a connection pool, a servlet
can achieve simplicity, isolation, and performance all at the same
time.
@;{
TODO:
- talk about virtual statements, too
- show actual working servlet code
--
A prepared statement is tied to the connection used to create it;
attempting to use it with another connection results in an
error. Unfortunately, in some scenarios such as web servlets, the
lifetimes of connections are short or difficult to track, making
prepared statements inconvenient. In such cases, a better tool is the
@tech{virtual statement}, which prepares statements on demand and
caches them for future use with the same connection.
@my-interaction[
[(define get-less-than-pst
(virtual-statement "select n from the_numbers where n < $1"))
(void)]
[(code:line (query-list pgc1 get-less-than-pst 1) (code:comment "prepares statement for pgc1"))
(list 0)]
[(code:line (query-list pgc2 get-less-than-pst 2) (code:comment "prepares statement for pgc2"))
(list 0 1)]
[(code:line (query-list pgc1 get-less-than-pst 3) (code:comment "uses existing prep. stmt."))
(list 0 1 2)]
]
}

View File

@ -4,7 +4,7 @@
scribble/struct
racket/sandbox
"config.rkt"
(for-label db db/util/datetime db/util/geometry db/util/postgresql))
(for-label db db/util/datetime db/util/geometry db/util/postgresql db/util/testing))
@title[#:tag "util"]{Utilities}
@ -203,3 +203,33 @@ types that have no appropriate analogue in the OpenGIS model:
Note: PostgreSQL's built-in geometric types are distinct from those
provided by the PostGIS extension library (see @secref["geometry"]).
}
@;{========================================}
@section[#:tag "util-testing"]{Testing Database Programs}
@defmodule[db/util/testing]
This module provides utilities for testing programs that use database
connections.
@defproc[(high-latency-connection [connection connection?]
[latency (>=/c 0)]
[#:sleep-atomic? sleep-atomic? any/c #f])
connection?]{
Returns a proxy connection for @racket[connection] that introduces
@racket[latency] additional seconds of latency before operations that
require communicating with the database back end---@racket[prepare],
@racket[query], @racket[start-transaction], etc.
Use this function in performance testing to roughly simulate
environments with high-latency communication with a database back
end.
If @racket[sleep-atomic?] is true, then the proxy enters atomic mode
before sleeping, to better simulate the effect of a long-running FFI
call (see @secref["ffi-concurrency"]). Even so, it may not accurately
simulate an ODBC connection that internally uses cursors to fetch data
on demand, as each fetch would introduce additional latency.
}

View File

@ -0,0 +1,58 @@
#lang racket/base
(require racket/contract
racket/class
ffi/unsafe/atomic
"../private/generic/interfaces.rkt"
"../private/generic/common.rkt")
(define high-latency-connection%
(class* locking% (connection<%>)
(init-private connection
latency
sleep-atomic?)
(inherit call-with-lock)
(super-new)
(define-syntax-rule (define-forward defmethod (sleep-factor method arg ...) ...)
(begin
(defmethod (method arg ...)
(call-with-lock 'method
(lambda ()
(let ([c connection])
(unless c (error/not-connected 'method))
(when (positive? sleep-factor)
(if sleep-atomic?
(call-as-atomic (lambda () (sleep (* sleep-factor latency))))
(sleep (* sleep-factor latency))))
(send c method arg ...)))))
...))
(define-forward define/public
(0 get-dbsystem)
(2 query fsym stmt cursor?) ;; 2 because may require implicit prepare
(1 prepare fsym stmt close-on-exec?)
(1 fetch/cursor fsym stmt fetch-size)
(0 get-base)
(0 free-statement stmt need-lock?)
(0 transaction-status fsym)
(1 start-transaction fsym isolation cwt?)
(1 end-transaction fsym mode cwt?)
(1 list-tables fsym schema))
(define/override (connected?) (and connection (send connection connected?)))
(define/public (disconnect)
(set! connection #f))))
(define (high-latency-connection connection latency
#:sleep-atomic? [sleep-atomic? #f])
(new high-latency-connection%
[connection connection]
[latency latency]
[sleep-atomic? sleep-atomic?]))
(provide/contract
[high-latency-connection
(->* (connection? (>=/c 0))
(#:sleep-atomic? any/c)
connection?)])

View File

@ -171,7 +171,7 @@
(when (ANYFLAGS 'postgresql 'mysql 'sqlite3)
(check-roundtrip c (make-bytes #e1e6 (char->integer #\a)))
(check-roundtrip c (make-bytes #e1e7 (char->integer #\b)))
(check-roundtrip c (make-bytes #e1e8 (char->integer #\c))))
#| (check-roundtrip c (make-bytes #e1e8 (char->integer #\c))) |#)
(when (ANYFLAGS 'postgresql)
(let ([r (query-value c "select cast(repeat('a', 10000000) as bytea)")])
(check-pred bytes? r)