db: various fixes and additions
- fix connection-pool for nested tx, fix race condition - ensure connected? always nonblocking - added and reorganized some doc sections - added grouping, contracts to in-query - added rows->dict
This commit is contained in:
parent
aa0d8aaa33
commit
00fd18bc62
|
@ -28,8 +28,9 @@
|
|||
(but this could also be done by two locks: outer "ownership" lock
|
||||
and inner "invariant-protecting" lock)
|
||||
|
||||
- audit code for break-safety, disable breaks as needed
|
||||
|
||||
- make implementation notes section of docs
|
||||
- explain cursor impl (& rationale)
|
||||
- explain nested tx impl
|
||||
|
||||
- invalidate statement cache on query error
|
||||
|
||||
- 2 call-with-transactions from separate threads can conflict
|
||||
|
|
|
@ -1,5 +1,10 @@
|
|||
#lang racket/base
|
||||
(require racket/contract/base)
|
||||
(require (for-syntax racket/base
|
||||
syntax/parse
|
||||
syntax/parse/experimental/template)
|
||||
racket/dict
|
||||
syntax/location
|
||||
racket/contract/base)
|
||||
|
||||
;; ============================================================
|
||||
|
||||
|
@ -74,6 +79,56 @@
|
|||
|
||||
(require "private/generic/functions.rkt")
|
||||
|
||||
(define fetch-size/c
|
||||
(or/c exact-positive-integer? +inf.0))
|
||||
|
||||
(define grouping-field/c (or/c string? exact-nonnegative-integer?))
|
||||
(define group/c (or/c grouping-field/c (vectorof grouping-field/c)))
|
||||
(define grouping/c (or/c group/c (listof group/c)))
|
||||
|
||||
(define group-mode/c
|
||||
(listof (or/c 'list 'preserve-null)))
|
||||
|
||||
(define in-query/c
|
||||
(->* (connection? statement?)
|
||||
(#:fetch fetch-size/c
|
||||
#:group grouping/c
|
||||
#:group-mode group-mode/c)
|
||||
#:rest list?
|
||||
sequence?))
|
||||
|
||||
(define here-mod-path (quote-module-path))
|
||||
|
||||
(define-syntax contracted-in-query
|
||||
(make-provide/contract-transformer
|
||||
(quote-syntax in-query/c)
|
||||
(quote-syntax in-query)
|
||||
(quote-syntax in-query)
|
||||
(quote-syntax here-mod-path)))
|
||||
|
||||
(define-sequence-syntax in-query*
|
||||
(lambda () #'contracted-in-query)
|
||||
(lambda (stx)
|
||||
(syntax-parse stx
|
||||
[[(var ...) (~and form
|
||||
(in-query (~or (~optional (~seq #:fetch fetch-size))
|
||||
(~optional (~seq #:group grouping-fields))
|
||||
(~optional (~seq #:group-mode group-mode))
|
||||
(~between arg:expr 2 +inf.0))
|
||||
...))]
|
||||
#:declare fetch-size (expr/c #'fetch-size/c #:context #'form) #:role "fetch size argument"
|
||||
#:declare grouping-fields (expr/c #'grouping/c #:context #'form) #:role "grouping fields argument"
|
||||
#:declare group-mode (expr/c #'group-mode/c #:context #'form) #:role "group mode argument"
|
||||
#:with (c stmt q-arg ...) #'(arg ...)
|
||||
#:declare c (expr/c #'connection? #:context #'form) #:role "connection argument"
|
||||
#:declare stmt (expr/c #'statement? #:context #'form) #:role "statement argument"
|
||||
(template
|
||||
[(var ...) (in-query-helper (length '(var ...)) c.c stmt.c q-arg ...
|
||||
(?? (?@ #:fetch fetch-size.c))
|
||||
(?? (?@ #:group grouping-fields.c))
|
||||
(?? (?@ #:group-mode group-mode.c)))])]
|
||||
[_ #f])))
|
||||
|
||||
(provide (rename-out [in-query* in-query]))
|
||||
|
||||
(provide/contract
|
||||
|
@ -105,7 +160,8 @@
|
|||
(->* (connection? statement?) () #:rest list? any)]
|
||||
[query-rows
|
||||
(->* (connection? statement?)
|
||||
(#:group (or/c (vectorof string?) (listof (vectorof string?))))
|
||||
(#:group grouping/c
|
||||
#:group-mode group-mode/c)
|
||||
#:rest list? (listof vector?))]
|
||||
[query-list
|
||||
(->* (connection? statement?) () #:rest list? list?)]
|
||||
|
@ -165,9 +221,15 @@
|
|||
|
||||
[group-rows
|
||||
(->* (rows-result?
|
||||
#:group (or/c (vectorof string?) (listof (vectorof string?))))
|
||||
(#:group-mode (listof (or/c 'preserve-null-rows 'list)))
|
||||
rows-result?)])
|
||||
#:group grouping/c)
|
||||
(#:group-mode (listof (or/c 'list 'preserve-null #|deprecated:|# 'preserve-null-rows)))
|
||||
rows-result?)]
|
||||
|
||||
[rows->dict
|
||||
(->* (rows-result? #:key grouping/c #:value grouping/c)
|
||||
(#:value-mode group-mode/c)
|
||||
dict?)]
|
||||
)
|
||||
|
||||
;; ============================================================
|
||||
|
||||
|
|
|
@ -421,12 +421,33 @@
|
|||
dprintf)
|
||||
(super-new)
|
||||
|
||||
(field [max-cache-size 50])
|
||||
|
||||
;; Statement Cache
|
||||
;; updated by prepare; potentially invalidated by query (via check/invalidate-cache)
|
||||
|
||||
(define pst-cache '#hash())
|
||||
(field [pst-cache '#hash()]
|
||||
[cache-mode 'in-transaction]
|
||||
[cache-flush-next? #f] ;; flush cache on next query
|
||||
[max-cache-size 20])
|
||||
|
||||
(define/private (use-cache?)
|
||||
(and (not cache-flush-next?)
|
||||
(case cache-mode
|
||||
((always) #t)
|
||||
((never) #f)
|
||||
((in-transaction) (eq? (get-tx-status) #t)))))
|
||||
|
||||
(define/public (stmt-cache-ctl who mode)
|
||||
(case mode
|
||||
((get) cache-mode)
|
||||
((flush) (begin (set! cache-flush-next? #t) cache-mode))
|
||||
(else (unless (eq? mode cache-mode)
|
||||
(call-with-lock who
|
||||
(lambda ()
|
||||
(set! cache-mode mode)
|
||||
(set! cache-flush-next? #t)
|
||||
cache-mode))))))
|
||||
|
||||
;; --
|
||||
|
||||
(define/public (get-cached-statement stmt)
|
||||
(let ([cached-pst (hash-ref pst-cache stmt #f)])
|
||||
|
@ -447,12 +468,6 @@
|
|||
(dprintf " ** caching statement\n")
|
||||
(set! pst-cache (hash-set pst-cache sql pst))))))
|
||||
|
||||
(define/private (use-cache?)
|
||||
(case cache-statements
|
||||
((always) #t)
|
||||
((never) #f)
|
||||
((in-transaction) (eq? (get-tx-status) #t))))
|
||||
|
||||
;; check/invalidate-cache : statement/pst -> hash/#f
|
||||
;; Returns old cache on invalidation, or #f if stmt is safe.
|
||||
;; May also return part of old cache (excluding pst) when cache gets too big.
|
||||
|
@ -463,7 +478,11 @@
|
|||
unsafe, because they're usually transactional SQL.
|
||||
|#
|
||||
(define (invalidate! except)
|
||||
(dprintf " ** invalidating statement cache~a\n" (if except " (too big)" ""))
|
||||
;; FIXME: smarter cache ejection (LRU?)
|
||||
(dprintf " ** invalidating statement cache~a\n"
|
||||
(cond [except " (too big)"]
|
||||
[cache-flush-next? " (mode changed)"]
|
||||
[else ""]))
|
||||
(let ([cache pst-cache])
|
||||
(set! pst-cache '#hash())
|
||||
(cond [except
|
||||
|
@ -471,7 +490,9 @@
|
|||
(hash-remove cache (send except get-stmt))]
|
||||
[else
|
||||
cache])))
|
||||
(cond [(statement-binding? x)
|
||||
(cond [cache-flush-next?
|
||||
(invalidate! #f)]
|
||||
[(statement-binding? x)
|
||||
(check/invalidate-cache (statement-binding-pst x))]
|
||||
[(prepared-statement? x)
|
||||
(let ([stmt-type (send x get-stmt-type)])
|
||||
|
|
|
@ -13,33 +13,49 @@
|
|||
(class object%
|
||||
;; other-evt : (-> evt)
|
||||
;; generates other evt to sync on besides req-channel, eg timeouts
|
||||
(init-field (other-evt (lambda () never-evt)))
|
||||
(init-field (other-evt (lambda () never-evt))
|
||||
(alt-enabled? (lambda () #t)))
|
||||
(super-new)
|
||||
|
||||
(define req-channel (make-channel))
|
||||
(define alt-req-channel (make-channel))
|
||||
|
||||
(define mthread
|
||||
(thread/suspend-to-kill
|
||||
(lambda ()
|
||||
(let loop ()
|
||||
(sync (wrap-evt req-channel (lambda (p) (p)))
|
||||
(if (alt-enabled?)
|
||||
(wrap-evt alt-req-channel (lambda (p) (p)))
|
||||
never-evt)
|
||||
(other-evt))
|
||||
(loop)))))
|
||||
|
||||
(define/public (call proc)
|
||||
(call* proc req-channel #f))
|
||||
(define/public (alt-call-evt proc)
|
||||
(call* proc alt-req-channel #t))
|
||||
|
||||
(define/private (call* proc chan as-evt?)
|
||||
(thread-resume mthread (current-thread))
|
||||
(let ([result #f]
|
||||
[sema (make-semaphore 0)])
|
||||
(channel-put req-channel
|
||||
(lambda ()
|
||||
(let* ([result #f]
|
||||
[sema (make-semaphore 0)]
|
||||
[proc (lambda ()
|
||||
(set! result
|
||||
(with-handlers ([(lambda (e) #t)
|
||||
(lambda (e) (cons 'exn e))])
|
||||
(cons 'values (call-with-values proc list))))
|
||||
(semaphore-post sema)))
|
||||
(semaphore-post sema))]
|
||||
[handler
|
||||
(lambda (_evt)
|
||||
(semaphore-wait sema)
|
||||
(case (car result)
|
||||
((values) (apply values (cdr result)))
|
||||
((exn) (raise (cdr result))))))))
|
||||
((exn) (raise (cdr result)))))])
|
||||
(if as-evt?
|
||||
(wrap-evt (channel-put-evt chan proc) handler)
|
||||
(begin (channel-put chan proc)
|
||||
(handler #f)))))))
|
||||
|
||||
;; ----
|
||||
|
||||
|
@ -53,14 +69,20 @@
|
|||
(init-private connection)
|
||||
|
||||
(define mgr (new manager%))
|
||||
(define last-connected? #t)
|
||||
|
||||
(define-syntax-rule (define-forward (method arg ...) ...)
|
||||
(begin
|
||||
(define/public (method arg ...)
|
||||
(send mgr call (lambda () (send connection method arg ...)))) ...))
|
||||
(send mgr call (lambda ()
|
||||
(begin0
|
||||
(send connection method arg ...)
|
||||
(set! last-connected? (send connection connected?))))))
|
||||
...))
|
||||
|
||||
(define/public (connected?) last-connected?)
|
||||
|
||||
(define-forward
|
||||
(connected?)
|
||||
(disconnect)
|
||||
(get-dbsystem)
|
||||
(query fsym stmt cursor?)
|
||||
|
@ -88,8 +110,7 @@
|
|||
(define virtual-connection%
|
||||
(class* object% (connection<%>)
|
||||
(init-private connector ;; called from client thread
|
||||
get-key ;; called from client thread
|
||||
timeout)
|
||||
get-key) ;; called from client thread
|
||||
(super-new)
|
||||
|
||||
(define custodian (current-custodian))
|
||||
|
@ -99,36 +120,17 @@
|
|||
;; key=>conn : hasheq[key => connection]
|
||||
(define key=>conn (make-hasheq))
|
||||
|
||||
;; alarms : hasheq[connection => evt] (alarm wrapped to return key)
|
||||
(define alarms (make-hasheq))
|
||||
|
||||
(define/private (get key) ;; also refreshes alarm
|
||||
(let ([c (hash-ref key=>conn key #f)])
|
||||
(when c (hash-set! alarms c (fresh-alarm-for key)))
|
||||
c))
|
||||
(define/private (get key) ;; also called by client thread for connected?
|
||||
(hash-ref key=>conn key #f))
|
||||
|
||||
(define/private (put! key c)
|
||||
(hash-set! key=>conn key c)
|
||||
(hash-set! alarms c (fresh-alarm-for key)))
|
||||
(hash-set! key=>conn key c))
|
||||
|
||||
(define/private (fresh-alarm-for key)
|
||||
(wrap-evt (alarm-evt (+ (current-inexact-milliseconds) timeout))
|
||||
(lambda (a) key)))
|
||||
|
||||
(define/private (remove! key timeout?)
|
||||
;; timeout? = if connection open, then wait longer
|
||||
(let* ([c (hash-ref key=>conn key #f)]
|
||||
[in-trans? (with-handlers ([exn:fail? (lambda (e) #f)])
|
||||
(and c
|
||||
(send c connected?)
|
||||
(send c transaction-status 'virtual-connection)))])
|
||||
(cond [(not c) (void)]
|
||||
[(and timeout? in-trans?)
|
||||
(hash-set! alarms c (fresh-alarm-for key))]
|
||||
[else
|
||||
(define/private (remove! key)
|
||||
(let ([c (get key)])
|
||||
(when c
|
||||
(hash-remove! key=>conn key)
|
||||
(hash-remove! alarms c)
|
||||
(send c disconnect)])))
|
||||
(send c disconnect))))
|
||||
|
||||
(define mgr
|
||||
(new manager%
|
||||
|
@ -137,16 +139,10 @@
|
|||
(choice-evt
|
||||
(let ([keys (hash-map key=>conn (lambda (k v) k))])
|
||||
(handle-evt (apply choice-evt keys)
|
||||
;; Assignment to key has expired: move to idle or disconnect.
|
||||
;; Assignment to key has expired
|
||||
(lambda (key)
|
||||
(dbdebug "virtual-connection: key expiration: ~e" key)
|
||||
(remove! key #f))))
|
||||
(let ([alarm-evts (hash-map alarms (lambda (k v) v))])
|
||||
(handle-evt (apply choice-evt alarm-evts)
|
||||
;; Disconnect idle connection.
|
||||
(lambda (key)
|
||||
(dbdebug "virtual-connection: timeout")
|
||||
(remove! key #t)))))))))
|
||||
(remove! key)))))))))
|
||||
|
||||
;; == methods called in client thread ==
|
||||
|
||||
|
@ -159,12 +155,12 @@
|
|||
(connector))])
|
||||
(send mgr call
|
||||
(lambda ()
|
||||
(when c (remove! key #f))
|
||||
(when c (remove! key))
|
||||
(put! key c*)))
|
||||
c*)]
|
||||
[else
|
||||
(when c ;; got a disconnected connection
|
||||
(send mgr call (lambda () (remove! key #f))))
|
||||
(send mgr call (lambda () (remove! key))))
|
||||
#f])))
|
||||
|
||||
;; ----
|
||||
|
@ -178,24 +174,27 @@
|
|||
...))
|
||||
|
||||
(define-forward
|
||||
(#f #f (connected?))
|
||||
(#t '_ (get-dbsystem))
|
||||
(#t '_ (query fsym stmt cursor?))
|
||||
(#t '_ (fetch/cursor fsym stmt fetch-size))
|
||||
(#t '_ (start-transaction fsym isolation cwt?))
|
||||
(#f (void) (end-transaction fsym mode cwt?))
|
||||
(#t '_ (transaction-status fsym))
|
||||
(#f #f (transaction-status fsym))
|
||||
(#t '_ (list-tables fsym schema)))
|
||||
|
||||
(define/public (get-base)
|
||||
(get-connection #t))
|
||||
|
||||
(define/public (connected?)
|
||||
(let ([c (get (get-key))])
|
||||
(and c (send c connected?))))
|
||||
|
||||
(define/public (disconnect)
|
||||
(let ([c (get-connection #f)]
|
||||
[key (get-key)])
|
||||
(when c
|
||||
(send c disconnect)
|
||||
(send mgr call (lambda () (remove! key #f)))))
|
||||
(send mgr call (lambda () (remove! key)))))
|
||||
(void))
|
||||
|
||||
(define/public (prepare fsym stmt close-on-exec?)
|
||||
|
@ -210,8 +209,7 @@
|
|||
|
||||
;; ----
|
||||
|
||||
(define (virtual-connection connector
|
||||
#:timeout [timeout +inf.0])
|
||||
(define (virtual-connection connector)
|
||||
(let ([connector
|
||||
(cond [(connection-pool? connector)
|
||||
(lambda () (connection-pool-lease connector))]
|
||||
|
@ -219,8 +217,7 @@
|
|||
[get-key (lambda () (thread-dead-evt (current-thread)))])
|
||||
(new virtual-connection%
|
||||
(connector connector)
|
||||
(get-key (lambda () (thread-dead-evt (current-thread))))
|
||||
(timeout (* 1000 timeout)))))
|
||||
(get-key get-key))))
|
||||
|
||||
;; ========================================
|
||||
|
||||
|
@ -233,20 +230,15 @@
|
|||
max-idle-connections)
|
||||
(super-new)
|
||||
|
||||
;; max-connections is either in [1, 10000] or +inf.0,
|
||||
;; if leave-evt is sema, then counter = (max-connections - assigned connections)
|
||||
;; ie, includes idle connections
|
||||
(define lease-evt
|
||||
(if (= max-connections +inf.0)
|
||||
always-evt
|
||||
(make-semaphore max-connections)))
|
||||
|
||||
(define proxy-counter 1) ;; for debugging
|
||||
(define actual-counter 1) ;; for debugging
|
||||
(define actual=>number (make-weak-hasheq))
|
||||
|
||||
;; == methods called in manager thread ==
|
||||
|
||||
;; assigned-connections : nat
|
||||
(define assigned-connections 0)
|
||||
|
||||
;; proxy=>evt : hasheq[proxy-connection => evt]
|
||||
(define proxy=>evt (make-hasheq))
|
||||
|
||||
|
@ -266,7 +258,8 @@
|
|||
proxy-number
|
||||
(if take-idle? "idle" "new")
|
||||
(hash-ref actual=>number raw-c "???"))
|
||||
(hash-set! proxy=>evt c key)
|
||||
(hash-set! proxy=>evt c (wrap-evt key (lambda (_e) c)))
|
||||
(set! assigned-connections (add1 assigned-connections))
|
||||
c))
|
||||
|
||||
(define/private (release* proxy raw-c why)
|
||||
|
@ -279,11 +272,14 @@
|
|||
(hash-remove! proxy=>evt proxy)
|
||||
(when raw-c
|
||||
(with-handlers ([exn:fail? void])
|
||||
(send raw-c end-transaction 'connection-pool 'rollback))
|
||||
(cond [(< (length idle-list) max-idle-connections)
|
||||
;; If in tx, just disconnect (for simplicity; else must loop for nested txs)
|
||||
(when (send raw-c transaction-status 'connection-pool)
|
||||
(send raw-c disconnect)))
|
||||
(cond [(and (< (length idle-list) max-idle-connections)
|
||||
(send raw-c connected?))
|
||||
(set! idle-list (cons raw-c idle-list))]
|
||||
[else (send raw-c disconnect)])
|
||||
(when (semaphore? lease-evt) (semaphore-post lease-evt))))
|
||||
(set! assigned-connections (sub1 assigned-connections))))
|
||||
|
||||
(define/private (new-connection)
|
||||
(let ([c (connector)]
|
||||
|
@ -299,19 +295,18 @@
|
|||
(new manager%
|
||||
(other-evt
|
||||
(lambda ()
|
||||
(let ([evts (hash-map proxy=>evt (lambda (k v) (wrap-evt v (lambda (e) k))))])
|
||||
(let ([evts (hash-values proxy=>evt)])
|
||||
(handle-evt (apply choice-evt evts)
|
||||
(lambda (proxy)
|
||||
(release* proxy
|
||||
(send proxy release-connection)
|
||||
"release-evt"))))))))
|
||||
"release-evt"))))))
|
||||
(alt-enabled? (lambda () (< assigned-connections max-connections)))))
|
||||
|
||||
;; == methods called in client thread ==
|
||||
|
||||
(define/public (lease key)
|
||||
(wrap-evt lease-evt
|
||||
(lambda (_e)
|
||||
(send mgr call (lambda () (lease* key))))))
|
||||
(define/public (lease-evt key)
|
||||
(send mgr alt-call-evt (lambda () (lease* key))))
|
||||
|
||||
(define/public (release proxy)
|
||||
(let ([raw-c (send proxy release-connection)])
|
||||
|
@ -350,8 +345,7 @@
|
|||
(end-transaction fsym mode cwt?)
|
||||
(list-tables fsym schema))
|
||||
|
||||
;; (define-forward define/override (connected?))
|
||||
(define/override (connected?) (and connection #t))
|
||||
(define/override (connected?) (and connection (send connection connected?)))
|
||||
|
||||
(define/public (disconnect)
|
||||
(send pool release this))
|
||||
|
@ -380,7 +374,7 @@
|
|||
(cond [(thread? key) (thread-dead-evt key)]
|
||||
[(custodian? key) (make-custodian-box key #t)]
|
||||
[else key])]
|
||||
[result (sync/timeout 0.1 (send pool lease key))])
|
||||
[result (sync/timeout 0.1 (send pool lease-evt key))])
|
||||
(unless result
|
||||
(uerror 'connection-pool-lease
|
||||
"cannot obtain connection; connection pool limit reached"))
|
||||
|
|
|
@ -146,9 +146,7 @@
|
|||
[result (query/rows c 'query-rows sql #f)]
|
||||
[result
|
||||
(cond [(not (null? group-fields-list))
|
||||
(group-rows-result* 'query-rows result group-fields-list
|
||||
(not (memq 'preserve-null-rows group-mode))
|
||||
(memq 'list group-mode))]
|
||||
(group-rows-result* 'query-rows result group-fields-list group-mode)]
|
||||
[else result])])
|
||||
(rows-result-rows result)))
|
||||
|
||||
|
@ -204,34 +202,38 @@
|
|||
|
||||
;; ========================================
|
||||
|
||||
(define (in-query c stmt #:fetch [fetch-size +inf.0] . args)
|
||||
(apply in-query-helper #f c stmt #:fetch fetch-size args))
|
||||
|
||||
(define-sequence-syntax in-query*
|
||||
(lambda () #'in-query)
|
||||
(lambda (stx)
|
||||
(syntax-case stx ()
|
||||
[[(var ...) (in-query c stmt arg ...)]
|
||||
#'[(var ...)
|
||||
(in-query-helper (length '(var ...)) c stmt arg ...)]]
|
||||
[_ #f])))
|
||||
(define (in-query c stmt
|
||||
#:fetch [fetch-size +inf.0]
|
||||
#:group [grouping-fields null]
|
||||
#:group-mode [group-mode null]
|
||||
. args)
|
||||
(apply in-query-helper #f c stmt
|
||||
#:fetch fetch-size
|
||||
#:group grouping-fields
|
||||
#:group-mode group-mode
|
||||
args))
|
||||
|
||||
(define (in-query-helper vars c stmt
|
||||
#:fetch [fetch-size +inf.0]
|
||||
#:group [grouping-fields null]
|
||||
#:group-mode [group-mode null]
|
||||
. args)
|
||||
;; Not protected by contract
|
||||
(unless (connection? c)
|
||||
(apply raise-type-error 'in-query "connection" 0 c stmt args))
|
||||
(unless (statement? stmt)
|
||||
(apply raise-type-error 'in-query "statement" 1 c stmt args))
|
||||
(unless (or (exact-positive-integer? fetch-size) (eqv? fetch-size +inf.0))
|
||||
(raise-type-error 'in-query "positive integer or +inf.0" fetch-size))
|
||||
(let* ([check (or vars 'rows)]
|
||||
(when (and (not (null? grouping-fields))
|
||||
(< fetch-size +inf.0))
|
||||
(error 'in-query "cannot apply grouping to cursor (finite fetch-size)"))
|
||||
(let* ([check
|
||||
;; If grouping, can't check expected arity.
|
||||
;; FIXME: should check header includes named fields
|
||||
(cond [(null? grouping-fields) (or vars 'rows)]
|
||||
[else 'rows])]
|
||||
[stmt (compose-statement 'in-query c stmt args check)])
|
||||
(cond [(eqv? fetch-size +inf.0)
|
||||
(in-list/vector->values
|
||||
(rows-result-rows
|
||||
(query/rows c 'in-query stmt vars)))]
|
||||
(let ([result (query/rows c 'in-query stmt vars)])
|
||||
(if (null? grouping-fields)
|
||||
result
|
||||
(group-rows-result* 'in-query result grouping-fields group-mode)))))]
|
||||
[else
|
||||
(let ([cursor (query/cursor c 'in-query stmt vars)])
|
||||
(in-list-generator/vector->values
|
||||
|
@ -333,62 +335,38 @@
|
|||
|
||||
;; ========================================
|
||||
|
||||
;; FIXME: add 'assume-sorted optimization option?
|
||||
|
||||
(define (group-rows result
|
||||
#:group key-fields-list
|
||||
#:group-mode [group-mode null])
|
||||
(when (null? key-fields-list)
|
||||
(error 'group-rows "expected at least one grouping field set"))
|
||||
(group-rows-result* 'group-rows
|
||||
result
|
||||
key-fields-list
|
||||
(not (memq 'preserve-null-rows group-mode))
|
||||
(memq 'list group-mode)))
|
||||
(group-rows-result* 'group-rows result key-fields-list group-mode))
|
||||
|
||||
(define (group-rows-result* fsym result key-fields-list invert-outer? as-list?)
|
||||
(let* ([key-fields-list
|
||||
(if (list? key-fields-list) key-fields-list (list key-fields-list))]
|
||||
[total-fields (length (rows-result-headers result))]
|
||||
[name-map
|
||||
(for/hash ([header (in-list (rows-result-headers result))]
|
||||
[i (in-naturals)]
|
||||
#:when (assq 'name header))
|
||||
(values (cdr (assq 'name header)) i))]
|
||||
(define (group-rows-result* fsym result key-fields-list group-mode)
|
||||
(let* ([invert-outer? (not (or (memq 'preserve-null group-mode)
|
||||
;; old flag, deprecated:
|
||||
(memq 'preserve-null-rows group-mode)))]
|
||||
[as-list? (memq 'list group-mode)]
|
||||
[headers (rows-result-headers result)]
|
||||
[total-fields (length headers)]
|
||||
[name-map (headers->name-map headers)]
|
||||
[fields-used (make-vector total-fields #f)]
|
||||
[key-indexes-list
|
||||
(for/list ([key-fields (in-list key-fields-list)])
|
||||
(for/vector ([key-field (in-vector key-fields)])
|
||||
(let ([key-index
|
||||
(cond [(string? key-field)
|
||||
(hash-ref name-map key-field #f)]
|
||||
[else key-field])])
|
||||
(when (string? key-field)
|
||||
(unless key-index
|
||||
(error fsym "grouping field ~s not found" key-field)))
|
||||
(when (exact-integer? key-field)
|
||||
(unless (< key-index total-fields)
|
||||
(error fsym "grouping index ~s out of range [0, ~a]"
|
||||
key-index (sub1 total-fields))))
|
||||
(when (vector-ref fields-used key-index)
|
||||
(error fsym "grouping field ~s~a used multiple times"
|
||||
key-field
|
||||
(if (string? key-field)
|
||||
(format " (index ~a)" key-index)
|
||||
"")))
|
||||
(vector-set! fields-used key-index #t)
|
||||
key-index)))]
|
||||
(group-list->indexes fsym name-map total-fields fields-used key-fields-list)]
|
||||
[residual-length
|
||||
(for/sum ([x (in-vector fields-used)])
|
||||
(if x 0 1))])
|
||||
(for/sum ([x (in-vector fields-used)]) (if x 0 1))])
|
||||
(when (= residual-length 0)
|
||||
(error fsym "cannot group by all fields"))
|
||||
(when (and (> residual-length 1) as-list?)
|
||||
(error fsym
|
||||
"exactly one residual field expected for #:group-mode 'list, got ~a"
|
||||
"expected exactly one residual field for #:group-mode 'list, got ~a"
|
||||
residual-length))
|
||||
(let* ([initial-projection
|
||||
(for/vector #:length total-fields ([i (in-range total-fields)]) i)]
|
||||
[headers
|
||||
(group-headers (list->vector (rows-result-headers result))
|
||||
(group-headers (list->vector headers)
|
||||
initial-projection
|
||||
key-indexes-list)]
|
||||
[rows
|
||||
|
@ -400,6 +378,46 @@
|
|||
as-list?)])
|
||||
(rows-result headers rows))))
|
||||
|
||||
(define (headers->name-map headers)
|
||||
(for/hash ([header (in-list headers)]
|
||||
[i (in-naturals)]
|
||||
#:when (assq 'name header))
|
||||
(values (cdr (assq 'name header)) i)))
|
||||
|
||||
(define (group-list->indexes fsym name-map total-fields fields-used key-fields-list)
|
||||
(let ([key-fields-list (if (list? key-fields-list) key-fields-list (list key-fields-list))])
|
||||
(for/list ([key-fields (in-list key-fields-list)])
|
||||
(group->indexes fsym name-map total-fields fields-used key-fields))))
|
||||
|
||||
(define (group->indexes fsym name-map total-fields fields-used key-fields)
|
||||
(let ([key-fields (if (vector? key-fields) key-fields (vector key-fields))])
|
||||
(for/vector ([key-field (in-vector key-fields)])
|
||||
(grouping-field->index fsym name-map total-fields fields-used key-field))))
|
||||
|
||||
(define (grouping-field->index fsym name-map total-fields fields-used key-field)
|
||||
(let ([key-index
|
||||
(cond [(string? key-field)
|
||||
(hash-ref name-map key-field #f)]
|
||||
[else key-field])])
|
||||
(when (string? key-field)
|
||||
(unless key-index
|
||||
(error fsym "expected grouping field in ~s, got: ~e"
|
||||
(sort (hash-keys name-map) string<?)
|
||||
key-field)))
|
||||
(when (exact-integer? key-field)
|
||||
(unless (< key-index total-fields)
|
||||
(error fsym "grouping index ~s out of range [0, ~a]"
|
||||
key-index (sub1 total-fields))))
|
||||
(when fields-used
|
||||
(when (vector-ref fields-used key-index)
|
||||
(error fsym "grouping field ~s~a used multiple times"
|
||||
key-field
|
||||
(if (string? key-field)
|
||||
(format " (index ~a)" key-index)
|
||||
"")))
|
||||
(vector-set! fields-used key-index #t))
|
||||
key-index))
|
||||
|
||||
(define (group-headers headers projection key-indexes-list)
|
||||
(define (get-headers vec)
|
||||
(for/list ([index (in-vector vec)])
|
||||
|
@ -414,7 +432,7 @@
|
|||
[residual-headers
|
||||
(group-headers headers residual-projection (cdr key-indexes-list))])
|
||||
(append (get-headers key-indexes)
|
||||
(list `((grouped . ,residual-headers)))))]))
|
||||
(list `((name . "grouped") (grouped . ,residual-headers)))))]))
|
||||
|
||||
(define (group-rows* fsym rows projection key-indexes-list invert-outer? as-list?)
|
||||
;; projection is vector of indexes (actually projection and permutation)
|
||||
|
@ -443,17 +461,14 @@
|
|||
(define residual-projection
|
||||
(vector-filter-not (lambda (index) (vector-member index key-indexes))
|
||||
projection))
|
||||
|
||||
(define key-row-length (vector-length key-indexes))
|
||||
(define (row->key-row row)
|
||||
(for/vector #:length key-row-length
|
||||
([i (in-vector key-indexes)])
|
||||
(vector-ref row i)))
|
||||
|
||||
(define (residual-all-null? row)
|
||||
(for/and ([i (in-vector residual-projection)])
|
||||
(sql-null? (vector-ref row i))))
|
||||
|
||||
(let* ([key-table (make-hash)]
|
||||
[r-keys
|
||||
(for/fold ([r-keys null])
|
||||
|
@ -476,3 +491,55 @@
|
|||
invert-outer?
|
||||
as-list?)])
|
||||
(vector-append key (vector residuals))))))]))
|
||||
|
||||
;; ========================================
|
||||
|
||||
(define not-given (gensym 'not-given))
|
||||
|
||||
(define (rows->dict result
|
||||
#:key key-field/s
|
||||
#:value value-field/s
|
||||
#:value-mode [value-mode null])
|
||||
(let* ([who 'rows->dict]
|
||||
[headers (rows-result-headers result)]
|
||||
[total-fields (length headers)]
|
||||
[name-map (headers->name-map headers)]
|
||||
[preserve-null? (memq 'preserve-null value-mode)]
|
||||
[value-list? (memq 'list value-mode)])
|
||||
(define (make-project field/s)
|
||||
(if (vector? field/s)
|
||||
(let* ([indexes (group->indexes who name-map total-fields #f field/s)]
|
||||
[indexes-length (vector-length indexes)])
|
||||
(lambda (v)
|
||||
(for/vector #:length indexes-length ([i (in-vector indexes)])
|
||||
(vector-ref v i))))
|
||||
(let ([index (grouping-field->index who name-map total-fields #f field/s)])
|
||||
(lambda (v) (vector-ref v index)))))
|
||||
(define get-key (make-project key-field/s))
|
||||
(define get-value (make-project value-field/s))
|
||||
(define ok-value?
|
||||
(cond [preserve-null? (lambda (v) #t)]
|
||||
[(vector? value-field/s)
|
||||
(lambda (v) (not (for/or ([e (in-vector v)]) (sql-null? e))))]
|
||||
[else (lambda (v) (not (sql-null? v)))]))
|
||||
(for/fold ([table '#hash()]) ([row (in-list (if value-list?
|
||||
(reverse (rows-result-rows result))
|
||||
(rows-result-rows result)))])
|
||||
(let* ([key (get-key row)]
|
||||
[value (get-value row)]
|
||||
[old-value (hash-ref table key (if value-list? '() not-given))])
|
||||
(unless (or value-list?
|
||||
(eq? (hash-ref table key not-given) not-given)
|
||||
;; FIXME: okay to coalesce values if equal?
|
||||
(equal? value old-value))
|
||||
(error who "duplicate value for key: ~e; values are ~e and ~e"
|
||||
key old-value value))
|
||||
(if value-list?
|
||||
(hash-set table key
|
||||
(if (ok-value? value)
|
||||
(cons value old-value)
|
||||
;; If all-NULL value, still enter key => '() into dict
|
||||
old-value))
|
||||
(if (ok-value? value)
|
||||
(hash-set table key value)
|
||||
table))))))
|
||||
|
|
|
@ -29,7 +29,9 @@
|
|||
;; connection<%>
|
||||
(define connection<%>
|
||||
(interface ()
|
||||
;; connected? method must return promptly (eg, without acquiring lock)
|
||||
connected? ;; -> boolean
|
||||
|
||||
disconnect ;; -> void
|
||||
get-dbsystem ;; -> dbsystem<%>
|
||||
query ;; symbol statement -> QueryResult
|
||||
|
|
|
@ -49,18 +49,20 @@
|
|||
(define/private (call* method-name args need-connected?)
|
||||
(cond [channel
|
||||
(pchan-put channel (cons method-name args))
|
||||
(match (pchan-get channel)
|
||||
(let* ([response (pchan-get channel)]
|
||||
[still-connected? (car response)])
|
||||
(when (not still-connected?) (set! channel #f))
|
||||
(match (cdr response)
|
||||
[(cons 'values vals)
|
||||
(apply values (for/list ([val (in-list vals)]) (sexpr->result val)))]
|
||||
[(list 'error message)
|
||||
(raise (make-exn:fail message (current-continuation-marks)))])]
|
||||
(raise (make-exn:fail message (current-continuation-marks)))]))]
|
||||
[need-connected?
|
||||
(unless channel
|
||||
(error/not-connected method-name))]
|
||||
[else (void)]))
|
||||
|
||||
(define/override (connected?)
|
||||
;; FIXME: can underlying connection disconnect w/o us knowing?
|
||||
(and channel #t))
|
||||
|
||||
(define/public (disconnect)
|
||||
|
|
|
@ -68,8 +68,8 @@ where <connect-spec> ::= (list 'sqlite3 path/sym mode-sym delay-num limit-num)
|
|||
Connection methods protocol
|
||||
|
||||
client -> server: (list '<method-name> arg ...)
|
||||
server -> client: (or (list 'values result ...)
|
||||
(list 'error string))
|
||||
server -> client: (or (list boolean 'values result ...)
|
||||
(list boolean 'error string))
|
||||
|#
|
||||
|
||||
(define proxy-server%
|
||||
|
@ -86,10 +86,12 @@ server -> client: (or (list 'values result ...)
|
|||
(serve1)
|
||||
(when connection (serve)))
|
||||
|
||||
(define/private (still-connected?) (and connection (send connection connected?)))
|
||||
|
||||
(define/private (serve1)
|
||||
(with-handlers ([exn?
|
||||
(lambda (e)
|
||||
(pchan-put channel (list 'error (exn-message e))))])
|
||||
(pchan-put channel (list (still-connected?) 'error (exn-message e))))])
|
||||
(call-with-values
|
||||
(lambda ()
|
||||
(match (pchan-get channel)
|
||||
|
@ -117,7 +119,7 @@ server -> client: (or (list 'values result ...)
|
|||
(transaction-status w))]))
|
||||
(lambda results
|
||||
(let ([results (for/list ([result (in-list results)]) (result->sexpr result))])
|
||||
(pchan-put channel (cons 'values results)))))))
|
||||
(pchan-put channel (cons (still-connected?) (cons 'values results))))))))
|
||||
|
||||
(define/private (sexpr->statement x)
|
||||
(match x
|
||||
|
|
|
@ -10,6 +10,12 @@
|
|||
(define (tech/reference . pre-flows)
|
||||
(apply tech #:doc '(lib "scribblings/reference/reference.scrbl") pre-flows))
|
||||
|
||||
(define (parheading . pre-flows)
|
||||
(elem (apply bold pre-flows) (hspace 1)))
|
||||
|
||||
(define (wplink path . pre-flows)
|
||||
(apply hyperlink (string-append "http://en.wikipedia.org/wiki/" path) pre-flows))
|
||||
|
||||
;; ----
|
||||
|
||||
(define the-eval (make-base-eval))
|
||||
|
|
|
@ -19,18 +19,8 @@ administrative functions for managing connections.
|
|||
There are four kinds of base connection, and they are divided into two
|
||||
groups: @deftech{wire-based connections} and @deftech{FFI-based
|
||||
connections}. PostgreSQL and MySQL connections are wire-based, and
|
||||
SQLite and ODBC connections are FFI-based.
|
||||
|
||||
Wire-based connections communicate using @tech/reference{ports}, which
|
||||
do not cause other Racket threads to block. In contrast, an FFI call
|
||||
causes all Racket threads to block until it completes, so FFI-based
|
||||
connections can degrade the interactivity of a Racket program,
|
||||
particularly if long-running queries are performed using the
|
||||
connection. This problem can be avoided by creating the FFI-based
|
||||
connection in a separate @tech/reference{place} using the
|
||||
@racket[#:use-place] keyword argument. Such a connection will not
|
||||
block all Racket threads during queries; the disadvantage is the cost
|
||||
of creating and communicating with a separate @tech/reference{place}.
|
||||
SQLite and ODBC connections are FFI-based. See also
|
||||
@secref["ffi-concurrency"].
|
||||
|
||||
Base connections are made using the following functions.
|
||||
|
||||
|
@ -240,7 +230,7 @@ Base connections are made using the following functions.
|
|||
|
||||
If @racket[use-place] is true, the actual connection is created in
|
||||
a distinct @tech/reference{place} for database connections and a
|
||||
proxy is returned.
|
||||
proxy is returned; see @secref["ffi-concurrency"].
|
||||
|
||||
If the connection cannot be made, an exception is raised.
|
||||
|
||||
|
@ -289,7 +279,7 @@ Base connections are made using the following functions.
|
|||
|
||||
If @racket[use-place] is true, the actual connection is created in
|
||||
a distinct @tech/reference{place} for database connections and a
|
||||
proxy is returned.
|
||||
proxy is returned; see @secref["ffi-concurrency"].
|
||||
|
||||
If the connection cannot be made, an exception is raised.
|
||||
}
|
||||
|
@ -436,14 +426,13 @@ connection associated with the current thread, one is obtained by
|
|||
calling @racket[connect]. An actual connection is disconnected when
|
||||
its associated thread dies.
|
||||
|
||||
@;{or if @racket[timeout] seconds elapse since the actual connection was last used.}
|
||||
|
||||
Virtual connections are especially useful in contexts such as web
|
||||
servlets, where each request is handled in a fresh thread. A single
|
||||
global virtual connection can be defined, freeing each servlet request
|
||||
from explicitly opening and closing its own connections. In
|
||||
particular, a @tech{virtual connection} backed by a @tech{connection
|
||||
pool} combines convenience with efficiency:
|
||||
servlets (see @secref["intro-servlets"]), where each request is
|
||||
handled in a fresh thread. A single global virtual connection can be
|
||||
defined, freeing each servlet request from explicitly opening and
|
||||
closing its own connections. In particular, a @tech{virtual
|
||||
connection} backed by a @tech{connection pool} combines convenience
|
||||
with efficiency:
|
||||
|
||||
@examples/results[
|
||||
[(define the-connection
|
||||
|
|
|
@ -50,7 +50,7 @@ and Doug Orleans for contributions to @tt{spgsql}, the PostgreSQL-only
|
|||
predecessor of this library. The SQLite support is based in part on
|
||||
code from Jay McCarthy's @tt{sqlite} package.
|
||||
|
||||
@include-section["introduction.scrbl"]
|
||||
@include-section["using-db.scrbl"]
|
||||
@include-section["connect.scrbl"]
|
||||
@include-section["query.scrbl"]
|
||||
@include-section["sql-types.scrbl"]
|
||||
|
|
|
@ -1,290 +0,0 @@
|
|||
#lang scribble/doc
|
||||
@(require scribble/manual
|
||||
scribble/eval
|
||||
scribble/struct
|
||||
racket/sandbox
|
||||
"config.rkt"
|
||||
(for-label db
|
||||
web-server/lang/web))
|
||||
|
||||
@title[#:tag "introduction"]{Introduction}
|
||||
|
||||
This section introduces this library's basic features and discusses
|
||||
how to build a database-backed web servlet.
|
||||
|
||||
@section[#:tag "intro-basic"]{Basic Features}
|
||||
|
||||
The following annotated program demonstrates how to connect to a
|
||||
database and perform simple queries. Some of the SQL syntax used below
|
||||
is PostgreSQL-specific, such as the syntax of query parameters
|
||||
(@litchar{$1} rather than @litchar{?}).
|
||||
|
||||
@my-interaction[
|
||||
[(require db)
|
||||
(void)]
|
||||
]
|
||||
|
||||
First we create a connection. Replace @racket[_user], @racket[_db],
|
||||
and @racket[_password] below with the appropriate values for your
|
||||
configuration (see @secref{creating-connections} for other connection examples):
|
||||
|
||||
@my-interaction[
|
||||
[(define pgc
|
||||
(postgresql-connect #:user _user
|
||||
#:database _db
|
||||
#:password _password))
|
||||
(void)]
|
||||
[pgc
|
||||
(new connection%)]
|
||||
]
|
||||
|
||||
Use @racket[query-exec] method to execute a SQL statement for effect.
|
||||
|
||||
@my-interaction[
|
||||
[(query-exec pgc
|
||||
"create temporary table the_numbers (n integer, d varchar(20))")
|
||||
(void)]
|
||||
[(query-exec pgc
|
||||
"insert into the_numbers values (0, 'nothing')")
|
||||
(void)]
|
||||
[(query-exec pgc
|
||||
"insert into the_numbers values (1, 'the loneliest number')")
|
||||
(void)]
|
||||
[(query-exec pgc
|
||||
"insert into the_numbers values (2, 'company')")
|
||||
(void)]
|
||||
]
|
||||
|
||||
The @racket[query] function is a more general way to execute a
|
||||
statement. It returns a structure encapsulating information about the
|
||||
statement's execution. (But some of that information varies from
|
||||
system to system and is subject to change.)
|
||||
|
||||
@my-interaction[
|
||||
[(query pgc "insert into the_numbers values (3, 'a crowd')")
|
||||
(simple-result '((command insert 0 1)))]
|
||||
[(query pgc "select n, d from the_numbers where n % 2 = 0")
|
||||
(rows-result
|
||||
(list
|
||||
'((name . "n") (typeid . 23))
|
||||
'((name . "d") (typeid . 1043)))
|
||||
'(#(0 "nothing") #(2 "company")))]
|
||||
]
|
||||
|
||||
When the query is known to return rows and when the field
|
||||
descriptions are not needed, it is more convenient to use the
|
||||
@racket[query-rows] function.
|
||||
|
||||
@my-interaction[
|
||||
[(query-rows pgc "select n, d from the_numbers where n % 2 = 0")
|
||||
'(#(0 "nothing") #(2 "company"))]
|
||||
]
|
||||
|
||||
Use @racket[query-row] for queries that are known to return exactly
|
||||
one row.
|
||||
|
||||
@my-interaction[
|
||||
[(query-row pgc "select * from the_numbers where n = 0")
|
||||
(vector 0 "nothing")]
|
||||
]
|
||||
|
||||
Similarly, use @racket[query-list] for queries that produce rows of
|
||||
exactly one column.
|
||||
|
||||
@my-interaction[
|
||||
[(query-list pgc "select d from the_numbers order by n")
|
||||
(list "nothing" "the loneliest number" "company" "a crowd")]
|
||||
]
|
||||
|
||||
When a query is known to return a single value (one row and one
|
||||
column), use @racket[query-value].
|
||||
|
||||
@my-interaction[
|
||||
[(query-value pgc "select count(*) from the_numbers")
|
||||
4]
|
||||
[(query-value pgc "select d from the_numbers where n = 5")
|
||||
(error 'query-value
|
||||
"query returned zero rows: ~s"
|
||||
"select d from the_numbers where n = 5")]
|
||||
]
|
||||
|
||||
When a query may return zero or one rows, as the last example, use
|
||||
@racket[query-maybe-row] or @racket[query-maybe-value] instead.
|
||||
|
||||
@my-interaction[
|
||||
[(query-maybe-value pgc "select d from the_numbers where n = 5")
|
||||
(values #f)]
|
||||
]
|
||||
|
||||
The @racket[in-query] function produces a sequence that can be used
|
||||
with Racket's iteration forms:
|
||||
|
||||
@my-interaction[
|
||||
[(for ([(n d) (in-query pgc "select * from the_numbers where n < 4")])
|
||||
(printf "~a is ~a\n" n d))
|
||||
(for-each (lambda (n d) (printf "~a: ~a\n" n d))
|
||||
'(0 1 2 3)
|
||||
'("nothing" "the loneliest number" "company" "a crowd"))]
|
||||
[(for/fold ([sum 0]) ([n (in-query pgc "select n from the_numbers")])
|
||||
(+ sum n))
|
||||
(for/fold ([sum 0]) ([n (in-list '(0 1 2 3))])
|
||||
(+ sum n))]
|
||||
]
|
||||
|
||||
Errors in queries generally do not cause the connection to disconnect.
|
||||
|
||||
@my-interaction[
|
||||
[(begin (with-handlers [(exn:fail?
|
||||
(lambda (e) (printf "~a~n" (exn-message e))))]
|
||||
(query-value pgc "select NoSuchField from NoSuchTable"))
|
||||
(query-value pgc "select 'okay to proceed!'"))
|
||||
(begin (display "query-value: relation \"nosuchtable\" does not exist (SQLSTATE 42P01)")
|
||||
"okay to proceed!")]
|
||||
]
|
||||
|
||||
Queries may contain parameters. The easiest way to execute a
|
||||
parameterized query is to provide the parameters ``inline'' after the
|
||||
SQL statement in the query function call.
|
||||
|
||||
@my-interaction[
|
||||
[(query-value pgc
|
||||
"select d from the_numbers where n = $1" 2)
|
||||
"company"]
|
||||
[(query-list pgc
|
||||
"select n from the_numbers where n > $1 and n < $2" 0 3)
|
||||
(list 1 2)]
|
||||
]
|
||||
|
||||
Alternatively, a parameterized query may be prepared in advance and
|
||||
executed later. @tech{Prepared statements} can be executed multiple
|
||||
times with different parameter values.
|
||||
|
||||
@my-interaction[
|
||||
[(define get-less-than-pst
|
||||
(prepare pgc "select n from the_numbers where n < $1"))
|
||||
(void)]
|
||||
[(query-list pgc get-less-than-pst 1)
|
||||
(list 0)]
|
||||
[(query-list pgc (bind-prepared-statement get-less-than-pst '(2)))
|
||||
(list 0 1)]
|
||||
]
|
||||
|
||||
When a connection's work is done, it should be disconnected.
|
||||
|
||||
@my-interaction[
|
||||
[(disconnect pgc)
|
||||
(void)]
|
||||
]
|
||||
|
||||
|
||||
@;{============================================================}
|
||||
|
||||
@section[#:tag "intro-servlets"]{Databases and Web Servlets}
|
||||
|
||||
Using database connections in a web servlet is more complicated than
|
||||
in a standalone program. A single servlet is potentially used to serve
|
||||
many requests at once, each in a separate request-handling
|
||||
thread. Furthermore, the use of @racket[send/suspend],
|
||||
@racket[send/suspend/dispatch], etc means that there are many places
|
||||
where a servlet may start and stop executing to service a request.
|
||||
|
||||
Why not use a single connection to handle all of a servlet's requests?
|
||||
That is, create the connection with the servlet instance and never
|
||||
disconnect it. Such a servlet would look something like the following:
|
||||
|
||||
@racketmod[
|
||||
#:file "bad-servlet.rkt"
|
||||
web-server
|
||||
(require db)
|
||||
(define db-conn (postgresql-connect ....))
|
||||
(define (serve req)
|
||||
.... db-conn ....)
|
||||
]
|
||||
|
||||
The main problem with using one connection for all requests is that
|
||||
multiple threads accessing the same connection are not properly
|
||||
@hyperlink["http://en.wikipedia.org/wiki/Isolation_%28database_systems%29"]{isolated}. For
|
||||
example, if two threads both attempt to start a new transaction, the
|
||||
second one will fail, because the first thread has already put the
|
||||
connection into an ``in transaction'' state. And if one thread is
|
||||
accessing the connection within a transaction and another thread
|
||||
issues a query, the second thread may see invalid data or even disrupt
|
||||
the work of the first thread.
|
||||
|
||||
A secondary problem is performance. A connection can only perform a
|
||||
single query at a time, whereas most database systems are capable of
|
||||
concurrent query processing.
|
||||
|
||||
The proper way to use database connections in a servlet is to create a
|
||||
connection for each request and disconnect it when the request
|
||||
is handled. But since a request thread may start and stop executing in
|
||||
many places (due to @racket[send/suspend], etc), inserting the code to
|
||||
connect and disconnect at the proper places can be challenging and
|
||||
messy.
|
||||
|
||||
A better solution is to use a @tech{virtual connection}, which creates
|
||||
a request-specific (that is, thread-specific) ``actual connection'' by
|
||||
need and disconnects it when the request is handled (that is, when the
|
||||
thread terminates). Different request-handling threads using the same
|
||||
virtual connection are assigned different actual connection, so the
|
||||
threads are properly isolated.
|
||||
|
||||
@racketmod[
|
||||
#:file "better-servlet.rkt"
|
||||
web-server
|
||||
(require db)
|
||||
(define db-conn
|
||||
(virtual-connection
|
||||
(lambda () (postgresql-connect ....))))
|
||||
(define (serve req)
|
||||
.... db-conn ....)
|
||||
]
|
||||
|
||||
This solution preserves the simplicity of the naive solution and fixes
|
||||
the isolation problem but at the cost of creating many short-lived
|
||||
database connections. That cost can be eliminated by using a
|
||||
@tech{connection pool}:
|
||||
|
||||
@racketmod[
|
||||
#:file "best-servlet.rkt"
|
||||
web-server
|
||||
(require db)
|
||||
(define db-conn
|
||||
(virtual-connection
|
||||
(connection-pool
|
||||
(lambda () (postgresql-connect ....)))))
|
||||
(define (serve req)
|
||||
.... db-conn ....)
|
||||
]
|
||||
|
||||
By using a virtual connection backed by a connection pool, a servlet
|
||||
can achieve simplicity, isolation, and performance.
|
||||
|
||||
@;{
|
||||
|
||||
TODO:
|
||||
- talk about virtual statements, too
|
||||
- show actual working servlet code
|
||||
|
||||
--
|
||||
|
||||
A prepared statement is tied to the connection used to create it;
|
||||
attempting to use it with another connection results in an
|
||||
error. Unfortunately, in some scenarios such as web servlets, the
|
||||
lifetimes of connections are short or difficult to track, making
|
||||
prepared statements inconvenient. In such cases, a better tool is the
|
||||
@tech{virtual statement}, which prepares statements on demand and
|
||||
caches them for future use with the same connection.
|
||||
|
||||
@my-interaction[
|
||||
[(define get-less-than-pst
|
||||
(virtual-statement "select n from the_numbers where n < $1"))
|
||||
(void)]
|
||||
[(code:line (query-list pgc1 get-less-than-pst 1) (code:comment "prepares statement for pgc1"))
|
||||
(list 0)]
|
||||
[(code:line (query-list pgc2 get-less-than-pst 2) (code:comment "prepares statement for pgc2"))
|
||||
(list 0 1)]
|
||||
[(code:line (query-list pgc1 get-less-than-pst 3) (code:comment "uses existing prep. stmt."))
|
||||
(list 0 1 2)]
|
||||
]
|
||||
}
|
|
@ -96,6 +96,21 @@ package in Red Hat.}
|
|||
]
|
||||
|
||||
|
||||
@section[#:tag "ffi-concurrency"]{FFI-Based Connections and Concurrency}
|
||||
|
||||
@tech{Wire-based connections} communicate using
|
||||
@tech/reference{ports}, which do not cause other Racket threads to
|
||||
block. In contrast, an FFI call causes all Racket threads to block
|
||||
until it completes, so @tech{FFI-based connections} can degrade the
|
||||
interactivity of a Racket program, particularly if long-running
|
||||
queries are performed using the connection. This problem can be
|
||||
avoided by creating the FFI-based connection in a separate
|
||||
@tech/reference{place} using the @racket[#:use-place] keyword
|
||||
argument. Such a connection will not block all Racket threads during
|
||||
queries; the disadvantage is the cost of creating and communicating
|
||||
with a separate @tech/reference{place}.
|
||||
|
||||
|
||||
@section[#:tag "odbc-requirements"]{ODBC Requirements}
|
||||
|
||||
ODBC requires the appropriate driver manager native library as well as
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
racket/sandbox
|
||||
"config.rkt"
|
||||
"tabbing.rkt"
|
||||
(for-label db db/util/geometry db/util/postgresql))
|
||||
(for-label db db/util/geometry db/util/postgresql racket/dict))
|
||||
|
||||
@title[#:tag "query-api"]{Queries}
|
||||
|
||||
|
@ -19,7 +19,7 @@ raises an exception. Different query functions impose different
|
|||
constraints on the query results and offer different mechanisms for
|
||||
processing the results.
|
||||
|
||||
@bold{Errors} In most cases, a query error does not cause the
|
||||
@parheading{Errors} In most cases, a query error does not cause the
|
||||
connection to be disconnected. Specifically, the following kinds of
|
||||
errors should never cause a connection to be disconnected:
|
||||
@itemize[
|
||||
|
@ -43,7 +43,7 @@ disconnected:
|
|||
See @secref["transactions"] for information on how errors can affect
|
||||
the transaction status.
|
||||
|
||||
@bold{Character encoding} This library is designed to interact with
|
||||
@parheading{Character encoding} This library is designed to interact with
|
||||
database systems using the UTF-8 character encoding. The connection
|
||||
functions attempt to negotiate UTF-8 communication at the beginning of
|
||||
every connection, but some systems also allow the character encoding
|
||||
|
@ -53,13 +53,12 @@ and data might get corrupted in transmission. Avoid changing a
|
|||
connection's character encoding. When possible, the connection will
|
||||
observe the change and automatically disconnect with an error.
|
||||
|
||||
@bold{Synchronization} Connections are internally synchronized: it is
|
||||
safe to perform concurrent queries on the same connection object from
|
||||
different threads. Connections are not kill-safe: killing a thread
|
||||
that is using a connection---or shutting down the connection's
|
||||
managing custodian---may leave the connection locked, causing future
|
||||
operations to block indefinitely. See @secref["kill-safe"] for a
|
||||
way to make kill-safe connections.
|
||||
@parheading{Synchronization} Connections are internally synchronized:
|
||||
it is safe to use a connection from different threads
|
||||
concurrently. Most connections are not kill-safe: killing a thread
|
||||
that is using a connection may leave the connection locked, causing
|
||||
future operations to block indefinitely. See also
|
||||
@secref["kill-safe"].
|
||||
|
||||
|
||||
@section{Statements}
|
||||
|
@ -77,16 +76,14 @@ All query functions require both a connection and a
|
|||
]
|
||||
|
||||
A SQL statement may contain parameter placeholders that stand for SQL
|
||||
scalar values. The parameter values must be supplied when the
|
||||
statement is executed; the parameterized statement and parameter
|
||||
values are sent to the database back end, which combines them
|
||||
correctly and safely.
|
||||
scalar values; such statements are called @deftech{parameterized
|
||||
queries}. The parameter values must be supplied when the statement is
|
||||
executed; the parameterized statement and parameter values are sent to
|
||||
the database back end, which combines them correctly and safely.
|
||||
|
||||
Use parameters instead of Racket string interpolation (eg,
|
||||
@racket[format] or @racket[string-append]) to avoid
|
||||
@hyperlink["http://xkcd.com/327/"]{SQL injection}, where a string
|
||||
intended to represent a SQL scalar value is interpreted as---possibly
|
||||
malicious---SQL code instead.
|
||||
@secref["dbsec-sql-injection"].
|
||||
|
||||
The syntax of placeholders varies depending on the database
|
||||
system. For example:
|
||||
|
@ -140,11 +137,13 @@ The types of parameters and returned fields are described in
|
|||
@defproc[(query-rows [connection connection?]
|
||||
[stmt statement?]
|
||||
[arg any/c] ...
|
||||
[#:group grouping-fields
|
||||
(or/c (vectorof string?) (listof (vectorof string?)))
|
||||
[#:group groupings
|
||||
(let* ([field/c (or/c string? exact-nonnegative-integer?)]
|
||||
[grouping/c (or/c field/c (vectorof field/c))])
|
||||
(or/c grouping/c (listof grouping/c)))
|
||||
null]
|
||||
[#:group-mode group-mode
|
||||
(listof (or/c 'preserve-null-rows 'list))
|
||||
(listof (or/c 'preserve-null 'list))
|
||||
null])
|
||||
(listof vector?)]{
|
||||
|
||||
|
@ -158,7 +157,7 @@ The types of parameters and returned fields are described in
|
|||
(list (vector 17))]
|
||||
]
|
||||
|
||||
If @racket[grouping-fields] is not empty, the result is the same as if
|
||||
If @racket[groupings] is not empty, the result is the same as if
|
||||
@racket[group-rows] had been called on the result rows.
|
||||
}
|
||||
|
||||
|
@ -245,7 +244,15 @@ The types of parameters and returned fields are described in
|
|||
@defproc[(in-query [connection connection?]
|
||||
[stmt statement?]
|
||||
[arg any/c] ...
|
||||
[#:fetch fetch-size (or/c exact-positive-integer? +inf.0) +inf.0])
|
||||
[#:fetch fetch-size (or/c exact-positive-integer? +inf.0) +inf.0]
|
||||
[#:group groupings
|
||||
(let* ([field/c (or/c string? exact-nonnegative-integer?)]
|
||||
[grouping/c (or/c field/c (vectorof field/c))])
|
||||
(or/c grouping/c (listof grouping/c)))
|
||||
null]
|
||||
[#:group-mode group-mode
|
||||
(listof (or/c 'preserve-null 'list))
|
||||
null])
|
||||
sequence?]{
|
||||
|
||||
Executes a SQL query, which must produce rows, and returns a
|
||||
|
@ -260,6 +267,11 @@ The types of parameters and returned fields are described in
|
|||
open cursors; attempting to fetch more rows may fail. On PostgreSQL,
|
||||
a cursor can be opened only within a transaction.
|
||||
|
||||
If @racket[groupings] is not empty, the result is the same as
|
||||
if @racket[group-rows] had been called on the result rows. If
|
||||
@racket[groupings] is not empty, then @racket[fetch-size] must
|
||||
be @racket[+inf.0]; otherwise, an exception is raised.
|
||||
|
||||
@examples/results[
|
||||
[(for/list ([n (in-query pgc "select n from the_numbers where n < 2")])
|
||||
n)
|
||||
|
@ -329,25 +341,29 @@ future version of this library (even new minor versions).
|
|||
}
|
||||
|
||||
@defproc[(group-rows [result rows-result?]
|
||||
[#:group grouping-fields
|
||||
(or/c (vectorof string?) (listof (vectorof string?)))]
|
||||
[#:group groupings
|
||||
(let* ([field/c (or/c string? exact-nonnegative-integer?)]
|
||||
[grouping/c (or/c field/c (vectorof field/c))])
|
||||
(or/c grouping/c (listof grouping/c)))]
|
||||
[#:group-mode group-mode
|
||||
(listof (or/c 'preserve-null-rows 'list))
|
||||
(listof (or/c 'preserve-null 'list))
|
||||
null])
|
||||
rows-result?]{
|
||||
|
||||
If @racket[grouping-fields] is a vector, the elements must be names of
|
||||
If @racket[groupings] is a vector, the elements must be names of
|
||||
fields in @racket[result], and @racket[result]'s rows are regrouped
|
||||
using the given fields. Each grouped row contains N+1 fields; the
|
||||
first N fields are the @racket[grouping-fields], and the final field
|
||||
first N fields are the @racket[groupings], and the final field
|
||||
is a list of ``residual rows'' over the rest of the fields. A residual
|
||||
row of all NULLs is dropped (for convenient processing of @tt{OUTER
|
||||
JOIN} results) unless @racket[group-mode] includes
|
||||
@racket['preserve-null-rows]. If @racket[group-mode] contains
|
||||
@racket['preserve-null]. If @racket[group-mode] contains
|
||||
@racket['list], there must be exactly one residual field, and its
|
||||
values are included without a vector wrapper (similar to
|
||||
@racket[query-list]).
|
||||
|
||||
See also @secref["dbperf-n+1"].
|
||||
|
||||
@examples[#:eval the-eval
|
||||
(define vehicles-result
|
||||
(rows-result
|
||||
|
@ -361,7 +377,9 @@ values are included without a vector wrapper (similar to
|
|||
#:group '(#("type")))
|
||||
]
|
||||
|
||||
The @racket[grouping-fields] argument may also be a list of vectors;
|
||||
The grouped final column is given the name @racket["grouped"].
|
||||
|
||||
The @racket[groupings] argument may also be a list of vectors;
|
||||
in that case, the grouping process is repeated for each set of
|
||||
grouping fields. The grouping fields must be distinct.
|
||||
|
||||
|
@ -372,6 +390,38 @@ grouping fields. The grouping fields must be distinct.
|
|||
]
|
||||
}
|
||||
|
||||
@defproc[(rows->dict [result rows-result?]
|
||||
[#:key key-field/s
|
||||
(let ([field/c (or/c string? exact-nonnegative-integer?)])
|
||||
(or/c field/c (vectorof field/c)))]
|
||||
[#:value value-field/s
|
||||
(let ([field/c (or/c string? exact-nonnegative-integer?)])
|
||||
(or/c field/c (vectorof field/c)))]
|
||||
[#:value-mode value-mode
|
||||
(listof (or/c 'list 'preserve-null))
|
||||
null])
|
||||
dict?]{
|
||||
|
||||
Creates a dictionary mapping @racket[key-field/s] to
|
||||
@racket[value-field/s]. If @racket[key-field/s] is a single field name
|
||||
or index, the keys are the field values; if @racket[key-field/s] is a
|
||||
vector, the keys are vectors of the field values. Likewise for
|
||||
@racket[value-field/s].
|
||||
|
||||
If @racket[value-mode] contains @racket['list], a list of values is
|
||||
accumulated for each key; otherwise, there must be at most one value
|
||||
for each key. Values consisting of all @racket[sql-null?] values are
|
||||
dropped unless @racket[value-mode] contains
|
||||
@racket['preserve-null].
|
||||
|
||||
@examples[#:eval the-eval
|
||||
(rows->dict vehicles-result
|
||||
#:key "model" #:value '#("type" "maker"))
|
||||
(rows->dict vehicles-result
|
||||
#:key "maker" #:value "model" #:value-mode '(list))
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
@section{Prepared Statements}
|
||||
|
||||
|
@ -533,7 +583,7 @@ statement implicitly commits the current transaction. These statements
|
|||
also must not be used within @tech{managed transactions}. (In
|
||||
contrast, PostgreSQL and SQLite both support transactional DDL.)
|
||||
|
||||
@bold{Errors} Query errors may affect an open transaction in one of
|
||||
@parheading{Errors} Query errors may affect an open transaction in one of
|
||||
three ways:
|
||||
@itemlist[#:style 'ordered
|
||||
@item{the transaction remains open and unchanged}
|
||||
|
|
511
collects/db/scribblings/using-db.scrbl
Normal file
511
collects/db/scribblings/using-db.scrbl
Normal file
|
@ -0,0 +1,511 @@
|
|||
#lang scribble/doc
|
||||
@(require scribble/manual
|
||||
scribble/eval
|
||||
scribble/struct
|
||||
racket/sandbox
|
||||
"config.rkt"
|
||||
(for-label db db/util/testing racket/dict web-server/lang/web))
|
||||
|
||||
@title[#:tag "using-db"]{Using Database Connections}
|
||||
|
||||
This section introduces this library's basic features and covers some
|
||||
practical issues with database programming in general and with this
|
||||
library in particular.
|
||||
|
||||
|
||||
@section[#:tag "intro-basic"]{Introduction to Using Database Connections}
|
||||
|
||||
The following annotated program demonstrates how to connect to a
|
||||
database and perform simple queries. Some of the SQL syntax used below
|
||||
is PostgreSQL-specific, such as the syntax of query parameters
|
||||
(@litchar{$1} rather than @litchar{?}).
|
||||
|
||||
@my-interaction[
|
||||
[(require db)
|
||||
(void)]
|
||||
]
|
||||
|
||||
First we create a connection. Replace @racket[_user], @racket[_db],
|
||||
and @racket[_password] below with the appropriate values for your
|
||||
configuration (see @secref{creating-connections} for other connection examples):
|
||||
|
||||
@my-interaction[
|
||||
[(define pgc
|
||||
(postgresql-connect #:user _user
|
||||
#:database _db
|
||||
#:password _password))
|
||||
(void)]
|
||||
[pgc
|
||||
(new connection%)]
|
||||
]
|
||||
|
||||
Use @racket[query-exec] method to execute a SQL statement for effect.
|
||||
|
||||
@my-interaction[
|
||||
[(query-exec pgc
|
||||
"create temporary table the_numbers (n integer, d varchar(20))")
|
||||
(void)]
|
||||
[(query-exec pgc
|
||||
"insert into the_numbers values (0, 'nothing')")
|
||||
(void)]
|
||||
[(query-exec pgc
|
||||
"insert into the_numbers values (1, 'the loneliest number')")
|
||||
(void)]
|
||||
[(query-exec pgc
|
||||
"insert into the_numbers values (2, 'company')")
|
||||
(void)]
|
||||
]
|
||||
|
||||
The @racket[query] function is a more general way to execute a
|
||||
statement. It returns a structure encapsulating information about the
|
||||
statement's execution. (But some of that information varies from
|
||||
system to system and is subject to change.)
|
||||
|
||||
@my-interaction[
|
||||
[(query pgc "insert into the_numbers values (3, 'a crowd')")
|
||||
(simple-result '((command insert 0 1)))]
|
||||
[(query pgc "select n, d from the_numbers where n % 2 = 0")
|
||||
(rows-result
|
||||
(list
|
||||
'((name . "n") (typeid . 23))
|
||||
'((name . "d") (typeid . 1043)))
|
||||
'(#(0 "nothing") #(2 "company")))]
|
||||
]
|
||||
|
||||
When the query is known to return rows and when the field
|
||||
descriptions are not needed, it is more convenient to use the
|
||||
@racket[query-rows] function.
|
||||
|
||||
@my-interaction[
|
||||
[(query-rows pgc "select n, d from the_numbers where n % 2 = 0")
|
||||
'(#(0 "nothing") #(2 "company"))]
|
||||
]
|
||||
|
||||
Use @racket[query-row] for queries that are known to return exactly
|
||||
one row.
|
||||
|
||||
@my-interaction[
|
||||
[(query-row pgc "select * from the_numbers where n = 0")
|
||||
(vector 0 "nothing")]
|
||||
]
|
||||
|
||||
Similarly, use @racket[query-list] for queries that produce rows of
|
||||
exactly one column.
|
||||
|
||||
@my-interaction[
|
||||
[(query-list pgc "select d from the_numbers order by n")
|
||||
(list "nothing" "the loneliest number" "company" "a crowd")]
|
||||
]
|
||||
|
||||
When a query is known to return a single value (one row and one
|
||||
column), use @racket[query-value].
|
||||
|
||||
@my-interaction[
|
||||
[(query-value pgc "select count(*) from the_numbers")
|
||||
4]
|
||||
[(query-value pgc "select d from the_numbers where n = 5")
|
||||
(error 'query-value
|
||||
"query returned zero rows: ~s"
|
||||
"select d from the_numbers where n = 5")]
|
||||
]
|
||||
|
||||
When a query may return zero or one rows, as the last example, use
|
||||
@racket[query-maybe-row] or @racket[query-maybe-value] instead.
|
||||
|
||||
@my-interaction[
|
||||
[(query-maybe-value pgc "select d from the_numbers where n = 5")
|
||||
(values #f)]
|
||||
]
|
||||
|
||||
The @racket[in-query] function produces a sequence that can be used
|
||||
with Racket's iteration forms:
|
||||
|
||||
@my-interaction[
|
||||
[(for ([(n d) (in-query pgc "select * from the_numbers where n < 4")])
|
||||
(printf "~a is ~a\n" n d))
|
||||
(for-each (lambda (n d) (printf "~a: ~a\n" n d))
|
||||
'(0 1 2 3)
|
||||
'("nothing" "the loneliest number" "company" "a crowd"))]
|
||||
[(for/fold ([sum 0]) ([n (in-query pgc "select n from the_numbers")])
|
||||
(+ sum n))
|
||||
(for/fold ([sum 0]) ([n (in-list '(0 1 2 3))])
|
||||
(+ sum n))]
|
||||
]
|
||||
|
||||
Errors in queries generally do not cause the connection to disconnect.
|
||||
|
||||
@my-interaction[
|
||||
[(begin (with-handlers [(exn:fail?
|
||||
(lambda (e)
|
||||
(printf "~a~n" (exn-message e))))]
|
||||
(query-value pgc "select NoSuchField from NoSuchTable"))
|
||||
(query-value pgc "select 'okay to proceed!'"))
|
||||
(begin (display "query-value: relation \"nosuchtable\" does not exist (SQLSTATE 42P01)")
|
||||
"okay to proceed!")]
|
||||
]
|
||||
|
||||
Queries may contain parameters. The easiest way to execute a
|
||||
parameterized query is to provide the parameters ``inline'' after the
|
||||
SQL statement in the query function call.
|
||||
|
||||
@my-interaction[
|
||||
[(query-value pgc
|
||||
"select d from the_numbers where n = $1" 2)
|
||||
"company"]
|
||||
[(query-list pgc
|
||||
"select n from the_numbers where n > $1 and n < $2" 0 3)
|
||||
(list 1 2)]
|
||||
]
|
||||
|
||||
Alternatively, a parameterized query may be prepared in advance and
|
||||
executed later. @tech{Prepared statements} can be executed multiple
|
||||
times with different parameter values.
|
||||
|
||||
@my-interaction[
|
||||
[(define get-less-than-pst
|
||||
(prepare pgc "select n from the_numbers where n < $1"))
|
||||
(void)]
|
||||
[(query-list pgc get-less-than-pst 1)
|
||||
(list 0)]
|
||||
[(query-list pgc (bind-prepared-statement get-less-than-pst '(2)))
|
||||
(list 0 1)]
|
||||
]
|
||||
|
||||
When a connection's work is done, it should be disconnected.
|
||||
|
||||
@my-interaction[
|
||||
[(disconnect pgc)
|
||||
(void)]
|
||||
]
|
||||
|
||||
|
||||
@section[#:tag "dbsec"]{Database Security}
|
||||
|
||||
Database security requires both that the database back end be secured
|
||||
against unauthorized use and that authorized clients are not tricked
|
||||
or subverted into violating the database's security.
|
||||
|
||||
Securing database back ends is mostly beyond the scope of this
|
||||
manual. In brief: choose sufficiently strong authentication methods
|
||||
and keep credentials secure, and follow the
|
||||
@wplink["Principle_of_least_privilege"]{principle of least privilege}:
|
||||
create and use roles that have the minimum permissions needed.
|
||||
|
||||
The following is an incomplete list of security issues related to
|
||||
database @emph{client} programming.
|
||||
|
||||
@;{Add section on db roles and ro/rw access?
|
||||
eg, for servlet create two connections: one ro and one rw
|
||||
(in pg/my/etc, backed by two roles; in sqlite, connection options) }
|
||||
|
||||
@subsection[#:tag "dbsec-sql-injection"]{SQL Injection}
|
||||
|
||||
@wplink["SQL_injection"]{SQL injection} happens when part of a SQL
|
||||
statement that was intended as SQL literal data is instead interpreted
|
||||
as SQL code---possibly @hyperlink["http://xkcd.com/327/"]{malicious}
|
||||
SQL code.
|
||||
|
||||
Avoid dynamically creating SQL query strings by string concatenation
|
||||
or interpolation (eg, with @racket[string-append] or
|
||||
@racket[format]). In most cases, it is possible to use
|
||||
@tech{parameterized queries} instead. For example, instead of this
|
||||
|
||||
@racketblock[
|
||||
(code:comment "WRONG! DANGER!")
|
||||
(query-exec c
|
||||
(format "UPDATE users SET passwd='~a' WHERE user='~a'"
|
||||
user new-passwd))
|
||||
]
|
||||
write one of the following instead (depending on SQL dialect):
|
||||
@racketblock[
|
||||
(code:comment "for PostgreSQL, SQLite")
|
||||
(query-exec c "UPDATE users SET passwd=$1 WHERE user=$2" user new-passwd)
|
||||
(code:comment "for MySQL, SQLite, ODBC")
|
||||
(query-exec c "UPDATE users SET passwd=? WHERE user=?" user new-passwd)
|
||||
]
|
||||
|
||||
The first form would choke on names like @racket["Patrick O'Connor"].
|
||||
Worse, it would be susceptible to attack by malicious input like
|
||||
@racket["me' OR user='root'"], which yields the following SQL
|
||||
statement:
|
||||
|
||||
@(element 'tt "UPDATE users SET passwd='whatever' WHERE user='me' OR user='root'")
|
||||
|
||||
In contrast, using a @tech{parameterized query} causes the
|
||||
parameterized SQL and its arguments to be submitted to the back end
|
||||
separately; the back end then combines them safely.
|
||||
|
||||
Only SQL literal values can be replaced with parameter placeholders; a
|
||||
SQL statement cannot be parameterized over a column name or a sort
|
||||
order, for example. In such cases, constructing the query dynamically
|
||||
may be the only feasible solution. But while the query construction
|
||||
may be influenced by external input, it should never directly
|
||||
incorporate external input without validation. That is, don't do the
|
||||
following:
|
||||
|
||||
@racketblock[
|
||||
(code:comment "WRONG! DANGER!")
|
||||
(query-rows c
|
||||
(format "SELECT name, ~a FROM contestants" column))
|
||||
(query-list c
|
||||
(format "SELECT name FROM contestants ORDER BY score ~a" direction))
|
||||
]
|
||||
|
||||
Instead, select the inserted SQL from known good alternatives:
|
||||
|
||||
@racketblock[
|
||||
(code:comment "BETTER")
|
||||
(query-rows c
|
||||
(format "SELECT name, ~a FROM contestants"
|
||||
(cond [(member column '("wins" "losses")) column]
|
||||
[else (error ....)])))
|
||||
(query-list c
|
||||
(format "SELECT name FROM contestants ORDER BY score ~a"
|
||||
(if ascending? "ASC" "DESC")))
|
||||
]
|
||||
|
||||
@;{ Discuss dynamic IN comparisons? }
|
||||
|
||||
|
||||
@subsection[#:tag "dbsec-xss"]{Cross-site Scripting (XSS)}
|
||||
|
||||
@wplink["Cross-site_scripting"]{Cross-site scripting}---which should
|
||||
probably be called ``HTML injection'' or ``markup injection''---is
|
||||
when arbitrary text from an untrusted source is embedded without
|
||||
escaping into an HTML page. The @emph{unstructured text from the
|
||||
untrusted source} is reinterpreted as @emph{markup from the web
|
||||
server}; if the reinterpreted markup contains embedded Javascript
|
||||
code, it executes with the security privileges associated with the web
|
||||
server's domain.
|
||||
|
||||
This issue has little to do with databases @emph{per se} except that
|
||||
such text is often stored in a database. This issue is mitigated by
|
||||
using structured markup representations like SXML or X-expressions
|
||||
(xexprs), since they automatically escape ``markup'' characters found
|
||||
in embedded text.
|
||||
|
||||
|
||||
@;{============================================================}
|
||||
|
||||
@section[#:tag "dbperf"]{Database Performance}
|
||||
|
||||
Achieving good database performance mostly consists of good database
|
||||
design and intelligent client behavior.
|
||||
|
||||
On the database design side, most important are wise use of indexes
|
||||
and choosing appropriate data representations. As an example of the
|
||||
latter, a regexp-based search using @tt{LIKE} will probably be slower
|
||||
than a specialized
|
||||
@hyperlink["http://www.postgresql.org/docs/9.0/static/textsearch.html"]{full-text
|
||||
search} feature for large data sets. Consult your database back end's
|
||||
manual for additional performance advice.
|
||||
|
||||
The following sections describe a few client-side aspects of
|
||||
performance.
|
||||
|
||||
@subsection[#:tag "dbperf-n+1"]{The N+1 Selects Problem}
|
||||
|
||||
@;{ per comments on http://stackoverflow.com/questions/97197/what-is-the-n1-selects-problem
|
||||
Is N+1 actually a problem?
|
||||
ie, Is communication overhead with db back end worse than
|
||||
grouping cost? Should measure to see. }
|
||||
|
||||
A common mistake is to fetch a large amount of data by running a query
|
||||
to get a set of initial records and then running another query inside
|
||||
a loop with an iteration for each of the initial records. This is
|
||||
sometimes called the ``n+1 selects problem.'' For example:
|
||||
|
||||
@racketblock[
|
||||
(for/list ([(name id) (in-query c "SELECT name, id FROM contestants")])
|
||||
(define wins
|
||||
(query-list c "SELECT contest FROM contests WHERE winner = $1" id))
|
||||
(make-contestant-record name wins))
|
||||
]
|
||||
|
||||
The same information can be retrieved in a single query by performing
|
||||
a @tt{LEFT OUTER JOIN} and grouping the results:
|
||||
|
||||
@racketblock[
|
||||
(for/list ([(name id wins)
|
||||
(in-query c
|
||||
(string-append "SELECT name, id, contest "
|
||||
"FROM contestants LEFT OUTER JOIN contests "
|
||||
"ON contestants.id = contests.winner")
|
||||
#:group '(#("name" "id"))
|
||||
#:group-mode '(list))])
|
||||
(make-contestant-record name wins))
|
||||
]
|
||||
|
||||
The one-query form will perform better when database communication has
|
||||
high latency. On the other hand, it may duplicate the contents of the
|
||||
non-key @tt{name} column, using more bandwidth. Another approach is to
|
||||
perform two queries:
|
||||
|
||||
@racketblock[
|
||||
(let ([id=>name
|
||||
(rows->dict #:key "id" #:value "name"
|
||||
(query c "SELECT id, name FROM contestants"))])
|
||||
(for/list ([(id wins)
|
||||
(in-query c
|
||||
(string-append "SELECT id, contest "
|
||||
"FROM contestants LEFT OUTER JOIN contests "
|
||||
"ON contestants.id = contests.winner")
|
||||
#:group '(#("id"))
|
||||
#:group-mode '(list))])
|
||||
(make-contestant-record (dict-ref id=>name id) wins)))
|
||||
]
|
||||
|
||||
Compared with the one-query form, the two-query form requires
|
||||
additional communication, but it avoids duplicating @tt{name} values
|
||||
in the @tt{OUTER JOIN} results. If additional non-key @tt{contestant}
|
||||
fields were to be retrieved, the bandwidth savings of this approach
|
||||
would be even greater.
|
||||
|
||||
See also @secref["dbperf-testing"].
|
||||
|
||||
|
||||
@subsection[#:tag "dbperf-update-tx"]{Updates and Transactions}
|
||||
|
||||
Using transactions can dramatically improve the performance of bulk
|
||||
database operations, especially @tt{UPDATE} and @tt{INSERT}
|
||||
statements. As an extreme example, on commodity hardware in 2012,
|
||||
SQLite is capable of executing thousands of @tt{INSERT} statements per
|
||||
second within a transaction, but it is capable of only dozens of
|
||||
single-@tt{INSERT} transactions per second.
|
||||
|
||||
|
||||
@subsection[#:tag "dbperf-pstcache"]{Statement Caching}
|
||||
|
||||
Connections cache implicitly prepared statements (that is, statements
|
||||
given in string form directly to a query function). The effect of the
|
||||
cache is to eliminate an extra round-trip to the server (to send the
|
||||
statement and receive a prepared statement handle), leaving just a
|
||||
single round-trip (to send parameters and receive results) per
|
||||
execution.
|
||||
|
||||
Currently, prepared statements are only cached within a
|
||||
transaction. The statement cache is flushed when entering or leaving a
|
||||
transaction and whenever a DDL statement is executed.
|
||||
|
||||
@;{ virtual statements are mostly obsolete }
|
||||
|
||||
|
||||
@subsection[#:tag "dbperf-testing"]{Testing Performance of Database-Backed Programs}
|
||||
|
||||
When testing the performance of database-backed programs, remember to
|
||||
test them in environments with realistic latency and
|
||||
bandwidth. High-latency environments may be roughly approximated with
|
||||
the @racket[high-latency-connection] function, but there's no
|
||||
substitute for the real thing.
|
||||
|
||||
|
||||
@;{============================================================}
|
||||
|
||||
@section[#:tag "intro-servlets"]{Databases and Web Servlets}
|
||||
|
||||
Using database connections in a web servlet is more complicated than
|
||||
in a standalone program. A single servlet potentially serves many
|
||||
requests at once, each in a separate request-handling
|
||||
thread. Furthermore, the use of @racket[send/suspend],
|
||||
@racket[send/suspend/dispatch], etc means that there are many places
|
||||
where a servlet may start and stop executing to service a request.
|
||||
|
||||
Why not use a single connection to handle all of a servlet's requests?
|
||||
That is, create the connection with the servlet instance and never
|
||||
disconnect it. Such a servlet would look something like the following:
|
||||
|
||||
@racketmod[
|
||||
#:file "bad-servlet.rkt"
|
||||
web-server
|
||||
(require db)
|
||||
(define db-conn (postgresql-connect ....))
|
||||
(define (serve req)
|
||||
.... db-conn ....)
|
||||
]
|
||||
|
||||
The main problem with using one connection for all requests is that
|
||||
multiple threads accessing the same connection are not properly
|
||||
@wplink["Isolation_%28database_systems%29"]{isolated}. For example, if
|
||||
one thread is accessing the connection within a transaction and
|
||||
another thread issues a query, the second thread may see invalid data
|
||||
or even disrupt the work of the first thread.
|
||||
|
||||
A secondary problem is performance. A connection can only perform a
|
||||
single query at a time, whereas most database systems are capable of
|
||||
concurrent query processing.
|
||||
|
||||
The proper way to use database connections in a servlet is to create a
|
||||
connection for each request and disconnect it when the request has
|
||||
been handled. But since a request thread may start and stop executing
|
||||
in many places (due to @racket[send/suspend], etc), inserting the code
|
||||
to connect and disconnect at the proper places can be challenging and
|
||||
messy.
|
||||
|
||||
A better solution is to use a @tech{virtual connection}, which
|
||||
automatically creates a request-specific (that is, thread-specific)
|
||||
``actual connection'' by need and disconnects it when the request has
|
||||
been handled (that is, when the thread terminates). Different
|
||||
request-handling threads using the same virtual connection are
|
||||
assigned different actual connections, so the requests are properly
|
||||
isolated.
|
||||
|
||||
@racketmod[
|
||||
#:file "better-servlet.rkt"
|
||||
web-server
|
||||
(require db)
|
||||
(define db-conn
|
||||
(virtual-connection
|
||||
(lambda () (postgresql-connect ....))))
|
||||
(define (serve req)
|
||||
.... db-conn ....)
|
||||
]
|
||||
|
||||
This solution preserves the simplicity of the naive solution and fixes
|
||||
the isolation problem but at the cost of creating many short-lived
|
||||
database connections. That cost can be eliminated by using a
|
||||
@tech{connection pool}:
|
||||
|
||||
@racketmod[
|
||||
#:file "best-servlet.rkt"
|
||||
web-server
|
||||
(require db)
|
||||
(define db-conn
|
||||
(virtual-connection
|
||||
(connection-pool
|
||||
(lambda () (postgresql-connect ....)))))
|
||||
(define (serve req)
|
||||
.... db-conn ....)
|
||||
]
|
||||
|
||||
By using a virtual connection backed by a connection pool, a servlet
|
||||
can achieve simplicity, isolation, and performance all at the same
|
||||
time.
|
||||
|
||||
@;{
|
||||
|
||||
TODO:
|
||||
- talk about virtual statements, too
|
||||
- show actual working servlet code
|
||||
|
||||
--
|
||||
|
||||
A prepared statement is tied to the connection used to create it;
|
||||
attempting to use it with another connection results in an
|
||||
error. Unfortunately, in some scenarios such as web servlets, the
|
||||
lifetimes of connections are short or difficult to track, making
|
||||
prepared statements inconvenient. In such cases, a better tool is the
|
||||
@tech{virtual statement}, which prepares statements on demand and
|
||||
caches them for future use with the same connection.
|
||||
|
||||
@my-interaction[
|
||||
[(define get-less-than-pst
|
||||
(virtual-statement "select n from the_numbers where n < $1"))
|
||||
(void)]
|
||||
[(code:line (query-list pgc1 get-less-than-pst 1) (code:comment "prepares statement for pgc1"))
|
||||
(list 0)]
|
||||
[(code:line (query-list pgc2 get-less-than-pst 2) (code:comment "prepares statement for pgc2"))
|
||||
(list 0 1)]
|
||||
[(code:line (query-list pgc1 get-less-than-pst 3) (code:comment "uses existing prep. stmt."))
|
||||
(list 0 1 2)]
|
||||
]
|
||||
}
|
|
@ -4,7 +4,7 @@
|
|||
scribble/struct
|
||||
racket/sandbox
|
||||
"config.rkt"
|
||||
(for-label db db/util/datetime db/util/geometry db/util/postgresql))
|
||||
(for-label db db/util/datetime db/util/geometry db/util/postgresql db/util/testing))
|
||||
|
||||
@title[#:tag "util"]{Utilities}
|
||||
|
||||
|
@ -203,3 +203,33 @@ types that have no appropriate analogue in the OpenGIS model:
|
|||
Note: PostgreSQL's built-in geometric types are distinct from those
|
||||
provided by the PostGIS extension library (see @secref["geometry"]).
|
||||
}
|
||||
|
||||
@;{========================================}
|
||||
|
||||
@section[#:tag "util-testing"]{Testing Database Programs}
|
||||
|
||||
@defmodule[db/util/testing]
|
||||
|
||||
This module provides utilities for testing programs that use database
|
||||
connections.
|
||||
|
||||
@defproc[(high-latency-connection [connection connection?]
|
||||
[latency (>=/c 0)]
|
||||
[#:sleep-atomic? sleep-atomic? any/c #f])
|
||||
connection?]{
|
||||
|
||||
Returns a proxy connection for @racket[connection] that introduces
|
||||
@racket[latency] additional seconds of latency before operations that
|
||||
require communicating with the database back end---@racket[prepare],
|
||||
@racket[query], @racket[start-transaction], etc.
|
||||
|
||||
Use this function in performance testing to roughly simulate
|
||||
environments with high-latency communication with a database back
|
||||
end.
|
||||
|
||||
If @racket[sleep-atomic?] is true, then the proxy enters atomic mode
|
||||
before sleeping, to better simulate the effect of a long-running FFI
|
||||
call (see @secref["ffi-concurrency"]). Even so, it may not accurately
|
||||
simulate an ODBC connection that internally uses cursors to fetch data
|
||||
on demand, as each fetch would introduce additional latency.
|
||||
}
|
||||
|
|
58
collects/db/util/testing.rkt
Normal file
58
collects/db/util/testing.rkt
Normal file
|
@ -0,0 +1,58 @@
|
|||
#lang racket/base
|
||||
(require racket/contract
|
||||
racket/class
|
||||
ffi/unsafe/atomic
|
||||
"../private/generic/interfaces.rkt"
|
||||
"../private/generic/common.rkt")
|
||||
|
||||
(define high-latency-connection%
|
||||
(class* locking% (connection<%>)
|
||||
(init-private connection
|
||||
latency
|
||||
sleep-atomic?)
|
||||
(inherit call-with-lock)
|
||||
(super-new)
|
||||
|
||||
(define-syntax-rule (define-forward defmethod (sleep-factor method arg ...) ...)
|
||||
(begin
|
||||
(defmethod (method arg ...)
|
||||
(call-with-lock 'method
|
||||
(lambda ()
|
||||
(let ([c connection])
|
||||
(unless c (error/not-connected 'method))
|
||||
(when (positive? sleep-factor)
|
||||
(if sleep-atomic?
|
||||
(call-as-atomic (lambda () (sleep (* sleep-factor latency))))
|
||||
(sleep (* sleep-factor latency))))
|
||||
(send c method arg ...)))))
|
||||
...))
|
||||
|
||||
(define-forward define/public
|
||||
(0 get-dbsystem)
|
||||
(2 query fsym stmt cursor?) ;; 2 because may require implicit prepare
|
||||
(1 prepare fsym stmt close-on-exec?)
|
||||
(1 fetch/cursor fsym stmt fetch-size)
|
||||
(0 get-base)
|
||||
(0 free-statement stmt need-lock?)
|
||||
(0 transaction-status fsym)
|
||||
(1 start-transaction fsym isolation cwt?)
|
||||
(1 end-transaction fsym mode cwt?)
|
||||
(1 list-tables fsym schema))
|
||||
|
||||
(define/override (connected?) (and connection (send connection connected?)))
|
||||
|
||||
(define/public (disconnect)
|
||||
(set! connection #f))))
|
||||
|
||||
(define (high-latency-connection connection latency
|
||||
#:sleep-atomic? [sleep-atomic? #f])
|
||||
(new high-latency-connection%
|
||||
[connection connection]
|
||||
[latency latency]
|
||||
[sleep-atomic? sleep-atomic?]))
|
||||
|
||||
(provide/contract
|
||||
[high-latency-connection
|
||||
(->* (connection? (>=/c 0))
|
||||
(#:sleep-atomic? any/c)
|
||||
connection?)])
|
|
@ -171,7 +171,7 @@
|
|||
(when (ANYFLAGS 'postgresql 'mysql 'sqlite3)
|
||||
(check-roundtrip c (make-bytes #e1e6 (char->integer #\a)))
|
||||
(check-roundtrip c (make-bytes #e1e7 (char->integer #\b)))
|
||||
(check-roundtrip c (make-bytes #e1e8 (char->integer #\c))))
|
||||
#| (check-roundtrip c (make-bytes #e1e8 (char->integer #\c))) |#)
|
||||
(when (ANYFLAGS 'postgresql)
|
||||
(let ([r (query-value c "select cast(repeat('a', 10000000) as bytea)")])
|
||||
(check-pred bytes? r)
|
||||
|
|
Loading…
Reference in New Issue
Block a user