diff --git a/collects/db/TODO b/collects/db/TODO index 6e1a33f427..67f6b852ce 100644 --- a/collects/db/TODO +++ b/collects/db/TODO @@ -28,8 +28,9 @@ (but this could also be done by two locks: outer "ownership" lock and inner "invariant-protecting" lock) -- audit code for break-safety, disable breaks as needed - - make implementation notes section of docs - - explain cursor impl (& rationale) - explain nested tx impl + +- invalidate statement cache on query error + +- 2 call-with-transactions from separate threads can conflict diff --git a/collects/db/base.rkt b/collects/db/base.rkt index d7c6e7ed37..e366406da5 100644 --- a/collects/db/base.rkt +++ b/collects/db/base.rkt @@ -1,5 +1,10 @@ #lang racket/base -(require racket/contract/base) +(require (for-syntax racket/base + syntax/parse + syntax/parse/experimental/template) + racket/dict + syntax/location + racket/contract/base) ;; ============================================================ @@ -74,6 +79,56 @@ (require "private/generic/functions.rkt") +(define fetch-size/c + (or/c exact-positive-integer? +inf.0)) + +(define grouping-field/c (or/c string? exact-nonnegative-integer?)) +(define group/c (or/c grouping-field/c (vectorof grouping-field/c))) +(define grouping/c (or/c group/c (listof group/c))) + +(define group-mode/c + (listof (or/c 'list 'preserve-null))) + +(define in-query/c + (->* (connection? statement?) + (#:fetch fetch-size/c + #:group grouping/c + #:group-mode group-mode/c) + #:rest list? + sequence?)) + +(define here-mod-path (quote-module-path)) + +(define-syntax contracted-in-query + (make-provide/contract-transformer + (quote-syntax in-query/c) + (quote-syntax in-query) + (quote-syntax in-query) + (quote-syntax here-mod-path))) + +(define-sequence-syntax in-query* + (lambda () #'contracted-in-query) + (lambda (stx) + (syntax-parse stx + [[(var ...) (~and form + (in-query (~or (~optional (~seq #:fetch fetch-size)) + (~optional (~seq #:group grouping-fields)) + (~optional (~seq #:group-mode group-mode)) + (~between arg:expr 2 +inf.0)) + ...))] + #:declare fetch-size (expr/c #'fetch-size/c #:context #'form) #:role "fetch size argument" + #:declare grouping-fields (expr/c #'grouping/c #:context #'form) #:role "grouping fields argument" + #:declare group-mode (expr/c #'group-mode/c #:context #'form) #:role "group mode argument" + #:with (c stmt q-arg ...) #'(arg ...) + #:declare c (expr/c #'connection? #:context #'form) #:role "connection argument" + #:declare stmt (expr/c #'statement? #:context #'form) #:role "statement argument" + (template + [(var ...) (in-query-helper (length '(var ...)) c.c stmt.c q-arg ... + (?? (?@ #:fetch fetch-size.c)) + (?? (?@ #:group grouping-fields.c)) + (?? (?@ #:group-mode group-mode.c)))])] + [_ #f]))) + (provide (rename-out [in-query* in-query])) (provide/contract @@ -105,7 +160,8 @@ (->* (connection? statement?) () #:rest list? any)] [query-rows (->* (connection? statement?) - (#:group (or/c (vectorof string?) (listof (vectorof string?)))) + (#:group grouping/c + #:group-mode group-mode/c) #:rest list? (listof vector?))] [query-list (->* (connection? statement?) () #:rest list? list?)] @@ -165,9 +221,15 @@ [group-rows (->* (rows-result? - #:group (or/c (vectorof string?) (listof (vectorof string?)))) - (#:group-mode (listof (or/c 'preserve-null-rows 'list))) - rows-result?)]) + #:group grouping/c) + (#:group-mode (listof (or/c 'list 'preserve-null #|deprecated:|# 'preserve-null-rows))) + rows-result?)] + + [rows->dict + (->* (rows-result? #:key grouping/c #:value grouping/c) + (#:value-mode group-mode/c) + dict?)] + ) ;; ============================================================ diff --git a/collects/db/private/generic/common.rkt b/collects/db/private/generic/common.rkt index c60bfb5cd8..ae4a625327 100644 --- a/collects/db/private/generic/common.rkt +++ b/collects/db/private/generic/common.rkt @@ -421,12 +421,33 @@ dprintf) (super-new) - (field [max-cache-size 50]) - ;; Statement Cache ;; updated by prepare; potentially invalidated by query (via check/invalidate-cache) - (define pst-cache '#hash()) + (field [pst-cache '#hash()] + [cache-mode 'in-transaction] + [cache-flush-next? #f] ;; flush cache on next query + [max-cache-size 20]) + + (define/private (use-cache?) + (and (not cache-flush-next?) + (case cache-mode + ((always) #t) + ((never) #f) + ((in-transaction) (eq? (get-tx-status) #t))))) + + (define/public (stmt-cache-ctl who mode) + (case mode + ((get) cache-mode) + ((flush) (begin (set! cache-flush-next? #t) cache-mode)) + (else (unless (eq? mode cache-mode) + (call-with-lock who + (lambda () + (set! cache-mode mode) + (set! cache-flush-next? #t) + cache-mode)))))) + + ;; -- (define/public (get-cached-statement stmt) (let ([cached-pst (hash-ref pst-cache stmt #f)]) @@ -447,12 +468,6 @@ (dprintf " ** caching statement\n") (set! pst-cache (hash-set pst-cache sql pst)))))) - (define/private (use-cache?) - (case cache-statements - ((always) #t) - ((never) #f) - ((in-transaction) (eq? (get-tx-status) #t)))) - ;; check/invalidate-cache : statement/pst -> hash/#f ;; Returns old cache on invalidation, or #f if stmt is safe. ;; May also return part of old cache (excluding pst) when cache gets too big. @@ -463,7 +478,11 @@ unsafe, because they're usually transactional SQL. |# (define (invalidate! except) - (dprintf " ** invalidating statement cache~a\n" (if except " (too big)" "")) + ;; FIXME: smarter cache ejection (LRU?) + (dprintf " ** invalidating statement cache~a\n" + (cond [except " (too big)"] + [cache-flush-next? " (mode changed)"] + [else ""])) (let ([cache pst-cache]) (set! pst-cache '#hash()) (cond [except @@ -471,7 +490,9 @@ (hash-remove cache (send except get-stmt))] [else cache]))) - (cond [(statement-binding? x) + (cond [cache-flush-next? + (invalidate! #f)] + [(statement-binding? x) (check/invalidate-cache (statement-binding-pst x))] [(prepared-statement? x) (let ([stmt-type (send x get-stmt-type)]) diff --git a/collects/db/private/generic/connect-util.rkt b/collects/db/private/generic/connect-util.rkt index a19b277ad3..660213c22a 100644 --- a/collects/db/private/generic/connect-util.rkt +++ b/collects/db/private/generic/connect-util.rkt @@ -13,33 +13,49 @@ (class object% ;; other-evt : (-> evt) ;; generates other evt to sync on besides req-channel, eg timeouts - (init-field (other-evt (lambda () never-evt))) + (init-field (other-evt (lambda () never-evt)) + (alt-enabled? (lambda () #t))) (super-new) (define req-channel (make-channel)) + (define alt-req-channel (make-channel)) + (define mthread (thread/suspend-to-kill (lambda () (let loop () (sync (wrap-evt req-channel (lambda (p) (p))) + (if (alt-enabled?) + (wrap-evt alt-req-channel (lambda (p) (p))) + never-evt) (other-evt)) (loop))))) (define/public (call proc) + (call* proc req-channel #f)) + (define/public (alt-call-evt proc) + (call* proc alt-req-channel #t)) + + (define/private (call* proc chan as-evt?) (thread-resume mthread (current-thread)) - (let ([result #f] - [sema (make-semaphore 0)]) - (channel-put req-channel - (lambda () - (set! result - (with-handlers ([(lambda (e) #t) - (lambda (e) (cons 'exn e))]) - (cons 'values (call-with-values proc list)))) - (semaphore-post sema))) - (semaphore-wait sema) - (case (car result) - ((values) (apply values (cdr result))) - ((exn) (raise (cdr result)))))))) + (let* ([result #f] + [sema (make-semaphore 0)] + [proc (lambda () + (set! result + (with-handlers ([(lambda (e) #t) + (lambda (e) (cons 'exn e))]) + (cons 'values (call-with-values proc list)))) + (semaphore-post sema))] + [handler + (lambda (_evt) + (semaphore-wait sema) + (case (car result) + ((values) (apply values (cdr result))) + ((exn) (raise (cdr result)))))]) + (if as-evt? + (wrap-evt (channel-put-evt chan proc) handler) + (begin (channel-put chan proc) + (handler #f))))))) ;; ---- @@ -53,14 +69,20 @@ (init-private connection) (define mgr (new manager%)) + (define last-connected? #t) (define-syntax-rule (define-forward (method arg ...) ...) (begin (define/public (method arg ...) - (send mgr call (lambda () (send connection method arg ...)))) ...)) + (send mgr call (lambda () + (begin0 + (send connection method arg ...) + (set! last-connected? (send connection connected?)))))) + ...)) + + (define/public (connected?) last-connected?) (define-forward - (connected?) (disconnect) (get-dbsystem) (query fsym stmt cursor?) @@ -88,8 +110,7 @@ (define virtual-connection% (class* object% (connection<%>) (init-private connector ;; called from client thread - get-key ;; called from client thread - timeout) + get-key) ;; called from client thread (super-new) (define custodian (current-custodian)) @@ -99,36 +120,17 @@ ;; key=>conn : hasheq[key => connection] (define key=>conn (make-hasheq)) - ;; alarms : hasheq[connection => evt] (alarm wrapped to return key) - (define alarms (make-hasheq)) - - (define/private (get key) ;; also refreshes alarm - (let ([c (hash-ref key=>conn key #f)]) - (when c (hash-set! alarms c (fresh-alarm-for key))) - c)) + (define/private (get key) ;; also called by client thread for connected? + (hash-ref key=>conn key #f)) (define/private (put! key c) - (hash-set! key=>conn key c) - (hash-set! alarms c (fresh-alarm-for key))) + (hash-set! key=>conn key c)) - (define/private (fresh-alarm-for key) - (wrap-evt (alarm-evt (+ (current-inexact-milliseconds) timeout)) - (lambda (a) key))) - - (define/private (remove! key timeout?) - ;; timeout? = if connection open, then wait longer - (let* ([c (hash-ref key=>conn key #f)] - [in-trans? (with-handlers ([exn:fail? (lambda (e) #f)]) - (and c - (send c connected?) - (send c transaction-status 'virtual-connection)))]) - (cond [(not c) (void)] - [(and timeout? in-trans?) - (hash-set! alarms c (fresh-alarm-for key))] - [else - (hash-remove! key=>conn key) - (hash-remove! alarms c) - (send c disconnect)]))) + (define/private (remove! key) + (let ([c (get key)]) + (when c + (hash-remove! key=>conn key) + (send c disconnect)))) (define mgr (new manager% @@ -137,16 +139,10 @@ (choice-evt (let ([keys (hash-map key=>conn (lambda (k v) k))]) (handle-evt (apply choice-evt keys) - ;; Assignment to key has expired: move to idle or disconnect. + ;; Assignment to key has expired (lambda (key) (dbdebug "virtual-connection: key expiration: ~e" key) - (remove! key #f)))) - (let ([alarm-evts (hash-map alarms (lambda (k v) v))]) - (handle-evt (apply choice-evt alarm-evts) - ;; Disconnect idle connection. - (lambda (key) - (dbdebug "virtual-connection: timeout") - (remove! key #t))))))))) + (remove! key))))))))) ;; == methods called in client thread == @@ -159,12 +155,12 @@ (connector))]) (send mgr call (lambda () - (when c (remove! key #f)) + (when c (remove! key)) (put! key c*))) c*)] [else (when c ;; got a disconnected connection - (send mgr call (lambda () (remove! key #f)))) + (send mgr call (lambda () (remove! key)))) #f]))) ;; ---- @@ -178,24 +174,27 @@ ...)) (define-forward - (#f #f (connected?)) (#t '_ (get-dbsystem)) (#t '_ (query fsym stmt cursor?)) (#t '_ (fetch/cursor fsym stmt fetch-size)) (#t '_ (start-transaction fsym isolation cwt?)) (#f (void) (end-transaction fsym mode cwt?)) - (#t '_ (transaction-status fsym)) + (#f #f (transaction-status fsym)) (#t '_ (list-tables fsym schema))) (define/public (get-base) (get-connection #t)) + (define/public (connected?) + (let ([c (get (get-key))]) + (and c (send c connected?)))) + (define/public (disconnect) (let ([c (get-connection #f)] [key (get-key)]) (when c (send c disconnect) - (send mgr call (lambda () (remove! key #f))))) + (send mgr call (lambda () (remove! key))))) (void)) (define/public (prepare fsym stmt close-on-exec?) @@ -210,8 +209,7 @@ ;; ---- -(define (virtual-connection connector - #:timeout [timeout +inf.0]) +(define (virtual-connection connector) (let ([connector (cond [(connection-pool? connector) (lambda () (connection-pool-lease connector))] @@ -219,8 +217,7 @@ [get-key (lambda () (thread-dead-evt (current-thread)))]) (new virtual-connection% (connector connector) - (get-key (lambda () (thread-dead-evt (current-thread)))) - (timeout (* 1000 timeout))))) + (get-key get-key)))) ;; ======================================== @@ -233,20 +230,15 @@ max-idle-connections) (super-new) - ;; max-connections is either in [1, 10000] or +inf.0, - ;; if leave-evt is sema, then counter = (max-connections - assigned connections) - ;; ie, includes idle connections - (define lease-evt - (if (= max-connections +inf.0) - always-evt - (make-semaphore max-connections))) - (define proxy-counter 1) ;; for debugging (define actual-counter 1) ;; for debugging (define actual=>number (make-weak-hasheq)) ;; == methods called in manager thread == + ;; assigned-connections : nat + (define assigned-connections 0) + ;; proxy=>evt : hasheq[proxy-connection => evt] (define proxy=>evt (make-hasheq)) @@ -266,7 +258,8 @@ proxy-number (if take-idle? "idle" "new") (hash-ref actual=>number raw-c "???")) - (hash-set! proxy=>evt c key) + (hash-set! proxy=>evt c (wrap-evt key (lambda (_e) c))) + (set! assigned-connections (add1 assigned-connections)) c)) (define/private (release* proxy raw-c why) @@ -279,11 +272,14 @@ (hash-remove! proxy=>evt proxy) (when raw-c (with-handlers ([exn:fail? void]) - (send raw-c end-transaction 'connection-pool 'rollback)) - (cond [(< (length idle-list) max-idle-connections) + ;; If in tx, just disconnect (for simplicity; else must loop for nested txs) + (when (send raw-c transaction-status 'connection-pool) + (send raw-c disconnect))) + (cond [(and (< (length idle-list) max-idle-connections) + (send raw-c connected?)) (set! idle-list (cons raw-c idle-list))] [else (send raw-c disconnect)]) - (when (semaphore? lease-evt) (semaphore-post lease-evt)))) + (set! assigned-connections (sub1 assigned-connections)))) (define/private (new-connection) (let ([c (connector)] @@ -299,19 +295,18 @@ (new manager% (other-evt (lambda () - (let ([evts (hash-map proxy=>evt (lambda (k v) (wrap-evt v (lambda (e) k))))]) + (let ([evts (hash-values proxy=>evt)]) (handle-evt (apply choice-evt evts) (lambda (proxy) (release* proxy (send proxy release-connection) - "release-evt")))))))) + "release-evt")))))) + (alt-enabled? (lambda () (< assigned-connections max-connections))))) ;; == methods called in client thread == - (define/public (lease key) - (wrap-evt lease-evt - (lambda (_e) - (send mgr call (lambda () (lease* key)))))) + (define/public (lease-evt key) + (send mgr alt-call-evt (lambda () (lease* key)))) (define/public (release proxy) (let ([raw-c (send proxy release-connection)]) @@ -350,8 +345,7 @@ (end-transaction fsym mode cwt?) (list-tables fsym schema)) - ;; (define-forward define/override (connected?)) - (define/override (connected?) (and connection #t)) + (define/override (connected?) (and connection (send connection connected?))) (define/public (disconnect) (send pool release this)) @@ -380,7 +374,7 @@ (cond [(thread? key) (thread-dead-evt key)] [(custodian? key) (make-custodian-box key #t)] [else key])] - [result (sync/timeout 0.1 (send pool lease key))]) + [result (sync/timeout 0.1 (send pool lease-evt key))]) (unless result (uerror 'connection-pool-lease "cannot obtain connection; connection pool limit reached")) diff --git a/collects/db/private/generic/functions.rkt b/collects/db/private/generic/functions.rkt index 33ab8a7eff..9e0c50d65b 100644 --- a/collects/db/private/generic/functions.rkt +++ b/collects/db/private/generic/functions.rkt @@ -146,9 +146,7 @@ [result (query/rows c 'query-rows sql #f)] [result (cond [(not (null? group-fields-list)) - (group-rows-result* 'query-rows result group-fields-list - (not (memq 'preserve-null-rows group-mode)) - (memq 'list group-mode))] + (group-rows-result* 'query-rows result group-fields-list group-mode)] [else result])]) (rows-result-rows result))) @@ -204,34 +202,38 @@ ;; ======================================== -(define (in-query c stmt #:fetch [fetch-size +inf.0] . args) - (apply in-query-helper #f c stmt #:fetch fetch-size args)) - -(define-sequence-syntax in-query* - (lambda () #'in-query) - (lambda (stx) - (syntax-case stx () - [[(var ...) (in-query c stmt arg ...)] - #'[(var ...) - (in-query-helper (length '(var ...)) c stmt arg ...)]] - [_ #f]))) +(define (in-query c stmt + #:fetch [fetch-size +inf.0] + #:group [grouping-fields null] + #:group-mode [group-mode null] + . args) + (apply in-query-helper #f c stmt + #:fetch fetch-size + #:group grouping-fields + #:group-mode group-mode + args)) (define (in-query-helper vars c stmt #:fetch [fetch-size +inf.0] + #:group [grouping-fields null] + #:group-mode [group-mode null] . args) - ;; Not protected by contract - (unless (connection? c) - (apply raise-type-error 'in-query "connection" 0 c stmt args)) - (unless (statement? stmt) - (apply raise-type-error 'in-query "statement" 1 c stmt args)) - (unless (or (exact-positive-integer? fetch-size) (eqv? fetch-size +inf.0)) - (raise-type-error 'in-query "positive integer or +inf.0" fetch-size)) - (let* ([check (or vars 'rows)] + (when (and (not (null? grouping-fields)) + (< fetch-size +inf.0)) + (error 'in-query "cannot apply grouping to cursor (finite fetch-size)")) + (let* ([check + ;; If grouping, can't check expected arity. + ;; FIXME: should check header includes named fields + (cond [(null? grouping-fields) (or vars 'rows)] + [else 'rows])] [stmt (compose-statement 'in-query c stmt args check)]) (cond [(eqv? fetch-size +inf.0) (in-list/vector->values (rows-result-rows - (query/rows c 'in-query stmt vars)))] + (let ([result (query/rows c 'in-query stmt vars)]) + (if (null? grouping-fields) + result + (group-rows-result* 'in-query result grouping-fields group-mode)))))] [else (let ([cursor (query/cursor c 'in-query stmt vars)]) (in-list-generator/vector->values @@ -333,62 +335,38 @@ ;; ======================================== +;; FIXME: add 'assume-sorted optimization option? + (define (group-rows result #:group key-fields-list #:group-mode [group-mode null]) (when (null? key-fields-list) (error 'group-rows "expected at least one grouping field set")) - (group-rows-result* 'group-rows - result - key-fields-list - (not (memq 'preserve-null-rows group-mode)) - (memq 'list group-mode))) + (group-rows-result* 'group-rows result key-fields-list group-mode)) -(define (group-rows-result* fsym result key-fields-list invert-outer? as-list?) - (let* ([key-fields-list - (if (list? key-fields-list) key-fields-list (list key-fields-list))] - [total-fields (length (rows-result-headers result))] - [name-map - (for/hash ([header (in-list (rows-result-headers result))] - [i (in-naturals)] - #:when (assq 'name header)) - (values (cdr (assq 'name header)) i))] +(define (group-rows-result* fsym result key-fields-list group-mode) + (let* ([invert-outer? (not (or (memq 'preserve-null group-mode) + ;; old flag, deprecated: + (memq 'preserve-null-rows group-mode)))] + [as-list? (memq 'list group-mode)] + [headers (rows-result-headers result)] + [total-fields (length headers)] + [name-map (headers->name-map headers)] [fields-used (make-vector total-fields #f)] [key-indexes-list - (for/list ([key-fields (in-list key-fields-list)]) - (for/vector ([key-field (in-vector key-fields)]) - (let ([key-index - (cond [(string? key-field) - (hash-ref name-map key-field #f)] - [else key-field])]) - (when (string? key-field) - (unless key-index - (error fsym "grouping field ~s not found" key-field))) - (when (exact-integer? key-field) - (unless (< key-index total-fields) - (error fsym "grouping index ~s out of range [0, ~a]" - key-index (sub1 total-fields)))) - (when (vector-ref fields-used key-index) - (error fsym "grouping field ~s~a used multiple times" - key-field - (if (string? key-field) - (format " (index ~a)" key-index) - ""))) - (vector-set! fields-used key-index #t) - key-index)))] + (group-list->indexes fsym name-map total-fields fields-used key-fields-list)] [residual-length - (for/sum ([x (in-vector fields-used)]) - (if x 0 1))]) + (for/sum ([x (in-vector fields-used)]) (if x 0 1))]) (when (= residual-length 0) (error fsym "cannot group by all fields")) (when (and (> residual-length 1) as-list?) (error fsym - "exactly one residual field expected for #:group-mode 'list, got ~a" + "expected exactly one residual field for #:group-mode 'list, got ~a" residual-length)) (let* ([initial-projection (for/vector #:length total-fields ([i (in-range total-fields)]) i)] [headers - (group-headers (list->vector (rows-result-headers result)) + (group-headers (list->vector headers) initial-projection key-indexes-list)] [rows @@ -400,6 +378,46 @@ as-list?)]) (rows-result headers rows)))) +(define (headers->name-map headers) + (for/hash ([header (in-list headers)] + [i (in-naturals)] + #:when (assq 'name header)) + (values (cdr (assq 'name header)) i))) + +(define (group-list->indexes fsym name-map total-fields fields-used key-fields-list) + (let ([key-fields-list (if (list? key-fields-list) key-fields-list (list key-fields-list))]) + (for/list ([key-fields (in-list key-fields-list)]) + (group->indexes fsym name-map total-fields fields-used key-fields)))) + +(define (group->indexes fsym name-map total-fields fields-used key-fields) + (let ([key-fields (if (vector? key-fields) key-fields (vector key-fields))]) + (for/vector ([key-field (in-vector key-fields)]) + (grouping-field->index fsym name-map total-fields fields-used key-field)))) + +(define (grouping-field->index fsym name-map total-fields fields-used key-field) + (let ([key-index + (cond [(string? key-field) + (hash-ref name-map key-field #f)] + [else key-field])]) + (when (string? key-field) + (unless key-index + (error fsym "expected grouping field in ~s, got: ~e" + (sort (hash-keys name-map) stringkey-row row) (for/vector #:length key-row-length ([i (in-vector key-indexes)]) (vector-ref row i))) - (define (residual-all-null? row) (for/and ([i (in-vector residual-projection)]) (sql-null? (vector-ref row i)))) - (let* ([key-table (make-hash)] [r-keys (for/fold ([r-keys null]) @@ -476,3 +491,55 @@ invert-outer? as-list?)]) (vector-append key (vector residuals))))))])) + +;; ======================================== + +(define not-given (gensym 'not-given)) + +(define (rows->dict result + #:key key-field/s + #:value value-field/s + #:value-mode [value-mode null]) + (let* ([who 'rows->dict] + [headers (rows-result-headers result)] + [total-fields (length headers)] + [name-map (headers->name-map headers)] + [preserve-null? (memq 'preserve-null value-mode)] + [value-list? (memq 'list value-mode)]) + (define (make-project field/s) + (if (vector? field/s) + (let* ([indexes (group->indexes who name-map total-fields #f field/s)] + [indexes-length (vector-length indexes)]) + (lambda (v) + (for/vector #:length indexes-length ([i (in-vector indexes)]) + (vector-ref v i)))) + (let ([index (grouping-field->index who name-map total-fields #f field/s)]) + (lambda (v) (vector-ref v index))))) + (define get-key (make-project key-field/s)) + (define get-value (make-project value-field/s)) + (define ok-value? + (cond [preserve-null? (lambda (v) #t)] + [(vector? value-field/s) + (lambda (v) (not (for/or ([e (in-vector v)]) (sql-null? e))))] + [else (lambda (v) (not (sql-null? v)))])) + (for/fold ([table '#hash()]) ([row (in-list (if value-list? + (reverse (rows-result-rows result)) + (rows-result-rows result)))]) + (let* ([key (get-key row)] + [value (get-value row)] + [old-value (hash-ref table key (if value-list? '() not-given))]) + (unless (or value-list? + (eq? (hash-ref table key not-given) not-given) + ;; FIXME: okay to coalesce values if equal? + (equal? value old-value)) + (error who "duplicate value for key: ~e; values are ~e and ~e" + key old-value value)) + (if value-list? + (hash-set table key + (if (ok-value? value) + (cons value old-value) + ;; If all-NULL value, still enter key => '() into dict + old-value)) + (if (ok-value? value) + (hash-set table key value) + table)))))) diff --git a/collects/db/private/generic/interfaces.rkt b/collects/db/private/generic/interfaces.rkt index e3aa053c41..f9828cec83 100644 --- a/collects/db/private/generic/interfaces.rkt +++ b/collects/db/private/generic/interfaces.rkt @@ -29,7 +29,9 @@ ;; connection<%> (define connection<%> (interface () + ;; connected? method must return promptly (eg, without acquiring lock) connected? ;; -> boolean + disconnect ;; -> void get-dbsystem ;; -> dbsystem<%> query ;; symbol statement -> QueryResult diff --git a/collects/db/private/generic/place-client.rkt b/collects/db/private/generic/place-client.rkt index 68c60e16cd..b9c29fe001 100644 --- a/collects/db/private/generic/place-client.rkt +++ b/collects/db/private/generic/place-client.rkt @@ -49,18 +49,20 @@ (define/private (call* method-name args need-connected?) (cond [channel (pchan-put channel (cons method-name args)) - (match (pchan-get channel) - [(cons 'values vals) - (apply values (for/list ([val (in-list vals)]) (sexpr->result val)))] - [(list 'error message) - (raise (make-exn:fail message (current-continuation-marks)))])] + (let* ([response (pchan-get channel)] + [still-connected? (car response)]) + (when (not still-connected?) (set! channel #f)) + (match (cdr response) + [(cons 'values vals) + (apply values (for/list ([val (in-list vals)]) (sexpr->result val)))] + [(list 'error message) + (raise (make-exn:fail message (current-continuation-marks)))]))] [need-connected? (unless channel (error/not-connected method-name))] [else (void)])) (define/override (connected?) - ;; FIXME: can underlying connection disconnect w/o us knowing? (and channel #t)) (define/public (disconnect) diff --git a/collects/db/private/generic/place-server.rkt b/collects/db/private/generic/place-server.rkt index cc9bce68df..1b09e044f3 100644 --- a/collects/db/private/generic/place-server.rkt +++ b/collects/db/private/generic/place-server.rkt @@ -68,8 +68,8 @@ where ::= (list 'sqlite3 path/sym mode-sym delay-num limit-num) Connection methods protocol client -> server: (list ' arg ...) -server -> client: (or (list 'values result ...) - (list 'error string)) +server -> client: (or (list boolean 'values result ...) + (list boolean 'error string)) |# (define proxy-server% @@ -86,10 +86,12 @@ server -> client: (or (list 'values result ...) (serve1) (when connection (serve))) + (define/private (still-connected?) (and connection (send connection connected?))) + (define/private (serve1) (with-handlers ([exn? (lambda (e) - (pchan-put channel (list 'error (exn-message e))))]) + (pchan-put channel (list (still-connected?) 'error (exn-message e))))]) (call-with-values (lambda () (match (pchan-get channel) @@ -117,7 +119,7 @@ server -> client: (or (list 'values result ...) (transaction-status w))])) (lambda results (let ([results (for/list ([result (in-list results)]) (result->sexpr result))]) - (pchan-put channel (cons 'values results))))))) + (pchan-put channel (cons (still-connected?) (cons 'values results)))))))) (define/private (sexpr->statement x) (match x diff --git a/collects/db/scribblings/config.rkt b/collects/db/scribblings/config.rkt index 5696f47da1..e6dec4181a 100644 --- a/collects/db/scribblings/config.rkt +++ b/collects/db/scribblings/config.rkt @@ -10,6 +10,12 @@ (define (tech/reference . pre-flows) (apply tech #:doc '(lib "scribblings/reference/reference.scrbl") pre-flows)) +(define (parheading . pre-flows) + (elem (apply bold pre-flows) (hspace 1))) + +(define (wplink path . pre-flows) + (apply hyperlink (string-append "http://en.wikipedia.org/wiki/" path) pre-flows)) + ;; ---- (define the-eval (make-base-eval)) diff --git a/collects/db/scribblings/connect.scrbl b/collects/db/scribblings/connect.scrbl index 20861d2d46..72ec2fb9bf 100644 --- a/collects/db/scribblings/connect.scrbl +++ b/collects/db/scribblings/connect.scrbl @@ -19,18 +19,8 @@ administrative functions for managing connections. There are four kinds of base connection, and they are divided into two groups: @deftech{wire-based connections} and @deftech{FFI-based connections}. PostgreSQL and MySQL connections are wire-based, and -SQLite and ODBC connections are FFI-based. - -Wire-based connections communicate using @tech/reference{ports}, which -do not cause other Racket threads to block. In contrast, an FFI call -causes all Racket threads to block until it completes, so FFI-based -connections can degrade the interactivity of a Racket program, -particularly if long-running queries are performed using the -connection. This problem can be avoided by creating the FFI-based -connection in a separate @tech/reference{place} using the -@racket[#:use-place] keyword argument. Such a connection will not -block all Racket threads during queries; the disadvantage is the cost -of creating and communicating with a separate @tech/reference{place}. +SQLite and ODBC connections are FFI-based. See also +@secref["ffi-concurrency"]. Base connections are made using the following functions. @@ -240,7 +230,7 @@ Base connections are made using the following functions. If @racket[use-place] is true, the actual connection is created in a distinct @tech/reference{place} for database connections and a - proxy is returned. + proxy is returned; see @secref["ffi-concurrency"]. If the connection cannot be made, an exception is raised. @@ -289,7 +279,7 @@ Base connections are made using the following functions. If @racket[use-place] is true, the actual connection is created in a distinct @tech/reference{place} for database connections and a - proxy is returned. + proxy is returned; see @secref["ffi-concurrency"]. If the connection cannot be made, an exception is raised. } @@ -436,14 +426,13 @@ connection associated with the current thread, one is obtained by calling @racket[connect]. An actual connection is disconnected when its associated thread dies. -@;{or if @racket[timeout] seconds elapse since the actual connection was last used.} - Virtual connections are especially useful in contexts such as web -servlets, where each request is handled in a fresh thread. A single -global virtual connection can be defined, freeing each servlet request -from explicitly opening and closing its own connections. In -particular, a @tech{virtual connection} backed by a @tech{connection -pool} combines convenience with efficiency: +servlets (see @secref["intro-servlets"]), where each request is +handled in a fresh thread. A single global virtual connection can be +defined, freeing each servlet request from explicitly opening and +closing its own connections. In particular, a @tech{virtual +connection} backed by a @tech{connection pool} combines convenience +with efficiency: @examples/results[ [(define the-connection diff --git a/collects/db/scribblings/db.scrbl b/collects/db/scribblings/db.scrbl index 8b8e76d8fa..4fe3eb2943 100644 --- a/collects/db/scribblings/db.scrbl +++ b/collects/db/scribblings/db.scrbl @@ -50,7 +50,7 @@ and Doug Orleans for contributions to @tt{spgsql}, the PostgreSQL-only predecessor of this library. The SQLite support is based in part on code from Jay McCarthy's @tt{sqlite} package. -@include-section["introduction.scrbl"] +@include-section["using-db.scrbl"] @include-section["connect.scrbl"] @include-section["query.scrbl"] @include-section["sql-types.scrbl"] diff --git a/collects/db/scribblings/introduction.scrbl b/collects/db/scribblings/introduction.scrbl deleted file mode 100644 index 9305d9e9ac..0000000000 --- a/collects/db/scribblings/introduction.scrbl +++ /dev/null @@ -1,290 +0,0 @@ -#lang scribble/doc -@(require scribble/manual - scribble/eval - scribble/struct - racket/sandbox - "config.rkt" - (for-label db - web-server/lang/web)) - -@title[#:tag "introduction"]{Introduction} - -This section introduces this library's basic features and discusses -how to build a database-backed web servlet. - -@section[#:tag "intro-basic"]{Basic Features} - -The following annotated program demonstrates how to connect to a -database and perform simple queries. Some of the SQL syntax used below -is PostgreSQL-specific, such as the syntax of query parameters -(@litchar{$1} rather than @litchar{?}). - -@my-interaction[ -[(require db) - (void)] -] - -First we create a connection. Replace @racket[_user], @racket[_db], -and @racket[_password] below with the appropriate values for your -configuration (see @secref{creating-connections} for other connection examples): - -@my-interaction[ -[(define pgc - (postgresql-connect #:user _user - #:database _db - #:password _password)) - (void)] -[pgc - (new connection%)] -] - -Use @racket[query-exec] method to execute a SQL statement for effect. - -@my-interaction[ -[(query-exec pgc - "create temporary table the_numbers (n integer, d varchar(20))") - (void)] -[(query-exec pgc - "insert into the_numbers values (0, 'nothing')") - (void)] -[(query-exec pgc - "insert into the_numbers values (1, 'the loneliest number')") - (void)] -[(query-exec pgc - "insert into the_numbers values (2, 'company')") - (void)] -] - -The @racket[query] function is a more general way to execute a -statement. It returns a structure encapsulating information about the -statement's execution. (But some of that information varies from -system to system and is subject to change.) - -@my-interaction[ -[(query pgc "insert into the_numbers values (3, 'a crowd')") - (simple-result '((command insert 0 1)))] -[(query pgc "select n, d from the_numbers where n % 2 = 0") - (rows-result - (list - '((name . "n") (typeid . 23)) - '((name . "d") (typeid . 1043))) - '(#(0 "nothing") #(2 "company")))] -] - -When the query is known to return rows and when the field -descriptions are not needed, it is more convenient to use the -@racket[query-rows] function. - -@my-interaction[ -[(query-rows pgc "select n, d from the_numbers where n % 2 = 0") - '(#(0 "nothing") #(2 "company"))] -] - -Use @racket[query-row] for queries that are known to return exactly -one row. - -@my-interaction[ -[(query-row pgc "select * from the_numbers where n = 0") - (vector 0 "nothing")] -] - -Similarly, use @racket[query-list] for queries that produce rows of -exactly one column. - -@my-interaction[ -[(query-list pgc "select d from the_numbers order by n") - (list "nothing" "the loneliest number" "company" "a crowd")] -] - -When a query is known to return a single value (one row and one -column), use @racket[query-value]. - -@my-interaction[ -[(query-value pgc "select count(*) from the_numbers") - 4] -[(query-value pgc "select d from the_numbers where n = 5") - (error 'query-value - "query returned zero rows: ~s" - "select d from the_numbers where n = 5")] -] - -When a query may return zero or one rows, as the last example, use -@racket[query-maybe-row] or @racket[query-maybe-value] instead. - -@my-interaction[ -[(query-maybe-value pgc "select d from the_numbers where n = 5") - (values #f)] -] - -The @racket[in-query] function produces a sequence that can be used -with Racket's iteration forms: - -@my-interaction[ -[(for ([(n d) (in-query pgc "select * from the_numbers where n < 4")]) - (printf "~a is ~a\n" n d)) - (for-each (lambda (n d) (printf "~a: ~a\n" n d)) - '(0 1 2 3) - '("nothing" "the loneliest number" "company" "a crowd"))] -[(for/fold ([sum 0]) ([n (in-query pgc "select n from the_numbers")]) - (+ sum n)) - (for/fold ([sum 0]) ([n (in-list '(0 1 2 3))]) - (+ sum n))] -] - -Errors in queries generally do not cause the connection to disconnect. - -@my-interaction[ -[(begin (with-handlers [(exn:fail? - (lambda (e) (printf "~a~n" (exn-message e))))] - (query-value pgc "select NoSuchField from NoSuchTable")) - (query-value pgc "select 'okay to proceed!'")) - (begin (display "query-value: relation \"nosuchtable\" does not exist (SQLSTATE 42P01)") - "okay to proceed!")] -] - -Queries may contain parameters. The easiest way to execute a -parameterized query is to provide the parameters ``inline'' after the -SQL statement in the query function call. - -@my-interaction[ -[(query-value pgc - "select d from the_numbers where n = $1" 2) - "company"] -[(query-list pgc - "select n from the_numbers where n > $1 and n < $2" 0 3) - (list 1 2)] -] - -Alternatively, a parameterized query may be prepared in advance and -executed later. @tech{Prepared statements} can be executed multiple -times with different parameter values. - -@my-interaction[ -[(define get-less-than-pst - (prepare pgc "select n from the_numbers where n < $1")) - (void)] -[(query-list pgc get-less-than-pst 1) - (list 0)] -[(query-list pgc (bind-prepared-statement get-less-than-pst '(2))) - (list 0 1)] -] - -When a connection's work is done, it should be disconnected. - -@my-interaction[ -[(disconnect pgc) - (void)] -] - - -@;{============================================================} - -@section[#:tag "intro-servlets"]{Databases and Web Servlets} - -Using database connections in a web servlet is more complicated than -in a standalone program. A single servlet is potentially used to serve -many requests at once, each in a separate request-handling -thread. Furthermore, the use of @racket[send/suspend], -@racket[send/suspend/dispatch], etc means that there are many places -where a servlet may start and stop executing to service a request. - -Why not use a single connection to handle all of a servlet's requests? -That is, create the connection with the servlet instance and never -disconnect it. Such a servlet would look something like the following: - -@racketmod[ -#:file "bad-servlet.rkt" -web-server -(require db) -(define db-conn (postgresql-connect ....)) -(define (serve req) - .... db-conn ....) -] - -The main problem with using one connection for all requests is that -multiple threads accessing the same connection are not properly -@hyperlink["http://en.wikipedia.org/wiki/Isolation_%28database_systems%29"]{isolated}. For -example, if two threads both attempt to start a new transaction, the -second one will fail, because the first thread has already put the -connection into an ``in transaction'' state. And if one thread is -accessing the connection within a transaction and another thread -issues a query, the second thread may see invalid data or even disrupt -the work of the first thread. - -A secondary problem is performance. A connection can only perform a -single query at a time, whereas most database systems are capable of -concurrent query processing. - -The proper way to use database connections in a servlet is to create a -connection for each request and disconnect it when the request -is handled. But since a request thread may start and stop executing in -many places (due to @racket[send/suspend], etc), inserting the code to -connect and disconnect at the proper places can be challenging and -messy. - -A better solution is to use a @tech{virtual connection}, which creates -a request-specific (that is, thread-specific) ``actual connection'' by -need and disconnects it when the request is handled (that is, when the -thread terminates). Different request-handling threads using the same -virtual connection are assigned different actual connection, so the -threads are properly isolated. - -@racketmod[ -#:file "better-servlet.rkt" -web-server -(require db) -(define db-conn - (virtual-connection - (lambda () (postgresql-connect ....)))) -(define (serve req) - .... db-conn ....) -] - -This solution preserves the simplicity of the naive solution and fixes -the isolation problem but at the cost of creating many short-lived -database connections. That cost can be eliminated by using a -@tech{connection pool}: - -@racketmod[ -#:file "best-servlet.rkt" -web-server -(require db) -(define db-conn - (virtual-connection - (connection-pool - (lambda () (postgresql-connect ....))))) -(define (serve req) - .... db-conn ....) -] - -By using a virtual connection backed by a connection pool, a servlet -can achieve simplicity, isolation, and performance. - -@;{ - -TODO: - - talk about virtual statements, too - - show actual working servlet code - --- - -A prepared statement is tied to the connection used to create it; -attempting to use it with another connection results in an -error. Unfortunately, in some scenarios such as web servlets, the -lifetimes of connections are short or difficult to track, making -prepared statements inconvenient. In such cases, a better tool is the -@tech{virtual statement}, which prepares statements on demand and -caches them for future use with the same connection. - -@my-interaction[ -[(define get-less-than-pst - (virtual-statement "select n from the_numbers where n < $1")) - (void)] -[(code:line (query-list pgc1 get-less-than-pst 1) (code:comment "prepares statement for pgc1")) - (list 0)] -[(code:line (query-list pgc2 get-less-than-pst 2) (code:comment "prepares statement for pgc2")) - (list 0 1)] -[(code:line (query-list pgc1 get-less-than-pst 3) (code:comment "uses existing prep. stmt.")) - (list 0 1 2)] -] -} diff --git a/collects/db/scribblings/notes.scrbl b/collects/db/scribblings/notes.scrbl index 3a28fac46d..9673ed487d 100644 --- a/collects/db/scribblings/notes.scrbl +++ b/collects/db/scribblings/notes.scrbl @@ -96,6 +96,21 @@ package in Red Hat.} ] +@section[#:tag "ffi-concurrency"]{FFI-Based Connections and Concurrency} + +@tech{Wire-based connections} communicate using +@tech/reference{ports}, which do not cause other Racket threads to +block. In contrast, an FFI call causes all Racket threads to block +until it completes, so @tech{FFI-based connections} can degrade the +interactivity of a Racket program, particularly if long-running +queries are performed using the connection. This problem can be +avoided by creating the FFI-based connection in a separate +@tech/reference{place} using the @racket[#:use-place] keyword +argument. Such a connection will not block all Racket threads during +queries; the disadvantage is the cost of creating and communicating +with a separate @tech/reference{place}. + + @section[#:tag "odbc-requirements"]{ODBC Requirements} ODBC requires the appropriate driver manager native library as well as diff --git a/collects/db/scribblings/query.scrbl b/collects/db/scribblings/query.scrbl index b327abdac8..0de5852240 100644 --- a/collects/db/scribblings/query.scrbl +++ b/collects/db/scribblings/query.scrbl @@ -5,7 +5,7 @@ racket/sandbox "config.rkt" "tabbing.rkt" - (for-label db db/util/geometry db/util/postgresql)) + (for-label db db/util/geometry db/util/postgresql racket/dict)) @title[#:tag "query-api"]{Queries} @@ -19,7 +19,7 @@ raises an exception. Different query functions impose different constraints on the query results and offer different mechanisms for processing the results. -@bold{Errors} In most cases, a query error does not cause the +@parheading{Errors} In most cases, a query error does not cause the connection to be disconnected. Specifically, the following kinds of errors should never cause a connection to be disconnected: @itemize[ @@ -43,7 +43,7 @@ disconnected: See @secref["transactions"] for information on how errors can affect the transaction status. -@bold{Character encoding} This library is designed to interact with +@parheading{Character encoding} This library is designed to interact with database systems using the UTF-8 character encoding. The connection functions attempt to negotiate UTF-8 communication at the beginning of every connection, but some systems also allow the character encoding @@ -53,13 +53,12 @@ and data might get corrupted in transmission. Avoid changing a connection's character encoding. When possible, the connection will observe the change and automatically disconnect with an error. -@bold{Synchronization} Connections are internally synchronized: it is -safe to perform concurrent queries on the same connection object from -different threads. Connections are not kill-safe: killing a thread -that is using a connection---or shutting down the connection's -managing custodian---may leave the connection locked, causing future -operations to block indefinitely. See @secref["kill-safe"] for a -way to make kill-safe connections. +@parheading{Synchronization} Connections are internally synchronized: +it is safe to use a connection from different threads +concurrently. Most connections are not kill-safe: killing a thread +that is using a connection may leave the connection locked, causing +future operations to block indefinitely. See also +@secref["kill-safe"]. @section{Statements} @@ -77,16 +76,14 @@ All query functions require both a connection and a ] A SQL statement may contain parameter placeholders that stand for SQL -scalar values. The parameter values must be supplied when the -statement is executed; the parameterized statement and parameter -values are sent to the database back end, which combines them -correctly and safely. +scalar values; such statements are called @deftech{parameterized +queries}. The parameter values must be supplied when the statement is +executed; the parameterized statement and parameter values are sent to +the database back end, which combines them correctly and safely. Use parameters instead of Racket string interpolation (eg, @racket[format] or @racket[string-append]) to avoid -@hyperlink["http://xkcd.com/327/"]{SQL injection}, where a string -intended to represent a SQL scalar value is interpreted as---possibly -malicious---SQL code instead. +@secref["dbsec-sql-injection"]. The syntax of placeholders varies depending on the database system. For example: @@ -140,11 +137,13 @@ The types of parameters and returned fields are described in @defproc[(query-rows [connection connection?] [stmt statement?] [arg any/c] ... - [#:group grouping-fields - (or/c (vectorof string?) (listof (vectorof string?))) + [#:group groupings + (let* ([field/c (or/c string? exact-nonnegative-integer?)] + [grouping/c (or/c field/c (vectorof field/c))]) + (or/c grouping/c (listof grouping/c))) null] [#:group-mode group-mode - (listof (or/c 'preserve-null-rows 'list)) + (listof (or/c 'preserve-null 'list)) null]) (listof vector?)]{ @@ -158,7 +157,7 @@ The types of parameters and returned fields are described in (list (vector 17))] ] - If @racket[grouping-fields] is not empty, the result is the same as if + If @racket[groupings] is not empty, the result is the same as if @racket[group-rows] had been called on the result rows. } @@ -245,7 +244,15 @@ The types of parameters and returned fields are described in @defproc[(in-query [connection connection?] [stmt statement?] [arg any/c] ... - [#:fetch fetch-size (or/c exact-positive-integer? +inf.0) +inf.0]) + [#:fetch fetch-size (or/c exact-positive-integer? +inf.0) +inf.0] + [#:group groupings + (let* ([field/c (or/c string? exact-nonnegative-integer?)] + [grouping/c (or/c field/c (vectorof field/c))]) + (or/c grouping/c (listof grouping/c))) + null] + [#:group-mode group-mode + (listof (or/c 'preserve-null 'list)) + null]) sequence?]{ Executes a SQL query, which must produce rows, and returns a @@ -260,6 +267,11 @@ The types of parameters and returned fields are described in open cursors; attempting to fetch more rows may fail. On PostgreSQL, a cursor can be opened only within a transaction. + If @racket[groupings] is not empty, the result is the same as + if @racket[group-rows] had been called on the result rows. If + @racket[groupings] is not empty, then @racket[fetch-size] must + be @racket[+inf.0]; otherwise, an exception is raised. + @examples/results[ [(for/list ([n (in-query pgc "select n from the_numbers where n < 2")]) n) @@ -329,25 +341,29 @@ future version of this library (even new minor versions). } @defproc[(group-rows [result rows-result?] - [#:group grouping-fields - (or/c (vectorof string?) (listof (vectorof string?)))] + [#:group groupings + (let* ([field/c (or/c string? exact-nonnegative-integer?)] + [grouping/c (or/c field/c (vectorof field/c))]) + (or/c grouping/c (listof grouping/c)))] [#:group-mode group-mode - (listof (or/c 'preserve-null-rows 'list)) + (listof (or/c 'preserve-null 'list)) null]) rows-result?]{ -If @racket[grouping-fields] is a vector, the elements must be names of +If @racket[groupings] is a vector, the elements must be names of fields in @racket[result], and @racket[result]'s rows are regrouped using the given fields. Each grouped row contains N+1 fields; the -first N fields are the @racket[grouping-fields], and the final field +first N fields are the @racket[groupings], and the final field is a list of ``residual rows'' over the rest of the fields. A residual row of all NULLs is dropped (for convenient processing of @tt{OUTER JOIN} results) unless @racket[group-mode] includes -@racket['preserve-null-rows]. If @racket[group-mode] contains +@racket['preserve-null]. If @racket[group-mode] contains @racket['list], there must be exactly one residual field, and its values are included without a vector wrapper (similar to @racket[query-list]). +See also @secref["dbperf-n+1"]. + @examples[#:eval the-eval (define vehicles-result (rows-result @@ -361,7 +377,9 @@ values are included without a vector wrapper (similar to #:group '(#("type"))) ] -The @racket[grouping-fields] argument may also be a list of vectors; +The grouped final column is given the name @racket["grouped"]. + +The @racket[groupings] argument may also be a list of vectors; in that case, the grouping process is repeated for each set of grouping fields. The grouping fields must be distinct. @@ -372,6 +390,38 @@ grouping fields. The grouping fields must be distinct. ] } +@defproc[(rows->dict [result rows-result?] + [#:key key-field/s + (let ([field/c (or/c string? exact-nonnegative-integer?)]) + (or/c field/c (vectorof field/c)))] + [#:value value-field/s + (let ([field/c (or/c string? exact-nonnegative-integer?)]) + (or/c field/c (vectorof field/c)))] + [#:value-mode value-mode + (listof (or/c 'list 'preserve-null)) + null]) + dict?]{ + +Creates a dictionary mapping @racket[key-field/s] to +@racket[value-field/s]. If @racket[key-field/s] is a single field name +or index, the keys are the field values; if @racket[key-field/s] is a +vector, the keys are vectors of the field values. Likewise for +@racket[value-field/s]. + +If @racket[value-mode] contains @racket['list], a list of values is +accumulated for each key; otherwise, there must be at most one value +for each key. Values consisting of all @racket[sql-null?] values are +dropped unless @racket[value-mode] contains +@racket['preserve-null]. + +@examples[#:eval the-eval +(rows->dict vehicles-result + #:key "model" #:value '#("type" "maker")) +(rows->dict vehicles-result + #:key "maker" #:value "model" #:value-mode '(list)) +] +} + @section{Prepared Statements} @@ -533,7 +583,7 @@ statement implicitly commits the current transaction. These statements also must not be used within @tech{managed transactions}. (In contrast, PostgreSQL and SQLite both support transactional DDL.) -@bold{Errors} Query errors may affect an open transaction in one of +@parheading{Errors} Query errors may affect an open transaction in one of three ways: @itemlist[#:style 'ordered @item{the transaction remains open and unchanged} diff --git a/collects/db/scribblings/using-db.scrbl b/collects/db/scribblings/using-db.scrbl new file mode 100644 index 0000000000..863ecd3417 --- /dev/null +++ b/collects/db/scribblings/using-db.scrbl @@ -0,0 +1,511 @@ +#lang scribble/doc +@(require scribble/manual + scribble/eval + scribble/struct + racket/sandbox + "config.rkt" + (for-label db db/util/testing racket/dict web-server/lang/web)) + +@title[#:tag "using-db"]{Using Database Connections} + +This section introduces this library's basic features and covers some +practical issues with database programming in general and with this +library in particular. + + +@section[#:tag "intro-basic"]{Introduction to Using Database Connections} + +The following annotated program demonstrates how to connect to a +database and perform simple queries. Some of the SQL syntax used below +is PostgreSQL-specific, such as the syntax of query parameters +(@litchar{$1} rather than @litchar{?}). + +@my-interaction[ +[(require db) + (void)] +] + +First we create a connection. Replace @racket[_user], @racket[_db], +and @racket[_password] below with the appropriate values for your +configuration (see @secref{creating-connections} for other connection examples): + +@my-interaction[ +[(define pgc + (postgresql-connect #:user _user + #:database _db + #:password _password)) + (void)] +[pgc + (new connection%)] +] + +Use @racket[query-exec] method to execute a SQL statement for effect. + +@my-interaction[ +[(query-exec pgc + "create temporary table the_numbers (n integer, d varchar(20))") + (void)] +[(query-exec pgc + "insert into the_numbers values (0, 'nothing')") + (void)] +[(query-exec pgc + "insert into the_numbers values (1, 'the loneliest number')") + (void)] +[(query-exec pgc + "insert into the_numbers values (2, 'company')") + (void)] +] + +The @racket[query] function is a more general way to execute a +statement. It returns a structure encapsulating information about the +statement's execution. (But some of that information varies from +system to system and is subject to change.) + +@my-interaction[ +[(query pgc "insert into the_numbers values (3, 'a crowd')") + (simple-result '((command insert 0 1)))] +[(query pgc "select n, d from the_numbers where n % 2 = 0") + (rows-result + (list + '((name . "n") (typeid . 23)) + '((name . "d") (typeid . 1043))) + '(#(0 "nothing") #(2 "company")))] +] + +When the query is known to return rows and when the field +descriptions are not needed, it is more convenient to use the +@racket[query-rows] function. + +@my-interaction[ +[(query-rows pgc "select n, d from the_numbers where n % 2 = 0") + '(#(0 "nothing") #(2 "company"))] +] + +Use @racket[query-row] for queries that are known to return exactly +one row. + +@my-interaction[ +[(query-row pgc "select * from the_numbers where n = 0") + (vector 0 "nothing")] +] + +Similarly, use @racket[query-list] for queries that produce rows of +exactly one column. + +@my-interaction[ +[(query-list pgc "select d from the_numbers order by n") + (list "nothing" "the loneliest number" "company" "a crowd")] +] + +When a query is known to return a single value (one row and one +column), use @racket[query-value]. + +@my-interaction[ +[(query-value pgc "select count(*) from the_numbers") + 4] +[(query-value pgc "select d from the_numbers where n = 5") + (error 'query-value + "query returned zero rows: ~s" + "select d from the_numbers where n = 5")] +] + +When a query may return zero or one rows, as the last example, use +@racket[query-maybe-row] or @racket[query-maybe-value] instead. + +@my-interaction[ +[(query-maybe-value pgc "select d from the_numbers where n = 5") + (values #f)] +] + +The @racket[in-query] function produces a sequence that can be used +with Racket's iteration forms: + +@my-interaction[ +[(for ([(n d) (in-query pgc "select * from the_numbers where n < 4")]) + (printf "~a is ~a\n" n d)) + (for-each (lambda (n d) (printf "~a: ~a\n" n d)) + '(0 1 2 3) + '("nothing" "the loneliest number" "company" "a crowd"))] +[(for/fold ([sum 0]) ([n (in-query pgc "select n from the_numbers")]) + (+ sum n)) + (for/fold ([sum 0]) ([n (in-list '(0 1 2 3))]) + (+ sum n))] +] + +Errors in queries generally do not cause the connection to disconnect. + +@my-interaction[ +[(begin (with-handlers [(exn:fail? + (lambda (e) + (printf "~a~n" (exn-message e))))] + (query-value pgc "select NoSuchField from NoSuchTable")) + (query-value pgc "select 'okay to proceed!'")) + (begin (display "query-value: relation \"nosuchtable\" does not exist (SQLSTATE 42P01)") + "okay to proceed!")] +] + +Queries may contain parameters. The easiest way to execute a +parameterized query is to provide the parameters ``inline'' after the +SQL statement in the query function call. + +@my-interaction[ +[(query-value pgc + "select d from the_numbers where n = $1" 2) + "company"] +[(query-list pgc + "select n from the_numbers where n > $1 and n < $2" 0 3) + (list 1 2)] +] + +Alternatively, a parameterized query may be prepared in advance and +executed later. @tech{Prepared statements} can be executed multiple +times with different parameter values. + +@my-interaction[ +[(define get-less-than-pst + (prepare pgc "select n from the_numbers where n < $1")) + (void)] +[(query-list pgc get-less-than-pst 1) + (list 0)] +[(query-list pgc (bind-prepared-statement get-less-than-pst '(2))) + (list 0 1)] +] + +When a connection's work is done, it should be disconnected. + +@my-interaction[ +[(disconnect pgc) + (void)] +] + + +@section[#:tag "dbsec"]{Database Security} + +Database security requires both that the database back end be secured +against unauthorized use and that authorized clients are not tricked +or subverted into violating the database's security. + +Securing database back ends is mostly beyond the scope of this +manual. In brief: choose sufficiently strong authentication methods +and keep credentials secure, and follow the +@wplink["Principle_of_least_privilege"]{principle of least privilege}: +create and use roles that have the minimum permissions needed. + +The following is an incomplete list of security issues related to +database @emph{client} programming. + +@;{Add section on db roles and ro/rw access? + eg, for servlet create two connections: one ro and one rw + (in pg/my/etc, backed by two roles; in sqlite, connection options) } + +@subsection[#:tag "dbsec-sql-injection"]{SQL Injection} + +@wplink["SQL_injection"]{SQL injection} happens when part of a SQL +statement that was intended as SQL literal data is instead interpreted +as SQL code---possibly @hyperlink["http://xkcd.com/327/"]{malicious} +SQL code. + +Avoid dynamically creating SQL query strings by string concatenation +or interpolation (eg, with @racket[string-append] or +@racket[format]). In most cases, it is possible to use +@tech{parameterized queries} instead. For example, instead of this + +@racketblock[ +(code:comment "WRONG! DANGER!") +(query-exec c + (format "UPDATE users SET passwd='~a' WHERE user='~a'" + user new-passwd)) +] +write one of the following instead (depending on SQL dialect): +@racketblock[ +(code:comment "for PostgreSQL, SQLite") +(query-exec c "UPDATE users SET passwd=$1 WHERE user=$2" user new-passwd) +(code:comment "for MySQL, SQLite, ODBC") +(query-exec c "UPDATE users SET passwd=? WHERE user=?" user new-passwd) +] + +The first form would choke on names like @racket["Patrick O'Connor"]. +Worse, it would be susceptible to attack by malicious input like +@racket["me' OR user='root'"], which yields the following SQL +statement: + +@(element 'tt "UPDATE users SET passwd='whatever' WHERE user='me' OR user='root'") + +In contrast, using a @tech{parameterized query} causes the +parameterized SQL and its arguments to be submitted to the back end +separately; the back end then combines them safely. + +Only SQL literal values can be replaced with parameter placeholders; a +SQL statement cannot be parameterized over a column name or a sort +order, for example. In such cases, constructing the query dynamically +may be the only feasible solution. But while the query construction +may be influenced by external input, it should never directly +incorporate external input without validation. That is, don't do the +following: + +@racketblock[ +(code:comment "WRONG! DANGER!") +(query-rows c + (format "SELECT name, ~a FROM contestants" column)) +(query-list c + (format "SELECT name FROM contestants ORDER BY score ~a" direction)) +] + +Instead, select the inserted SQL from known good alternatives: + +@racketblock[ +(code:comment "BETTER") +(query-rows c + (format "SELECT name, ~a FROM contestants" + (cond [(member column '("wins" "losses")) column] + [else (error ....)]))) +(query-list c + (format "SELECT name FROM contestants ORDER BY score ~a" + (if ascending? "ASC" "DESC"))) +] + +@;{ Discuss dynamic IN comparisons? } + + +@subsection[#:tag "dbsec-xss"]{Cross-site Scripting (XSS)} + +@wplink["Cross-site_scripting"]{Cross-site scripting}---which should +probably be called ``HTML injection'' or ``markup injection''---is +when arbitrary text from an untrusted source is embedded without +escaping into an HTML page. The @emph{unstructured text from the +untrusted source} is reinterpreted as @emph{markup from the web +server}; if the reinterpreted markup contains embedded Javascript +code, it executes with the security privileges associated with the web +server's domain. + +This issue has little to do with databases @emph{per se} except that +such text is often stored in a database. This issue is mitigated by +using structured markup representations like SXML or X-expressions +(xexprs), since they automatically escape ``markup'' characters found +in embedded text. + + +@;{============================================================} + +@section[#:tag "dbperf"]{Database Performance} + +Achieving good database performance mostly consists of good database +design and intelligent client behavior. + +On the database design side, most important are wise use of indexes +and choosing appropriate data representations. As an example of the +latter, a regexp-based search using @tt{LIKE} will probably be slower +than a specialized +@hyperlink["http://www.postgresql.org/docs/9.0/static/textsearch.html"]{full-text +search} feature for large data sets. Consult your database back end's +manual for additional performance advice. + +The following sections describe a few client-side aspects of +performance. + +@subsection[#:tag "dbperf-n+1"]{The N+1 Selects Problem} + +@;{ per comments on http://stackoverflow.com/questions/97197/what-is-the-n1-selects-problem + Is N+1 actually a problem? + ie, Is communication overhead with db back end worse than + grouping cost? Should measure to see. } + +A common mistake is to fetch a large amount of data by running a query +to get a set of initial records and then running another query inside +a loop with an iteration for each of the initial records. This is +sometimes called the ``n+1 selects problem.'' For example: + +@racketblock[ +(for/list ([(name id) (in-query c "SELECT name, id FROM contestants")]) + (define wins + (query-list c "SELECT contest FROM contests WHERE winner = $1" id)) + (make-contestant-record name wins)) +] + +The same information can be retrieved in a single query by performing +a @tt{LEFT OUTER JOIN} and grouping the results: + +@racketblock[ +(for/list ([(name id wins) + (in-query c + (string-append "SELECT name, id, contest " + "FROM contestants LEFT OUTER JOIN contests " + "ON contestants.id = contests.winner") + #:group '(#("name" "id")) + #:group-mode '(list))]) + (make-contestant-record name wins)) +] + +The one-query form will perform better when database communication has +high latency. On the other hand, it may duplicate the contents of the +non-key @tt{name} column, using more bandwidth. Another approach is to +perform two queries: + +@racketblock[ +(let ([id=>name + (rows->dict #:key "id" #:value "name" + (query c "SELECT id, name FROM contestants"))]) + (for/list ([(id wins) + (in-query c + (string-append "SELECT id, contest " + "FROM contestants LEFT OUTER JOIN contests " + "ON contestants.id = contests.winner") + #:group '(#("id")) + #:group-mode '(list))]) + (make-contestant-record (dict-ref id=>name id) wins))) +] + +Compared with the one-query form, the two-query form requires +additional communication, but it avoids duplicating @tt{name} values +in the @tt{OUTER JOIN} results. If additional non-key @tt{contestant} +fields were to be retrieved, the bandwidth savings of this approach +would be even greater. + +See also @secref["dbperf-testing"]. + + +@subsection[#:tag "dbperf-update-tx"]{Updates and Transactions} + +Using transactions can dramatically improve the performance of bulk +database operations, especially @tt{UPDATE} and @tt{INSERT} +statements. As an extreme example, on commodity hardware in 2012, +SQLite is capable of executing thousands of @tt{INSERT} statements per +second within a transaction, but it is capable of only dozens of +single-@tt{INSERT} transactions per second. + + +@subsection[#:tag "dbperf-pstcache"]{Statement Caching} + +Connections cache implicitly prepared statements (that is, statements +given in string form directly to a query function). The effect of the +cache is to eliminate an extra round-trip to the server (to send the +statement and receive a prepared statement handle), leaving just a +single round-trip (to send parameters and receive results) per +execution. + +Currently, prepared statements are only cached within a +transaction. The statement cache is flushed when entering or leaving a +transaction and whenever a DDL statement is executed. + +@;{ virtual statements are mostly obsolete } + + +@subsection[#:tag "dbperf-testing"]{Testing Performance of Database-Backed Programs} + +When testing the performance of database-backed programs, remember to +test them in environments with realistic latency and +bandwidth. High-latency environments may be roughly approximated with +the @racket[high-latency-connection] function, but there's no +substitute for the real thing. + + +@;{============================================================} + +@section[#:tag "intro-servlets"]{Databases and Web Servlets} + +Using database connections in a web servlet is more complicated than +in a standalone program. A single servlet potentially serves many +requests at once, each in a separate request-handling +thread. Furthermore, the use of @racket[send/suspend], +@racket[send/suspend/dispatch], etc means that there are many places +where a servlet may start and stop executing to service a request. + +Why not use a single connection to handle all of a servlet's requests? +That is, create the connection with the servlet instance and never +disconnect it. Such a servlet would look something like the following: + +@racketmod[ +#:file "bad-servlet.rkt" +web-server +(require db) +(define db-conn (postgresql-connect ....)) +(define (serve req) + .... db-conn ....) +] + +The main problem with using one connection for all requests is that +multiple threads accessing the same connection are not properly +@wplink["Isolation_%28database_systems%29"]{isolated}. For example, if +one thread is accessing the connection within a transaction and +another thread issues a query, the second thread may see invalid data +or even disrupt the work of the first thread. + +A secondary problem is performance. A connection can only perform a +single query at a time, whereas most database systems are capable of +concurrent query processing. + +The proper way to use database connections in a servlet is to create a +connection for each request and disconnect it when the request has +been handled. But since a request thread may start and stop executing +in many places (due to @racket[send/suspend], etc), inserting the code +to connect and disconnect at the proper places can be challenging and +messy. + +A better solution is to use a @tech{virtual connection}, which +automatically creates a request-specific (that is, thread-specific) +``actual connection'' by need and disconnects it when the request has +been handled (that is, when the thread terminates). Different +request-handling threads using the same virtual connection are +assigned different actual connections, so the requests are properly +isolated. + +@racketmod[ +#:file "better-servlet.rkt" +web-server +(require db) +(define db-conn + (virtual-connection + (lambda () (postgresql-connect ....)))) +(define (serve req) + .... db-conn ....) +] + +This solution preserves the simplicity of the naive solution and fixes +the isolation problem but at the cost of creating many short-lived +database connections. That cost can be eliminated by using a +@tech{connection pool}: + +@racketmod[ +#:file "best-servlet.rkt" +web-server +(require db) +(define db-conn + (virtual-connection + (connection-pool + (lambda () (postgresql-connect ....))))) +(define (serve req) + .... db-conn ....) +] + +By using a virtual connection backed by a connection pool, a servlet +can achieve simplicity, isolation, and performance all at the same +time. + +@;{ + +TODO: + - talk about virtual statements, too + - show actual working servlet code + +-- + +A prepared statement is tied to the connection used to create it; +attempting to use it with another connection results in an +error. Unfortunately, in some scenarios such as web servlets, the +lifetimes of connections are short or difficult to track, making +prepared statements inconvenient. In such cases, a better tool is the +@tech{virtual statement}, which prepares statements on demand and +caches them for future use with the same connection. + +@my-interaction[ +[(define get-less-than-pst + (virtual-statement "select n from the_numbers where n < $1")) + (void)] +[(code:line (query-list pgc1 get-less-than-pst 1) (code:comment "prepares statement for pgc1")) + (list 0)] +[(code:line (query-list pgc2 get-less-than-pst 2) (code:comment "prepares statement for pgc2")) + (list 0 1)] +[(code:line (query-list pgc1 get-less-than-pst 3) (code:comment "uses existing prep. stmt.")) + (list 0 1 2)] +] +} diff --git a/collects/db/scribblings/util.scrbl b/collects/db/scribblings/util.scrbl index b20734d346..3e42c99f1c 100644 --- a/collects/db/scribblings/util.scrbl +++ b/collects/db/scribblings/util.scrbl @@ -4,7 +4,7 @@ scribble/struct racket/sandbox "config.rkt" - (for-label db db/util/datetime db/util/geometry db/util/postgresql)) + (for-label db db/util/datetime db/util/geometry db/util/postgresql db/util/testing)) @title[#:tag "util"]{Utilities} @@ -203,3 +203,33 @@ types that have no appropriate analogue in the OpenGIS model: Note: PostgreSQL's built-in geometric types are distinct from those provided by the PostGIS extension library (see @secref["geometry"]). } + +@;{========================================} + +@section[#:tag "util-testing"]{Testing Database Programs} + +@defmodule[db/util/testing] + +This module provides utilities for testing programs that use database +connections. + +@defproc[(high-latency-connection [connection connection?] + [latency (>=/c 0)] + [#:sleep-atomic? sleep-atomic? any/c #f]) + connection?]{ + +Returns a proxy connection for @racket[connection] that introduces +@racket[latency] additional seconds of latency before operations that +require communicating with the database back end---@racket[prepare], +@racket[query], @racket[start-transaction], etc. + +Use this function in performance testing to roughly simulate +environments with high-latency communication with a database back +end. + +If @racket[sleep-atomic?] is true, then the proxy enters atomic mode +before sleeping, to better simulate the effect of a long-running FFI +call (see @secref["ffi-concurrency"]). Even so, it may not accurately +simulate an ODBC connection that internally uses cursors to fetch data +on demand, as each fetch would introduce additional latency. +} diff --git a/collects/db/util/testing.rkt b/collects/db/util/testing.rkt new file mode 100644 index 0000000000..87635ee6c5 --- /dev/null +++ b/collects/db/util/testing.rkt @@ -0,0 +1,58 @@ +#lang racket/base +(require racket/contract + racket/class + ffi/unsafe/atomic + "../private/generic/interfaces.rkt" + "../private/generic/common.rkt") + +(define high-latency-connection% + (class* locking% (connection<%>) + (init-private connection + latency + sleep-atomic?) + (inherit call-with-lock) + (super-new) + + (define-syntax-rule (define-forward defmethod (sleep-factor method arg ...) ...) + (begin + (defmethod (method arg ...) + (call-with-lock 'method + (lambda () + (let ([c connection]) + (unless c (error/not-connected 'method)) + (when (positive? sleep-factor) + (if sleep-atomic? + (call-as-atomic (lambda () (sleep (* sleep-factor latency)))) + (sleep (* sleep-factor latency)))) + (send c method arg ...))))) + ...)) + + (define-forward define/public + (0 get-dbsystem) + (2 query fsym stmt cursor?) ;; 2 because may require implicit prepare + (1 prepare fsym stmt close-on-exec?) + (1 fetch/cursor fsym stmt fetch-size) + (0 get-base) + (0 free-statement stmt need-lock?) + (0 transaction-status fsym) + (1 start-transaction fsym isolation cwt?) + (1 end-transaction fsym mode cwt?) + (1 list-tables fsym schema)) + + (define/override (connected?) (and connection (send connection connected?))) + + (define/public (disconnect) + (set! connection #f)))) + +(define (high-latency-connection connection latency + #:sleep-atomic? [sleep-atomic? #f]) + (new high-latency-connection% + [connection connection] + [latency latency] + [sleep-atomic? sleep-atomic?])) + +(provide/contract + [high-latency-connection + (->* (connection? (>=/c 0)) + (#:sleep-atomic? any/c) + connection?)]) diff --git a/collects/tests/db/db/sql-types.rkt b/collects/tests/db/db/sql-types.rkt index f7ecee3c4b..21d3449ca7 100644 --- a/collects/tests/db/db/sql-types.rkt +++ b/collects/tests/db/db/sql-types.rkt @@ -171,7 +171,7 @@ (when (ANYFLAGS 'postgresql 'mysql 'sqlite3) (check-roundtrip c (make-bytes #e1e6 (char->integer #\a))) (check-roundtrip c (make-bytes #e1e7 (char->integer #\b))) - (check-roundtrip c (make-bytes #e1e8 (char->integer #\c)))) + #| (check-roundtrip c (make-bytes #e1e8 (char->integer #\c))) |#) (when (ANYFLAGS 'postgresql) (let ([r (query-value c "select cast(repeat('a', 10000000) as bytea)")]) (check-pred bytes? r)