io: adjust read-[bytes-]line to use direct buffer

This commit is contained in:
Matthew Flatt 2019-02-13 07:38:18 -07:00
parent e266da929d
commit d197e1b8f4
4 changed files with 129 additions and 58 deletions

View File

@ -19,7 +19,7 @@
make-string string-set!
string-copy! substring
read-a-char peek-a-char
as-char)
as-char direct-string?)
(define/who (read-line [orig-in (current-input-port)] [mode 'linefeed])
(define in (->core-input-port orig-in who))
(check who ok-mode? #:contract ok-mode-str mode)
@ -27,47 +27,53 @@
(define cr? (case mode [(return any any-one) #t] [else #f]))
(define lf? (case mode [(linefeed any any-one) #t] [else #f]))
(define crlf? (case mode [(return-linefeed any) #t] [else #f]))
(define init-len 32)
(let loop ([str (make-string init-len)] [len init-len] [pos 0])
(define ch (read-a-char 'read-line in))
(define (keep-char)
(if (pos . fx< . len)
(begin
(string-set! str pos ch)
(loop str len (fx+ pos 1)))
(let* ([new-len (fx* len 2)]
[new-str (make-string new-len)])
(string-copy! new-str 0 str 0)
(string-set! new-str pos ch)
(loop new-str new-len (fx+ pos 1)))))
(cond
[(eof-object? ch)
(if (fx= pos 0)
eof
(substring str 0 pos))]
[(and (or cr? crlf?)
(eqv? ch (as-char #\return)))
(cond
[(and crlf?
(eqv? (peek-a-char 'read-line in 0) (as-char #\linefeed)))
(read-a-char 'read-line in)
(substring str 0 pos)]
[cr?
(substring str 0 pos)]
[else (keep-char)])]
[(and lf?
(eqv? ch (as-char #\newline)))
(substring str 0 pos)]
[else (keep-char)]))))
(cond
[(maybe-read-a-line in cr? lf? crlf? direct-string?)
=> (lambda (r) r)]
[else
(define init-len 32)
(let loop ([str (make-string init-len)] [len init-len] [pos 0])
(define ch (read-a-char 'read-line in))
(define (keep-char)
(if (pos . fx< . len)
(begin
(string-set! str pos ch)
(loop str len (fx+ pos 1)))
(let* ([new-len (fx* len 2)]
[new-str (make-string new-len)])
(string-copy! new-str 0 str 0)
(string-set! new-str pos ch)
(loop new-str new-len (fx+ pos 1)))))
(cond
[(eof-object? ch)
(if (fx= pos 0)
eof
(substring str 0 pos))]
[(and (or cr? crlf?)
(eqv? ch (as-char #\return)))
(cond
[(and crlf?
(eqv? (peek-a-char 'read-line in 0) (as-char #\linefeed)))
(read-a-char 'read-line in)
(substring str 0 pos)]
[cr?
(substring str 0 pos)]
[else (keep-char)])]
[(and lf?
(eqv? ch (as-char #\newline)))
(substring str 0 pos)]
[else (keep-char)]))])))
(define-read-line read-line
make-string string-set!
string-copy! substring
read-a-char peek-a-char
values)
values
#t)
(define-read-line read-bytes-line
make-bytes bytes-set!
bytes-copy! subbytes
read-a-byte peek-a-byte
char->integer)
char->integer
#f)

View File

@ -3,6 +3,7 @@
"../common/internal-error.rkt"
"../common/class.rkt"
"../host/thread.rkt"
"../string/utf-8-decode.rkt"
"port.rkt"
"input-port.rkt"
"count.rkt"
@ -15,7 +16,9 @@
read-a-byte
read-byte-via-bytes
peek-a-byte
peek-byte-via-bytes)
peek-byte-via-bytes
maybe-read-a-line)
;; Read up to `(- end start)` bytes, producing at least a
;; single by unless `zero-ok?` is true. The result is
@ -259,3 +262,45 @@
(if (eq? v 1)
(bytes-ref bstr 0)
v))
;; ----------------------------------------
;; Tries to read a line from the fast-path buffer
(define (maybe-read-a-line in cr? lf? crlf? as-string?)
(start-atomic)
(define buffer (core-port-buffer in))
(define bstr (direct-bstr buffer))
(define pos (direct-pos buffer))
(define end (fxmin (direct-end buffer)
;; limit atomicity
(fx+ pos 4096)))
(define (finish end read-end)
(set-direct-pos! buffer read-end)
(when (core-port-count in)
(port-count! in (fx- read-end pos) bstr pos))
(define result
(if as-string?
(a-bytes->string/utf-8 bstr pos end)
(subbytes bstr pos end)))
(end-atomic)
result)
(let loop ([i pos])
(cond
[(fx= i end)
(end-atomic)
#f]
[else
(define b (bytes-ref bstr i))
(cond
[(and lf? (eqv? b (char->integer #\linefeed)))
(finish i (fx+ i 1))]
[(and (or cr? crlf?) (eqv? b (char->integer #\return)))
(cond
[(and crlf?
((fx+ i 1) . fx< . end)
(eqv? (bytes-ref bstr (fx+ i 1)) (char->integer #\linefeed)))
(finish i (fx+ i 2))]
[cr?
(finish i (fx+ i 1))]
[else (loop (fx+ i 1))])]
[else (loop (fx+ i 1))])])))

View File

@ -40,25 +40,10 @@
(check who exact-nonnegative-integer? start)
(check who exact-nonnegative-integer? end)
(check-range who start end (bytes-length bstr) bstr)
;; Measure result string:
(define-values (used-bytes got-chars state)
(utf-8-decode! bstr start end
#f 0 #f
#:error-char err-char
#:abort-mode 'error))
(cond
[(eq? state 'error) (if just-length?
#f
(raise-encoding-error who bstr start end))]
[just-length? got-chars]
[else
;; Create result string:
(define str (make-string got-chars))
(utf-8-decode! bstr start end
str 0 #f
#:error-char err-char
#:abort-mode 'error)
str]))
(or (a-bytes->string/utf-8 bstr start end err-char #:just-length? just-length?)
(if just-length?
#f
(raise-encoding-error who bstr start end))))
(define/who (bytes->string/utf-8 bstr [err-char #f] [start 0] [end (and (bytes? bstr)
(bytes-length bstr))])
@ -86,7 +71,7 @@
(check-range who start end (bytes-length bstr) bstr)
;; First, decode `skip` items:
(define-values (initial-used-bytes initial-got-chars state)
(if (zero? skip)
(if (eqv? skip 0)
(values 0 0 (if (= start end) 'complete 'continues))
(utf-8-decode! bstr start end
#f 0 skip

View File

@ -7,7 +7,9 @@
utf-8-decode-byte
utf-8-state?
utf-8-state-pending-amt)
utf-8-state-pending-amt
a-bytes->string/utf-8)
;; The maximum number of characters that might not be consumed
;; by a conversion at the tail of a byte string, assuming that
@ -23,7 +25,7 @@
;; and further decoding reveals that earlier bytes were in error.
;;
;; The `abort-mode` argument determines what to do when reaching the end of the input
;; and an encoding needs more ytes:
;; and an encoding needs more bytes:
;; * 'error : treat the bytes as encoding errors
;; * 'aborts : report 'aborts
;; * 'state : return a value that encapsulates the state, so another call can continue
@ -217,3 +219,36 @@
(values accum remaining 'continues))
(lambda ()
(values #f 0 'error))))
;; ----------------------------------------
(define (a-bytes->string/utf-8 bstr start end [err-char #\uFFFD] #:just-length? [just-length? #f])
;; Shortcut for all-ASCII:
(cond
[(for/and ([i (in-range start end)])
((bytes-ref bstr i) . fx< . 128))
(cond
[just-length? (fx- end start)]
[else
(define str (make-string (fx- end start)))
(for ([i (in-range start end)])
(string-set! str (fx- i start) (integer->char (bytes-ref bstr i))))
str])]
[else
;; Measure result string:
(define-values (used-bytes got-chars state)
(utf-8-decode! bstr start end
#f 0 #f
#:error-char err-char
#:abort-mode 'error))
(cond
[(eq? state 'error) #f]
[just-length? got-chars]
[else
;; Create result string:
(define str (make-string got-chars))
(utf-8-decode! bstr start end
str 0 #f
#:error-char err-char
#:abort-mode 'error)
str])]))