io: adjust read-[bytes-]line
to use direct buffer
This commit is contained in:
parent
e266da929d
commit
d197e1b8f4
|
@ -19,7 +19,7 @@
|
|||
make-string string-set!
|
||||
string-copy! substring
|
||||
read-a-char peek-a-char
|
||||
as-char)
|
||||
as-char direct-string?)
|
||||
(define/who (read-line [orig-in (current-input-port)] [mode 'linefeed])
|
||||
(define in (->core-input-port orig-in who))
|
||||
(check who ok-mode? #:contract ok-mode-str mode)
|
||||
|
@ -27,47 +27,53 @@
|
|||
(define cr? (case mode [(return any any-one) #t] [else #f]))
|
||||
(define lf? (case mode [(linefeed any any-one) #t] [else #f]))
|
||||
(define crlf? (case mode [(return-linefeed any) #t] [else #f]))
|
||||
(define init-len 32)
|
||||
(let loop ([str (make-string init-len)] [len init-len] [pos 0])
|
||||
(define ch (read-a-char 'read-line in))
|
||||
(define (keep-char)
|
||||
(if (pos . fx< . len)
|
||||
(begin
|
||||
(string-set! str pos ch)
|
||||
(loop str len (fx+ pos 1)))
|
||||
(let* ([new-len (fx* len 2)]
|
||||
[new-str (make-string new-len)])
|
||||
(string-copy! new-str 0 str 0)
|
||||
(string-set! new-str pos ch)
|
||||
(loop new-str new-len (fx+ pos 1)))))
|
||||
(cond
|
||||
[(eof-object? ch)
|
||||
(if (fx= pos 0)
|
||||
eof
|
||||
(substring str 0 pos))]
|
||||
[(and (or cr? crlf?)
|
||||
(eqv? ch (as-char #\return)))
|
||||
(cond
|
||||
[(and crlf?
|
||||
(eqv? (peek-a-char 'read-line in 0) (as-char #\linefeed)))
|
||||
(read-a-char 'read-line in)
|
||||
(substring str 0 pos)]
|
||||
[cr?
|
||||
(substring str 0 pos)]
|
||||
[else (keep-char)])]
|
||||
[(and lf?
|
||||
(eqv? ch (as-char #\newline)))
|
||||
(substring str 0 pos)]
|
||||
[else (keep-char)]))))
|
||||
(cond
|
||||
[(maybe-read-a-line in cr? lf? crlf? direct-string?)
|
||||
=> (lambda (r) r)]
|
||||
[else
|
||||
(define init-len 32)
|
||||
(let loop ([str (make-string init-len)] [len init-len] [pos 0])
|
||||
(define ch (read-a-char 'read-line in))
|
||||
(define (keep-char)
|
||||
(if (pos . fx< . len)
|
||||
(begin
|
||||
(string-set! str pos ch)
|
||||
(loop str len (fx+ pos 1)))
|
||||
(let* ([new-len (fx* len 2)]
|
||||
[new-str (make-string new-len)])
|
||||
(string-copy! new-str 0 str 0)
|
||||
(string-set! new-str pos ch)
|
||||
(loop new-str new-len (fx+ pos 1)))))
|
||||
(cond
|
||||
[(eof-object? ch)
|
||||
(if (fx= pos 0)
|
||||
eof
|
||||
(substring str 0 pos))]
|
||||
[(and (or cr? crlf?)
|
||||
(eqv? ch (as-char #\return)))
|
||||
(cond
|
||||
[(and crlf?
|
||||
(eqv? (peek-a-char 'read-line in 0) (as-char #\linefeed)))
|
||||
(read-a-char 'read-line in)
|
||||
(substring str 0 pos)]
|
||||
[cr?
|
||||
(substring str 0 pos)]
|
||||
[else (keep-char)])]
|
||||
[(and lf?
|
||||
(eqv? ch (as-char #\newline)))
|
||||
(substring str 0 pos)]
|
||||
[else (keep-char)]))])))
|
||||
|
||||
(define-read-line read-line
|
||||
make-string string-set!
|
||||
string-copy! substring
|
||||
read-a-char peek-a-char
|
||||
values)
|
||||
values
|
||||
#t)
|
||||
|
||||
(define-read-line read-bytes-line
|
||||
make-bytes bytes-set!
|
||||
bytes-copy! subbytes
|
||||
read-a-byte peek-a-byte
|
||||
char->integer)
|
||||
char->integer
|
||||
#f)
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
"../common/internal-error.rkt"
|
||||
"../common/class.rkt"
|
||||
"../host/thread.rkt"
|
||||
"../string/utf-8-decode.rkt"
|
||||
"port.rkt"
|
||||
"input-port.rkt"
|
||||
"count.rkt"
|
||||
|
@ -15,7 +16,9 @@
|
|||
read-a-byte
|
||||
read-byte-via-bytes
|
||||
peek-a-byte
|
||||
peek-byte-via-bytes)
|
||||
peek-byte-via-bytes
|
||||
|
||||
maybe-read-a-line)
|
||||
|
||||
;; Read up to `(- end start)` bytes, producing at least a
|
||||
;; single by unless `zero-ok?` is true. The result is
|
||||
|
@ -259,3 +262,45 @@
|
|||
(if (eq? v 1)
|
||||
(bytes-ref bstr 0)
|
||||
v))
|
||||
|
||||
;; ----------------------------------------
|
||||
|
||||
;; Tries to read a line from the fast-path buffer
|
||||
(define (maybe-read-a-line in cr? lf? crlf? as-string?)
|
||||
(start-atomic)
|
||||
(define buffer (core-port-buffer in))
|
||||
(define bstr (direct-bstr buffer))
|
||||
(define pos (direct-pos buffer))
|
||||
(define end (fxmin (direct-end buffer)
|
||||
;; limit atomicity
|
||||
(fx+ pos 4096)))
|
||||
(define (finish end read-end)
|
||||
(set-direct-pos! buffer read-end)
|
||||
(when (core-port-count in)
|
||||
(port-count! in (fx- read-end pos) bstr pos))
|
||||
(define result
|
||||
(if as-string?
|
||||
(a-bytes->string/utf-8 bstr pos end)
|
||||
(subbytes bstr pos end)))
|
||||
(end-atomic)
|
||||
result)
|
||||
(let loop ([i pos])
|
||||
(cond
|
||||
[(fx= i end)
|
||||
(end-atomic)
|
||||
#f]
|
||||
[else
|
||||
(define b (bytes-ref bstr i))
|
||||
(cond
|
||||
[(and lf? (eqv? b (char->integer #\linefeed)))
|
||||
(finish i (fx+ i 1))]
|
||||
[(and (or cr? crlf?) (eqv? b (char->integer #\return)))
|
||||
(cond
|
||||
[(and crlf?
|
||||
((fx+ i 1) . fx< . end)
|
||||
(eqv? (bytes-ref bstr (fx+ i 1)) (char->integer #\linefeed)))
|
||||
(finish i (fx+ i 2))]
|
||||
[cr?
|
||||
(finish i (fx+ i 1))]
|
||||
[else (loop (fx+ i 1))])]
|
||||
[else (loop (fx+ i 1))])])))
|
||||
|
|
|
@ -40,25 +40,10 @@
|
|||
(check who exact-nonnegative-integer? start)
|
||||
(check who exact-nonnegative-integer? end)
|
||||
(check-range who start end (bytes-length bstr) bstr)
|
||||
;; Measure result string:
|
||||
(define-values (used-bytes got-chars state)
|
||||
(utf-8-decode! bstr start end
|
||||
#f 0 #f
|
||||
#:error-char err-char
|
||||
#:abort-mode 'error))
|
||||
(cond
|
||||
[(eq? state 'error) (if just-length?
|
||||
#f
|
||||
(raise-encoding-error who bstr start end))]
|
||||
[just-length? got-chars]
|
||||
[else
|
||||
;; Create result string:
|
||||
(define str (make-string got-chars))
|
||||
(utf-8-decode! bstr start end
|
||||
str 0 #f
|
||||
#:error-char err-char
|
||||
#:abort-mode 'error)
|
||||
str]))
|
||||
(or (a-bytes->string/utf-8 bstr start end err-char #:just-length? just-length?)
|
||||
(if just-length?
|
||||
#f
|
||||
(raise-encoding-error who bstr start end))))
|
||||
|
||||
(define/who (bytes->string/utf-8 bstr [err-char #f] [start 0] [end (and (bytes? bstr)
|
||||
(bytes-length bstr))])
|
||||
|
@ -86,7 +71,7 @@
|
|||
(check-range who start end (bytes-length bstr) bstr)
|
||||
;; First, decode `skip` items:
|
||||
(define-values (initial-used-bytes initial-got-chars state)
|
||||
(if (zero? skip)
|
||||
(if (eqv? skip 0)
|
||||
(values 0 0 (if (= start end) 'complete 'continues))
|
||||
(utf-8-decode! bstr start end
|
||||
#f 0 skip
|
||||
|
|
|
@ -7,7 +7,9 @@
|
|||
utf-8-decode-byte
|
||||
|
||||
utf-8-state?
|
||||
utf-8-state-pending-amt)
|
||||
utf-8-state-pending-amt
|
||||
|
||||
a-bytes->string/utf-8)
|
||||
|
||||
;; The maximum number of characters that might not be consumed
|
||||
;; by a conversion at the tail of a byte string, assuming that
|
||||
|
@ -23,7 +25,7 @@
|
|||
;; and further decoding reveals that earlier bytes were in error.
|
||||
;;
|
||||
;; The `abort-mode` argument determines what to do when reaching the end of the input
|
||||
;; and an encoding needs more ytes:
|
||||
;; and an encoding needs more bytes:
|
||||
;; * 'error : treat the bytes as encoding errors
|
||||
;; * 'aborts : report 'aborts
|
||||
;; * 'state : return a value that encapsulates the state, so another call can continue
|
||||
|
@ -217,3 +219,36 @@
|
|||
(values accum remaining 'continues))
|
||||
(lambda ()
|
||||
(values #f 0 'error))))
|
||||
|
||||
;; ----------------------------------------
|
||||
|
||||
(define (a-bytes->string/utf-8 bstr start end [err-char #\uFFFD] #:just-length? [just-length? #f])
|
||||
;; Shortcut for all-ASCII:
|
||||
(cond
|
||||
[(for/and ([i (in-range start end)])
|
||||
((bytes-ref bstr i) . fx< . 128))
|
||||
(cond
|
||||
[just-length? (fx- end start)]
|
||||
[else
|
||||
(define str (make-string (fx- end start)))
|
||||
(for ([i (in-range start end)])
|
||||
(string-set! str (fx- i start) (integer->char (bytes-ref bstr i))))
|
||||
str])]
|
||||
[else
|
||||
;; Measure result string:
|
||||
(define-values (used-bytes got-chars state)
|
||||
(utf-8-decode! bstr start end
|
||||
#f 0 #f
|
||||
#:error-char err-char
|
||||
#:abort-mode 'error))
|
||||
(cond
|
||||
[(eq? state 'error) #f]
|
||||
[just-length? got-chars]
|
||||
[else
|
||||
;; Create result string:
|
||||
(define str (make-string got-chars))
|
||||
(utf-8-decode! bstr start end
|
||||
str 0 #f
|
||||
#:error-char err-char
|
||||
#:abort-mode 'error)
|
||||
str])]))
|
||||
|
|
Loading…
Reference in New Issue
Block a user