fix the module reader for the case when there are non-ASCII unicode characters
in comments before the #lang line also add an #:init-position argument to peeking-input-port
This commit is contained in:
parent
8ec968fe8f
commit
d00aed6f1b
|
@ -428,17 +428,22 @@
|
|||
(set! buffering? (eq? mode 'block))
|
||||
(buffer-mode-proc mode)])))))
|
||||
|
||||
(define peeking-input-port
|
||||
(lambda (orig-in [name (object-name orig-in)] [delta 0])
|
||||
(make-input-port/read-to-peek
|
||||
name
|
||||
(lambda (s)
|
||||
(let ([r (peek-bytes-avail!* s delta #f orig-in)])
|
||||
(set! delta (+ delta (if (number? r) r 1)))
|
||||
(if (eq? r 0) (handle-evt orig-in (lambda (v) 0)) r)))
|
||||
(lambda (s skip default)
|
||||
(peek-bytes-avail!* s (+ delta skip) #f orig-in))
|
||||
void)))
|
||||
(define (peeking-input-port orig-in
|
||||
[name (object-name orig-in)]
|
||||
[delta 0]
|
||||
#:init-position [init-position 1])
|
||||
(make-input-port/read-to-peek
|
||||
name
|
||||
(lambda (s)
|
||||
(let ([r (peek-bytes-avail!* s delta #f orig-in)])
|
||||
(set! delta (+ delta (if (number? r) r 1)))
|
||||
(if (eq? r 0) (handle-evt orig-in (lambda (v) 0)) r)))
|
||||
(lambda (s skip default)
|
||||
(peek-bytes-avail!* s (+ delta skip) #f orig-in))
|
||||
void
|
||||
#f
|
||||
void
|
||||
init-position))
|
||||
|
||||
(define relocate-input-port
|
||||
(lambda (p line col pos [close? #t])
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
@mzlib[#:mode title port]
|
||||
|
||||
The @racketmodname[mzlib/port] library mostly re-provides
|
||||
@racketmodname[scheme/port].
|
||||
@racketmodname[racket/port].
|
||||
|
||||
@defproc[(strip-shell-command-start [in input-port?]) void?]{
|
||||
|
||||
|
|
|
@ -310,7 +310,8 @@ resulting port supports @racket[write-special], otherwise it does not.}
|
|||
|
||||
@defproc[(peeking-input-port [in input-port?]
|
||||
[name any/c (object-name in)]
|
||||
[skip exact-nonnegative-integer? 0])
|
||||
[skip exact-nonnegative-integer? 0]
|
||||
[#:init-position init-position exact-positive-integer? 1])
|
||||
input-port]{
|
||||
|
||||
Returns an input port whose content is determined by peeking into
|
||||
|
@ -323,8 +324,8 @@ The optional @racket[name] argument is the name of the resulting
|
|||
port. The @racket[skip] argument is the port initial skip count, and
|
||||
it defaults to @racket[0].
|
||||
|
||||
The resulting port's initial position is @racket[0], no matter the
|
||||
position of @racket[in].
|
||||
The resulting port's initial position (as reported by @racket[file-position])
|
||||
is @racket[(- init-position 1)], no matter the position of @racket[in].
|
||||
|
||||
For example, when you read from a peeking port, you
|
||||
see the same answers as when you read from the original port.
|
||||
|
|
|
@ -25,36 +25,37 @@ to delegate to the scheme-lexer (in the 'no-lang-line mode).
|
|||
(define (module-lexer in offset mode)
|
||||
(cond
|
||||
[(or (not mode) (eq? mode 'before-lang-line))
|
||||
(define lexer-port (peeking-input-port in))
|
||||
(define lexer-port (peeking-input-port in #:init-position (+ 1 (file-position in))))
|
||||
(port-count-lines! lexer-port)
|
||||
(define-values (lexeme type data raw-new-token-start raw-new-token-end) (scheme-lexer lexer-port))
|
||||
(define new-token-start (and raw-new-token-start (+ raw-new-token-start (file-position in))))
|
||||
(define new-token-end (and raw-new-token-end (+ raw-new-token-end (file-position in))))
|
||||
(set-port-next-location-from in lexer-port)
|
||||
(define-values (lexeme type data new-token-start new-token-end) (scheme-lexer lexer-port))
|
||||
(cond
|
||||
[(or (eq? type 'comment) (eq? type 'white-space))
|
||||
(define lexer-end (file-position lexer-port))
|
||||
;; sync ports
|
||||
(for ([i (in-range 0 lexer-end)])
|
||||
(read-char-or-special in))
|
||||
(for/list ([i (in-range (file-position in) (file-position lexer-port))])
|
||||
(read-byte-or-special in))
|
||||
(values lexeme type data new-token-start new-token-end 0 'before-lang-line)]
|
||||
[else
|
||||
;; look for #lang:
|
||||
(define p (peeking-input-port in))
|
||||
(define p (peeking-input-port in #:init-position (+ 1 (file-position in))))
|
||||
(port-count-lines! p)
|
||||
(set-port-next-location-from in p)
|
||||
(define-values (_1 _2 start-pos) (port-next-location p))
|
||||
(define get-info (with-handlers ([exn:fail? values]) (read-language p (λ () 'fail))))
|
||||
(define end-pos (file-position p))
|
||||
(define-values (_3 _4 end-pos) (port-next-location p))
|
||||
(cond
|
||||
[(procedure? get-info)
|
||||
;; sync ports
|
||||
(for ([i (in-range 0 end-pos)])
|
||||
(read-char-or-special in))
|
||||
(for ([i (in-range (file-position in) (file-position p))])
|
||||
(read-byte-or-special in))
|
||||
;; Produce language as first token:
|
||||
(values
|
||||
"#lang"
|
||||
'other
|
||||
#f
|
||||
1 ;; start-pos
|
||||
(+ end-pos 1)
|
||||
start-pos
|
||||
end-pos
|
||||
0
|
||||
(or (let ([v (get-info 'color-lexer #f)])
|
||||
(and v
|
||||
|
@ -68,12 +69,12 @@ to delegate to the scheme-lexer (in the 'no-lang-line mode).
|
|||
;; the read-language docs say that this is all it takes to commit to a #lang
|
||||
(regexp-match #rx"^#[!l]" lexeme))
|
||||
;; sync ports
|
||||
(for ([i (in-range 0 end-pos)])
|
||||
(read-char-or-special in))
|
||||
(for ([i (in-range (file-position in) (file-position p))])
|
||||
(read-byte-or-special in))
|
||||
(values lexeme 'error data 1 (+ end-pos 1) 0 'no-lang-line)]
|
||||
[else
|
||||
(for ([i (in-range 0 (file-position lexer-port))])
|
||||
(read-char-or-special in))
|
||||
(for ([i (in-range (file-position in) (file-position lexer-port))])
|
||||
(read-byte-or-special in))
|
||||
(values lexeme type data new-token-start new-token-end 0 'no-lang-line)])])]
|
||||
[(eq? mode 'no-lang-line)
|
||||
(let-values ([(lexeme type data new-token-start new-token-end)
|
||||
|
@ -89,3 +90,7 @@ to delegate to the scheme-lexer (in the 'no-lang-line mode).
|
|||
(let-values ([(lexeme type data new-token-start new-token-end)
|
||||
(mode in)])
|
||||
(values lexeme type data new-token-start new-token-end 0 mode))]))
|
||||
|
||||
(define (set-port-next-location-from src dest)
|
||||
(define-values (line col pos) (port-next-location src))
|
||||
(set-port-next-location! dest line col pos))
|
|
@ -4,6 +4,7 @@
|
|||
|
||||
(define (lex str)
|
||||
(define p (open-input-string str))
|
||||
(port-count-lines! p)
|
||||
(let loop ([mode #f]
|
||||
[n 0])
|
||||
(define-values (lexeme type data token-start token-end backup new-mode)
|
||||
|
@ -25,6 +26,15 @@
|
|||
[(= n 1000) '()] ;; watch out for loops
|
||||
[else (cons one (loop new-mode (+ n 1)))])))
|
||||
|
||||
(define (same? a b)
|
||||
(cond
|
||||
[(eq? a 'dont-care) #t]
|
||||
[(eq? b 'dont-care) #t]
|
||||
[(and (pair? a) (pair? b))
|
||||
(and (same? (car a) (car b))
|
||||
(same? (cdr a) (cdr b)))]
|
||||
[else (equal? a b)]))
|
||||
|
||||
(check-equal? (lex "#lang racket/base")
|
||||
`(("#lang" other 1 18 #f)
|
||||
(,eof eof #f #f (proc scheme-lexer))))
|
||||
|
@ -33,14 +43,31 @@
|
|||
("\n" white-space 18 19 (proc scheme-lexer))
|
||||
("1" constant 19 20 (proc scheme-lexer))
|
||||
(,eof eof #f #f (proc scheme-lexer))))
|
||||
(check-equal? (lex ";; αα\n")
|
||||
`(("; αα" comment 1 6 #f)
|
||||
("\n" white-space 6 7 before-lang-line)
|
||||
(,eof eof #f #f before-lang-line)))
|
||||
(check-equal? (lex ";; ααα\n;; aaa\n")
|
||||
`(("; ααα" comment 1 7 #f)
|
||||
("\n" white-space 7 8 before-lang-line)
|
||||
("; aaa" comment 8 14 before-lang-line)
|
||||
("\n" white-space 14 15 before-lang-line)
|
||||
(,eof eof #f #f before-lang-line)))
|
||||
(check-equal? (lex ";; a\n#lang racket/base")
|
||||
`(("; a" comment 1 5 #f)
|
||||
("\n" white-space 5 6 before-lang-line)
|
||||
("#lang" other 1 18 before-lang-line)
|
||||
("#lang" other 6 23 before-lang-line)
|
||||
(,eof eof #f #f (proc scheme-lexer))))
|
||||
(check-equal? (lex "#lang at-exp racket/base")
|
||||
`(("#lang" other 1 25 #f)
|
||||
(,eof eof 25 25 ((proc scribble-lexer) . #f))))
|
||||
(check-equal? (lex "#lang at-exp racket/baseBOGUS")
|
||||
`(("#lang at-exp" error 1 30 #f)
|
||||
`(("#lang at-exp" error 1 31 #f)
|
||||
(,eof eof #f #f no-lang-line)))
|
||||
(check same?
|
||||
(lex "#lang at-exp racket/base\n1\n")
|
||||
`(("#lang" other 1 25 #f)
|
||||
("\n" white-space 25 26 ((proc scribble-lexer) . #f))
|
||||
("1" constant 26 27 ((proc scribble-lexer) . dont-care))
|
||||
("\n" white-space 27 28 ((proc scribble-lexer) . dont-care))
|
||||
(,eof eof 28 28 ((proc scribble-lexer) . dont-care))))
|
||||
|
|
Loading…
Reference in New Issue
Block a user