282 lines
9.0 KiB
Scheme
282 lines
9.0 KiB
Scheme
|
|
(module reader mzscheme
|
|
(provide r6rs-readtable
|
|
(rename r6rs-read read)
|
|
(rename r6rs-read-syntax read-syntax))
|
|
|
|
;; for raise-read-[eof-]error:
|
|
(require (lib "readerr.ss" "syntax"))
|
|
|
|
(define hex-digits (string->list "0123456789abcdefABCDEF"))
|
|
(define standard-delimiters (string->list ";',`()[]{}"))
|
|
|
|
;; hex-value : char -> int
|
|
(define (hex-value ch)
|
|
(cond
|
|
[(char-numeric? ch)
|
|
(- (char->integer ch) 48)]
|
|
[(memv ch '(#\a #\b #\c #\d #\e #\f))
|
|
(- (char->integer ch) 87)]
|
|
[else
|
|
(- (char->integer ch) 55)]))
|
|
|
|
;; read-delimited-string : char input-port .... -> string
|
|
;; Reads a string or symbol, given the closing character
|
|
(define (read-delimited-string closer-ch port
|
|
what src line col pos)
|
|
;; raise-bad-eof
|
|
;; Reports an unexpected EOF in a string/symbol
|
|
(define (raise-bad-eof len)
|
|
(raise-read-eof-error
|
|
(format "unexpected end-of-file in ~a" what)
|
|
src line col pos len))
|
|
|
|
;; to-hex : char int -> int
|
|
;; Checks input and gets it's value as a hex digit
|
|
(define (to-hex ch len)
|
|
(unless (memv ch hex-digits)
|
|
(if (eof-object? ch)
|
|
(raise-bad-eof len)
|
|
(raise-read-error
|
|
(format "expected a hex digit for ~a, found: ~e" what ch)
|
|
src line col pos len)))
|
|
(hex-value ch))
|
|
|
|
;; loop to read string/symbol characters; track the length for error reporting
|
|
(let loop ([chars null][len 1])
|
|
(let ([ch (read-char port)])
|
|
(cond
|
|
;; eof
|
|
[(eof-object? ch) (raise-bad-eof len)]
|
|
;; closing quote or bar
|
|
[(char=? ch closer-ch) (list->string (reverse chars))]
|
|
;; escape
|
|
[(char=? ch #\\)
|
|
(let ([ch (read-char port)])
|
|
(cond
|
|
;; eof after escape
|
|
[(eof-object? ch) (raise-bad-eof (add1 len))]
|
|
;; newline escape
|
|
[(char=? #\newline ch)
|
|
;; Eat whitespace until we find a newline...
|
|
(let w-loop ([len (+ len 1)])
|
|
(let ([ch (peek-char port)])
|
|
(cond
|
|
[(eof-object? ch) (raise-bad-eof len)]
|
|
[(and (char-whitespace? ch)
|
|
(not (char=? #\newline ch)))
|
|
(read-char port)
|
|
(w-loop (+ len 1))]
|
|
[else
|
|
(loop chars len)])))]
|
|
;; space escape
|
|
[(char=? #\space ch)
|
|
(loop (cons #\space chars) (+ len 2))]
|
|
;; 2-digit hex escape
|
|
[(char=? #\x ch)
|
|
(let* ([ch1 (to-hex (read-char port) (+ len 2))]
|
|
[ch2 (to-hex (read-char port) (+ len 3))])
|
|
(loop (cons (integer->char (+ (* ch1 16) ch2))
|
|
chars)
|
|
(+ len 3)))]
|
|
;; 4-digit hex escape
|
|
[(char=? #\u ch)
|
|
(let* ([ch1 (to-hex (read-char port) (+ len 2))]
|
|
[ch2 (to-hex (read-char port) (+ len 3))]
|
|
[ch3 (to-hex (read-char port) (+ len 4))]
|
|
[ch4 (to-hex (read-char port) (+ len 5))])
|
|
(let ([v (+ (* ch1 4096) (* ch2 256) (* ch3 16) ch4)])
|
|
(when (<= #xD8FF v #xDFFF)
|
|
(raise-read-error
|
|
(format "out-of-range character for ~a: \\u~a~a~a~a"
|
|
what ch1 ch2 ch3 ch4)
|
|
src line col pos (+ len 5)))
|
|
(loop (cons (integer->char v) chars)
|
|
(+ len 5))))]
|
|
;; 8-digit hex escape
|
|
[(char=? #\U ch)
|
|
(let* ([ch1 (to-hex (read-char port) (+ len 2))]
|
|
[ch2 (to-hex (read-char port) (+ len 3))]
|
|
[ch3 (to-hex (read-char port) (+ len 4))]
|
|
[ch4 (to-hex (read-char port) (+ len 5))]
|
|
[ch5 (to-hex (read-char port) (+ len 6))]
|
|
[ch6 (to-hex (read-char port) (+ len 7))]
|
|
[ch7 (to-hex (read-char port) (+ len 8))]
|
|
[ch8 (to-hex (read-char port) (+ len 9))])
|
|
(let ([v (+ (* ch1 268435456) (* ch2 16777216) (* ch3 1048576) (* ch4 65536)
|
|
(* ch5 4096) (* ch6 256) (* ch7 16) ch8)])
|
|
(when (or (> v #x10FFFF)
|
|
(<= #xD8FF v #xDFFF))
|
|
(raise-read-error
|
|
(format "out-of-range character for ~a: \\U~a~a~a~a~a~a~a~a"
|
|
what ch1 ch2 ch3 ch4 ch5 ch6 ch7 ch8)
|
|
src line col pos (+ len 9)))
|
|
(loop (cons (integer->char v) chars)
|
|
(+ len 9))))]
|
|
;; other escapes
|
|
[else (let ([v (case ch
|
|
[(#\a) 7]
|
|
[(#\b) 8]
|
|
[(#\t) 9]
|
|
[(#\n) 10]
|
|
[(#\v) 11]
|
|
[(#\f) 12]
|
|
[(#\r) 13]
|
|
[(#\") 34]
|
|
[(#\\) 92]
|
|
[(#\|) 124]
|
|
;; not a valid escape!
|
|
[else
|
|
(raise-read-error
|
|
(format "illegal escape for ~a: \\~a" what ch)
|
|
src line col pos (+ len 2))])])
|
|
(loop (cons (integer->char v) chars) (+ len 2)))]))]
|
|
;; other character
|
|
[else (loop (cons ch chars) (+ len 1))]))))
|
|
|
|
;; read-quoted-symbol
|
|
;; Reader macro for |
|
|
(define (read-quoted-symbol ch port src line col pos)
|
|
(string->symbol (read-delimited-string #\| port
|
|
"symbol" src line col pos)))
|
|
|
|
;; read-quoted-string
|
|
;; Reader macro for "
|
|
(define (read-quoted-string ch port src line col pos)
|
|
(read-delimited-string #\" port
|
|
"string" src line col pos))
|
|
|
|
;; read-character
|
|
;; Reader macro for characters
|
|
(define (read-character ch port src line col pos)
|
|
|
|
;; make-char-const : list-of-char len -> char
|
|
;; Checks whether the character sequence names a char,
|
|
;; and either reports and error or returns the character
|
|
(define (make-char-const chars len)
|
|
(let ([chars (reverse chars)])
|
|
(if (null? (cdr chars))
|
|
;; simple case: single character
|
|
(car chars)
|
|
;; multi-character name:
|
|
(let ([name (list->string chars)])
|
|
;; raise-bad-char
|
|
;; When it's not a valid character
|
|
(define (raise-bad-char detail)
|
|
(raise-read-error
|
|
(format "bad character constant~a: #\\~a" detail name)
|
|
src line col pos len))
|
|
|
|
;; hex-char : int -> char
|
|
;; Checks whether chars has n hex digits, and
|
|
;; produces the character if so
|
|
(define (hex-char n)
|
|
(unless (= (+ n 1) (length chars))
|
|
(raise-bad-char (format " (expected ~a hex digits after #\\~a) "
|
|
n
|
|
(car chars))))
|
|
(for-each (lambda (c)
|
|
(unless (memv c hex-digits)
|
|
(raise-bad-char (format " (expected hex digit, found ~a) " c))))
|
|
(cdr chars))
|
|
(let loop ([n 0][chars (cdr chars)])
|
|
(if (null? chars)
|
|
(begin
|
|
(when (or (> n #x10FFFF)
|
|
(<= #xD8FF n #xDFFF))
|
|
(raise-read-error
|
|
(format "out-of-range character: #\\~a" name)
|
|
src line col pos (+ len 9)))
|
|
(integer->char n))
|
|
(loop (+ (* n 16) (hex-value (car chars)))
|
|
(cdr chars)))))
|
|
|
|
;; Check for standard names or hex, and report an error if not
|
|
(case (string->symbol name)
|
|
[(nul) (integer->char 0)]
|
|
[(alarm) (integer->char 7)]
|
|
[(backspace) (integer->char 8)]
|
|
[(tab) (integer->char 9)]
|
|
[(newline linefeed) (integer->char 10)]
|
|
[(vtab) (integer->char 11)]
|
|
[(page) (integer->char 12)]
|
|
[(return) (integer->char 13)]
|
|
[(esc) (integer->char 27)]
|
|
[(space) (integer->char 32)]
|
|
[(delete) (integer->char 127)]
|
|
[else
|
|
;; Hex?
|
|
(case (car chars)
|
|
[(#\x)
|
|
(hex-char 2)]
|
|
[(#\u)
|
|
(hex-char 4)]
|
|
[(#\U)
|
|
(hex-char 8)]
|
|
[else
|
|
(raise-bad-char "")])])))))
|
|
|
|
;; read the leading character:
|
|
(let ([ch (read-char port)])
|
|
(when (eof-object? ch)
|
|
(raise-read-eof-error "unexpected end-of-file after #\\"
|
|
src line col pos 2))
|
|
;; loop until delimiter:
|
|
(let loop ([len 3][chars (list ch)])
|
|
(let ([ch (peek-char port)])
|
|
(if (eof-object? ch)
|
|
;; eof is a delimiter
|
|
(make-char-const chars len)
|
|
;; otherwise, consult the current readtable to find delimiters
|
|
;; in case someone extends r6rs-readtable:
|
|
(let-values ([(kind proc dispatch-proc)
|
|
(readtable-mapping (current-readtable) ch)])
|
|
(cond
|
|
[(eq? kind 'terminating-macro)
|
|
;; a terminating macro is a delimiter by definition
|
|
(make-char-const chars len)]
|
|
[(or (char-whitespace? ch)
|
|
(member ch standard-delimiters))
|
|
;; something mapped to one of the standard delimiters is
|
|
;; a delimiter
|
|
(make-char-const chars len)]
|
|
[else
|
|
;; otherwise, it's not a delimiter
|
|
(read-char port)
|
|
(loop (add1 len) (cons ch chars))])))))))
|
|
|
|
(define (reject-backslash ch port src line col pos)
|
|
(raise-read-error
|
|
"illegal character in input: \\"
|
|
src line col pos 1))
|
|
|
|
;; r6rs-readtable
|
|
;; Extends MzScheme's default reader to handle quoted symbols,
|
|
;; strings, and characters:
|
|
(define r6rs-readtable
|
|
(make-readtable #f
|
|
;; New syntax:
|
|
#\| 'terminating-macro read-quoted-symbol
|
|
#\" 'terminating-macro read-quoted-string
|
|
#\\ 'dispatch-macro read-character
|
|
;; Disable \ symbol escape:
|
|
#\\ 'terminating-macro reject-backslash))
|
|
|
|
|
|
;; r6rs-read
|
|
;; Like the normal read, but uses r6rs-readtable
|
|
(define r6rs-read
|
|
(case-lambda
|
|
[() (r6rs-read (current-input-port))]
|
|
[(input) (parameterize ([current-readtable r6rs-readtable])
|
|
(read input))]))
|
|
|
|
;; r6rs-read-syntax
|
|
;; Like the normal read-syntax, but uses r6rs-readtable
|
|
(define r6rs-read-syntax
|
|
(case-lambda
|
|
[() (r6rs-read-syntax (object-name (current-input-port)) (current-input-port))]
|
|
[(src-v) (r6rs-read-syntax src-v (current-input-port))]
|
|
[(src-v input) (parameterize ([current-readtable r6rs-readtable])
|
|
(read-syntax src-v input))])))
|