This commit is contained in:
Robby Findler 2013-02-05 09:05:33 -06:00
parent 76cc599ff9
commit 014abe64c0
5 changed files with 414 additions and 404 deletions

View File

@ -1,4 +1,4 @@
;; This implements the equivalent of mzscheme's read-syntax for R5RS scheme.
;; This implements the equivalent of racket's read-syntax for R5RS scheme.
;; It has not been thoroughly tested. Also it will read an entire file into a
;; list of syntax objects, instead of returning one syntax object at a time
@ -59,7 +59,7 @@
[comment (:: #\; (:* (:~ #\newline)) #\newline)]
;; See ${PLTHOME}/collects/syntax-color/scheme-lexer.rkt for an example of
;; See ${PLTHOME}/collects/syntax-color/racket-lexer.rkt for an example of
;; using regexp macros to avoid the cut and paste.
; [numR (:: prefixR complexR)]
; [complexR (:or realR

View File

@ -54,7 +54,7 @@ style lexer and parser generators.
@racket[re]'s against the buffer, and returns the result of
executing the corresponding @racket[action-expr].
@margin-note{The implementation of @racketmodname[syntax-color/scheme-lexer]
@margin-note{The implementation of @racketmodname[syntax-color/racket-lexer]
contains a lexer for the @racketmodname[racket] language.
In addition, files in the @filepath{examples} sub-directory
of the @filepath{parser-tools} collection contain

View File

@ -0,0 +1,383 @@
#lang racket/base
(require parser-tools/lex
(prefix-in : parser-tools/lex-sre))
(provide racket-lexer
racket-lexer/status
racket-nobar-lexer/status)
(define-lex-abbrevs
;; For case insensitivity
[a (char-set "aA")]
[b (char-set "bB")]
[c (char-set "cC")]
[d (char-set "dD")]
[e (char-set "eE")]
[f (char-set "fF")]
[g (char-set "gG")]
[h (char-set "hH")]
[i (char-set "iI")]
[j (char-set "jJ")]
[k (char-set "kK")]
[l (char-set "lL")]
[m (char-set "mM")]
[n (char-set "nN")]
[o (char-set "oO")]
[p (char-set "pP")]
[q (char-set "qQ")]
[r (char-set "rR")]
[s (char-set "sS")]
[t (char-set "tT")]
[u (char-set "uU")]
[v (char-set "vV")]
[w (char-set "wW")]
[x (char-set "xX")]
[y (char-set "yY")]
[z (char-set "zZ")]
[digit (:/ "0" "9")]
[digit2 (:/ "0" "1")]
[digit8 (:/ "0" "7")]
[digit10 digit]
[digit16 (:/ "af" "AF" "09")]
[langchar (:or (:/ "az" "AZ" "09") "+" "-" "_")]
[racket-whitespace whitespace]
[line-comment (:: ";" (:* (:~ #\newline)))]
;; What about char->integer constraint?
[unicode (:or (:: "u" (:** 1 4 digit16))
(:: "U" (:** 1 6 digit16)))]
[character (:or (:: "#\\" any-char)
(:: "#\\" character-name)
(:: "#\\" (:/ "0" "3") digit8 digit8)
(:: "#\\" unicode))]
[character-name (:or (:: s p a c e)
(:: n e w l i n e)
(:: n u l)
(:: n u l l)
(:: b a c k s p a c e)
(:: t a b)
(:: l i n e f e e d)
(:: v t a b)
(:: p a g e)
(:: r e t u r n)
(:: r u b o u t))]
[bad-char (:or "#\\"
(:: "#\\" (:>= 2 alphabetic))
(:: "#\\" (:/ "0" "3") digit8))]
;; What about byte string regexp strings
[str (:or (:: (:? (:or "#px" "#rx")) "\"" (:* string-element (:: "\\" unicode)) "\"")
byte-str)]
[byte-str (:: (:? (:or "#px" "#rx")) "#\"" (:* string-element) "\"")]
[string-element (:or (:~ "\"" "\\")
"\\\""
"\\\\"
"\\a"
"\\b"
"\\t"
"\\n"
"\\v"
"\\f"
"\\r"
"\\e"
"\\'"
(:: "\\" (:** 1 3 digit8))
(:: "\\x" (:** 1 2 digit16))
(:: "\\" #\newline))]
[bad-str (:: (:? (:or "#px" "#rx")) (:? "#") "\""
(:* (:~ "\"" "\\")
(:: "\\" any-char))
(:? "\\" "\""))]
[special-numbers (:or (:: n a n ".0") (:: i n f ".0"))]
[exponent-marker (:or e s f d l)]
[sign (char-set "+-")]
[exactness (:or "#i" "#e" "#I" "#E")]
[radix2 (:or "#b" "#B")]
[radix8 (:or "#o" "#O")]
[radix10 (:or "#d" "#D")]
[radix16 (:or "#x" "#X")]
[script (:: "#!" (:or #\space #\/) (:* (:~ #\newline) (:: #\\ #\newline)))]
[identifier-delims (:or (char-set "\",'`()[]{};") racket-whitespace)]
[identifier-chars (:~ identifier-delims "\\" "|")]
[identifier-escapes (:or (:: "\\" any-char)
(:: "|" (:* (:~ "|")) "|"))]
[identifier-start (:or identifier-escapes
(:~ identifier-delims "\\" "|" "#")
"#%")]
[identifier (:: identifier-start
(:* identifier-escapes identifier-chars))]
[nobar-identifier-escapes (:: "\\" any-char)]
[nobar-identifier-start (:or nobar-identifier-escapes
(:~ identifier-delims "\\" "|" "#")
"#%")]
[nobar-identifier (:: nobar-identifier-start
(:* nobar-identifier-escapes identifier-chars))]
[bad-id-start (:or identifier-escapes
(:~ identifier-delims "\\" "|"))]
[bad-id-escapes (:or identifier-escapes
(:: "|" (:* (:~ "|"))))]
[bad-id (:or (:: bad-id-start
(:* identifier-escapes identifier-chars)
(:? "\\" bad-id-escapes))
"\\"
bad-id-escapes)]
[nobar-bad-id-escapes nobar-identifier-escapes]
[nobar-bad-id (:or (:: bad-id-start
(:* nobar-identifier-escapes identifier-chars)
(:? "\\" nobar-bad-id-escapes))
"\\"
nobar-bad-id-escapes)]
[keyword (:: "#:" (:* identifier-escapes identifier-chars))]
[nobar-keyword (:: "#:" (:* nobar-identifier-escapes identifier-chars))]
[reader-command (:or (:: "#" c s) (:: "#" c i))]
[sharing (:or (:: "#" (make-uinteger digit10) "=")
(:: "#" (make-uinteger digit10) "#"))]
[list-prefix (:or "" "#hash" "#hasheq" "#hasheqv" "#s" (:: "#" (:* digit10)))])
(define-lex-trans make-num
(syntax-rules ()
((_ digit radix) (:: (make-prefix radix) (make-complex digit)))))
(define-lex-trans make-prefix
(syntax-rules ()
((_ radix) (:or (:: radix (:? exactness))
(:: (:? exactness) radix)))))
(define-lex-trans make-complex
(syntax-rules ()
((_ digit)
(:or (make-real digit)
(:: (make-real digit) "@" (make-real digit))
(:: (make-real digit) "+" (:or special-numbers (make-ureal digit)) i)
(:: (make-real digit) "-" (:or special-numbers (make-ureal digit)) i)
(:: (make-real digit) "+" i)
(:: (make-real digit) "-" i)
(:: "+" (:or special-numbers (make-ureal digit)) i)
(:: "-" (:or special-numbers (make-ureal digit)) i)
(:: "+" i)
(:: "-" i)))))
(define-lex-trans make-ureal
(syntax-rules ()
((_ digit) (:or (make-uinteger digit)
(:: (make-uinteger digit) "/" (make-uinteger digit) (:? (make-suffix digit)))
(make-decimal digit)))))
(define-lex-trans make-real
(syntax-rules ()
((_ digit) (:or (:: (:? sign) (make-ureal digit))
(:: (char-set "+-") special-numbers)))))
(define-lex-trans make-uinteger
(syntax-rules ()
((_ digit) (:: (:+ digit) (:* "#")))))
(define-lex-trans make-decimal
(syntax-rules ()
((_ digit)
(:or (:: (make-uinteger digit) (make-suffix digit))
(:: "." (:+ digit) (:* "#") (make-suffix digit))
(:: (:+ digit) "." (:* digit) (:* "#") (make-suffix digit))
(:: (:+ digit) (:+ "#") "." (:* "#") (make-suffix digit))))))
(define-lex-trans make-suffix
(syntax-rules ()
((_ digit) (:or "" (:: exponent-marker (:? sign) (:+ digit))))))
(define (ret lexeme type paren start-pos end-pos status)
(values lexeme type paren (position-offset start-pos) (position-offset end-pos) status))
(define get-next-comment
(lexer
["#|" (values 1 end-pos)]
["|#" (values -1 end-pos)]
[(:or "#" "|" (:* (:~ "|" "#")))
(get-next-comment input-port)]
[(eof) (values 'eof end-pos)]
[(special)
(get-next-comment input-port)]
[(special-comment)
(get-next-comment input-port)]))
(define (read-nested-comment num-opens start-pos input)
(let-values (((diff end) (get-next-comment input)))
(cond
((eq? 'eof diff) (ret "" 'error #f start-pos end 'continue))
(else
(let ((next-num-opens (+ diff num-opens)))
(cond
((= 0 next-num-opens) (ret "" 'comment #f start-pos end 'continue))
(else (read-nested-comment next-num-opens start-pos input))))))))
(define (get-offset i)
(let-values (((x y offset) (port-next-location i)))
offset))
(define (escape-regexp s)
(apply string-append
(map (lambda (c)
(if (memq c '(#\( #\) #\* #\+ #\? #\[ #\] #\. #\^ #\\ #\|))
(string #\\ c)
(string c)))
(string->list s))))
(define (special-read-line i)
(let ((next (peek-char-or-special i)))
(cond
((or (eq? next #\newline) (not (char? next)))
null)
(else
(read-char i)
(cons next (special-read-line i))))))
(define (read-line/skip-over-specials i)
(let loop ()
(let ((next (peek-char-or-special i)))
(cond
((or (eq? next #\newline) (eof-object? next))
null)
(else
(read-char-or-special i)
(if (char? next)
(cons next (loop))
(loop)))))))
(define (get-here-string start-pos i)
(let* ((ender (list->string (special-read-line i)))
(next-char (peek-char-or-special i)))
(cond
((or (equal? ender "") (not (eq? #\newline next-char)))
(values (string-append "#<<" ender) 'error #f start-pos (get-offset i) 'datum))
(else
(read-char i)
(let loop ((acc (list (string-append "#<<" ender "\n"))))
(let* ((next-line (list->string (special-read-line i)))
(next-char (peek-char-or-special i)))
(cond
((not (or (char? next-char) (eof-object? next-char))) ;; a special
(values (apply string-append (reverse (cons next-line acc)))
'error #f start-pos (get-offset i)
'datum))
((equal? next-line ender) ;; end of string
(values (apply string-append (reverse (cons next-line acc)))
'string #f start-pos (get-offset i)
'datum))
((eof-object? next-char)
(values (apply string-append (reverse (cons next-line acc)))
'error #f start-pos (get-offset i)
'datum))
(else
(read-char i)
(loop (cons (string-append next-line "\n") acc))))))))))
(define (racket-lexer in)
(let-values ([(lexeme type paren start end adj) (racket-lexer/status in)])
(values lexeme type paren start end)))
(define-syntax-rule (lexer/status identifier keyword bad-id)
(lexer
[(:+ racket-whitespace)
(ret lexeme 'white-space #f start-pos end-pos 'continue)]
[(:: (:or "#true" "#false" "#t" "#f" "#T" "#F")
(:* (:~ identifier-delims)))
(ret lexeme
(if (member lexeme '("#true" "#false" "#t" "#f" "#T" "#F"))
'constant
'error)
#f start-pos end-pos 'datum)]
[(:or character
(make-num digit2 radix2)
(make-num digit8 radix8)
(make-num digit10 (:? radix10))
(make-num digit16 radix16))
(ret lexeme 'constant #f start-pos end-pos 'datum)]
[keyword (ret lexeme 'parenthesis #f start-pos end-pos 'datum)]
[str (ret lexeme 'string #f start-pos end-pos 'datum)]
[";"
(values (apply string (read-line/skip-over-specials input-port)) 'comment #f
(position-offset start-pos)
(get-offset input-port)
'continue)]
#;
[line-comment
(ret lexeme 'comment #f start-pos end-pos)]
["#;"
(ret lexeme 'sexp-comment #f start-pos end-pos 'continue)]
["#|" (read-nested-comment 1 start-pos input-port)]
[script
(ret lexeme 'comment #f start-pos end-pos 'continue)]
[(:: list-prefix "(")
(ret lexeme 'parenthesis '|(| start-pos end-pos 'open)]
[(:: list-prefix "[")
(ret lexeme 'parenthesis '|[| start-pos end-pos 'open)]
[(:: list-prefix "{")
(ret lexeme 'parenthesis '|{| start-pos end-pos 'open)]
[(:or ")" "]" "}")
(ret lexeme 'parenthesis (string->symbol lexeme) start-pos end-pos 'close)]
[(:or "'" "`" "#'" "#`" "#&")
(ret lexeme 'constant #f start-pos end-pos 'continue)]
[(:or sharing reader-command "." "," ",@" "#," "#,@")
(ret lexeme 'other #f start-pos end-pos 'continue)]
[(:: (:or "#lang " "#!")
(:or langchar
(:: langchar (:* (:or langchar "/")) langchar)))
(ret lexeme 'other #f start-pos end-pos 'continue)]
[(:: (:or "#lang " "#!") (:* (:& any-char (complement whitespace))))
(ret lexeme 'error #f start-pos end-pos 'continue)]
[identifier
(ret lexeme 'symbol #f start-pos end-pos 'datum)]
["#<<"
(get-here-string (position-offset start-pos) input-port)]
[(special)
(ret "" 'no-color #f start-pos end-pos 'datum)]
[(special-comment)
(ret "" 'comment #f start-pos end-pos 'continue)]
[(eof) (values lexeme 'eof #f #f #f #f)]
[(:or bad-char bad-str
(:& bad-id
(complement (:: (:or (:: "#" (:or f t)) reader-command sharing "#<<" "#\\" "#|" "#;" "#&" script)
any-string))))
(ret lexeme 'error #f start-pos end-pos 'bad)]
[any-char (extend-error lexeme start-pos end-pos input-port)]))
(define racket-lexer/status (lexer/status identifier keyword bad-id))
(define racket-nobar-lexer/status (lexer/status nobar-identifier nobar-keyword nobar-bad-id))
(define (extend-error lexeme start end in)
(if (memq (peek-char-or-special in)
`(special #\newline #\return #\tab #\space #\vtab
#\" #\, #\' #\` #\( #\) #\[ #\] #\{ #\} #\;
,eof))
(ret lexeme 'error #f start end 'bad)
(let-values (((rest end-pos) (get-chunk in)))
(ret (string-append lexeme rest) 'error #f start end-pos 'bad))))
(define get-chunk
(lexer
((:+ (:~ identifier-delims)) (values lexeme end-pos))))

View File

@ -1,386 +1,13 @@
(module scheme-lexer mzscheme
(require parser-tools/lex
(prefix : parser-tools/lex-sre))
(provide scheme-lexer
scheme-lexer/status
scheme-nobar-lexer/status)
(define-lex-abbrevs
;; For case insensitivity
[a (char-set "aA")]
[b (char-set "bB")]
[c (char-set "cC")]
[d (char-set "dD")]
[e (char-set "eE")]
[f (char-set "fF")]
[g (char-set "gG")]
[h (char-set "hH")]
[i (char-set "iI")]
[j (char-set "jJ")]
[k (char-set "kK")]
[l (char-set "lL")]
[m (char-set "mM")]
[n (char-set "nN")]
[o (char-set "oO")]
[p (char-set "pP")]
[q (char-set "qQ")]
[r (char-set "rR")]
[s (char-set "sS")]
[t (char-set "tT")]
[u (char-set "uU")]
[v (char-set "vV")]
[w (char-set "wW")]
[x (char-set "xX")]
[y (char-set "yY")]
[z (char-set "zZ")]
#lang racket/base
#|
[digit (:/ "0" "9")]
[digit2 (:/ "0" "1")]
[digit8 (:/ "0" "7")]
[digit10 digit]
[digit16 (:/ "af" "AF" "09")]
This file is provided for backwards compatibility.
New code should use racket-lexer.rkt.
[langchar (:or (:/ "az" "AZ" "09") "+" "-" "_")]
[scheme-whitespace whitespace]
[line-comment (:: ";" (:* (:~ #\newline)))]
;; What about char->integer constraint?
[unicode (:or (:: "u" (:** 1 4 digit16))
(:: "U" (:** 1 6 digit16)))]
[character (:or (:: "#\\" any-char)
(:: "#\\" character-name)
(:: "#\\" (:/ "0" "3") digit8 digit8)
(:: "#\\" unicode))]
[character-name (:or (:: s p a c e)
(:: n e w l i n e)
(:: n u l)
(:: n u l l)
(:: b a c k s p a c e)
(:: t a b)
(:: l i n e f e e d)
(:: v t a b)
(:: p a g e)
(:: r e t u r n)
(:: r u b o u t))]
[bad-char (:or "#\\"
(:: "#\\" (:>= 2 alphabetic))
(:: "#\\" (:/ "0" "3") digit8))]
;; What about byte string regexp strings
[str (:or (:: (:? (:or "#px" "#rx")) "\"" (:* string-element (:: "\\" unicode)) "\"")
byte-str)]
[byte-str (:: (:? (:or "#px" "#rx")) "#\"" (:* string-element) "\"")]
[string-element (:or (:~ "\"" "\\")
"\\\""
"\\\\"
"\\a"
"\\b"
"\\t"
"\\n"
"\\v"
"\\f"
"\\r"
"\\e"
"\\'"
(:: "\\" (:** 1 3 digit8))
(:: "\\x" (:** 1 2 digit16))
(:: "\\" #\newline))]
[bad-str (:: (:? (:or "#px" "#rx")) (:? "#") "\""
(:* (:~ "\"" "\\")
(:: "\\" any-char))
(:? "\\" "\""))]
[special-numbers (:or (:: n a n ".0") (:: i n f ".0"))]
[exponent-marker (:or e s f d l)]
[sign (char-set "+-")]
[exactness (:or "#i" "#e" "#I" "#E")]
[radix2 (:or "#b" "#B")]
[radix8 (:or "#o" "#O")]
[radix10 (:or "#d" "#D")]
[radix16 (:or "#x" "#X")]
[script (:: "#!" (:or #\space #\/) (:* (:~ #\newline) (:: #\\ #\newline)))]
[identifier-delims (:or (char-set "\",'`()[]{};") scheme-whitespace)]
[identifier-chars (:~ identifier-delims "\\" "|")]
[identifier-escapes (:or (:: "\\" any-char)
(:: "|" (:* (:~ "|")) "|"))]
[identifier-start (:or identifier-escapes
(:~ identifier-delims "\\" "|" "#")
"#%")]
[identifier (:: identifier-start
(:* identifier-escapes identifier-chars))]
[nobar-identifier-escapes (:: "\\" any-char)]
[nobar-identifier-start (:or nobar-identifier-escapes
(:~ identifier-delims "\\" "|" "#")
"#%")]
[nobar-identifier (:: nobar-identifier-start
(:* nobar-identifier-escapes identifier-chars))]
[bad-id-start (:or identifier-escapes
(:~ identifier-delims "\\" "|"))]
[bad-id-escapes (:or identifier-escapes
(:: "|" (:* (:~ "|"))))]
[bad-id (:or (:: bad-id-start
(:* identifier-escapes identifier-chars)
(:? "\\" bad-id-escapes))
"\\"
bad-id-escapes)]
[nobar-bad-id-escapes nobar-identifier-escapes]
[nobar-bad-id (:or (:: bad-id-start
(:* nobar-identifier-escapes identifier-chars)
(:? "\\" nobar-bad-id-escapes))
"\\"
nobar-bad-id-escapes)]
[keyword (:: "#:" (:* identifier-escapes identifier-chars))]
[nobar-keyword (:: "#:" (:* nobar-identifier-escapes identifier-chars))]
[reader-command (:or (:: "#" c s) (:: "#" c i))]
[sharing (:or (:: "#" (make-uinteger digit10) "=")
(:: "#" (make-uinteger digit10) "#"))]
[list-prefix (:or "" "#hash" "#hasheq" "#hasheqv" "#s" (:: "#" (:* digit10)))])
(define-lex-trans make-num
(syntax-rules ()
((_ digit radix) (:: (make-prefix radix) (make-complex digit)))))
(define-lex-trans make-prefix
(syntax-rules ()
((_ radix) (:or (:: radix (:? exactness))
(:: (:? exactness) radix)))))
(define-lex-trans make-complex
(syntax-rules ()
((_ digit)
(:or (make-real digit)
(:: (make-real digit) "@" (make-real digit))
(:: (make-real digit) "+" (:or special-numbers (make-ureal digit)) i)
(:: (make-real digit) "-" (:or special-numbers (make-ureal digit)) i)
(:: (make-real digit) "+" i)
(:: (make-real digit) "-" i)
(:: "+" (:or special-numbers (make-ureal digit)) i)
(:: "-" (:or special-numbers (make-ureal digit)) i)
(:: "+" i)
(:: "-" i)))))
(define-lex-trans make-ureal
(syntax-rules ()
((_ digit) (:or (make-uinteger digit)
(:: (make-uinteger digit) "/" (make-uinteger digit) (:? (make-suffix digit)))
(make-decimal digit)))))
(define-lex-trans make-real
(syntax-rules ()
((_ digit) (:or (:: (:? sign) (make-ureal digit))
(:: (char-set "+-") special-numbers)))))
(define-lex-trans make-uinteger
(syntax-rules ()
((_ digit) (:: (:+ digit) (:* "#")))))
(define-lex-trans make-decimal
(syntax-rules ()
((_ digit)
(:or (:: (make-uinteger digit) (make-suffix digit))
(:: "." (:+ digit) (:* "#") (make-suffix digit))
(:: (:+ digit) "." (:* digit) (:* "#") (make-suffix digit))
(:: (:+ digit) (:+ "#") "." (:* "#") (make-suffix digit))))))
(define-lex-trans make-suffix
(syntax-rules ()
((_ digit) (:or "" (:: exponent-marker (:? sign) (:+ digit))))))
(define (ret lexeme type paren start-pos end-pos status)
(values lexeme type paren (position-offset start-pos) (position-offset end-pos) status))
(define get-next-comment
(lexer
["#|" (values 1 end-pos)]
["|#" (values -1 end-pos)]
[(:or "#" "|" (:* (:~ "|" "#")))
(get-next-comment input-port)]
[(eof) (values 'eof end-pos)]
[(special)
(get-next-comment input-port)]
[(special-comment)
(get-next-comment input-port)]))
(define (read-nested-comment num-opens start-pos input)
(let-values (((diff end) (get-next-comment input)))
(cond
((eq? 'eof diff) (ret "" 'error #f start-pos end 'continue))
(else
(let ((next-num-opens (+ diff num-opens)))
(cond
((= 0 next-num-opens) (ret "" 'comment #f start-pos end 'continue))
(else (read-nested-comment next-num-opens start-pos input))))))))
(define (get-offset i)
(let-values (((x y offset) (port-next-location i)))
offset))
(define (escape-regexp s)
(apply string-append
(map (lambda (c)
(if (memq c '(#\( #\) #\* #\+ #\? #\[ #\] #\. #\^ #\\ #\|))
(string #\\ c)
(string c)))
(string->list s))))
(define (special-read-line i)
(let ((next (peek-char-or-special i)))
(cond
((or (eq? next #\newline) (not (char? next)))
null)
(else
(read-char i)
(cons next (special-read-line i))))))
(define (read-line/skip-over-specials i)
(let loop ()
(let ((next (peek-char-or-special i)))
(cond
((or (eq? next #\newline) (eof-object? next))
null)
(else
(read-char-or-special i)
(if (char? next)
(cons next (loop))
(loop)))))))
(define (get-here-string start-pos i)
(let* ((ender (list->string (special-read-line i)))
(next-char (peek-char-or-special i)))
(cond
((or (equal? ender "") (not (eq? #\newline next-char)))
(values (string-append "#<<" ender) 'error #f start-pos (get-offset i) 'datum))
(else
(read-char i)
(let loop ((acc (list (string-append "#<<" ender "\n"))))
(let* ((next-line (list->string (special-read-line i)))
(next-char (peek-char-or-special i)))
(cond
((not (or (char? next-char) (eof-object? next-char))) ;; a special
(values (apply string-append (reverse (cons next-line acc)))
'error #f start-pos (get-offset i)
'datum))
((equal? next-line ender) ;; end of string
(values (apply string-append (reverse (cons next-line acc)))
'string #f start-pos (get-offset i)
'datum))
((eof-object? next-char)
(values (apply string-append (reverse (cons next-line acc)))
'error #f start-pos (get-offset i)
'datum))
(else
(read-char i)
(loop (cons (string-append next-line "\n") acc))))))))))
(define (scheme-lexer in)
(let-values ([(lexeme type paren start end adj) (scheme-lexer/status in)])
(values lexeme type paren start end)))
(define-syntax-rule (lexer/status identifier keyword bad-id)
(lexer
[(:+ scheme-whitespace)
(ret lexeme 'white-space #f start-pos end-pos 'continue)]
[(:: (:or "#true" "#false" "#t" "#f" "#T" "#F")
(:* (:~ identifier-delims)))
(ret lexeme
(if (member lexeme '("#true" "#false" "#t" "#f" "#T" "#F"))
'constant
'error)
#f start-pos end-pos 'datum)]
[(:or character
(make-num digit2 radix2)
(make-num digit8 radix8)
(make-num digit10 (:? radix10))
(make-num digit16 radix16))
(ret lexeme 'constant #f start-pos end-pos 'datum)]
[keyword (ret lexeme 'parenthesis #f start-pos end-pos 'datum)]
[str (ret lexeme 'string #f start-pos end-pos 'datum)]
[";"
(values (apply string (read-line/skip-over-specials input-port)) 'comment #f
(position-offset start-pos)
(get-offset input-port)
'continue)]
#;
[line-comment
(ret lexeme 'comment #f start-pos end-pos)]
["#;"
(ret lexeme 'sexp-comment #f start-pos end-pos 'continue)]
["#|" (read-nested-comment 1 start-pos input-port)]
[script
(ret lexeme 'comment #f start-pos end-pos 'continue)]
[(:: list-prefix "(")
(ret lexeme 'parenthesis '|(| start-pos end-pos 'open)]
[(:: list-prefix "[")
(ret lexeme 'parenthesis '|[| start-pos end-pos 'open)]
[(:: list-prefix "{")
(ret lexeme 'parenthesis '|{| start-pos end-pos 'open)]
[(:or ")" "]" "}")
(ret lexeme 'parenthesis (string->symbol lexeme) start-pos end-pos 'close)]
[(:or "'" "`" "#'" "#`" "#&")
(ret lexeme 'constant #f start-pos end-pos 'continue)]
[(:or sharing reader-command "." "," ",@" "#," "#,@")
(ret lexeme 'other #f start-pos end-pos 'continue)]
[(:: (:or "#lang " "#!")
(:or langchar
(:: langchar (:* (:or langchar "/")) langchar)))
(ret lexeme 'other #f start-pos end-pos 'continue)]
[(:: (:or "#lang " "#!") (:* (:& any-char (complement whitespace))))
(ret lexeme 'error #f start-pos end-pos 'continue)]
[identifier
(ret lexeme 'symbol #f start-pos end-pos 'datum)]
["#<<"
(get-here-string (position-offset start-pos) input-port)]
[(special)
(ret "" 'no-color #f start-pos end-pos 'datum)]
[(special-comment)
(ret "" 'comment #f start-pos end-pos 'continue)]
[(eof) (values lexeme 'eof #f #f #f #f)]
[(:or bad-char bad-str
(:& bad-id
(complement (:: (:or (:: "#" (:or f t)) reader-command sharing "#<<" "#\\" "#|" "#;" "#&" script)
any-string))))
(ret lexeme 'error #f start-pos end-pos 'bad)]
[any-char (extend-error lexeme start-pos end-pos input-port)]))
(define scheme-lexer/status (lexer/status identifier keyword bad-id))
(define scheme-nobar-lexer/status (lexer/status nobar-identifier nobar-keyword nobar-bad-id))
(define (extend-error lexeme start end in)
(if (memq (peek-char-or-special in)
`(special #\newline #\return #\tab #\space #\vtab
#\" #\, #\' #\` #\( #\) #\[ #\] #\{ #\} #\;
,eof))
(ret lexeme 'error #f start end 'bad)
(let-values (((rest end-pos) (get-chunk in)))
(ret (string-append lexeme rest) 'error #f start end-pos 'bad))))
(define get-chunk
(lexer
((:+ (:~ identifier-delims)) (values lexeme end-pos))))
)
|#
(require "racket-lexer.rkt")
(provide
(rename-out [racket-lexer scheme-lexer]
[racket-lexer/status scheme-lexer/status]
[racket-lexer/status scheme-nobar-lexer/status]))

View File

@ -2,13 +2,13 @@
@(require scribble/manual
(for-label syntax-color/token-tree
syntax-color/paren-tree
syntax-color/scheme-lexer
syntax-color/racket-lexer
syntax-color/module-lexer
syntax-color/scribble-lexer
syntax-color/default-lexer
framework/framework
framework/private/color
scheme))
racket))
@title{Syntax Color: Utilities}
@ -31,22 +31,22 @@ Parenthesis matching code built on top of @racket[token-tree%].
@; ----------------------------------------------------------------------
@section{Scheme Lexer}
@section{Racket Lexer}
@defmodule[syntax-color/scheme-lexer]
@defmodule[syntax-color/racket-lexer]
@defproc[(scheme-lexer [in input-port?])
@defproc[(racket-lexer [in input-port?])
(values (or/c string? eof-object?)
symbol?
(or/c symbol? false/c)
(or/c number? false/c)
(or/c number? false/c))]{
A lexer for Scheme, including reader extensions (@secref[#:doc'(lib
A lexer for Racket, including reader extensions (@secref[#:doc'(lib
"scribblings/reference/reference.scrbl")]{Reader_Extension}), built
specifically for @racket[color:text%].
The @racket[scheme-lexer] function returns 5 values:
The @racket[racket-lexer] function returns 5 values:
@itemize[
@item{Either a string containing the matching text or the eof object.
@ -64,7 +64,7 @@ The @racket[scheme-lexer] function returns 5 values:
}
@defproc[(scheme-lexer/status [in input-port?])
@defproc[(racket-lexer/status [in input-port?])
(values (or/c string? eof-object?)
symbol?
(or/c symbol? false/c)
@ -72,13 +72,13 @@ The @racket[scheme-lexer] function returns 5 values:
(or/c number? false/c)
(or/c 'datum 'open 'close 'continue))]{
Like @racket[scheme-lexer], but returns an extra value. The last
Like @racket[racket-lexer], but returns an extra value. The last
return value indicates whether the consumed token should count as a
datum, an opening parenthesis (or similar starting token to group
other tokens), a closing parenthesis (or similar), or a prefix (such
as whitespace) on a datum.}
@defproc[(scheme-nobar-lexer/status [in input-port?])
@defproc[(racket-nobar-lexer/status [in input-port?])
(values (or/c string? eof-object?)
symbol?
(or/c symbol? false/c)
@ -86,8 +86,8 @@ as whitespace) on a datum.}
(or/c number? false/c)
(or/c 'datum 'open 'close 'continue))]{
Like @racket[scheme-lexer/status], but for a dialect of Scheme where
@litchar{|} is a delimiter instead of quoting syntax for a symbol.
Like @racket[racket-lexer/status], except it treats
@litchar{|} as a delimiter instead of quoting syntax for a symbol.
This function is used by @racket[scribble-lexer].}
@ -140,14 +140,14 @@ A lexer that only identifies @litchar{(}, @litchar{)}, @litchar{[},
(-> input-port? any)
(cons/c (-> input-port? any/c any) any/c)))]{
Like @racket[scheme-lexer], but with several differences:
Like @racket[racket-lexer], but with several differences:
@itemize[
@item{The @racket[module-lexer] function accepts an offset and lexer
mode, instead of just an input port.}
@item{In addition to the results of @racket[scheme-lexer],
@item{In addition to the results of @racket[racket-lexer],
@racket[module-lexer] returns a backup distance and a new lexer
mode.}
@ -169,7 +169,7 @@ Like @racket[scheme-lexer], but with several differences:
If the language is specified but it provides no
@racket[get-info] or @racket['color-lexer] result, then
@racket[scheme-lexer] is returned as the mode.}
@racket[racket-lexer] is returned as the mode.}
@item{When @racket[mode] is a lexer procedure, the lexer is applied
to @racket[in]. The lexer's results are returned, plus the
@ -197,7 +197,7 @@ Like @racket[scheme-lexer], but with several differences:
exact-nonnegative-integer?
any/c)]{
Like @racket[scheme-lexer], but for Scheme extended with Scribbles
Like @racket[racket-lexer], but for Racket extended with Scribble's
@"@" notation (see @secref[#:doc '(lib
"scribblings/scribble/scribble.scrbl") "reader"]).}
@ -213,7 +213,7 @@ Like @racket[scheme-lexer], but for Scheme extended with Scribbles
any/c)]{
Like @racket[scribble-lexer], but starting in ``text'' mode instead of
Scheme mode.}
Racket mode.}
@; ----------------------------------------------------------------------