diff --git a/collects/parser-tools/examples/read.rkt b/collects/parser-tools/examples/read.rkt index 842eff7cd1..2dc9475aaf 100644 --- a/collects/parser-tools/examples/read.rkt +++ b/collects/parser-tools/examples/read.rkt @@ -1,4 +1,4 @@ -;; This implements the equivalent of mzscheme's read-syntax for R5RS scheme. +;; This implements the equivalent of racket's read-syntax for R5RS scheme. ;; It has not been thoroughly tested. Also it will read an entire file into a ;; list of syntax objects, instead of returning one syntax object at a time @@ -59,7 +59,7 @@ [comment (:: #\; (:* (:~ #\newline)) #\newline)] - ;; See ${PLTHOME}/collects/syntax-color/scheme-lexer.rkt for an example of + ;; See ${PLTHOME}/collects/syntax-color/racket-lexer.rkt for an example of ;; using regexp macros to avoid the cut and paste. ; [numR (:: prefixR complexR)] ; [complexR (:or realR diff --git a/collects/parser-tools/parser-tools.scrbl b/collects/parser-tools/parser-tools.scrbl index bdf7fff0ff..cb3a84fcc7 100644 --- a/collects/parser-tools/parser-tools.scrbl +++ b/collects/parser-tools/parser-tools.scrbl @@ -54,7 +54,7 @@ style lexer and parser generators. @racket[re]'s against the buffer, and returns the result of executing the corresponding @racket[action-expr]. - @margin-note{The implementation of @racketmodname[syntax-color/scheme-lexer] + @margin-note{The implementation of @racketmodname[syntax-color/racket-lexer] contains a lexer for the @racketmodname[racket] language. In addition, files in the @filepath{examples} sub-directory of the @filepath{parser-tools} collection contain diff --git a/collects/syntax-color/racket-lexer.rkt b/collects/syntax-color/racket-lexer.rkt new file mode 100644 index 0000000000..e139291406 --- /dev/null +++ b/collects/syntax-color/racket-lexer.rkt @@ -0,0 +1,383 @@ +#lang racket/base + + (require parser-tools/lex + (prefix-in : parser-tools/lex-sre)) + + (provide racket-lexer + racket-lexer/status + racket-nobar-lexer/status) + + (define-lex-abbrevs + + ;; For case insensitivity + [a (char-set "aA")] + [b (char-set "bB")] + [c (char-set "cC")] + [d (char-set "dD")] + [e (char-set "eE")] + [f (char-set "fF")] + [g (char-set "gG")] + [h (char-set "hH")] + [i (char-set "iI")] + [j (char-set "jJ")] + [k (char-set "kK")] + [l (char-set "lL")] + [m (char-set "mM")] + [n (char-set "nN")] + [o (char-set "oO")] + [p (char-set "pP")] + [q (char-set "qQ")] + [r (char-set "rR")] + [s (char-set "sS")] + [t (char-set "tT")] + [u (char-set "uU")] + [v (char-set "vV")] + [w (char-set "wW")] + [x (char-set "xX")] + [y (char-set "yY")] + [z (char-set "zZ")] + + [digit (:/ "0" "9")] + [digit2 (:/ "0" "1")] + [digit8 (:/ "0" "7")] + [digit10 digit] + [digit16 (:/ "af" "AF" "09")] + + [langchar (:or (:/ "az" "AZ" "09") "+" "-" "_")] + + [racket-whitespace whitespace] + + [line-comment (:: ";" (:* (:~ #\newline)))] + + + ;; What about char->integer constraint? + [unicode (:or (:: "u" (:** 1 4 digit16)) + (:: "U" (:** 1 6 digit16)))] + + [character (:or (:: "#\\" any-char) + (:: "#\\" character-name) + (:: "#\\" (:/ "0" "3") digit8 digit8) + (:: "#\\" unicode))] + + [character-name (:or (:: s p a c e) + (:: n e w l i n e) + (:: n u l) + (:: n u l l) + (:: b a c k s p a c e) + (:: t a b) + (:: l i n e f e e d) + (:: v t a b) + (:: p a g e) + (:: r e t u r n) + (:: r u b o u t))] + + [bad-char (:or "#\\" + (:: "#\\" (:>= 2 alphabetic)) + (:: "#\\" (:/ "0" "3") digit8))] + + ;; What about byte string regexp strings + [str (:or (:: (:? (:or "#px" "#rx")) "\"" (:* string-element (:: "\\" unicode)) "\"") + byte-str)] + [byte-str (:: (:? (:or "#px" "#rx")) "#\"" (:* string-element) "\"")] + [string-element (:or (:~ "\"" "\\") + "\\\"" + "\\\\" + "\\a" + "\\b" + "\\t" + "\\n" + "\\v" + "\\f" + "\\r" + "\\e" + "\\'" + (:: "\\" (:** 1 3 digit8)) + (:: "\\x" (:** 1 2 digit16)) + (:: "\\" #\newline))] + + [bad-str (:: (:? (:or "#px" "#rx")) (:? "#") "\"" + (:* (:~ "\"" "\\") + (:: "\\" any-char)) + (:? "\\" "\""))] + + + [special-numbers (:or (:: n a n ".0") (:: i n f ".0"))] + [exponent-marker (:or e s f d l)] + [sign (char-set "+-")] + [exactness (:or "#i" "#e" "#I" "#E")] + [radix2 (:or "#b" "#B")] + [radix8 (:or "#o" "#O")] + [radix10 (:or "#d" "#D")] + [radix16 (:or "#x" "#X")] + + [script (:: "#!" (:or #\space #\/) (:* (:~ #\newline) (:: #\\ #\newline)))] + + [identifier-delims (:or (char-set "\",'`()[]{};") racket-whitespace)] + [identifier-chars (:~ identifier-delims "\\" "|")] + [identifier-escapes (:or (:: "\\" any-char) + (:: "|" (:* (:~ "|")) "|"))] + [identifier-start (:or identifier-escapes + (:~ identifier-delims "\\" "|" "#") + "#%")] + [identifier (:: identifier-start + (:* identifier-escapes identifier-chars))] + + [nobar-identifier-escapes (:: "\\" any-char)] + [nobar-identifier-start (:or nobar-identifier-escapes + (:~ identifier-delims "\\" "|" "#") + "#%")] + [nobar-identifier (:: nobar-identifier-start + (:* nobar-identifier-escapes identifier-chars))] + + [bad-id-start (:or identifier-escapes + (:~ identifier-delims "\\" "|"))] + [bad-id-escapes (:or identifier-escapes + (:: "|" (:* (:~ "|"))))] + [bad-id (:or (:: bad-id-start + (:* identifier-escapes identifier-chars) + (:? "\\" bad-id-escapes)) + "\\" + bad-id-escapes)] + + + [nobar-bad-id-escapes nobar-identifier-escapes] + [nobar-bad-id (:or (:: bad-id-start + (:* nobar-identifier-escapes identifier-chars) + (:? "\\" nobar-bad-id-escapes)) + "\\" + nobar-bad-id-escapes)] + + [keyword (:: "#:" (:* identifier-escapes identifier-chars))] + [nobar-keyword (:: "#:" (:* nobar-identifier-escapes identifier-chars))] + + [reader-command (:or (:: "#" c s) (:: "#" c i))] + [sharing (:or (:: "#" (make-uinteger digit10) "=") + (:: "#" (make-uinteger digit10) "#"))] + + [list-prefix (:or "" "#hash" "#hasheq" "#hasheqv" "#s" (:: "#" (:* digit10)))]) + + (define-lex-trans make-num + (syntax-rules () + ((_ digit radix) (:: (make-prefix radix) (make-complex digit))))) + + (define-lex-trans make-prefix + (syntax-rules () + ((_ radix) (:or (:: radix (:? exactness)) + (:: (:? exactness) radix))))) + + (define-lex-trans make-complex + (syntax-rules () + ((_ digit) + (:or (make-real digit) + (:: (make-real digit) "@" (make-real digit)) + (:: (make-real digit) "+" (:or special-numbers (make-ureal digit)) i) + (:: (make-real digit) "-" (:or special-numbers (make-ureal digit)) i) + (:: (make-real digit) "+" i) + (:: (make-real digit) "-" i) + (:: "+" (:or special-numbers (make-ureal digit)) i) + (:: "-" (:or special-numbers (make-ureal digit)) i) + (:: "+" i) + (:: "-" i))))) + + (define-lex-trans make-ureal + (syntax-rules () + ((_ digit) (:or (make-uinteger digit) + (:: (make-uinteger digit) "/" (make-uinteger digit) (:? (make-suffix digit))) + (make-decimal digit))))) + + (define-lex-trans make-real + (syntax-rules () + ((_ digit) (:or (:: (:? sign) (make-ureal digit)) + (:: (char-set "+-") special-numbers))))) + + (define-lex-trans make-uinteger + (syntax-rules () + ((_ digit) (:: (:+ digit) (:* "#"))))) + + (define-lex-trans make-decimal + (syntax-rules () + ((_ digit) + (:or (:: (make-uinteger digit) (make-suffix digit)) + (:: "." (:+ digit) (:* "#") (make-suffix digit)) + (:: (:+ digit) "." (:* digit) (:* "#") (make-suffix digit)) + (:: (:+ digit) (:+ "#") "." (:* "#") (make-suffix digit)))))) + + (define-lex-trans make-suffix + (syntax-rules () + ((_ digit) (:or "" (:: exponent-marker (:? sign) (:+ digit)))))) + + + (define (ret lexeme type paren start-pos end-pos status) + (values lexeme type paren (position-offset start-pos) (position-offset end-pos) status)) + + + (define get-next-comment + (lexer + ["#|" (values 1 end-pos)] + ["|#" (values -1 end-pos)] + [(:or "#" "|" (:* (:~ "|" "#"))) + (get-next-comment input-port)] + [(eof) (values 'eof end-pos)] + [(special) + (get-next-comment input-port)] + [(special-comment) + (get-next-comment input-port)])) + + (define (read-nested-comment num-opens start-pos input) + (let-values (((diff end) (get-next-comment input))) + (cond + ((eq? 'eof diff) (ret "" 'error #f start-pos end 'continue)) + (else + (let ((next-num-opens (+ diff num-opens))) + (cond + ((= 0 next-num-opens) (ret "" 'comment #f start-pos end 'continue)) + (else (read-nested-comment next-num-opens start-pos input)))))))) + + (define (get-offset i) + (let-values (((x y offset) (port-next-location i))) + offset)) + + (define (escape-regexp s) + (apply string-append + (map (lambda (c) + (if (memq c '(#\( #\) #\* #\+ #\? #\[ #\] #\. #\^ #\\ #\|)) + (string #\\ c) + (string c))) + (string->list s)))) + + (define (special-read-line i) + (let ((next (peek-char-or-special i))) + (cond + ((or (eq? next #\newline) (not (char? next))) + null) + (else + (read-char i) + (cons next (special-read-line i)))))) + + (define (read-line/skip-over-specials i) + (let loop () + (let ((next (peek-char-or-special i))) + (cond + ((or (eq? next #\newline) (eof-object? next)) + null) + (else + (read-char-or-special i) + (if (char? next) + (cons next (loop)) + (loop))))))) + + (define (get-here-string start-pos i) + (let* ((ender (list->string (special-read-line i))) + (next-char (peek-char-or-special i))) + (cond + ((or (equal? ender "") (not (eq? #\newline next-char))) + (values (string-append "#<<" ender) 'error #f start-pos (get-offset i) 'datum)) + (else + (read-char i) + (let loop ((acc (list (string-append "#<<" ender "\n")))) + (let* ((next-line (list->string (special-read-line i))) + (next-char (peek-char-or-special i))) + (cond + ((not (or (char? next-char) (eof-object? next-char))) ;; a special + (values (apply string-append (reverse (cons next-line acc))) + 'error #f start-pos (get-offset i) + 'datum)) + ((equal? next-line ender) ;; end of string + (values (apply string-append (reverse (cons next-line acc))) + 'string #f start-pos (get-offset i) + 'datum)) + ((eof-object? next-char) + (values (apply string-append (reverse (cons next-line acc))) + 'error #f start-pos (get-offset i) + 'datum)) + (else + (read-char i) + (loop (cons (string-append next-line "\n") acc)))))))))) + + (define (racket-lexer in) + (let-values ([(lexeme type paren start end adj) (racket-lexer/status in)]) + (values lexeme type paren start end))) + + (define-syntax-rule (lexer/status identifier keyword bad-id) + (lexer + [(:+ racket-whitespace) + (ret lexeme 'white-space #f start-pos end-pos 'continue)] + [(:: (:or "#true" "#false" "#t" "#f" "#T" "#F") + (:* (:~ identifier-delims))) + (ret lexeme + (if (member lexeme '("#true" "#false" "#t" "#f" "#T" "#F")) + 'constant + 'error) + #f start-pos end-pos 'datum)] + [(:or character + (make-num digit2 radix2) + (make-num digit8 radix8) + (make-num digit10 (:? radix10)) + (make-num digit16 radix16)) + (ret lexeme 'constant #f start-pos end-pos 'datum)] + [keyword (ret lexeme 'parenthesis #f start-pos end-pos 'datum)] + [str (ret lexeme 'string #f start-pos end-pos 'datum)] + [";" + (values (apply string (read-line/skip-over-specials input-port)) 'comment #f + (position-offset start-pos) + (get-offset input-port) + 'continue)] + #; + [line-comment + (ret lexeme 'comment #f start-pos end-pos)] + ["#;" + (ret lexeme 'sexp-comment #f start-pos end-pos 'continue)] + ["#|" (read-nested-comment 1 start-pos input-port)] + [script + (ret lexeme 'comment #f start-pos end-pos 'continue)] + [(:: list-prefix "(") + (ret lexeme 'parenthesis '|(| start-pos end-pos 'open)] + [(:: list-prefix "[") + (ret lexeme 'parenthesis '|[| start-pos end-pos 'open)] + [(:: list-prefix "{") + (ret lexeme 'parenthesis '|{| start-pos end-pos 'open)] + [(:or ")" "]" "}") + (ret lexeme 'parenthesis (string->symbol lexeme) start-pos end-pos 'close)] + [(:or "'" "`" "#'" "#`" "#&") + (ret lexeme 'constant #f start-pos end-pos 'continue)] + [(:or sharing reader-command "." "," ",@" "#," "#,@") + (ret lexeme 'other #f start-pos end-pos 'continue)] + + [(:: (:or "#lang " "#!") + (:or langchar + (:: langchar (:* (:or langchar "/")) langchar))) + (ret lexeme 'other #f start-pos end-pos 'continue)] + [(:: (:or "#lang " "#!") (:* (:& any-char (complement whitespace)))) + (ret lexeme 'error #f start-pos end-pos 'continue)] + + [identifier + (ret lexeme 'symbol #f start-pos end-pos 'datum)] + ["#<<" + (get-here-string (position-offset start-pos) input-port)] + [(special) + (ret "" 'no-color #f start-pos end-pos 'datum)] + [(special-comment) + (ret "" 'comment #f start-pos end-pos 'continue)] + [(eof) (values lexeme 'eof #f #f #f #f)] + [(:or bad-char bad-str + (:& bad-id + (complement (:: (:or (:: "#" (:or f t)) reader-command sharing "#<<" "#\\" "#|" "#;" "#&" script) + any-string)))) + (ret lexeme 'error #f start-pos end-pos 'bad)] + [any-char (extend-error lexeme start-pos end-pos input-port)])) + + (define racket-lexer/status (lexer/status identifier keyword bad-id)) + (define racket-nobar-lexer/status (lexer/status nobar-identifier nobar-keyword nobar-bad-id)) + + (define (extend-error lexeme start end in) + (if (memq (peek-char-or-special in) + `(special #\newline #\return #\tab #\space #\vtab + #\" #\, #\' #\` #\( #\) #\[ #\] #\{ #\} #\; + ,eof)) + (ret lexeme 'error #f start end 'bad) + (let-values (((rest end-pos) (get-chunk in))) + (ret (string-append lexeme rest) 'error #f start end-pos 'bad)))) + + (define get-chunk + (lexer + ((:+ (:~ identifier-delims)) (values lexeme end-pos)))) diff --git a/collects/syntax-color/scheme-lexer.rkt b/collects/syntax-color/scheme-lexer.rkt index 9021bcfef5..6934ed4e0b 100644 --- a/collects/syntax-color/scheme-lexer.rkt +++ b/collects/syntax-color/scheme-lexer.rkt @@ -1,386 +1,13 @@ -(module scheme-lexer mzscheme - - (require parser-tools/lex - (prefix : parser-tools/lex-sre)) - - (provide scheme-lexer - scheme-lexer/status - scheme-nobar-lexer/status) - - (define-lex-abbrevs - - ;; For case insensitivity - [a (char-set "aA")] - [b (char-set "bB")] - [c (char-set "cC")] - [d (char-set "dD")] - [e (char-set "eE")] - [f (char-set "fF")] - [g (char-set "gG")] - [h (char-set "hH")] - [i (char-set "iI")] - [j (char-set "jJ")] - [k (char-set "kK")] - [l (char-set "lL")] - [m (char-set "mM")] - [n (char-set "nN")] - [o (char-set "oO")] - [p (char-set "pP")] - [q (char-set "qQ")] - [r (char-set "rR")] - [s (char-set "sS")] - [t (char-set "tT")] - [u (char-set "uU")] - [v (char-set "vV")] - [w (char-set "wW")] - [x (char-set "xX")] - [y (char-set "yY")] - [z (char-set "zZ")] +#lang racket/base +#| - [digit (:/ "0" "9")] - [digit2 (:/ "0" "1")] - [digit8 (:/ "0" "7")] - [digit10 digit] - [digit16 (:/ "af" "AF" "09")] +This file is provided for backwards compatibility. +New code should use racket-lexer.rkt. - [langchar (:or (:/ "az" "AZ" "09") "+" "-" "_")] - - [scheme-whitespace whitespace] - - [line-comment (:: ";" (:* (:~ #\newline)))] - - - ;; What about char->integer constraint? - [unicode (:or (:: "u" (:** 1 4 digit16)) - (:: "U" (:** 1 6 digit16)))] - - [character (:or (:: "#\\" any-char) - (:: "#\\" character-name) - (:: "#\\" (:/ "0" "3") digit8 digit8) - (:: "#\\" unicode))] - - [character-name (:or (:: s p a c e) - (:: n e w l i n e) - (:: n u l) - (:: n u l l) - (:: b a c k s p a c e) - (:: t a b) - (:: l i n e f e e d) - (:: v t a b) - (:: p a g e) - (:: r e t u r n) - (:: r u b o u t))] - - [bad-char (:or "#\\" - (:: "#\\" (:>= 2 alphabetic)) - (:: "#\\" (:/ "0" "3") digit8))] - - ;; What about byte string regexp strings - [str (:or (:: (:? (:or "#px" "#rx")) "\"" (:* string-element (:: "\\" unicode)) "\"") - byte-str)] - [byte-str (:: (:? (:or "#px" "#rx")) "#\"" (:* string-element) "\"")] - [string-element (:or (:~ "\"" "\\") - "\\\"" - "\\\\" - "\\a" - "\\b" - "\\t" - "\\n" - "\\v" - "\\f" - "\\r" - "\\e" - "\\'" - (:: "\\" (:** 1 3 digit8)) - (:: "\\x" (:** 1 2 digit16)) - (:: "\\" #\newline))] - - [bad-str (:: (:? (:or "#px" "#rx")) (:? "#") "\"" - (:* (:~ "\"" "\\") - (:: "\\" any-char)) - (:? "\\" "\""))] - - - [special-numbers (:or (:: n a n ".0") (:: i n f ".0"))] - [exponent-marker (:or e s f d l)] - [sign (char-set "+-")] - [exactness (:or "#i" "#e" "#I" "#E")] - [radix2 (:or "#b" "#B")] - [radix8 (:or "#o" "#O")] - [radix10 (:or "#d" "#D")] - [radix16 (:or "#x" "#X")] - - [script (:: "#!" (:or #\space #\/) (:* (:~ #\newline) (:: #\\ #\newline)))] - - [identifier-delims (:or (char-set "\",'`()[]{};") scheme-whitespace)] - [identifier-chars (:~ identifier-delims "\\" "|")] - [identifier-escapes (:or (:: "\\" any-char) - (:: "|" (:* (:~ "|")) "|"))] - [identifier-start (:or identifier-escapes - (:~ identifier-delims "\\" "|" "#") - "#%")] - [identifier (:: identifier-start - (:* identifier-escapes identifier-chars))] - - [nobar-identifier-escapes (:: "\\" any-char)] - [nobar-identifier-start (:or nobar-identifier-escapes - (:~ identifier-delims "\\" "|" "#") - "#%")] - [nobar-identifier (:: nobar-identifier-start - (:* nobar-identifier-escapes identifier-chars))] - - [bad-id-start (:or identifier-escapes - (:~ identifier-delims "\\" "|"))] - [bad-id-escapes (:or identifier-escapes - (:: "|" (:* (:~ "|"))))] - [bad-id (:or (:: bad-id-start - (:* identifier-escapes identifier-chars) - (:? "\\" bad-id-escapes)) - "\\" - bad-id-escapes)] - - - [nobar-bad-id-escapes nobar-identifier-escapes] - [nobar-bad-id (:or (:: bad-id-start - (:* nobar-identifier-escapes identifier-chars) - (:? "\\" nobar-bad-id-escapes)) - "\\" - nobar-bad-id-escapes)] - - [keyword (:: "#:" (:* identifier-escapes identifier-chars))] - [nobar-keyword (:: "#:" (:* nobar-identifier-escapes identifier-chars))] - - [reader-command (:or (:: "#" c s) (:: "#" c i))] - [sharing (:or (:: "#" (make-uinteger digit10) "=") - (:: "#" (make-uinteger digit10) "#"))] - - [list-prefix (:or "" "#hash" "#hasheq" "#hasheqv" "#s" (:: "#" (:* digit10)))]) - - (define-lex-trans make-num - (syntax-rules () - ((_ digit radix) (:: (make-prefix radix) (make-complex digit))))) - - (define-lex-trans make-prefix - (syntax-rules () - ((_ radix) (:or (:: radix (:? exactness)) - (:: (:? exactness) radix))))) - - (define-lex-trans make-complex - (syntax-rules () - ((_ digit) - (:or (make-real digit) - (:: (make-real digit) "@" (make-real digit)) - (:: (make-real digit) "+" (:or special-numbers (make-ureal digit)) i) - (:: (make-real digit) "-" (:or special-numbers (make-ureal digit)) i) - (:: (make-real digit) "+" i) - (:: (make-real digit) "-" i) - (:: "+" (:or special-numbers (make-ureal digit)) i) - (:: "-" (:or special-numbers (make-ureal digit)) i) - (:: "+" i) - (:: "-" i))))) - - (define-lex-trans make-ureal - (syntax-rules () - ((_ digit) (:or (make-uinteger digit) - (:: (make-uinteger digit) "/" (make-uinteger digit) (:? (make-suffix digit))) - (make-decimal digit))))) - - (define-lex-trans make-real - (syntax-rules () - ((_ digit) (:or (:: (:? sign) (make-ureal digit)) - (:: (char-set "+-") special-numbers))))) - - (define-lex-trans make-uinteger - (syntax-rules () - ((_ digit) (:: (:+ digit) (:* "#"))))) - - (define-lex-trans make-decimal - (syntax-rules () - ((_ digit) - (:or (:: (make-uinteger digit) (make-suffix digit)) - (:: "." (:+ digit) (:* "#") (make-suffix digit)) - (:: (:+ digit) "." (:* digit) (:* "#") (make-suffix digit)) - (:: (:+ digit) (:+ "#") "." (:* "#") (make-suffix digit)))))) - - (define-lex-trans make-suffix - (syntax-rules () - ((_ digit) (:or "" (:: exponent-marker (:? sign) (:+ digit)))))) - - - (define (ret lexeme type paren start-pos end-pos status) - (values lexeme type paren (position-offset start-pos) (position-offset end-pos) status)) - - - (define get-next-comment - (lexer - ["#|" (values 1 end-pos)] - ["|#" (values -1 end-pos)] - [(:or "#" "|" (:* (:~ "|" "#"))) - (get-next-comment input-port)] - [(eof) (values 'eof end-pos)] - [(special) - (get-next-comment input-port)] - [(special-comment) - (get-next-comment input-port)])) - - (define (read-nested-comment num-opens start-pos input) - (let-values (((diff end) (get-next-comment input))) - (cond - ((eq? 'eof diff) (ret "" 'error #f start-pos end 'continue)) - (else - (let ((next-num-opens (+ diff num-opens))) - (cond - ((= 0 next-num-opens) (ret "" 'comment #f start-pos end 'continue)) - (else (read-nested-comment next-num-opens start-pos input)))))))) - - (define (get-offset i) - (let-values (((x y offset) (port-next-location i))) - offset)) - - (define (escape-regexp s) - (apply string-append - (map (lambda (c) - (if (memq c '(#\( #\) #\* #\+ #\? #\[ #\] #\. #\^ #\\ #\|)) - (string #\\ c) - (string c))) - (string->list s)))) - - (define (special-read-line i) - (let ((next (peek-char-or-special i))) - (cond - ((or (eq? next #\newline) (not (char? next))) - null) - (else - (read-char i) - (cons next (special-read-line i)))))) - - (define (read-line/skip-over-specials i) - (let loop () - (let ((next (peek-char-or-special i))) - (cond - ((or (eq? next #\newline) (eof-object? next)) - null) - (else - (read-char-or-special i) - (if (char? next) - (cons next (loop)) - (loop))))))) - - (define (get-here-string start-pos i) - (let* ((ender (list->string (special-read-line i))) - (next-char (peek-char-or-special i))) - (cond - ((or (equal? ender "") (not (eq? #\newline next-char))) - (values (string-append "#<<" ender) 'error #f start-pos (get-offset i) 'datum)) - (else - (read-char i) - (let loop ((acc (list (string-append "#<<" ender "\n")))) - (let* ((next-line (list->string (special-read-line i))) - (next-char (peek-char-or-special i))) - (cond - ((not (or (char? next-char) (eof-object? next-char))) ;; a special - (values (apply string-append (reverse (cons next-line acc))) - 'error #f start-pos (get-offset i) - 'datum)) - ((equal? next-line ender) ;; end of string - (values (apply string-append (reverse (cons next-line acc))) - 'string #f start-pos (get-offset i) - 'datum)) - ((eof-object? next-char) - (values (apply string-append (reverse (cons next-line acc))) - 'error #f start-pos (get-offset i) - 'datum)) - (else - (read-char i) - (loop (cons (string-append next-line "\n") acc)))))))))) - - (define (scheme-lexer in) - (let-values ([(lexeme type paren start end adj) (scheme-lexer/status in)]) - (values lexeme type paren start end))) - - (define-syntax-rule (lexer/status identifier keyword bad-id) - (lexer - [(:+ scheme-whitespace) - (ret lexeme 'white-space #f start-pos end-pos 'continue)] - [(:: (:or "#true" "#false" "#t" "#f" "#T" "#F") - (:* (:~ identifier-delims))) - (ret lexeme - (if (member lexeme '("#true" "#false" "#t" "#f" "#T" "#F")) - 'constant - 'error) - #f start-pos end-pos 'datum)] - [(:or character - (make-num digit2 radix2) - (make-num digit8 radix8) - (make-num digit10 (:? radix10)) - (make-num digit16 radix16)) - (ret lexeme 'constant #f start-pos end-pos 'datum)] - [keyword (ret lexeme 'parenthesis #f start-pos end-pos 'datum)] - [str (ret lexeme 'string #f start-pos end-pos 'datum)] - [";" - (values (apply string (read-line/skip-over-specials input-port)) 'comment #f - (position-offset start-pos) - (get-offset input-port) - 'continue)] - #; - [line-comment - (ret lexeme 'comment #f start-pos end-pos)] - ["#;" - (ret lexeme 'sexp-comment #f start-pos end-pos 'continue)] - ["#|" (read-nested-comment 1 start-pos input-port)] - [script - (ret lexeme 'comment #f start-pos end-pos 'continue)] - [(:: list-prefix "(") - (ret lexeme 'parenthesis '|(| start-pos end-pos 'open)] - [(:: list-prefix "[") - (ret lexeme 'parenthesis '|[| start-pos end-pos 'open)] - [(:: list-prefix "{") - (ret lexeme 'parenthesis '|{| start-pos end-pos 'open)] - [(:or ")" "]" "}") - (ret lexeme 'parenthesis (string->symbol lexeme) start-pos end-pos 'close)] - [(:or "'" "`" "#'" "#`" "#&") - (ret lexeme 'constant #f start-pos end-pos 'continue)] - [(:or sharing reader-command "." "," ",@" "#," "#,@") - (ret lexeme 'other #f start-pos end-pos 'continue)] - - [(:: (:or "#lang " "#!") - (:or langchar - (:: langchar (:* (:or langchar "/")) langchar))) - (ret lexeme 'other #f start-pos end-pos 'continue)] - [(:: (:or "#lang " "#!") (:* (:& any-char (complement whitespace)))) - (ret lexeme 'error #f start-pos end-pos 'continue)] - - [identifier - (ret lexeme 'symbol #f start-pos end-pos 'datum)] - ["#<<" - (get-here-string (position-offset start-pos) input-port)] - [(special) - (ret "" 'no-color #f start-pos end-pos 'datum)] - [(special-comment) - (ret "" 'comment #f start-pos end-pos 'continue)] - [(eof) (values lexeme 'eof #f #f #f #f)] - [(:or bad-char bad-str - (:& bad-id - (complement (:: (:or (:: "#" (:or f t)) reader-command sharing "#<<" "#\\" "#|" "#;" "#&" script) - any-string)))) - (ret lexeme 'error #f start-pos end-pos 'bad)] - [any-char (extend-error lexeme start-pos end-pos input-port)])) - - (define scheme-lexer/status (lexer/status identifier keyword bad-id)) - (define scheme-nobar-lexer/status (lexer/status nobar-identifier nobar-keyword nobar-bad-id)) - - (define (extend-error lexeme start end in) - (if (memq (peek-char-or-special in) - `(special #\newline #\return #\tab #\space #\vtab - #\" #\, #\' #\` #\( #\) #\[ #\] #\{ #\} #\; - ,eof)) - (ret lexeme 'error #f start end 'bad) - (let-values (((rest end-pos) (get-chunk in))) - (ret (string-append lexeme rest) 'error #f start end-pos 'bad)))) - - (define get-chunk - (lexer - ((:+ (:~ identifier-delims)) (values lexeme end-pos)))) - - - ) +|# +(require "racket-lexer.rkt") +(provide + (rename-out [racket-lexer scheme-lexer] + [racket-lexer/status scheme-lexer/status] + [racket-lexer/status scheme-nobar-lexer/status])) + \ No newline at end of file diff --git a/collects/syntax-color/syntax-color.scrbl b/collects/syntax-color/syntax-color.scrbl index f8fe032b04..1e5ebb4520 100644 --- a/collects/syntax-color/syntax-color.scrbl +++ b/collects/syntax-color/syntax-color.scrbl @@ -2,13 +2,13 @@ @(require scribble/manual (for-label syntax-color/token-tree syntax-color/paren-tree - syntax-color/scheme-lexer + syntax-color/racket-lexer syntax-color/module-lexer syntax-color/scribble-lexer syntax-color/default-lexer framework/framework framework/private/color - scheme)) + racket)) @title{Syntax Color: Utilities} @@ -31,22 +31,22 @@ Parenthesis matching code built on top of @racket[token-tree%]. @; ---------------------------------------------------------------------- -@section{Scheme Lexer} +@section{Racket Lexer} -@defmodule[syntax-color/scheme-lexer] +@defmodule[syntax-color/racket-lexer] -@defproc[(scheme-lexer [in input-port?]) +@defproc[(racket-lexer [in input-port?]) (values (or/c string? eof-object?) symbol? (or/c symbol? false/c) (or/c number? false/c) (or/c number? false/c))]{ -A lexer for Scheme, including reader extensions (@secref[#:doc'(lib +A lexer for Racket, including reader extensions (@secref[#:doc'(lib "scribblings/reference/reference.scrbl")]{Reader_Extension}), built specifically for @racket[color:text%]. -The @racket[scheme-lexer] function returns 5 values: +The @racket[racket-lexer] function returns 5 values: @itemize[ @item{Either a string containing the matching text or the eof object. @@ -64,7 +64,7 @@ The @racket[scheme-lexer] function returns 5 values: } -@defproc[(scheme-lexer/status [in input-port?]) +@defproc[(racket-lexer/status [in input-port?]) (values (or/c string? eof-object?) symbol? (or/c symbol? false/c) @@ -72,13 +72,13 @@ The @racket[scheme-lexer] function returns 5 values: (or/c number? false/c) (or/c 'datum 'open 'close 'continue))]{ -Like @racket[scheme-lexer], but returns an extra value. The last +Like @racket[racket-lexer], but returns an extra value. The last return value indicates whether the consumed token should count as a datum, an opening parenthesis (or similar starting token to group other tokens), a closing parenthesis (or similar), or a prefix (such as whitespace) on a datum.} -@defproc[(scheme-nobar-lexer/status [in input-port?]) +@defproc[(racket-nobar-lexer/status [in input-port?]) (values (or/c string? eof-object?) symbol? (or/c symbol? false/c) @@ -86,8 +86,8 @@ as whitespace) on a datum.} (or/c number? false/c) (or/c 'datum 'open 'close 'continue))]{ -Like @racket[scheme-lexer/status], but for a dialect of Scheme where -@litchar{|} is a delimiter instead of quoting syntax for a symbol. +Like @racket[racket-lexer/status], except it treats +@litchar{|} as a delimiter instead of quoting syntax for a symbol. This function is used by @racket[scribble-lexer].} @@ -140,14 +140,14 @@ A lexer that only identifies @litchar{(}, @litchar{)}, @litchar{[}, (-> input-port? any) (cons/c (-> input-port? any/c any) any/c)))]{ -Like @racket[scheme-lexer], but with several differences: +Like @racket[racket-lexer], but with several differences: @itemize[ @item{The @racket[module-lexer] function accepts an offset and lexer mode, instead of just an input port.} - @item{In addition to the results of @racket[scheme-lexer], + @item{In addition to the results of @racket[racket-lexer], @racket[module-lexer] returns a backup distance and a new lexer mode.} @@ -169,7 +169,7 @@ Like @racket[scheme-lexer], but with several differences: If the language is specified but it provides no @racket[get-info] or @racket['color-lexer] result, then - @racket[scheme-lexer] is returned as the mode.} + @racket[racket-lexer] is returned as the mode.} @item{When @racket[mode] is a lexer procedure, the lexer is applied to @racket[in]. The lexer's results are returned, plus the @@ -197,7 +197,7 @@ Like @racket[scheme-lexer], but with several differences: exact-nonnegative-integer? any/c)]{ -Like @racket[scheme-lexer], but for Scheme extended with Scribbles +Like @racket[racket-lexer], but for Racket extended with Scribble's @"@" notation (see @secref[#:doc '(lib "scribblings/scribble/scribble.scrbl") "reader"]).} @@ -213,7 +213,7 @@ Like @racket[scheme-lexer], but for Scheme extended with Scribbles any/c)]{ Like @racket[scribble-lexer], but starting in ``text'' mode instead of -Scheme mode.} +Racket mode.} @; ----------------------------------------------------------------------