From 0dc359a956978ffac13dacf31c4b07b80a81a709 Mon Sep 17 00:00:00 2001
From: Matthew Flatt <mflatt@racket-lang.org>
Date: Tue, 12 Feb 2008 21:50:35 +0000
Subject: [PATCH] fix a scribble bug; add r6rs reader

svn: r8635
---
 collects/r6rs/lang/reader.ss                  |  18 +
 collects/r6rs/main.ss                         |   9 +
 collects/r6rs/private/readtable.ss            | 551 ++++++++++++++++++
 collects/scribble/manual.ss                   |  11 +-
 collects/scribblings/reference/reader.scrbl   |  24 +-
 .../scribblings/reference/reference.scrbl     |   6 +
 collects/scribblings/scribble/manual.scrbl    |   4 +-
 collects/tests/mzscheme/rx.ss                 |   8 +
 src/mzscheme/src/regexp.c                     |   9 +-
 9 files changed, 624 insertions(+), 16 deletions(-)
 create mode 100644 collects/r6rs/lang/reader.ss
 create mode 100644 collects/r6rs/main.ss
 create mode 100644 collects/r6rs/private/readtable.ss

diff --git a/collects/r6rs/lang/reader.ss b/collects/r6rs/lang/reader.ss
new file mode 100644
index 0000000000..41fb1129b1
--- /dev/null
+++ b/collects/r6rs/lang/reader.ss
@@ -0,0 +1,18 @@
+#lang scheme/base
+
+(require (only-in syntax/module-reader wrap-read-all)
+         "../private/readtable.ss")
+(provide (rename-out [*read read]
+                     [*read-syntax read-syntax]))
+
+(define (*read in)
+  (wrap in read))
+
+(define (*read-syntax src in)
+  (wrap in (lambda (in)
+             (read-syntax src in))))
+
+(define (wrap in read)
+  (with-r6rs-reader-parameters
+   (lambda ()
+     (wrap-read-all 'r6rs in read))))
diff --git a/collects/r6rs/main.ss b/collects/r6rs/main.ss
new file mode 100644
index 0000000000..92fadb35b0
--- /dev/null
+++ b/collects/r6rs/main.ss
@@ -0,0 +1,9 @@
+#lang scheme/base
+
+(provide (rename-out [module-begin #%module-begin])
+         quote)
+
+(define-syntax-rule (module-begin . stuff)
+  (#%module-begin 
+   "The R6RS language just supplies a reader, so far."
+   'stuff))
diff --git a/collects/r6rs/private/readtable.ss b/collects/r6rs/private/readtable.ss
new file mode 100644
index 0000000000..fb9f79927e
--- /dev/null
+++ b/collects/r6rs/private/readtable.ss
@@ -0,0 +1,551 @@
+#lang scheme/base
+
+;; Readtable-based R6RS reading
+
+(require syntax/readerr
+         (for-syntax scheme/base))
+
+(provide with-r6rs-reader-parameters)
+
+;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define (generic-not-allowed where ch port src line col pos)
+  (raise-read-error 
+   (format "illegal character~a in input: `~a'" 
+           where
+           (let ([s (format "~s" (string ch))])
+             (substring s 1 (- (string-length s) 1))))
+   src line col pos 1))
+
+(define (not-allowed ch port src line col pos)
+  (generic-not-allowed "" ch port src line col pos))
+
+(define (dispatch-not-allowed ch port src line col pos)
+  (generic-not-allowed " after `#'" ch port src line col pos))
+
+;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;  #!r6rs as a comment
+
+(define (read-hash-bang ch port src line col pos)
+  (if (regexp-try-match #rx"^r6rs" port)
+      (make-special-comment #f)
+      (let* ([s (regexp-match #rx"^(r6r|r6|r|)(.|)" port)]
+             [len (+ 2 (bytes-length (cadr s)))]
+             [next (caddr s)])
+        (if (bytes=? next #"")
+            (raise-read-eof-error
+             (format "unexpected end-of-file after `#!~a'"
+                     (cadr s))
+             src line (and col (+ col len)) (and pos (+ pos len)) 1)
+            (generic-not-allowed (format " after `#!~a'" (cadr s))
+                                 (string-ref (bytes->string/utf-8 (caddr s)) 0)
+                                 port
+                                 src
+                                 line
+                                 (and col (+ col len))
+                                 (and pos (+ pos len)))))))
+
+;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;  delimiter helpers
+
+(define (delimiter? delim)
+  (or (eof-object? delim)
+      (char-whitespace? delim)
+      (char=? delim #\()
+      (char=? delim #\))
+      (char=? delim #\[)
+      (char=? delim #\])
+      (char=? delim #\")
+      (char=? delim #\;)
+      (char=? delim #\#)))
+
+(define (check-delimiter result prefix ch port src line col pos len)
+  (let ([delim (peek-char port)])
+    (if (delimiter? delim)
+        result
+        (generic-not-allowed (format " after ~a~a (need a delimiter)"
+                                     prefix ch)
+                             (read-char port)
+                             port src line 
+                             (and col (+ col len))
+                             (and pos (+ pos len))))))
+
+;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;  booleans (delimiter required)
+
+(define (read-boolean ch port src line col pos)
+  (check-delimiter (case ch
+                     [(#\t #\T) #t]
+                     [else #f])
+                   "#" ch
+                   port src line col pos 2))
+
+;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;  characters
+
+(define (read-character ch port src line col pos)
+  ;; #\ has been consumed
+  (let ([ch (read-char port)])
+    (if (eof-object? ch)
+        (raise-read-eof-error
+         "unexpected end-of-file after `#\\'"
+         src line (and col (+ col 1)) (and pos (+ pos 1)) 
+         2)
+        (let ([next (peek-char port)])
+          (if (delimiter? next)
+              ch
+              (let ([r+str
+                     (ormap (lambda (d)
+                              (and (eq? ch (car d))
+                                   (regexp-try-match (cadr d) port)
+                                   (values (cddr d))))
+                            (let-syntax ([char-names
+                                          (lambda (stx)
+                                            (syntax-case stx ()
+                                              [(_ str ...)
+                                               (let ([strs (map syntax-e (syntax->list #'(str ...)))])
+                                                 (with-syntax ([(init-char ...)
+                                                                (map (lambda (s) (string-ref s 0))
+                                                                     strs)]
+                                                               [(rx ...)
+                                                                (map (lambda (s)
+                                                                       (regexp (string-append "^" (substring s 1))))
+                                                                     strs)]
+                                                               [(result-char ...)
+                                                                (map (lambda (s)
+                                                                       (cond
+                                                                        [(string=? s "alarm") #\u07]
+                                                                        [(string=? s "esc") #\u1B]
+                                                                        [(string=? s "delete") #\u7F]
+                                                                        [else
+                                                                         (read (open-input-string (string-append "#\\" s)))]))
+                                                                     strs)])
+                                                   #`(quote ((init-char rx result-char . str) ...))))]))])
+                              (char-names
+                               "space"
+                               "newline"
+                               "alarm"
+                               "backspace"
+                               "tab"
+                               "linefeed"
+                               "newline"
+                               "vtab"
+                               "page"
+                               "return"
+                               "esc"
+                               "space"
+                               "delete")))])
+                (if r+str
+                    (check-delimiter
+                     (car r+str)
+                     "#\\" (cdr r+str)
+                     port src line col pos 
+                     (+ 2 (string-length (cdr r+str))))
+                    (let ([hex (and (eq? ch #\x)
+                                    (regexp-try-match #rx"^[0-9a-fA-F]+" port))])
+                      (if hex
+                          (let ([hex-val (string->number
+                                          (bytes->string/latin-1 (car hex))
+                                          16)])
+                            (if (or (<= 0 hex-val #xD7FF)
+                                    (<= #xE000 hex-val #x10FFFF))
+                                (check-delimiter
+                                 (integer->char hex-val)
+                                 "#\\" (car hex)
+                                 port src line col pos
+                                 (+ 3 (bytes-length (car hex))))
+                                (raise-read-error
+                                 (format "out of range character constant `#\\x~a'"
+                                         (car hex))
+                                 src line 
+                                 col
+                                 pos
+                                 (and pos (+ 3 (bytes-length (car hex)))))))
+                          (let ([more (bytes->string/utf-8
+                                       (car (regexp-match #px"^.([a-z]{0,20})" port)))])
+                            (raise-read-error
+                             (format "unknown character constant `#\\~a~a'"
+                                     ch more)
+                             src line 
+                             col
+                             pos
+                             (and pos (+ 2 (string-length more))))))))))))))
+
+;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;   byte vectors
+
+(define (do-read-byte-string stx? ch port src line col pos)
+  ;; #v has been read
+  (if (and (regexp-try-match #rx"^u8" port)
+           (eq? #\( (peek-char port)))
+      (let* ([l (if stx?
+                    (read-syntax/recursive src port)
+                    (read/recursive port))]
+             [lst (if stx?
+                      (syntax->list l)
+                      l)])
+        (unless (list? lst)
+          (raise-read-error
+           (format "expected a parenthesized sequence without `.' after `#vu8'")
+           src line col pos (and pos 4)))
+        (for-each (lambda (e)
+                    (let ([elem (if stx?
+                                    (syntax-e e)
+                                    e)])
+                      (unless (byte? elem)
+                        (let ([msg (format "invalid byte-vector element (not an octet): ~e"
+                                           elem)])
+                          (if stx?
+                              (raise-read-error
+                               msg
+                               (syntax-source e)
+                               (syntax-line e)
+                               (syntax-column e)
+                               (syntax-position e)
+                               (syntax-span e))
+                              (raise-read-error
+                               msg
+                               #f #f #f #f #f))))))
+                  lst)
+        (list->bytes (if stx? (map syntax-e lst) lst)))
+      (raise-read-error
+       "`#v' to continue `#vu8('"
+       src line
+       col pos
+       2)))
+
+(define read-byte-string
+  (case-lambda
+   [(ch port)
+    (do-read-byte-string #f ch port #f #f #f #f)]
+   [(ch port src line col pos)
+    (do-read-byte-string #t ch port src line col pos)]))
+
+;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;  strings
+
+(define (read-a-string ch port src line col pos)
+  (let ([content (regexp-match #rx"^(?:[^\"\\\\]|\\\\.)*(?:\"|\\\\?$)" port)])
+    (let* ([bytes (car content)]
+           [len (bytes-utf-8-length bytes)])
+      ;; Check/convert escapes and <line ending>s
+      (let ([pieces
+             (let loop ([bpos 0])
+               (let ([m (regexp-match-positions #rx"(?:\r\n|\r\u85|[\r\u85\u2028])|\\\\." bytes bpos)])
+                 (if m
+                     (if (= (bytes-ref bytes (caar m)) (char->integer #\\))
+                         (let ([char (string-ref
+                                      (bytes->string/utf-8 (subbytes bytes (add1 (caar m)) (cdar m)))
+                                      0)])
+                           (cond
+                            [(memq char '(#\a #\b #\t #\n #\v #\f #\r #\\ #\"))
+                             (list* (subbytes bytes bpos (caar m))
+                                    (case char
+                                      [(#\n) #"\n"]
+                                      [(#\r) #"\r"]
+                                      [(#\t) #"\t"]
+                                      [(#\a) #"\a"]
+                                      [(#\b) #"\b"]
+                                      [(#\v) #"\v"]
+                                      [(#\f) #"\f"]
+                                      [(#\\) #"\\"]
+                                      [(#\\) #"\""])
+                                    (loop (cdar m)))]
+                            [(eq? char #\x)
+                             (let ([hm (regexp-match-positions #px"^[a-zA-Z0-9]*;"
+                                                               bytes 
+                                                               (+ 2 (caar m)))])
+                               (if hm
+                                   (let* ([hex-bytes (subbytes bytes (+ 2 (caar m)) (sub1 (cdar hm)))]
+                                          [v (string->number (bytes->string/utf-8 hex-bytes) 16)])
+                                     (if (or (<= 0 v #xD7FF)
+                                             (<= #xE000 v #x10FFFF))
+                                         (list* (subbytes bytes bpos (caar m))
+                                                (string->bytes/utf-8 (string (integer->char v)))
+                                                (loop (cdar hm)))
+                                         (raise-read-error
+                                          (format "out-of-range `\\x~a;' escape in string" hex-bytes)
+                                          src line
+                                          col pos
+                                          (and pos (+ 1 len)))))
+                                   (raise-read-error
+                                    (format "bad escape `\\x~a' in string"
+                                            (car (regexp-match #px"^[a-zA-Z0-9]*." bytes (+ 2 (caar m)))))
+                                    src line
+                                    col pos
+                                    (and pos (+ 1 len)))))]
+                            [(or (eq? char #\tab)
+                                 (eq? (char-general-category char) 'zs))
+                             (let ([wm (regexp-match-positions #px"^(?:\t|\\p{Zs})*(?:\r\n|\r\u85|[\r\n\u85\u2028])(?:\t|\\p{Zs})*"
+                                                               bytes 
+                                                               (add1 (caar m)))])
+                               (if wm
+                                   (cons (subbytes bytes bpos (caar m)) ; drop matched part
+                                         (loop (cdar wm)))
+                                   ;; This is an eof error if there's only intraline whitespace
+                                   ((if (regexp-match #px"^(?:\t|\\p{Zs})*$" bytes (+ 1 bpos))
+                                        raise-read-eof-error
+                                        raise-read-error)
+                                    "missing <line ending> after `\\<intraline-whitespace>'"
+                                    src line
+                                    col pos
+                                    (and pos (+ 1 len)))))]
+                            [else
+                             (raise-read-error
+                              (format "bad escape `\\~a' in string" char)
+                              src line
+                              col pos
+                              (and pos (+ 1 len)))]))
+                         ;; found a <line ending> that isn't just a newline:
+                         (list* (subbytes bytes bpos (caar m))
+                                #"\n"
+                                (loop (cdar m))))
+                     (let ([end (sub1 (bytes-length bytes))])
+                       (if (or (= end -1)
+                               (not (= (char->integer #\")
+                                       (bytes-ref bytes end))))
+                           (raise-read-error
+                            "unexpected end-of-file within string"
+                            src line col pos (and pos (+ 1 len)))
+                           ;; Ok:
+                           (list (subbytes bytes bpos end)))))))])
+        (bytes->string/utf-8
+         (if (= 1 (length pieces))
+             (car pieces)
+             (apply bytes-append pieces)))))))
+
+;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;  numbers and symbols
+
+(define-values (rx:id rx:number)
+  ;; Build regexp versions of the R6RS grammar productions for
+  ;; <identifier> and <number>.
+  (let ([or (lambda args
+              (and (ormap values args)
+                   (apply
+                    string-append
+                    (append
+                     (list "(?:")
+                     (cdr (apply append
+                                 (map (lambda (a) (list "|" a))
+                                      (filter values args))))
+                     (list ")")))))]
+        [seq (lambda args
+               (and (andmap values args)
+                    (apply string-append args)))]
+        [+ (lambda (s) (and s (string-append s "+")))]
+        [* (lambda (s) (and s (string-append s "*")))])
+
+    (define letter "[a-zA-Z]")
+    (define constituent (or letter
+                            (string-append
+                             "(?:(?=[^\0-\177])(?:"
+                             (substring
+                              (apply
+                               string-append
+                               (map
+                                (lambda (s)
+                                  (format "|\\p{~a}" s))
+                                '(Lu Lt Lm Lo Mn Nl No Pd Pc Po Sc Sm Sk So Co)))
+                              1)
+                             "))")))
+    (define special-initial "[!$%&*/:<=>?^_~]")
+    (define special-subsequent "[-+.@]")
+    (define inline-hex-escape "\\\\x[0-9a-fA-F]+;")
+    (define initial (or constituent
+                        special-initial
+                        inline-hex-escape))
+    (define subsequent (or initial
+                           "[0-9]"
+                           special-subsequent
+                           "(?:\\p{Nd}|\\p{Mc}|\\p{Me})"))
+    (define peculiar-identifier (or "[+]" 
+                                    "-" 
+                                    "[.][.][.]"
+                                    (seq "->" (* subsequent))))
+    (define identifier (or (seq initial (* subsequent))
+                           peculiar-identifier))
+
+    (define digit-16  "[0-9a-fA-F]")
+    (define digit-10  "[0-9]")
+    (define digit-8   "[0-7]")
+    (define digit-2   "[01]")
+    (define (digit R)
+      (case R
+        [(2) digit-2]
+        [(8) digit-8]
+        [(10) digit-10]
+        [(16) digit-16]))
+    (define radix-16  "#[xX]")
+    (define radix-10  "(?:#[dD]|)")
+    (define radix-8   "#[oO]")
+    (define radix-2   "#[bB]")
+    (define (radix R)
+      (case R
+        [(2) radix-2]
+        [(8) radix-8]
+        [(10) radix-10]
+        [(16) radix-16]))
+    (define exactness "(?:#[iIeE]|)")
+    (define sign      "(?:[+-]|)")
+    (define mantissa-width (or "" (seq "[|]" (+ digit-10))))
+    (define exponent-marker "[eEsSfFdDlL]")
+    (define suffix (or "" (seq exponent-marker sign digit-10)))
+
+    (define (prefix R) (or (seq (radix R) exactness)
+                           (seq exactness (radix R))))
+    (define (uinteger R) (+ (digit R)))
+    (define decimal-10 (or (seq (uinteger 10) suffix)
+                           (seq "[.]" (+ (digit 10)) suffix)
+                           (seq (+ (digit 10)) "[.]" (* (digit 10)) suffix))) ; removed redundant last production
+    (define (decimal R)
+      (case R
+        [(10) decimal-10]
+        [else #f])) ; <<--- using #f to mean "can't match", and combinators propagate #f appropriately
+    (define (ureal R) (or (uinteger R)
+                          (seq (uinteger R) "/" (uinteger R))
+                          (seq (decimal R) mantissa-width)))
+    (define naninf (or "nan[.]0" "inf[.]0"))
+    (define (real R) (or (seq sign (ureal R))
+                         (seq "[+]" naninf)
+                         (seq "-" naninf)))
+    (define (complex R) (or (real R)
+                            (seq (real R) "@" (real R))
+                            (seq (real R) "[+]" (ureal R) "i")
+                            (seq (real R) "-" (ureal R) "i")
+                            (seq (real R) "[+]" naninf "i")
+                            (seq (real R) "-" naninf "i")
+                            (seq (real R) "[+]i")
+                            (seq (real R) "-i")
+                            (seq "[+]" (ureal R) "i")
+                            (seq "-" (ureal R) "i")
+                            (seq "[+]" naninf "i")
+                            (seq "-" naninf "i")
+                            "[+]i"
+                            "-i"))
+    (define (num R) (seq (prefix R) (complex R)))
+    (define number (or (num 10)
+                       (num 16)
+                       (num 8)
+                       (num 2)))
+    
+    (values (pregexp (string-append "^" identifier "$"))
+            (pregexp (string-append "^" number "$")))))
+
+(define (do-read-symbol-or-number num? prefix port src line col pos)
+  ;; Read a delimited sequence (using an extended notion of delimiter),
+  ;; then make sure it's a number or identifier.
+  (let ([thing (bytes-append
+                (string->bytes/utf-8 prefix)
+                (car (or (regexp-match #px"^(?:\\\\x[0-9a-fA-F]+;|[^\\s\\[\\]()#\";,'`])*" port)
+                         '(#""))))])
+    (cond
+     [(regexp-match rx:number thing)
+      (let ([n (string->number 
+                (bytes->string/utf-8
+                 ;; MzScheme doesn't hanel mantissa widths, so strip them out:
+                 (regexp-replace* #rx#"[|][0-9]+"
+                                  thing
+                                  #"")))])
+        (unless n
+          (error 'r6rs-parser "number didn't convert: ~e" thing))
+        n)]
+     [(and (not num?)
+           (regexp-match rx:id thing))
+      (string->symbol
+       (bytes->string/utf-8 
+        (let loop ([t thing])
+          (let ([m (regexp-match #rx#"^(.*)\\\\x([0-9a-fA-F]+);(.*)$" t)])
+            (if m
+                (loop (bytes-append
+                       (loop (cadr m))
+                       (let ([v (string->number
+                                 (bytes->string/latin-1 (caddr m))
+                                 16)])
+                         (unless (or (<= 0 v #xD7FF)
+                                     (<= #xE000 v #x10FFFF))
+                           (let ([str (bytes->string/utf-8 thing)])
+                             (raise-read-error
+                              (format "out of range escape: `\\x~a;'" (cadr m))
+                              src line col pos (and pos (string-length str)))))
+                         (string->bytes/utf-8 (string (integer->char v))))
+                       (loop (cadddr m))))
+                t)))))]
+     [else
+      (let ([str (bytes->string/utf-8 thing)])
+        (raise-read-error
+         (format "not a number or identifier: `~a'" str)
+         src line col pos (and pos (string-length str))))])))
+
+
+(define (read-symbol-or-number ch port src line col pos)
+  (do-read-symbol-or-number #f
+                            (string ch)
+                            port src line col pos))
+
+(define (read-number ch port src line col pos)
+  (do-read-symbol-or-number #t
+                            (string #\# ch)
+                            port src line col pos))
+
+;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define r6rs-readtable
+  (make-readtable 
+   #f
+   #\{ 'terminating-macro not-allowed
+   #\} 'terminating-macro not-allowed
+   #\| 'terminating-macro not-allowed
+   #\" 'terminating-macro read-a-string
+   #\{ 'dispatch-macro dispatch-not-allowed
+   #\\ 'dispatch-macro read-character
+   #\" 'dispatch-macro dispatch-not-allowed
+   #\% 'dispatch-macro dispatch-not-allowed
+   #\: 'dispatch-macro dispatch-not-allowed
+   #\& 'dispatch-macro dispatch-not-allowed
+   #\! 'dispatch-macro read-hash-bang
+   #\~ 'dispatch-macro dispatch-not-allowed
+   #\< 'dispatch-macro dispatch-not-allowed
+   #\r 'dispatch-macro dispatch-not-allowed
+   #\p 'dispatch-macro dispatch-not-allowed
+   #\c 'dispatch-macro dispatch-not-allowed
+   #\C 'dispatch-macro dispatch-not-allowed
+   #\s 'dispatch-macro dispatch-not-allowed
+   #\S 'dispatch-macro dispatch-not-allowed
+   #\h 'dispatch-macro dispatch-not-allowed
+   #\r 'dispatch-macro dispatch-not-allowed
+   #\l 'dispatch-macro dispatch-not-allowed
+   #\0 'dispatch-macro dispatch-not-allowed
+   #\1 'dispatch-macro dispatch-not-allowed
+   #\2 'dispatch-macro dispatch-not-allowed
+   #\3 'dispatch-macro dispatch-not-allowed
+   #\4 'dispatch-macro dispatch-not-allowed
+   #\5 'dispatch-macro dispatch-not-allowed
+   #\6 'dispatch-macro dispatch-not-allowed
+   #\8 'dispatch-macro dispatch-not-allowed
+   #\9 'dispatch-macro dispatch-not-allowed
+   #\t 'dispatch-macro read-boolean
+   #\T 'dispatch-macro read-boolean
+   #\f 'dispatch-macro read-boolean
+   #\F 'dispatch-macro read-boolean
+   #\v 'dispatch-macro read-byte-string
+   #\i 'dispatch-macro read-number
+   #\I 'dispatch-macro read-number
+   #\e 'dispatch-macro read-number
+   #\E 'dispatch-macro read-number
+   #\b 'dispatch-macro read-number
+   #\B 'dispatch-macro read-number
+   #\d 'dispatch-macro read-number
+   #\D 'dispatch-macro read-number
+   #\o 'dispatch-macro read-number
+   #\O 'dispatch-macro read-number
+   #\x 'dispatch-macro read-number
+   #\X 'dispatch-macro read-number
+   #\\ 'terminating-macro read-symbol-or-number
+   #f 'non-terminating-macro read-symbol-or-number
+   ))
+
+(define (with-r6rs-reader-parameters thunk)
+  (parameterize ([current-readtable r6rs-readtable]
+                 [read-accept-infix-dot #f])
+    (thunk)))
diff --git a/collects/scribble/manual.ss b/collects/scribble/manual.ss
index be2e4b6bff..f6228e210b 100644
--- a/collects/scribble/manual.ss
+++ b/collects/scribble/manual.ss
@@ -1424,10 +1424,12 @@
                         (let ([just-name
                                (make-target-element*
                                 make-toc-target-element
-                                stx-id
+                                (if (pair? name)
+                                    (car (syntax-e stx-id))
+                                    stx-id)
                                 (annote-exporting-library
                                  (to-element (if (pair? name)
-                                                 (make-just-context (car name) stx-id)
+                                                 (make-just-context (car name) (car (syntax-e stx-id)))
                                                  stx-id)))
                                 (let ([name (if (pair? name)
                                                 (car name)
@@ -1445,13 +1447,14 @@
                                            (map (lambda (f)
                                                   (if (or (not immutable?)
                                                           (and (pair? (car f))
-                                                                (memq '#:mutable (car f))))
+                                                               (memq '#:mutable (car f))))
                                                       (list 'mutator 'set- name '- (field-name f) '!)
                                                       #f))
                                                 fields))))))])
                           (if (pair? name)
                               (to-element (list just-name
-                                                (make-just-context (cadr name) stx-id)))
+                                                (make-just-context (cadr name) 
+                                                                   (cadr (syntax-e stx-id)))))
                               just-name))]
                        [short-width (apply +
                                            (length fields)
diff --git a/collects/scribblings/reference/reader.scrbl b/collects/scribblings/reference/reader.scrbl
index aa3e2706d1..acae9024fb 100644
--- a/collects/scribblings/reference/reader.scrbl
+++ b/collects/scribblings/reference/reader.scrbl
@@ -116,6 +116,7 @@ on the next character or characters in the input stream as follows:
   @dispatch[@litchar{#,}]{starts a syntax quote; see @secref["parse-quote"]}
   @dispatch[@litchar["#! "]]{starts a line comment; see @secref["parse-comment"]}
   @dispatch[@litchar["#!/"]]{starts a line comment; see @secref["parse-comment"]}
+  @dispatch[@litchar["#!"]]{may start a reader extension; see @secref["parse-reader"]}
   @dispatch[@litchar{#`}]{starts a syntax quasiquote; see @secref["parse-quote"]}
   @dispatch[@litchar{#,}]{starts an syntax [splicing] unquote; see @secref["parse-quote"]}
   @dispatch[@litchar["#~"]]{starts compiled code; see @scheme[current-compile]}
@@ -750,14 +751,21 @@ If the @scheme[read-accept-reader] @tech{parameter} is set to
 
 @guideintro["hash-lang"]{@schememodfont["#lang"]}
 
-The @as-index{@litchar{#lang}} reader form is similar, but more
-constrained: the @litchar{#lang} must be followed by a single space
-(ASCII 32), and then a non-empty sequence of alphanumeric ASCII,
-@litchar{+}, @litchar{-}, @litchar{_}, and/or @litchar{/} characters
-terminated by @schemelink[char-whitespace?]{whitespace} or an
-end-of-file.  The sequence must not start or end with @litchar{/}. A
-sequence @litchar{#lang }@nonterm{name} is equivalent to
-@litchar{#reader }@nonterm{name}@litchar{/lang/reader}.
+The @as-index{@litchar{#lang}} reader form is similar to
+@litchar{#reader}, but more constrained: the @litchar{#lang} must be
+followed by a single space (ASCII 32), and then a non-empty sequence
+of alphanumeric ASCII, @litchar{+}, @litchar{-}, @litchar{_}, and/or
+@litchar{/} characters terminated by
+@schemelink[char-whitespace?]{whitespace} or an end-of-file.  The
+sequence must not start or end with @litchar{/}. A sequence
+@litchar{#lang }@nonterm{name} is equivalent to @litchar{#reader
+}@nonterm{name}@litchar{/lang/reader}.
+
+Finally, @as-index{@litchar{#!}} followed by alphanumeric ASCII,
+@litchar{+}, @litchar{-}, or @litchar{_} is a synonym for
+@litchar{#lang} followed by a space. Use of this synonym is discourage
+except as needed to construct programs that conform to certain
+grammars, such as that of R@superscript{6}RS @cite["Sperber07"].
 
 By convention, @litchar{#lang} normally appears at the beginning of a
 file, possibly after comment forms, to specify the syntax of a module.
diff --git a/collects/scribblings/reference/reference.scrbl b/collects/scribblings/reference/reference.scrbl
index 7e99f74f95..74b5a10925 100644
--- a/collects/scribblings/reference/reference.scrbl
+++ b/collects/scribblings/reference/reference.scrbl
@@ -103,6 +103,12 @@ languages, where @schememodname[scheme] includes all of
              #:location "Workshop on Scheme and Functional Programming"
              #:date "2004")
 
+ (bib-entry #:key "Sperber07"
+            #:author "Michael Sperber, R. Kent Dybvig, Matthew Flatt, and Anton van Straaten (editors)"
+            #:title @elem{The Revised@superscript{6} Report on the Algorithmic Language Scheme}
+            #:date "2007"
+            #:url "http://www.r6rs.org/")
+
   (bib-entry #:key "Sitaram90"
              #:author "Dorai Sitaram"
              #:title "Control Delimiters and Their Hierarchies"
diff --git a/collects/scribblings/scribble/manual.scrbl b/collects/scribblings/scribble/manual.scrbl
index 5c2c541a48..6e977cf75d 100644
--- a/collects/scribblings/scribble/manual.scrbl
+++ b/collects/scribblings/scribble/manual.scrbl
@@ -583,9 +583,9 @@ of by-name arguments (for use with @scheme[new]).}
 Like @scheme[defconstructor/make], but with multiple constructor
 patterns analogous @scheme[defproc*].}
 
-@defform[(defconstructor/super-init [(arg-spec ...) ...] pre-flow ...)]{
+@defform[(defconstructor/auto-super [(arg-spec ...) ...] pre-flow ...)]{
 
-Like @scheme[defconstructor/super-init], but the constructor is
+Like @scheme[defconstructor], but the constructor is
 annotated to indicate that additional initialization arguments are
 accepted and propagated to the sueprclass.}
 
diff --git a/collects/tests/mzscheme/rx.ss b/collects/tests/mzscheme/rx.ss
index d23d34526e..4ac2ed24e3 100644
--- a/collects/tests/mzscheme/rx.ss
+++ b/collects/tests/mzscheme/rx.ss
@@ -1662,6 +1662,14 @@
 		      (loop (if uniform? end (+ n 1)))))))
 	      kcrl)))
 
+
+(test '(#" ") regexp-match #px#"\t|\\p{Zs}" " ")
+(test '(" ") regexp-match #px"\t|\\p{Zs}" " ")
+(test '(#"\t") regexp-match #px#"\t|\\p{Zs}" "\t")
+(test '("\t") regexp-match #px"\t|\\p{Zs}" "\t")
+(test #f regexp-match #px#"\t|\\p{Zs}" "a")
+(test #f regexp-match #px"\t|\\p{Zs}" "a")
+
 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 ;; Check that [\s] doesn't match \s, etc.
diff --git a/src/mzscheme/src/regexp.c b/src/mzscheme/src/regexp.c
index df442a5d83..5f78a82a77 100644
--- a/src/mzscheme/src/regexp.c
+++ b/src/mzscheme/src/regexp.c
@@ -1742,9 +1742,14 @@ regranges(int parse_flags, int at_start)
 	if ((c >= '0') && (c <= '9'))
 	  break;
 	if (((c >= 'a') && (c <= 'z'))
-	    || ((c >= 'A') && (c <= 'Z')))
+	    || ((c >= 'A') && (c <= 'Z'))) {
+          if ((c == 'p') || (c == 'P')) {
+            /* unicode char class; give up */
+            break;
+          }
 	  regcharclass(regparsestr[regparse], new_map);
-	else
+          
+	} else
 	  new_map[c] = 1;
       } else
 	new_map[c] = 1;