intern literal strings, byte strings, regexps, characters, and numbers

This commit is contained in:
Matthew Flatt 2011-11-21 19:22:10 -07:00
parent cc1b7bb8b4
commit e44bd3f79d
28 changed files with 1170 additions and 948 deletions

View File

@ -13,7 +13,8 @@ A @deftech{byte string} is a fixed-length array of bytes. A
@defterm{mutable} or @defterm{immutable}. When an immutable byte
string is provided to a procedure like @racket[bytes-set!], the
@exnraise[exn:fail:contract]. Byte-string constants generated by the
default reader (see @secref["parse-string"]) are immutable.
default reader (see @secref["parse-string"]) are immutable
and @tech{interned}.
Two byte strings are @racket[equal?] when they have the same length
and contain the same sequence of bytes.

View File

@ -16,7 +16,8 @@ characters whose values range from @racketvalfont{#x0} to
Two characters are @racket[eqv?] if they correspond to the same scalar
value. For each scalar value less than 256, character values that are
@racket[eqv?] are also @racket[eq?].
@racket[eqv?] are also @racket[eq?]. Characters produced by the default
reader are @tech{interned}.
@see-read-print["character"]{characters}

View File

@ -24,105 +24,7 @@ manipulating instances of the datatype.
@include-section["chars.scrbl"]
@; ------------------------------------------------------------
@section[#:tag "symbols"]{Symbols}
@guideintro["symbols"]{symbols}
@section-index["symbols" "generating"]
@section-index["symbols" "unique"]
A @deftech{symbol} is like an immutable string, but symbols are
normally @deftech{interned}, so that two symbols with the same
character content are normally @racket[eq?]. All symbols produced by
the default reader (see @secref["parse-symbol"]) are interned.
The two procedures @racket[string->uninterned-symbol] and
@racket[gensym] generate @deftech{uninterned} symbols, i.e., symbols
that are not @racket[eq?], @racket[eqv?], or @racket[equal?] to any
other symbol, although they may print the same as other symbols.
The procedure @racket[string->unreadable-symbol] returns an
@deftech{unreadable symbol} that is partially interned. The default
reader (see @secref["parse-symbol"]) never produces a unreadable
symbol, but two calls to @racket[string->unreadable-symbol] with
@racket[equal?] strings produce @racket[eq?] results. An unreadable
symbol can print the same as an interned or uninterned
symbol. Unreadable symbols are useful in expansion and
compilation to avoid collisions with symbols that appear in the
source; they are usually not generated directly, but they can appear
in the result of functions like @racket[identifier-binding].
Interned and unreadable symbols are only weakly held by the internal
symbol table. This weakness can never affect the result of an
@racket[eq?], @racket[eqv?], or @racket[equal?] test, but a symbol may
disappear when placed into a weak box (see @secref["weakbox"]) used as
the key in a weak @tech{hash table} (see @secref["hashtables"]), or
used as an ephemeron key (see @secref["ephemerons"]).
@see-read-print["symbol"]{symbols}
@defproc[(symbol? [v any/c]) boolean?]{Returns @racket[#t] if @racket[v] is
a symbol, @racket[#f] otherwise.
@examples[(symbol? 'Apple) (symbol? 10)]}
@defproc[(symbol-interned? [sym symbol?]) boolean?]{Returns @racket[#t] if @racket[sym] is
@tech{interned}, @racket[#f] otherwise.
@examples[(symbol-interned? 'Apple)
(symbol-interned? (gensym))
(symbol-interned? (string->unreadable-symbol "Apple"))]}
@defproc[(symbol-unreadable? [sym symbol?]) boolean?]{Returns @racket[#t] if @racket[sym] is
an @tech{unreadable symbol}, @racket[#f] otherwise.
@examples[(symbol-unreadable? 'Apple)
(symbol-unreadable? (gensym))
(symbol-unreadable? (string->unreadable-symbol "Apple"))]}
@defproc[(symbol->string [sym symbol?]) string?]{Returns a freshly
allocated mutable string whose characters are the same as in
@racket[sym].
@examples[(symbol->string 'Apple)]}
@defproc[(string->symbol [str string?]) symbol?]{Returns an
@tech{interned} symbol whose characters are the same as in
@racket[str].
@examples[(string->symbol "Apple") (string->symbol "1")]}
@defproc[(string->uninterned-symbol [str string?]) symbol?]{Like
@racket[(string->symbol str)], but the resulting symbol is a new
@tech{uninterned} symbol. Calling @racket[string->uninterned-symbol]
twice with the same @racket[str] returns two distinct symbols.
@examples[(string->uninterned-symbol "Apple")
(eq? 'a (string->uninterned-symbol "a"))
(eq? (string->uninterned-symbol "a")
(string->uninterned-symbol "a"))]}
@defproc[(string->unreadable-symbol [str string?]) symbol?]{Like
@racket[(string->symbol str)], but the resulting symbol is a new
@tech{unreadable symbol}. Calling @racket[string->unreadable-symbol]
twice with equivalent @racket[str]s returns the same symbol, but
@racket[read] never produces the symbol.
@examples[(string->unreadable-symbol "Apple")
(eq? 'a (string->unreadable-symbol "a"))
(eq? (string->unreadable-symbol "a")
(string->unreadable-symbol "a"))]}
@defproc[(gensym [base (or/c string? symbol?) "g"]) symbol?]{Returns a
new @tech{uninterned} symbol with an automatically-generated name. The
optional @racket[base] argument is a prefix symbol or string.}
@examples[(gensym "apple")]
@include-section["symbols.scrbl"]
@; ------------------------------------------------------------
@include-section["regexps.scrbl"]
@ -138,7 +40,8 @@ identifier. Furthermore, a keyword by itself is not a valid
expression, though a keyword can be @racket[quote]d to form an
expression that produces the symbol.
Two keywords are @racket[eq?] if and only if they print the same.
Two keywords are @racket[eq?] if and only if they print the same
(i.e., keywords are always @tech{interned}).
Like symbols, keywords are only weakly held by the internal keyword
table; see @secref["symbols"] for more information.

View File

@ -80,7 +80,9 @@ with fixnums. See also the @racketmodname[racket/fixnum] module, below.
Two fixnums that are @racket[=] are also the same
according to @racket[eq?]. Otherwise, the result of @racket[eq?]
applied to two numbers is undefined.
applied to two numbers is undefined, except that numbers produced
by the default reader are @tech{interned} and therefore @racket[eq?]
when they are @racket[eqv?].
Two numbers are @racket[eqv?] when they are both inexact with the same precision or both
exact, and when they are @racket[=] (except for @racket[+nan.0], @racket[+nan.f],

View File

@ -324,3 +324,34 @@ except that special-comment values (see
The default port read handler itself can be customized through a
readtable; see @secref["readtables"] for more information.}
@defproc[(read-intern-literal [v any/c]) any/c]{
Converts some values to be consistent with an @tech{interned} result
produced by the default reader.
If @racket[v] is a @tech{number}, @tech{character}, @tech{string},
@tech{byte string}, or @tech{regular expression}, then the result is a
value that is @racket[equal?] to @racket[v] and @racket[eq?] to a
potential result of the default reader. (Note that mutable strings and
byte strings are @tech{interned} as immutable strings and byte
strings.)
If @racket[v] is an @tech{uninterned} or an @tech{unreadable symbol},
the result is still @racket[v], since an @tech{interned} symbol would
not be @racket[equal?] to @racket[v].
The conversion process does not traverse compound values. For example,
if @racket[v] is a @tech{pair} containing strings, then the strings
within @racket[v] are not @tech{interned}.
If @racket[_v1] and @racket[_v2] are @racket[equal?] but not
@racket[eq?], then it is possible that @racket[(read-intern-literal
_v1)] will return @racket[_v1] and---sometime after @racket[_v1]
becomes unreachable as determined by the garbage collector (see
@secref["gc-model"])---@racket[(read-intern-literal _v2)] can still
return @racket[_v2]. In other words, @racket[read-intern-literal]
may adopt a given value as an @tech{interned} representative, but
if a former representative becomes otherwise unreachable, then
@racket[read-intern-literal] may adopt a new representative.}

View File

@ -38,6 +38,16 @@ Reading is defined in terms of Unicode characters; see
@secref["ports"] for information on how a byte stream is converted
to a character stream.
Symbols, keywords, strings, byte strings, regexps, characters, and numbers
produced by the reader are @deftech{interned}, which means that such
values in the result of @racket[read] or @racket[read-syntax] are
always @racket[eq?] when they are @racket[equal?] (whether from the
same call or different calls to @racket[read] or
@racket[read-syntax]). Sending an @tech{interned} value across a
@tech{place channel} does not necessarily produce an @tech{interned}
value at the receiving @tech{place}. See also
@racket[read-intern-literal] and @racket[datum->syntax].
@;------------------------------------------------------------------------
@section[#:tag "default-readtable-dispatch"]{Delimiters and Dispatch}
@ -159,11 +169,11 @@ on the next character or characters in the input stream as follows:
A sequence that does not start with a delimiter or @litchar{#} is
parsed as either a @tech{symbol} or a @tech{number} (see
@secref["parse-number"]), except that @litchar{.} by itself is
never parsed as a symbol or character (unless the
@racket[read-accept-dot] parameter is set to @racket[#f]). A
@as-index{@litchar{#%}} also starts a symbol. A successful number
parse takes precedence over a symbol parse.
@secref["parse-number"]), except that @litchar{.} by itself is never
parsed as a symbol or character (unless the @racket[read-accept-dot]
parameter is set to @racket[#f]). A @as-index{@litchar{#%}} also
starts a symbol. The resulting symbol is @tech{interned}. A successful
number parse takes precedence over a symbol parse.
@index["case-sensitivity"]{@index["case-insensitive"]{When}} the
@racket[read-case-sensitive] @tech{parameter} is set to @racket[#f],
@ -198,7 +208,7 @@ case-sensitive mode.
A sequence that does not start with a delimiter is parsed as a @tech{number}
when it matches the following grammar case-insenstively for
@nonterm{number@sub{10}} (decimal), where @metavar{n} is a
meta-meta-variable in the grammar.
meta-meta-variable in the grammar. The resulting number is @tech{interned}.
A number is optionally prefixed by an exactness specifier,
@as-index{@litchar{#e}} (exact) or @as-index{@litchar{#i}} (inexact),
@ -383,7 +393,7 @@ exception, instead of the infix conversion.
When the reader encounters @as-index{@litchar{"}}, it begins parsing
characters to form a @tech{string}. The string continues until it is
terminated by another @litchar{"} (that is not escaped by
@litchar{\}).
@litchar{\}). The resulting string is @tech{interned}.
Within a string sequence, the following escape sequences are
recognized:
@ -455,7 +465,8 @@ constant, the @exnraise[exn:fail:read].
A string constant preceded by @litchar{#} is parsed as a
@tech{byte string}. (That is, @as-index{@litchar{#"}} starts a byte-string
literal.) See @secref["bytestrings"] for information on byte
strings. Byte-string constants support the same escape sequences as
strings. The resulting byte string is @tech{interned}.
Byte-string constants support the same escape sequences as
character strings, except @litchar{\u} and @litchar{\U}.
When the reader encounters @as-index{@litchar{#<<}}, it starts parsing a
@ -712,7 +723,8 @@ one of the following forms:
A @as-index{@litchar{#:}} starts a @tech{keyword}. The parsing of a keyword
after the @litchar{#:} is the same as for a symbol, including
case-folding in case-insensitive mode, except that the part after
@litchar{#:} is never parsed as a number.
@litchar{#:} is never parsed as a number. The resulting keyword is
@tech{interned}.
@reader-examples[
"#:Apple"
@ -728,7 +740,7 @@ A @as-index{@litchar{#rx}} or @as-index{@litchar{#px}} starts a
expression as would be constructed by @racket[regexp], @litchar{#px}
as constructed by @racket[pregexp], @litchar{#rx#} as constructed by
@racket[byte-regexp], and @litchar{#px#} as constructed by
@racket[byte-pregexp].
@racket[byte-pregexp]. The resulting regular expression is @tech{interned}.
@reader-examples[
"#rx\".*\""

View File

@ -35,7 +35,7 @@
@guideintro["regexp"]{regular expressions}
@deftech{Regular expressions} are specified as strings or byte
strings, using the same pattern language as the Unix utility
strings, using the same pattern language as either the Unix utility
@exec{egrep} or Perl. A string-specified pattern produces a character
regexp matcher, and a byte-string pattern produces a byte regexp
matcher. If a character regexp is used with a byte string or input
@ -50,12 +50,15 @@ regexp value using one syntax of regular expressions that is most
compatible to @exec{egrep}. The @racket[pregexp] and
@racket[byte-pregexp] procedures produce a regexp value using a
slightly different syntax of regular expressions that is more
compatible with Perl. In addition, Racket constants written with
@litchar{#rx} or @litchar{#px} (see @secref["reader"]) produce
compiled regexp values.
compatible with Perl.
Two regular expressions are @racket[equal?] if they have the same
source, use the same pattern language, and are both character regexps
or both byte regexps.
A literal or printed regular expression starts with @litchar{#rx} or
@litchar{#px}. @see-read-print["regexp"]{regular expressions}
@litchar{#px}. @see-read-print["regexp"]{regular expressions} Regular
expressions produced by the default reader are @tech{interned}.
The internal size of a regexp value is limited to 32 kilobytes; this
limit roughly corresponds to a source string with 32,000 literal

View File

@ -13,7 +13,7 @@ A @deftech{string} is a fixed-length array of
procedure like @racket[string-set!], the
@exnraise[exn:fail:contract]. String constants generated by the
default reader (see @secref["parse-string"]) are
immutable.
immutable and @tech{interned}.
Two strings are @racket[equal?] when they have the same length and
contain the same sequence of characters.

View File

@ -114,7 +114,10 @@ leaving nested syntax structure (if any) in place. The result of
@item{an immutable @tech{prefab} structure containing @tech{syntax object}s}
@item{some other kind of datum---usually a number, boolean, or string}
@item{some other kind of datum---usually a number, boolean, or
string---that is @tech{interned} when
@racket[read-intern-literal] would convert the
value}
]
@ -185,7 +188,8 @@ boxes. For any kind of value other than a
pair, vector, box, immutable @tech{hash table}, immutable
@tech{prefab} structure, or @tech{syntax object}, conversion means
wrapping the value with lexical information, source-location
information, and properties.
information, and properties after the value is @tech{interned}
via @racket[read-intern-literal].
Converted objects in @racket[v] are given the lexical context
information of @racket[ctxt] and the source-location information of

View File

@ -0,0 +1,102 @@
#lang scribble/doc
@(require "mz.rkt")
@title[#:tag "symbols"]{Symbols}
@guideintro["symbols"]{symbols}
@section-index["symbols" "generating"]
@section-index["symbols" "unique"]
A @deftech{symbol} is like an immutable string, but symbols are
normally @tech{interned}, so that two symbols with the same
character content are normally @racket[eq?]. All symbols produced by
the default reader (see @secref["parse-symbol"]) are @tech{interned}.
The two procedures @racket[string->uninterned-symbol] and
@racket[gensym] generate @deftech{uninterned} symbols, i.e., symbols
that are not @racket[eq?], @racket[eqv?], or @racket[equal?] to any
other symbol, although they may print the same as other symbols.
The procedure @racket[string->unreadable-symbol] returns an
@deftech{unreadable symbol} that is partially interned. The default
reader (see @secref["parse-symbol"]) never produces a unreadable
symbol, but two calls to @racket[string->unreadable-symbol] with
@racket[equal?] strings produce @racket[eq?] results. An unreadable
symbol can print the same as an interned or uninterned
symbol. Unreadable symbols are useful in expansion and
compilation to avoid collisions with symbols that appear in the
source; they are usually not generated directly, but they can appear
in the result of functions like @racket[identifier-binding].
Interned and unreadable symbols are only weakly held by the internal
symbol table. This weakness can never affect the result of an
@racket[eq?], @racket[eqv?], or @racket[equal?] test, but a symbol may
disappear when placed into a weak box (see @secref["weakbox"]) used as
the key in a weak @tech{hash table} (see @secref["hashtables"]), or
used as an ephemeron key (see @secref["ephemerons"]).
@see-read-print["symbol"]{symbols}
@defproc[(symbol? [v any/c]) boolean?]{Returns @racket[#t] if @racket[v] is
a symbol, @racket[#f] otherwise.
@examples[(symbol? 'Apple) (symbol? 10)]}
@defproc[(symbol-interned? [sym symbol?]) boolean?]{Returns @racket[#t] if @racket[sym] is
@tech{interned}, @racket[#f] otherwise.
@examples[(symbol-interned? 'Apple)
(symbol-interned? (gensym))
(symbol-interned? (string->unreadable-symbol "Apple"))]}
@defproc[(symbol-unreadable? [sym symbol?]) boolean?]{Returns @racket[#t] if @racket[sym] is
an @tech{unreadable symbol}, @racket[#f] otherwise.
@examples[(symbol-unreadable? 'Apple)
(symbol-unreadable? (gensym))
(symbol-unreadable? (string->unreadable-symbol "Apple"))]}
@defproc[(symbol->string [sym symbol?]) string?]{Returns a freshly
allocated mutable string whose characters are the same as in
@racket[sym].
@examples[(symbol->string 'Apple)]}
@defproc[(string->symbol [str string?]) symbol?]{Returns an
@tech{interned} symbol whose characters are the same as in
@racket[str].
@examples[(string->symbol "Apple") (string->symbol "1")]}
@defproc[(string->uninterned-symbol [str string?]) symbol?]{Like
@racket[(string->symbol str)], but the resulting symbol is a new
@tech{uninterned} symbol. Calling @racket[string->uninterned-symbol]
twice with the same @racket[str] returns two distinct symbols.
@examples[(string->uninterned-symbol "Apple")
(eq? 'a (string->uninterned-symbol "a"))
(eq? (string->uninterned-symbol "a")
(string->uninterned-symbol "a"))]}
@defproc[(string->unreadable-symbol [str string?]) symbol?]{Like
@racket[(string->symbol str)], but the resulting symbol is a new
@tech{unreadable symbol}. Calling @racket[string->unreadable-symbol]
twice with equivalent @racket[str]s returns the same symbol, but
@racket[read] never produces the symbol.
@examples[(string->unreadable-symbol "Apple")
(eq? 'a (string->unreadable-symbol "a"))
(eq? (string->unreadable-symbol "a")
(string->unreadable-symbol "a"))]}
@defproc[(gensym [base (or/c string? symbol?) "g"]) symbol?]{Returns a
new @tech{uninterned} symbol with an automatically-generated name. The
optional @racket[base] argument is a prefix symbol or string.}
@examples[(gensym "apple")]

View File

@ -272,8 +272,11 @@
(test-mem memv 'memv)
(test-mem member 'member)
(test #f memq "apple" '("apple"))
(test #f memv "apple" '("apple"))
(test '("apple") memq "apple" '("apple")) ; literals are interned
(test '(#"apple") memq #"apple" '(#"apple")) ; literals are interned
(test #f memq (list->string (string->list "apple")) '("apple"))
(test #f memq (list->bytes (bytes->list #"apple")) '(#"apple"))
(test #f memv (list->string (string->list "apple")) '("apple"))
(test '("apple") member "apple" '("apple"))
; (test #f memq 1/2 '(1/2)) ; rationals are immutable and we may want to optimize
@ -2364,7 +2367,8 @@
(test '((1 . 2)) hash-map im-t cons)
(test 2 hash-ref im-t 1)
(define im-t (make-immutable-hasheq '(("hello" . 2))))
(test 'none hash-ref im-t "hello" (lambda () 'none))
(test 2 hash-ref im-t "hello" (lambda () 'none)) ; literals interned
(test 'none hash-ref im-t (list->string (string->list "hello")) (lambda () 'none))
(define im-t (make-immutable-hash '(("hello" . 2))))
(test 2 hash-ref im-t "hello" (lambda () 'none))
(test #f hash-eq? im-t)

View File

@ -352,16 +352,18 @@
(test-ht (readstr "#hasheq((1 . 2))") 1 #t 1 2)
(test-ht (readstr "#hasheqv((1 . 2))") 1 #f 1 2)
(test-ht (readstr "#hash((\"apple\" . 1))") 1 #f "apple" 1)
(test-ht (readstr "#hasheq((\"apple\" . 1))") 1 #t "apple" #f)
(test-ht (readstr "#hasheqv((\"apple\" . 1))") 1 #f "apple" #f)
(test-ht (readstr "#hash((\"apple\" . 1) (\"apple\" . 10))") 1 #f "apple" 10)
(test-ht (readstr "#hasheq((\"apple\" . 1) (\"apple\" . 10))") 2 #t "apple" #f)
(test-ht (readstr "#hasheqv((\"apple\" . 1) (\"apple\" . 10))") 2 #f "apple" #f)
(test-ht (readstr "#hash((\"apple\" . 1))") 1 #f (list->string (string->list "apple")) 1)
(test-ht (readstr "#hasheq((\"apple\" . 1))") 1 #t "apple" 1)
(test-ht (readstr "#hasheq((\"apple\" . 1))") 1 #t (list->string (string->list "apple")) #f)
(test-ht (readstr "#hasheqv((\"apple\" . 1))") 1 #f (list->string (string->list "apple")) #f)
(test-ht (readstr "#hash((#s(s \"apple\") . 1) (#s(s \"apple\") . 10))") 1 #f '#s(s "apple") 10)
(test-ht (readstr "#hasheq((#s(s \"apple\") . 1) (#s(s \"apple\") . 10))") 2 #t '#s(s "apple") #f)
(test-ht (readstr "#hasheqv((#s(s \"apple\") . 1) (#s(s \"apple\") . 10))") 2 #f #s(s "apple") #f)
(test-ht (readstr "#hash((apple . 1) (apple . 10))") 1 #f 'apple 10)
(test-ht (readstr "#hasheq((apple . 1) (apple . 10))") 1 #t 'apple 10)
(test-ht (readstr "#hasheqv((apple . 1) (apple . 10))") 1 #f 'apple 10)
(test-ht (readstr "#hasheq((#0=\"apple\" . 1) (#0# . 10))") 1 #t "apple" #f)
(test-ht (readstr "#hash((#0=\"apple\" . 1) (\"banana\" . #0#))") 2 #f "banana" "apple")
(test-ht (readstr "#hasheq((#0=\"apple\" . 1) (#0# . 10))") 1 #t (list->string (string->list "apple")) #f)
(test-ht (readstr "#hash((#0=\"apple\" . 1) (\"banana\" . #0#))") 2 #f "banana" (list->string (string->list "apple")))
(test-ht (readstr "#hash((a . 1) (b . 2) (c . 3) (e . 4) (f . 5) (g . 6) (h . 7) (i . 8))") 8 #f 'f 5)
(let ([t (readstr "#0=#hash((\"apple\" . #0#))")])
(test-ht t 1 #f "apple" t))
@ -372,6 +374,13 @@
(let ([t (readstr "#0=#hash((#0# . 17))")])
;; Don't look for t, because that's a hash on a circular object!
(test-ht t 1 #f 'none #f))
;; check intern and comparison of regexps:
(test-ht (readstr "#hasheq((#rx\"apple\" . 1))") 1 #t #rx"apple" 1)
(test-ht (readstr "#hasheq((#rx\"apple\" . 1))") 1 #t #px"apple" #f)
(test-ht (readstr "#hasheq((#rx#\"apple\" . 1))") 1 #t #rx#"apple" 1)
(test-ht (readstr "#hasheq((#rx#\"apple\" . 1))") 1 #t #rx"apple" #f)
(test-ht (readstr "#hasheq((#rx#\"apple\" . 1))") 1 #t #px#"apple" #f)
(test-ht (readstr "#hasheq((#px#\"apple\" . 1))") 1 #t #px#"apple" 1)
(define (test-write-ht writer t . strings)
(let ([o (open-output-string)])

View File

@ -201,9 +201,9 @@
(test #f vector-member 7 #(0 1 2))
(test 1 vector-memq 'x #(7 x 2))
(test 1 vector-memv 'x #(7 x 2))
(test #f vector-memq 1000000000000000000000 #(7 1000000000000000000000 2))
(test 1 vector-memv 1000000000000000000000 #(7 1000000000000000000000 2))
(test 1 vector-member 1000000000000000000000 #(7 1000000000000000000000 2))
(test #f vector-memq (* 10 100000000000000000000) #(7 1000000000000000000000 2))
(test 1 vector-memv (* 10 100000000000000000000) #(7 1000000000000000000000 2))
(test 1 vector-member (* 10 100000000000000000000) #(7 1000000000000000000000 2))
(test #f vector-memq (cons 1 2) (vector 7 (cons 1 2) 2))
(test #f vector-memv (cons 1 2) (vector 7 (cons 1 2) 2))
(test 1 vector-member (cons 1 2) (vector 7 (cons 1 2) 2))

View File

@ -1,3 +1,9 @@
Version 5.2.0.4
Regexps are `equal?' when they have the same source [byte] string
and mode
Numbers, characters, strings, byte strings, and regexps are interned by
read and datum->syntax
Version 5.2.0.3
Added module-predefined?
Changed the raacket -k command-line flag

View File

@ -328,6 +328,8 @@ typedef struct Thread_Local_Variables {
struct Scheme_Hash_Table *place_local_symbol_table_;
struct Scheme_Hash_Table *place_local_keyword_table_;
struct Scheme_Hash_Table *place_local_parallel_symbol_table_;
struct Scheme_Bucket_Table *literal_string_table_;
struct Scheme_Bucket_Table *literal_number_table_;
struct FFI_Sync_Queue *ffi_sync_queue_;
struct Scheme_GC_Pre_Post_Callback_Desc *gc_prepost_callback_descs_;
struct Scheme_Hash_Table *place_local_misc_table_;
@ -674,6 +676,8 @@ XFORM_GC_VARIABLE_STACK_THROUGH_THREAD_LOCAL;
#define place_local_symbol_table XOA (scheme_get_thread_local_variables()->place_local_symbol_table_)
#define place_local_keyword_table XOA (scheme_get_thread_local_variables()->place_local_keyword_table_)
#define place_local_parallel_symbol_table XOA (scheme_get_thread_local_variables()->place_local_parallel_symbol_table_)
#define literal_string_table XOA (scheme_get_thread_local_variables()->literal_string_table_)
#define literal_number_table XOA (scheme_get_thread_local_variables()->literal_number_table_)
#define ffi_sync_queue XOA (scheme_get_thread_local_variables()->ffi_sync_queue_)
#define gc_prepost_callback_descs XOA (scheme_get_thread_local_variables()->gc_prepost_callback_descs_)
#define place_local_misc_table XOA (scheme_get_thread_local_variables()->place_local_misc_table_)

View File

@ -485,6 +485,14 @@ int is_equal (Scheme_Object *obj1, Scheme_Object *obj2, Equal_Info *eql)
l2 = SCHEME_CHAR_STRTAG_VAL(obj2);
return ((l1 == l2)
&& !memcmp(SCHEME_CHAR_STR_VAL(obj1), SCHEME_CHAR_STR_VAL(obj2), l1 * sizeof(mzchar)));
} else if (t1 == scheme_regexp_type) {
if (scheme_regexp_is_byte(obj1) != scheme_regexp_is_byte(obj2))
return 0;
if (scheme_regexp_is_pregexp(obj1) != scheme_regexp_is_pregexp(obj2))
return 0;
obj1 = scheme_regexp_source(obj1);
obj2 = scheme_regexp_source(obj2);
goto top;
} else if ((t1 == scheme_structure_type)
|| (t1 == scheme_proc_struct_type)) {
Scheme_Struct_Type *st1, *st2;

File diff suppressed because it is too large Load Diff

View File

@ -56,6 +56,9 @@ READ_ONLY static Scheme_Env *futures_env;
THREAD_LOCAL_DECL(static int builtin_ref_counter);
THREAD_LOCAL_DECL(static int intdef_counter);
THREAD_LOCAL_DECL(static Scheme_Bucket_Table *literal_string_table);
THREAD_LOCAL_DECL(static Scheme_Bucket_Table *literal_number_table);
/* local functions */
static void make_kernel_env(void);
@ -489,6 +492,11 @@ static Scheme_Env *place_instance_init(void *stack_base, int initial_main_os_thr
scheme_init_foreign(env);
REGISTER_SO(literal_string_table);
REGISTER_SO(literal_number_table);
literal_string_table = scheme_make_weak_equal_table();
literal_number_table = scheme_make_weak_eqv_table();
scheme_starting_up = 1; /* in case it's not set already */
scheme_add_embedded_builtins(env);
@ -1438,6 +1446,36 @@ const char *scheme_look_for_primitive(void *code)
return NULL;
}
/*========================================================================*/
/* intern literal strings and numbers */
/*========================================================================*/
Scheme_Object *scheme_intern_literal_string(Scheme_Object *str)
{
Scheme_Bucket *b;
scheme_start_atomic();
b = scheme_bucket_from_table(literal_string_table, (const char *)str);
scheme_end_atomic_no_swap();
if (!b->val)
b->val = scheme_true;
return(Scheme_Object *)HT_EXTRACT_WEAK(b->key);
}
Scheme_Object *scheme_intern_literal_number(Scheme_Object *num)
{
Scheme_Bucket *b;
scheme_start_atomic();
b = scheme_bucket_from_table(literal_number_table, (const char *)num);
scheme_end_atomic_no_swap();
if (!b->val)
b->val = scheme_true;
return(Scheme_Object *)HT_EXTRACT_WEAK(b->key);
}
/*========================================================================*/
/* run-time and expansion-time Racket interface */
/*========================================================================*/

View File

@ -1180,6 +1180,11 @@ static uintptr_t equal_hash_key(Scheme_Object *o, uintptr_t k, Hash_Info *hi)
return k;
}
case scheme_regexp_type:
{
o = scheme_regexp_source(o);
break;
}
case scheme_structure_type:
case scheme_proc_struct_type:
{
@ -1605,6 +1610,11 @@ static uintptr_t equal_hash_key2(Scheme_Object *o, Hash_Info *hi)
return k;
}
case scheme_regexp_type:
{
o = scheme_regexp_source(o);
goto top;
}
case scheme_structure_type:
case scheme_proc_struct_type:
{

View File

@ -1661,7 +1661,7 @@ static int thread_val_SIZE(void *p, struct NewGC *gc) {
static int thread_val_MARK(void *p, struct NewGC *gc) {
Scheme_Thread *pr = (Scheme_Thread *)p;
gcMARK2(pr->next, gc);
gcMARK2(pr->prev, gc);
@ -1775,7 +1775,7 @@ static int thread_val_MARK(void *p, struct NewGC *gc) {
static int thread_val_FIXUP(void *p, struct NewGC *gc) {
Scheme_Thread *pr = (Scheme_Thread *)p;
gcFIXUP2(pr->next, gc);
gcFIXUP2(pr->prev, gc);

View File

@ -640,7 +640,7 @@ syntax_compiler {
thread_val {
mark:
Scheme_Thread *pr = (Scheme_Thread *)p;
gcMARK2(pr->next, gc);
gcMARK2(pr->prev, gc);

View File

@ -2487,27 +2487,47 @@ static Scheme_Object *optimize_sequence(Scheme_Object *o, Optimize_Info *info, i
return (Scheme_Object *)s;
}
XFORM_NONGCING static int small_inline_number(Scheme_Object *o)
{
if (SCHEME_BIGNUMP(o))
return SCHEME_BIGLEN(o) < 32;
else if (SCHEME_COMPLEXP(o))
return (small_inline_number(scheme_complex_real_part(o))
&& small_inline_number(scheme_complex_imaginary_part(o)));
else if (SCHEME_RATIONALP(o))
return (small_inline_number(scheme_rational_numerator(o))
&& small_inline_number(scheme_rational_denominator(o)));
else
return 1;
}
#define STR_INLINE_LIMIT 256
int scheme_compiled_duplicate_ok(Scheme_Object *fb, int cross_module)
{
return (SCHEME_VOIDP(fb)
|| SAME_OBJ(fb, scheme_true)
|| SCHEME_FALSEP(fb)
|| (SCHEME_SYMBOLP(fb) && (!cross_module || !SCHEME_SYM_WEIRDP(fb)))
|| SCHEME_KEYWORDP(fb)
|| (SCHEME_SYMBOLP(fb)
&& (!cross_module || (!SCHEME_SYM_WEIRDP(fb)
&& (SCHEME_SYM_LEN(fb) < STR_INLINE_LIMIT))))
|| (SCHEME_KEYWORDP(fb)
&& (!cross_module || (SCHEME_KEYWORD_LEN(fb) < STR_INLINE_LIMIT)))
|| SCHEME_EOFP(fb)
|| SCHEME_INTP(fb)
|| SCHEME_NULLP(fb)
|| (SCHEME_CHARP(fb) && (SCHEME_CHAR_VAL(fb) < 256))
|| SAME_TYPE(SCHEME_TYPE(fb), scheme_local_type)
|| (!cross_module
&&
/* Values that are hashed by the printer to avoid
duplication: */
(SCHEME_CHAR_STRINGP(fb)
|| SCHEME_BYTE_STRINGP(fb)
|| SAME_TYPE(SCHEME_TYPE(fb), scheme_regexp_type)
|| SCHEME_NUMBERP(fb)
|| SCHEME_PRIMP(fb))));
|| SCHEME_PRIMP(fb)
/* Values that are hashed by the printer and/or interned on
read to avoid duplication: */
|| SCHEME_CHARP(fb)
|| (SCHEME_CHAR_STRINGP(fb)
&& (!cross_module || (SCHEME_CHAR_STRLEN_VAL(fb) < STR_INLINE_LIMIT)))
|| (SCHEME_BYTE_STRINGP(fb)
&& (!cross_module || (SCHEME_BYTE_STRLEN_VAL(fb) < STR_INLINE_LIMIT)))
|| SAME_TYPE(SCHEME_TYPE(fb), scheme_regexp_type)
|| (SCHEME_NUMBERP(fb)
&& (!cross_module || small_inline_number(fb))));
}
static int equivalent_exprs(Scheme_Object *a, Scheme_Object *b)

View File

@ -312,6 +312,8 @@ static Scheme_Object *readtable_mapping(int argc, Scheme_Object **argv);
static Scheme_Object *current_readtable(int argc, Scheme_Object **argv);
static Scheme_Object *current_reader_guard(int argc, Scheme_Object **argv);
static Scheme_Object *read_intern(int argc, Scheme_Object **argv);
/* A list stack is used to speed up the creation of intermediate lists
during .zo reading. */
@ -499,6 +501,8 @@ void scheme_init_read(Scheme_Env *env)
GLOBAL_FOLDING_PRIM("readtable?", readtable_p, 1, 1, 1, env);
GLOBAL_PRIM_W_ARITY2("readtable-mapping", readtable_mapping, 2, 2, 3, 3, env);
GLOBAL_NONCM_PRIM("read-intern-literal", read_intern, 1, 1, env);
if (getenv("PLT_DELAY_FROM_ZO")) {
use_perma_cache = 0;
}
@ -1466,6 +1470,8 @@ read_inner_inner(Scheme_Object *port, Scheme_Object *stxsrc, Scheme_Hash_Table *
return NULL;
}
str = scheme_intern_literal_string(str);
if (stxsrc)
str = scheme_make_stx_w_offset(str, line, col, pos, SPAN(port, pos), stxsrc, STX_SRCTAG);
@ -3007,8 +3013,10 @@ read_string(int is_byte, Scheme_Object *port,
s[i] = 0;
result = scheme_make_immutable_sized_byte_string(s, i, 0);
}
result = scheme_intern_literal_string(result);
if (stxsrc)
result = scheme_make_stx_w_offset(result, line, col, pos, SPAN(port, pos), stxsrc, STX_SRCTAG);
return result;
}
@ -3097,9 +3105,11 @@ read_here_string(Scheme_Object *port, Scheme_Object *stxsrc,
str = scheme_make_sized_char_string(s, len, 1);
str = scheme_intern_literal_string(str);
if (stxsrc)
str = scheme_make_stx_w_offset(str, line, col, pos, SPAN(port, pos), stxsrc, STX_SRCTAG);
return str;
}
@ -3419,6 +3429,8 @@ read_number_or_symbol(int init_ch, int skip_rt, Scheme_Object *port,
port, NULL, 0,
stxsrc, line, col, pos, SPAN(port, pos),
indentation);
if (!SCHEME_INTP(o))
o = scheme_intern_literal_number(o);
}
if (SAME_OBJ(o, scheme_false)) {
@ -3553,6 +3565,14 @@ static int u_strcmp(mzchar *s, const char *_t)
return 0;
}
static Scheme_Object *make_interned_char(int ch)
{
if (ch < 256)
return scheme_make_character(ch);
else
return scheme_intern_literal_number(scheme_make_char(ch));
}
/* "#\" has been read */
static Scheme_Object *
read_character(Scheme_Object *port,
@ -3591,7 +3611,7 @@ read_character(Scheme_Object *port,
ch = ((ch - '0') << 6) + ((next - '0') << 3) + (last - '0');
return scheme_make_char(ch);
return make_interned_char(ch);
}
if (((ch == 'u') || (ch == 'U')) && NOT_EOF_OR_SPECIAL(next) && scheme_isxdigit(next)) {
@ -3691,7 +3711,7 @@ read_character(Scheme_Object *port,
"read: expected a character after #\\");
}
return scheme_make_char(ch);
return make_interned_char(ch);
}
/*========================================================================*/
@ -3800,6 +3820,39 @@ static Scheme_Object *read_hash(Scheme_Object *port, Scheme_Object *stxsrc,
}
}
/*========================================================================*/
/* intern */
/*========================================================================*/
static Scheme_Object *read_intern(int argc, Scheme_Object **argv)
{
return scheme_read_intern(argv[0]);
}
Scheme_Object *scheme_read_intern(Scheme_Object *o)
{
if (!SCHEME_INTP(o) && SCHEME_NUMBERP(o))
o = scheme_intern_literal_number(o);
else if (SCHEME_CHAR_STRINGP(o)) {
if (!SCHEME_IMMUTABLEP(o))
o = scheme_make_immutable_sized_char_string(SCHEME_CHAR_STR_VAL(o),
SCHEME_CHAR_STRLEN_VAL(o),
1);
o = scheme_intern_literal_string(o);
} else if (SCHEME_BYTE_STRINGP(o)) {
if (!SCHEME_IMMUTABLEP(o))
o = scheme_make_immutable_sized_byte_string(SCHEME_BYTE_STR_VAL(o),
SCHEME_BYTE_STRLEN_VAL(o),
1);
o = scheme_intern_literal_string(o);
} else if (SAME_TYPE(SCHEME_TYPE(o), scheme_regexp_type))
o = scheme_intern_literal_string(o);
else if (SCHEME_CHARP(o) && (SCHEME_CHAR_VAL(o) >= 256))
o = scheme_intern_literal_number(o);
return o;
}
/*========================================================================*/
/* utilities */
/*========================================================================*/
@ -4289,6 +4342,7 @@ static Scheme_Object *read_compact(CPort *port, int use_stack)
RANGE_CHECK_GETS(l);
s = read_compact_chars(port, buffer, BLK_BUF_SIZE, l);
v = scheme_make_immutable_sized_byte_string(s, l, l < BLK_BUF_SIZE);
v = scheme_intern_literal_string(v);
break;
case CPT_CHAR_STRING:
{
@ -4302,6 +4356,7 @@ static Scheme_Object *read_compact(CPort *port, int use_stack)
scheme_utf8_decode_all((const unsigned char *)s, el, us, 0);
us[l] = 0;
v = scheme_make_immutable_sized_char_string(us, l, 0);
v = scheme_intern_literal_string(v);
}
break;
case CPT_CHAR:

View File

@ -5949,6 +5949,11 @@ int scheme_regexp_is_byte(Scheme_Object *re)
return !(((regexp *)re)->flags & REGEXP_IS_UTF8);
}
int scheme_regexp_is_pregexp(Scheme_Object *re)
{
return !!(((regexp *)re)->flags & REGEXP_IS_PCRE);
}
#ifdef MZ_PRECISE_GC
START_XFORM_SKIP;
#include "mzmark_regexp.inc"

View File

@ -13,7 +13,7 @@
#define USE_COMPILED_STARTUP 1
#define EXPECTED_PRIM_COUNT 1040
#define EXPECTED_PRIM_COUNT 1041
#define EXPECTED_UNSAFE_COUNT 78
#define EXPECTED_FLFXNUM_COUNT 68
#define EXPECTED_FUTURES_COUNT 11

View File

@ -692,6 +692,9 @@ struct Scheme_Hash_Tree
#define SCHEME_HASHTR_FLAGS(tr) MZ_OPT_HASH_KEY(&(tr)->iso)
Scheme_Object *scheme_intern_literal_string(Scheme_Object *str);
Scheme_Object *scheme_intern_literal_number(Scheme_Object *num);
/*========================================================================*/
/* structs */
/*========================================================================*/
@ -2121,6 +2124,7 @@ void scheme_install_type_reader(Scheme_Type type, Scheme_Type_Reader f);
void scheme_install_type_writer(Scheme_Type type, Scheme_Type_Writer f);
Scheme_Object *scheme_make_default_readtable(void);
Scheme_Object *scheme_read_intern(Scheme_Object *o);
Scheme_Object *_scheme_apply_from_native(Scheme_Object *rator,
int argc,
@ -3603,6 +3607,7 @@ void scheme_bad_vec_index(char *name, Scheme_Object *i,
intptr_t bottom, intptr_t len);
Scheme_Bucket_Table *scheme_make_weak_equal_table(void);
Scheme_Bucket_Table *scheme_make_weak_eqv_table(void);
Scheme_Bucket_Table *scheme_make_nonlock_equal_bucket_table(void);
int scheme_hash_table_equal_rec(Scheme_Hash_Table *t1, Scheme_Hash_Table *t2, void *eql);
@ -3619,6 +3624,7 @@ void scheme_reset_hash_table(Scheme_Hash_Table *ht, int *history);
Scheme_Object *scheme_regexp_source(Scheme_Object *re);
int scheme_regexp_is_byte(Scheme_Object *re);
int scheme_regexp_is_pregexp(Scheme_Object *re);
Scheme_Object *scheme_make_regexp(Scheme_Object *str, int byte, int pcre, int * volatile result_is_err_string);
int scheme_is_pregexp(Scheme_Object *o);
void scheme_clear_rx_buffers(void);

View File

@ -13,12 +13,12 @@
consistently.)
*/
#define MZSCHEME_VERSION "5.2.0.3"
#define MZSCHEME_VERSION "5.2.0.4"
#define MZSCHEME_VERSION_X 5
#define MZSCHEME_VERSION_Y 2
#define MZSCHEME_VERSION_Z 0
#define MZSCHEME_VERSION_W 3
#define MZSCHEME_VERSION_W 4
#define MZSCHEME_VERSION_MAJOR ((MZSCHEME_VERSION_X * 100) + MZSCHEME_VERSION_Y)
#define MZSCHEME_VERSION_MINOR ((MZSCHEME_VERSION_Z * 1000) + MZSCHEME_VERSION_W)

View File

@ -7367,6 +7367,8 @@ static Scheme_Object *datum_to_syntax_inner(Scheme_Object *o,
result = (Scheme_Object *)s;
} else {
if (!wraps)
o = scheme_read_intern(o);
result = o;
}