From de2dedcdd7579cbf9f499d6c5c717983a66b7c3d Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Wed, 4 Dec 2019 12:32:43 -0700 Subject: [PATCH] Add uninterned symbols Uninterned symbols are slightly more expensive to allocate than 0- or 1-argument calls to `gensym`, but they're much cheaper to hash (and print). They're also more consistently distinct when unfasled, and the fasled form is determinsitic. original commit: 3167083008031b1f880e76a6f573563c7d9c888c --- c/externs.h | 1 + c/fasl.c | 9 ++++++++ c/prim5.c | 20 ++++++++++++++++ csug/objects.stex | 59 ++++++++++++++++++++++++++++++++++++++++++++++- mats/5_7.ms | 19 +++++++++++++++ s/cmacros.ss | 5 ++-- s/cp0.ss | 2 +- s/cpnanopass.ss | 19 ++++++++++++--- s/fasl.ss | 3 +++ s/primdata.ss | 2 ++ s/prims.ss | 7 ++++++ 11 files changed, 139 insertions(+), 7 deletions(-) diff --git a/c/externs.h b/c/externs.h index 6ffbaf5c13..366cd4dc39 100644 --- a/c/externs.h +++ b/c/externs.h @@ -315,6 +315,7 @@ extern void S_prim_init PROTO((void)); extern ptr S_strerror PROTO((INT errnum)); extern void S_prim5_init PROTO((void)); extern void S_dump_tc PROTO((ptr tc)); +extern ptr S_uninterned PROTO((ptr x)); /* print.c */ extern void S_print_init PROTO((void)); diff --git a/c/fasl.c b/c/fasl.c index 0386b6ce46..6f2de1f111 100644 --- a/c/fasl.c +++ b/c/fasl.c @@ -652,6 +652,15 @@ static void faslin(ptr tc, ptr *x, ptr t, ptr *pstrbuf, faslFile f) { *x = S_intern3(&STRIT(*pstrbuf, 0), pn, &STRIT(*pstrbuf, pn), un, Sfalse, Sfalse); return; } + case fasl_type_uninterned_symbol: { + iptr i, n; + ptr str; + n = uptrin(f); + str = S_string((char *)0, n); + for (i = 0; i != n; i += 1) Sstring_set(str, i, uptrin(f)); + *x = S_uninterned(str); + return; + } case fasl_type_ratnum: *x = S_rational(FIX(0), FIX(0)); faslin(tc, &RATNUM(*x), t, pstrbuf, f); diff --git a/c/prim5.c b/c/prim5.c index 98d1fd84a8..04c9c7bff5 100644 --- a/c/prim5.c +++ b/c/prim5.c @@ -962,6 +962,25 @@ static ptr s_strings_to_gensym(ptr pname_str, ptr uname_str) { pname_str, uname_str); } +ptr S_uninterned(x) ptr x; { + ptr sym; + static uptr hc; + + require(Sstringp(x),"string->uninterned-symbol","~s is not a string",x); + + sym = S_symbol(Scons(x, Sfalse)); + + /* Wraparound on `hc++` is ok. It's technically illegal with + threads, since multiple thread might increment `hc` at the same + time; we don't care if we miss an increment sometimes, and we + assume compilers won't take this as a license for arbitrarily bad + behavior: */ + hc++; + INITSYMHASH(sym) = FIX(hc); + + return sym; +} + static ptr s_mkdir(const char *inpath, INT mode) { INT status; ptr res; char *path; @@ -1516,6 +1535,7 @@ void S_prim5_init() { Sforeign_symbol("(cs)s_intern3", (void *)s_intern3); Sforeign_symbol("(cs)s_strings_to_gensym", (void *)s_strings_to_gensym); Sforeign_symbol("(cs)s_intern_gensym", (void *)S_intern_gensym); + Sforeign_symbol("(cs)s_uninterned", (void *)S_uninterned); Sforeign_symbol("(cs)cputime", (void *)S_cputime); Sforeign_symbol("(cs)realtime", (void *)S_realtime); Sforeign_symbol("(cs)clock_gettime", (void *)S_clock_gettime); diff --git a/csug/objects.stex b/csug/objects.stex index 407cd476c5..ee088b1c1a 100644 --- a/csug/objects.stex +++ b/csug/objects.stex @@ -1550,7 +1550,7 @@ These parameters are not consulted until that time; setting them when %---------------------------------------------------------------------------- \entryheader \formdef{gensym?}{\categoryprocedure}{(gensym? \var{obj})} -\returns \scheme{#t} if \var{obj} is gensym, \scheme{#f} otherwise +\returns \scheme{#t} if \var{obj} is a gensym, \scheme{#f} otherwise \listlibraries \endentryheader @@ -1564,6 +1564,63 @@ These parameters are not consulted until that time; setting them when \endschemedisplay +%---------------------------------------------------------------------------- +\entryheader +\formdef{string->uninterned-symbol}{\categoryprocedure}{(string->uninterned-symbol \var{str})} +\returns a fresh uninterned symbol +\listlibraries +\endentryheader + +\var{str} must be a string. + +Returns an uninterned symbol that prints the same as a symbol +constructed from \var{str}, but which is not \scheme{eq?} to any +other symbol. + +When an uninterned symbol is converted by the fasl writer, the fasl +reader will allocate a fresh uninterned symbol each time the fasl +stream is read. Multiple occurrences of the same uninterned symbol in +the fasl writer's argument will become multiple occurrences of the +same new uninterned symbol in the fasl reader's result for the stream. + +\noskip\schemedisplay +(string->uninterned-symbol "z") ;=> z +(uninterned-symbol? (string->uninterned-symbol "z")) ;=> #t +(symbol? (string->uninterned-symbol "z")) ;=> #t +(gensym? (string->uninterned-symbol "z")) ;=> #f +\endschemedisplay + +%---------------------------------------------------------------------------- +\entryheader +\formdef{uninterned-symbol?}{\categoryprocedure}{(uninterned-symbol? \var{obj})} +\returns \scheme{#t} if \var{obj} is an uninterned symbol, \scheme{#f} otherwise +\listlibraries +\endentryheader + +\noskip\schemedisplay +(uninterned-symbol? (string->symbol "z")) ;=> #f +(uninterned-symbol? (gensym "z")) ;=> #f +(uninterned-symbol? (string->uninterned-symbol "z")) ;=> #t +\endschemedisplay + + + + +%---------------------------------------------------------------------------- +\entryheader +\formdef{uninterned-symbol?}{\categoryprocedure}{(uninterned-symbol? \var{obj})} +\returns \scheme{#t} if \var{obj} is an uninterned symbol, \scheme{#f} otherwise +\listlibraries +\endentryheader + +\noskip\schemedisplay +(uninterned-symbol? (string->symbol "z")) ;=> #f +(uninterned-symbol? (gensym "z")) ;=> #f +(uninterned-symbol? (string->uninterned-symbol "z")) ;=> #t +\endschemedisplay + + + %---------------------------------------------------------------------------- \entryheader\label{property-lists} \formdef{putprop}{\categoryprocedure}{(putprop \var{symbol} \var{key} \var{value})} diff --git a/mats/5_7.ms b/mats/5_7.ms index fd74e87156..e54033edbf 100644 --- a/mats/5_7.ms +++ b/mats/5_7.ms @@ -105,3 +105,22 @@ (error? (putprop "hi" 'key 'value)) (error? (property-list '(a b c))) ) + +(mat uninterned-symbol + (symbol? (string->uninterned-symbol "hello")) + (uninterned-symbol? (string->uninterned-symbol "hello")) + (not (gensym? (string->uninterned-symbol "hello"))) + + (equal? "hello" (symbol->string (string->uninterned-symbol "hello"))) + + (not (eq? (string->uninterned-symbol "hello") + (string->uninterned-symbol "hello"))) + + (let-values ([(o get) (open-bytevector-output-port)]) + (let ([s (string->uninterned-symbol "hello")]) + (fasl-write (list s s) o) + (let ([r (fasl-read (open-bytevector-input-port (get)))]) + (and (eq? (car r) (cadr r)) + (equal? "hello" (symbol->string (car r))))))) + + ) diff --git a/s/cmacros.ss b/s/cmacros.ss index cf11bdba86..36d1b60e6b 100644 --- a/s/cmacros.ss +++ b/s/cmacros.ss @@ -328,7 +328,7 @@ [(_ foo e1 e2) e1] ... [(_ bar e1 e2) e2]))))]))) -(define-constant scheme-version #x09050305) +(define-constant scheme-version #x09050306) (define-syntax define-machine-types (lambda (x) @@ -457,6 +457,7 @@ (define-constant fasl-type-begin 41) (define-constant fasl-type-phantom 42) +(define-constant fasl-type-uninterned-symbol 43) (define-constant fasl-fld-ptr 0) (define-constant fasl-fld-u8 1) @@ -1242,7 +1243,7 @@ ([ptr value] [ptr pvalue] [ptr plist] - [ptr name] + [ptr name] ; (cons str #f) => uninterned; #f or (cons ptr str) => gensym [ptr splist] [ptr hash])) diff --git a/s/cp0.ss b/s/cp0.ss index c2319777b0..3cab08bf25 100644 --- a/s/cp0.ss +++ b/s/cp0.ss @@ -4876,7 +4876,7 @@ (not (preinfo-lambda-name preinfo))) (preinfo-lambda-name-set! preinfo (let ([x ($symbol-name name)]) - (if (pair? x) (cdr x) x)))) + (if (pair? x) (or (cdr x) (car x)) x)))) (context-case ctxt [(value tail) (bump sc 1) diff --git a/s/cpnanopass.ss b/s/cpnanopass.ss index e7fa1b6d66..c008168ab5 100644 --- a/s/cpnanopass.ss +++ b/s/cpnanopass.ss @@ -1225,7 +1225,7 @@ [(string? x) x] [(symbol? x) (let ([name ($symbol-name x)]) - (if (pair? name) (cdr name) name))] + (if (pair? name) (or (cdr name) (car name)) name))] [(eq? #f x) #f] [else (error 'np-discover-names "x is not a name" x)])))) (Expr : Expr (ir name moi) -> Expr () @@ -9503,8 +9503,18 @@ (%type-check mask-symbol type-symbol ,e) (bind #t ([t (%mref ,e ,(constant symbol-name-disp))]) `(if ,t - ,(%type-check mask-pair type-pair ,t) + ,(build-and (%type-check mask-pair type-pair ,t) + (build-and (%mref ,t ,(constant pair-cdr-disp)) + (%constant strue))) ,(%constant strue)))))]) + (define-inline 2 uninterned-symbol? + [(e) + (bind #t (e) + (build-and + (%type-check mask-symbol type-symbol ,e) + (bind #t ([t (%mref ,e ,(constant symbol-name-disp))]) + (build-and (%type-check mask-pair type-pair ,t) + (build-not (%mref ,t ,(constant pair-cdr-disp)))))))]) (let () (define build-make-symbol (lambda (e-name) @@ -9541,7 +9551,10 @@ (bind #t ([e-name (%mref ,e-sym ,(constant symbol-name-disp))]) `(if ,e-name (if ,(%type-check mask-pair type-pair ,e-name) - ,(%mref ,e-name ,(constant pair-cdr-disp)) + ,(bind #t ([e-cdr (%mref ,e-name ,(constant pair-cdr-disp))]) + `(if ,e-cdr + ,e-cdr + ,(%mref ,e-name ,(constant pair-car-disp)))) ,e-name) ,(%primcall #f sexpr $gensym->pretty-name ,e-sym))))]) (define-inline 3 $fxaddress diff --git a/s/fasl.ss b/s/fasl.ss index 3734f23040..55b2b051c7 100644 --- a/s/fasl.ss +++ b/s/fasl.ss @@ -250,6 +250,9 @@ (put-u8 p (constant fasl-type-gensym)) (wrf-string-help (symbol->string x) p) (wrf-string-help uname p))] + [(uninterned-symbol? x) + (put-u8 p (constant fasl-type-uninterned-symbol)) + (wrf-string-help (symbol->string x) p)] [else (put-u8 p (constant fasl-type-symbol)) (wrf-string-help (symbol->string x) p)]))) diff --git a/s/primdata.ss b/s/primdata.ss index 974e59ce0f..6ba8a7a55e 100644 --- a/s/primdata.ss +++ b/s/primdata.ss @@ -300,6 +300,8 @@ (symbol->string [sig [(symbol) -> (string)]] [flags true mifoldable discard safeongoodargs ieee r5rs]) (symbol=? [sig [(symbol symbol symbol ...) -> (boolean)]] [flags pure mifoldable discard cp03 safeongoodargs]) (string->symbol [sig [(string) -> (symbol)]] [flags true mifoldable discard safeongoodargs ieee r5rs]) + (string->uninterned-symbol [sig [(string) -> (symbol)]] [flags true discard]) + (uninterned-symbol? [sig [(ptr) -> (boolean)]] [flags pure unrestricted mifoldable discard]) (char? [sig [(ptr) -> (boolean)]] [flags pure unrestricted mifoldable discard ieee r5rs]) (char->integer [sig [(char) -> (fixnum)]] [flags pure mifoldable discard safeongoodargs true ieee r5rs]) (integer->char [sig [(sub-ufixnum) -> (char)]] [flags pure mifoldable discard true ieee r5rs]) diff --git a/s/prims.ss b/s/prims.ss index a1f67d5994..3978bb5484 100644 --- a/s/prims.ss +++ b/s/prims.ss @@ -146,6 +146,11 @@ (scheme-object) scheme-object)) +(define string->uninterned-symbol + (foreign-procedure "(cs)s_uninterned" + (scheme-object) + scheme-object)) + (define $intern2 (foreign-procedure "(cs)s_intern2" (scheme-object scheme-object) @@ -1261,6 +1266,8 @@ (define gensym? (lambda (x) (gensym? x))) +(define uninterned-symbol? (lambda (x) (uninterned-symbol? x))) + (define fixnum? (lambda (x) (fixnum? x))) (define bignum? (lambda (x) (bignum? x)))