diff --git a/collects/compiler/zo-marshal.ss b/collects/compiler/zo-marshal.ss index 4f147dfc67..c59938b482 100644 --- a/collects/compiler/zo-marshal.ss +++ b/collects/compiler/zo-marshal.ss @@ -1,6 +1,7 @@ #lang scheme/base (require compiler/zo-structs scheme/match + scheme/local scheme/list scheme/dict) @@ -10,14 +11,9 @@ Less sharing occurs than in the C implementation, creating much larger files - encode-all-from-module only handles one case - - What is the purpose of protect-quote? It was making it so certain things (like paths) weren't being encoded correctly. - + protect-quote caused some things to be sent to write. But there are some things (like paths) that can be read and passed to protect-quote that cannot be 'read' in after 'write', so we turned it off |# -;; Doesn't write as compactly as MzScheme, since list and pair sequences -;; are not compacted, and symbols are not written in short form (define current-wrapped-ht (make-parameter #f)) (define (zo-marshal top) (match top @@ -318,11 +314,30 @@ APPVALS_EXPD SPLICE_EXPD) +(define CPT_SMALL_NUMBER_START 35) +(define CPT_SMALL_NUMBER_END 60) + +(define CPT_SMALL_SYMBOL_START 60) +(define CPT_SMALL_SYMBOL_END 80) + +(define CPT_SMALL_MARSHALLED_START 80) +(define CPT_SMALL_MARSHALLED_END 92) + +(define CPT_SMALL_LIST_MAX 65) +(define CPT_SMALL_PROPER_LIST_START 92) +(define CPT_SMALL_PROPER_LIST_END (+ CPT_SMALL_PROPER_LIST_START CPT_SMALL_LIST_MAX)) + +(define CPT_SMALL_LIST_START CPT_SMALL_PROPER_LIST_END) +(define CPT_SMALL_LIST_END 192) + (define CPT_SMALL_LOCAL_START 192) (define CPT_SMALL_LOCAL_END 207) (define CPT_SMALL_LOCAL_UNBOX_START 207) (define CPT_SMALL_LOCAL_UNBOX_END 222) +(define CPT_SMALL_SVECTOR_START 222) +(define CPT_SMALL_SVECTOR_END 247) + (define CPT_SMALL_APPLICATION_START 247) (define CPT_SMALL_APPLICATION_END 255) @@ -385,8 +400,11 @@ (out-marshaled syntax-type-num (list* key val) out)) (define (out-marshaled type-num val out) - (out-byte CPT_MARSHALLED out) - (out-number type-num out) + (if (type-num . < . (- CPT_SMALL_MARSHALLED_END CPT_SMALL_MARSHALLED_START)) + (out-byte (+ CPT_SMALL_MARSHALLED_START type-num) out) + (begin + (out-byte CPT_MARSHALLED out) + (out-number type-num out))) (out-data val out)) (define (out-anything v out) @@ -537,7 +555,9 @@ (define (encode-all-from-module all) (match all [(struct all-from-module (path phase src-phase exceptions prefix)) - (list* path phase src-phase)])) + (if (and (empty? exceptions) (not prefix)) + (list* path phase src-phase) + (list* path phase src-phase (append exceptions prefix)))])) (define (encode-wraps wraps) (for/list ([wrap (in-list wraps)]) @@ -592,7 +612,7 @@ [(struct stx (encoded)) (out-byte CPT_STX out) (out-wrapped encoded out)])))) - + (define (out-form form out) (match form [(? mod?) @@ -734,13 +754,14 @@ (out-expr (protect-quote then) out) (out-expr (protect-quote else) out)] [(struct application (rator rands)) - (if ((length rands) . < . (- CPT_SMALL_APPLICATION_END CPT_SMALL_APPLICATION_START)) - (out-byte (+ CPT_SMALL_APPLICATION_START (length rands)) out) - (begin - (out-byte CPT_APPLICATION out) - (out-number (length rands) out))) - (for-each (lambda (e) (out-expr (protect-quote e) out)) - (cons rator rands))] + (let ([len (length rands)]) + (if (len . < . (- CPT_SMALL_APPLICATION_END CPT_SMALL_APPLICATION_START)) + (out-byte (+ CPT_SMALL_APPLICATION_START (length rands)) out) + (begin + (out-byte CPT_APPLICATION out) + (out-number len out))) + (for-each (lambda (e) (out-expr (protect-quote e) out)) + (cons rator rands)))] [(struct apply-values (proc args-expr)) (out-syntax APPVALS_EXPD (cons (protect-quote proc) @@ -852,11 +873,15 @@ #f out)] [(symbol? expr) - (out-as-bytes expr - (compose string->bytes/utf-8 symbol->string) - CPT_SYMBOL - #f - out)] + (out-shared expr out + (lambda () + (define bs (string->bytes/utf-8 (symbol->string expr))) + (define len (bytes-length bs)) + (if (len . < . (- CPT_SMALL_SYMBOL_END CPT_SMALL_SYMBOL_START)) + (out-byte (+ CPT_SMALL_SYMBOL_START len) out) + (begin (out-byte CPT_SYMBOL out) + (out-number len out))) + (out-bytes bs out)))] [(keyword? expr) (out-as-bytes expr (compose string->bytes/utf-8 keyword->string) @@ -886,8 +911,12 @@ (out-number (char->integer expr) out)] [(and (exact-integer? expr) (and (expr . >= . -1073741824) (expr . <= . 1073741823))) - (out-byte CPT_INT out) - (out-number expr out)] + (if (and (expr . >= . 0) + (expr . < . (- CPT_SMALL_NUMBER_END CPT_SMALL_NUMBER_START))) + (out-byte (+ CPT_SMALL_NUMBER_START expr) out) + (begin + (out-byte CPT_INT out) + (out-number expr out)))] [(null? expr) (out-byte CPT_NULL out)] [(eq? expr #t) @@ -900,10 +929,46 @@ (out-byte CPT_BOX out) (out-data (unbox expr) out)] [(pair? expr) - (out-byte CPT_LIST out) - (out-number 1 out) - (out-data (car expr) out) - (out-data (cdr expr) out)] + (local [(define seen? (make-hasheq)) ; XXX Maybe this should be global? + (define (list-length-before-cycle/improper-end l) + (if (hash-has-key? seen? l) + (begin (values 0 #f)) + (begin (hash-set! seen? l #t) + (cond + [(null? l) + (values 0 #t)] + [(pair? l) + (let-values ([(len proper?) + (list-length-before-cycle/improper-end (cdr l))]) + (values (add1 len) proper?))] + [else + (values 0 #f)])))) + (define-values (len proper?) (list-length-before-cycle/improper-end expr)) + (define (print-contents-as-proper) + (for ([e (in-list expr)]) + (out-data e out))) + (define (print-contents-as-improper) + (let loop ([l expr] [i len]) + (cond + [(zero? i) + (out-data l out)] + [else + (out-data (car l) out) + (loop (cdr l) (sub1 i))])))] + (if proper? + (if (len . < . (- CPT_SMALL_PROPER_LIST_END CPT_SMALL_PROPER_LIST_START)) + (begin (out-byte (+ CPT_SMALL_PROPER_LIST_START len) out) + (print-contents-as-proper)) + (begin (out-byte CPT_LIST out) + (out-number len out) + (print-contents-as-proper) + (out-data null out))) + (if (len . < . (- CPT_SMALL_LIST_END CPT_SMALL_LIST_START)) + (begin (out-byte (+ CPT_SMALL_LIST_START len) out) + (print-contents-as-improper)) + (begin (out-byte CPT_LIST out) + (out-number len out) + (print-contents-as-improper)))))] [(vector? expr) (out-byte CPT_VECTOR out) (out-number (vector-length expr) out) @@ -921,10 +986,13 @@ (out-data k out) (out-data v out))] [(svector? expr) - (out-byte CPT_SVECTOR out) - (out-number (vector-length (svector-vec expr)) out) - (let ([vec (svector-vec expr)]) - (for ([n (in-range (sub1 (vector-length vec)) -1 -1)]) + (let* ([vec (svector-vec expr)] + [len (vector-length vec)]) + (if (len . < . (- CPT_SMALL_SVECTOR_END CPT_SMALL_SVECTOR_START)) + (out-byte (+ CPT_SMALL_SVECTOR_START len) out) + (begin (out-byte CPT_SVECTOR out) + (out-number len out))) + (for ([n (in-range (sub1 len) -1 -1)]) (out-number (vector-ref vec n) out)))] [(module-path-index? expr) (out-shared expr out @@ -958,8 +1026,8 @@ (define (protect-quote v) v #;(if (or (list? v) (vector? v) (box? v) (hash? v)) - (make-quoted v) - v)) + (make-quoted v) + v)) (define-struct svector (vec)) diff --git a/collects/compiler/zo-parse.ss b/collects/compiler/zo-parse.ss index c6d1e0b9e3..c130288e49 100644 --- a/collects/compiler/zo-parse.ss +++ b/collects/compiler/zo-parse.ss @@ -27,6 +27,10 @@ I think parse-module-path-index was only used for debugging, so it is short-circuited now + collects/browser/compiled/browser_scrbl.zo (eg) contains a all-from-module that looks like: (# 0 (1363072) . #f) --- that doesn't seem to match the spec + + We seem to leave placeholders for hash-tables in the structs + |# ;; ---------------------------------------- ;; Bytecode unmarshalers for various forms @@ -598,6 +602,8 @@ (if kind 'marked 'normal) set-id (let ([results (map (lambda (u) + ; u = (list path phase . src-phase) + ; or u = (list path phase src-phase exn ... . prefix) (let ([just-phase? (let ([v (cddr u)]) (or (number? v) (not v)))]) (let-values ([(exns prefix) diff --git a/collects/scribblings/mzc/zo-parse.scrbl b/collects/scribblings/mzc/zo-parse.scrbl index 4fbcf8f194..1fb56be141 100644 --- a/collects/scribblings/mzc/zo-parse.scrbl +++ b/collects/scribblings/mzc/zo-parse.scrbl @@ -564,7 +564,7 @@ Represents a set of module and import bindings.} [phase (or/c exact-integer? #f)] [src-phase (or/c exact-integer? #f)] [exceptions (listof symbol?)] - [prefix symbol?])]{ + [prefix (or/c symbol? #f)])]{ Represents a set of simple imports from one module within a @scheme[module-rename].} diff --git a/collects/tests/compiler/zo-test.ss b/collects/tests/compiler/zo-test.ss index 137a7866a0..b2261911bd 100644 --- a/collects/tests/compiler/zo-test.ss +++ b/collects/tests/compiler/zo-test.ss @@ -29,6 +29,9 @@ (hash-update! ht phase (curry list* file) empty)) (define (equal?/why-not v1 v2) + (define v1->v2 (make-hasheq)) + (define (interned-symbol=? s1 s2) + (symbol=? (hash-ref! v1->v2 s1 s2) s2)) (define (yield p m v1 v2) (error 'equal?/why-not "~a in ~a: ~S ~S" m (reverse p) v1 v2)) @@ -93,6 +96,13 @@ (yield p "Unequal strings" v1 v2))] [_ (yield p "Not a string on right" v1 v2)])] + [(? bytes?) + (match v2 + [(? bytes?) + (unless (bytes=? v1 v2) + (yield p "Unequal bytes" v1 v2))] + [_ + (yield p "Not a bytes on right" v1 v2)])] [(? path?) (match v2 [(? path?) @@ -107,30 +117,39 @@ (yield p "Unequal numbers" v1 v2))] [_ (yield p "Not a number on right" v1 v2)])] + [(? regexp?) + (match v2 + [(? regexp?) + (unless (string=? (object-name v1) (object-name v2)) + (yield p "Unequal regexp" v1 v2))] + [_ + (yield p "Not a regexp on right" v1 v2)])] [(? symbol?) (match v2 [(? symbol?) - (do-compare (symbol-interned? - symbol-unreadable?) - yield p v1 v2 - symbol=?)] + (unless (symbol=? v1 v2) + (cond + [(and (symbol-interned? v1) (not (symbol-interned? v1))) + (yield p "Not interned symbol on right" v1 v2)] + [(and (symbol-unreadable? v1) (not (symbol-unreadable? v1))) + (yield p "Not unreadable symbol on right" v1 v2)] + [(and (symbol-uninterned? v1) (not (symbol-uninterned? v1))) + (yield p "Not uninterned symbol on right" v1 v2)] + [(and (symbol-uninterned? v1) (symbol-uninterned? v2)) + (unless (interned-symbol=? v1 v2) + (yield p "Uninterned symbols don't align" v1 v2))] + [else + (yield p "Other symbol-related problem" v1 v2)]))] [_ - (yield p "Not a symbol on right" v1 v2)])] + (yield p "Not a symbol on right" v1 v2)])] + [(? empty?) + (yield p "Not empty on right" v1 v2)] [_ (yield p "Cannot inspect values deeper" v1 v2)]))) (inner empty v1 v2)) -(define-syntax do-compare - (syntax-rules () - [(_ () yield p v1 v2 =) - (unless (= v1 v2) - (yield p (format "Not ~a" '=) v1 v2))] - [(_ (?1 ? ...) yield p v1 v2 =) - (if (?1 v1) - (if (?1 v2) - (do-compare () yield (list* '?1 p) v1 v2 =) - (yield p (format "Not ~a or right" '?1) v1 v2)) - (do-compare (? ...) yield p v1 v2 =))])) +(define (symbol-uninterned? s) + (not (or (symbol-interned? s) (symbol-unreadable? s)))) ;; Parameters (define stop-on-first-error (make-parameter #f))