Adding comments and specs to parser.

Clarifying comments in marshaller. Supporting small numbers, small symbols, small marshalleds, small (proper) lists, small svectors, all-from-module long form

Improving inequality detector in tester: regexps and uninterned symbols

Correcting doc contract

svn: r18194
This commit is contained in:
Jay McCarthy 2010-02-19 19:47:15 +00:00
parent 7aa6ea4c76
commit 7d1a739df5
4 changed files with 144 additions and 51 deletions

View File

@ -1,6 +1,7 @@
#lang scheme/base #lang scheme/base
(require compiler/zo-structs (require compiler/zo-structs
scheme/match scheme/match
scheme/local
scheme/list scheme/list
scheme/dict) scheme/dict)
@ -10,14 +11,9 @@
Less sharing occurs than in the C implementation, creating much larger files Less sharing occurs than in the C implementation, creating much larger files
encode-all-from-module only handles one case protect-quote caused some things to be sent to write. But there are some things (like paths) that can be read and passed to protect-quote that cannot be 'read' in after 'write', so we turned it off
What is the purpose of protect-quote? It was making it so certain things (like paths) weren't being encoded correctly.
|# |#
;; Doesn't write as compactly as MzScheme, since list and pair sequences
;; are not compacted, and symbols are not written in short form
(define current-wrapped-ht (make-parameter #f)) (define current-wrapped-ht (make-parameter #f))
(define (zo-marshal top) (define (zo-marshal top)
(match top (match top
@ -318,11 +314,30 @@
APPVALS_EXPD APPVALS_EXPD
SPLICE_EXPD) SPLICE_EXPD)
(define CPT_SMALL_NUMBER_START 35)
(define CPT_SMALL_NUMBER_END 60)
(define CPT_SMALL_SYMBOL_START 60)
(define CPT_SMALL_SYMBOL_END 80)
(define CPT_SMALL_MARSHALLED_START 80)
(define CPT_SMALL_MARSHALLED_END 92)
(define CPT_SMALL_LIST_MAX 65)
(define CPT_SMALL_PROPER_LIST_START 92)
(define CPT_SMALL_PROPER_LIST_END (+ CPT_SMALL_PROPER_LIST_START CPT_SMALL_LIST_MAX))
(define CPT_SMALL_LIST_START CPT_SMALL_PROPER_LIST_END)
(define CPT_SMALL_LIST_END 192)
(define CPT_SMALL_LOCAL_START 192) (define CPT_SMALL_LOCAL_START 192)
(define CPT_SMALL_LOCAL_END 207) (define CPT_SMALL_LOCAL_END 207)
(define CPT_SMALL_LOCAL_UNBOX_START 207) (define CPT_SMALL_LOCAL_UNBOX_START 207)
(define CPT_SMALL_LOCAL_UNBOX_END 222) (define CPT_SMALL_LOCAL_UNBOX_END 222)
(define CPT_SMALL_SVECTOR_START 222)
(define CPT_SMALL_SVECTOR_END 247)
(define CPT_SMALL_APPLICATION_START 247) (define CPT_SMALL_APPLICATION_START 247)
(define CPT_SMALL_APPLICATION_END 255) (define CPT_SMALL_APPLICATION_END 255)
@ -385,8 +400,11 @@
(out-marshaled syntax-type-num (list* key val) out)) (out-marshaled syntax-type-num (list* key val) out))
(define (out-marshaled type-num val out) (define (out-marshaled type-num val out)
(out-byte CPT_MARSHALLED out) (if (type-num . < . (- CPT_SMALL_MARSHALLED_END CPT_SMALL_MARSHALLED_START))
(out-number type-num out) (out-byte (+ CPT_SMALL_MARSHALLED_START type-num) out)
(begin
(out-byte CPT_MARSHALLED out)
(out-number type-num out)))
(out-data val out)) (out-data val out))
(define (out-anything v out) (define (out-anything v out)
@ -537,7 +555,9 @@
(define (encode-all-from-module all) (define (encode-all-from-module all)
(match all (match all
[(struct all-from-module (path phase src-phase exceptions prefix)) [(struct all-from-module (path phase src-phase exceptions prefix))
(list* path phase src-phase)])) (if (and (empty? exceptions) (not prefix))
(list* path phase src-phase)
(list* path phase src-phase (append exceptions prefix)))]))
(define (encode-wraps wraps) (define (encode-wraps wraps)
(for/list ([wrap (in-list wraps)]) (for/list ([wrap (in-list wraps)])
@ -734,13 +754,14 @@
(out-expr (protect-quote then) out) (out-expr (protect-quote then) out)
(out-expr (protect-quote else) out)] (out-expr (protect-quote else) out)]
[(struct application (rator rands)) [(struct application (rator rands))
(if ((length rands) . < . (- CPT_SMALL_APPLICATION_END CPT_SMALL_APPLICATION_START)) (let ([len (length rands)])
(out-byte (+ CPT_SMALL_APPLICATION_START (length rands)) out) (if (len . < . (- CPT_SMALL_APPLICATION_END CPT_SMALL_APPLICATION_START))
(begin (out-byte (+ CPT_SMALL_APPLICATION_START (length rands)) out)
(out-byte CPT_APPLICATION out) (begin
(out-number (length rands) out))) (out-byte CPT_APPLICATION out)
(for-each (lambda (e) (out-expr (protect-quote e) out)) (out-number len out)))
(cons rator rands))] (for-each (lambda (e) (out-expr (protect-quote e) out))
(cons rator rands)))]
[(struct apply-values (proc args-expr)) [(struct apply-values (proc args-expr))
(out-syntax APPVALS_EXPD (out-syntax APPVALS_EXPD
(cons (protect-quote proc) (cons (protect-quote proc)
@ -852,11 +873,15 @@
#f #f
out)] out)]
[(symbol? expr) [(symbol? expr)
(out-as-bytes expr (out-shared expr out
(compose string->bytes/utf-8 symbol->string) (lambda ()
CPT_SYMBOL (define bs (string->bytes/utf-8 (symbol->string expr)))
#f (define len (bytes-length bs))
out)] (if (len . < . (- CPT_SMALL_SYMBOL_END CPT_SMALL_SYMBOL_START))
(out-byte (+ CPT_SMALL_SYMBOL_START len) out)
(begin (out-byte CPT_SYMBOL out)
(out-number len out)))
(out-bytes bs out)))]
[(keyword? expr) [(keyword? expr)
(out-as-bytes expr (out-as-bytes expr
(compose string->bytes/utf-8 keyword->string) (compose string->bytes/utf-8 keyword->string)
@ -886,8 +911,12 @@
(out-number (char->integer expr) out)] (out-number (char->integer expr) out)]
[(and (exact-integer? expr) [(and (exact-integer? expr)
(and (expr . >= . -1073741824) (expr . <= . 1073741823))) (and (expr . >= . -1073741824) (expr . <= . 1073741823)))
(out-byte CPT_INT out) (if (and (expr . >= . 0)
(out-number expr out)] (expr . < . (- CPT_SMALL_NUMBER_END CPT_SMALL_NUMBER_START)))
(out-byte (+ CPT_SMALL_NUMBER_START expr) out)
(begin
(out-byte CPT_INT out)
(out-number expr out)))]
[(null? expr) [(null? expr)
(out-byte CPT_NULL out)] (out-byte CPT_NULL out)]
[(eq? expr #t) [(eq? expr #t)
@ -900,10 +929,46 @@
(out-byte CPT_BOX out) (out-byte CPT_BOX out)
(out-data (unbox expr) out)] (out-data (unbox expr) out)]
[(pair? expr) [(pair? expr)
(out-byte CPT_LIST out) (local [(define seen? (make-hasheq)) ; XXX Maybe this should be global?
(out-number 1 out) (define (list-length-before-cycle/improper-end l)
(out-data (car expr) out) (if (hash-has-key? seen? l)
(out-data (cdr expr) out)] (begin (values 0 #f))
(begin (hash-set! seen? l #t)
(cond
[(null? l)
(values 0 #t)]
[(pair? l)
(let-values ([(len proper?)
(list-length-before-cycle/improper-end (cdr l))])
(values (add1 len) proper?))]
[else
(values 0 #f)]))))
(define-values (len proper?) (list-length-before-cycle/improper-end expr))
(define (print-contents-as-proper)
(for ([e (in-list expr)])
(out-data e out)))
(define (print-contents-as-improper)
(let loop ([l expr] [i len])
(cond
[(zero? i)
(out-data l out)]
[else
(out-data (car l) out)
(loop (cdr l) (sub1 i))])))]
(if proper?
(if (len . < . (- CPT_SMALL_PROPER_LIST_END CPT_SMALL_PROPER_LIST_START))
(begin (out-byte (+ CPT_SMALL_PROPER_LIST_START len) out)
(print-contents-as-proper))
(begin (out-byte CPT_LIST out)
(out-number len out)
(print-contents-as-proper)
(out-data null out)))
(if (len . < . (- CPT_SMALL_LIST_END CPT_SMALL_LIST_START))
(begin (out-byte (+ CPT_SMALL_LIST_START len) out)
(print-contents-as-improper))
(begin (out-byte CPT_LIST out)
(out-number len out)
(print-contents-as-improper)))))]
[(vector? expr) [(vector? expr)
(out-byte CPT_VECTOR out) (out-byte CPT_VECTOR out)
(out-number (vector-length expr) out) (out-number (vector-length expr) out)
@ -921,10 +986,13 @@
(out-data k out) (out-data k out)
(out-data v out))] (out-data v out))]
[(svector? expr) [(svector? expr)
(out-byte CPT_SVECTOR out) (let* ([vec (svector-vec expr)]
(out-number (vector-length (svector-vec expr)) out) [len (vector-length vec)])
(let ([vec (svector-vec expr)]) (if (len . < . (- CPT_SMALL_SVECTOR_END CPT_SMALL_SVECTOR_START))
(for ([n (in-range (sub1 (vector-length vec)) -1 -1)]) (out-byte (+ CPT_SMALL_SVECTOR_START len) out)
(begin (out-byte CPT_SVECTOR out)
(out-number len out)))
(for ([n (in-range (sub1 len) -1 -1)])
(out-number (vector-ref vec n) out)))] (out-number (vector-ref vec n) out)))]
[(module-path-index? expr) [(module-path-index? expr)
(out-shared expr out (out-shared expr out
@ -958,8 +1026,8 @@
(define (protect-quote v) (define (protect-quote v)
v v
#;(if (or (list? v) (vector? v) (box? v) (hash? v)) #;(if (or (list? v) (vector? v) (box? v) (hash? v))
(make-quoted v) (make-quoted v)
v)) v))
(define-struct svector (vec)) (define-struct svector (vec))

View File

@ -27,6 +27,10 @@
I think parse-module-path-index was only used for debugging, so it is short-circuited now I think parse-module-path-index was only used for debugging, so it is short-circuited now
collects/browser/compiled/browser_scrbl.zo (eg) contains a all-from-module that looks like: (#<module-path-index> 0 (1363072) . #f) --- that doesn't seem to match the spec
We seem to leave placeholders for hash-tables in the structs
|# |#
;; ---------------------------------------- ;; ----------------------------------------
;; Bytecode unmarshalers for various forms ;; Bytecode unmarshalers for various forms
@ -598,6 +602,8 @@
(if kind 'marked 'normal) (if kind 'marked 'normal)
set-id set-id
(let ([results (map (lambda (u) (let ([results (map (lambda (u)
; u = (list path phase . src-phase)
; or u = (list path phase src-phase exn ... . prefix)
(let ([just-phase? (let ([v (cddr u)]) (let ([just-phase? (let ([v (cddr u)])
(or (number? v) (not v)))]) (or (number? v) (not v)))])
(let-values ([(exns prefix) (let-values ([(exns prefix)

View File

@ -564,7 +564,7 @@ Represents a set of module and import bindings.}
[phase (or/c exact-integer? #f)] [phase (or/c exact-integer? #f)]
[src-phase (or/c exact-integer? #f)] [src-phase (or/c exact-integer? #f)]
[exceptions (listof symbol?)] [exceptions (listof symbol?)]
[prefix symbol?])]{ [prefix (or/c symbol? #f)])]{
Represents a set of simple imports from one module within a Represents a set of simple imports from one module within a
@scheme[module-rename].} @scheme[module-rename].}

View File

@ -29,6 +29,9 @@
(hash-update! ht phase (curry list* file) empty)) (hash-update! ht phase (curry list* file) empty))
(define (equal?/why-not v1 v2) (define (equal?/why-not v1 v2)
(define v1->v2 (make-hasheq))
(define (interned-symbol=? s1 s2)
(symbol=? (hash-ref! v1->v2 s1 s2) s2))
(define (yield p m v1 v2) (define (yield p m v1 v2)
(error 'equal?/why-not "~a in ~a: ~S ~S" (error 'equal?/why-not "~a in ~a: ~S ~S"
m (reverse p) v1 v2)) m (reverse p) v1 v2))
@ -93,6 +96,13 @@
(yield p "Unequal strings" v1 v2))] (yield p "Unequal strings" v1 v2))]
[_ [_
(yield p "Not a string on right" v1 v2)])] (yield p "Not a string on right" v1 v2)])]
[(? bytes?)
(match v2
[(? bytes?)
(unless (bytes=? v1 v2)
(yield p "Unequal bytes" v1 v2))]
[_
(yield p "Not a bytes on right" v1 v2)])]
[(? path?) [(? path?)
(match v2 (match v2
[(? path?) [(? path?)
@ -107,30 +117,39 @@
(yield p "Unequal numbers" v1 v2))] (yield p "Unequal numbers" v1 v2))]
[_ [_
(yield p "Not a number on right" v1 v2)])] (yield p "Not a number on right" v1 v2)])]
[(? regexp?)
(match v2
[(? regexp?)
(unless (string=? (object-name v1) (object-name v2))
(yield p "Unequal regexp" v1 v2))]
[_
(yield p "Not a regexp on right" v1 v2)])]
[(? symbol?) [(? symbol?)
(match v2 (match v2
[(? symbol?) [(? symbol?)
(do-compare (symbol-interned? (unless (symbol=? v1 v2)
symbol-unreadable?) (cond
yield p v1 v2 [(and (symbol-interned? v1) (not (symbol-interned? v1)))
symbol=?)] (yield p "Not interned symbol on right" v1 v2)]
[(and (symbol-unreadable? v1) (not (symbol-unreadable? v1)))
(yield p "Not unreadable symbol on right" v1 v2)]
[(and (symbol-uninterned? v1) (not (symbol-uninterned? v1)))
(yield p "Not uninterned symbol on right" v1 v2)]
[(and (symbol-uninterned? v1) (symbol-uninterned? v2))
(unless (interned-symbol=? v1 v2)
(yield p "Uninterned symbols don't align" v1 v2))]
[else
(yield p "Other symbol-related problem" v1 v2)]))]
[_ [_
(yield p "Not a symbol on right" v1 v2)])] (yield p "Not a symbol on right" v1 v2)])]
[(? empty?)
(yield p "Not empty on right" v1 v2)]
[_ [_
(yield p "Cannot inspect values deeper" v1 v2)]))) (yield p "Cannot inspect values deeper" v1 v2)])))
(inner empty v1 v2)) (inner empty v1 v2))
(define-syntax do-compare (define (symbol-uninterned? s)
(syntax-rules () (not (or (symbol-interned? s) (symbol-unreadable? s))))
[(_ () yield p v1 v2 =)
(unless (= v1 v2)
(yield p (format "Not ~a" '=) v1 v2))]
[(_ (?1 ? ...) yield p v1 v2 =)
(if (?1 v1)
(if (?1 v2)
(do-compare () yield (list* '?1 p) v1 v2 =)
(yield p (format "Not ~a or right" '?1) v1 v2))
(do-compare (? ...) yield p v1 v2 =))]))
;; Parameters ;; Parameters
(define stop-on-first-error (make-parameter #f)) (define stop-on-first-error (make-parameter #f))