rewrite number parser

Inspired by the way the Chez Scheme number parser works, change the
one in the expander to be faster and probably clearer. This improved
performance brings number parsing almost back in line with the v6.12
parser's performance.

The revised parser is faster because it goes through an input string
just once. The new parser is also more complete; it doesn't rely on a
host-system `number->string` (except for dummy extflonums when
extflonums are not supported).

If you're reading the commit history, beware that the note on commit
be19996953 is incorrect about the change to parsing divide-by-zero
errors. (It explains a change that was edited away before merging.)
This commit really does change the behavior, though, again as a better
match for v6.12. Specifically, "/0" (with no hashes) always triggers
divide-by-zero in an otherwise well-formed number, even if `#i` is
used.
This commit is contained in:
Matthew Flatt 2019-02-23 11:18:09 -07:00
parent be828b184c
commit 0549bfb1e5
5 changed files with 2480 additions and 2087 deletions

View File

@ -76,8 +76,10 @@
(5e-5 "#i1/2e-4") (5e-5 "#i1/2e-4")
(0.5 "#i1/2") (0.5 "#i1/2")
(1/2 "#e1/2") (1/2 "#e1/2")
(0.5 "#i0.5") (0.5 "#i0.5")
(1/2 "#e0.5") (1/2 "#e0.5")
(0.5 "+.5")
(-0.5 "-.5")
(1/20 "#e0.5e-1") (1/20 "#e0.5e-1")
(1/20 "#e0.005e1") (1/20 "#e0.005e1")
(1.0+0.5i "1+0.5i") (1.0+0.5i "1+0.5i")
@ -120,8 +122,8 @@
(-inf.0 "-1/0#") (-inf.0 "-1/0#")
(DBZ "1#/0") (DBZ "1#/0")
(DBZ "-1#/0") (DBZ "-1#/0")
(+inf.0 "#i1#/0") (DBZ "#i1#/0")
(-inf.0 "#i-1#/0") (DBZ "#i-1#/0")
(NOE "#e+inf.0") (NOE "#e+inf.0")
(NOE "#e-inf.0") (NOE "#e-inf.0")
(NOE "#e+nan.0") (NOE "#e+nan.0")
@ -216,6 +218,23 @@
(DBZ "5@1/0") (DBZ "5@1/0")
(DBZ "1/0@5") (DBZ "1/0@5")
(DBZ "1/0e2") (DBZ "1/0e2")
(DBZ "#i1/0")
(DBZ "#i5+1/0i")
(DBZ "#i1/0+5i")
(DBZ "#i5@1/0")
(DBZ "#i1/0@5")
(DBZ "#i1/0e2")
(5+inf.0i "5+1/0#i")
(+inf.0+5i "1/0#+5i")
(+nan.0+nan.0i "5@1/0#")
(+inf.0-inf.0i "1/0#@5")
(DBZ "#i1/0e2")
(#f "1/#e2")
(#f "5+1/#i")
(#f "1/#+5i")
(#f "5@1/#")
(#f "1/#@5")
(#f "1/#e2")
(#f "1/0+hi") (#f "1/0+hi")
(#f "x+1/0i") (#f "x+1/0i")
(+nan.0+1i "+nan.0+1i") (+nan.0+1i "+nan.0+1i")
@ -265,6 +284,9 @@
(DBZ "+inf.0+1/0i") (DBZ "+inf.0+1/0i")
(DBZ "1/0@+inf.0") (DBZ "1/0@+inf.0")
(DBZ "+inf.0@1/0") (DBZ "+inf.0@1/0")
(DBZ "#i+inf.0+1/0i")
(DBZ "#i1/0@+inf.0")
(DBZ "#i+inf.0@1/0")
(#f "1e1/0") (#f "1e1/0")
(#f "011111122222222223333333333444444x") (#f "011111122222222223333333333444444x")
(#f "t") (#f "t")
@ -279,4 +301,5 @@
(#f ".#e1") (#f ".#e1")
(#f "/2") (#f "/2")
(#f "-#/2") (#f "-#/2")
(X "#/2"))) (X "#/2")
(#f "2+4ix")))

View File

@ -616,7 +616,7 @@
(string-append "\\" (cadar l)) (string-append "\\" (cadar l))
(cadar l)) (cadar l))
(loop (cdr l))] (loop (cdr l))]
[else [else
(test-write-sym (cadar l) (cadar l) (cadar l)) (test-write-sym (cadar l) (cadar l) (cadar l))
(loop (cdr l))])) (loop (cdr l))]))
@ -1441,6 +1441,5 @@
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(report-errs) ;; readtable has `report-errs`:
(load-relative "readtable.rktl") (load-relative "readtable.rktl")

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,81 @@
#lang racket/base
(require racket/fixnum
(for-syntax racket/base))
;; `parse-case` is used for number parsing in "number.rkt"
(provide parse-case
parse-case*
digit)
;; `digit` is bound only so that it can be exported and recognized as
;; a literal keyword by `parse-matches?`; its compile-time value (#f)
;; is never used by the macros in this file.
(define-syntax digit #f)
;; Converts the character `c` to its numeric value when it is a valid
;; digit in `radix`; otherwise returns `c` itself unchanged.
;;
;;  c     : char
;;  radix : fixnum
;;  result: a fixnum in [0, radix) if `c` is a digit, else `c`
;;
;; Decimal digits cover values 0..(min radix 10)-1; for radixes above
;; 10, the letters starting at `A`/`a` cover values 10..radix-1.
(define (maybe-digit c radix)
  (define v (char->integer c))
  (cond
    ;; Anything below #\0 can never be a digit
    [(v . fx< . (char->integer #\0)) c]
    ;; Decimal digit that is within the radix
    [(v . fx< . (fx+ (fxmin radix 10) (char->integer #\0)))
     (fx- v (char->integer #\0))]
    ;; No letter digits exist for radix <= 10
    [(radix . fx<= . 10) c]
    [(v . fx< . (char->integer #\A)) c]
    ;; Uppercase letter digit: #\A stands for 10, so the first
    ;; non-digit letter is at `A + (radix - 10)`, matching the
    ;; lowercase case below
    [(v . fx< . (fx+ radix (fx- (char->integer #\A) 10)))
     (fx- v (fx- (char->integer #\A) 10))]
    [(v . fx< . (char->integer #\a)) c]
    ;; Lowercase letter digit: #\a stands for 10
    [(v . fx< . (fx+ radix (fx- (char->integer #\a) 10)))
     (fx- v (fx- (char->integer #\a) 10))]
    [else c]))
;; Looks at position `start` of the string `s` and dispatches on what
;; is found there. `var` is bound to 'eof when `start` equals `end`;
;; otherwise it is bound to the result of `maybe-digit` applied to the
;; character at `start` (a digit value for `radix`, or the character
;; itself). The `clause`s have the shape of `case` clauses and are
;; tried in order, so put the most common ones first. The `start` and
;; `end` expressions are each copied into the expansion more than
;; once, so they must be safe to duplicate.
(define-syntax-rule (parse-case s start end radix => var clause ...)
  (let ([var (if (fx= start end)
                 'eof
                 (maybe-digit (string-ref s start) radix))])
    (parse/case var clause ...)))
;; Expands a list of `case`-like clauses into a chain of tests against
;; `v`, trying the clauses in the order written. With no clauses left
;; the result is void; an `else` clause runs its body unconditionally.
(define-syntax parse/case
  (syntax-rules (else)
    [(_ v) (void)]
    [(_ v [else expr ...])
     (let () expr ...)]
    [(_ v [(d ...) expr ...] more ...)
     (cond
       [(parse-matches? v (d ...))
        (let () expr ...)]
       [else
        (parse/case v more ...)])]))
;; Expands to a test of whether `v` matches any entry of a datum list.
;; The keyword `digit` matches any fixnum; every other datum is quoted
;; and compared with `eqv?`. An empty list never matches.
(define-syntax parse-matches?
  (syntax-rules (digit)
    [(_ v ()) #f]
    [(_ v (digit . more))
     (if (fixnum? v)
         #t
         (parse-matches? v more))]
    [(_ v (d . more))
     (if (eqv? v 'd)
         #t
         (parse-matches? v more))]))
;; Nests a sequence of matches with a shared "else".
;;
;;   (parse-case* s start end
;;     [[datums ...] body ...]
;;     [else body2 ...])
;;
;; Each `datums` is a `case`-style datum list. The character of `s` at
;; `start` must match the first list, the character at `start`+1 the
;; second, and so on; reaching `end` yields 'eof in place of a
;; character. If every position matches, `body ...` runs; on the first
;; mismatch, `body2 ...` runs instead. The `start` and `end`
;; expressions are duplicated in the expansion.
;;
;; `fx+` is a literal so that only a genuine `(fx+ start n)` form is
;; taken apart to track the offset; any other start expression goes
;; through the last clause and is wrapped as `(fx+ start 0)` rather
;; than being misread as an addition.
(define-syntax parse-case*
  (syntax-rules (else fx+)
    ;; All positions matched
    [(_ s start end [[] body ...] [else body2 ...])
     (let ()
       body ...)]
    ;; Match one position, then recur on the next offset
    [(_ s (fx+ start n) end
        [[datums . datums-rest] body ...]
        [else body2 ...])
     ;; Bind the failure body once so that nested levels share it
     (let ([fail (lambda () body2 ...)])
       (let* ([start+n (fx+ start n)]
              [var (if (fx= start+n end)
                       'eof
                       (string-ref s start+n))])
         (case var
           [datums
            (parse-case*
             s (fx+ start (+ n 1)) end
             [datums-rest body ...]
             [else (fail)])]
           [else (fail)])))]
    ;; Normalize an arbitrary start expression to offset form
    [(_ s start . rest)
     (parse-case* s (fx+ start 0) . rest)]))

File diff suppressed because it is too large Load Diff