[checkpoint]
This commit is contained in:
parent
e70e0d3867
commit
3703cee372
1
popl-2017/.gitignore
vendored
1
popl-2017/.gitignore
vendored
|
@ -8,7 +8,6 @@
|
|||
paper.tex
|
||||
pearl.tex
|
||||
|
||||
benchmark/
|
||||
compiled/
|
||||
*.swp
|
||||
*.swo
|
||||
|
|
|
@ -10,6 +10,10 @@ ${PAPER}.pdf: pkg setup texstyle.tex
|
|||
++extra fig-stlc-core.tex \
|
||||
++extra fig-elab0.tex \
|
||||
++extra fig-elab1.tex \
|
||||
++extra fig-elab-regexp.tex \
|
||||
++extra fig-elab-sigma.tex \
|
||||
++extra fig-regexp-lib.tex \
|
||||
++extra fig-stlc-dict.tex \
|
||||
++extra mathpartir.sty \
|
||||
++style texstyle.tex \
|
||||
--pdf $(PAPER).scrbl
|
||||
|
|
|
@ -63,3 +63,21 @@ rust will be difficult
|
|||
5. ending
|
||||
---
|
||||
|
||||
0. finish sql search
|
||||
1. revise 3,4 with MODEL IMPL, EVAL
|
||||
2. do 5 better, not sure how, figure first
|
||||
|
||||
related work
|
||||
- dep types
|
||||
- occurrence types TR optimizer (no theoretical justifications)
|
||||
- yes we did macros, yes that's been done (fisher, hermen)
|
||||
- haskell
|
||||
|
||||
conclusion
|
||||
- jsut one way of doing this, can reimagin typed racket, TC + elab
|
||||
= stephen chang
|
||||
|
||||
|
||||
regexP;
|
||||
-
|
||||
- put the (list ...) back
|
||||
|
|
|
@ -1,8 +1,14 @@
|
|||
#lang scribble/sigplan @onecolumn
|
||||
|
||||
@require["common.rkt" pict racket/class racket/draw]
|
||||
@; Goal of Sec 2 is to introduce background on TR needed to:
|
||||
@; - understand our bias
|
||||
@; - understand our examples
|
||||
@; - understand "type elaborator api"
|
||||
|
||||
@title[#:tag "sec:background"]{If You Know What I Mean}
|
||||
|
||||
@require["common.rkt"]
|
||||
|
||||
@title[#:tag "sec:background"]{Towards a Type Elaborator API}
|
||||
@; A poet should be of the
|
||||
@; old-fahioned meaningless brand:
|
||||
@; obscure, esoteric, symbolic,
|
||||
|
@ -12,370 +18,254 @@
|
|||
@; I'll gladly explain what it means
|
||||
@; till you don't understand it.
|
||||
|
||||
@; Computers understand a very limited set of instructions.
|
||||
@; It is tedious and error-prone to describe even simple thoughts in terms of these
|
||||
@; instructions, but programming languages provide abstraction mechanisms
|
||||
@; that can make the process bearable.
|
||||
@; For example, low-level or assembly languages can implement the following
|
||||
@; mathematical function for referencing an element of a fixed-length
|
||||
@; data structure:
|
||||
@;
|
||||
@; @exact|{\[
|
||||
@; \RktMeta{ref} = \lambda (\RktMeta{v}~\RktMeta{i})
|
||||
@; \left\{\begin{array}{l l}
|
||||
@; \RktMeta{v}_{i+1} & \RktMeta{if}~\RktMeta{v} = \vectorvn
|
||||
@; \\ ~&\RktMeta{and}~\RktMeta{i} \in \mathbb{N}
|
||||
@; ~\RktMeta{and}~i < n
|
||||
@; \\[4pt]
|
||||
@; \bot & \RktMeta{otherwise}
|
||||
@; \end{array}\right.
|
||||
@; \]}|
|
||||
@;
|
||||
@; @;Here we use the notation @exact|{$\langle \RktMeta{v}_0 \ldots \RktMeta{v}_{n-1} \rangle$}|
|
||||
@; @; to describe a vector of @exact{$n$} elements.
|
||||
@; Whether the implementation is a labeled sequence of instructions or a C function
|
||||
@; is not important; what matters is the precise specification
|
||||
@; and our ability to compute the function via machine.
|
||||
@; Memory-safe functions with clear semantics are the core building blocks for
|
||||
@; sane programs.
|
||||
@;
|
||||
@; As programs grow in size, complexity, and importance, it is desirable to have
|
||||
@; some guarantees about how a program will run before actually running the
|
||||
@; program.
|
||||
@; Ideally, we would statically prove that @racket[ref] is
|
||||
@; never applied to arguments that are not vectors or to out-of-bounds indices.
|
||||
@; @;Such a proof would imply that every call @racket[(ref v i)] in the program
|
||||
@; @; would yield a non-@|bot| value and help show that the overall program works as intended.
|
||||
@; But proving such properties is difficult and there are often many
|
||||
@; functions in a program for which we seek guarantees, so we settle for
|
||||
@; approximate results.
|
||||
@; Instead of statically ruling out all calls @racket[(ref v i)] that produce
|
||||
@; @|bot|, we strive to eliminate a large subset automatically.
|
||||
@;
|
||||
@; Type systems have emerged as a powerful and convenient way of statically detecting
|
||||
@; errors.
|
||||
@; Instead of tracking the flow of exact values through a program, a type system
|
||||
@; tracks the flow of @emph{types} denoting a range of possible runtime values.
|
||||
@; Depending on the type system and programmers' discipline using it, the range
|
||||
@; might be limited enough to show that @racket[ref] never returns @|bot|
|
||||
@; in a program; however, the tradeoff is always the time and energy programmers
|
||||
@; are willing to invest in writing down and maintaining type information.
|
||||
@; As it stands, the most widely used type systems are those that require
|
||||
@; minimal annotations from the programmer and catch only shallow, common errors.
|
||||
@;
|
||||
@;
|
||||
@; @section{Problem Statement and Promises}
|
||||
@;
|
||||
@; @let*[([all-w 150]
|
||||
@; [lang-w (/ all-w 2)]
|
||||
@; [lang-h 20]
|
||||
@; [lang-rectangle
|
||||
@; (lambda (brush-style
|
||||
@; fill-color
|
||||
@; #:width [lang-w lang-w] ;; getting lazy there ben
|
||||
@; #:height [lang-h lang-h]
|
||||
@; #:slant-left [slant-left #f]
|
||||
@; #:border-color [maybe-border-color #f]
|
||||
@; #:border-width [border-width 3])
|
||||
@; (dc (lambda (dc dx dy)
|
||||
@; (define old-brush (send dc get-brush))
|
||||
@; (define old-pen (send dc get-pen))
|
||||
@; (define border-color (or maybe-border-color fill-color))
|
||||
@; (send dc set-brush
|
||||
@; (new brush% [style brush-style] [color fill-color]))
|
||||
@; (send dc set-pen
|
||||
@; (new pen% [width border-width] [color border-color]))
|
||||
@; ;; --
|
||||
@; (define path (new dc-path%))
|
||||
@; (send path move-to 0 0)
|
||||
@; (if slant-left
|
||||
@; (begin (send path line-to (- 0 slant-left) (/ lang-h 2))
|
||||
@; (send path line-to 0 lang-h))
|
||||
@; (send path line-to 0 lang-h))
|
||||
@; (send path line-to lang-w lang-h)
|
||||
@; (send path line-to lang-w 0)
|
||||
@; (send path close)
|
||||
@; (send dc draw-path path dx dy)
|
||||
@; ;; --
|
||||
@; (send dc set-brush old-brush)
|
||||
@; (send dc set-pen old-pen)) lang-w lang-h))]
|
||||
@; [all-e (rectangle all-w lang-h)]
|
||||
@; [hshim (blank (/ all-w 6) lang-h)]
|
||||
@; [untyped-e (hc-append hshim (lang-rectangle 'vertical-hatch "CornflowerBlue" #:border-width 4 #:height (+ lang-h 2)))]
|
||||
@; [typed-e (hc-append (lang-rectangle 'horizontal-hatch "Coral" #:border-width 2) hshim)]
|
||||
@; [reality-of-ml (rc-superimpose (lc-superimpose all-e untyped-e) typed-e)]
|
||||
@; [value-e
|
||||
@; (let ([s 'solid]
|
||||
@; [c "LimeGreen"]
|
||||
@; [w (- lang-w lang-h)]
|
||||
@; [b 1])
|
||||
@; (hc-append (blank (* lang-h 0.8) lang-h)
|
||||
@; (lang-rectangle s c #:width (/ lang-w 3) #:height (/ lang-h 3) #:border-width b)
|
||||
@; (lang-rectangle s c #:border-width b)))]
|
||||
@; [reality-of-tr (lc-superimpose reality-of-ml value-e)]
|
||||
@; )]{@list{
|
||||
@; No matter how precise the type system, static analysis cannot recognize
|
||||
@; all correct programs and reject all incorrect programs.
|
||||
@; Layering a type system on top of an untyped programming language therefore
|
||||
@; leads to a world where the space of all syntactically valid programs
|
||||
@; (drawn as a white box, of course) is partitioned into two overlapping sets.
|
||||
@;
|
||||
@; @centered[reality-of-ml]
|
||||
@;
|
||||
@; On the left, outlined in blue with vertical hatches, we have the set of all untyped programs
|
||||
@; that produce meaningful results (read: non-error, non-diverging terms).
|
||||
@; On the right, outlined in orange with horizontal hatches, we have the set of all programs
|
||||
@; approved by the type system.
|
||||
@; Some type-correct programs raise runtime errors when evaluated, and some
|
||||
@; meaningful programs are rejected by the type checker.
|
||||
@;
|
||||
@; Again, the impossible goal is for the highlighted boxes to overlap.
|
||||
@; Our argument is that adding a @emph{syntactic elaboration} phase before
|
||||
@; typechecking can yield a language described by the
|
||||
@; solid green area shown below, capturing more of the meaningful untyped
|
||||
@; programs and fewer of the typed programs that go wrong, though rejecting
|
||||
@; some typed programs that run to completion but contain a subterm that would
|
||||
@; raise an exception if evaluated (Theorem @exact|{\ref{thm:relevance}}|).
|
||||
@;
|
||||
@; @centered[reality-of-tr]
|
||||
@; }}
|
||||
@;
|
||||
@; Starting from an untyped programming language built from a grammar of terms
|
||||
@; @exact{$e$} with a reduction relation @exact{$e \Downarrow v$}
|
||||
@; and a type judgment @exact{$\vdash e : \tau$} relating terms to
|
||||
@; values @exact{$v$} and types @exact{$\tau$}, we derive an extended language
|
||||
@; by inserting a function @exact{$\elaborate(e)$} before type checking.
|
||||
@; The extended language has a type system @exact{$\vdashe$} consisting of a single
|
||||
@; rule:
|
||||
@;
|
||||
@; @exact|{
|
||||
@; \begin{mathpar}
|
||||
@; \inferrule*{
|
||||
@; \vdash \elaborate(e) : \tau
|
||||
@; }{
|
||||
@; \vdashe e : \tau
|
||||
@; }
|
||||
@; \end{mathpar}
|
||||
@; }|
|
||||
@;
|
||||
@; We also derive a reduction relation on typed terms @exact{$\vdashe e : \tau$}
|
||||
@; as @exact{$\elaborate(e) \Downarrow v$}.
|
||||
@; Type soundness for @exact{$\vdashe$} is thus a corollary of
|
||||
@; type soundness for the @exact{$\vdash$} judgment.
|
||||
@;
|
||||
@; The output of @exact{$\elaborate(e)$} is a term @exact{$e'$},
|
||||
@; though @exact{$e'$} may be a labeled error term
|
||||
@; @exact|{$\bot^{e''}$}| meaning the elaborator detected an error in the program.
|
||||
@; Terms @exact{$e$} such that @exact{$\elaborate(e)$} type checks but @exact{$e$}
|
||||
@; does not are newly expressible in the extended system.
|
||||
@; Terms @exact{$e$} such that @exact{$\vdash e : \tau$} but
|
||||
@; @exact|{$\elaborate(e) = \bot^{e'}$}| are programs validated by the typechecker
|
||||
@; but rejected by the extended system because the subterm @exact{$e'$} of @exact{$e$}
|
||||
@; would raise a runtime error if evaluated.
|
||||
@;
|
||||
@; Our main contribution is defining a useful collection of @emph{local}
|
||||
@; transformations to perform during elaboration.
|
||||
@; Each local elaboration is proven correct in isolation.
|
||||
@; We then define @exact{$\elaborate(e)$} as the union of these local cases
|
||||
@; and a default elaboration that applies @exact{$\elaborate$} to all
|
||||
@; subterms of @exact{$e$}.
|
||||
@;
|
||||
@; To avoid reasoning about terms post-elaboration we will describe
|
||||
@; our local elaborations with type rules of the form: @todo{maybe drop this}
|
||||
@;
|
||||
@; @exact|{
|
||||
@; \begin{mathpar}
|
||||
@; \inferrule*{
|
||||
@; \prope(f, e)
|
||||
@; \\\\
|
||||
@; \vdash f : \tau_f
|
||||
@; \\
|
||||
@; \vdash e : \tau_e
|
||||
@; }{
|
||||
@; \vdashe f~e : \tau
|
||||
@; }
|
||||
@; \end{mathpar}
|
||||
@; }|
|
||||
@;
|
||||
@; where @exact{$\prope$} is a proposition about syntactic terms
|
||||
@; @exact{$f$} and @exact{$e$}
|
||||
@; used to justify the transformation @exact|{$\elaborate(f~e)$}|.
|
||||
@; Despite this inference rule shorthand, elaborations and @exact{$\prope$}
|
||||
@; conditions run before typechecking and have no access to type information
|
||||
@; in the program.
|
||||
@;
|
||||
@; The relationship between terms @exact{$e$} and @exact{$\elaborate(e)$} is given
|
||||
@; by three theorems: @todo{see next section for now}.
|
||||
@;
|
||||
@; Correct elaborations obey these theorems.
|
||||
@; @; Note: there is no guarantee that elaboration produces a well-typed term
|
||||
@; @; because calls like @exact{$\elaborate($}@racket[ref 0 0]@exact{$)$} are allowed.
|
||||
@;
|
||||
@; @; We refrain from making stronger statements about elaborate because
|
||||
@; @; real systems will have lots of other elaborations.
|
||||
@; TODO
|
||||
@; - can we not use "Typed Racket" as the first word of this section?
|
||||
@; to me that's a huge turn-off after reading the introduction.
|
||||
@; - missing answer: why are we using TR
|
||||
|
||||
Typed Racket does not have a type elaboration API; however, it inherits a rich
|
||||
syntax extension system from its host language, Racket.
|
||||
Experience with Racket syntax extensions (aka macros) motives our proposal for
|
||||
a similar type elaboration system, presented in @Secref{subsec:api}.
|
||||
Moreover, we have implemented the transformations described in @Secref{sec:define}
|
||||
as a Typed Racket package
|
||||
and this section is intended to prepare the way for later code snippets.
|
||||
|
||||
|
||||
@section{Syntax Extensions}
|
||||
@section{Typed Racket, today}
|
||||
|
||||
Syntax extension systems have been incorporated in
|
||||
a number of mainstream programming languages.
|
||||
Essentially, syntax extensions let the programmer write code that generates code.
|
||||
In practice this gives programmers the ability to extend the syntax of their
|
||||
language, abstract over textual patterns, and control the evaluation order
|
||||
of expressions.
|
||||
Typed Racket is an ongoing experiment in language design the success of which
|
||||
is a testament to the usefulness and versatility of syntax extensions.
|
||||
The entire language is implemented as a Racket library; in particular, a
|
||||
library of syntax extensions.
|
||||
Types in Typed Racket are distinguished Racket terms, given special meaning by
|
||||
the type checker.
|
||||
The checker itself is nothing more than a Racket function defined over
|
||||
type-annotated programs and run before the program is compiled.
|
||||
When typechecking succeeds, the annotations are erased and the resulting
|
||||
program is fed to the Racket compiler.
|
||||
As such, there is no need for a dedicated ``Typed Racket compiler''.
|
||||
Type-driven optimizations occur on core Racket forms just after type checking
|
||||
and the result feeds in to the existing compiler toolchain.
|
||||
|
||||
Racket's syntax extension API is particularly mature and has inspired
|
||||
similar APIs in at least two other languages, so we adopt it here to introduce
|
||||
syntax extensions.
|
||||
As a first example, suppose we want to assert that expressions
|
||||
@racket[e_1], @racket[e_2], @racket[e_3], and @racket[e_4] all evaluate to
|
||||
the same value.
|
||||
By hand, we might write a sequence of unit tests:
|
||||
@racketblock[
|
||||
(begin (check-equal? e_2 e_1)
|
||||
(check-equal? e_3 e_1)
|
||||
(check-equal? e_4 e_1))
|
||||
]
|
||||
but these tests follow a simple pattern that we can abstract as a @emph{syntax rule},
|
||||
using ellipses (@racket[...]) to capture an arbitrary number of expressions.
|
||||
@racketblock[
|
||||
(define-syntax-rule (check-all-equal? e_1 e_rest ...)
|
||||
(begin (check-equal? e_rest e_1) ...))
|
||||
]
|
||||
Our tests can now be written more concisely:
|
||||
@racketblock[
|
||||
(check-all-equal? e_1 e_2 e_3 e_4)
|
||||
]
|
||||
making them easier to read and maintain.
|
||||
Moreover, we can easily improve the rule to evaluate @racket[e_1] only once
|
||||
instead of @math{n-1} times:
|
||||
@racketblock[
|
||||
(define-syntax-rule (check-all-equal? e_1 e_rest ...)
|
||||
(let ([v e_1])
|
||||
(check-equal? e_rest v) ...))
|
||||
@; One might expect this is kinda slow. Maybe that's true. But has benefits and users.
|
||||
|
||||
For a concrete example, have a left-leaning factorial function:
|
||||
|
||||
@codeblock{
|
||||
#lang typed/racket
|
||||
(define (fact (n : Natural)) : Natural
|
||||
(foldl * n (range 1 n)))
|
||||
}
|
||||
|
||||
Aside from the type annotations, the function is ordinary Racket code
|
||||
and behaves as such when run.
|
||||
But the syntax accepted by Typed Racket's @racket[define] is a superset of
|
||||
valid, untyped Racket.
|
||||
Furthermore, this @racket[define] has an extended semantics.
|
||||
When the above program is compiled, @racket[define] registers the identifier
|
||||
@racket[fact] in a type environment with the signature @racket[(Natural -> Natural)].
|
||||
The definition then expands to an annotation-free Racket @racket[define] and
|
||||
the same expansion-and-type-collection process is repeated on the body of @racket[fact].
|
||||
So just as Typed Racket re-uses the Racket compiler, Typed Racket's @racket[define]
|
||||
re-uses the semantics of Racket's.
|
||||
This sleight of hand is accomplished by shadowing @racket[define] with a syntax
|
||||
extension that moves types from the program syntax to a type environment---and
|
||||
crucially, does nothing more.
|
||||
|
||||
Note, however, that no types are checked at this point.
|
||||
It is only after the entire program is expanded to core Racket and all type
|
||||
definitions collected that types are checked and e.g. the type variables
|
||||
for @racket[foldl] are instantiated.
|
||||
Waiting until all syntax extensions are expanded is both a pragmatic choice
|
||||
and allows extensions to create and refine types written in a program without
|
||||
subverting the authoritarian type checker.
|
||||
|
||||
@; not really liking 'pragmatic' but I guess it should be obvious, that's the
|
||||
@; easiest way to implement a TC for Racket
|
||||
|
||||
|
||||
@section{Racket Macros, quickly}
|
||||
|
||||
Having built some intuition for how Typed Racket's @racket[define] operates,
|
||||
we use its definition to introduce Racket macros.@note{After this section,
|
||||
we will stop using the term @emph{macro} in favor of the more general
|
||||
phrase @emph{syntax extension}.}
|
||||
The following is paraphrased from Typed Racket and elaborates a type-annotated
|
||||
@racket[define] to an unannotated one.
|
||||
|
||||
@racketblock[
|
||||
(define-syntax (-define stx)
|
||||
(syntax-parse stx #:literals (:)
|
||||
[(-define (nm:id (arg:id : ty)) : ret-ty body ...)
|
||||
(define type #`(ty -> ret-ty))
|
||||
(register-type #`nm type)
|
||||
#`(define (nm arg) body ...)]))
|
||||
]
|
||||
|
||||
Going line-by-line, we have:
|
||||
|
||||
@; TODO what is the point of each?
|
||||
@; TODO what is the bottom line?
|
||||
|
||||
@itemlist[
|
||||
@item{
|
||||
@racket[define-syntax] declares a function on code; in other words,
|
||||
a macro.
|
||||
The formal parameter @racket[stx] is so named because it always binds
|
||||
a @emph{syntax object} representing the context of a call to the
|
||||
@racket[-define] macro.
|
||||
}
|
||||
@item{
|
||||
@racket[syntax-parse] is pattern matching for syntax objects.
|
||||
The optional argument @racket[#:literals (:)] causes any @racket[:] characters
|
||||
in a @racket[syntax-parse] pattern to match only against the @racket[:]
|
||||
identifier bound in the current lexical scope.
|
||||
Normally the variable @racket[:] is no different from the pattern @racket[x]
|
||||
or the pattern @racket[any-variable].
|
||||
}
|
||||
@item{
|
||||
The third line of the macro is a pattern.
|
||||
The remaining lines are instructions to perform if the pattern matches.
|
||||
Within the pattern:
|
||||
@itemlist[
|
||||
@item{
|
||||
@racket[-define], @racket[nm], @racket[arg], @racket[ty],
|
||||
@racket[ret-ty], and @racket[body] are @emph{pattern variables} matched to
|
||||
sub-expressions of @racket[stx].
|
||||
}
|
||||
@item{
|
||||
The ellipses (@racket[...]) are part of the grammar of @racket[syntax-parse]
|
||||
and match zero or more occurrences of the previous pattern,
|
||||
to the effect that @racket[body ...] matches a list of consecutive expressions.
|
||||
}
|
||||
@item{
|
||||
@racket[:id], as in @racket[nm:id] and @racket[arg:id], is a
|
||||
@emph{syntax class} annotation.
|
||||
Using the @racket[id] syntax class causes @racket[nm] and @racket[arg]
|
||||
to only match identifiers and not, for instance, integer constants or
|
||||
parenthesized expressions.
|
||||
}
|
||||
]
|
||||
}
|
||||
@item{
|
||||
@|stx| creates a syntax object from an expression.
|
||||
In this case, the syntax object is a function type, built from our pattern
|
||||
variables and Typed Racket's @racket[->] constructor.
|
||||
}
|
||||
@item{
|
||||
@racket[register-type] binds an identifier to a type in a global type environment.
|
||||
Since the pattern variable @racket[nm] is only bound to a symbol, we use
|
||||
the syntax constructor @|stx| to associate the symbol with
|
||||
the lexical scope of @racket[stx].
|
||||
}
|
||||
@item{
|
||||
The result of any @racket[syntax-parse] pattern must be a syntax object.
|
||||
Here we build a Racket @racket[define] statement from the relevant pattern
|
||||
variables.
|
||||
On this line, the ellipses (@racket[...]) are used in a dual sense to
|
||||
splice the contents of the list @racket[body] into the body of the new
|
||||
@racket[define] statement.
|
||||
}
|
||||
]
|
||||
|
||||
Intuitively, syntax rules perform a search-and-replace before the program
|
||||
is compiled; however, the replacement process is careful to preserve the
|
||||
lexical structure of programs.
|
||||
For instance, a program that uses the same variable name @racket[v] as the syntax rule:
|
||||
@racketblock[
|
||||
(define v 5)
|
||||
(check-all-equal? (+ 2 2) v)
|
||||
]
|
||||
will expand to code that introduces a fresh variable @racket[v1]:
|
||||
@racketblock[
|
||||
(define v 5)
|
||||
(let ([v1 (+ 2 2)])
|
||||
(check-equal? v v1))
|
||||
]
|
||||
to avoid shadowing the programmer's variable with the identifier used inside
|
||||
the syntax rule.
|
||||
Details on how this so-called @emph{hygenic} expansion is implemented and
|
||||
its benefits to extension writers and users are explained by Flatt@~cite[f-popl-2016 fcdb-jfp-2012].
|
||||
A call to @racket[syntax-parse] can contain multiple patterns.
|
||||
Indeed,
|
||||
the actual definition of Typed Racket's define has patterns for
|
||||
non-function definitions @racket[(define n : Integer 42)]
|
||||
and unannotated @racket[define] statements.
|
||||
Finally, the module that implements @racket[-define] exports it as @racket[define]
|
||||
to change the behavior of definitions only in external modules.
|
||||
|
||||
In addition to pattern-based syntax rules, one can extend Racket with
|
||||
arbitrary functions defined over @emph{syntax objects}.
|
||||
For instance, we can write an extension that formats log messages to a port
|
||||
@racket[log-output] at runtime when the program is compiled with a flag
|
||||
@racket[DEBUG] enabled. If the flag is disabled, we perform a no-op.
|
||||
The expansion of @racket[log] calls happens during compilation:
|
||||
@(begin
|
||||
#reader scribble/comment-reader
|
||||
@codeblock|{
|
||||
;; With DEBUG enabled
|
||||
(log "everything ok")
|
||||
==> (displayln "everything ok" log-output)
|
||||
;; With DEBUG disabled
|
||||
(log "everything still ok")
|
||||
==> (void)
|
||||
}|)
|
||||
@; Include a jab about parenthesized syntax making metaprogramming life easier?
|
||||
@; @subsection{Oh, the Parentheses}
|
||||
|
||||
The @racket[syntax-parse] form is a convenient way to implement @racket[log].
|
||||
In this case, we use @racket[syntax-parse] to deconstruct a syntax object
|
||||
@racket[stx] representing an application of @racket[log].
|
||||
The output of syntax parse is a new syntax object built using the constructor
|
||||
@racket[syntax].
|
||||
@codeblock{
|
||||
(define-syntax (log stx)
|
||||
(syntax-parse stx
|
||||
[(log message)
|
||||
(if DEBUG
|
||||
(syntax (displayln message log-output))
|
||||
(syntax (void)))]))
|
||||
}
|
||||
We can further enhance our implementation with a compile-time check that
|
||||
the value bound to the pattern variable @racket[message] is a string literal.
|
||||
@codeblock{
|
||||
(define-syntax (log stx)
|
||||
(syntax-parse stx
|
||||
[(log message)
|
||||
(unless (string? (syntax->datum (syntax message)))
|
||||
(error "log: expected a string literal"))
|
||||
(if DEBUG
|
||||
(syntax (displayln message log-output))
|
||||
(syntax (void)))]))
|
||||
}
|
||||
@; Alternatively, we can use Racket's @emph{syntax classes} to the same effect.
|
||||
@; The @racket[str] syntax class recognizes literal strings.
|
||||
@; Binding it to the pattern variable @racket[message] causes calls like
|
||||
@; @racket[(log 61)] to raise a compile error.
|
||||
@; @codeblock{
|
||||
@; (define-syntax (log stx)
|
||||
@; (syntax-parse stx
|
||||
@; [(log message:str)
|
||||
@; (if DEBUG
|
||||
@; (syntax (displayln message log-output))
|
||||
@; (syntax (void)))]))
|
||||
@; }
|
||||
With this enhancement, calls like @racket[(log 61)] are rejected statically.
|
||||
Unfortunately, arguments that @emph{evaluate} to string literals
|
||||
are also rejected
|
||||
because the syntax extension cannot statically predict what value an arbitrary
|
||||
expression will reduce to.
|
||||
This is a fundamental limitation, but we can make a small improvement by accepting
|
||||
any @racket[message] produced by another (trusted) syntax extension.
|
||||
Suppose @racket[++] is an extension for concatenating two strings.
|
||||
If we assign a unique @emph{syntax property} to the syntax object produced
|
||||
by @racket[++], we can later retrieve the property in the @racket[log] extension.
|
||||
|
||||
First, we give an implementation of @racket[++] in terms of Racket's
|
||||
built-in @racket[string-append] function, crucially using @racket[syntax-property]
|
||||
to associate the value @racket['string] with the key @racket['static-type].
|
||||
@codeblock{
|
||||
(define-syntax (++ stx)
|
||||
(syntax-parse stx
|
||||
[(++ s1 s2)
|
||||
(syntax-property
|
||||
(syntax (string-append s1 s2))
|
||||
'static-type 'string)]))
|
||||
}
|
||||
Assuming now that the key @racket['static-type] accurately describes the value
|
||||
contained in a syntax object, @racket[log] can accept both string
|
||||
literals and tagged syntax objects.
|
||||
@codeblock{
|
||||
(define-syntax (log stx)
|
||||
(syntax-parse stx
|
||||
[(log message)
|
||||
(define is-string?
|
||||
(string? (syntax->datum (syntax message))))
|
||||
(define expands-to-string?
|
||||
(eq? 'string
|
||||
(syntax-property
|
||||
(local-expand message)
|
||||
'static-type)))
|
||||
(unless (or is-string? expands-to-string?)
|
||||
(error "log: expected a compile-time string"))
|
||||
(if DEBUG
|
||||
(syntax (displayln message log-output))
|
||||
(syntax (void)))]))
|
||||
}
|
||||
@section[#:tag "subsec:api"]{Implementing a Type Elaborator}
|
||||
|
||||
Our syntax extensions @racket[check-all-equal?], @racket[log], and @racket[++]
|
||||
are indistinguishable from user-defined functions or core language forms,
|
||||
yet they perform useful transformations before a program is typechecked or
|
||||
compiled.
|
||||
This seamless integration gives Racket programmers the ability to grow the
|
||||
language and tailor it to their specific needs.
|
||||
Just as Typed Racket parses the syntax of a program and extracts breadcrumbs
|
||||
for the type environment, a type elaborator transforms
|
||||
a program leaving hints to guide the type checker.
|
||||
Syntax extensions are a low-level way to achieve this behavior.
|
||||
|
||||
@Figure-ref{fig:printf} demonstrates a type-elaborated variant of Racket's
|
||||
@racket[printf].
|
||||
When called with a string literal @racket[fmt] as its first argument, the
|
||||
elaborator @racket[-printf]
|
||||
reads @racket[fmt] for type and arity constraints using the function @racket[format-types].
|
||||
For instance, the format string @racket["~b"] would produce the type constraint
|
||||
@racket[(Exact-Number)], implying that the arguments @racket[args] must be a
|
||||
list with one element of type @racket[Exact-Number].
|
||||
This constraint is used twice in @racket[-printf]:
|
||||
first to check the length of @racket[args] against the length of @racket[types]
|
||||
and second to add explicit type annotations (via @racket[ann]) around each
|
||||
argument to the format string.
|
||||
Whereas Typed Racket accepts any number of values with any type and
|
||||
lets Racket's @racket[printf] raise runtime errors, type elaboration reports
|
||||
both arity and type errors statically.
|
||||
@; arity error = caught directly
|
||||
@; type error = implied
|
||||
|
||||
@figure["fig:printf" "Type elaboration for format strings"
|
||||
@racketblock[
|
||||
(define-syntax (-printf stx)
|
||||
(syntax-parse stx
|
||||
[(-printf fmt:str args ...)
|
||||
#:with (types ...) (format-types #`fmt)
|
||||
(if (= (stx-length #`(args ...))
|
||||
(stx-length #`(types ...)))
|
||||
#`(printf fmt (ann args types) ...)
|
||||
(error 'printf "arity mismatch in ~s" stx))]))
|
||||
]
|
||||
@; Include the default branch? I just don't know what to say about it.
|
||||
@; [(-printf fmt args ...)
|
||||
@; #`(printf fmt args ...)]))
|
||||
]
|
||||
|
||||
In general, the high-level goals of such type elaborations are:
|
||||
@itemlist[
|
||||
@item{ (@goal{refinement})
|
||||
Refine type signatures using latent, syntactic @emph{value information},
|
||||
such as the characters in a string constant.
|
||||
}
|
||||
@item{ (@goal{reuse})
|
||||
Rely on the existing type checker.
|
||||
Avoid duplicating its work and never resort to proofs by assertion.@note[@elem{
|
||||
Inspired by the @emph{translation validation} approach to
|
||||
compiler correctness @~cite[pss-tacas-1998].}]
|
||||
@; Just trying to say, always typecheck things
|
||||
}
|
||||
@item{ (@goal{relevance})
|
||||
Report errors in terms of the programmer's code, not in terms
|
||||
of elaborated code.@note[@elem{Inspired by SoundX @~cite[le-popl-2016].}]
|
||||
}
|
||||
]
|
||||
|
||||
The @racket[-printf] elaborator meets these goals by producing Typed Racket code
|
||||
that only adds type annotations to the original program.
|
||||
If these annotations fail, they report a type error relative to an element
|
||||
of @racket[args].
|
||||
As for refining the types, @racket[-printf] is best described with a quasi-dependent
|
||||
type in terms of a proof theory @exact{$\Sigma$}.
|
||||
|
||||
@exact|{\begin{mathpar}
|
||||
\inferrule{
|
||||
\Sigma \vdash (\RktMeta{format-types}~\RktMeta{fmt}) = \tau_0 \ldots \tau_{n-1}
|
||||
\\\\
|
||||
\typestogen{\penv;\,\tenv}{\RktMeta{arg}_0}{\tau_0}
|
||||
\\
|
||||
\ldots
|
||||
\\
|
||||
\typestogen{\penv;\,\tenv}{\RktMeta{arg}_{n-1}}{\tau_{n-1}}
|
||||
}{
|
||||
\typestogen{\penv;\,\tenv}{\RktMeta{-printf fmt}~\RktMeta{arg}_0 \ldots \RktMeta{arg}_{n-1}}{\mathsf{Unit}}
|
||||
}
|
||||
\end{mathpar}}|
|
||||
|
||||
We chose @racket[printf] as an introductory example because its correctness
|
||||
and type soundness follow directly from the soundness of @racket[format-types].
|
||||
Correctness is not generally so simple to "prove", so @Secref{sec:segfault} and @Secref{sec:regexp}
|
||||
show how type elaboration can justify a potentially unsafe optimizing transformation
|
||||
and give library authors a technique for implementing a precise API without
|
||||
changing their language's type system.
|
||||
|
||||
@; NOTE: we will use similar 'typed macros' in a coming section... right?
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
|
||||
;; FIXME: this doesn't have all the papers from the README yet
|
||||
|
||||
(require racket/format
|
||||
(require racket/format
|
||||
scriblib/autobib)
|
||||
|
||||
(provide (all-defined-out))
|
||||
|
@ -77,6 +77,7 @@
|
|||
(define/short sigmod "SIGMOD" (string-append ACM "SIGMOD " International Conference "on Management of Data"))
|
||||
(define/short sigplan-notices "SIGPLAN Notices" (string-append ACM "SIGPLAN Notices"))
|
||||
(define/short tacs (string-append International Symposium "Theoretical Aspects of Computer Science"))
|
||||
(define/short tacas (string-append International Conference "on Tools and Algorithms for the Construction and Analysis of Systems"))
|
||||
(define/short tcs "Theoretical Computer Science")
|
||||
(define/short tfp "TFP" (string-append Symposium "Trends in Functional Programming"))
|
||||
(define/short tlca "TLCA" (string-append International Conference "Typed Lambda Calculi and Applications"))
|
||||
|
@ -1725,9 +1726,23 @@
|
|||
#:location (proceedings-location icfp #:pages '(235 246))
|
||||
#:date 2010))
|
||||
|
||||
(define pss-tacas-1998
|
||||
(make-bib
|
||||
#:title "Translation Validation"
|
||||
#:author (authors "Amir Pnueli" "Michael Siegel" "Eli Singerman")
|
||||
#:location (proceedings-location tacas #:pages '(151 166))
|
||||
#:date 1998))
|
||||
|
||||
(define le-popl-2016
|
||||
(make-bib
|
||||
#:title "Sound Type-Dependent Syntactic Language Extension"
|
||||
#:author (authors "Florian Lorenzen" "Sebastian Erdweg")
|
||||
#:location (proceedings-location popl #:pages '(204 216))
|
||||
#:date 2016))
|
||||
|
||||
(define gr-cup-2004
|
||||
(make-bib
|
||||
#:title "The Standard ML Base Library"
|
||||
#:author (authors "Emden R. Gansner" "John H. Reppy")
|
||||
#:location (book-location #:edition "1" #:publisher "Cambridge University Press")
|
||||
#:date 2004))
|
||||
|
|
|
@ -21,6 +21,10 @@
|
|||
id
|
||||
todo
|
||||
proof
|
||||
warning
|
||||
goal
|
||||
|
||||
stx
|
||||
)
|
||||
|
||||
(require "bib.rkt"
|
||||
|
@ -117,7 +121,7 @@
|
|||
|
||||
(define (sf x) (elem #:style "sfstyle" x))
|
||||
|
||||
(define (sc x) (exact "\\textsc{" x "}"))
|
||||
(define (sc x) (exact "\\textsc{\\small " x "}"))
|
||||
|
||||
(define (parag . x) (apply elem #:style "paragraph" x))
|
||||
|
||||
|
@ -174,3 +178,14 @@
|
|||
txt
|
||||
(exact "\\hfill\\qed"))))
|
||||
|
||||
(define (warning sym txt . arg*)
|
||||
(printf "[WARNING] ~a: " sym)
|
||||
(apply printf txt arg*)
|
||||
(newline))
|
||||
|
||||
(define (goal str)
|
||||
(bold (emph str)))
|
||||
|
||||
(define stx
|
||||
(exact "\\RktRdr{\\#{\\textasciigrave}}"))
|
||||
|
||||
|
|
|
@ -13,3 +13,6 @@ and only tells eternity.
|
|||
@; type systems change slowly. That's OK
|
||||
@; syntax extensions can change much faster and YOU can control the
|
||||
@; changes. That's YOU joe programmer or YOU jane street capital. Do you.
|
||||
|
||||
@; just one way of doing this, can imagine other Typed Racekt
|
||||
@; in fact Stephen Chang .....
|
||||
|
|
|
@ -1,3 +0,0 @@
|
|||
\begin{center}
|
||||
to do
|
||||
\end{center}
|
|
@ -18,13 +18,9 @@
|
|||
}
|
||||
|
||||
\inferrule*[left=T-Unsafe]{
|
||||
\typestoclosed{\vectorvn}{\tarray}
|
||||
\typesto{\vectorvn}{\tarray}
|
||||
\\
|
||||
\typestoclosed{i}{\tint}
|
||||
\\\\
|
||||
i \in \ints
|
||||
\\
|
||||
0 \leq i < n
|
||||
\typesto{i}{\tint}
|
||||
}{
|
||||
\typesto{\unsaferef~\vectorvn~i}{\tint}
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#lang scribble/sigplan @onecolumn
|
||||
|
||||
@require["common.rkt" (only-in scribble/base nested)]
|
||||
@require["common.rkt" (only-in scribble/base nested) "bib.rkt"]
|
||||
|
||||
@title[#:tag "sec:intro"]{Type Eleborators Need APIs}
|
||||
|
||||
|
@ -13,11 +13,11 @@ consistency according to the underlying type theory, replaces many of the
|
|||
surface-syntax constructs with constructs from the kernel language, and
|
||||
inserts type information to create an annotated representation.
|
||||
|
||||
Some (implementations of such) programming languages also support a way to
|
||||
Some programming languages also support a meta-API, that is, a way to
|
||||
programmatically direct the elaborator. For example, Rust and Scala come
|
||||
with compiler plug-ins. Typed Racket and Types Clojure inherit the macro
|
||||
mechanisms of their parent languages. Here we refer to such mechanisms as
|
||||
@defterm{elaborator API}s.
|
||||
mechanisms of their parents. This paper refers to all such mechanisms as
|
||||
@defterm{elaborator API}s.
|
||||
|
||||
Equipping type-checking elaborators with APIs---or using the existing
|
||||
APIs---promises new ways to expand the power of type checking at a
|
||||
|
@ -29,21 +29,17 @@ Consider the example of tailoring the API of a library that implements a
|
|||
string-based, domain-specific languages. Examples of such libraries abound:
|
||||
formatting, regular-expression matching, database queries, and so on. The
|
||||
creators of these DSLs tend to know a lot about how programs written in
|
||||
these DSLs relate to the host program, but they cannot express this
|
||||
knowledge in API types without resorting to such rich systems as
|
||||
dependent type theory.
|
||||
|
||||
Type tailoring allows such programmers to refine the typing rules for the
|
||||
APIs in a relatively straightforward way. Recall the API of the
|
||||
regular-expression library. In a typical typed language, the matching
|
||||
function is exported with a type like this:
|
||||
@;
|
||||
@verbatim[#:indent 4]{
|
||||
|
||||
reg_exp_match: String -> Option [Listof String]
|
||||
|
||||
}
|
||||
these DSLs relate to the host program, but they (usually) cannot express
|
||||
this knowledge in API types. Type tailoring allows such programmers to
|
||||
refine the typing rules for the APIs in a relatively straightforward
|
||||
way. Recall the API of the regular-expression library. In a typical typed
|
||||
language, the matching function is exported with a type like this:
|
||||
@;
|
||||
@exact|{
|
||||
\begin{mathpar}
|
||||
\mbox{regexp-match} : \mbox{String} \rightarrow \mbox{Opt[Listof[String]]}
|
||||
\end{mathpar}\hspace{-.3em}}|
|
||||
@; the above is an ***incredibly disgusting hack to work around a scribble bug***
|
||||
When the result is a list of strings, the length of the list depends on the
|
||||
argument string---but the API for the regular-expression library cannot
|
||||
express this relationship between the regular expression and the
|
||||
|
@ -51,69 +47,87 @@ surrounding host program. As a result, the author of the host program must
|
|||
inject additional complexity, often in the form of (hidden) run-time checks.
|
||||
|
||||
If type tailoring is available, the creator of the library can refine the
|
||||
type of @tt{reg_exp_match} with rules such as these:
|
||||
@;
|
||||
@verbatim[#:indent 4]{
|
||||
|
||||
Program |- s : does not contain grouping
|
||||
------------------------------------------------
|
||||
G |- reg_exp_match(s) : Option [List String]
|
||||
|
||||
|
||||
Program |- s : contains one grouping w/o alternative
|
||||
-----------------------------------------------------
|
||||
G |- reg_exp_match(s) : Option [List String String]
|
||||
|
||||
}
|
||||
type of @tt{regexp-match} with rules such as this one:
|
||||
@;
|
||||
@exact|{
|
||||
\begin{mathpar}
|
||||
\inferrule*[]{
|
||||
\elabsto{e}{e'}{\mbox{String}}, \\\\
|
||||
\mbox{\it Program} \vdash e' \mbox{ does not contain a grouping}
|
||||
}{
|
||||
\elabsto{\mbox{regexp-match}~e}{\mbox{regexp-match}~e'}{\mbox{Opt[List[String]]}}
|
||||
}
|
||||
\end{mathpar}\hspace{-.3em}}|
|
||||
That is, when the extended elaborator can use (a fragment of) the program
|
||||
to prove that the given string for a specific use of @tt{reg_exp_match}
|
||||
to prove that the given string for a specific use of @tt{regexp-match}
|
||||
does not contain grouping specifications---ways to extract a sub-string via
|
||||
matching---then the @tt{Some} result type is a list that contains a single
|
||||
string. Similarly, if the string contains exactly one such grouping (and no
|
||||
alternative), then a match produces a list of two strings. In all other
|
||||
cases, the type checker uses the default type for the function.
|
||||
matching---the @tt{Some} result type is a list of a single
|
||||
string. The original specifications remains the default rule, which the
|
||||
type checker uses when it cannot use the specific ones.
|
||||
|
||||
A vector library is another familiar example that offers opportunities for
|
||||
type tailoring. Often such a library exports a dereferencing construct with
|
||||
A similar rule would say if the string contains exactly one such grouping
|
||||
(and no alternative), a match produces a list of two strings. Critically,
|
||||
this refined type for the result of applying @tt{regexp-match} enables
|
||||
further type refinements, just like constant folding enables additional
|
||||
compiler optimizations. In this specific case, the program context of the
|
||||
application of @tt{regexp-match} may dereference the list with unsafe---and
|
||||
thus faster---versions of @tt{first} and @tt{second} once it has confirmed
|
||||
a match.
|
||||
|
||||
Vectors offer a similar opportunity for unsafe dereferencing via
|
||||
type tailoring. Typically, such a library exports a dereferencing construct with
|
||||
the following type rule:
|
||||
@;
|
||||
@verbatim[#:indent 4]{
|
||||
|
||||
G |- v : Vector[X] G |-e : Integer
|
||||
----------------------------------------
|
||||
G |- v[e] ~~> checked_ref(v,e) : X
|
||||
}
|
||||
@exact|{
|
||||
\begin{mathpar}
|
||||
\inferrule*[]{
|
||||
\elabsto{e_1}{e_1'}{\tarray}
|
||||
\\
|
||||
\elabsto{e_2}{e_2'}{\tint}
|
||||
}{
|
||||
\elabsto{{e_1}[{e_2}]}{\checkedref~e_1'~e_2'}{\tint}
|
||||
}
|
||||
\end{mathpar}\hspace{-.3em}}|
|
||||
@;
|
||||
If the elaborator can prove, however, that the indexing expression @tt{e}
|
||||
is equal to or evaluates to a specific integer @tt{i}, the rule can be
|
||||
strengthened as follows:
|
||||
@;
|
||||
@verbatim[#:indent 4]{
|
||||
@exact|{
|
||||
\begin{mathpar}
|
||||
|
||||
Program |- e = i for some i in Natural
|
||||
&& Program |- v is vector of length n
|
||||
&& i < n
|
||||
\inferrule*[]{
|
||||
\elabsto{e_1}{\vectorvn}{\tarray},
|
||||
\\
|
||||
\elabsto{e_2}{e'_2}{\tint},
|
||||
\\\\
|
||||
\mbox{\it Program} \vdash e'_2 = i, \quad
|
||||
i \in \ints, \quad
|
||||
0 \le i < n
|
||||
}{
|
||||
\elabsto{{e_1}[{e_2}]}{\unsaferef~\vectorvn~i}{\tint}
|
||||
}
|
||||
\end{mathpar}\hspace{-.3em}}|
|
||||
|
||||
G |- v : Vector[X] G |-e : Integer
|
||||
----------------------------------------
|
||||
G |- v[e] ~~> unsafe-ref(v,e) : X
|
||||
}
|
||||
This paper introduces and evaluates the novel idea of type tailoring. It
|
||||
uses Typed Racket and its API to the elaborator (section 2) because the
|
||||
language already supports appropriate type and run-time systems and because
|
||||
it is relatively straightforward to program the type
|
||||
elaborator---@emph{without modifying it}. To make type tailoring concrete
|
||||
and to demonstrate its usefulness, the paper presents two case studies
|
||||
(sections 3 and 4). Each report on a case study consists of three parts: a
|
||||
type soundness argument assuming a type soundness argument for the complete
|
||||
language exists; the actual
|
||||
@;
|
||||
That is, the elaborator can then eliminate a possibly expensive run-time
|
||||
check.
|
||||
|
||||
This paper demonstrates the idea with concrete case studies of type
|
||||
tailoring (section 2) in the context of Typed Racket and its API to the
|
||||
elaborator (section 3). To illustrate the usefulness of the idea, we
|
||||
implement two tailorings. The first one---chosen for the wide familiarity
|
||||
of the domain---enriches the type-checking of vector-referencing operations
|
||||
(section 4). The second example explains how the implementor of a
|
||||
string-based embedded DSL---a regexp-matching DSL---may tailor the types of
|
||||
the interpretation function (section 5). Our evaluations confirm that these
|
||||
tailorings reduce the number of run-time checks that the programmer or the
|
||||
elaborator have to insert into the host program. In addition, we sketch
|
||||
@margin-note*{BEN: this evaluation is missing an idea}
|
||||
@;
|
||||
implementation; and an evaluation that reports how often the revised type
|
||||
elaborator can improve the code. The first type tailoring---chosen for the
|
||||
wide familiarity of the domain---enriches the type-checking of
|
||||
vector-referencing operations (section 3). The second example explains how
|
||||
the implementor of a string-based embedded DSL---a regexp-matching DSL,
|
||||
also widely familiar to programming researchers---may tailor the types of
|
||||
the interpretation function (section 4). In addition, the paper sketches
|
||||
several other applications of type tailoring in Typed Racket (section
|
||||
6). We also explain how the creator of such libraries can refine the
|
||||
existing type soundness proof of the host language to argue the correctness
|
||||
of the tailoring.
|
||||
5). The final two sections compare programmability of the type elaborator
|
||||
to work on dependent types and sketch how such an API could be implemented
|
||||
for other languages.
|
||||
|
|
|
@ -7,23 +7,23 @@
|
|||
|
||||
@abstract{
|
||||
|
||||
Many typed APIs implicitly acknowledge the @emph{diktat} that the host type
|
||||
system imposes on the creators of libraries. When a library implements a
|
||||
string-based domain-specific language, the problem is particularly obvious.
|
||||
The interpretation functions for the programs in this embedded language
|
||||
come with the uninformative type that maps a string to some other host
|
||||
type. Only dependently typed languages can improve on this scenario at the
|
||||
moment, but they impose a steep learning curve on programmers.
|
||||
Typed APIs suffer from the @emph{diktat} of the host type system.
|
||||
Libraries that implement string-based domain-specific languages make this
|
||||
clash particularly obvious. The interpretation functions for the programs
|
||||
in embedded languages come with the rather uninformative type that maps a
|
||||
string to some other host type. Only dependently typed languages can
|
||||
improve on this scenario at the moment, but they impose a steep learning
|
||||
curve on programmers.
|
||||
|
||||
This paper proposes to tackle this problem with APIs for type
|
||||
checkers. Specifically, it observes that most typed languages already
|
||||
employ an elaboration pass to type-check programs. If this elaborator
|
||||
comes with a sufficiently rich API, the author of a library can supplement
|
||||
the default types of the library's API with typing rules that improve the
|
||||
collaboration between host programs and uses of the library. The
|
||||
evaluation uses a prototype for Typed Racket and illustrates how useful
|
||||
the idea is for widely available libraries. Also the paper sketches how
|
||||
the authors of such ``tailored'' rules can argue their soundness.
|
||||
This paper proposes to tackle this problem with a meta-API for the type
|
||||
checker. Specifically, it observes that most typed languages already
|
||||
employ an elaboration pass to type-check programs. If this elaborator came
|
||||
with a sufficiently rich API, the author of a library could supplement the
|
||||
default types of the API with typing rules that improve the collaboration
|
||||
between host programs and uses of the library. To demonstrate the
|
||||
feasibility and effectiveness of this idea, the paper presents a prototype
|
||||
for Typed Racket and two case studies. It also sketches how the authors of
|
||||
such ``tailored'' typing rules can argue their soundness.
|
||||
|
||||
}
|
||||
|
||||
|
@ -34,12 +34,14 @@ This paper proposes to tackle this problem with APIs for type
|
|||
@; See OUTLINE.md for explanation
|
||||
@include-section{intro.scrbl}
|
||||
@include-section{background.scrbl}
|
||||
@;@include-section{elaborators.scrbl}
|
||||
@include-section{segfault.scrbl}
|
||||
@;@include-section{examples.scrbl}
|
||||
@include-section{regexp.scrbl}
|
||||
@include-section{define.scrbl}
|
||||
@;@include-section{discussion.scrbl}
|
||||
@;@include-section{friends.scrbl}
|
||||
@;@include-section{related-work.scrbl}
|
||||
@;@include-section{conclusion.scrbl}
|
||||
@include-section{related-work.scrbl}
|
||||
@include-section{conclusion.scrbl}
|
||||
|
||||
@section[#:style 'unnumbered]{Acknowledgments}
|
||||
|
||||
|
|
|
@ -4,6 +4,8 @@
|
|||
|
||||
@title[#:tag "sec:related-work"]{Experts}
|
||||
|
||||
@section{Macros}
|
||||
|
||||
@section{SoundX}
|
||||
SoundX is a system for modeling programming languages and defining type-sound
|
||||
extensions, e.g. defining a type derivation for @tt{let} in terms of a type
|
||||
|
@ -40,6 +42,12 @@ Our general approach and outlook on type soundness is informed by Cousot.
|
|||
|
||||
@section{Compiler Plugings}
|
||||
|
||||
@; -- HASKELL
|
||||
@; https://ghc.haskell.org/trac/ghc/wiki/Plugins/TypeChecker
|
||||
@; http://christiaanb.github.io/posts/type-checker-plugin/
|
||||
@; http://adam.gundry.co.uk/pub/typechecker-plugins/typechecker-plugins-2015-07-17.pdf
|
||||
@; https://github.com/yav/type-nat-solver (copy of paper in src/ folder here)
|
||||
|
||||
GHC (constraint solvers)
|
||||
Rust (macros)
|
||||
Scala (macros)
|
||||
|
|
|
@ -1,8 +1,54 @@
|
|||
#lang scribble/sigplan @onecolumn
|
||||
@; 1. MODEL
|
||||
@; 2. IMPLEMENTATION
|
||||
@; 3. EVALUATION
|
||||
|
||||
@require["common.rkt"]
|
||||
@; Q. remove <e ... e>, and only have vector values?
|
||||
@; because the syntax extension rules expect only values (expressions ruin the proofs)
|
||||
|
||||
@title[#:tag "sec:segfault"]{Well Typed Programs do not go SEGFAULT}
|
||||
@require[
|
||||
"common.rkt"
|
||||
"evaluation.rkt"
|
||||
glob
|
||||
racket/sequence
|
||||
(only-in racket/list take)
|
||||
(only-in trivial/private/raco-command collect-and-summarize)
|
||||
]
|
||||
|
||||
@title[#:tag "sec:segfault"]{When using the API can cause a segfault}
|
||||
|
||||
Every programming languages comes with arrays. In Typed Racket, array
|
||||
facilities come as a library that essentially exports constructors,
|
||||
dereferencing functions, and mutation operations. SML similarly provides
|
||||
them from a run-time library@~cite[gr-cup-2004].
|
||||
The API for array libraries tends to come with highly conservative
|
||||
signatures. For example, an array indexing operation calls for an array
|
||||
and an integer and then produces the designated element from the given
|
||||
array; run-time checks ensure that the integer is in the interval
|
||||
@math{[0,n)} where @math{n} is the length of the array.
|
||||
|
||||
To speed up program execution, Typed Racket has access to an unsafe array
|
||||
indexing operation. Like array indexing in C, this operation
|
||||
retrieves the bits at the specified location without checking the size of
|
||||
the index. If used inappropriately, such an unsafe operation can cause the
|
||||
program to print random results or to segfault.
|
||||
|
||||
In this section we show that the creator of the Typed Racket array library
|
||||
can replace checked array indexing with unsafe indexing. While this use of
|
||||
type tailoring is quite simple, it supplies a great case study. To begin
|
||||
with, it requires an innovation on the standard progress and preservation
|
||||
method for showing type soundness. Specifically, the author of the array
|
||||
library must show that the evaluator remains a function and does not
|
||||
introduce segfaults into typed programs without run-time checks for
|
||||
indexing (@secref{sec:segfault:model}). The Typed Racket implementation is quite
|
||||
straightforward; the core consists of two dozen lines
|
||||
(@secref{sec:segfault:implementation}). Finally, an evaluation on the Racket code
|
||||
base indicates that the prototype is highly effective for a certain style of
|
||||
programming.
|
||||
|
||||
|
||||
@; -----------------------------------------------------------------------------
|
||||
@section[#:tag "sec:segfault:model"]{Elaborating array indexing}
|
||||
|
||||
@Figure-ref{fig:stlc} describes a simply typed @exact{$\lambda$} calculus
|
||||
with integers and integer arrays.
|
||||
|
@ -18,10 +64,10 @@ The operational semantics for the core language are given in @Figure-ref{fig:stl
|
|||
along with type judgments for two primitive operations:
|
||||
@exact{$\checkedref$} and @exact{$\unsaferef$}.
|
||||
Intuitively, @exact{$\checkedref$} is a memory-safe function that performs a bounds check before
|
||||
dereferencing an array and gives a runtime error when called with incompatible values.
|
||||
dereferencing an array and raises a runtime error when called with incompatible values.
|
||||
On the other hand, @exact{$\unsaferef$} does not perform a bounds check and therefore
|
||||
may return arbitrary data or raise a memory error when called with an invalid index.
|
||||
These negative consequences are modeled in the (family of) rule(s) @exact|{\textsc{E-UnsafeFail}}|.
|
||||
These negative consequences are modeled in the (family of) rule(s) @sc{E-UnsafeFail}.
|
||||
Consequently, the judgment @exact{$\smallstep{e}{e}$} and its transitive closure
|
||||
@exact{$\smallstepstar{e}{e}$} are relations, whereas
|
||||
typing and elaboration are proper functions.
|
||||
|
@ -56,6 +102,11 @@ Evaluation of the surface language in @Figure-ref{fig:stlc}, however,
|
|||
}
|
||||
|
||||
@proof{
|
||||
@; (shorter, but repeats following paragraph)
|
||||
@; The surface language does not allow @exact{$\unsaferef$} and elaboration
|
||||
@; does not introduce unsafe references, therefore the non-deterministic
|
||||
@; rule @sc{E-UnsafeFail} is never used.
|
||||
|
||||
By induction on terms of the core language, if
|
||||
@exact{$\smallstep{e}{e'}$} and @exact{$e$} is not of the form
|
||||
@exact{$\unsaferef~e_1~e_2$} then @exact{$e'$} is unique.
|
||||
|
@ -85,8 +136,8 @@ Additionally, the surface language is type safe.
|
|||
}
|
||||
|
||||
@proof{
|
||||
The interesting cases involve @exact{$\checkedref$}.
|
||||
First, @exact|{$\elabstoclosed{\aref{e_1}{e_2}}{\checkedref~e_1~e_2}{\tint}$}|
|
||||
The interesting cases are for array references, for which we observe that
|
||||
@exact|{$\elabstoclosed{\aref{e_1}{e_2}}{\checkedref~e_1~e_2}{\tint}$}|
|
||||
implies @exact{$\typestoclosed{e_1}{\tarray}$}
|
||||
and @exact{$\typestoclosed{e_2}{\tint}$}.
|
||||
Depending on the values of @exact{$e_1$} and @exact{$e_2$},
|
||||
|
@ -95,8 +146,6 @@ Additionally, the surface language is type safe.
|
|||
}
|
||||
|
||||
|
||||
@section{Extending the Elaborator}
|
||||
|
||||
Building on the foundation of type soundness, we extend the elaborator with
|
||||
the @emph{value-directed} rules in @Figure-ref{fig:elab1}.
|
||||
When the elaborator can prove that an array reference is in bounds based on
|
||||
|
@ -110,7 +159,7 @@ Conversely, the elaborator raises a compile-time index error if it can prove
|
|||
|
||||
Our thesis is that adding such rules does not undermine the safety guarantees of the
|
||||
original language.
|
||||
In this case, we recover the original guarantees by extending the proofs of
|
||||
In this case, we recover determinism and soundness by extending the proofs of
|
||||
type and memory safety to address the @sc{S-RefPass} and @sc{S-RefFail} elaboration rules.
|
||||
|
||||
@definition[@exact{$\elabarrowplus$}]{
|
||||
|
@ -126,43 +175,38 @@ In this case, we recover the original guarantees by extending the proofs of
|
|||
|
||||
@proof{
|
||||
With the original elaboration scheme, all array references @exact{$\aref{e_1}{e_2}$}
|
||||
elaborate to @exact{$\checkedref~e_1'~e_2'$}, where @exact{$e_1'$}
|
||||
elaborate via @sc{S-Ref} to @exact{$\checkedref~e_1'~e_2'$}, where @exact{$e_1'$}
|
||||
and @exact{$e_2'$} are the elaboration of @exact{$e_1$} and @exact{$e_2$}.
|
||||
There are now three possibilities:
|
||||
@itemize[
|
||||
@item{
|
||||
@exact{$e_1'$} is an array value @exact{$\vectorvn$}
|
||||
and @exact{$e_2'$} is an integer value @exact{$i \in \ints$}
|
||||
and @exact{$0 \le i < n$}.
|
||||
Both @sc{S-Ref} and @sc{S-RefPass} are possible elaborations,
|
||||
therefore @exact{$e'$} is either @exact{$\checkedref~\vectorvn~i$}
|
||||
or @exact{$\unsaferef~\vectorvn~i$}.
|
||||
Because @exact{$0 \le i < n$}, evaluation must proceed with
|
||||
@sc{E-CheckPass} or @sc{E-UnsafePass} in each case, respectively.
|
||||
These rules have the same result, @exact{$v_i$}.
|
||||
}
|
||||
@item{
|
||||
@exact{$e_1'$} is an array value @exact{$\vectorvn$}
|
||||
and @exact{$e_2'$} is an integer value @exact{$i \in \ints$}
|
||||
and either @exact{$i < 0$} or @exact{$i \ge n$}.
|
||||
The rules @sc{S-Ref} and @sc{S-RefFail} are possible elaborations.
|
||||
If @sc{S-Ref} is chosen, @exact{$e'$} is @exact{$\checkedref~\vectorvn~i$}
|
||||
and evaluates to @exact{$\indexerror$} because @exact{$i$} is outside
|
||||
the bounds of the array.
|
||||
If @sc{S-RefFail} is chosen an index error is raised immediately.
|
||||
}
|
||||
@item{
|
||||
Otherwise, either @exact{$e_1'$} or @exact{$e_2'$} is not a value
|
||||
form and we rely on the existing proof of determinism.
|
||||
}
|
||||
]
|
||||
References for which only @sc{S-Ref} applies remain determinsitic, but
|
||||
we have two cases where a new rule may also be used:
|
||||
@itemize[
|
||||
@item{ Case @sc{S-RefFail}:
|
||||
@exact{$e_1'$} is an array literal @exact{$\vectorvn$}
|
||||
and @exact{$e_2'$} is an integer literal @exact{$i \in \ints$}
|
||||
and either @exact{$i < 0$} or @exact{$i \ge n$}.
|
||||
If @sc{S-Ref} is chosen, @exact{$e'$} is @exact{$\checkedref~\vectorvn~i$}
|
||||
and evaluates to @exact{$\indexerror$} because @exact{$i$} is outside
|
||||
the bounds of the array.
|
||||
On the other hand, @sc{S-RefFail} raises the index error immediately.
|
||||
}
|
||||
@item{ Case @sc{S-RefPass}:
|
||||
@exact{$e_1'$} is an array literal @exact{$\vectorvn$}
|
||||
and @exact{$e_2'$} is an integer literal @exact{$i \in \ints$}
|
||||
and @exact{$0 \le i < n$}.
|
||||
If @sc{S-Ref} is chosen then @exact{$e'$} is @exact{$\checkedref~\vectorvn~i$}.
|
||||
If @sc{Ref-RefPass} is chosen then @exact{$e'$} is @exact{$\unsaferef~\vectorvn~i$}.
|
||||
Because @exact{$0 \le i < n$}, evaluation must proceed with
|
||||
@sc{E-CheckPass} or @sc{E-UnsafePass} respectively, producing
|
||||
@exact{$v_i$} in any event.
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
For practical purposes, non-determinism in the elaborator should be
|
||||
resolved giving the rules in @Figure-ref{fig:elab1} preference over the rules
|
||||
in @Figure-ref{fig:elab0}.
|
||||
But the result will be the same in any event.
|
||||
|
||||
resolved by giving the rules in @Figure-ref{fig:elab1} preference over the
|
||||
rules in @Figure-ref{fig:elab0}.
|
||||
But knowing that the result is the same for the non-identity elaborations
|
||||
gives us confidence in their correctness.
|
||||
|
||||
@theorem["soundness"]{
|
||||
If @exact{$e$} is a closed term then @exact|{$\elabstoclosedplus{e}{e'}{\tau}$}|
|
||||
|
@ -174,7 +218,7 @@ In this case, we recover the original guarantees by extending the proofs of
|
|||
We extend the existing proof with cases for @exact{$\unsaferef$}.
|
||||
@itemize[
|
||||
@item{
|
||||
If @sc{S-RefPass} is applied by the elaborator then @exact{$e'$} is
|
||||
Case @sc{S-RefPass}: @exact{$e'$} is
|
||||
the call @exact{$\unsaferef~\vectorvn~i$} and @exact{$0 \le i < n$}.
|
||||
By assumption, @exact|{$\typestoclosed{\vectorvn}{\tarray}$}| and
|
||||
@exact{$\typestoclosed{i}{\tint}$}.
|
||||
|
@ -182,24 +226,194 @@ In this case, we recover the original guarantees by extending the proofs of
|
|||
to the integer value @exact{$v_i$} by @sc{E-UnsafePass}.
|
||||
}
|
||||
@item{
|
||||
If @sc{S-RefFail} is applied by the elaborator then @exact{$e'$} is
|
||||
Case @sc{S-RefFail}: then @exact{$e'$} is
|
||||
@exact{$\indexerror$} and type soundness follows trivially.
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
The key step in both theorems was that elaboration supplied the proposition
|
||||
@exact{$0 \le i < n$}.
|
||||
Both the type rules and evaluation rules depended on this premise.
|
||||
The key step in both theorems is that elaboration supplies the proposition
|
||||
@exact{$0 \le i < n$} required for safe evaluation.
|
||||
So long as assumptions like the above are properly stated by language implementors,
|
||||
type tailoring library authors can help to meet them.
|
||||
|
||||
|
||||
@section{In Practice}
|
||||
@; -----------------------------------------------------------------------------
|
||||
@section[#:tag "sec:segfault:implementation"]{Implementing the elaborator}
|
||||
|
||||
Have implemented for typed racket
|
||||
- library
|
||||
- with enhancements from @todo{secref}
|
||||
- via expander, so typechecked afterwards
|
||||
@; WHY ... WHY ARE THEY ALWAYS PLAYING JAZZ IN THE LOBBY OF THIS HOTEL ???
|
||||
@; (Asked the concierge: it's always on, on a loop. For Christmas it's Christmas jazz)
|
||||
|
||||
Checked in
|
||||
- standard distro (includes plot,math,stats)
|
||||
- pict3d
|
||||
We have implemented the rules in @Figure-ref{fig:elab1} as a syntax extension for Typed Racket.
|
||||
Including imports and exports, the implementation is an 18 line module (@Figure-ref{fig:vector-ref-extension}).
|
||||
|
||||
@figure["fig:vector-ref-extension" "Syntax extension for array references"
|
||||
@codeblock{
|
||||
#lang typed/racket ;; vector-ref-extension.rkt
|
||||
(require
|
||||
(only-in racket/unsafe/ops unsafe-vector-ref)
|
||||
(for-syntax racket/base syntax/parse))
|
||||
|
||||
(define-syntax (-vector-ref stx)
|
||||
(syntax-parse stx
|
||||
[(_ #(e* ...) i)
|
||||
#:when (integer? (syntax->datum #`i))
|
||||
;; Case 1: constant args
|
||||
(define v-val (syntax->datum #`(e* ...)))
|
||||
(define i-val (syntax->datum #`i))
|
||||
(if (< -1 i-val v-val)
|
||||
;; then S-RefPass
|
||||
#`(unsafe-vector-ref '#(e* ...) i)
|
||||
;; else S-RefFail
|
||||
(error 'vector-ref "~a ~a" v-val i-val))]
|
||||
[(_ args ...)
|
||||
;; Case 2: S-Ref
|
||||
#`(vector-ref args ...)]))
|
||||
|
||||
(provide (rename-out [-vector-ref vector-ref]))
|
||||
}
|
||||
]
|
||||
|
||||
First, the @racket[require] statement imports the @racket[unsafe-vector-ref]
|
||||
function to the runtime environment and the libraries @racket[racket/base]
|
||||
and @racket[syntax/parse] to the compile-time environment.
|
||||
We name the extension @racket[-vector-ref] to avoid shadowing the uses of
|
||||
Typed Racket's @racket[vector-ref] in Case 2 of the extension.
|
||||
At the end of the module, the @racket[provide] statement renames @racket[-vector-ref]
|
||||
to replace the default @racket[vector-ref] in importing modules.
|
||||
|
||||
Code within @racket[-vector-ref] is evaluated at compile-time to
|
||||
transform all calls and uses of the extension in an importing module.
|
||||
For instance, calling @racket[(vector-ref x 4)] will invoke
|
||||
@racket[-vector-ref] with the expression @racket[(vector-ref x 4)] bound to the
|
||||
formal parameter @racket[stx].
|
||||
@; A higher-order use like @racket[(map vector-ref vs is)] will invoke @racket[-vector-ref]
|
||||
@; with @racket[stx] bound to the identifier @racket[vector-ref] before evaluating
|
||||
@; the call to @racket[map].
|
||||
In any event, the first task of @racket[-vector-ref] is to destructure its argument
|
||||
@racket[stx] using the @racket[syntax-parse] form.
|
||||
We consider three cases.
|
||||
The first matches expressions with three elements, like @racket[(vector-ref #(0) 0)],
|
||||
where the first element is anything, the second is a vector literal, and the third
|
||||
is an integer.
|
||||
Integer literals are recognized by the @racket[#:when] clause, which extracts
|
||||
the value from a @emph{pattern variable} @racket[i] and tests whether this value
|
||||
is an integer.
|
||||
If this first match is successful, then we implement the @sc{S-RefPass}
|
||||
and @sc{S-RefFail} rules by comparing the value of the integer literal
|
||||
contained in @racket[i] against the number of elements captured by the zero-or-more
|
||||
pattern @racket[(e ...)].
|
||||
When the reference is in bounds, we use the constructor @|stx|
|
||||
to produce code.@note[@elem{If it helps, you can mentally replace all @|stx|
|
||||
with the arrow @exact{$\elabarrow$} from our model.}]
|
||||
The second and third cases are simpler.
|
||||
The second elaborates any call to @racket[-vector-ref] into a @racket[vector-ref]
|
||||
call---even calls made with zero or seven arguments.
|
||||
We let the type checker deal with such erroneous cases.
|
||||
@; The third case replaces higher-order calls of @racket[-vector-ref] with
|
||||
@; higher-order calls to Typed Racket's @racket[vector-ref] function.
|
||||
|
||||
Existing programs can use the extension by adding a 1-line import statement.
|
||||
|
||||
@codeblock{
|
||||
#lang typed/racket
|
||||
(require vector-ref-extension)
|
||||
....
|
||||
}
|
||||
|
||||
The import shadows @racket[vector-ref] from the @racket[typed/racket] language,
|
||||
replacing all occurrences with calls to our syntax extension.
|
||||
As Typed Racket processes the code abbreviated as @racket[....] above,
|
||||
each vector reference is expanded to either a Typed Racket (checked)
|
||||
@racket[vector-ref], an @racket[unsafe-vector-ref], or a compile-time @racket[error].
|
||||
|
||||
|
||||
@; -----------------------------------------------------------------------------
|
||||
@section[#:tag "sec:segfault:evaluation"]{Evaluation}
|
||||
|
||||
@(let* ([vrx #rx"\\(vector-ref "]
|
||||
[vr-file* (for/list ([fn (sequence-append
|
||||
(in-glob "benchmark/vector/*.rkt")
|
||||
(in-glob "benchmark/vector/*/*.rkt"))]
|
||||
#:when (with-input-from-file fn
|
||||
(lambda ()
|
||||
(regexp-match vrx (current-input-port)))))
|
||||
fn)]
|
||||
;; : (Listof (List Symbol Num-Hit Num-Miss))
|
||||
[all-file+optz* (with-cache "vector-ref-optz"
|
||||
(lambda ()
|
||||
(profile-point "counting vector-ref optz")
|
||||
(let-values ([(_in _out) (make-pipe)])
|
||||
(parameterize ([current-output-port _out])
|
||||
(displayln "(")
|
||||
(for-each collect-and-summarize vr-file*)
|
||||
(displayln ")"))
|
||||
(close-output-port _out)
|
||||
(let ([v (read _in)])
|
||||
(close-input-port _in)
|
||||
(begin0
|
||||
(for/list ([d (in-list v)])
|
||||
(cons (car d)
|
||||
(or (for/first ([kvvv (in-list (cdr d))]
|
||||
#:when (eq? (car kvvv) 'vector-ref))
|
||||
(list (cadr kvvv) (caddr kvvv)))
|
||||
(list 0 0))))
|
||||
(profile-point "done w/ vector-ref optz")))))
|
||||
#:read (lambda (f+o*)
|
||||
(and
|
||||
(for/and ([f+o (in-list f+o*)]
|
||||
[fn (in-list vr-file*)])
|
||||
(eq? (car f+o) (string->symbol fn)))
|
||||
f+o*)))]
|
||||
[file+optz* (filter (compose1 positive? cadr) all-file+optz*)]
|
||||
[hit-count (length file+optz*)]
|
||||
[miss-count (- (length all-file+optz*) hit-count)]
|
||||
[num-optz (for/sum ([f+t (in-list file+optz*)]) (cadr f+t))])
|
||||
;; -- for missed optz, take `caddr` of element of `file+optz*`
|
||||
@list[
|
||||
@figure["fig:vector-table" @elem{Summary of @racket[vector-ref] evaluation}
|
||||
@graphical-summary[
|
||||
#:hit-count hit-count
|
||||
#:miss-count miss-count
|
||||
#:bar-data (for/list ([fhm (in-list (sort file+optz* < #:key cadr))])
|
||||
(define h (cadr fhm))
|
||||
(define m (caddr fhm))
|
||||
(list (format "~a" h) (* 100 (/ h (+ h m)))))
|
||||
#:bar-title "Percent of Array References Optimized"
|
||||
#:bar-x (format "# Optimized refs by module") ; (~a total)" num-optz
|
||||
#:bar-y "%"
|
||||
#:y-max 100
|
||||
]
|
||||
]
|
||||
@elem{
|
||||
The above implementation works for toy examples, but useful programs often
|
||||
give constant values a name.
|
||||
For example, the implementation of @racket[gzip] used in the main Racket distribution
|
||||
declares array constants to implement a Huffman tree and heap.
|
||||
To accomodate this and similar idioms, we added syntax extensions for binding
|
||||
data to identifiers and constant folding for arithmetic operations.
|
||||
These additions are justified in @Secref{sec:define}, but they are straightforward
|
||||
extensions of the technique described in this section.
|
||||
|
||||
With name-tracking, we were able to optimize
|
||||
two static array references in the @racket[gzip] implementation,
|
||||
one in @racket[hangman],
|
||||
12 references in a @racket[minesweeper] game, and
|
||||
480 references in an implementation of @racket[parcheesi].
|
||||
All three programs follow a general pattern of declaring a fixed-size vector
|
||||
in the scope of a module and implementing helper functions to manipulate the vector.
|
||||
The unusual success of @racket[parcheesi] was due to an inlining macro that
|
||||
would unroll a loop over a vector of pawns to straight-line array
|
||||
references at constant offsets.
|
||||
|
||||
These successes are modest,@note{Especially since we analyzed over 120 files using
|
||||
arrays for places to optimize.}
|
||||
but encouraging since the target audience for these
|
||||
extensions are script writers.
|
||||
In prototype and throwaway code, we expect programmers to use more constant
|
||||
values and make more index errors.
|
||||
Our extensions catch some of these errors without imposing any annotation burden
|
||||
or increasing compile times --- in the files we analyzed, we observed no
|
||||
statistically significant difference compiling with and without our extensions
|
||||
enabled.
|
||||
}])
|
||||
|
|
|
@ -37,23 +37,35 @@
|
|||
\newcommand{\tvar}[1]{\mathsf{#1}}
|
||||
\newcommand{\tnat}{\tvar{Natural}}
|
||||
\newcommand{\tarray}{\tvar{Array}}
|
||||
\newcommand{\tdictgen}[1]{\llparenthesis #1 \rrparenthesis} %% yuck man
|
||||
\newcommand{\tdictn}{\tdictgen{l_0 : \tau_0, \ldots,l_{n-1} : \tau_{n-1}}}
|
||||
\newcommand{\tint}{\tvar{Int}}
|
||||
\newcommand{\toption}[1]{\tvar{Option}\,#1}
|
||||
\newcommand{\tnum}{\tvar{Num}}
|
||||
\newcommand{\tstring}{\tvar{String}}
|
||||
\newcommand{\tlist}[1]{\tvar{List~#1}}
|
||||
\newcommand{\naturals}{\mathbb{N}}
|
||||
\newcommand{\ints}{\mathbb{Z}}
|
||||
|
||||
%% -- terms
|
||||
\newcommand{\dictgen}[1]{\{ #1 \}}
|
||||
\newcommand{\dictvn}{\dictgen{l_0\!=\!v_0, \ldots, l_{n-1}\!=\!v_{n-1}}}
|
||||
\newcommand{\vectorgen}[1]{\langle #1 \rangle}
|
||||
\newcommand{\vectoren}{\vectorgen{e_0, \ldots, e_{n-1}}}
|
||||
\newcommand{\vectorvn}{\vectorgen{v_0, \ldots, v_{n-1}}}
|
||||
\newcommand{\vectorxn}{\vectorgen{x_0, \ldots, x_{n-1}}}
|
||||
\newcommand{\vlam}[2]{\lambda\,#1\,.\,#2}
|
||||
\newcommand{\vlet}[3]{\mathsf{let}\,#1 = #2\,\mathsf{in}\,#3}
|
||||
\newcommand{\vnone}{\mathsf{None}}
|
||||
\newcommand{\vsome}[1]{\mathsf{Some}\,#1}
|
||||
\newcommand{\aref}[2]{#1@#2}
|
||||
\newcommand{\checkedref}{\tvar{checked\mbox{-}ref}}
|
||||
\newcommand{\unsaferef}{\tvar{unsafe\mbox{-}ref}}
|
||||
|
||||
\newcommand{\checkedref}{\RktMeta{checked-ref}}
|
||||
\newcommand{\unsaferef}{\RktMeta{unsafe-ref}}
|
||||
\newcommand{\rxm}{\RktMeta{rx-match}}
|
||||
\newcommand{\segfault}{\mathsf{segfault}}
|
||||
\newcommand{\indexerror}{\mathsf{IndexError}}
|
||||
\newcommand{\syntaxerror}{\mathsf{SyntaxError}}
|
||||
|
||||
%% -- evaluation contexts
|
||||
\newcommand{\ectx}{E}
|
||||
|
@ -64,6 +76,11 @@
|
|||
\newcommand{\tenvempty}{\cdot}
|
||||
\newcommand{\tenvcons}[3]{#1:#2,#3}
|
||||
|
||||
%% -- proof environments
|
||||
\newcommand{\penv}{\Sigma}
|
||||
\newcommand{\penvempty}{\tenvempty}
|
||||
\newcommand{\penvcons}[3]{\tenvcons{#1}{#2}{#3}}
|
||||
|
||||
%% -- typing
|
||||
\newcommand{\typestogen}[3]{#1 \vdash #2 : #3}
|
||||
\newcommand{\typestoclosed}[2]{\typestogen{\tenvempty}{#1}{#2}}
|
||||
|
@ -77,11 +94,37 @@
|
|||
\newcommand{\elabstoclosed}[3]{\elabstogen{\tenvempty}{#1}{\elabarrow}{#2}{#3}}
|
||||
\newcommand{\elabsto}[3]{\elabstogen{\tenv}{#1}{\elabarrow}{#2}{#3}}
|
||||
|
||||
%% -- elaboration II
|
||||
\newcommand{\provestogen}[7]{\typestogen{#1;#2}{#3 #4 #5}{#6 \dashv #7}}
|
||||
\newcommand{\provesto}[4]{\provestogen{\penv}{\tenv}{#1}{\elabarrow}{#2}{#3}{#4}}
|
||||
|
||||
%% -- elaboration types
|
||||
\newcommand{\pmap}{\phi}
|
||||
\newcommand{\pmapcons}[3]{#1[#2 \mapsto #3]}
|
||||
\newcommand{\pdom}{\kappa}
|
||||
\newcommand{\parrow}{\mapsto}
|
||||
\newcommand{\pvec}{\mathcal{V}}
|
||||
\newcommand{\prx}{\mathcal{R}}
|
||||
\newcommand{\pint}{\mathcal{I}}
|
||||
|
||||
%% -- evaluation
|
||||
\newcommand{\bigstep}[2]{#1 \Downarrow #2}
|
||||
\newcommand{\bigstepplus}[2]{#1 \Downarrow^+ #2}
|
||||
\newcommand{\smallstepstar}[2]{#1 \rightarrow^* #2}
|
||||
\newcommand{\smallstep}[2]{#1 \rightarrow #2}
|
||||
\newcommand{\smallsteparrow}{\rightarrow}
|
||||
\newcommand{\smallstepstar}[2]{#1 \smallsteparrow^* #2}
|
||||
\newcommand{\smallstep}[2]{#1 \smallsteparrow #2}
|
||||
|
||||
%% -- subtyping
|
||||
\newcommand{\subt}{\le:}
|
||||
\newcommand{\subtypesto}[2]{#1 \subt #2}
|
||||
|
||||
%% -- metafunctions
|
||||
%\newcommand{\mgroups}{\textsf{groups}}
|
||||
\newcommand{\msup}{\mathsf{sup}}
|
||||
\newcommand{\mlang}{\mathcal{L}}
|
||||
\newcommand{\msubstrings}{\mathcal{S}}
|
||||
\newcommand{\mset}[1]{\{\,#1\,\}}
|
||||
\newcommand{\mrset}[2]{\{\,#1 \mid #2\,\}}
|
||||
|
||||
%% -- misc
|
||||
\newcommand{\esubst}[2]{[#2/#1]}
|
||||
|
|
|
@ -64,18 +64,18 @@
|
|||
|
||||
(define-syntax define: (make-keyword-alias 'define
|
||||
(lambda (stx)
|
||||
(or (format-define stx)
|
||||
(or (rx-define stx)
|
||||
(format-define stx)
|
||||
(num-define stx)
|
||||
(lst-define stx)
|
||||
(rx-define stx)
|
||||
;(fun-define stx)
|
||||
(vec-define stx)))))
|
||||
|
||||
(define-syntax let: (make-keyword-alias 'let
|
||||
(lambda (stx)
|
||||
(or (format-let stx)
|
||||
(or (rx-let stx)
|
||||
(format-let stx)
|
||||
;(fun-let stx)
|
||||
(num-let stx)
|
||||
(lst-let stx)
|
||||
(rx-let stx)
|
||||
(vec-let stx)))))
|
||||
|
|
Loading…
Reference in New Issue
Block a user