[checkpoint]

This commit is contained in:
Ben Greenman 2016-06-27 15:57:06 -04:00
parent e70e0d3867
commit 3703cee372
15 changed files with 740 additions and 522 deletions

View File

@ -8,7 +8,6 @@
paper.tex
pearl.tex
benchmark/
compiled/
*.swp
*.swo

View File

@ -10,6 +10,10 @@ ${PAPER}.pdf: pkg setup texstyle.tex
++extra fig-stlc-core.tex \
++extra fig-elab0.tex \
++extra fig-elab1.tex \
++extra fig-elab-regexp.tex \
++extra fig-elab-sigma.tex \
++extra fig-regexp-lib.tex \
++extra fig-stlc-dict.tex \
++extra mathpartir.sty \
++style texstyle.tex \
--pdf $(PAPER).scrbl

View File

@ -63,3 +63,21 @@ rust will be difficult
5. ending
---
0. finish sql search
1. revise 3,4 with MODEL IMPL, EVAL
2. do 5 better, not sure how, figure first
related work
- dep types
- occurrence types TR optimizer (no theoretical justifications)
- yes we did macros, yes that's been done (fisher, hermen)
- haskell
conclusion
- just one way of doing this, can reimagine typed racket, TC + elab
= stephen chang
regexP;
-
- put the (list ...) back

View File

@ -1,8 +1,14 @@
#lang scribble/sigplan @onecolumn
@require["common.rkt" pict racket/class racket/draw]
@; Goal of Sec 2 is to introduce background on TR needed to:
@; - understand our bias
@; - understand our examples
@; - understand "type elaborator api"
@title[#:tag "sec:background"]{If You Know What I Mean}
@require["common.rkt"]
@title[#:tag "sec:background"]{Towards a Type Elaborator API}
@; A poet should be of the
@; old-fashioned meaningless brand:
@; obscure, esoteric, symbolic,
@ -12,370 +18,254 @@
@; I'll gladly explain what it means
@; till you don't understand it.
@; Computers understand a very limited set of instructions.
@; It is tedious and error-prone to describe even simple thoughts in terms of these
@; instructions, but programming languages provide abstraction mechanisms
@; that can make the process bearable.
@; For example, low-level or assembly languages can implement the following
@; mathematical function for referencing an element of a fixed-length
@; data structure:
@;
@; @exact|{\[
@; \RktMeta{ref} = \lambda (\RktMeta{v}~\RktMeta{i})
@; \left\{\begin{array}{l l}
@; \RktMeta{v}_{i+1} & \RktMeta{if}~\RktMeta{v} = \vectorvn
@; \\ ~&\RktMeta{and}~\RktMeta{i} \in \mathbb{N}
@; ~\RktMeta{and}~i < n
@; \\[4pt]
@; \bot & \RktMeta{otherwise}
@; \end{array}\right.
@; \]}|
@;
@; @;Here we use the notation @exact|{$\langle \RktMeta{v}_0 \ldots \RktMeta{v}_{n-1} \rangle$}|
@; @; to describe a vector of @exact{$n$} elements.
@; Whether the implementation is a labeled sequence of instructions or a C function
@; is not important; what matters is the precise specification
@; and our ability to compute the function via machine.
@; Memory-safe functions with clear semantics are the core building blocks for
@; sane programs.
@;
@; As programs grow in size, complexity, and importance, it is desirable to have
@; some guarantees about how a program will run before actually running the
@; program.
@; Ideally, we would statically prove that @racket[ref] is
@; never applied to arguments that are not vectors or to out-of-bounds indices.
@; @;Such a proof would imply that every call @racket[(ref v i)] in the program
@; @; would yield a non-@|bot| value and help show that the overall program works as intended.
@; But proving such properties is difficult and there are often many
@; functions in a program for which we seek guarantees, so we settle for
@; approximate results.
@; Instead of statically ruling out all calls @racket[(ref v i)] that produce
@; @|bot|, we strive to eliminate a large subset automatically.
@;
@; Type systems have emerged as a powerful and convenient way of statically detecting
@; errors.
@; Instead of tracking the flow of exact values through a program, a type system
@; tracks the flow of @emph{types} denoting a range of possible runtime values.
@; Depending on the type system and programmers' discipline using it, the range
@; might be limited enough to show that @racket[ref] never returns @|bot|
@; in a program; however, the tradeoff is always the time and energy programmers
@; are willing to invest in writing down and maintaining type information.
@; As it stands, the most widely used type systems are those that require
@; minimal annotations from the programmer and catch only shallow, common errors.
@;
@;
@; @section{Problem Statement and Promises}
@;
@; @let*[([all-w 150]
@; [lang-w (/ all-w 2)]
@; [lang-h 20]
@; [lang-rectangle
@; (lambda (brush-style
@; fill-color
@; #:width [lang-w lang-w] ;; getting lazy there ben
@; #:height [lang-h lang-h]
@; #:slant-left [slant-left #f]
@; #:border-color [maybe-border-color #f]
@; #:border-width [border-width 3])
@; (dc (lambda (dc dx dy)
@; (define old-brush (send dc get-brush))
@; (define old-pen (send dc get-pen))
@; (define border-color (or maybe-border-color fill-color))
@; (send dc set-brush
@; (new brush% [style brush-style] [color fill-color]))
@; (send dc set-pen
@; (new pen% [width border-width] [color border-color]))
@; ;; --
@; (define path (new dc-path%))
@; (send path move-to 0 0)
@; (if slant-left
@; (begin (send path line-to (- 0 slant-left) (/ lang-h 2))
@; (send path line-to 0 lang-h))
@; (send path line-to 0 lang-h))
@; (send path line-to lang-w lang-h)
@; (send path line-to lang-w 0)
@; (send path close)
@; (send dc draw-path path dx dy)
@; ;; --
@; (send dc set-brush old-brush)
@; (send dc set-pen old-pen)) lang-w lang-h))]
@; [all-e (rectangle all-w lang-h)]
@; [hshim (blank (/ all-w 6) lang-h)]
@; [untyped-e (hc-append hshim (lang-rectangle 'vertical-hatch "CornflowerBlue" #:border-width 4 #:height (+ lang-h 2)))]
@; [typed-e (hc-append (lang-rectangle 'horizontal-hatch "Coral" #:border-width 2) hshim)]
@; [reality-of-ml (rc-superimpose (lc-superimpose all-e untyped-e) typed-e)]
@; [value-e
@; (let ([s 'solid]
@; [c "LimeGreen"]
@; [w (- lang-w lang-h)]
@; [b 1])
@; (hc-append (blank (* lang-h 0.8) lang-h)
@; (lang-rectangle s c #:width (/ lang-w 3) #:height (/ lang-h 3) #:border-width b)
@; (lang-rectangle s c #:border-width b)))]
@; [reality-of-tr (lc-superimpose reality-of-ml value-e)]
@; )]{@list{
@; No matter how precise the type system, static analysis cannot recognize
@; all correct programs and reject all incorrect programs.
@; Layering a type system on top of an untyped programming language therefore
@; leads to a world where the space of all syntactically valid programs
@; (drawn as a white box, of course) is partitioned into two overlapping sets.
@;
@; @centered[reality-of-ml]
@;
@; On the left, outlined in blue with vertical hatches, we have the set of all untyped programs
@; that produce meaningful results (read: non-error, non-diverging terms).
@; On the right, outlined in orange with horizontal hatches, we have the set of all programs
@; approved by the type system.
@; Some type-correct programs raise runtime errors when evaluated, and some
@; meaningful programs are rejected by the type checker.
@;
@; Again, the impossible goal is for the highlighted boxes to overlap.
@; Our argument is that adding a @emph{syntactic elaboration} phase before
@; typechecking can yield a language described by the
@; solid green area shown below, capturing more of the meaningful untyped
@; programs and fewer of the typed programs that go wrong, though rejecting
@; some typed programs that run to completion but contain a subterm that would
@; raise an exception if evaluated (Theorem @exact|{\ref{thm:relevance}}|).
@;
@; @centered[reality-of-tr]
@; }}
@;
@; Starting from an untyped programming language built from a grammar of terms
@; @exact{$e$} with a reduction relation @exact{$e \Downarrow v$}
@; and a type judgment @exact{$\vdash e : \tau$} relating terms to
@; values @exact{$v$} and types @exact{$\tau$}, we derive an extended language
@; by inserting a function @exact{$\elaborate(e)$} before type checking.
@; The extended language has a type system @exact{$\vdashe$} consisting of a single
@; rule:
@;
@; @exact|{
@; \begin{mathpar}
@; \inferrule*{
@; \vdash \elaborate(e) : \tau
@; }{
@; \vdashe e : \tau
@; }
@; \end{mathpar}
@; }|
@;
@; We also derive a reduction relation on typed terms @exact{$\vdashe e : \tau$}
@; as @exact{$\elaborate(e) \Downarrow v$}.
@; Type soundness for @exact{$\vdashe$} is thus a corollary of
@; type soundness for the @exact{$\vdash$} judgment.
@;
@; The output of @exact{$\elaborate(e)$} is a term @exact{$e'$},
@; though @exact{$e'$} may be a labeled error term
@; @exact|{$\bot^{e''}$}| meaning the elaborator detected an error in the program.
@; Terms @exact{$e$} such that @exact{$\elaborate(e)$} type checks but @exact{$e$}
@; does not are newly expressible in the extended system.
@; Terms @exact{$e$} such that @exact{$\vdash e : \tau$} but
@; @exact|{$\elaborate(e) = \bot^{e'}$}| are programs validated by the typechecker
@; but rejected by the extended system because the subterm @exact{$e'$} of @exact{$e$}
@; would raise a runtime error if evaluated.
@;
@; Our main contribution is defining a useful collection of @emph{local}
@; transformations to perform during elaboration.
@; Each local elaboration is proven correct in isolation.
@; We then define @exact{$\elaborate(e)$} as the union of these local cases
@; and a default elaboration that applies @exact{$\elaborate$} to all
@; subterms of @exact{$e$}.
@;
@; To avoid reasoning about terms post-elaboration we will describe
@; our local elaborations with type rules of the form: @todo{maybe drop this}
@;
@; @exact|{
@; \begin{mathpar}
@; \inferrule*{
@; \prope(f, e)
@; \\\\
@; \vdash f : \tau_f
@; \\
@; \vdash e : \tau_e
@; }{
@; \vdashe f~e : \tau
@; }
@; \end{mathpar}
@; }|
@;
@; where @exact{$\prope$} is a proposition about syntactic terms
@; @exact{$f$} and @exact{$e$}
@; used to justify the transformation @exact|{$\elaborate(f~e)$}|.
@; Despite this inference rule shorthand, elaborations and @exact{$\prope$}
@; conditions run before typechecking and have no access to type information
@; in the program.
@;
@; The relationship between terms @exact{$e$} and @exact{$\elaborate(e)$} is given
@; by three theorems: @todo{see next section for now}.
@;
@; Correct elaborations obey these theorems.
@; @; Note: there is no guarantee that elaboration produces a well-typed term
@; @; because calls like @exact{$\elaborate($}@racket[ref 0 0]@exact{$)$} are allowed.
@;
@; @; We refrain from making stronger statements about elaborate because
@; @; real systems will have lots of other elaborations.
@; TODO
@; - can we not use "Typed Racket" as the first word of this section?
@; to me that's a huge turn-off after reading the introduction.
@; - missing answer: why are we using TR
Typed Racket does not have a type elaboration API; however, it inherits a rich
syntax extension system from its host language, Racket.
Experience with Racket syntax extensions (aka macros) motivates our proposal for
a similar type elaboration system, presented in @Secref{subsec:api}.
Moreover, we have implemented the transformations described in @Secref{sec:define}
as a Typed Racket package
and this section is intended to prepare the way for later code snippets.
@section{Syntax Extensions}
@section{Typed Racket, today}
Syntax extension systems have been incorporated in
a number of mainstream programming languages.
Essentially, syntax extensions let the programmer write code that generates code.
In practice this gives programmers the ability to extend the syntax of their
language, abstract over textual patterns, and control the evaluation order
of expressions.
Typed Racket is an ongoing experiment in language design, the success of which
is a testament to the usefulness and versatility of syntax extensions.
The entire language is implemented as a Racket library; in particular, a
library of syntax extensions.
Types in Typed Racket are distinguished Racket terms, given special meaning by
the type checker.
The checker itself is nothing more than a Racket function defined over
type-annotated programs and run before the program is compiled.
When typechecking succeeds, the annotations are erased and the resulting
program is fed to the Racket compiler.
As such, there is no need for a dedicated ``Typed Racket compiler''.
Type-driven optimizations occur on core Racket forms just after type checking
and the result feeds in to the existing compiler toolchain.
Racket's syntax extension API is particularly mature and has inspired
similar APIs in at least two other languages, so we adopt it here to introduce
syntax extensions.
As a first example, suppose we want to assert that expressions
@racket[e_1], @racket[e_2], @racket[e_3], and @racket[e_4] all evaluate to
the same value.
By hand, we might write a sequence of unit tests:
@racketblock[
(begin (check-equal? e_2 e_1)
(check-equal? e_3 e_1)
(check-equal? e_4 e_1))
]
but these tests follow a simple pattern that we can abstract as a @emph{syntax rule},
using ellipses (@racket[...]) to capture an arbitrary number of expressions.
@racketblock[
(define-syntax-rule (check-all-equal? e_1 e_rest ...)
(begin (check-equal? e_rest e_1) ...))
]
Our tests can now be written more concisely:
@racketblock[
(check-all-equal? e_1 e_2 e_3 e_4)
]
making them easier to read and maintain.
Moreover, we can easily improve the rule to evaluate @racket[e_1] only once
instead of @math{n-1} times:
@racketblock[
(define-syntax-rule (check-all-equal? e_1 e_rest ...)
(let ([v e_1])
(check-equal? e_rest v) ...))
@; One might expect this is kinda slow. Maybe that's true. But has benefits and users.
For a concrete example, have a left-leaning factorial function:
@codeblock{
#lang typed/racket
(define (fact (n : Natural)) : Natural
(foldl * n (range 1 n)))
}
Aside from the type annotations, the function is ordinary Racket code
and behaves as such when run.
But the syntax accepted by Typed Racket's @racket[define] is a superset of
valid, untyped Racket.
Furthermore, this @racket[define] has an extended semantics.
When the above program is compiled, @racket[define] registers the identifier
@racket[fact] in a type environment with the signature @racket[(Natural -> Natural)].
The definition then expands to an annotation-free Racket @racket[define] and
the same expansion-and-type-collection process is repeated on the body of @racket[fact].
So just as Typed Racket re-uses the Racket compiler, Typed Racket's @racket[define]
re-uses the semantics of Racket's.
This sleight of hand is accomplished by shadowing @racket[define] with a syntax
extension that moves types from the program syntax to a type environment---and
crucially, does nothing more.
Note, however, that no types are checked at this point.
It is only after the entire program is expanded to core Racket and all type
definitions collected that types are checked and e.g. the type variables
for @racket[foldl] are instantiated.
Waiting until all syntax extensions are expanded is both a pragmatic choice
and allows extensions to create and refine types written in a program without
subverting the authoritarian type checker.
@; not really liking 'pragmatic' but I guess it should be obvious, that's the
@; easiest way to implement a TC for Racket
@section{Racket Macros, quickly}
Having built some intuition for how Typed Racket's @racket[define] operates,
we use its definition to introduce Racket macros.@note{After this section,
we will stop using the term @emph{macro} in favor of the more general
phrase @emph{syntax extension}.}
The following is paraphrased from Typed Racket and elaborates a type-annotated
@racket[define] to an unannotated one.
@racketblock[
(define-syntax (-define stx)
(syntax-parse stx #:literals (:)
[(-define (nm:id (arg:id : ty)) : ret-ty body ...)
(define type #`(ty -> ret-ty))
(register-type #`nm type)
#`(define (nm arg) body ...)]))
]
Going line-by-line, we have:
@; TODO what is the point of each?
@; TODO what is the bottom line?
@itemlist[
@item{
@racket[define-syntax] declares a function on code; in other words,
a macro.
The formal parameter @racket[stx] is so named because it always binds
a @emph{syntax object} representing the context of a call to the
@racket[-define] macro.
}
@item{
@racket[syntax-parse] is pattern matching for syntax objects.
The optional argument @racket[#:literals (:)] causes any @racket[:] characters
in a @racket[syntax-parse] pattern to match only against the @racket[:]
identifier bound in the current lexical scope.
Normally the variable @racket[:] is no different from the pattern @racket[x]
or the pattern @racket[any-variable].
}
@item{
The third line of the macro is a pattern.
The remaining lines are instructions to perform if the pattern matches.
Within the pattern:
@itemlist[
@item{
@racket[-define], @racket[nm], @racket[arg], @racket[ty],
@racket[ret-ty], and @racket[body] are @emph{pattern variables} matched to
sub-expressions of @racket[stx].
}
@item{
The ellipses (@racket[...]) are part of the grammar of @racket[syntax-parse]
and match zero or more occurrences of the previous pattern,
to the effect that @racket[body ...] matches a list of consecutive expressions.
}
@item{
@racket[:id], as in @racket[nm:id] and @racket[arg:id], is a
@emph{syntax class} annotation.
Using the @racket[id] syntax class causes @racket[nm] and @racket[arg]
to only match identifiers and not, for instance, integer constants or
parenthesized expressions.
}
]
}
@item{
@|stx| creates a syntax object from an expression.
In this case, the syntax object is a function type, built from our pattern
variables and Typed Racket's @racket[->] constructor.
}
@item{
@racket[register-type] binds an identifier to a type in a global type environment.
Since the pattern variable @racket[nm] is only bound to a symbol, we use
the syntax constructor @|stx| to associate the symbol with
the lexical scope of @racket[stx].
}
@item{
The result of any @racket[syntax-parse] pattern must be a syntax object.
Here we build a Racket @racket[define] statement from the relevant pattern
variables.
On this line, the ellipses (@racket[...]) are used in a dual sense to
splice the contents of the list @racket[body] into the body of the new
@racket[define] statement.
}
]
Intuitively, syntax rules perform a search-and-replace before the program
is compiled; however, the replacement process is careful to preserve the
lexical structure of programs.
For instance, a program that uses the same variable name @racket[v] as the syntax rule:
@racketblock[
(define v 5)
(check-all-equal? (+ 2 2) v)
]
will expand to code that introduces a fresh variable @racket[v1]:
@racketblock[
(define v 5)
(let ([v1 (+ 2 2)])
(check-equal? v v1))
]
to avoid shadowing the programmer's variable with the identifier used inside
the syntax rule.
Details on how this so-called @emph{hygienic} expansion is implemented and
its benefits to extension writers and users are explained by Flatt@~cite[f-popl-2016 fcdb-jfp-2012].
A call to @racket[syntax-parse] can contain multiple patterns.
Indeed,
the actual definition of Typed Racket's define has patterns for
non-function definitions @racket[(define n : Integer 42)]
and unannotated @racket[define] statements.
Finally, the module that implements @racket[-define] exports it as @racket[define]
to change the behavior of definitions only in external modules.
In addition to pattern-based syntax rules, one can extend Racket with
arbitrary functions defined over @emph{syntax objects}.
For instance, we can write an extension that formats log messages to a port
@racket[log-output] at runtime when the program is compiled with a flag
@racket[DEBUG] enabled. If the flag is disabled, we perform a no-op.
The expansion of @racket[log] calls happens during compilation:
@(begin
#reader scribble/comment-reader
@codeblock|{
;; With DEBUG enabled
(log "everything ok")
==> (displayln "everything ok" log-output)
;; With DEBUG disabled
(log "everything still ok")
==> (void)
}|)
@; Include a jab about parenthesized syntax making metaprogramming life easier?
@; @subsection{Oh, the Parentheses}
The @racket[syntax-parse] form is a convenient way to implement @racket[log].
In this case, we use @racket[syntax-parse] to deconstruct a syntax object
@racket[stx] representing an application of @racket[log].
The output of syntax parse is a new syntax object built using the constructor
@racket[syntax].
@codeblock{
(define-syntax (log stx)
(syntax-parse stx
[(log message)
(if DEBUG
(syntax (displayln message log-output))
(syntax (void)))]))
}
We can further enhance our implementation with a compile-time check that
the value bound to the pattern variable @racket[message] is a string literal.
@codeblock{
(define-syntax (log stx)
(syntax-parse stx
[(log message)
(unless (string? (syntax->datum (syntax message)))
(error "log: expected a string literal"))
(if DEBUG
(syntax (displayln message log-output))
(syntax (void)))]))
}
@; Alternatively, we can use Racket's @emph{syntax classes} to the same effect.
@; The @racket[str] syntax class recognizes literal strings.
@; Binding it to the pattern variable @racket[message] causes calls like
@; @racket[(log 61)] to raise a compile error.
@; @codeblock{
@; (define-syntax (log stx)
@; (syntax-parse stx
@; [(log message:str)
@; (if DEBUG
@; (syntax (displayln message log-output))
@; (syntax (void)))]))
@; }
With this enhancement, calls like @racket[(log 61)] are rejected statically.
Unfortunately, arguments that @emph{evaluate} to string literals
are also rejected
because the syntax extension cannot statically predict what value an arbitrary
expression will reduce to.
This is a fundamental limitation, but we can make a small improvement by accepting
any @racket[message] produced by another (trusted) syntax extension.
Suppose @racket[++] is an extension for concatenating two strings.
If we assign a unique @emph{syntax property} to the syntax object produced
by @racket[++], we can later retrieve the property in the @racket[log] extension.
First, we give an implementation of @racket[++] in terms of Racket's
built-in @racket[string-append] function, crucially using @racket[syntax-property]
to associate the value @racket['string] with the key @racket['static-type].
@codeblock{
(define-syntax (++ stx)
(syntax-parse stx
[(++ s1 s2)
(syntax-property
(syntax (string-append s1 s2))
'static-type 'string)]))
}
Assuming now that the key @racket['static-type] accurately describes the value
contained in a syntax object, @racket[log] can accept both string
literals and tagged syntax objects.
@codeblock{
(define-syntax (log stx)
(syntax-parse stx
[(log message)
(define is-string?
(string? (syntax->datum (syntax message))))
(define expands-to-string?
(eq? 'string
(syntax-property
(local-expand message)
'static-type)))
(unless (or is-string? expands-to-string?)
(error "log: expected a compile-time string"))
(if DEBUG
(syntax (displayln message log-output))
(syntax (void)))]))
}
@section[#:tag "subsec:api"]{Implementing a Type Elaborator}
Our syntax extensions @racket[check-all-equal?], @racket[log], and @racket[++]
are indistinguishable from user-defined functions or core language forms,
yet they perform useful transformations before a program is typechecked or
compiled.
This seamless integration gives Racket programmers the ability to grow the
language and tailor it to their specific needs.
Just as Typed Racket parses the syntax of a program and extracts breadcrumbs
for the type environment, a type elaborator transforms
a program leaving hints to guide the type checker.
Syntax extensions are a low-level way to achieve this behavior.
@Figure-ref{fig:printf} demonstrates a type-elaborated variant of Racket's
@racket[printf].
When called with a string literal @racket[fmt] as its first argument, the
elaborator @racket[-printf]
reads @racket[fmt] for type and arity constraints using the function @racket[format-types].
For instance, the format string @racket["~b"] would produce the type constraint
@racket[(Exact-Number)], implying that the arguments @racket[args] must be a
list with one element of type @racket[Exact-Number].
This constraint is used twice in @racket[-printf]:
first to check the length of @racket[args] against the length of @racket[types]
and second to add explicit type annotations (via @racket[ann]) around each
argument to the format string.
Whereas Typed Racket accepts any number of values with any type and
lets Racket's @racket[printf] raise runtime errors, type elaboration reports
both arity and type errors statically.
@; arity error = caught directly
@; type error = implied
@figure["fig:printf" "Type elaboration for format strings"
@racketblock[
(define-syntax (-printf stx)
(syntax-parse stx
[(-printf fmt:str args ...)
#:with (types ...) (format-types #`fmt)
(if (= (stx-length #`(args ...))
(stx-length #`(types ...)))
#`(printf fmt (ann args types) ...)
(error 'printf "arity mismatch in ~s" stx))]))
]
@; Include the default branch? I just don't know what to say about it.
@; [(-printf fmt args ...)
@; #`(printf fmt args ...)]))
]
In general, the high-level goals of such type elaborations are:
@itemlist[
@item{ (@goal{refinement})
Refine type signatures using latent, syntactic @emph{value information},
such as the characters in a string constant.
}
@item{ (@goal{reuse})
Rely on the existing type checker.
Avoid duplicating its work and never resort to proofs by assertion.@note[@elem{
Inspired by the @emph{translation validation} approach to
compiler correctness @~cite[pss-tacas-1998].}]
@; Just trying to say, always typecheck things
}
@item{ (@goal{relevance})
Report errors in terms of the programmer's code, not in terms
of elaborated code.@note[@elem{Inspired by SoundX @~cite[le-popl-2016].}]
}
]
The @racket[-printf] elaborator meets these goals by producing Typed Racket code
that only adds type annotations to the original program.
If these annotations fail, they report a type error relative to an element
of @racket[args].
As for refining the types, @racket[-printf] is best described with a quasi-dependent
type in terms of a proof theory @exact{$\Sigma$}.
@exact|{\begin{mathpar}
\inferrule{
\Sigma \vdash (\RktMeta{format-types}~\RktMeta{fmt}) = \tau_0 \ldots \tau_{n-1}
\\\\
\typestogen{\penv;\,\tenv}{\RktMeta{arg}_0}{\tau_0}
\\
\ldots
\\
\typestogen{\penv;\,\tenv}{\RktMeta{arg}_{n-1}}{\tau_{n-1}}
}{
\typestogen{\penv;\,\tenv}{\RktMeta{-printf fmt}~\RktMeta{arg}_0 \ldots \RktMeta{arg}_{n-1}}{\mathsf{Unit}}
}
\end{mathpar}}|
We chose @racket[printf] as an introductory example because its correctness
and type soundness follow directly from the soundness of @racket[format-types].
Correctness is not generally so simple to "prove", so @Secref{sec:segfault} and @Secref{sec:regexp}
show how type elaboration can justify a potentially unsafe optimizing transformation
and give library authors a technique for implementing a precise API without
changing their language's type system.
@; NOTE: we will use similar 'typed macros' in a coming section... right?

View File

@ -5,7 +5,7 @@
;; FIXME: this doesn't have all the papers from the README yet
(require racket/format
(require racket/format
scriblib/autobib)
(provide (all-defined-out))
@ -77,6 +77,7 @@
(define/short sigmod "SIGMOD" (string-append ACM "SIGMOD " International Conference "on Management of Data"))
(define/short sigplan-notices "SIGPLAN Notices" (string-append ACM "SIGPLAN Notices"))
(define/short tacs (string-append International Symposium "Theoretical Aspects of Computer Science"))
(define/short tacas (string-append International Conference "on Tools and Algorithms for the Construction and Analysis of Systems"))
(define/short tcs "Theoretical Computer Science")
(define/short tfp "TFP" (string-append Symposium "Trends in Functional Programming"))
(define/short tlca "TLCA" (string-append International Conference "Typed Lambda Calculi and Applications"))
@ -1725,9 +1726,23 @@
#:location (proceedings-location icfp #:pages '(235 246))
#:date 2010))
(define pss-tacas-1998
(make-bib
#:title "Translation Validation"
#:author (authors "Amir Pnueli" "Michael Siegel" "Eli Singerman")
#:location (proceedings-location tacas #:pages '(151 166))
#:date 1998))
(define le-popl-2016
(make-bib
#:title "Sound Type-Dependent Syntactic Language Extension"
#:author (authors "Florian Lorenzen" "Sebastian Erdweg")
#:location (proceedings-location popl #:pages '(204 216))
#:date 2016))
(define gr-cup-2004
(make-bib
#:title "The Standard ML Base Library"
#:author (authors "Emden R. Gansner" "John H. Reppy")
#:location (book-location #:edition "1" #:publisher "Cambridge University Press")
#:date 2004))

View File

@ -21,6 +21,10 @@
id
todo
proof
warning
goal
stx
)
(require "bib.rkt"
@ -117,7 +121,7 @@
(define (sf x) (elem #:style "sfstyle" x))
(define (sc x) (exact "\\textsc{" x "}"))
(define (sc x) (exact "\\textsc{\\small " x "}"))
(define (parag . x) (apply elem #:style "paragraph" x))
@ -174,3 +178,14 @@
txt
(exact "\\hfill\\qed"))))
(define (warning sym txt . arg*)
(printf "[WARNING] ~a: " sym)
(apply printf txt arg*)
(newline))
(define (goal str)
(bold (emph str)))
(define stx
(exact "\\RktRdr{\\#{\\textasciigrave}}"))

View File

@ -13,3 +13,6 @@ and only tells eternity.
@; type systems change slowly. That's OK
@; syntax extensions can change much faster and YOU can control the
@; changes. That's YOU joe programmer or YOU jane street capital. Do you.
@; just one way of doing this, can imagine other Typed Racket
@; in fact Stephen Chang .....

View File

@ -1,3 +0,0 @@
\begin{center}
to do
\end{center}

View File

@ -18,13 +18,9 @@
}
\inferrule*[left=T-Unsafe]{
\typestoclosed{\vectorvn}{\tarray}
\typesto{\vectorvn}{\tarray}
\\
\typestoclosed{i}{\tint}
\\\\
i \in \ints
\\
0 \leq i < n
\typesto{i}{\tint}
}{
\typesto{\unsaferef~\vectorvn~i}{\tint}
}

View File

@ -1,6 +1,6 @@
#lang scribble/sigplan @onecolumn
@require["common.rkt" (only-in scribble/base nested)]
@require["common.rkt" (only-in scribble/base nested) "bib.rkt"]
@title[#:tag "sec:intro"]{Type Elaborators Need APIs}
@ -13,11 +13,11 @@ consistency according to the underlying type theory, replaces many of the
surface-syntax constructs with constructs from the kernel language, and
inserts type information to create an annotated representation.
Some (implementations of such) programming languages also support a way to
Some programming languages also support a meta-API, that is, a way to
programmatically direct the elaborator. For example, Rust and Scala come
with compiler plug-ins. Typed Racket and Typed Clojure inherit the macro
mechanisms of their parent languages. Here we refer to such mechanisms as
@defterm{elaborator API}s.
mechanisms of their parents. This paper refers to all such mechanisms as
@defterm{elaborator API}s.
Equipping type-checking elaborators with APIs---or using the existing
APIs---promises new ways to expand the power of type checking at a
@ -29,21 +29,17 @@ Consider the example of tailoring the API of a library that implements a
string-based, domain-specific language. Examples of such libraries abound:
formatting, regular-expression matching, database queries, and so on. The
creators of these DSLs tend to know a lot about how programs written in
these DSLs relate to the host program, but they cannot express this
knowledge in API types without resorting to such rich systems as
dependent type theory.
Type tailoring allows such programmers to refine the typing rules for the
APIs in a relatively straightforward way. Recall the API of the
regular-expression library. In a typical typed language, the matching
function is exported with a type like this:
@;
@verbatim[#:indent 4]{
reg_exp_match: String -> Option [Listof String]
}
these DSLs relate to the host program, but they (usually) cannot express
this knowledge in API types. Type tailoring allows such programmers to
refine the typing rules for the APIs in a relatively straightforward
way. Recall the API of the regular-expression library. In a typical typed
language, the matching function is exported with a type like this:
@;
@exact|{
\begin{mathpar}
\mbox{regexp-match} : \mbox{String} \rightarrow \mbox{Opt[Listof[String]]}
\end{mathpar}\hspace{-.3em}}|
@; the above is an ***incredibly disgusting hack to work around a scribble bug***
When the result is a list of strings, the length of the list depends on the
argument string---but the API for the regular-expression library cannot
express this relationship between the regular expression and the
@ -51,69 +47,87 @@ surrounding host program. As a result, the author of the host program must
inject additional complexity, often in the form of (hidden) run-time checks.
If type tailoring is available, the creator of the library can refine the
type of @tt{reg_exp_match} with rules such as these:
@;
@verbatim[#:indent 4]{
Program |- s : does not contain grouping
------------------------------------------------
G |- reg_exp_match(s) : Option [List String]
Program |- s : contains one grouping w/o alternative
-----------------------------------------------------
G |- reg_exp_match(s) : Option [List String String]
}
type of @tt{regexp-match} with rules such as this one:
@;
@exact|{
\begin{mathpar}
\inferrule*[]{
\elabsto{e}{e'}{\mbox{String}}, \\\\
\mbox{\it Program} \vdash e' \mbox{ does not contain a grouping}
}{
\elabsto{\mbox{regexp-match}~e}{\mbox{regexp-match}~e'}{\mbox{Opt[List[String]]}}
}
\end{mathpar}\hspace{-.3em}}|
That is, when the extended elaborator can use (a fragment of) the program
to prove that the given string for a specific use of @tt{reg_exp_match}
to prove that the given string for a specific use of @tt{regexp-match}
does not contain grouping specifications---ways to extract a sub-string via
matching---then the @tt{Some} result type is a list that contains a single
string. Similarly, if the string contains exactly one such grouping (and no
alternative), then a match produces a list of two strings. In all other
cases, the type checker uses the default type for the function.
matching---the @tt{Some} result type is a list of a single
string. The original specification remains the default rule, which the
type checker uses when it cannot use the specific ones.
A vector library is another familiar example that offers opportunities for
type tailoring. Often such a library exports a dereferencing construct with
A similar rule would say that if the string contains exactly one such grouping
(and no alternative), a match produces a list of two strings. Critically,
this refined type for the result of applying @tt{regexp-match} enables
further type refinements, just like constant folding enables additional
compiler optimizations. In this specific case, the program context of the
application of @tt{regexp-match} may dereference the list with unsafe---and
thus faster---versions of @tt{first} and @tt{second} once it has confirmed
a match.
Vectors offer a similar opportunity for unsafe dereferencing via
type tailoring. Typically, such a library exports a dereferencing construct with
the following type rule:
@;
@verbatim[#:indent 4]{
G |- v : Vector[X] G |-e : Integer
----------------------------------------
G |- v[e] ~~> checked_ref(v,e) : X
}
@exact|{
\begin{mathpar}
\inferrule*[]{
\elabsto{e_1}{e_1'}{\tarray}
\\
\elabsto{e_2}{e_2'}{\tint}
}{
\elabsto{{e_1}[{e_2}]}{\checkedref~e_1'~e_2'}{\tint}
}
\end{mathpar}\hspace{-.3em}}|
@;
If the elaborator can prove, however, that the indexing expression @tt{e}
is equal to or evaluates to a specific integer @tt{i}, the rule can be
strengthened as follows:
@;
@verbatim[#:indent 4]{
@exact|{
\begin{mathpar}
Program |- e = i for some i in Natural
&& Program |- v is vector of length n
&& i < n
\inferrule*[]{
\elabsto{e_1}{\vectorvn}{\tarray},
\\
\elabsto{e_2}{e'_2}{\tint},
\\\\
\mbox{\it Program} \vdash e'_2 = i, \quad
i \in \ints, \quad
0 \le i < n
}{
\elabsto{{e_1}[{e_2}]}{\unsaferef~\vectorvn~i}{\tint}
}
\end{mathpar}\hspace{-.3em}}|
G |- v : Vector[X] G |-e : Integer
----------------------------------------
G |- v[e] ~~> unsafe-ref(v,e) : X
}
This paper introduces and evaluates the novel idea of type tailoring. It
uses Typed Racket and its API to the elaborator (section 2) because the
language already supports appropriate type and run-time systems and because
it is relatively straightforward to program the type
elaborator---@emph{without modifying it}. To make type tailoring concrete
and to demonstrate its usefulness, the paper presents two case studies
(sections 3 and 4). Each report on a case study consists of three parts: a
type soundness argument assuming a type soundness argument for the complete
language exists; the actual
@;
That is, the elaborator can then eliminate a possibly expensive run-time
check.
This paper demonstrates the idea with concrete case studies of type
tailoring (section 2) in the context of Typed Racket and its API to the
elaborator (section 3). To illustrate the usefulness of the idea, we
implement two tailorings. The first one---chosen for the wide familiarity
of the domain---enriches the type-checking of vector-referencing operations
(section 4). The second example explains how the implementor of a
string-based embedded DSL---a regexp-matching DSL---may tailor the types of
the interpretation function (section 5). Our evaluations confirm that these
tailorings reduce the number of run-time checks that the programmer or the
elaborator has to insert into the host program. In addition, we sketch
@margin-note*{BEN: this evaluation is missing an idea}
@;
implementation; and an evaluation that reports how often the revised type
elaborator can improve the code. The first type tailoring---chosen for the
wide familiarity of the domain---enriches the type-checking of
vector-referencing operations (section 3). The second example explains how
the implementor of a string-based embedded DSL---a regexp-matching DSL,
also widely familiar to programming researchers---may tailor the types of
the interpretation function (section 4). In addition, the paper sketches
several other applications of type tailoring in Typed Racket (section
6). We also explain how the creator of such libraries can refine the
existing type soundness proof of the host language to argue the correctness
of the tailoring.
5). The final two sections compare programmability of the type elaborator
to work on dependent types and sketch how such an API could be implemented
for other languages.

View File

@ -7,23 +7,23 @@
@abstract{
Many typed APIs implicitly acknowledge the @emph{diktat} that the host type
system imposes on the creators of libraries. When a library implements a
string-based domain-specific language, the problem is particularly obvious.
The interpretation functions for the programs in this embedded language
come with the uninformative type that maps a string to some other host
type. Only dependently typed languages can improve on this scenario at the
moment, but they impose a steep learning curve on programmers.
Typed APIs suffer from the @emph{diktat} of the host type system.
Libraries that implement string-based domain-specific languages make this
clash particularly obvious. The interpretation functions for the programs
in embedded languages come with the rather uninformative type that maps a
string to some other host type. Only dependently typed languages can
improve on this scenario at the moment, but they impose a steep learning
curve on programmers.
This paper proposes to tackle this problem with APIs for type
checkers. Specifically, it observes that most typed languages already
employ an elaboration pass to type-check programs. If this elaborator
comes with a sufficiently rich API, the author of a library can supplement
the default types of the library's API with typing rules that improve the
collaboration between host programs and uses of the library. The
evaluation uses a prototype for Typed Racket and illustrates how useful
the idea is for widely available libraries. Also the paper sketches how
the authors of such ``tailored'' rules can argue their soundness.
This paper proposes to tackle this problem with a meta-API for the type
checker. Specifically, it observes that most typed languages already
employ an elaboration pass to type-check programs. If this elaborator came
with a sufficiently rich API, the author of a library could supplement the
default types of the API with typing rules that improve the collaboration
between host programs and uses of the library. To demonstrate the
feasibility and effectiveness of this idea, the paper presents a prototype
for Typed Racket and two case studies. It also sketches how the authors of
such ``tailored'' typing rules can argue their soundness.
}
@ -34,12 +34,14 @@ This paper proposes to tackle this problem with APIs for type
@; See OUTLINE.md for explanation
@include-section{intro.scrbl}
@include-section{background.scrbl}
@;@include-section{elaborators.scrbl}
@include-section{segfault.scrbl}
@;@include-section{examples.scrbl}
@include-section{regexp.scrbl}
@include-section{define.scrbl}
@;@include-section{discussion.scrbl}
@;@include-section{friends.scrbl}
@;@include-section{related-work.scrbl}
@;@include-section{conclusion.scrbl}
@include-section{related-work.scrbl}
@include-section{conclusion.scrbl}
@section[#:style 'unnumbered]{Acknowledgments}

View File

@ -4,6 +4,8 @@
@title[#:tag "sec:related-work"]{Experts}
@section{Macros}
@section{SoundX}
SoundX is a system for modeling programming languages and defining type-sound
extensions, e.g. defining a type derivation for @tt{let} in terms of a type
@ -40,6 +42,12 @@ Our general approach and outlook on type soundness is informed by Cousot.
@section{Compiler Plugins}
@; -- HASKELL
@; https://ghc.haskell.org/trac/ghc/wiki/Plugins/TypeChecker
@; http://christiaanb.github.io/posts/type-checker-plugin/
@; http://adam.gundry.co.uk/pub/typechecker-plugins/typechecker-plugins-2015-07-17.pdf
@; https://github.com/yav/type-nat-solver (copy of paper in src/ folder here)
GHC (constraint solvers)
Rust (macros)
Scala (macros)

View File

@ -1,8 +1,54 @@
#lang scribble/sigplan @onecolumn
@; 1. MODEL
@; 2. IMPLEMENTATION
@; 3. EVALUATION
@require["common.rkt"]
@; Q. remove <e ... e>, and only have vector values?
@; because the syntax extension rules expect only values (expressions ruin the proofs)
@title[#:tag "sec:segfault"]{Well Typed Programs do not go SEGFAULT}
@require[
"common.rkt"
"evaluation.rkt"
glob
racket/sequence
(only-in racket/list take)
(only-in trivial/private/raco-command collect-and-summarize)
]
@title[#:tag "sec:segfault"]{When using the API can cause a segfault}
Every programming language comes with arrays. In Typed Racket, array
facilities come as a library that essentially exports constructors,
dereferencing functions, and mutation operations. SML similarly provides
them from a run-time library@~cite[gr-cup-2004].
The API for array libraries tends to come with highly conservative
signatures. For example, an array indexing operation calls for an array
and an integer and then produces the designated element from the given
array; run-time checks ensure that the integer is in the interval
@math{[0,n)} where @math{n} is the length of the array.
To speed up program execution, Typed Racket has access to an unsafe array
indexing operation. Like array indexing in C, this operation
retrieves the bits at the specified location without checking the size of
the index. If used inappropriately, such an unsafe operation can cause the
program to print random results or to segfault.
In this section we show that the creator of the Typed Racket array library
can replace checked array indexing with unsafe indexing. While this use of
type tailoring is quite simple, it supplies a great case study. To begin
with, it requires an innovation on the standard progress and preservation
method for showing type soundness. Specifically, the author of the array
library must show that the evaluator remains a function and does not
introduce segfaults into typed programs without run-time checks for
indexing (@secref{sec:segfault:model}). The Typed Racket implementation is quite
straightforward; the core consists of two dozen lines
(@secref{sec:segfault:implementation}). Finally, an evaluation on the Racket code
base indicates that the prototype is highly effective for a certain style of
programming.
@; -----------------------------------------------------------------------------
@section[#:tag "sec:segfault:model"]{Elaborating array indexing}
@Figure-ref{fig:stlc} describes a simply typed @exact{$\lambda$} calculus
with integers and integer arrays.
@ -18,10 +64,10 @@ The operational semantics for the core language are given in @Figure-ref{fig:stl
along with type judgments for two primitive operations:
@exact{$\checkedref$} and @exact{$\unsaferef$}.
Intuitively, @exact{$\checkedref$} is a memory-safe function that performs a bounds check before
dereferencing an array and gives a runtime error when called with incompatible values.
dereferencing an array and raises a runtime error when called with incompatible values.
On the other hand, @exact{$\unsaferef$} does not perform a bounds check and therefore
may return arbitrary data or raise a memory error when called with an invalid index.
These negative consequences are modeled in the (family of) rule(s) @exact|{\textsc{E-UnsafeFail}}|.
These negative consequences are modeled in the (family of) rule(s) @sc{E-UnsafeFail}.
Consequently, the judgment @exact{$\smallstep{e}{e}$} and its transitive closure
@exact{$\smallstepstar{e}{e}$} are relations, whereas
typing and elaboration are proper functions.
@ -56,6 +102,11 @@ Evaluation of the surface language in @Figure-ref{fig:stlc}, however,
}
@proof{
@; (shorter, but repeats following paragraph)
@; The surface language does not allow @exact{$\unsaferef$} and elaboration
@; does not introduce unsafe references, therefore the non-deterministic
@; rule @sc{E-UnsafeFail} is never used.
By induction on terms of the core language, if
@exact{$\smallstep{e}{e'}$} and @exact{$e$} is not of the form
@exact{$\unsaferef~e_1~e_2$} then @exact{$e'$} is unique.
@ -85,8 +136,8 @@ Additionally, the surface language is type safe.
}
@proof{
The interesting cases involve @exact{$\checkedref$}.
First, @exact|{$\elabstoclosed{\aref{e_1}{e_2}}{\checkedref~e_1~e_2}{\tint}$}|
The interesting cases are for array references, for which we observe that
@exact|{$\elabstoclosed{\aref{e_1}{e_2}}{\checkedref~e_1~e_2}{\tint}$}|
implies @exact{$\typestoclosed{e_1}{\tarray}$}
and @exact{$\typestoclosed{e_2}{\tint}$}.
Depending on the values of @exact{$e_1$} and @exact{$e_2$},
@ -95,8 +146,6 @@ Additionally, the surface language is type safe.
}
@section{Extending the Elaborator}
Building on the foundation of type soundness, we extend the elaborator with
the @emph{value-directed} rules in @Figure-ref{fig:elab1}.
When the elaborator can prove that an array reference is in bounds based on
@ -110,7 +159,7 @@ Conversely, the elaborator raises a compile-time index error if it can prove
Our thesis is that adding such rules does not undermine the safety guarantees of the
original language.
In this case, we recover the original guarantees by extending the proofs of
In this case, we recover determinism and soundness by extending the proofs of
type and memory safety to address the @sc{S-RefPass} and @sc{S-RefFail} elaboration rules.
@definition[@exact{$\elabarrowplus$}]{
@ -126,43 +175,38 @@ In this case, we recover the original guarantees by extending the proofs of
@proof{
With the original elaboration scheme, all array references @exact{$\aref{e_1}{e_2}$}
elaborate to @exact{$\checkedref~e_1'~e_2'$}, where @exact{$e_1'$}
elaborate via @sc{S-Ref} to @exact{$\checkedref~e_1'~e_2'$}, where @exact{$e_1'$}
and @exact{$e_2'$} are the elaboration of @exact{$e_1$} and @exact{$e_2$}.
There are now three possibilities:
@itemize[
@item{
@exact{$e_1'$} is an array value @exact{$\vectorvn$}
and @exact{$e_2'$} is an integer value @exact{$i \in \ints$}
and @exact{$0 \le i < n$}.
Both @sc{S-Ref} and @sc{S-RefPass} are possible elaborations,
therefore @exact{$e'$} is either @exact{$\checkedref~\vectorvn~i$}
or @exact{$\unsaferef~\vectorvn~i$}.
Because @exact{$0 \le i < n$}, evaluation must proceed with
@sc{E-CheckPass} or @sc{E-UnsafePass} in each case, respectively.
These rules have the same result, @exact{$v_i$}.
}
@item{
@exact{$e_1'$} is an array value @exact{$\vectorvn$}
and @exact{$e_2'$} is an integer value @exact{$i \in \ints$}
and either @exact{$i < 0$} or @exact{$i \ge n$}.
The rules @sc{S-Ref} and @sc{S-RefFail} are possible elaborations.
If @sc{S-Ref} is chosen, @exact{$e'$} is @exact{$\checkedref~\vectorvn~i$}
and evaluates to @exact{$\indexerror$} because @exact{$i$} is outside
the bounds of the array.
If @sc{S-RefFail} is chosen an index error is raised immediately.
}
@item{
Otherwise, either @exact{$e_1'$} or @exact{$e_2'$} is not a value
form and we rely on the existing proof of determinism.
}
]
References for which only @sc{S-Ref} applies remain deterministic, but
we have two cases where a new rule may also be used:
@itemize[
@item{ Case @sc{S-RefFail}:
@exact{$e_1'$} is an array literal @exact{$\vectorvn$}
and @exact{$e_2'$} is an integer literal @exact{$i \in \ints$}
and either @exact{$i < 0$} or @exact{$i \ge n$}.
If @sc{S-Ref} is chosen, @exact{$e'$} is @exact{$\checkedref~\vectorvn~i$}
and evaluates to @exact{$\indexerror$} because @exact{$i$} is outside
the bounds of the array.
On the other hand, @sc{S-RefFail} raises the index error immediately.
}
@item{ Case @sc{S-RefPass}:
@exact{$e_1'$} is an array literal @exact{$\vectorvn$}
and @exact{$e_2'$} is an integer literal @exact{$i \in \ints$}
and @exact{$0 \le i < n$}.
If @sc{S-Ref} is chosen then @exact{$e'$} is @exact{$\checkedref~\vectorvn~i$}.
If @sc{S-RefPass} is chosen then @exact{$e'$} is @exact{$\unsaferef~\vectorvn~i$}.
Because @exact{$0 \le i < n$}, evaluation must proceed with
@sc{E-CheckPass} or @sc{E-UnsafePass} respectively, producing
@exact{$v_i$} in any event.
}
]
}
For practical purposes, non-determinism in the elaborator should be
resolved giving the rules in @Figure-ref{fig:elab1} preference over the rules
in @Figure-ref{fig:elab0}.
But the result will be the same in any event.
resolved by giving the rules in @Figure-ref{fig:elab1} preference over the
rules in @Figure-ref{fig:elab0}.
But knowing that the result is the same for the non-identity elaborations
gives us confidence in their correctness.
@theorem["soundness"]{
If @exact{$e$} is a closed term then @exact|{$\elabstoclosedplus{e}{e'}{\tau}$}|
@ -174,7 +218,7 @@ In this case, we recover the original guarantees by extending the proofs of
We extend the existing proof with cases for @exact{$\unsaferef$}.
@itemize[
@item{
If @sc{S-RefPass} is applied by the elaborator then @exact{$e'$} is
Case @sc{S-RefPass}: @exact{$e'$} is
the call @exact{$\unsaferef~\vectorvn~i$} and @exact{$0 \le i < n$}.
By assumption, @exact|{$\typestoclosed{\vectorvn}{\tarray}$}| and
@exact{$\typestoclosed{i}{\tint}$}.
@ -182,24 +226,194 @@ In this case, we recover the original guarantees by extending the proofs of
to the integer value @exact{$v_i$} by @sc{E-UnsafePass}.
}
@item{
If @sc{S-RefFail} is applied by the elaborator then @exact{$e'$} is
Case @sc{S-RefFail}: then @exact{$e'$} is
@exact{$\indexerror$} and type soundness follows trivially.
}
]
}
The key step in both theorems was that elaboration supplied the proposition
@exact{$0 \le i < n$}.
Both the type rules and evaluation rules depended on this premise.
The key step in both theorems is that elaboration supplies the proposition
@exact{$0 \le i < n$} required for safe evaluation.
So long as assumptions like the above are properly stated by language implementors,
type tailoring library authors can help to meet them.
@section{In Practice}
@; -----------------------------------------------------------------------------
@section[#:tag "sec:segfault:implementation"]{Implementing the elaborator}
Have implemented for typed racket
- library
- with enhancements from @todo{secref}
- via expander, so typechecked afterwards
@; WHY ... WHY ARE THEY ALWAYS PLAYING JAZZ IN THE LOBBY OF THIS HOTEL ???
@; (Asked the concierge: it's always on, on a loop. For Christmas it's Christmas jazz)
Checked in
- standard distro (includes plot,math,stats)
- pict3d
We have implemented the rules in @Figure-ref{fig:elab1} as a syntax extension for Typed Racket.
Including imports and exports, the implementation is an 18 line module (@Figure-ref{fig:vector-ref-extension}).
@figure["fig:vector-ref-extension" "Syntax extension for array references"
@codeblock{
#lang typed/racket ;; vector-ref-extension.rkt

;; Exports a drop-in replacement for `vector-ref` that, when both the
;; vector and the index are literals, either emits an `unsafe-vector-ref`
;; (index provably in bounds, rule S-RefPass) or raises a compile-time
;; error (index provably out of bounds, rule S-RefFail).  All other call
;; shapes fall through to the ordinary checked `vector-ref` (rule S-Ref).
(require
  (only-in racket/unsafe/ops unsafe-vector-ref)
  (for-syntax racket/base syntax/parse))

(define-syntax (-vector-ref stx)
  (syntax-parse stx
   [(_ #(e* ...) i)
    #:when (integer? (syntax->datum #`i))
    ;; Case 1: constant args.
    ;; BUG FIX: compare the index against the *number* of elements,
    ;; not the list of element datums itself.
    (define v-len (length (syntax->datum #`(e* ...))))
    (define i-val (syntax->datum #`i))
    (if (< -1 i-val v-len)
      ;; then S-RefPass: bounds check discharged at compile time
      #`(unsafe-vector-ref '#(e* ...) i)
      ;; else S-RefFail: raise the index error during expansion
      (error 'vector-ref "~a ~a" v-len i-val))]
   [(_ args ...)
    ;; Case 2: S-Ref — defer to the checked reference (and to the
    ;; type checker for arity/ type errors)
    #`(vector-ref args ...)]))

(provide (rename-out [-vector-ref vector-ref]))
}
]
First, the @racket[require] statement imports the @racket[unsafe-vector-ref]
function to the runtime environment and the libraries @racket[racket/base]
and @racket[syntax/parse] to the compile-time environment.
We name the extension @racket[-vector-ref] to avoid shadowing the uses of
Typed Racket's @racket[vector-ref] in Case 2 of the extension.
At the end of the module, the @racket[provide] statement renames @racket[-vector-ref]
to replace the default @racket[vector-ref] in importing modules.
Code within @racket[-vector-ref] is evaluated at compile-time to
transform all calls and uses of the extension in an importing module.
For instance, calling @racket[(vector-ref x 4)] will invoke
@racket[-vector-ref] with the expression @racket[(vector-ref x 4)] bound to the
formal parameter @racket[stx].
@; A higher-order use like @racket[(map vector-ref vs is)] will invoke @racket[-vector-ref]
@; with @racket[stx] bound to the identifier @racket[vector-ref] before evaluating
@; the call to @racket[map].
In any event, the first task of @racket[-vector-ref] is to destructure its argument
@racket[stx] using the @racket[syntax-parse] form.
We consider three cases.
The first matches expressions with three elements, like @racket[(vector-ref #(0) 0)],
where the first element is anything, the second is a vector literal, and the third
is an integer.
Integer literals are recognized by the @racket[#:when] clause, which extracts
the value from a @emph{pattern variable} @racket[i] and tests whether this value
is an integer.
If this first match is successful, then we implement the @sc{S-RefPass}
and @sc{S-RefFail} rules by comparing the value of the integer literal
contained in @racket[i] against the number of elements captured by the zero-or-more
pattern @racket[(e ...)].
When the reference is in bounds, we use the constructor @|stx|
to produce code.@note[@elem{If it helps, you can mentally replace all @|stx|
with the arrow @exact{$\elabarrow$} from our model.}]
The second and third cases are simpler.
The second elaborates any call to @racket[-vector-ref] into a @racket[vector-ref]
call---even calls made with zero or seven arguments.
We let the type checker deal with such erroneous cases.
@; The third case replaces higher-order calls of @racket[-vector-ref] with
@; higher-order calls to Typed Racket's @racket[vector-ref] function.
Existing programs can use the extension by adding a 1-line import statement.
@codeblock{
#lang typed/racket
(require vector-ref-extension)
....
}
The import shadows @racket[vector-ref] from the @racket[typed/racket] language,
replacing all occurrences with calls to our syntax extension.
As Typed Racket processes the code abbreviated as @racket[....] above,
each vector reference is expanded to either a Typed Racket (checked)
@racket[vector-ref], an @racket[unsafe-vector-ref], or a compile-time @racket[error].
@; -----------------------------------------------------------------------------
@section[#:tag "sec:segfault:evaluation"]{Evaluation}
@(let* ([vrx #rx"\\(vector-ref "]
[vr-file* (for/list ([fn (sequence-append
(in-glob "benchmark/vector/*.rkt")
(in-glob "benchmark/vector/*/*.rkt"))]
#:when (with-input-from-file fn
(lambda ()
(regexp-match vrx (current-input-port)))))
fn)]
;; : (Listof (List Symbol Num-Hit Num-Miss))
[all-file+optz* (with-cache "vector-ref-optz"
(lambda ()
(profile-point "counting vector-ref optz")
(let-values ([(_in _out) (make-pipe)])
(parameterize ([current-output-port _out])
(displayln "(")
(for-each collect-and-summarize vr-file*)
(displayln ")"))
(close-output-port _out)
(let ([v (read _in)])
(close-input-port _in)
(begin0
(for/list ([d (in-list v)])
(cons (car d)
(or (for/first ([kvvv (in-list (cdr d))]
#:when (eq? (car kvvv) 'vector-ref))
(list (cadr kvvv) (caddr kvvv)))
(list 0 0))))
(profile-point "done w/ vector-ref optz")))))
#:read (lambda (f+o*)
(and
(for/and ([f+o (in-list f+o*)]
[fn (in-list vr-file*)])
(eq? (car f+o) (string->symbol fn)))
f+o*)))]
[file+optz* (filter (compose1 positive? cadr) all-file+optz*)]
[hit-count (length file+optz*)]
[miss-count (- (length all-file+optz*) hit-count)]
[num-optz (for/sum ([f+t (in-list file+optz*)]) (cadr f+t))])
;; -- for missed optz, take `caddr` of element of `file+optz*`
@list[
@figure["fig:vector-table" @elem{Summary of @racket[vector-ref] evaluation}
@graphical-summary[
#:hit-count hit-count
#:miss-count miss-count
#:bar-data (for/list ([fhm (in-list (sort file+optz* < #:key cadr))])
(define h (cadr fhm))
(define m (caddr fhm))
(list (format "~a" h) (* 100 (/ h (+ h m)))))
#:bar-title "Percent of Array References Optimized"
#:bar-x (format "# Optimized refs by module") ; (~a total)" num-optz
#:bar-y "%"
#:y-max 100
]
]
@elem{
The above implementation works for toy examples, but useful programs often
give constant values a name.
For example, the implementation of @racket[gzip] used in the main Racket distribution
declares array constants to implement a Huffman tree and heap.
To accommodate this and similar idioms, we added syntax extensions for binding
data to identifiers and constant folding for arithmetic operations.
These additions are justified in @Secref{sec:define}, but they are straightforward
extensions of the technique described in this section.
With name-tracking, we were able to optimize
two static array references in the @racket[gzip] implementation,
one in @racket[hangman],
12 references in a @racket[minesweeper] game, and
480 references in an implementation of @racket[parcheesi].
All three programs follow a general pattern of declaring a fixed-size vector
in the scope of a module and implementing helper functions to manipulate the vector.
The unusual success of @racket[parcheesi] was due to an inlining macro that
would unroll a loop over a vector of pawns to straight-line array
references at constant offsets.
These successes are modest,@note{Especially since we analyzed over 120 files using
arrays for places to optimize.}
but encouraging, since the target audience for these
extensions is script writers.
In prototype and throwaway code, we expect programmers to use more constant
values and make more index errors.
Our extensions catch some of these errors without imposing any annotation burden
or increasing compile times --- in the files we analyzed, we observed no
statistically significant difference compiling with and without our extensions
enabled.
}])

View File

@ -37,23 +37,35 @@
\newcommand{\tvar}[1]{\mathsf{#1}}
\newcommand{\tnat}{\tvar{Natural}}
\newcommand{\tarray}{\tvar{Array}}
\newcommand{\tdictgen}[1]{\llparenthesis #1 \rrparenthesis} %% yuck man
\newcommand{\tdictn}{\tdictgen{l_0 : \tau_0, \ldots,l_{n-1} : \tau_{n-1}}}
\newcommand{\tint}{\tvar{Int}}
\newcommand{\toption}[1]{\tvar{Option}\,#1}
\newcommand{\tnum}{\tvar{Num}}
\newcommand{\tstring}{\tvar{String}}
\newcommand{\tlist}[1]{\tvar{List~#1}}
\newcommand{\naturals}{\mathbb{N}}
\newcommand{\ints}{\mathbb{Z}}
%% -- terms
\newcommand{\dictgen}[1]{\{ #1 \}}
\newcommand{\dictvn}{\dictgen{l_0\!=\!v_0, \ldots, l_{n-1}\!=\!v_{n-1}}}
\newcommand{\vectorgen}[1]{\langle #1 \rangle}
\newcommand{\vectoren}{\vectorgen{e_0, \ldots, e_{n-1}}}
\newcommand{\vectorvn}{\vectorgen{v_0, \ldots, v_{n-1}}}
\newcommand{\vectorxn}{\vectorgen{x_0, \ldots, x_{n-1}}}
\newcommand{\vlam}[2]{\lambda\,#1\,.\,#2}
\newcommand{\vlet}[3]{\mathsf{let}\,#1 = #2\,\mathsf{in}\,#3}
\newcommand{\vnone}{\mathsf{None}}
\newcommand{\vsome}[1]{\mathsf{Some}\,#1}
\newcommand{\aref}[2]{#1@#2}
\newcommand{\checkedref}{\tvar{checked\mbox{-}ref}}
\newcommand{\unsaferef}{\tvar{unsafe\mbox{-}ref}}
\newcommand{\checkedref}{\RktMeta{checked-ref}}
\newcommand{\unsaferef}{\RktMeta{unsafe-ref}}
\newcommand{\rxm}{\RktMeta{rx-match}}
\newcommand{\segfault}{\mathsf{segfault}}
\newcommand{\indexerror}{\mathsf{IndexError}}
\newcommand{\syntaxerror}{\mathsf{SyntaxError}}
%% -- evaluation contexts
\newcommand{\ectx}{E}
@ -64,6 +76,11 @@
\newcommand{\tenvempty}{\cdot}
\newcommand{\tenvcons}[3]{#1:#2,#3}
%% -- proof environments
\newcommand{\penv}{\Sigma}
\newcommand{\penvempty}{\tenvempty}
\newcommand{\penvcons}[3]{\tenvcons{#1}{#2}{#3}}
%% -- typing
\newcommand{\typestogen}[3]{#1 \vdash #2 : #3}
\newcommand{\typestoclosed}[2]{\typestogen{\tenvempty}{#1}{#2}}
@ -77,11 +94,37 @@
\newcommand{\elabstoclosed}[3]{\elabstogen{\tenvempty}{#1}{\elabarrow}{#2}{#3}}
\newcommand{\elabsto}[3]{\elabstogen{\tenv}{#1}{\elabarrow}{#2}{#3}}
%% -- elaboration II
\newcommand{\provestogen}[7]{\typestogen{#1;#2}{#3 #4 #5}{#6 \dashv #7}}
\newcommand{\provesto}[4]{\provestogen{\penv}{\tenv}{#1}{\elabarrow}{#2}{#3}{#4}}
%% -- elaboration types
\newcommand{\pmap}{\phi}
\newcommand{\pmapcons}[3]{#1[#2 \mapsto #3]}
\newcommand{\pdom}{\kappa}
\newcommand{\parrow}{\mapsto}
\newcommand{\pvec}{\mathcal{V}}
\newcommand{\prx}{\mathcal{R}}
\newcommand{\pint}{\mathcal{I}}
%% -- evaluation
\newcommand{\bigstep}[2]{#1 \Downarrow #2}
\newcommand{\bigstepplus}[2]{#1 \Downarrow^+ #2}
\newcommand{\smallstepstar}[2]{#1 \rightarrow^* #2}
\newcommand{\smallstep}[2]{#1 \rightarrow #2}
\newcommand{\smallsteparrow}{\rightarrow}
\newcommand{\smallstepstar}[2]{#1 \smallsteparrow^* #2}
\newcommand{\smallstep}[2]{#1 \smallsteparrow #2}
%% -- subtyping
\newcommand{\subt}{\le:}
\newcommand{\subtypesto}[2]{#1 \subt #2}
%% -- metafunctions
%\newcommand{\mgroups}{\textsf{groups}}
\newcommand{\msup}{\mathsf{sup}}
\newcommand{\mlang}{\mathcal{L}}
\newcommand{\msubstrings}{\mathcal{S}}
\newcommand{\mset}[1]{\{\,#1\,\}}
\newcommand{\mrset}[2]{\{\,#1 \mid #2\,\}}
%% -- misc
\newcommand{\esubst}[2]{[#2/#1]}

View File

@ -64,18 +64,18 @@
(define-syntax define: (make-keyword-alias 'define
(lambda (stx)
(or (format-define stx)
(or (rx-define stx)
(format-define stx)
(num-define stx)
(lst-define stx)
(rx-define stx)
;(fun-define stx)
(vec-define stx)))))
(define-syntax let: (make-keyword-alias 'let
(lambda (stx)
(or (format-let stx)
(or (rx-let stx)
(format-let stx)
;(fun-let stx)
(num-let stx)
(lst-let stx)
(rx-let stx)
(vec-let stx)))))