[popl-2017] checkpoint, time to run evaluation
This commit is contained in:
parent
5699412c00
commit
dc0c4bcad8
|
@ -6,7 +6,13 @@ compiled/pearl_scrbl.zo: *.rkt *.scrbl
|
|||
raco make -v $(PAPER).scrbl
|
||||
|
||||
${PAPER}.pdf: pkg setup texstyle.tex
|
||||
scribble ++extra mathpartir.sty ++style texstyle.tex --pdf $(PAPER).scrbl
|
||||
scribble ++extra fig-stlc-surface.tex \
|
||||
++extra fig-stlc-core.tex \
|
||||
++extra fig-elab0.tex \
|
||||
++extra fig-elab1.tex \
|
||||
++extra mathpartir.sty \
|
||||
++style texstyle.tex \
|
||||
--pdf $(PAPER).scrbl
|
||||
|
||||
${PAPER}.tex: pkg setup texstyle.tex
|
||||
scribble ++style texstyle.tex --latex $(PAPER).scrbl
|
||||
|
|
|
@ -1,6 +1,18 @@
|
|||
Outline of paper
|
||||
---
|
||||
|
||||
- λ ∈ ℕ Σ
|
||||
- expand is NOT guaranteed to make well-typed terms; you can call it on any expr.
|
||||
- idea; values carry latent "type" information
|
||||
(but ya know, this is kind of just demonstrating that expand is useful)
|
||||
- (afterwards) add set!
|
||||
nevermind how \tau handles it ... here are rules for expand to handle it
|
||||
- note: OCaml is firm about maintaining a fast compiler, NOT a big standard library
|
||||
- thats a totally reasonable separation of concerns/labor
|
||||
|
||||
TODO
|
||||
- use more RACKET, less math (@racket[...] for identifiers)
|
||||
|
||||
|
||||
0 intro.scrbl
|
||||
---
|
||||
|
@ -12,10 +24,8 @@ other promises
|
|||
|
||||
1 background.scrbl
|
||||
---
|
||||
define typed racket,
|
||||
explain macro API without the word "macro",
|
||||
goals for extensions
|
||||
correctness for extensions
|
||||
define types
|
||||
define macros (without the word 'macro')
|
||||
|
||||
|
||||
2. examples.scrbl
|
||||
|
@ -32,6 +42,14 @@ limits of the extensions (need a value),
|
|||
suggestions to overcome limits (assertions, better analysis),
|
||||
even more extensions + implications
|
||||
|
||||
"""
|
||||
Our technique is implemented as a library of local transformations that
|
||||
compose to form a function @exact{$\elaborate$} defined over syntactically
|
||||
well-formed terms.
|
||||
Using the library is a one-line change for existing programs; however, the
|
||||
user may wish to remove type casts made redundant by the elaboration.
|
||||
"""
|
||||
|
||||
|
||||
4. friends.scrbl
|
||||
---
|
||||
|
|
|
@ -1,21 +1,381 @@
|
|||
#lang scribble/sigplan @onecolumn
|
||||
|
||||
@require["common.rkt"]
|
||||
@require["common.rkt" pict racket/class racket/draw]
|
||||
|
||||
@title[#:tag "sec:background"]{If You Know What I Mean}
|
||||
@; A poet should be of the
|
||||
@; old-fahioned meaningless brand:
|
||||
@; obscure, esoteric, symbolic,
|
||||
@; -- the critics demand it;
|
||||
@; so if there's a poem of mine
|
||||
@; that you do understand
|
||||
@; I'll gladly explain what it means
|
||||
@; till you don't understand it.
|
||||
|
||||
% - language = untyped, in the end we're doing machine instructions
|
||||
% - here is what a function does, here is my notation for describing such things
|
||||
% - easy to make mistakes, so we have type systems to detect errors
|
||||
% - run type system statically but it's not as good as running the program ya know
|
||||
% - so, abstraction
|
||||
% - we want to refine, here is what refinement means
|
||||
|
||||
% this section will die soon but until then here's ideas
|
||||
@; Computers understand a very limited set of instructions.
|
||||
@; It is tedious and error-prone to describe even simple thoughts in terms of these
|
||||
@; instructions, but programming languages provide abstraction mechanisms
|
||||
@; that can make the process bearable.
|
||||
@; For example, low-level or assembly languages can implement the following
|
||||
@; mathematical function for referencing an element of a fixed-length
|
||||
@; data structure:
|
||||
@;
|
||||
@; @exact|{\[
|
||||
@; \RktMeta{ref} = \lambda (\RktMeta{v}~\RktMeta{i})
|
||||
@; \left\{\begin{array}{l l}
|
||||
@; \RktMeta{v}_{i+1} & \RktMeta{if}~\RktMeta{v} = \vectorvn
|
||||
@; \\ ~&\RktMeta{and}~\RktMeta{i} \in \mathbb{N}
|
||||
@; ~\RktMeta{and}~i < n
|
||||
@; \\[4pt]
|
||||
@; \bot & \RktMeta{otherwise}
|
||||
@; \end{array}\right.
|
||||
@; \]}|
|
||||
@;
|
||||
@; @;Here we use the notation @exact|{$\langle \RktMeta{v}_0 \ldots \RktMeta{v}_{n-1} \rangle$}|
|
||||
@; @; to describe a vector of @exact{$n$} elements.
|
||||
@; Whether the implementation is a labeled sequence of instructions or a C function
|
||||
@; is not important; what matters is the precise specification
|
||||
@; and our ability to compute the function via machine.
|
||||
@; Memory-safe functions with clear semantics are the core building blocks for
|
||||
@; sane programs.
|
||||
@;
|
||||
@; As programs grow in size, complexity, and importance, it is desirable to have
|
||||
@; some guarantees about how a program will run before actually running the
|
||||
@; program.
|
||||
@; Ideally, we would statically prove that @racket[ref] is
|
||||
@; never applied to arguments that are not vectors or to out-of-bounds indices.
|
||||
@; @;Such a proof would imply that every call @racket[(ref v i)] in the program
|
||||
@; @; would yield a non-@|bot| value and help show that the overall program works as intended.
|
||||
@; But proving such properties is difficult and there are often many
|
||||
@; functions in a program for which we seek guarantees, so we settle for
|
||||
@; approximate results.
|
||||
@; Instead of statically ruling out all calls @racket[(ref v i)] that produce
|
||||
@; @|bot|, we strive to eliminate a large subset automatically.
|
||||
@;
|
||||
@; Type systems have emerged as a powerful and convenient way of statically detecting
|
||||
@; errors.
|
||||
@; Instead of tracking the flow of exact values through a program, a type system
|
||||
@; tracks the flow of @emph{types} denoting a range of possible runtime values.
|
||||
@; Depending on the type system and programmers' discipline using it, the range
|
||||
@; might be limited enough to show that @racket[ref] never returns @|bot|
|
||||
@; in a program; however, the tradeoff is always the time and energy programmers
|
||||
@; are willing to invest in writing down and maintaining type information.
|
||||
@; As it stands, the most widely used type systems are those that require
|
||||
@; minimal annotations from the programmer and catch only shallow, common errors.
|
||||
@;
|
||||
@;
|
||||
@; @section{Problem Statement and Promises}
|
||||
@;
|
||||
@; @let*[([all-w 150]
|
||||
@; [lang-w (/ all-w 2)]
|
||||
@; [lang-h 20]
|
||||
@; [lang-rectangle
|
||||
@; (lambda (brush-style
|
||||
@; fill-color
|
||||
@; #:width [lang-w lang-w] ;; getting lazy there ben
|
||||
@; #:height [lang-h lang-h]
|
||||
@; #:slant-left [slant-left #f]
|
||||
@; #:border-color [maybe-border-color #f]
|
||||
@; #:border-width [border-width 3])
|
||||
@; (dc (lambda (dc dx dy)
|
||||
@; (define old-brush (send dc get-brush))
|
||||
@; (define old-pen (send dc get-pen))
|
||||
@; (define border-color (or maybe-border-color fill-color))
|
||||
@; (send dc set-brush
|
||||
@; (new brush% [style brush-style] [color fill-color]))
|
||||
@; (send dc set-pen
|
||||
@; (new pen% [width border-width] [color border-color]))
|
||||
@; ;; --
|
||||
@; (define path (new dc-path%))
|
||||
@; (send path move-to 0 0)
|
||||
@; (if slant-left
|
||||
@; (begin (send path line-to (- 0 slant-left) (/ lang-h 2))
|
||||
@; (send path line-to 0 lang-h))
|
||||
@; (send path line-to 0 lang-h))
|
||||
@; (send path line-to lang-w lang-h)
|
||||
@; (send path line-to lang-w 0)
|
||||
@; (send path close)
|
||||
@; (send dc draw-path path dx dy)
|
||||
@; ;; --
|
||||
@; (send dc set-brush old-brush)
|
||||
@; (send dc set-pen old-pen)) lang-w lang-h))]
|
||||
@; [all-e (rectangle all-w lang-h)]
|
||||
@; [hshim (blank (/ all-w 6) lang-h)]
|
||||
@; [untyped-e (hc-append hshim (lang-rectangle 'vertical-hatch "CornflowerBlue" #:border-width 4 #:height (+ lang-h 2)))]
|
||||
@; [typed-e (hc-append (lang-rectangle 'horizontal-hatch "Coral" #:border-width 2) hshim)]
|
||||
@; [reality-of-ml (rc-superimpose (lc-superimpose all-e untyped-e) typed-e)]
|
||||
@; [value-e
|
||||
@; (let ([s 'solid]
|
||||
@; [c "LimeGreen"]
|
||||
@; [w (- lang-w lang-h)]
|
||||
@; [b 1])
|
||||
@; (hc-append (blank (* lang-h 0.8) lang-h)
|
||||
@; (lang-rectangle s c #:width (/ lang-w 3) #:height (/ lang-h 3) #:border-width b)
|
||||
@; (lang-rectangle s c #:border-width b)))]
|
||||
@; [reality-of-tr (lc-superimpose reality-of-ml value-e)]
|
||||
@; )]{@list{
|
||||
@; No matter how precise the type system, static analysis cannot recognize
|
||||
@; all correct programs and reject all incorrect programs.
|
||||
@; Layering a type system on top of an untyped programming language therefore
|
||||
@; leads to a world where the space of all syntactically valid programs
|
||||
@; (drawn as a white box, of course) is partitioned into two overlapping sets.
|
||||
@;
|
||||
@; @centered[reality-of-ml]
|
||||
@;
|
||||
@; On the left, outlined in blue with vertical hatches, we have the set of all untyped programs
|
||||
@; that produce meaningful results (read: non-error, non-diverging terms).
|
||||
@; On the right, outlined in orange with horizontal hatches, we have the set of all programs
|
||||
@; approved by the type system.
|
||||
@; Some type-correct programs raise runtime errors when evaluated, and some
|
||||
@; meaningful programs are rejected by the type checker.
|
||||
@;
|
||||
@; Again, the impossible goal is for the highlighted boxes to overlap.
|
||||
@; Our argument is that adding a @emph{syntactic elaboration} phase before
|
||||
@; typechecking can yield a language described by the
|
||||
@; solid green area shown below, capturing more of the meaningful untyped
|
||||
@; programs and fewer of the typed programs that go wrong, though rejecting
|
||||
@; some typed programs that run to completion but contain a subterm that would
|
||||
@; raise an exception if evaluated (Theorem @exact|{\ref{thm:relevance}}|).
|
||||
@;
|
||||
@; @centered[reality-of-tr]
|
||||
@; }}
|
||||
@;
|
||||
@; Starting from an untyped programming language built from a grammar of terms
|
||||
@; @exact{$e$} with a reduction relation @exact{$e \Downarrow v$}
|
||||
@; and a type judgment @exact{$\vdash e : \tau$} relating terms to
|
||||
@; values @exact{$v$} and types @exact{$\tau$}, we derive an extended language
|
||||
@; by inserting a function @exact{$\elaborate(e)$} before type checking.
|
||||
@; The extended language has a type system @exact{$\vdashe$} consisting of a single
|
||||
@; rule:
|
||||
@;
|
||||
@; @exact|{
|
||||
@; \begin{mathpar}
|
||||
@; \inferrule*{
|
||||
@; \vdash \elaborate(e) : \tau
|
||||
@; }{
|
||||
@; \vdashe e : \tau
|
||||
@; }
|
||||
@; \end{mathpar}
|
||||
@; }|
|
||||
@;
|
||||
@; We also derive a reduction relation on typed terms @exact{$\vdashe e : \tau$}
|
||||
@; as @exact{$\elaborate(e) \Downarrow v$}.
|
||||
@; Type soundness for @exact{$\vdashe$} is thus a corollary of
|
||||
@; type soundness for the @exact{$\vdash$} judgment.
|
||||
@;
|
||||
@; The output of @exact{$\elaborate(e)$} is a term @exact{$e'$},
|
||||
@; though @exact{$e'$} may be a labeled error term
|
||||
@; @exact|{$\bot^{e''}$}| meaning the elaborator detected an error in the program.
|
||||
@; Terms @exact{$e$} such that @exact{$\elaborate(e)$} type checks but @exact{$e$}
|
||||
@; does not are newly expressible in the extended system.
|
||||
@; Terms @exact{$e$} such that @exact{$\vdash e : \tau$} but
|
||||
@; @exact|{$\elaborate(e) = \bot^{e'}$}| are programs validated by the typechecker
|
||||
@; but rejected by the extended system because the subterm @exact{$e'$} of @exact{$e$}
|
||||
@; would raise a runtime error if evaluated.
|
||||
@;
|
||||
@; Our main contribution is defining a useful collection of @emph{local}
|
||||
@; transformations to perform during elaboration.
|
||||
@; Each local elaboration is proven correct in isolation.
|
||||
@; We then define @exact{$\elaborate(e)$} as the union of these local cases
|
||||
@; and a default elaboration that applies @exact{$\elaborate$} to all
|
||||
@; subterms of @exact{$e$}.
|
||||
@;
|
||||
@; To avoid reasoning about terms post-elaboration we will describe
|
||||
@; our local elaborations with type rules of the form: @todo{maybe drop this}
|
||||
@;
|
||||
@; @exact|{
|
||||
@; \begin{mathpar}
|
||||
@; \inferrule*{
|
||||
@; \prope(f, e)
|
||||
@; \\\\
|
||||
@; \vdash f : \tau_f
|
||||
@; \\
|
||||
@; \vdash e : \tau_e
|
||||
@; }{
|
||||
@; \vdashe f~e : \tau
|
||||
@; }
|
||||
@; \end{mathpar}
|
||||
@; }|
|
||||
@;
|
||||
@; where @exact{$\prope$} is a proposition about syntactic terms
|
||||
@; @exact{$f$} and @exact{$e$}
|
||||
@; used to justify the transformation @exact|{$\elaborate(f~e)$}|.
|
||||
@; Despite this inference rule shorthand, elaborations and @exact{$\prope$}
|
||||
@; conditions run before typechecking and have no access to type information
|
||||
@; in the program.
|
||||
@;
|
||||
@; The relationship between terms @exact{$e$} and @exact{$\elaborate(e)$} is given
|
||||
@; by three theorems: @todo{see next section for now}.
|
||||
@;
|
||||
@; Correct elaborations obey these theorems.
|
||||
@; @; Note: there is no guarantee that elaboration produces a well-typed term
|
||||
@; @; because calls like @exact{$\elaborate($}@racket[ref 0 0]@exact{$)$} are allowed.
|
||||
@;
|
||||
@; @; We refrain from making stronger statements about elaborate because
|
||||
@; @; real systems will have lots of other elaborations.
|
||||
|
||||
|
||||
A programming language is used to describe a sequence of instructions that
|
||||
may be carried out by a machine.
|
||||
@section{Syntax Extensions}
|
||||
|
||||
Syntax extension systems have been incorporated in
|
||||
a number of mainstream programming languages.
|
||||
Essentially, syntax extensions let the programmer write code that generates code.
|
||||
In practice this gives programmers the ability to extend the syntax of their
|
||||
language, abstract over textual patterns, and control the evaluation order
|
||||
of expressions.
|
||||
|
||||
Racket's syntax extension API is particularly mature and has inspired
|
||||
similar APIs in at least two other languages, so we adopt it here to introduce
|
||||
syntax extensions.
|
||||
As a first example, suppose we want to assert that expressions
|
||||
@racket[e_1], @racket[e_2], @racket[e_3], and @racket[e_4] all evaluate to
|
||||
the same value.
|
||||
By hand, we might write a sequence of unit tests:
|
||||
@racketblock[
|
||||
(begin (check-equal? e_2 e_1)
|
||||
(check-equal? e_3 e_1)
|
||||
(check-equal? e_4 e_1))
|
||||
]
|
||||
but these tests follow a simple pattern that we can abstract as a @emph{syntax rule},
|
||||
using ellipses (@racket[...]) to capture an arbitrary number of expressions.
|
||||
@racketblock[
|
||||
(define-syntax-rule (check-all-equal? e_1 e_rest ...)
|
||||
(begin (check-equal? e_rest e_1) ...))
|
||||
]
|
||||
Our tests can now be written more concisely:
|
||||
@racketblock[
|
||||
(check-all-equal? e_1 e_2 e_3 e_4)
|
||||
]
|
||||
making them easier to read and maintain.
|
||||
Moreover, we can easily improve the rule to evaluate @racket[e_1] only once
|
||||
instead of @math{n-1} times:
|
||||
@racketblock[
|
||||
(define-syntax-rule (check-all-equal? e_1 e_rest ...)
|
||||
(let ([v e_1])
|
||||
(check-equal? e_rest v) ...))
|
||||
]
|
||||
|
||||
Intuitively, syntax rules perform a search-and-replace before the program
|
||||
is compiled; however, the replacement process is careful to preserve the
|
||||
lexical structure of programs.
|
||||
For instance, a program that uses the same variable name @racket[v] as the syntax rule:
|
||||
@racketblock[
|
||||
(define v 5)
|
||||
(check-all-equal? (+ 2 2) v)
|
||||
]
|
||||
will expand to code that introduces a fresh variable @racket[v1]:
|
||||
@racketblock[
|
||||
(define v 5)
|
||||
(let ([v1 (+ 2 2)])
|
||||
(check-equal? v v1))
|
||||
]
|
||||
to avoid shadowing the programmer's variable with the identifier used inside
|
||||
the syntax rule.
|
||||
Details on how this so-called @emph{hygenic} expansion is implemented and
|
||||
its benefits to extension writers and users are explained by Flatt@~cite[f-popl-2016 fcdb-jfp-2012].
|
||||
|
||||
In addition to pattern-based syntax rules, one can extend Racket with
|
||||
arbitrary functions defined over @emph{syntax objects}.
|
||||
For instance, we can write an extension that formats log messages to a port
|
||||
@racket[log-output] at runtime when the program is compiled with a flag
|
||||
@racket[DEBUG] enabled. If the flag is disabled, we perform a no-op.
|
||||
The expansion of @racket[log] calls happens during compilation:
|
||||
@(begin
|
||||
#reader scribble/comment-reader
|
||||
@codeblock|{
|
||||
;; With DEBUG enabled
|
||||
(log "everything ok")
|
||||
==> (displayln "everything ok" log-output)
|
||||
;; With DEBUG disabled
|
||||
(log "everything still ok")
|
||||
==> (void)
|
||||
}|)
|
||||
|
||||
The @racket[syntax-parse] form is a convenient way to implement @racket[log].
|
||||
In this case, we use @racket[syntax-parse] to deconstruct a syntax object
|
||||
@racket[stx] representing an application of @racket[log].
|
||||
The output of syntax parse is a new syntax object built using the constructor
|
||||
@racket[syntax].
|
||||
@codeblock{
|
||||
(define-syntax (log stx)
|
||||
(syntax-parse stx
|
||||
[(log message)
|
||||
(if DEBUG
|
||||
(syntax (displayln message log-output))
|
||||
(syntax (void)))]))
|
||||
}
|
||||
We can further enhance our implementation with a compile-time check that
|
||||
the value bound to the pattern variable @racket[message] is a string literal.
|
||||
@codeblock{
|
||||
(define-syntax (log stx)
|
||||
(syntax-parse stx
|
||||
[(log message)
|
||||
(unless (string? (syntax->datum (syntax message)))
|
||||
(error "log: expected a string literal"))
|
||||
(if DEBUG
|
||||
(syntax (displayln message log-output))
|
||||
(syntax (void)))]))
|
||||
}
|
||||
@; Alternatively, we can use Racket's @emph{syntax classes} to the same effect.
|
||||
@; The @racket[str] syntax class recognizes literal strings.
|
||||
@; Binding it to the pattern variable @racket[message] causes calls like
|
||||
@; @racket[(log 61)] to raise a compile error.
|
||||
@; @codeblock{
|
||||
@; (define-syntax (log stx)
|
||||
@; (syntax-parse stx
|
||||
@; [(log message:str)
|
||||
@; (if DEBUG
|
||||
@; (syntax (displayln message log-output))
|
||||
@; (syntax (void)))]))
|
||||
@; }
|
||||
With this enhancement, calls like @racket[(log 61)] are rejected statically.
|
||||
Unfortunately, arguments that @emph{evaluate} to string literals
|
||||
are also rejected
|
||||
because the syntax extension cannot statically predict what value an arbitrary
|
||||
expression will reduce to.
|
||||
This is a fundamental limitation, but we can make a small improvement by accepting
|
||||
any @racket[message] produced by another (trusted) syntax extension.
|
||||
Suppose @racket[++] is an extension for concatenating two strings.
|
||||
If we assign a unique @emph{syntax property} to the syntax object produced
|
||||
by @racket[++], we can later retrieve the property in the @racket[log] extension.
|
||||
|
||||
First, we give an implementation of @racket[++] in terms of Racket's
|
||||
built-in @racket[string-append] function, crucially using @racket[syntax-property]
|
||||
to associate the value @racket['string] with the key @racket['static-type].
|
||||
@codeblock{
|
||||
(define-syntax (++ stx)
|
||||
(syntax-parse stx
|
||||
[(++ s1 s2)
|
||||
(syntax-property
|
||||
(syntax (string-append s1 s2))
|
||||
'static-type 'string)]))
|
||||
}
|
||||
Assuming now that the key @racket['static-type] accurately describes the value
|
||||
contained in a syntax object, @racket[log] can accept both string
|
||||
literals and tagged syntax objects.
|
||||
@codeblock{
|
||||
(define-syntax (log stx)
|
||||
(syntax-parse stx
|
||||
[(log message)
|
||||
(define is-string?
|
||||
(string? (syntax->datum (syntax message))))
|
||||
(define expands-to-string?
|
||||
(eq? 'string
|
||||
(syntax-property
|
||||
(local-expand message)
|
||||
'static-type)))
|
||||
(unless (or is-string? expands-to-string?)
|
||||
(error "log: expected a compile-time string"))
|
||||
(if DEBUG
|
||||
(syntax (displayln message log-output))
|
||||
(syntax (void)))]))
|
||||
}
|
||||
|
||||
Our syntax extensions @racket[check-all-equal?], @racket[log], and @racket[++]
|
||||
are indistinguishable from user-defined functions or core language forms,
|
||||
yet they perform useful transformations before a program is typechecked or
|
||||
compiled.
|
||||
This seamless integration gives Racket programmers the ability to grow the
|
||||
language and tailor it to their specific needs.
|
||||
|
||||
@; NOTE: we will use similar 'typed macros' in a coming section... right?
|
||||
|
|
|
@ -16,8 +16,11 @@
|
|||
nrightarrow
|
||||
parag
|
||||
sf
|
||||
sc
|
||||
bot
|
||||
id
|
||||
todo
|
||||
proof
|
||||
)
|
||||
|
||||
(require "bib.rkt"
|
||||
|
@ -114,6 +117,8 @@
|
|||
|
||||
(define (sf x) (elem #:style "sfstyle" x))
|
||||
|
||||
(define (sc x) (exact "\\textsc{" x "}"))
|
||||
|
||||
(define (parag . x) (apply elem #:style "paragraph" x))
|
||||
|
||||
(define (exact . items)
|
||||
|
@ -130,6 +135,9 @@
|
|||
(cons (if term (element #f (list " (" (defterm term) ") ")) " ") x)
|
||||
(mt-line))))
|
||||
|
||||
(define bot (exact "$\\bot$"))
|
||||
|
||||
|
||||
;; Format an identifier
|
||||
;; Usage: @id[x]
|
||||
(define (id x)
|
||||
|
@ -141,3 +149,28 @@
|
|||
(define (second)
|
||||
(make-element (make-style "relax" '(exact-chars))
|
||||
"$2^\\emph{nd}$"))
|
||||
|
||||
(define (defthing title #:thing thing #:tag tag . txt)
|
||||
;; TODO use tag
|
||||
(nested #:style 'inset
|
||||
(list (bold thing " (" (emph title) ")" ":")
|
||||
(exact "~~")
|
||||
txt)))
|
||||
;;(list txt))))
|
||||
|
||||
(define-syntax-rule (define-defthing* id* ...)
|
||||
(begin
|
||||
(begin
|
||||
(define (id* title #:tag [tag #f] . txt)
|
||||
(defthing title #:thing (string-titlecase (symbol->string (object-name id*))) #:tag tag txt))
|
||||
(provide id*)) ...))
|
||||
|
||||
(define-defthing* definition theorem lemma)
|
||||
|
||||
(define (proof . txt)
|
||||
(nested
|
||||
(list (emph "Proof:")
|
||||
(exact "~~")
|
||||
txt
|
||||
(exact "\\hfill\\qed"))))
|
||||
|
||||
|
|
|
@ -8,3 +8,8 @@ My old clock used to tell the time
|
|||
and subdivide diurnity;
|
||||
but now it's lost both hands and chime
|
||||
and only tells eternity.
|
||||
|
||||
|
||||
@; type systems change slowly. That's OK
|
||||
@; syntax extensions can change much faster and YOU can control the
|
||||
@; changes. That's YOU joe programmer or YOU jane street capital. Do you.
|
||||
|
|
121
popl-2017/correctness.scrbl
Normal file
121
popl-2017/correctness.scrbl
Normal file
|
@ -0,0 +1,121 @@
|
|||
#lang scribble/sigplan @onecolumn
|
||||
|
||||
@; TODO
|
||||
@; - revoke the AbsInt strategy
|
||||
@; - work out the "ICFP style" correctness
|
||||
@; - use objects in Haskell / ML for regexp (it's the only subtyping case?)
|
||||
@; ML row polymorphism
|
||||
@; Haskell ... ?
|
||||
@; -
|
||||
|
||||
@require["common.rkt"]
|
||||
|
||||
@title[#:tag "sec:correctness"]{Making Sense}
|
||||
|
||||
The thesis of this paper is that syntax extensions are an effective
|
||||
tool for refining an existing type system.
|
||||
Ultimately we aim to show that given a fixed program and fixed type system,
|
||||
the type system will be able to correctly reject more programs that
|
||||
produce an error at runtime and correctly accept more programs that it
|
||||
could not previously express @emph{if} a layer of syntax extensions may be
|
||||
inserted between parsing and typechecking a program.
|
||||
If library authors are permitted to write such extensions, then the language's
|
||||
type theory will become amenable to domain-specific typing rules without
|
||||
changes to the core language.
|
||||
|
||||
To make our goal precise, we define a simple, low-level programming language in @todo{Figure-1}.
|
||||
On top of this untyped core language we impose the polymorphic type system
|
||||
defined in @todo{Figure-2}.
|
||||
The idea is that user code is validated by the type system before it is compiled
|
||||
to the untyped language and run.
|
||||
Um, we take the untyped language in @todo{Figure-1} as an abstract machine code.
|
||||
|
||||
Consider again the function @racket[vector-ref].
|
||||
In @Secref{sec:background} we claimed that a type system could approximate the
|
||||
specification of @racket[vector-ref] in order to statically detect bugs.
|
||||
We formalize this claim by interpreting functions as relations between their
|
||||
input and output values.
|
||||
|
||||
Start with the untyped function @exact{$\RktMeta{vector-ref}_\lambda$}.
|
||||
For vectors @exact|{$\langle \RktMeta{v}_0 \ldots \RktMeta{v}_n \rangle$}| and
|
||||
indices @exact|{$0 \leq i < n$}|, @racket[vector-ref] returns @exact|{$\RktMeta{v}_i$}|.
|
||||
On all other inputs, @racket[vector-ref] returns @|bot|.
|
||||
|
||||
@exact|{
|
||||
\[ \RktMeta{vector-ref} = \big\{ (\RktMeta{v}, \RktMeta{i}, \RktMeta{v}_i)
|
||||
\mid \RktMeta{vector? v}
|
||||
~~
|
||||
\RktMeta{natural? i}
|
||||
~~
|
||||
\RktMeta{i} < \RktMeta{n}
|
||||
\big\} \]
|
||||
}|
|
||||
|
||||
The typed version of @exact|{$@RktMeta{vector-ref}$}| understands parametericity and
|
||||
drop side conditions
|
||||
|
||||
@exact|{
|
||||
\[ \RktMeta{vector-ref} = \big\{ (\RktMeta{v}, \RktMeta{i}, \RktMeta{v}_i)
|
||||
\mid \RktMeta{vector? v}
|
||||
~~
|
||||
\RktMeta{natural? i}
|
||||
\big\} \]
|
||||
}|
|
||||
|
||||
Sound overapprox because typed is superert
|
||||
|
||||
@; @; Programming language:
|
||||
@; @; vectors strings integers booleans
|
||||
@; @; lambdas
|
||||
@; @; map printf regexp-match
|
||||
@;
|
||||
@; Types provide sound overapproximation
|
||||
@;
|
||||
@; our goal: refine approximation
|
||||
@;
|
||||
@; type soundness is separate, we inherit soundness as corrollary of
|
||||
@; ourtype <= origtype
|
||||
@;
|
||||
|
||||
|
||||
@section{Implementation}
|
||||
The transformations described in this section have been implemented as a Racket package.
|
||||
Our @emph{expand} metafunction is implemented by the Racket macro expander.
|
||||
Each transformation is a macro wrapping a standard library function.
|
||||
It's good.
|
||||
|
||||
Proofs are mechanized as unit tests within the package.
|
||||
Readers are invited to critique our proofs by downloading and using the package.
|
||||
|
||||
|
||||
@section{Comments}
|
||||
Maybe you're wondering why we didn't prove contextual equivalence of our transformations.
|
||||
A few reasons:
|
||||
@itemlist[
|
||||
@item{
|
||||
Contextual equivalence is @emph{hard}, we don't expect library writers
|
||||
to do full proofs of that for every transformation they make.
|
||||
}
|
||||
@item{
|
||||
Our @emph{expand} metafunction is very simple, but in realistic languages
|
||||
it will be only part of a comprehensive syntax extension system.
|
||||
As Wand proved way back when, contextual equivalence for languages with
|
||||
syntax extensions is trivial in the sense that only syntactically equivalent
|
||||
terms are equal.
|
||||
|
||||
Given a two terms @racket[e_1] and @racket[e_2], here is a Racket
|
||||
syntax extension which that distinguishes them.
|
||||
@racketblock[
|
||||
(define-syntax (distinguisher stx)
|
||||
(syntax-parse stx
|
||||
[(_ e_1 e_2)
|
||||
(if (equal? (syntax->datum #'e_1)
|
||||
(syntax->datum #'e_2))
|
||||
#'((λ (x) (x x)) (λ (x) (x x)))
|
||||
#'(λ (x) x))]))
|
||||
]
|
||||
For non-identical terms, @racket[(distinguisher e_1 ???)] is the context
|
||||
@racket[C] that separates them.
|
||||
Specifically @racket[(C e_1)] will diverge and @racket[(C e_2)] will not.
|
||||
}
|
||||
]
|
|
@ -3,8 +3,181 @@
|
|||
@require["common.rkt"]
|
||||
|
||||
@title[#:tag "sec:examples"]{Small Things and Great}
|
||||
@; He that lets
|
||||
@; the small things bind him
|
||||
@; leaves the great
|
||||
@; undone behind him.
|
||||
|
||||
He that lets
|
||||
the small things bind him
|
||||
leaves the great
|
||||
undone behind him.
|
||||
@Figure-ref{fig:expr} defines an untyped, call-by-value @racket[λ] calculus
|
||||
augmented with boolean, character, natural number, and vector literals.
|
||||
The language also includes a set @exact{$\primop$} of primitive operations,
|
||||
described in @Figure-ref{fig:primop} and a labeled error term @exact{$\bot$}.
|
||||
For the moment we focus on two primitive operations: @racket[ref], for dereferencing
|
||||
a vector, and @racket[map], for mapping a function of arity @exact{$m$} over
|
||||
the columns of an @exact{$m \times n$} matrix.
|
||||
We specify these operations with an informal mathematical syntax using
|
||||
@racket[∈] and @racket[=] to dynamically test and pattern match value forms.
|
||||
|
||||
@figure["fig:expr" "Term language"
|
||||
@exact|{\input{fig-language}}|
|
||||
|
||||
]
|
||||
|
||||
@figure["fig:primop" "Primitive operations"
|
||||
@exact|{\input{fig-primops}}|
|
||||
]
|
||||
|
||||
This language is intended to model the untyped core of realistic
|
||||
languages like ML, Haskell, and Typed Racket; albiet using @exact{$\lambda$}
|
||||
notation instead of a machine language.
|
||||
Likewise, the polymorphic type system in @Figure-ref{fig:types} is a standard,
|
||||
implicitly quantified system.
|
||||
@; These types are CLEARLY LIMITED in what they can express,
|
||||
@; but even TR, ML+1stclassmodules, GHC+extensions have limitations
|
||||
@; tho less obvious
|
||||
|
||||
@figure["fig:types" "Type System"
|
||||
@exact|{\input{fig-types}}|
|
||||
]
|
||||
|
||||
@figure["fig:typed-primops" "Typed Primitive Operations"
|
||||
@exact|{\input{fig-typed-primops}}|
|
||||
]
|
||||
|
||||
Using this type system, we can approximate the behavior of @racket[ref] and
|
||||
@racket[map] with the signatures in @Figure-ref{fig:typed-primops}.
|
||||
The type @exact|{$\tau_{\RktMeta{ref}}$}| captures the essence of @racket[ref],
|
||||
but does not express the requirement that @emph{value} of the number
|
||||
@racket[i] in a call @exact|{\RktMeta{(ref~v~i)}}|
|
||||
must be strictly less than the length of the vector @racket[v].
|
||||
The type @exact|{$\tau_{\RktMeta{map}}$}|, on the other hand,
|
||||
is limited compared to the untyped @racket[map] because there is no way
|
||||
to apply a typed function to an unknown number of arguments.@note{Curried functions
|
||||
run into the same limitation.}
|
||||
@;Try implementing @racket[apply f (x1 \ldots xn) = f x1 \ldots xn}.
|
||||
Realistic languages would account for at least the 2-arity case with a second
|
||||
function @exact|{$\RktMeta{map}_2$}| and ask programmers
|
||||
to choose between @racket[map] and @exact|{$\RktMeta{map}_2$}| at each call site.
|
||||
This inability of the type system to express our specifications is a problem.
|
||||
|
||||
|
||||
@section{Refining the Imprecise Types}
|
||||
|
||||
There is no reason we could not revise our type system to make
|
||||
it possible to track the length of vectors and express polymorphism over
|
||||
the arity of functions.
|
||||
In fact, GHC can express the type of fixed-length vectors@~cite[lb-sigplan-2014] and Typed Racket
|
||||
supports variable-arity polymorphism@~cite[stf-esop-2009].
|
||||
But such revisions are sweeping changes to the type system and therefore the
|
||||
language.
|
||||
Moreover, programmers who use the language to create embedded DSLs may
|
||||
wish to encode novel type constraints, as we demonstrate in @Secref{sec:regexp}.
|
||||
|
||||
Rather than alter the type system or accept the status quo,
|
||||
we add the syntax extension system shown in @Figure-ref{fig:syntax-base}.
|
||||
To underscore the fact that syntax extensions are run on syntactic terms
|
||||
rather than runtime values, we format the system as a collection of
|
||||
inference rules.
|
||||
In practice, we could implement these rules with a pattern matcher like
|
||||
Racket's @racket[syntax-parse].
|
||||
@; Users extend by:
|
||||
@; - inhertance, modifying definition of a "parser" object
|
||||
@; - use parameter for recursive call, users update parameter
|
||||
|
||||
Define @exact{$\elaborate(e) = e'$} if and only if @exact{$e \expandsto e'$}.
|
||||
As presented in @Figure-ref{fig:syntax-base}, @exact{$\elaborate$} is a structurally
|
||||
recursive identity function.
|
||||
Still, it satisfies three key properties (the 3 R's of reasonable syntax extensions).
|
||||
Let @exact{$\Downarrow$}
|
||||
implicitly quantify over closing substitutions @exact{$\gamma$}.
|
||||
|
||||
@figure["fig:syntax-base" "Default Syntax Extensions"
|
||||
@exact|{\input{fig-syntax-base.tex}}|
|
||||
]
|
||||
|
||||
@theorem["Refinement"]{
|
||||
If @exact{$\vdash e : \tau$} then @exact{$\vdash \elaborate(e) : \tau'$}
|
||||
and @exact{$\tau' <: \tau$}.
|
||||
Furthermore @exact{$e \Downarrow v$} if and only if @exact{$\elaborate(e) \Downarrow v$}.
|
||||
}
|
||||
|
||||
In other words, elaboration may make the type of a well-typed term more precise.
|
||||
(Use type equality in place of @exact{$<:$} for now)
|
||||
|
||||
|
||||
@theorem["Retraction"]{
|
||||
If @exact{$\not \exists \tau~.~\vdash e : \tau$} but @exact{$\vdash \elaborate(e) : \tau'$}
|
||||
then @exact{$e \Downarrow v$} if and only if @exact{$\elaborate(e) \Downarrow v$}.
|
||||
}
|
||||
|
||||
Desugaring a term to make it type check does not change its dynamic behavior.
|
||||
(Holds trivially because its premise is never met.)
|
||||
|
||||
|
||||
@theorem["Relevance"]{
|
||||
If @exact|{$\elaborate(e) = \bot^{e'}$}| then @exact{$e'$} is a subterm of @exact{$e$}
|
||||
and evaluating @exact{$e'$} (but not necessarily @exact{$e$}) raises a runtime error.
|
||||
}
|
||||
|
||||
Elaboration errors always reflect defects in the user's code and are presented
|
||||
using source terms rather than expanded terms.
|
||||
(Holds because the current version of @exact{$\elaborate$} never yields an error term.)
|
||||
|
||||
We claim that these theorems enable reasoning about un-elaborated terms
|
||||
using the familiar type system and semantics from @Figure-ref["fig:expr" "fig:types"].
|
||||
Henceforth, we say that a term @racket[e] typechecks in the extended language
|
||||
(written @exact{$\vdashe e : \tau$})
|
||||
if there exists a type @exact{$\tau$} such that @exact|{$\Gamma_{\primop} \vdash \elaborate(e) : \tau$}|.
|
||||
A term @racket[e] evaluates to a value @racket[v] in the extended language
|
||||
(@exact{$e \Downarrowe v$}) if and only if @exact{$\elaborate(e) \Downarrow v$}.
|
||||
|
||||
The crucial feature of @exact{$\elaborate$} is that language users can add
|
||||
cases to it.
|
||||
Changing roles from language designers to language users, we leverage this
|
||||
ability in the next sections based on the intuition that the syntax
|
||||
of value forms carries information.
|
||||
|
||||
|
||||
@section{Bounds Checking for @racket[ref]}
|
||||
|
||||
For a first extension, we can check vector references at compile-time when
|
||||
@racket[ref] is called with a vector literal and a natural number.
|
||||
To accomodate future syntax extensions, we recursively expand the arguments to
|
||||
our @racket[ref] extension before checking.
|
||||
|
||||
@exact|{
|
||||
\begin{mathpar}
|
||||
\inferrule{
|
||||
v \expandsto \vectoren
|
||||
\\
|
||||
i \expandsto i'
|
||||
\\\\
|
||||
i' \in \naturals
|
||||
\\
|
||||
i' < n
|
||||
}{
|
||||
\RktMeta{ref}~v~i \expandsto \RktMeta{ref}~\vectoren~i
|
||||
}
|
||||
\end{mathpar}
|
||||
}|
|
||||
|
||||
When this condition is not met,
|
||||
|
||||
|
||||
@section{Typed, Generalized @racket[map]}
|
||||
|
||||
|
||||
|
||||
@section[#:tag "sec:regexp"]{Adding Regular Expressions}
|
||||
|
||||
@; - string-embedded DSL
|
||||
@; - NOT part of core language, just ideas from crazy library writer
|
||||
@; - refine codomain of an operation
|
||||
|
||||
We assume @exact{$\Sigma$} is just the lowercase characters @exact{$a \ldots z$}.
|
||||
To keep the language small, we define strings as vectors of characters
|
||||
and use e.g. @racket{abc} as shorthand for @exact|{$\vectorgen{a, b, c}$}|.
|
||||
|
||||
|
||||
@section[#:tag "sec:define"]{Handling Variables}
|
||||
@; Add a type system --- thanks Alex!
|
||||
|
|
49
popl-2017/fig-elab0.tex
Normal file
49
popl-2017/fig-elab0.tex
Normal file
|
@ -0,0 +1,49 @@
|
|||
\fbox{$\elabsto{e}{e}{\tau}$}
|
||||
|
||||
\begin{mathpar}
|
||||
\inferrule*[left=S-Var]{
|
||||
\Gamma(x) = \tau
|
||||
}{
|
||||
\elabsto{x}{x}{\tau}
|
||||
}
|
||||
|
||||
\inferrule*[left=S-Int]{
|
||||
n \in \ints
|
||||
}{
|
||||
\elabsto{n}{n}{\tint}
|
||||
}
|
||||
|
||||
\inferrule*[left=S-Lam]{
|
||||
\elabsto{e}{e'}{\tau'}
|
||||
}{
|
||||
\elabsto{\vlam{x}{e}}{\vlam{x}{e'}}{\tau \rightarrow \tau'}
|
||||
}
|
||||
|
||||
\inferrule*[left=S-Arr]{
|
||||
\elabsto{e_0}{e_0'}{\tint}
|
||||
\\
|
||||
\ldots
|
||||
\\
|
||||
\elabsto{e_{n-1}}{e_{n-1}'}{\tint}
|
||||
}{
|
||||
\elabsto{\vectoren}{\vectorgen{e_0',\ldots,e_{n-1}'}}{\tarray}
|
||||
}
|
||||
|
||||
\inferrule*[left=S-App]{
|
||||
\elabsto{e_1}{e_1'}{\tau \rightarrow \tau'}
|
||||
\\
|
||||
\elabsto{e_2}{e_2'}{\tau}
|
||||
}{
|
||||
\elabsto{e_1~e_2}{e_1'~e_2'}{\tau'}
|
||||
}
|
||||
|
||||
\inferrule*[left=S-Ref]{
|
||||
\elabsto{e_1}{e_1'}{\tarray}
|
||||
\\
|
||||
\elabsto{e_2}{e_2'}{\tint}
|
||||
}{
|
||||
\elabsto{\aref{e_1}{e_2}}{\checkedref~e_1'~e_2'}{\tint}
|
||||
}
|
||||
|
||||
\end{mathpar}
|
||||
|
32
popl-2017/fig-elab1.tex
Normal file
32
popl-2017/fig-elab1.tex
Normal file
|
@ -0,0 +1,32 @@
|
|||
\fbox{$\elabsto{e}{e}{\tau}$}
|
||||
|
||||
\begin{mathpar}
|
||||
|
||||
\inferrule*[left=S-RefPass]{
|
||||
\elabsto{e_1}{\vectorvn}{\tarray}
|
||||
\\
|
||||
\elabsto{e_2}{i}{\tint}
|
||||
\\\\
|
||||
i \in \ints
|
||||
\\
|
||||
0 \le i < n
|
||||
}{
|
||||
\elabsto{\aref{e_1}{e_2}}{\unsaferef~\vectorvn~i}{\tint}
|
||||
}
|
||||
|
||||
|
||||
\inferrule*[left=S-RefFail]{
|
||||
\elabsto{e_1}{\vectorvn}{\tarray}
|
||||
\\
|
||||
\elabsto{e_2}{i}{\tint}
|
||||
\\\\
|
||||
i \in \ints
|
||||
\\
|
||||
i < 0 ~~\vee~~ i \ge n
|
||||
}{
|
||||
\elabsto{\aref{e_1}{e_2}}{\indexerror}{\tint}
|
||||
}
|
||||
|
||||
\end{mathpar}
|
||||
|
||||
|
3
popl-2017/fig-reality-of-tr.tex
Normal file
3
popl-2017/fig-reality-of-tr.tex
Normal file
|
@ -0,0 +1,3 @@
|
|||
\begin{center}
|
||||
to do
|
||||
\end{center}
|
11
popl-2017/fig-regexp.tex
Normal file
11
popl-2017/fig-regexp.tex
Normal file
|
@ -0,0 +1,11 @@
|
|||
\[
|
||||
\begin{array}{r l l}
|
||||
\RktMeta{\++} & = & \lambda\,\vectorgen{v_0~v_1}\,.\,
|
||||
\left\{\begin{array}{l l}
|
||||
\vectorgen{v_{00}, \ldots, v_{0m}, v_{10}, \ldots, v_{1n}}
|
||||
& \RktMeta{if}~v_0 = \vectorgen{v_{00}, \ldots, v_{0m}}
|
||||
~\RktMeta{and}~v_1 = \vectorgen{v_{10}, \ldots, v_{1n}}
|
||||
\\\bot & \RktMeta{otherwise}
|
||||
\end{array}\right.
|
||||
\end{array}
|
||||
\]
|
73
popl-2017/fig-stlc-core.tex
Normal file
73
popl-2017/fig-stlc-core.tex
Normal file
|
@ -0,0 +1,73 @@
|
|||
\[
|
||||
\begin{array}{l l l}
|
||||
\delta & \coloneq & \checkedref \mid \unsaferef
|
||||
\\[8pt]
|
||||
\ectx & \coloneq & \ehole \mid \ectx~e \mid (\vlam{x}{e})~\ectx \mid \vectorgen{v,\ldots,\ectx,e,\ldots} \mid
|
||||
\\ & & \delta~E~e \mid \delta~v~E
|
||||
\end{array}
|
||||
\]
|
||||
|
||||
\fbox{$\typesto{\delta~e~e}{\tau}$}
|
||||
\begin{mathpar}
|
||||
\inferrule*[left=T-Check]{
|
||||
\typesto{e_1}{\tarray}
|
||||
\\
|
||||
\typesto{e_2}{\tint}
|
||||
}{
|
||||
\typesto{\checkedref~e_1~e_2}{\tint}
|
||||
}
|
||||
|
||||
\inferrule*[left=T-Unsafe]{
|
||||
\typestoclosed{\vectorvn}{\tarray}
|
||||
\\
|
||||
\typestoclosed{i}{\tint}
|
||||
\\\\
|
||||
i \in \ints
|
||||
\\
|
||||
0 \leq i < n
|
||||
}{
|
||||
\typesto{\unsaferef~\vectorvn~i}{\tint}
|
||||
}
|
||||
\end{mathpar}
|
||||
|
||||
|
||||
\fbox{$\smallstep{e}{e}$}
|
||||
\begin{mathpar}
|
||||
\inferrule*[left=E-Ctx]{
|
||||
\smallstep{e}{e'}
|
||||
}{
|
||||
\smallstep{\ectx[e]}{\ectx[e']}
|
||||
}
|
||||
|
||||
\inferrule*[left=E-Lam]{
|
||||
}{
|
||||
\smallstep{(\vlam{x}{e})~v}{e\esubst{x}{v}}
|
||||
}
|
||||
|
||||
\inferrule*[left=E-CheckPass]{
|
||||
0 \le i < n
|
||||
}{
|
||||
\smallstep{\checkedref~\vectorvn~i}{v_i}
|
||||
}
|
||||
|
||||
\inferrule*[left=E-CheckFail]{
|
||||
i < 0 ~~\vee~~ i \ge n
|
||||
}{
|
||||
\smallstep{\checkedref~\vectorvn~i}{\indexerror}
|
||||
}
|
||||
|
||||
\inferrule*[left=E-UnsafePass]{
|
||||
0 \le i < n
|
||||
}{
|
||||
\smallstep{\unsaferef~\vectorvn~i}{v_i}
|
||||
}
|
||||
|
||||
\inferrule*[left=E-UnsafeFail]{
|
||||
i < 0 ~~\vee~~ i \ge n
|
||||
\\
|
||||
x \in \ints \cup \{ \segfault \}
|
||||
}{
|
||||
\smallstep{\unsaferef~\vectorvn~i}{x}
|
||||
}
|
||||
\end{mathpar}
|
||||
|
61
popl-2017/fig-stlc-surface.tex
Normal file
61
popl-2017/fig-stlc-surface.tex
Normal file
|
@ -0,0 +1,61 @@
|
|||
\[
|
||||
\begin{array}{l l l}
|
||||
v & \coloneq & x \mid \vlam{x}{e} \mid \ints \mid \vectorvn
|
||||
\\[8pt]
|
||||
e & \coloneq & v \mid e~e \mid \vectoren \mid \aref{e}{e}
|
||||
\\[8pt]
|
||||
\tau & \coloneq & \tint \mid \tarray \mid \tau \rightarrow \tau
|
||||
\\[8pt]
|
||||
\tenv & \coloneq & \tenvempty \mid x:\tau,\tenv
|
||||
\end{array}
|
||||
\]
|
||||
|
||||
|
||||
\fbox{$\typesto{e}{\tau}$}
|
||||
\begin{mathpar}
|
||||
\inferrule*[left=T-Var]{
|
||||
\tenv(x) = \tau
|
||||
}{
|
||||
\typesto{x}{\tau}
|
||||
}
|
||||
|
||||
\inferrule*[left=T-Lam]{
|
||||
\typestogen{x:\tau,\tenv}{e}{\tau'}
|
||||
}{
|
||||
\typesto{\vlam{x}{e}}{\tau \rightarrow \tau'}
|
||||
}
|
||||
|
||||
\inferrule*[left=T-App]{
|
||||
\typesto{e_1}{\tau \rightarrow \tau'}
|
||||
\\
|
||||
\typesto{e_2}{\tau}
|
||||
}{
|
||||
\typesto{e_1~e_2}{\tau'}
|
||||
}
|
||||
|
||||
\inferrule*[left=T-Int]{
|
||||
i \in \ints
|
||||
}{
|
||||
\typesto{i}{\tint}
|
||||
}
|
||||
|
||||
\inferrule*[left=T-Arr]{
|
||||
\typesto{e_0}{\tint}
|
||||
\\
|
||||
\ldots
|
||||
\\
|
||||
\typesto{e_{n-1}}{\tint}
|
||||
}{
|
||||
\typesto{\vectoren}{\tarray}
|
||||
}
|
||||
|
||||
\inferrule*[left=T-Ref]{
|
||||
\typesto{e_1}{\tarray}
|
||||
\\
|
||||
\typesto{e_2}{\tint}
|
||||
}{
|
||||
\typesto{\aref{e_1}{e_2}}{\tint}
|
||||
}
|
||||
|
||||
\end{mathpar}
|
||||
|
|
@ -4,174 +4,116 @@
|
|||
|
||||
@title[#:tag "sec:intro"]{Type Eleborators Need APIs}
|
||||
|
||||
The implementations of richhly typed programming languages tend to employ
|
||||
an elaboration pass. As the elaborator traverses the (parsed) synatx, it
|
||||
simultaneously reconstructs types, checks their consistency according to
|
||||
the underlying type theory, replaces many of the surface-syntax constructs
|
||||
with constructs from the kernel language, and inserts type information to
|
||||
create a (n often) fully annotated representation.
|
||||
Type systems such as ML's, Haskell's, or Scala's help developers convey
|
||||
their thinking to their successors in a sound and concise manner. The
|
||||
implementations of such type systems tend to employ elaboration passes to
|
||||
check the types of a specific program. As an elaborator traverses the
|
||||
(parsed) synatx, it simultaneously reconstructs types, checks their
|
||||
consistency according to the underlying type theory, replaces many of the
|
||||
surface-syntax constructs with constructs from the kernel language, and
|
||||
inserts type information to create an annotated representation.
|
||||
|
||||
Some (implementations of) programming languages also support a way to
|
||||
Some (implementations of such) programming languages also support a way to
|
||||
programmatically direct the elaborator. For example, Rust and Scala come
|
||||
with compiler plug-ins. Typed Racket and Types Clojure inherit the macro
|
||||
mechanisms of their parent languages.
|
||||
mechanisms of their parent languages. Here we refer to such mechanisms as
|
||||
@defterm{elaborator API}s.
|
||||
|
||||
In this paper, we show that such APIs allow programmers to tailor the
|
||||
underlying type theories to their needs and that such tailorings look
|
||||
highly promising (section 2). The ``tailor'' can ... correctness ...
|
||||
Equipping type-checking elaborators with APIs---or using the existing
|
||||
APIs---promises new ways to expand the power of type checking at a
|
||||
relatively low cost. Specifically, the developers of libraries can
|
||||
@defterm{tailor} typing rules to the exported functions and linguistic
|
||||
constructs.
|
||||
|
||||
For convenience, we use Typed Racket and its
|
||||
API to the elaborator (section 3). To illustrate the usefulness of the
|
||||
idea, we implement two tailorings. The first one---chosen for the wide
|
||||
familiarity of the domain---enriches the type-checking of
|
||||
vector-referencing operations (section 4). The second example explains how
|
||||
the implementor of a string-based embedded DSL---a regexp-matching
|
||||
DSL---may tailor the types of the interpretation function (section 5). Our
|
||||
evaluation validate that both tailorings reduce the number of casts that
|
||||
the programmer or the elaborator have to insert.
|
||||
Consider the example of tailoring the API of a library that implements a
|
||||
string-based, domain-specific languages. Examples of such libraries abound:
|
||||
formatting, regular-expression matching, database queries, and so on. The
|
||||
creators of these DSLs tend to know a lot about how programs written in
|
||||
these DSLs relate to the host program, but they cannot express this
|
||||
knowledge in API types without resorting to such rich systems as
|
||||
dependent type theory.
|
||||
|
||||
Type tailoring allows such programmers to refine the typing rules for the
|
||||
APIs in a relatively straightforward way. Recall the API of the
|
||||
regular-expression library. In a typical typed language, the matching
|
||||
function is exported with a type like this:
|
||||
@;
|
||||
@verbatim[#:indent 4]{
|
||||
|
||||
reg_exp_match: String -> Option [Listof String]
|
||||
|
||||
}
|
||||
@;
|
||||
When the result is a list of strings, the length of the list depends on the
|
||||
argument string---but the API for the regular-expression library cannot
|
||||
express this relationship between the regular expression and the
|
||||
surrounding host program. As a result, the author of the host program must
|
||||
inject additional complexity, often in the form of (hidden) run-time checks.
|
||||
|
||||
If type tailoring is available, the creator of the library can refine the
|
||||
type of @tt{reg_exp_match} with rules such as these:
|
||||
@;
|
||||
@verbatim[#:indent 4]{
|
||||
|
||||
Program |- s : does not contain grouping
|
||||
------------------------------------------------
|
||||
G |- reg_exp_match(s) : Option [List String]
|
||||
|
||||
|
||||
Program |- s : contains one grouping w/o alternative
|
||||
-----------------------------------------------------
|
||||
G |- reg_exp_match(s) : Option [List String String]
|
||||
|
||||
}
|
||||
@;
|
||||
That is, when the extended elaborator can use (a fragment of) the program
|
||||
to prove that the given string for a specific use of @tt{reg_exp_match}
|
||||
does not contain grouping specifications---ways to extract a sub-string via
|
||||
matching---then the @tt{Some} result type is a list that contains a single
|
||||
string. Similarly, if the string contains exactly one such grouping (and no
|
||||
alternative), then a match produces a list of two strings. In all other
|
||||
cases, the type checker uses the default type for the function.
|
||||
|
||||
A vector library is another familiar example that offers opportunities for
|
||||
type tailoring. Often such a library exports a dereferencing construct with
|
||||
the following type rule:
|
||||
@;
|
||||
@verbatim[#:indent 4]{
|
||||
|
||||
G |- v : Vector[X] G |-e : Integer
|
||||
----------------------------------------
|
||||
G |- v[e] ~~> checked_ref(v,e) : X
|
||||
}
|
||||
@;
|
||||
If the elaborator can prove, however, that the indexing expression @tt{e}
|
||||
is equal to or evaluates to a specific integer @tt{i}, the rule can be
|
||||
strengthened as follows:
|
||||
@;
|
||||
@verbatim[#:indent 4]{
|
||||
|
||||
Program |- e = i for some i in Natural
|
||||
&& Program |- v is vector of length n
|
||||
&& i < n
|
||||
|
||||
G |- v : Vector[X] G |-e : Integer
|
||||
----------------------------------------
|
||||
G |- v[e] ~~> unsafe-ref(v,e) : X
|
||||
}
|
||||
@;
|
||||
That is, the elaborator can then eliminate a possibly expensive run-time
|
||||
check.
|
||||
|
||||
|
||||
|
||||
|
||||
Typecheckers for polymorphic languages like Haskell incredibly wasteful.
|
||||
Given an AST, they immediately throw away the rich @emph{syntax} of expression
|
||||
and value forms in favor of a @emph{type} abstraction.
|
||||
So whereas any novice programmer can tell that the expression @tt{(/ 1 0)}
|
||||
will go wrong, the typechecker cannot because all it sees is @tt{(/ @emph{Num} @emph{Num})}.
|
||||
|
||||
These typecheckers are also incredibly stubborn, and cannot handle basic
|
||||
forms of polymorphism that are not part of their type algebra.
|
||||
For instance, the behavior of the function @tt{map} can be stated in clear
|
||||
English:
|
||||
|
||||
@nested[@elem[#:style 'italic]{
|
||||
@tt{(map proc lst ...+) → list?}
|
||||
Applies @tt{proc} to the elements of the @tt{lsts} from the first elements to the last.
|
||||
The @tt{proc} argument must accept the same number of arguments as the number
|
||||
of supplied @tt{lsts}, and all @tt{lsts} must have the same number of elements.
|
||||
The result is a list containing each result of @tt{proc} in order.
|
||||
}]
|
||||
|
||||
But the Haskell typechecker can only validate indexed versions of @tt{map}
|
||||
defined for a fixed number of lists.
|
||||
Programmers are thus forced to choose between @tt{map}, @tt{zipWith}, and @tt{zipWith3}
|
||||
depending on the number of lists they want to map over.@note{Maybe need a better
|
||||
example than @tt{map} because polydots handle it. @tt{curry}?}
|
||||
|
||||
There are a few solutions to these problems. Blah blah.
|
||||
They all require migrating to a new programming language or instrumenting call
|
||||
sites across the program, neither of which are desirable solutions for the
|
||||
working programmer.
|
||||
|
||||
We propose @emph{extending} a language's type system with
|
||||
@emph{introspective} typing rules that capture value information as well as type
|
||||
information.
|
||||
Given a proof theory @exact|{$\Progn \vDash {\tt P}$}| for inferring propositions
|
||||
@tt{P} from the program text @exact{$\Progn$}, the rule for division would be:
|
||||
|
||||
@exact|{
|
||||
\begin{mathpar}
|
||||
\inferrule*[]{
|
||||
\Gamma; \Progn \vdash n_1 : \tnum
|
||||
\\
|
||||
\Gamma; \Progn \vdash n_2 : \tnum
|
||||
\\\\
|
||||
\Progn \vDash n_2 \neq 0
|
||||
}{
|
||||
\Gamma; \Progn \vdash {\tt /}~n_1~n_2 : \tnum
|
||||
}
|
||||
\end{mathpar}
|
||||
}|
|
||||
|
||||
and the rule for @tt{map} would be:
|
||||
|
||||
@exact|{
|
||||
\begin{mathpar}
|
||||
\inferrule*{
|
||||
\Gamma; \Progn \vdash f : \overline{\tvar{A}} \rightarrow \tvar{B}
|
||||
\\
|
||||
\Gamma; \Progn \vdash \overline{x} : \overline{\tlist{A}}
|
||||
\\\\
|
||||
\Progn \vDash {\tt length}(\overline{\tvar{A}}) = {\tt length}(\overline{\tlist{A}})
|
||||
}{
|
||||
\Gamma; \Progn \vdash {\tt map}~f~\overline{x} : \tlist{B}
|
||||
}
|
||||
\end{mathpar}
|
||||
}|
|
||||
|
||||
These rules would augment---not replace---the existing rules for division
|
||||
and @tt{map}.
|
||||
In the case that no information about subterms can be inferred from the program
|
||||
text, we defer to the division rule that admits runtime errors and the @tt{map}
|
||||
rule that can only be called with a single list.
|
||||
|
||||
|
||||
@; =============================================================================
|
||||
@parag{Contributions}
|
||||
@itemlist[
|
||||
@item{
|
||||
We propose ``typechecking modulo theories'' (or something):
|
||||
using the text of the program to refine typechecking judgments.
|
||||
}
|
||||
@item{
|
||||
As a proof of concept, we implement a simple theory within the Racket
|
||||
macro expander and use it enhance the user experience of Typed Racket.
|
||||
The implementation is just a library; we suspect Typed Clojure and TypeScript
|
||||
programmers could build similar libraries using Clojure's macros and sweet.js.
|
||||
}
|
||||
@item{
|
||||
Our Racket implementation leverages a novel class of macros.
|
||||
We define the class, state proof conditions for a new macro
|
||||
to enter the class, and prove our macros are in the class.
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
@; =============================================================================
|
||||
@parag{Guarantees}
|
||||
@itemlist[
|
||||
@item{
|
||||
Refinement of the existing type system.
|
||||
Adding @exact{$\Progn \vDash$} only rejects programs that would go
|
||||
wrong dynamically and/or accept programs that are ill-typed, but go
|
||||
right dynamically.
|
||||
}
|
||||
@item{
|
||||
Errors reported by our implementation are in terms of the source program,
|
||||
not of expanded/transformed code.
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
@; =============================================================================
|
||||
@parag{Applications}
|
||||
|
||||
This table summarizes the ICFP pearl.
|
||||
We infer values from various syntactic domains and use the inferred data to
|
||||
justify small program transformations.
|
||||
|
||||
@tabular[#:style 'boxed
|
||||
#:sep @hspace[2]
|
||||
#:row-properties '(bottom-border ())
|
||||
#:column-properties '(right right right)
|
||||
(list (list @bold{Domain} @bold{Infers} @bold{Use})
|
||||
(list "format strings" "arity + type" "guard input")
|
||||
(list "regexp strings" "# groups" "prove output")
|
||||
(list "λ" "arity" "implement curry_i")
|
||||
(list "numbers" "value" "constant folding")
|
||||
(list "vectors" "size" "guard, optimize")
|
||||
(list "sql schema" "types" "guard input, prove output"))]
|
||||
|
||||
@; =============================================================================
|
||||
@parag{Lineage}
|
||||
|
||||
Herman and Meunier used macros for partial evaluation of string DSLs@~cite[hm-icfp-2004].
|
||||
Lorenzen and Erdweg defined criteria for sane deguarings@~cite[le-popl-2016].
|
||||
|
||||
This paper demonstrates the idea with concrete case studies of type
|
||||
tailoring (section 2) in the context of Typed Racket and its API to the
|
||||
elaborator (section 3). To illustrate the usefulness of the idea, we
|
||||
implement two tailorings. The first one---chosen for the wide familiarity
|
||||
of the domain---enriches the type-checking of vector-referencing operations
|
||||
(section 4). The second example explains how the implementor of a
|
||||
string-based embedded DSL---a regexp-matching DSL---may tailor the types of
|
||||
the interpretation function (section 5). Our evaluations confirm that these
|
||||
tailorings reduce the number of run-time checks that the programmer or the
|
||||
elaborator have to insert into the host program. In addition, we sketch
|
||||
several other applications of type tailoring in Typed Racket (section
|
||||
6). We also explain how the creator of such libraries can refine the
|
||||
existing type soundness proof of the host language to argue the correctness
|
||||
of the tailoring.
|
||||
|
|
|
@ -3,22 +3,27 @@
|
|||
@(require "common.rkt")
|
||||
|
||||
@title{Tailoring Type Theories (T.T.T)}
|
||||
@authorinfo["Piet Hein" "Dutchman" "piet at hein.com"]
|
||||
@authorinfo["Piet Hein" "Funen, Denmark" "gruk at piethein.com"]
|
||||
|
||||
@abstract{
|
||||
|
||||
A static type system is a compromise between precision and usability.
|
||||
Improving the ability of a type system to distinguish correct and
|
||||
erroneous programs typically requires that programmers restructure their
|
||||
code or provide more type annotations, neither of which are desirable
|
||||
tasks.
|
||||
Many typed APIs implicitly acknowledge the @emph{diktat} that the host type
|
||||
system imposes on the creators of libraries. When a library implements a
|
||||
string-based domain-specific language, the problem is particularly obvious.
|
||||
The interpretation functions for the programs in this embedded language
|
||||
come with the uninformative type that maps a string to some other host
|
||||
type. Only dependently typed languages can improve on this scenario at the
|
||||
moment, but they impose a steep learning curve on programmers.
|
||||
|
||||
This paper presents an elaboration-based technique for refining the
|
||||
analysis of an existing type system on existing code without changing the
|
||||
type system or the code. As a proof of concept, we have implemented the
|
||||
technique as a Typed Racket library. From the programmers' viewpoint,
|
||||
simply importing the library makes the type system more perceptive---no
|
||||
annotations or new syntax are required.
|
||||
This paper proposes to tackle this problem with APIs for type
|
||||
checkers. Specifically, it observes that most typed languages already
|
||||
employ an elaboration pass to type-check programs. If this elaborator
|
||||
comes with a sufficiently rich API, the author of a library can supplement
|
||||
the default types of the library's API with typing rules that improve the
|
||||
collaboration between host programs and uses of the library. The
|
||||
evaluation uses a prototype for Typed Racket and illustrates how useful
|
||||
the idea is for widely available libraries. Also the paper sketches how
|
||||
the authors of such ``tailored'' rules can argue their soundness.
|
||||
|
||||
}
|
||||
|
||||
|
@ -29,11 +34,12 @@ This paper presents an elaboration-based technique for refining the
|
|||
@; See OUTLINE.md for explanation
|
||||
@include-section{intro.scrbl}
|
||||
@include-section{background.scrbl}
|
||||
@include-section{examples.scrbl}
|
||||
@include-section{discussion.scrbl}
|
||||
@include-section{friends.scrbl}
|
||||
@include-section{related-work.scrbl}
|
||||
@include-section{conclusion.scrbl}
|
||||
@include-section{segfault.scrbl}
|
||||
@;@include-section{examples.scrbl}
|
||||
@;@include-section{discussion.scrbl}
|
||||
@;@include-section{friends.scrbl}
|
||||
@;@include-section{related-work.scrbl}
|
||||
@;@include-section{conclusion.scrbl}
|
||||
|
||||
@section[#:style 'unnumbered]{Acknowledgments}
|
||||
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
|
||||
@title[#:tag "sec:related-work"]{Experts}
|
||||
|
||||
@section{SoundX}
|
||||
SoundX is a system for modeling programming languages and defining type-sound
|
||||
extensions, e.g. defining a type derivation for @tt{let} in terms of a type
|
||||
derivation for @tt{λ} expressions.
|
||||
|
@ -32,3 +33,35 @@ We propose a deeper notion of correctness for our syntactic transformations, but
|
|||
]
|
||||
Though we fail on the third point.
|
||||
|
||||
|
||||
@section{Wepa}
|
||||
|
||||
Our general approach and outlook on type soundness is informed by Cousot.
|
||||
|
||||
@section{Compiler Plugings}
|
||||
|
||||
GHC (constraint solvers)
|
||||
Rust (macros)
|
||||
Scala (macros)
|
||||
|
||||
|
||||
@section{Parsec}
|
||||
|
||||
Haskell / ML don't really use regular expressions.
|
||||
They use parser combinators.
|
||||
|
||||
Matthias: give up ENTIRE TOOLCHAIN if you go external
|
||||
|
||||
|
||||
@section{Typechecker Plugins}
|
||||
OutsideIn
|
||||
Adam Gundry units plugin
|
||||
Fsharp packs?
|
||||
extensible records for haskell
|
||||
|
||||
@section{Why no types?}
|
||||
Could save work in map if we got function arity from type.
|
||||
Clearly strong argument for mixing types and syntax extensions.
|
||||
But that's research; requires careful design and definitely not a drop-in
|
||||
solution like we propose here.
|
||||
(Would apparaently require sweeping changes)
|
||||
|
|
205
popl-2017/segfault.scrbl
Normal file
205
popl-2017/segfault.scrbl
Normal file
|
@ -0,0 +1,205 @@
|
|||
#lang scribble/sigplan @onecolumn
|
||||
|
||||
@require["common.rkt"]
|
||||
|
||||
@title[#:tag "sec:segfault"]{Well Typed Programs do not go SEGFAULT}
|
||||
|
||||
@Figure-ref{fig:stlc} describes a simply typed @exact{$\lambda$} calculus
|
||||
with integers and integer arrays.
|
||||
@Figure-ref{fig:elab0} gives a type-directed elaboration of terms written
|
||||
in this surface language to a typed, executable core language.
|
||||
The main difference between surface and core is that array references
|
||||
@exact{$\aref{e}{e}$} are replaced with calls to a primitive operation
|
||||
@exact{$\checkedref$}.
|
||||
For example, the term @exact|{$\aref{x}{3}$}| elaborates to @exact|{$\checkedref~x~3$}|
|
||||
when @exact{$x$} is a variable with type @exact{$\tarray$}.
|
||||
|
||||
The operational semantics for the core language are given in @Figure-ref{fig:stlc-core},
|
||||
along with type judgments for two primitive operations:
|
||||
@exact{$\checkedref$} and @exact{$\unsaferef$}.
|
||||
Intuitively, @exact{$\checkedref$} is a memory-safe function that performs a bounds check before
|
||||
dereferencing an array and gives a runtime error when called with incompatible values.
|
||||
On the other hand, @exact{$\unsaferef$} does not perform a bounds check and therefore
|
||||
may return arbitrary data or raise a memory error when called with an invalid index.
|
||||
These negative consequences are modeled in the (family of) rule(s) @exact|{\textsc{E-UnsafeFail}}|.
|
||||
Consequently, the judgment @exact{$\smallstep{e}{e}$} and its transitive closure
|
||||
@exact{$\smallstepstar{e}{e}$} are relations, whereas
|
||||
typing and elaboration are proper functions.
|
||||
|
||||
@figure["fig:stlc" "Simply-Typed λ Calculus" #:style left-figure-style
|
||||
@exact|{\input{fig-stlc-surface}}|
|
||||
]
|
||||
|
||||
@figure["fig:elab0" "Elaborating typed terms" #:style left-figure-style
|
||||
@exact|{\input{fig-elab0}}|
|
||||
]
|
||||
|
||||
@figure["fig:stlc-core" "Core Language" #:style left-figure-style
|
||||
@exact|{\input{fig-stlc-core}}|
|
||||
]
|
||||
|
||||
Evaluation of the surface language in @Figure-ref{fig:stlc}, however,
|
||||
is deterministic.
|
||||
|
||||
@definition["evaluation"]{
|
||||
@exact{$\bigstep{e}{v}$} if and only if @exact{$e$} is closed and there
|
||||
exists @exact{$e'$}, @exact{$\tau$} such that
|
||||
@exact|{$\elabstoclosed{e}{e'}{\tau}$}|
|
||||
and @exact{$\smallstepstar{e'}{v}$}.
|
||||
}
|
||||
|
||||
@theorem["determinism"]{
|
||||
Evaluation is a proper function.
|
||||
In other words, if @exact{$\bigstep{e}{v}$}
|
||||
and @exact{$\bigstep{e}{v'}$}
|
||||
then @exact{$v = v'$}.
|
||||
}
|
||||
|
||||
@proof{
|
||||
By induction on terms of the core language, if
|
||||
@exact{$\smallstep{e}{e'}$} and @exact{$e$} is not of the form
|
||||
@exact{$\unsaferef~e_1~e_2$} then @exact{$e'$} is unique.
|
||||
The theorem follows because the surface language does not allow
|
||||
@exact{$\unsaferef$} and elaboration does not introduce unsafe references.
|
||||
}
|
||||
|
||||
Determinism implies that evaluation never invokes the @sc{E-UnsafeFail} rule;
|
||||
therefore, evaluation of surface terms is memory safe.
|
||||
Additionally, the surface language is type safe.
|
||||
|
||||
@theorem["soundness"]{
|
||||
If @exact{$e$} is closed and @exact|{$\elabstoclosed{e}{e'}{\tau}$}|
|
||||
then one of the following holds:
|
||||
@itemlist[
|
||||
@item{
|
||||
@exact{$e'$} is a value
|
||||
}
|
||||
@item{
|
||||
@exact{$\smallstep{e'}{\indexerror}$}
|
||||
}
|
||||
@item{
|
||||
@exact{$\smallstep{e'}{e''}$}
|
||||
and @exact{$\typestoclosed{e''}{\tau}$}
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
@proof{
|
||||
The interesting cases involve @exact{$\checkedref$}.
|
||||
First, @exact|{$\elabstoclosed{\aref{e_1}{e_2}}{\checkedref~e_1~e_2}{\tint}$}|
|
||||
implies @exact{$\typestoclosed{e_1}{\tarray}$}
|
||||
and @exact{$\typestoclosed{e_2}{\tint}$}.
|
||||
Depending on the values of @exact{$e_1$} and @exact{$e_2$},
|
||||
the application steps to either @exact{$\indexerror$} or the @exact{$e_2$}-th
|
||||
element of the array @exact{$e_1$}---which, by assumption, has type @exact{$\tint$}.
|
||||
}
|
||||
|
||||
|
||||
@section{Extending the Elaborator}
|
||||
|
||||
Building on the foundation of type soundness, we extend the elaborator with
|
||||
the @emph{value-directed} rules in @Figure-ref{fig:elab1}.
|
||||
When the elaborator can prove that an array reference is in bounds based on
|
||||
the syntax of terms, it produces an unsafe reference.
|
||||
Conversely, the elaborator raises a compile-time index error if it can prove
|
||||
the index is outside the bounds of the array.
|
||||
|
||||
@figure["fig:elab1" "Value-directed elaboration" #:style left-figure-style
|
||||
@exact|{\input{fig-elab1}}|
|
||||
]
|
||||
|
||||
Our thesis is that adding such rules does not undermine the safety guarantees of the
|
||||
original language.
|
||||
In this case, we recover the original guarantees by extending the proofs of
|
||||
type and memory safety to address the @sc{S-RefPass} and @sc{S-RefFail} elaboration rules.
|
||||
|
||||
@definition[@exact{$\elabarrowplus$}]{
|
||||
The relation @exact{$\elabarrowplus$} is the union of the rules in
|
||||
@Figure-ref["fig:elab0" "fig:elab1"].
|
||||
}
|
||||
|
||||
@theorem["determinism"]{
|
||||
@exact|{$\bigstepplus{e}{v}$}| is a proper function, where @exact|{$\bigstepplus{e}{v}$}|
|
||||
if and only if @exact|{$\elabstoclosedplus{e}{e'}{\tau}$}|
|
||||
and @exact|{$\smallstepstar{e'}{v}$}|.
|
||||
}
|
||||
|
||||
@proof{
|
||||
With the original elaboration scheme, all array references @exact{$\aref{e_1}{e_2}$}
|
||||
elaborate to @exact{$\checkedref~e_1'~e_2'$}, where @exact{$e_1'$}
|
||||
and @exact{$e_2'$} are the elaboration of @exact{$e_1$} and @exact{$e_2$}.
|
||||
There are now three possibilities:
|
||||
@itemize[
|
||||
@item{
|
||||
@exact{$e_1'$} is an array value @exact{$\vectorvn$}
|
||||
and @exact{$e_2'$} is an integer value @exact{$i \in \ints$}
|
||||
and @exact{$0 \le i < n$}.
|
||||
Both @sc{S-Ref} and @sc{S-RefPass} are possible elaborations,
|
||||
therefore @exact{$e'$} is either @exact{$\checkedref~\vectorvn~i$}
|
||||
or @exact{$\unsaferef~\vectorvn~i$}.
|
||||
Because @exact{$0 \le i < n$}, evaluation must proceed with
|
||||
@sc{E-CheckPass} or @sc{E-UnsafePass} in each case, respectively.
|
||||
These rules have the same result, @exact{$v_i$}.
|
||||
}
|
||||
@item{
|
||||
@exact{$e_1'$} is an array value @exact{$\vectorvn$}
|
||||
and @exact{$e_2'$} is an integer value @exact{$i \in \ints$}
|
||||
and either @exact{$i < 0$} or @exact{$i \ge n$}.
|
||||
The rules @sc{S-Ref} and @sc{S-RefFail} are possible elaborations.
|
||||
If @sc{S-Ref} is chosen, @exact{$e'$} is @exact{$\checkedref~\vectorvn~i$}
|
||||
and evaluates to @exact{$\indexerror$} because @exact{$i$} is outside
|
||||
the bounds of the array.
|
||||
If @sc{S-RefFail} is chosen an index error is raised immediately.
|
||||
}
|
||||
@item{
|
||||
Otherwise, either @exact{$e_1'$} or @exact{$e_2'$} is not a value
|
||||
form and we rely on the existing proof of determinism.
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
For practical purposes, non-determinism in the elaborator should be
|
||||
resolved giving the rules in @Figure-ref{fig:elab1} preference over the rules
|
||||
in @Figure-ref{fig:elab0}.
|
||||
But the result will be the same in any event.
|
||||
|
||||
|
||||
@theorem["soundness"]{
|
||||
If @exact{$e$} is a closed term then @exact|{$\elabstoclosedplus{e}{e'}{\tau}$}|
|
||||
implies that @exact{$e'$} is either a value or steps to an index error or
|
||||
steps to a term @exact{$e''$} with type @exact{$\tau$}.
|
||||
}
|
||||
|
||||
@proof{
|
||||
We extend the existing proof with cases for @exact{$\unsaferef$}.
|
||||
@itemize[
|
||||
@item{
|
||||
If @sc{S-RefPass} is applied by the elaborator then @exact{$e'$} is
|
||||
the call @exact{$\unsaferef~\vectorvn~i$} and @exact{$0 \le i < n$}.
|
||||
By assumption, @exact|{$\typestoclosed{\vectorvn}{\tarray}$}| and
|
||||
@exact{$\typestoclosed{i}{\tint}$}.
|
||||
Therefore @exact{$e'$} is well-typed by the rule @sc{T-Unsafe} and steps
|
||||
to the integer value @exact{$v_i$} by @sc{E-UnsafePass}.
|
||||
}
|
||||
@item{
|
||||
If @sc{S-RefFail} is applied by the elaborator then @exact{$e'$} is
|
||||
@exact{$\indexerror$} and type soundness follows trivially.
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
The key step in both theorems was that elaboration supplied the proposition
|
||||
@exact{$0 \le i < n$}.
|
||||
Both the type rules and evaluation rules depended on this premise.
|
||||
|
||||
|
||||
@section{In Practice}
|
||||
|
||||
Have implemented for typed racket
|
||||
- library
|
||||
- with enhancements from @todo{secref}
|
||||
- via expander, so typechecked afterwards
|
||||
|
||||
Checked in
|
||||
- standard distro (includes plot,math,stats)
|
||||
- pict3d
|
13
popl-2017/src/distinguisher.rkt
Normal file
13
popl-2017/src/distinguisher.rkt
Normal file
|
@ -0,0 +1,13 @@
|
|||
#lang racket/base
|
||||
(require (for-syntax racket/base syntax/parse))
|
||||
|
||||
(define-syntax (distinguisher stx)
|
||||
(syntax-parse stx
|
||||
[(_ e_1 e_2)
|
||||
(if (equal? (syntax->datum #'e_1)
|
||||
(syntax->datum #'e_2))
|
||||
#'((λ (x) (x x)) (λ (x) (x x)))
|
||||
#'(λ (x) x))]))
|
||||
|
||||
;(distinguisher '() '())
|
||||
(distinguisher 'A 'B)
|
40
popl-2017/src/matrix_map.ml
Normal file
40
popl-2017/src/matrix_map.ml
Normal file
|
@ -0,0 +1,40 @@
|
|||
(* Trying to implment variable arity map in OCaml *)
|
||||
|
||||
let empty xs =
|
||||
match xs with
|
||||
| [] -> true
|
||||
| _ -> false
|
||||
|
||||
let rec ormap f xs =
|
||||
match xs with
|
||||
| [] -> true
|
||||
| x::xs -> (f x) || (ormap f xs)
|
||||
|
||||
(* Error *)
|
||||
let rec apply f xs =
|
||||
match xs with
|
||||
| [] -> failwith "nope"
|
||||
| [x] -> f x
|
||||
| x::xs ->
|
||||
let f2 = f x in
|
||||
apply f2 xs
|
||||
|
||||
let rec mapN f xss =
|
||||
match xss with
|
||||
| [] -> []
|
||||
| _ ->
|
||||
if ormap empty xss
|
||||
then []
|
||||
else apply f (map List.hd xss) :: mapN f (map List.tl xss)
|
||||
|
||||
let _ =
|
||||
let a1 = mapN (fun x y -> x + y)
|
||||
[[1, 2, 3]
|
||||
[0, 4, 5]]
|
||||
in
|
||||
let a2 = mapN (fun x y z -> x + y + z)
|
||||
[[1, 2, 3]
|
||||
[9, 4, 2]
|
||||
[0, 4, 5]]
|
||||
in
|
||||
()
|
12
popl-2017/src/racket/check-all-equal.rkt
Normal file
12
popl-2017/src/racket/check-all-equal.rkt
Normal file
|
@ -0,0 +1,12 @@
|
|||
#lang racket/base
|
||||
|
||||
(module+ test
|
||||
(require rackunit)
|
||||
|
||||
(define-syntax-rule (check-all-eq? e_1 e_rest ...)
|
||||
(begin (check-equal? e_1 e_1)
|
||||
(check-equal? e_rest e_1) ...))
|
||||
|
||||
(check-all-eq? 1 1 1 1 1)
|
||||
(check-all-eq? 1 2 1 3)
|
||||
)
|
40
popl-2017/src/racket/log.rkt
Normal file
40
popl-2017/src/racket/log.rkt
Normal file
|
@ -0,0 +1,40 @@
|
|||
#lang racket/base
|
||||
(require (for-syntax racket/base syntax/parse))
|
||||
|
||||
(define-for-syntax DEBUG #f)
|
||||
|
||||
(define log-file (open-output-file "log.txt" #:exists 'append))
|
||||
|
||||
(define-syntax (log stx)
|
||||
(syntax-parse stx
|
||||
[(_ message)
|
||||
(unless (string? (syntax->datum (syntax message)))
|
||||
(error 'nope))
|
||||
(if DEBUG
|
||||
#'(displayln (string-append "LOG " message) log-file)
|
||||
#'(void))]))
|
||||
|
||||
|
||||
(log "wepa")
|
||||
;(log 22)
|
||||
|
||||
(define-syntax (++ stx)
|
||||
(syntax-parse stx
|
||||
[(++ s1 s2)
|
||||
(syntax-property
|
||||
(syntax (string-append s1 s2))
|
||||
'mytype 'string)]))
|
||||
|
||||
(define-syntax (log1 stx)
|
||||
(syntax-parse stx
|
||||
[(log1 msg)
|
||||
(define e (local-expand #'msg 'expression '()))
|
||||
(printf "asd ~a\n" e)
|
||||
(if (or (string? (syntax->datum #'msg))
|
||||
(syntax-property e 'mytype))
|
||||
#'"SAFE"
|
||||
#'"NOTSAFE")]))
|
||||
|
||||
(log1 "yolasdf")
|
||||
(log1 "adsf")
|
||||
(log1 (++ "asdf" "cde"))
|
|
@ -3,6 +3,7 @@
|
|||
\usepackage{listings}
|
||||
\usepackage{stmaryrd}
|
||||
\usepackage{mathpartir}
|
||||
\usepackage{fourier-orns}
|
||||
\lstset{language=ML}
|
||||
\usepackage[usenames,dvipsnames]{xcolor}
|
||||
\usepackage{multicol}
|
||||
|
@ -23,7 +24,7 @@
|
|||
\overfullrule=1mm
|
||||
|
||||
%% balance last page columns
|
||||
\usepackage{flushend}
|
||||
%\usepackage{flushend}
|
||||
|
||||
%% for figure 2
|
||||
\let\ulcorner\relax
|
||||
|
@ -32,20 +33,57 @@
|
|||
\let\lrcorner\relax
|
||||
\usepackage{amssymb}
|
||||
|
||||
\newcommand{\interp}{\mathcal{I}}
|
||||
\newcommand{\untyped}[1]{{\,#1}_\flat}
|
||||
\newcommand{\trans}{\mathcal{T}}
|
||||
\newcommand{\elab}{\mathcal{E}}
|
||||
\newcommand{\elabfe}[1]{\llbracket #1 \rrbracket}
|
||||
\newcommand{\elabf}{\elabfe{\cdot}}
|
||||
\newcommand{\subt}{\mathbf{<:\,}}
|
||||
\newcommand{\tos}{\mathsf{types_of_spec}}
|
||||
\newcommand{\trt}[1]{\emph{#1}}
|
||||
\newcommand{\tprintf}{\mathsf{t_printf}}
|
||||
\newcommand{\mod}[1]{$\mathsf{#1}$}
|
||||
\newcommand{\evalsto}{\RktMeta{}\RktSym{=={\Stttextmore}}\RktMeta{}\mbox{\hphantom{\Scribtexttt{x}}}}
|
||||
%% -- types
|
||||
\newcommand{\tvar}[1]{\mathsf{#1}}
|
||||
\newcommand{\tnat}{\tvar{Natural}}
|
||||
\newcommand{\tarray}{\tvar{Array}}
|
||||
\newcommand{\tint}{\tvar{Int}}
|
||||
\newcommand{\tnum}{\tvar{Num}}
|
||||
\newcommand{\tlist}[1]{\tvar{List~#1}}
|
||||
\newcommand{\Progn}{\mathcal{P}}
|
||||
\newcommand{\naturals}{\mathbb{N}}
|
||||
\newcommand{\ints}{\mathbb{Z}}
|
||||
|
||||
%% -- terms
|
||||
\newcommand{\vectorgen}[1]{\langle #1 \rangle}
|
||||
\newcommand{\vectoren}{\vectorgen{e_0, \ldots, e_{n-1}}}
|
||||
\newcommand{\vectorvn}{\vectorgen{v_0, \ldots, v_{n-1}}}
|
||||
\newcommand{\vectorxn}{\vectorgen{x_0, \ldots, x_{n-1}}}
|
||||
\newcommand{\vlam}[2]{\lambda\,#1\,.\,#2}
|
||||
\newcommand{\aref}[2]{#1@#2}
|
||||
\newcommand{\checkedref}{\tvar{checked\mbox{-}ref}}
|
||||
\newcommand{\unsaferef}{\tvar{unsafe\mbox{-}ref}}
|
||||
\newcommand{\segfault}{\mathsf{segfault}}
|
||||
\newcommand{\indexerror}{\mathsf{IndexError}}
|
||||
|
||||
%% -- evaluation contexts
|
||||
\newcommand{\ectx}{E}
|
||||
\newcommand{\ehole}{[\cdot]}
|
||||
|
||||
%% -- type environments
|
||||
\newcommand{\tenv}{\Gamma}
|
||||
\newcommand{\tenvempty}{\cdot}
|
||||
\newcommand{\tenvcons}[3]{#1:#2,#3}
|
||||
|
||||
%% -- typing
|
||||
\newcommand{\typestogen}[3]{#1 \vdash #2 : #3}
|
||||
\newcommand{\typestoclosed}[2]{\typestogen{\tenvempty}{#1}{#2}}
|
||||
\newcommand{\typesto}[2]{\typestogen{\tenv}{#1}{#2}}
|
||||
|
||||
%% -- elaboration
|
||||
\newcommand{\elabarrow}{\rightsquigarrow}
|
||||
\newcommand{\elabarrowplus}{\elabarrow^{+}}
|
||||
\newcommand{\elabstogen}[5]{\typestogen{#1}{#2 #3 #4}{#5}}
|
||||
\newcommand{\elabstoclosedplus}[3]{\elabstogen{\tenvempty}{#1}{\elabarrowplus}{#2}{#3}}
|
||||
\newcommand{\elabstoclosed}[3]{\elabstogen{\tenvempty}{#1}{\elabarrow}{#2}{#3}}
|
||||
\newcommand{\elabsto}[3]{\elabstogen{\tenv}{#1}{\elabarrow}{#2}{#3}}
|
||||
|
||||
%% -- evaluation
|
||||
\newcommand{\bigstep}[2]{#1 \Downarrow #2}
|
||||
\newcommand{\bigstepplus}[2]{#1 \Downarrow^+ #2}
|
||||
\newcommand{\smallstepstar}[2]{#1 \rightarrow^* #2}
|
||||
\newcommand{\smallstep}[2]{#1 \rightarrow #2}
|
||||
|
||||
%% -- misc
|
||||
\newcommand{\esubst}[2]{[#2/#1]}
|
||||
\newcommand{\qed}{$\square$}
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user