[checkpoint]

This commit is contained in:
Ben Greenman 2016-06-27 15:57:06 -04:00
parent e70e0d3867
commit 3703cee372
15 changed files with 740 additions and 522 deletions

View File

@ -8,7 +8,6 @@
paper.tex
pearl.tex
benchmark/
compiled/
*.swp
*.swo

View File

@ -10,6 +10,10 @@ ${PAPER}.pdf: pkg setup texstyle.tex
++extra fig-stlc-core.tex \
++extra fig-elab0.tex \
++extra fig-elab1.tex \
++extra fig-elab-regexp.tex \
++extra fig-elab-sigma.tex \
++extra fig-regexp-lib.tex \
++extra fig-stlc-dict.tex \
++extra mathpartir.sty \
++style texstyle.tex \
--pdf $(PAPER).scrbl

View File

@ -63,3 +63,21 @@ rust will be difficult
5. ending
---
0. finish sql search
1. revise 3,4 with MODEL IMPL, EVAL
2. do 5 better, not sure how, figure first
related work
- dep types
- occurrence types TR optimizer (no theoretical justifications)
- yes we did macros, yes that's been done (fisher, hermen)
- haskell
conclusion
- just one way of doing this, can reimagine typed racket, TC + elab
= stephen chang
regexP;
-
- put the (list ...) back

View File

@ -1,8 +1,14 @@
#lang scribble/sigplan @onecolumn
@require["common.rkt" pict racket/class racket/draw]
@; Goal of Sec 2 is to introduce background on TR needed to:
@; - understand our bias
@; - understand our examples
@; - understand "type elaborator api"
@title[#:tag "sec:background"]{If You Know What I Mean}
@require["common.rkt"]
@title[#:tag "sec:background"]{Towards a Type Elaborator API}
@; A poet should be of the
@; old-fashioned meaningless brand:
@; obscure, esoteric, symbolic,
@ -12,370 +18,254 @@
@; I'll gladly explain what it means
@; till you don't understand it.
@; Computers understand a very limited set of instructions.
@; It is tedious and error-prone to describe even simple thoughts in terms of these
@; instructions, but programming languages provide abstraction mechanisms
@; that can make the process bearable.
@; For example, low-level or assembly languages can implement the following
@; mathematical function for referencing an element of a fixed-length
@; data structure:
@;
@; @exact|{\[
@; \RktMeta{ref} = \lambda (\RktMeta{v}~\RktMeta{i})
@; \left\{\begin{array}{l l}
@; \RktMeta{v}_{i+1} & \RktMeta{if}~\RktMeta{v} = \vectorvn
@; \\ ~&\RktMeta{and}~\RktMeta{i} \in \mathbb{N}
@; ~\RktMeta{and}~i < n
@; \\[4pt]
@; \bot & \RktMeta{otherwise}
@; \end{array}\right.
@; \]}|
@;
@; @;Here we use the notation @exact|{$\langle \RktMeta{v}_0 \ldots \RktMeta{v}_{n-1} \rangle$}|
@; @; to describe a vector of @exact{$n$} elements.
@; Whether the implementation is a labeled sequence of instructions or a C function
@; is not important; what matters is the precise specification
@; and our ability to compute the function via machine.
@; Memory-safe functions with clear semantics are the core building blocks for
@; sane programs.
@;
@; As programs grow in size, complexity, and importance, it is desirable to have
@; some guarantees about how a program will run before actually running the
@; program.
@; Ideally, we would statically prove that @racket[ref] is
@; never applied to arguments that are not vectors or to out-of-bounds indices.
@; @;Such a proof would imply that every call @racket[(ref v i)] in the program
@; @; would yield a non-@|bot| value and help show that the overall program works as intended.
@; But proving such properties is difficult and there are often many
@; functions in a program for which we seek guarantees, so we settle for
@; approximate results.
@; Instead of statically ruling out all calls @racket[(ref v i)] that produce
@; @|bot|, we strive to eliminate a large subset automatically.
@;
@; Type systems have emerged as a powerful and convenient way of statically detecting
@; errors.
@; Instead of tracking the flow of exact values through a program, a type system
@; tracks the flow of @emph{types} denoting a range of possible runtime values.
@; Depending on the type system and programmers' discipline using it, the range
@; might be limited enough to show that @racket[ref] never returns @|bot|
@; in a program; however, the tradeoff is always the time and energy programmers
@; are willing to invest in writing down and maintaining type information.
@; As it stands, the most widely used type systems are those that require
@; minimal annotations from the programmer and catch only shallow, common errors.
@;
@;
@; @section{Problem Statement and Promises}
@;
@; @let*[([all-w 150]
@; [lang-w (/ all-w 2)]
@; [lang-h 20]
@; [lang-rectangle
@; (lambda (brush-style
@; fill-color
@; #:width [lang-w lang-w] ;; getting lazy there ben
@; #:height [lang-h lang-h]
@; #:slant-left [slant-left #f]
@; #:border-color [maybe-border-color #f]
@; #:border-width [border-width 3])
@; (dc (lambda (dc dx dy)
@; (define old-brush (send dc get-brush))
@; (define old-pen (send dc get-pen))
@; (define border-color (or maybe-border-color fill-color))
@; (send dc set-brush
@; (new brush% [style brush-style] [color fill-color]))
@; (send dc set-pen
@; (new pen% [width border-width] [color border-color]))
@; ;; --
@; (define path (new dc-path%))
@; (send path move-to 0 0)
@; (if slant-left
@; (begin (send path line-to (- 0 slant-left) (/ lang-h 2))
@; (send path line-to 0 lang-h))
@; (send path line-to 0 lang-h))
@; (send path line-to lang-w lang-h)
@; (send path line-to lang-w 0)
@; (send path close)
@; (send dc draw-path path dx dy)
@; ;; --
@; (send dc set-brush old-brush)
@; (send dc set-pen old-pen)) lang-w lang-h))]
@; [all-e (rectangle all-w lang-h)]
@; [hshim (blank (/ all-w 6) lang-h)]
@; [untyped-e (hc-append hshim (lang-rectangle 'vertical-hatch "CornflowerBlue" #:border-width 4 #:height (+ lang-h 2)))]
@; [typed-e (hc-append (lang-rectangle 'horizontal-hatch "Coral" #:border-width 2) hshim)]
@; [reality-of-ml (rc-superimpose (lc-superimpose all-e untyped-e) typed-e)]
@; [value-e
@; (let ([s 'solid]
@; [c "LimeGreen"]
@; [w (- lang-w lang-h)]
@; [b 1])
@; (hc-append (blank (* lang-h 0.8) lang-h)
@; (lang-rectangle s c #:width (/ lang-w 3) #:height (/ lang-h 3) #:border-width b)
@; (lang-rectangle s c #:border-width b)))]
@; [reality-of-tr (lc-superimpose reality-of-ml value-e)]
@; )]{@list{
@; No matter how precise the type system, static analysis cannot recognize
@; all correct programs and reject all incorrect programs.
@; Layering a type system on top of an untyped programming language therefore
@; leads to a world where the space of all syntactically valid programs
@; (drawn as a white box, of course) is partitioned into two overlapping sets.
@;
@; @centered[reality-of-ml]
@;
@; On the left, outlined in blue with vertical hatches, we have the set of all untyped programs
@; that produce meaningful results (read: non-error, non-diverging terms).
@; On the right, outlined in orange with horizontal hatches, we have the set of all programs
@; approved by the type system.
@; Some type-correct programs raise runtime errors when evaluated, and some
@; meaningful programs are rejected by the type checker.
@;
@; Again, the impossible goal is for the highlighted boxes to overlap.
@; Our argument is that adding a @emph{syntactic elaboration} phase before
@; typechecking can yield a language described by the
@; solid green area shown below, capturing more of the meaningful untyped
@; programs and fewer of the typed programs that go wrong, though rejecting
@; some typed programs that run to completion but contain a subterm that would
@; raise an exception if evaluated (Theorem @exact|{\ref{thm:relevance}}|).
@;
@; @centered[reality-of-tr]
@; }}
@;
@; Starting from an untyped programming language built from a grammar of terms
@; @exact{$e$} with a reduction relation @exact{$e \Downarrow v$}
@; and a type judgment @exact{$\vdash e : \tau$} relating terms to
@; values @exact{$v$} and types @exact{$\tau$}, we derive an extended language
@; by inserting a function @exact{$\elaborate(e)$} before type checking.
@; The extended language has a type system @exact{$\vdashe$} consisting of a single
@; rule:
@;
@; @exact|{
@; \begin{mathpar}
@; \inferrule*{
@; \vdash \elaborate(e) : \tau
@; }{
@; \vdashe e : \tau
@; }
@; \end{mathpar}
@; }|
@;
@; We also derive a reduction relation on typed terms @exact{$\vdashe e : \tau$}
@; as @exact{$\elaborate(e) \Downarrow v$}.
@; Type soundness for @exact{$\vdashe$} is thus a corollary of
@; type soundness for the @exact{$\vdash$} judgment.
@;
@; The output of @exact{$\elaborate(e)$} is a term @exact{$e'$},
@; though @exact{$e'$} may be a labeled error term
@; @exact|{$\bot^{e''}$}| meaning the elaborator detected an error in the program.
@; Terms @exact{$e$} such that @exact{$\elaborate(e)$} type checks but @exact{$e$}
@; does not are newly expressible in the extended system.
@; Terms @exact{$e$} such that @exact{$\vdash e : \tau$} but
@; @exact|{$\elaborate(e) = \bot^{e'}$}| are programs validated by the typechecker
@; but rejected by the extended system because the subterm @exact{$e'$} of @exact{$e$}
@; would raise a runtime error if evaluated.
@;
@; Our main contribution is defining a useful collection of @emph{local}
@; transformations to perform during elaboration.
@; Each local elaboration is proven correct in isolation.
@; We then define @exact{$\elaborate(e)$} as the union of these local cases
@; and a default elaboration that applies @exact{$\elaborate$} to all
@; subterms of @exact{$e$}.
@;
@; To avoid reasoning about terms post-elaboration we will describe
@; our local elaborations with type rules of the form: @todo{maybe drop this}
@;
@; @exact|{
@; \begin{mathpar}
@; \inferrule*{
@; \prope(f, e)
@; \\\\
@; \vdash f : \tau_f
@; \\
@; \vdash e : \tau_e
@; }{
@; \vdashe f~e : \tau
@; }
@; \end{mathpar}
@; }|
@;
@; where @exact{$\prope$} is a proposition about syntactic terms
@; @exact{$f$} and @exact{$e$}
@; used to justify the transformation @exact|{$\elaborate(f~e)$}|.
@; Despite this inference rule shorthand, elaborations and @exact{$\prope$}
@; conditions run before typechecking and have no access to type information
@; in the program.
@;
@; The relationship between terms @exact{$e$} and @exact{$\elaborate(e)$} is given
@; by three theorems: @todo{see next section for now}.
@;
@; Correct elaborations obey these theorems.
@; @; Note: there is no guarantee that elaboration produces a well-typed term
@; @; because calls like @exact{$\elaborate($}@racket[ref 0 0]@exact{$)$} are allowed.
@;
@; @; We refrain from making stronger statements about elaborate because
@; @; real systems will have lots of other elaborations.
@; TODO
@; - can we not use "Typed Racket" as the first word of this section?
@; to me that's a huge turn-off after reading the introduction.
@; - missing answer: why are we using TR
Typed Racket does not have a type elaboration API; however, it inherits a rich
syntax extension system from its host language, Racket.
Experience with Racket syntax extensions (aka macros) motivates our proposal for
a similar type elaboration system, presented in @Secref{subsec:api}.
Moreover, we have implemented the transformations described in @Secref{sec:define}
as a Typed Racket package
and this section is intended to prepare the way for later code snippets.
@section{Syntax Extensions}
@section{Typed Racket, today}
Syntax extension systems have been incorporated in
a number of mainstream programming languages.
Essentially, syntax extensions let the programmer write code that generates code.
In practice this gives programmers the ability to extend the syntax of their
language, abstract over textual patterns, and control the evaluation order
of expressions.
Typed Racket is an ongoing experiment in language design, the success of which
is a testament to the usefulness and versatility of syntax extensions.
The entire language is implemented as a Racket library; in particular, a
library of syntax extensions.
Types in Typed Racket are distinguished Racket terms, given special meaning by
the type checker.
The checker itself is nothing more than a Racket function defined over
type-annotated programs and run before the program is compiled.
When typechecking succeeds, the annotations are erased and the resulting
program is fed to the Racket compiler.
As such, there is no need for a dedicated ``Typed Racket compiler''.
Type-driven optimizations occur on core Racket forms just after type checking
and the result feeds in to the existing compiler toolchain.
Racket's syntax extension API is particularly mature and has inspired
similar APIs in at least two other languages, so we adopt it here to introduce
syntax extensions.
As a first example, suppose we want to assert that expressions
@racket[e_1], @racket[e_2], @racket[e_3], and @racket[e_4] all evaluate to
the same value.
By hand, we might write a sequence of unit tests:
@racketblock[
(begin (check-equal? e_2 e_1)
(check-equal? e_3 e_1)
(check-equal? e_4 e_1))
]
but these tests follow a simple pattern that we can abstract as a @emph{syntax rule},
using ellipses (@racket[...]) to capture an arbitrary number of expressions.
@racketblock[
(define-syntax-rule (check-all-equal? e_1 e_rest ...)
(begin (check-equal? e_rest e_1) ...))
]
Our tests can now be written more concisely:
@racketblock[
(check-all-equal? e_1 e_2 e_3 e_4)
]
making them easier to read and maintain.
Moreover, we can easily improve the rule to evaluate @racket[e_1] only once
instead of @math{n-1} times:
@racketblock[
(define-syntax-rule (check-all-equal? e_1 e_rest ...)
(let ([v e_1])
(check-equal? e_rest v) ...))
@; One might expect this is kinda slow. Maybe that's true. But has benefits and users.
For a concrete example, have a left-leaning factorial function:
@codeblock{
#lang typed/racket
(define (fact (n : Natural)) : Natural
(foldl * n (range 1 n)))
}
Aside from the type annotations, the function is ordinary Racket code
and behaves as such when run.
But the syntax accepted by Typed Racket's @racket[define] is a superset of
valid, untyped Racket.
Furthermore, this @racket[define] has an extended semantics.
When the above program is compiled, @racket[define] registers the identifier
@racket[fact] in a type environment with the signature @racket[(Natural -> Natural)].
The definition then expands to an annotation-free Racket @racket[define] and
the same expansion-and-type-collection process is repeated on the body of @racket[fact].
So just as Typed Racket re-uses the Racket compiler, Typed Racket's @racket[define]
re-uses the semantics of Racket's.
This sleight of hand is accomplished by shadowing @racket[define] with a syntax
extension that moves types from the program syntax to a type environment---and
crucially, does nothing more.
Note, however, that no types are checked at this point.
It is only after the entire program is expanded to core Racket and all type
definitions collected that types are checked and e.g. the type variables
for @racket[foldl] are instantiated.
Waiting until all syntax extensions are expanded is both a pragmatic choice
and allows extensions to create and refine types written in a program without
subverting the authoritarian type checker.
@; not really liking 'pragmatic' but I guess it should be obvious, that's the
@; easiest way to implement a TC for Racket
@section{Racket Macros, quickly}
Having built some intuition for how Typed Racket's @racket[define] operates,
we use its definition to introduce Racket macros.@note{After this section,
we will stop using the term @emph{macro} in favor of the more general
phrase @emph{syntax extension}.}
The following is paraphrased from Typed Racket and elaborates a type-annotated
@racket[define] to an unannotated one.
@racketblock[
(define-syntax (-define stx)
(syntax-parse stx #:literals (:)
[(-define (nm:id (arg:id : ty)) : ret-ty body ...)
(define type #`(ty -> ret-ty))
(register-type #`nm type)
#`(define (nm arg) body ...)]))
]
Going line-by-line, we have:
@; TODO what is the point of each?
@; TODO what is the bottom line?
@itemlist[
@item{
@racket[define-syntax] declares a function on code; in other words,
a macro.
The formal parameter @racket[stx] is so named because it always binds
a @emph{syntax object} representing the context of a call to the
@racket[-define] macro.
}
@item{
@racket[syntax-parse] is pattern matching for syntax objects.
The optional argument @racket[#:literals (:)] causes any @racket[:] characters
in a @racket[syntax-parse] pattern to match only against the @racket[:]
identifier bound in the current lexical scope.
Normally the variable @racket[:] is no different from the pattern @racket[x]
or the pattern @racket[any-variable].
}
@item{
The third line of the macro is a pattern.
The remaining lines are instructions to perform if the pattern matches.
Within the pattern:
@itemlist[
@item{
@racket[-define], @racket[nm], @racket[arg], @racket[ty],
@racket[ret-ty], and @racket[body] are @emph{pattern variables} matched to
sub-expressions of @racket[stx].
}
@item{
The ellipses (@racket[...]) are part of the grammar of @racket[syntax-parse]
and match zero or more occurrences of the previous pattern,
to the effect that @racket[body ...] matches a list of consecutive expressions.
}
@item{
@racket[:id], as in @racket[nm:id] and @racket[arg:id], is a
@emph{syntax class} annotation.
Using the @racket[id] syntax class causes @racket[nm] and @racket[arg]
to only match identifiers and not, for instance, integer constants or
parenthesized expressions.
}
]
}
@item{
@|stx| creates a syntax object from an expression.
In this case, the syntax object is a function type, built from our pattern
variables and Typed Racket's @racket[->] constructor.
}
@item{
@racket[register-type] binds an identifier to a type in a global type environment.
Since the pattern variable @racket[nm] is only bound to a symbol, we use
the syntax constructor @|stx| to associate the symbol with
the lexical scope of @racket[stx].
}
@item{
The result of any @racket[syntax-parse] pattern must be a syntax object.
Here we build a Racket @racket[define] statement from the relevant pattern
variables.
On this line, the ellipses (@racket[...]) are used in a dual sense to
splice the contents of the list @racket[body] into the body of the new
@racket[define] statement.
}
]
Intuitively, syntax rules perform a search-and-replace before the program
is compiled; however, the replacement process is careful to preserve the
lexical structure of programs.
For instance, a program that uses the same variable name @racket[v] as the syntax rule:
@racketblock[
(define v 5)
(check-all-equal? (+ 2 2) v)
]
will expand to code that introduces a fresh variable @racket[v1]:
@racketblock[
(define v 5)
(let ([v1 (+ 2 2)])
(check-equal? v v1))
]
to avoid shadowing the programmer's variable with the identifier used inside
the syntax rule.
Details on how this so-called @emph{hygienic} expansion is implemented and
its benefits to extension writers and users are explained by Flatt@~cite[f-popl-2016 fcdb-jfp-2012].
A call to @racket[syntax-parse] can contain multiple patterns.
Indeed,
the actual definition of Typed Racket's define has patterns for
non-function definitions @racket[(define n : Integer 42)]
and unannotated @racket[define] statements.
Finally, the module that implements @racket[-define] exports it as @racket[define]
to change the behavior of definitions only in external modules.
In addition to pattern-based syntax rules, one can extend Racket with
arbitrary functions defined over @emph{syntax objects}.
For instance, we can write an extension that formats log messages to a port
@racket[log-output] at runtime when the program is compiled with a flag
@racket[DEBUG] enabled. If the flag is disabled, we perform a no-op.
The expansion of @racket[log] calls happens during compilation:
@(begin
#reader scribble/comment-reader
@codeblock|{
;; With DEBUG enabled
(log "everything ok")
==> (displayln "everything ok" log-output)
;; With DEBUG disabled
(log "everything still ok")
==> (void)
}|)
@; Include a jab about parenthesized syntax making metaprogramming life easier?
@; @subsection{Oh, the Parentheses}
The @racket[syntax-parse] form is a convenient way to implement @racket[log].
In this case, we use @racket[syntax-parse] to deconstruct a syntax object
@racket[stx] representing an application of @racket[log].
The output of syntax parse is a new syntax object built using the constructor
@racket[syntax].
@codeblock{
(define-syntax (log stx)
(syntax-parse stx
[(log message)
(if DEBUG
(syntax (displayln message log-output))
(syntax (void)))]))
}
We can further enhance our implementation with a compile-time check that
the value bound to the pattern variable @racket[message] is a string literal.
@codeblock{
(define-syntax (log stx)
(syntax-parse stx
[(log message)
(unless (string? (syntax->datum (syntax message)))
(error "log: expected a string literal"))
(if DEBUG
(syntax (displayln message log-output))
(syntax (void)))]))
}
@; Alternatively, we can use Racket's @emph{syntax classes} to the same effect.
@; The @racket[str] syntax class recognizes literal strings.
@; Binding it to the pattern variable @racket[message] causes calls like
@; @racket[(log 61)] to raise a compile error.
@; @codeblock{
@; (define-syntax (log stx)
@; (syntax-parse stx
@; [(log message:str)
@; (if DEBUG
@; (syntax (displayln message log-output))
@; (syntax (void)))]))
@; }
With this enhancement, calls like @racket[(log 61)] are rejected statically.
Unfortunately, arguments that @emph{evaluate} to string literals
are also rejected
because the syntax extension cannot statically predict what value an arbitrary
expression will reduce to.
This is a fundamental limitation, but we can make a small improvement by accepting
any @racket[message] produced by another (trusted) syntax extension.
Suppose @racket[++] is an extension for concatenating two strings.
If we assign a unique @emph{syntax property} to the syntax object produced
by @racket[++], we can later retrieve the property in the @racket[log] extension.
First, we give an implementation of @racket[++] in terms of Racket's
built-in @racket[string-append] function, crucially using @racket[syntax-property]
to associate the value @racket['string] with the key @racket['static-type].
@codeblock{
(define-syntax (++ stx)
(syntax-parse stx
[(++ s1 s2)
(syntax-property
(syntax (string-append s1 s2))
'static-type 'string)]))
}
Assuming now that the key @racket['static-type] accurately describes the value
contained in a syntax object, @racket[log] can accept both string
literals and tagged syntax objects.
@codeblock{
(define-syntax (log stx)
(syntax-parse stx
[(log message)
(define is-string?
(string? (syntax->datum (syntax message))))
(define expands-to-string?
(eq? 'string
(syntax-property
(local-expand message)
'static-type)))
(unless (or is-string? expands-to-string?)
(error "log: expected a compile-time string"))
(if DEBUG
(syntax (displayln message log-output))
(syntax (void)))]))
}
@section[#:tag "subsec:api"]{Implementing a Type Elaborator}
Our syntax extensions @racket[check-all-equal?], @racket[log], and @racket[++]
are indistinguishable from user-defined functions or core language forms,
yet they perform useful transformations before a program is typechecked or
compiled.
This seamless integration gives Racket programmers the ability to grow the
language and tailor it to their specific needs.
Just as Typed Racket parses the syntax of a program and extracts breadcrumbs
for the type environment, a type elaborator transforms
a program leaving hints to guide the type checker.
Syntax extensions are a low-level way to achieve this behavior.
@Figure-ref{fig:printf} demonstrates a type-elaborated variant of Racket's
@racket[printf].
When called with a string literal @racket[fmt] as its first argument, the
elaborator @racket[-printf]
reads @racket[fmt] for type and arity constraints using the function @racket[format-types].
For instance, the format string @racket["~b"] would produce the type constraint
@racket[(Exact-Number)], implying that the arguments @racket[args] must be a
list with one element of type @racket[Exact-Number].
This constraint is used twice in @racket[-printf]:
first to check the length of @racket[args] against the length of @racket[types]
and second to add explicit type annotations (via @racket[ann]) around each
argument to the format string.
Whereas Typed Racket accepts any number of values with any type and
lets Racket's @racket[printf] raise runtime errors, type elaboration reports
both arity and type errors statically.
@; arity error = caught directly
@; type error = implied
@figure["fig:printf" "Type elaboration for format strings"
@racketblock[
(define-syntax (-printf stx)
(syntax-parse stx
[(-printf fmt:str args ...)
#:with (types ...) (format-types #`fmt)
(if (= (stx-length #`(args ...))
(stx-length #`(types ...)))
#`(printf fmt (ann args types) ...)
(error 'printf "arity mismatch in ~s" stx))]))
]
@; Include the default branch? I just don't know what to say about it.
@; [(-printf fmt args ...)
@; #`(printf fmt args ...)]))
]
In general, the high-level goals of such type elaborations are:
@itemlist[
@item{ (@goal{refinement})
Refine type signatures using latent, syntactic @emph{value information},
such as the characters in a string constant.
}
@item{ (@goal{reuse})
Rely on the existing type checker.
Avoid duplicating its work and never resort to proofs by assertion.@note[@elem{
Inspired by the @emph{translation validation} approach to
compiler correctness @~cite[pss-tacas-1998].}]
@; Just trying to say, always typecheck things
}
@item{ (@goal{relevance})
Report errors in terms of the programmer's code, not in terms
of elaborated code.@note[@elem{Inspired by SoundX @~cite[le-popl-2016].}]
}
]
The @racket[-printf] elaborator meets these goals by producing Typed Racket code
that only adds type annotations to the original program.
If these annotations fail, they report a type error relative to an element
of @racket[args].
As for refining the types, @racket[-printf] is best described with a quasi-dependent
type in terms of a proof theory @exact{$\Sigma$}.
@exact|{\begin{mathpar}
\inferrule{
\Sigma \vdash (\RktMeta{format-types}~\RktMeta{fmt}) = \tau_0 \ldots \tau_{n-1}
\\\\
\typestogen{\penv;\,\tenv}{\RktMeta{arg}_0}{\tau_0}
\\
\ldots
\\
\typestogen{\penv;\,\tenv}{\RktMeta{arg}_{n-1}}{\tau_{n-1}}
}{
\typestogen{\penv;\,\tenv}{\RktMeta{-printf fmt}~\RktMeta{arg}_0 \ldots \RktMeta{arg}_{n-1}}{\mathsf{Unit}}
}
\end{mathpar}}|
We chose @racket[printf] as an introductory example because its correctness
and type soundness follow directly from the soundness of @racket[format-types].
Correctness is not generally so simple to "prove", so @Secref{sec:segfault} and @Secref{sec:regexp}
show how type elaboration can justify a potentially unsafe optimizing transformation
and give library authors a technique for implementing a precise API without
changing their language's type system.
@; NOTE: we will use similar 'typed macros' in a coming section... right?

View File

@ -5,7 +5,7 @@
;; FIXME: this doesn't have all the papers from the README yet
(require racket/format
(require racket/format
scriblib/autobib)
(provide (all-defined-out))
@ -77,6 +77,7 @@
(define/short sigmod "SIGMOD" (string-append ACM "SIGMOD " International Conference "on Management of Data"))
(define/short sigplan-notices "SIGPLAN Notices" (string-append ACM "SIGPLAN Notices"))
(define/short tacs (string-append International Symposium "Theoretical Aspects of Computer Science"))
(define/short tacas (string-append International Conference "on Tools and Algorithms for the Construction and Analysis of Systems"))
(define/short tcs "Theoretical Computer Science")
(define/short tfp "TFP" (string-append Symposium "Trends in Functional Programming"))
(define/short tlca "TLCA" (string-append International Conference "Typed Lambda Calculi and Applications"))
@ -1725,9 +1726,23 @@
#:location (proceedings-location icfp #:pages '(235 246))
#:date 2010))
(define pss-tacas-1998
(make-bib
#:title "Translation Validation"
#:author (authors "Amir Pnueli" "Michael Siegel" "Eli Singerman")
#:location (proceedings-location tacas #:pages '(151 166))
#:date 1998))
(define le-popl-2016
(make-bib
#:title "Sound Type-Dependent Syntactic Language Extension"
#:author (authors "Florian Lorenzen" "Sebastian Erdweg")
#:location (proceedings-location popl #:pages '(204 216))
#:date 2016))
(define gr-cup-2004
(make-bib
#:title "The Standard ML Base Library"
#:author (authors "Emden R. Gansner" "John H. Reppy")
#:location (book-location #:edition "1" #:publisher "Cambridge University Press")
#:date 2004))

View File

@ -21,6 +21,10 @@
id
todo
proof
warning
goal
stx
)
(require "bib.rkt"
@ -117,7 +121,7 @@
(define (sf x) (elem #:style "sfstyle" x))
(define (sc x) (exact "\\textsc{" x "}"))
(define (sc x) (exact "\\textsc{\\small " x "}"))
(define (parag . x) (apply elem #:style "paragraph" x))
@ -174,3 +178,14 @@
txt
(exact "\\hfill\\qed"))))
(define (warning sym txt . arg*)
(printf "[WARNING] ~a: " sym)
(apply printf txt arg*)
(newline))
(define (goal str)
(bold (emph str)))
(define stx
(exact "\\RktRdr{\\#{\\textasciigrave}}"))

View File

@ -13,3 +13,6 @@ and only tells eternity.
@; type systems change slowly. That's OK
@; syntax extensions can change much faster and YOU can control the
@; changes. That's YOU joe programmer or YOU jane street capital. Do you.
@; just one way of doing this, can imagine other Typed Racket
@; in fact Stephen Chang .....

View File

@ -1,3 +0,0 @@
\begin{center}
to do
\end{center}

View File

@ -18,13 +18,9 @@
}
\inferrule*[left=T-Unsafe]{
\typestoclosed{\vectorvn}{\tarray}
\typesto{\vectorvn}{\tarray}
\\
\typestoclosed{i}{\tint}
\\\\
i \in \ints
\\
0 \leq i < n
\typesto{i}{\tint}
}{
\typesto{\unsaferef~\vectorvn~i}{\tint}
}

View File

@ -1,6 +1,6 @@
#lang scribble/sigplan @onecolumn
@require["common.rkt" (only-in scribble/base nested)]
@require["common.rkt" (only-in scribble/base nested) "bib.rkt"]
@title[#:tag "sec:intro"]{Type Elaborators Need APIs}
@ -13,11 +13,11 @@ consistency according to the underlying type theory, replaces many of the
surface-syntax constructs with constructs from the kernel language, and
inserts type information to create an annotated representation.
Some (implementations of such) programming languages also support a way to
Some programming languages also support a meta-API, that is, a way to
programmatically direct the elaborator. For example, Rust and Scala come
with compiler plug-ins. Typed Racket and Typed Clojure inherit the macro
mechanisms of their parent languages. Here we refer to such mechanisms as
@defterm{elaborator API}s.
mechanisms of their parents. This paper refers to all such mechanisms as
@defterm{elaborator API}s.
Equipping type-checking elaborators with APIs---or using the existing
APIs---promises new ways to expand the power of type checking at a
@ -29,21 +29,17 @@ Consider the example of tailoring the API of a library that implements a
string-based, domain-specific language. Examples of such libraries abound:
formatting, regular-expression matching, database queries, and so on. The
creators of these DSLs tend to know a lot about how programs written in
these DSLs relate to the host program, but they cannot express this
knowledge in API types without resorting to such rich systems as
dependent type theory.
Type tailoring allows such programmers to refine the typing rules for the
APIs in a relatively straightforward way. Recall the API of the
regular-expression library. In a typical typed language, the matching
function is exported with a type like this:
@;
@verbatim[#:indent 4]{
reg_exp_match: String -> Option [Listof String]
}
these DSLs relate to the host program, but they (usually) cannot express
this knowledge in API types. Type tailoring allows such programmers to
refine the typing rules for the APIs in a relatively straightforward
way. Recall the API of the regular-expression library. In a typical typed
language, the matching function is exported with a type like this:
@;
@exact|{
\begin{mathpar}
\mbox{regexp-match} : \mbox{String} \rightarrow \mbox{Opt[Listof[String]]}
\end{mathpar}\hspace{-.3em}}|
@; the above is an ***incredibly disgusting hack to work around a scribble bug***
When the result is a list of strings, the length of the list depends on the
argument string---but the API for the regular-expression library cannot
express this relationship between the regular expression and the
@ -51,69 +47,87 @@ surrounding host program. As a result, the author of the host program must
inject additional complexity, often in the form of (hidden) run-time checks.
If type tailoring is available, the creator of the library can refine the
type of @tt{reg_exp_match} with rules such as these:
@;
@verbatim[#:indent 4]{
Program |- s : does not contain grouping
------------------------------------------------
G |- reg_exp_match(s) : Option [List String]
Program |- s : contains one grouping w/o alternative
-----------------------------------------------------
G |- reg_exp_match(s) : Option [List String String]
}
type of @tt{regexp-match} with rules such as this one:
@;
@exact|{
\begin{mathpar}
\inferrule*[]{
\elabsto{e}{e'}{\mbox{String}}, \\\\
\mbox{\it Program} \vdash e' \mbox{ does not contain a grouping}
}{
\elabsto{\mbox{regexp-match}~e}{\mbox{regexp-match}~e'}{\mbox{Opt[List[String]]}}
}
\end{mathpar}\hspace{-.3em}}|
That is, when the extended elaborator can use (a fragment of) the program
to prove that the given string for a specific use of @tt{reg_exp_match}
to prove that the given string for a specific use of @tt{regexp-match}
does not contain grouping specifications---ways to extract a sub-string via
matching---then the @tt{Some} result type is a list that contains a single
string. Similarly, if the string contains exactly one such grouping (and no
alternative), then a match produces a list of two strings. In all other
cases, the type checker uses the default type for the function.
matching---the @tt{Some} result type is a list of a single
string. The original specification remains the default rule, which the
type checker uses when it cannot use the specific ones.
A vector library is another familiar example that offers opportunities for
type tailoring. Often such a library exports a dereferencing construct with
A similar rule would say that if the string contains exactly one such grouping
(and no alternative), a match produces a list of two strings. Critically,
this refined type for the result of applying @tt{regexp-match} enables
further type refinements, just like constant folding enables additional
compiler optimizations. In this specific case, the program context of the
application of @tt{regexp-match} may dereference the list with unsafe---and
thus faster---versions of @tt{first} and @tt{second} once it has confirmed
a match.
Vectors offer a similar opportunity for unsafe dereferencing via
type tailoring. Typically, such a library exports a dereferencing construct with
the following type rule:
@;
@verbatim[#:indent 4]{
G |- v : Vector[X] G |-e : Integer
----------------------------------------
G |- v[e] ~~> checked_ref(v,e) : X
}
@exact|{
\begin{mathpar}
\inferrule*[]{
\elabsto{e_1}{e_1'}{\tarray}
\\
\elabsto{e_2}{e_2'}{\tint}
}{
\elabsto{{e_1}[{e_2}]}{\checkedref~e_1'~e_2'}{\tint}
}
\end{mathpar}\hspace{-.3em}}|
@;
If the elaborator can prove, however, that the indexing expression @tt{e}
is equal to or evaluates to a specific integer @tt{i}, the rule can be
strengthened as follows:
@;
@verbatim[#:indent 4]{
@exact|{
\begin{mathpar}
Program |- e = i for some i in Natural
&& Program |- v is vector of length n
&& i < n
\inferrule*[]{
\elabsto{e_1}{\vectorvn}{\tarray},
\\
\elabsto{e_2}{e'_2}{\tint},
\\\\
\mbox{\it Program} \vdash e'_2 = i, \quad
i \in \ints, \quad
0 \le i < n
}{
\elabsto{{e_1}[{e_2}]}{\unsaferef~\vectorvn~i}{\tint}
}
\end{mathpar}\hspace{-.3em}}|
G |- v : Vector[X] G |-e : Integer
----------------------------------------
G |- v[e] ~~> unsafe-ref(v,e) : X
}
This paper introduces and evaluates the novel idea of type tailoring. It
uses Typed Racket and its API to the elaborator (section 2) because the
language already supports appropriate type and run-time systems and because
it is relatively straightforward to program the type
elaborator---@emph{without modifying it}. To make type tailoring concrete
and to demonstrate its usefulness, the paper presents two case studies
(sections 3 and 4). Each report on a case study consists of three parts: a
type soundness argument assuming a type soundness argument for the complete
language exists; the actual
@;
That is, the elaborator can then eliminate a possibly expensive run-time
check.
This paper demonstrates the idea with concrete case studies of type
tailoring (section 2) in the context of Typed Racket and its API to the
elaborator (section 3). To illustrate the usefulness of the idea, we
implement two tailorings. The first one---chosen for the wide familiarity
of the domain---enriches the type-checking of vector-referencing operations
(section 4). The second example explains how the implementor of a
string-based embedded DSL---a regexp-matching DSL---may tailor the types of
the interpretation function (section 5). Our evaluations confirm that these
tailorings reduce the number of run-time checks that the programmer or the
elaborator has to insert into the host program. In addition, we sketch
@margin-note*{BEN: this evaluation is missing an idea}
@;
implementation; and an evaluation that reports how often the revised type
elaborator can improve the code. The first type tailoring---chosen for the
wide familiarity of the domain---enriches the type-checking of
vector-referencing operations (section 3). The second example explains how
the implementor of a string-based embedded DSL---a regexp-matching DSL,
also widely familiar to programming researchers---may tailor the types of
the interpretation function (section 4). In addition, the paper sketches
several other applications of type tailoring in Typed Racket (section
6). We also explain how the creator of such libraries can refine the
existing type soundness proof of the host language to argue the correctness
of the tailoring.
5). The final two sections compare programmability of the type elaborator
to work on dependent types and sketch how such an API could be implemented
for other languages.

View File

@ -7,23 +7,23 @@
@abstract{
Many typed APIs implicitly acknowledge the @emph{diktat} that the host type
system imposes on the creators of libraries. When a library implements a
string-based domain-specific language, the problem is particularly obvious.
The interpretation functions for the programs in this embedded language
come with the uninformative type that maps a string to some other host
type. Only dependently typed languages can improve on this scenario at the
moment, but they impose a steep learning curve on programmers.
Typed APIs suffer from the @emph{diktat} of the host type system.
Libraries that implement string-based domain-specific languages make this
clash particularly obvious. The interpretation functions for the programs
in embedded languages come with the rather uninformative type that maps a
string to some other host type. Only dependently typed languages can
improve on this scenario at the moment, but they impose a steep learning
curve on programmers.
This paper proposes to tackle this problem with APIs for type
checkers. Specifically, it observes that most typed languages already
employ an elaboration pass to type-check programs. If this elaborator
comes with a sufficiently rich API, the author of a library can supplement
the default types of the library's API with typing rules that improve the
collaboration between host programs and uses of the library. The
evaluation uses a prototype for Typed Racket and illustrates how useful
the idea is for widely available libraries. Also the paper sketches how
the authors of such ``tailored'' rules can argue their soundness.
This paper proposes to tackle this problem with a meta-API for the type
checker. Specifically, it observes that most typed languages already
employ an elaboration pass to type-check programs. If this elaborator came
with a sufficiently rich API, the author of a library could supplement the
default types of the API with typing rules that improve the collaboration
between host programs and uses of the library. To demonstrate the
feasibility and effectiveness of this idea, the paper presents a prototype
for Typed Racket and two case studies. It also sketches how the authors of
such ``tailored'' typing rules can argue their soundness.
}
@ -34,12 +34,14 @@ This paper proposes to tackle this problem with APIs for type
@; See OUTLINE.md for explanation
@include-section{intro.scrbl}
@include-section{background.scrbl}
@;@include-section{elaborators.scrbl}
@include-section{segfault.scrbl}
@;@include-section{examples.scrbl}
@include-section{regexp.scrbl}
@include-section{define.scrbl}
@;@include-section{discussion.scrbl}
@;@include-section{friends.scrbl}
@;@include-section{related-work.scrbl}
@;@include-section{conclusion.scrbl}
@include-section{related-work.scrbl}
@include-section{conclusion.scrbl}
@section[#:style 'unnumbered]{Acknowledgments}

View File

@ -4,6 +4,8 @@
@title[#:tag "sec:related-work"]{Experts}
@section{Macros}
@section{SoundX}
SoundX is a system for modeling programming languages and defining type-sound
extensions, e.g. defining a type derivation for @tt{let} in terms of a type
@ -40,6 +42,12 @@ Our general approach and outlook on type soundness is informed by Cousot.
@section{Compiler Plugins}
@; -- HASKELL
@; https://ghc.haskell.org/trac/ghc/wiki/Plugins/TypeChecker
@; http://christiaanb.github.io/posts/type-checker-plugin/
@; http://adam.gundry.co.uk/pub/typechecker-plugins/typechecker-plugins-2015-07-17.pdf
@; https://github.com/yav/type-nat-solver (copy of paper in src/ folder here)
GHC (constraint solvers)
Rust (macros)
Scala (macros)

View File

@ -1,8 +1,54 @@
#lang scribble/sigplan @onecolumn
@; 1. MODEL
@; 2. IMPLEMENTATION
@; 3. EVALUATION
@require["common.rkt"]
@; Q. remove <e ... e>, and only have vector values?
@; because the syntax extension rules expect only values (expressions ruin the proofs)
@title[#:tag "sec:segfault"]{Well Typed Programs do not go SEGFAULT}
@require[
"common.rkt"
"evaluation.rkt"
glob
racket/sequence
(only-in racket/list take)
(only-in trivial/private/raco-command collect-and-summarize)
]
@title[#:tag "sec:segfault"]{When using the API can cause a segfault}
Every programming language comes with arrays. In Typed Racket, array
facilities come as a library that essentially exports constructors,
dereferencing functions, and mutation operations. SML similarly provides
them from a run-time library@~cite[gr-cup-2004].
The API for array libraries tends to come with highly conservative
signatures. For example, an array indexing operation calls for an array
and an integer and then produces the designated element from the given
array; run-time checks ensure that the integer is in the interval
@math{[0,n)} where @math{n} is the length of the array.
To speed up program execution, Typed Racket has access to an unsafe array
indexing operation. Like array indexing in C, this operation
retrieves the bits at the specified location without checking the size of
the index. If used inappropriately, such an unsafe operation can cause the
program to print random results or to segfault.
In this section we show that the creator of the Typed Racket array library
can replace checked array indexing with unsafe indexing. While this use of
type tailoring is quite simple, it supplies a great case study. To begin
with, it requires an innovation on the standard progress and preservation
method for showing type soundness. Specifically, the author of the array
library must show that the evaluator remains a function and does not
introduce segfaults into typed programs without run-time checks for
indexing (@secref{sec:segfault:model}). The Typed Racket implementation is quite
straightforward; the core consists of two dozen lines
(@secref{sec:segfault:implementation}). Finally, an evaluation on the Racket code
base indicates that the prototype is highly effective for a certain style of
programming.
@; -----------------------------------------------------------------------------
@section[#:tag "sec:segfault:model"]{Elaborating array indexing}
@Figure-ref{fig:stlc} describes a simply typed @exact{$\lambda$} calculus
with integers and integer arrays.
@ -18,10 +64,10 @@ The operational semantics for the core language are given in @Figure-ref{fig:stl
along with type judgments for two primitive operations:
@exact{$\checkedref$} and @exact{$\unsaferef$}.
Intuitively, @exact{$\checkedref$} is a memory-safe function that performs a bounds check before
dereferencing an array and gives a runtime error when called with incompatible values.
dereferencing an array and raises a runtime error when called with incompatible values.
On the other hand, @exact{$\unsaferef$} does not perform a bounds check and therefore
may return arbitrary data or raise a memory error when called with an invalid index.
These negative consequences are modeled in the (family of) rule(s) @exact|{\textsc{E-UnsafeFail}}|.
These negative consequences are modeled in the (family of) rule(s) @sc{E-UnsafeFail}.
Consequently, the judgment @exact{$\smallstep{e}{e}$} and its transitive closure
@exact{$\smallstepstar{e}{e}$} are relations, whereas
typing and elaboration are proper functions.
@ -56,6 +102,11 @@ Evaluation of the surface language in @Figure-ref{fig:stlc}, however,
}
@proof{
@; (shorter, but repeats following paragraph)
@; The surface language does not allow @exact{$\unsaferef$} and elaboration
@; does not introduce unsafe references, therefore the non-deterministic
@; rule @sc{E-UnsafeFail} is never used.
By induction on terms of the core language, if
@exact{$\smallstep{e}{e'}$} and @exact{$e$} is not of the form
@exact{$\unsaferef~e_1~e_2$} then @exact{$e'$} is unique.
@ -85,8 +136,8 @@ Additionally, the surface language is type safe.
}
@proof{
The interesting cases involve @exact{$\checkedref$}.
First, @exact|{$\elabstoclosed{\aref{e_1}{e_2}}{\checkedref~e_1~e_2}{\tint}$}|
The interesting cases are for array references, for which we observe that
@exact|{$\elabstoclosed{\aref{e_1}{e_2}}{\checkedref~e_1~e_2}{\tint}$}|
implies @exact{$\typestoclosed{e_1}{\tarray}$}
and @exact{$\typestoclosed{e_2}{\tint}$}.
Depending on the values of @exact{$e_1$} and @exact{$e_2$},
@ -95,8 +146,6 @@ Additionally, the surface language is type safe.
}
@section{Extending the Elaborator}
Building on the foundation of type soundness, we extend the elaborator with
the @emph{value-directed} rules in @Figure-ref{fig:elab1}.
When the elaborator can prove that an array reference is in bounds based on
@ -110,7 +159,7 @@ Conversely, the elaborator raises a compile-time index error if it can prove
Our thesis is that adding such rules does not undermine the safety guarantees of the
original language.
In this case, we recover the original guarantees by extending the proofs of
In this case, we recover determinism and soundness by extending the proofs of
type and memory safety to address the @sc{S-RefPass} and @sc{S-RefFail} elaboration rules.
@definition[@exact{$\elabarrowplus$}]{
@ -126,43 +175,38 @@ In this case, we recover the original guarantees by extending the proofs of
@proof{
With the original elaboration scheme, all array references @exact{$\aref{e_1}{e_2}$}
elaborate to @exact{$\checkedref~e_1'~e_2'$}, where @exact{$e_1'$}
elaborate via @sc{S-Ref} to @exact{$\checkedref~e_1'~e_2'$}, where @exact{$e_1'$}
and @exact{$e_2'$} are the elaboration of @exact{$e_1$} and @exact{$e_2$}.
There are now three possibilities:
@itemize[
@item{
@exact{$e_1'$} is an array value @exact{$\vectorvn$}
and @exact{$e_2'$} is an integer value @exact{$i \in \ints$}
and @exact{$0 \le i < n$}.
Both @sc{S-Ref} and @sc{S-RefPass} are possible elaborations,
therefore @exact{$e'$} is either @exact{$\checkedref~\vectorvn~i$}
or @exact{$\unsaferef~\vectorvn~i$}.
Because @exact{$0 \le i < n$}, evaluation must proceed with
@sc{E-CheckPass} or @sc{E-UnsafePass} in each case, respectively.
These rules have the same result, @exact{$v_i$}.
}
@item{
@exact{$e_1'$} is an array value @exact{$\vectorvn$}
and @exact{$e_2'$} is an integer value @exact{$i \in \ints$}
and either @exact{$i < 0$} or @exact{$i \ge n$}.
The rules @sc{S-Ref} and @sc{S-RefFail} are possible elaborations.
If @sc{S-Ref} is chosen, @exact{$e'$} is @exact{$\checkedref~\vectorvn~i$}
and evaluates to @exact{$\indexerror$} because @exact{$i$} is outside
the bounds of the array.
If @sc{S-RefFail} is chosen an index error is raised immediately.
}
@item{
Otherwise, either @exact{$e_1'$} or @exact{$e_2'$} is not a value
form and we rely on the existing proof of determinism.
}
]
References for which only @sc{S-Ref} applies remain deterministic, but
we have two cases where a new rule may also be used:
@itemize[
@item{ Case @sc{S-RefFail}:
@exact{$e_1'$} is an array literal @exact{$\vectorvn$}
and @exact{$e_2'$} is an integer literal @exact{$i \in \ints$}
and either @exact{$i < 0$} or @exact{$i \ge n$}.
If @sc{S-Ref} is chosen, @exact{$e'$} is @exact{$\checkedref~\vectorvn~i$}
and evaluates to @exact{$\indexerror$} because @exact{$i$} is outside
the bounds of the array.
On the other hand, @sc{S-RefFail} raises the index error immediately.
}
@item{ Case @sc{S-RefPass}:
@exact{$e_1'$} is an array literal @exact{$\vectorvn$}
and @exact{$e_2'$} is an integer literal @exact{$i \in \ints$}
and @exact{$0 \le i < n$}.
If @sc{S-Ref} is chosen then @exact{$e'$} is @exact{$\checkedref~\vectorvn~i$}.
If @sc{S-RefPass} is chosen then @exact{$e'$} is @exact{$\unsaferef~\vectorvn~i$}.
Because @exact{$0 \le i < n$}, evaluation must proceed with
@sc{E-CheckPass} or @sc{E-UnsafePass} respectively, producing
@exact{$v_i$} in any event.
}
]
}
For practical purposes, non-determinism in the elaborator should be
resolved giving the rules in @Figure-ref{fig:elab1} preference over the rules
in @Figure-ref{fig:elab0}.
But the result will be the same in any event.
resolved by giving the rules in @Figure-ref{fig:elab1} preference over the
rules in @Figure-ref{fig:elab0}.
But knowing that the result is the same for the non-identity elaborations
gives us confidence in their correctness.
@theorem["soundness"]{
If @exact{$e$} is a closed term then @exact|{$\elabstoclosedplus{e}{e'}{\tau}$}|
@ -174,7 +218,7 @@ In this case, we recover the original guarantees by extending the proofs of
We extend the existing proof with cases for @exact{$\unsaferef$}.
@itemize[
@item{
If @sc{S-RefPass} is applied by the elaborator then @exact{$e'$} is
Case @sc{S-RefPass}: @exact{$e'$} is
the call @exact{$\unsaferef~\vectorvn~i$} and @exact{$0 \le i < n$}.
By assumption, @exact|{$\typestoclosed{\vectorvn}{\tarray}$}| and
@exact{$\typestoclosed{i}{\tint}$}.
@ -182,24 +226,194 @@ In this case, we recover the original guarantees by extending the proofs of
to the integer value @exact{$v_i$} by @sc{E-UnsafePass}.
}
@item{
If @sc{S-RefFail} is applied by the elaborator then @exact{$e'$} is
Case @sc{S-RefFail}: then @exact{$e'$} is
@exact{$\indexerror$} and type soundness follows trivially.
}
]
}
The key step in both theorems was that elaboration supplied the proposition
@exact{$0 \le i < n$}.
Both the type rules and evaluation rules depended on this premise.
The key step in both theorems is that elaboration supplies the proposition
@exact{$0 \le i < n$} required for safe evaluation.
So long as assumptions like the above are properly stated by language implementors,
type tailoring library authors can help to meet them.
@section{In Practice}
@; -----------------------------------------------------------------------------
@section[#:tag "sec:segfault:implementation"]{Implementing the elaborator}
Have implemented for typed racket
- library
- with enhancements from @todo{secref}
- via expander, so typechecked afterwards
@; WHY ... WHY ARE THEY ALWAYS PLAYING JAZZ IN THE LOBBY OF THIS HOTEL ???
@; (Asked the concierge: it's always on, on a loop. For Christmas it's Christmas jazz)
Checked in
- standard distro (includes plot,math,stats)
- pict3d
We have implemented the rules in @Figure-ref{fig:elab1} as a syntax extension for Typed Racket.
Including imports and exports, the implementation is an 18 line module (@Figure-ref{fig:vector-ref-extension}).
@figure["fig:vector-ref-extension" "Syntax extension for array references"
@codeblock{
#lang typed/racket ;; vector-ref-extension.rkt

;; Exports a drop-in replacement for `vector-ref` that, when both the
;; vector and the index are literals, either emits an `unsafe-vector-ref`
;; (index provably in bounds, rule S-RefPass) or raises a compile-time
;; error (index provably out of bounds, rule S-RefFail).  All other call
;; shapes fall through to the ordinary checked `vector-ref` (rule S-Ref).
(require
  (only-in racket/unsafe/ops unsafe-vector-ref)
  (for-syntax racket/base syntax/parse))

(define-syntax (-vector-ref stx)
  (syntax-parse stx
   [(_ #(e* ...) i)
    #:when (integer? (syntax->datum #`i))
    ;; Case 1: constant args.
    ;; BUG FIX: compare the index against the *number* of elements,
    ;; not the list of element datums itself.
    (define v-len (length (syntax->datum #`(e* ...))))
    (define i-val (syntax->datum #`i))
    (if (< -1 i-val v-len)
      ;; then S-RefPass: bounds check discharged at compile time
      #`(unsafe-vector-ref '#(e* ...) i)
      ;; else S-RefFail: raise the index error during expansion
      (error 'vector-ref "~a ~a" v-len i-val))]
   [(_ args ...)
    ;; Case 2: S-Ref — defer to the checked reference (and to the
    ;; type checker for arity/ type errors)
    #`(vector-ref args ...)]))

(provide (rename-out [-vector-ref vector-ref]))
}
]
First, the @racket[require] statement imports the @racket[unsafe-vector-ref]
function to the runtime environment and the libraries @racket[racket/base]
and @racket[syntax/parse] to the compile-time environment.
We name the extension @racket[-vector-ref] to avoid shadowing the uses of
Typed Racket's @racket[vector-ref] in Case 2 of the extension.
At the end of the module, the @racket[provide] statement renames @racket[-vector-ref]
to replace the default @racket[vector-ref] in importing modules.
Code within @racket[-vector-ref] is evaluated at compile-time to
transform all calls and uses of the extension in an importing module.
For instance, calling @racket[(vector-ref x 4)] will invoke
@racket[-vector-ref] with the expression @racket[(vector-ref x 4)] bound to the
formal parameter @racket[stx].
@; A higher-order use like @racket[(map vector-ref vs is)] will invoke @racket[-vector-ref]
@; with @racket[stx] bound to the identifier @racket[vector-ref] before evaluating
@; the call to @racket[map].
In any event, the first task of @racket[-vector-ref] is to destructure its argument
@racket[stx] using the @racket[syntax-parse] form.
We consider three cases.
The first matches expressions with three elements, like @racket[(vector-ref #(0) 0)],
where the first element is anything, the second is a vector literal, and the third
is an integer.
Integer literals are recognized by the @racket[#:when] clause, which extracts
the value from a @emph{pattern variable} @racket[i] and tests whether this value
is an integer.
If this first match is successful, then we implement the @sc{S-RefPass}
and @sc{S-RefFail} rules by comparing the value of the integer literal
contained in @racket[i] against the number of elements captured by the zero-or-more
pattern @racket[(e ...)].
When the reference is in bounds, we use the constructor @|stx|
to produce code.@note[@elem{If it helps, you can mentally replace all @|stx|
with the arrow @exact{$\elabarrow$} from our model.}]
The second and third cases are simpler.
The second elaborates any call to @racket[-vector-ref] into a @racket[vector-ref]
call---even calls made with zero or seven arguments.
We let the type checker deal with such erroneous cases.
@; The third case replaces higher-order calls of @racket[-vector-ref] with
@; higher-order calls to Typed Racket's @racket[vector-ref] function.
Existing programs can use the extension by adding a 1-line import statement.
@codeblock{
#lang typed/racket
(require vector-ref-extension)
....
}
The import shadows @racket[vector-ref] from the @racket[typed/racket] language,
replacing all occurrences with calls to our syntax extension.
As Typed Racket processes the code abbreviated as @racket[....] above,
each vector reference is expanded to either a Typed Racket (checked)
@racket[vector-ref], an @racket[unsafe-vector-ref], or a compile-time @racket[error].
@; -----------------------------------------------------------------------------
@section[#:tag "sec:segfault:evaluation"]{Evaluation}
@(let* ([vrx #rx"\\(vector-ref "]
[vr-file* (for/list ([fn (sequence-append
(in-glob "benchmark/vector/*.rkt")
(in-glob "benchmark/vector/*/*.rkt"))]
#:when (with-input-from-file fn
(lambda ()
(regexp-match vrx (current-input-port)))))
fn)]
;; : (Listof (List Symbol Num-Hit Num-Miss))
[all-file+optz* (with-cache "vector-ref-optz"
(lambda ()
(profile-point "counting vector-ref optz")
(let-values ([(_in _out) (make-pipe)])
(parameterize ([current-output-port _out])
(displayln "(")
(for-each collect-and-summarize vr-file*)
(displayln ")"))
(close-output-port _out)
(let ([v (read _in)])
(close-input-port _in)
(begin0
(for/list ([d (in-list v)])
(cons (car d)
(or (for/first ([kvvv (in-list (cdr d))]
#:when (eq? (car kvvv) 'vector-ref))
(list (cadr kvvv) (caddr kvvv)))
(list 0 0))))
(profile-point "done w/ vector-ref optz")))))
#:read (lambda (f+o*)
(and
(for/and ([f+o (in-list f+o*)]
[fn (in-list vr-file*)])
(eq? (car f+o) (string->symbol fn)))
f+o*)))]
[file+optz* (filter (compose1 positive? cadr) all-file+optz*)]
[hit-count (length file+optz*)]
[miss-count (- (length all-file+optz*) hit-count)]
[num-optz (for/sum ([f+t (in-list file+optz*)]) (cadr f+t))])
;; -- for missed optz, take `caddr` of element of `file+optz*`
@list[
@figure["fig:vector-table" @elem{Summary of @racket[vector-ref] evaluation}
@graphical-summary[
#:hit-count hit-count
#:miss-count miss-count
#:bar-data (for/list ([fhm (in-list (sort file+optz* < #:key cadr))])
(define h (cadr fhm))
(define m (caddr fhm))
(list (format "~a" h) (* 100 (/ h (+ h m)))))
#:bar-title "Percent of Array References Optimized"
#:bar-x (format "# Optimized refs by module") ; (~a total)" num-optz
#:bar-y "%"
#:y-max 100
]
]
@elem{
The above implementation works for toy examples, but useful programs often
give constant values a name.
For example, the implementation of @racket[gzip] used in the main Racket distribution
declares array constants to implement a Huffman tree and heap.
To accommodate this and similar idioms, we added syntax extensions for binding
data to identifiers and constant folding for arithmetic operations.
These additions are justified in @Secref{sec:define}, but they are straightforward
extensions of the technique described in this section.
With name-tracking, we were able to optimize
two static array references in the @racket[gzip] implementation,
one in @racket[hangman],
12 references in a @racket[minesweeper] game, and
480 references in an implementation of @racket[parcheesi].
All three programs follow a general pattern of declaring a fixed-size vector
in the scope of a module and implementing helper functions to manipulate the vector.
The unusual success of @racket[parcheesi] was due to an inlining macro that
would unroll a loop over a vector of pawns to straight-line array
references at constant offsets.
These successes are modest,@note{Especially since we analyzed over 120 files using
arrays for places to optimize.}
but encouraging, since the target audience for these
extensions is script writers.
In prototype and throwaway code, we expect programmers to use more constant
values and make more index errors.
Our extensions catch some of these errors without imposing any annotation burden
or increasing compile times --- in the files we analyzed, we observed no
statistically significant difference compiling with and without our extensions
enabled.
}])

View File

@ -37,23 +37,35 @@
\newcommand{\tvar}[1]{\mathsf{#1}}
\newcommand{\tnat}{\tvar{Natural}}
\newcommand{\tarray}{\tvar{Array}}
\newcommand{\tdictgen}[1]{\llparenthesis #1 \rrparenthesis} %% yuck man
\newcommand{\tdictn}{\tdictgen{l_0 : \tau_0, \ldots,l_{n-1} : \tau_{n-1}}}
\newcommand{\tint}{\tvar{Int}}
\newcommand{\toption}[1]{\tvar{Option}\,#1}
\newcommand{\tnum}{\tvar{Num}}
\newcommand{\tstring}{\tvar{String}}
\newcommand{\tlist}[1]{\tvar{List~#1}}
\newcommand{\naturals}{\mathbb{N}}
\newcommand{\ints}{\mathbb{Z}}
%% -- terms
\newcommand{\dictgen}[1]{\{ #1 \}}
\newcommand{\dictvn}{\dictgen{l_0\!=\!v_0, \ldots, l_{n-1}\!=\!v_{n-1}}}
\newcommand{\vectorgen}[1]{\langle #1 \rangle}
\newcommand{\vectoren}{\vectorgen{e_0, \ldots, e_{n-1}}}
\newcommand{\vectorvn}{\vectorgen{v_0, \ldots, v_{n-1}}}
\newcommand{\vectorxn}{\vectorgen{x_0, \ldots, x_{n-1}}}
\newcommand{\vlam}[2]{\lambda\,#1\,.\,#2}
\newcommand{\vlet}[3]{\mathsf{let}\,#1 = #2\,\mathsf{in}\,#3}
\newcommand{\vnone}{\mathsf{None}}
\newcommand{\vsome}[1]{\mathsf{Some}\,#1}
\newcommand{\aref}[2]{#1@#2}
\newcommand{\checkedref}{\tvar{checked\mbox{-}ref}}
\newcommand{\unsaferef}{\tvar{unsafe\mbox{-}ref}}
\newcommand{\checkedref}{\RktMeta{checked-ref}}
\newcommand{\unsaferef}{\RktMeta{unsafe-ref}}
\newcommand{\rxm}{\RktMeta{rx-match}}
\newcommand{\segfault}{\mathsf{segfault}}
\newcommand{\indexerror}{\mathsf{IndexError}}
\newcommand{\syntaxerror}{\mathsf{SyntaxError}}
%% -- evaluation contexts
\newcommand{\ectx}{E}
@ -64,6 +76,11 @@
\newcommand{\tenvempty}{\cdot}
\newcommand{\tenvcons}[3]{#1:#2,#3}
%% -- proof environments
\newcommand{\penv}{\Sigma}
\newcommand{\penvempty}{\tenvempty}
\newcommand{\penvcons}[3]{\tenvcons{#1}{#2}{#3}}
%% -- typing
\newcommand{\typestogen}[3]{#1 \vdash #2 : #3}
\newcommand{\typestoclosed}[2]{\typestogen{\tenvempty}{#1}{#2}}
@ -77,11 +94,37 @@
\newcommand{\elabstoclosed}[3]{\elabstogen{\tenvempty}{#1}{\elabarrow}{#2}{#3}}
\newcommand{\elabsto}[3]{\elabstogen{\tenv}{#1}{\elabarrow}{#2}{#3}}
%% -- elaboration II
\newcommand{\provestogen}[7]{\typestogen{#1;#2}{#3 #4 #5}{#6 \dashv #7}}
\newcommand{\provesto}[4]{\provestogen{\penv}{\tenv}{#1}{\elabarrow}{#2}{#3}{#4}}
%% -- elaboration types
\newcommand{\pmap}{\phi}
\newcommand{\pmapcons}[3]{#1[#2 \mapsto #3]}
\newcommand{\pdom}{\kappa}
\newcommand{\parrow}{\mapsto}
\newcommand{\pvec}{\mathcal{V}}
\newcommand{\prx}{\mathcal{R}}
\newcommand{\pint}{\mathcal{I}}
%% -- evaluation
\newcommand{\bigstep}[2]{#1 \Downarrow #2}
\newcommand{\bigstepplus}[2]{#1 \Downarrow^+ #2}
\newcommand{\smallstepstar}[2]{#1 \rightarrow^* #2}
\newcommand{\smallstep}[2]{#1 \rightarrow #2}
\newcommand{\smallsteparrow}{\rightarrow}
\newcommand{\smallstepstar}[2]{#1 \smallsteparrow^* #2}
\newcommand{\smallstep}[2]{#1 \smallsteparrow #2}
%% -- subtyping
\newcommand{\subt}{\le:}
\newcommand{\subtypesto}[2]{#1 \subt #2}
%% -- metafunctions
%\newcommand{\mgroups}{\textsf{groups}}
\newcommand{\msup}{\mathsf{sup}}
\newcommand{\mlang}{\mathcal{L}}
\newcommand{\msubstrings}{\mathcal{S}}
\newcommand{\mset}[1]{\{\,#1\,\}}
\newcommand{\mrset}[2]{\{\,#1 \mid #2\,\}}
%% -- misc
\newcommand{\esubst}[2]{[#2/#1]}

View File

@ -64,18 +64,18 @@
(define-syntax define: (make-keyword-alias 'define
(lambda (stx)
(or (format-define stx)
(or (rx-define stx)
(format-define stx)
(num-define stx)
(lst-define stx)
(rx-define stx)
;(fun-define stx)
(vec-define stx)))))
(define-syntax let: (make-keyword-alias 'let
(lambda (stx)
(or (format-let stx)
(or (rx-let stx)
(format-let stx)
;(fun-let stx)
(num-let stx)
(lst-let stx)
(rx-let stx)
(vec-let stx)))))