From 3703cee37251f9ebc8e219377e170d7c6693adb3 Mon Sep 17 00:00:00 2001 From: Ben Greenman Date: Mon, 27 Jun 2016 15:57:06 -0400 Subject: [PATCH] [checkpoint] --- popl-2017/.gitignore | 1 - popl-2017/Makefile | 4 + popl-2017/OUTLINE.md | 18 + popl-2017/background.scrbl | 606 +++++++++++++------------------- popl-2017/bib.rkt | 17 +- popl-2017/common.rkt | 17 +- popl-2017/conclusion.scrbl | 3 + popl-2017/fig-reality-of-tr.tex | 3 - popl-2017/fig-stlc-core.tex | 8 +- popl-2017/intro.scrbl | 154 ++++---- popl-2017/paper.scrbl | 40 ++- popl-2017/related-work.scrbl | 8 + popl-2017/segfault.scrbl | 324 ++++++++++++++--- popl-2017/texstyle.tex | 51 ++- trivial/untyped.rkt | 8 +- 15 files changed, 740 insertions(+), 522 deletions(-) delete mode 100644 popl-2017/fig-reality-of-tr.tex diff --git a/popl-2017/.gitignore b/popl-2017/.gitignore index 54e936b..09eb422 100644 --- a/popl-2017/.gitignore +++ b/popl-2017/.gitignore @@ -8,7 +8,6 @@ paper.tex pearl.tex -benchmark/ compiled/ *.swp *.swo diff --git a/popl-2017/Makefile b/popl-2017/Makefile index 35b2ed7..35420b4 100644 --- a/popl-2017/Makefile +++ b/popl-2017/Makefile @@ -10,6 +10,10 @@ ${PAPER}.pdf: pkg setup texstyle.tex ++extra fig-stlc-core.tex \ ++extra fig-elab0.tex \ ++extra fig-elab1.tex \ + ++extra fig-elab-regexp.tex \ + ++extra fig-elab-sigma.tex \ + ++extra fig-regexp-lib.tex \ + ++extra fig-stlc-dict.tex \ ++extra mathpartir.sty \ ++style texstyle.tex \ --pdf $(PAPER).scrbl diff --git a/popl-2017/OUTLINE.md b/popl-2017/OUTLINE.md index 55af05f..5e70d40 100644 --- a/popl-2017/OUTLINE.md +++ b/popl-2017/OUTLINE.md @@ -63,3 +63,21 @@ rust will be difficult 5. ending --- +0. finish sql search +1. revise 3,4 with MODEL IMPL, EVAL +2. do 5 better, not sure how, figure first + +related work +- dep types +- occurrence types TR optimizer (no theoretical justifications) +- yes we did macros, yes that's been done (fisher, hermen) +- haskell + +conclusion +- jsut one way of doing this, can reimagin typed racket, TC + elab += stephen chang + + +regexP; +- +- put the (list ...) back diff --git a/popl-2017/background.scrbl b/popl-2017/background.scrbl index cb5a8c3..c5cca05 100644 --- a/popl-2017/background.scrbl +++ b/popl-2017/background.scrbl @@ -1,8 +1,14 @@ #lang scribble/sigplan @onecolumn -@require["common.rkt" pict racket/class racket/draw] +@; Goal of Sec 2 is to introduce background on TR needed to: +@; - understand our bias +@; - understand our examples +@; - understand "type elaborator api" -@title[#:tag "sec:background"]{If You Know What I Mean} + +@require["common.rkt"] + +@title[#:tag "sec:background"]{Towards a Type Elaborator API} @; A poet should be of the @; old-fahioned meaningless brand: @; obscure, esoteric, symbolic, @@ -12,370 +18,254 @@ @; I'll gladly explain what it means @; till you don't understand it. -@; Computers understand a very limited set of instructions. -@; It is tedious and error-prone to describe even simple thoughts in terms of these -@; instructions, but programming languages provide abstraction mechanisms -@; that can make the process bearable. -@; For example, low-level or assembly languages can implement the following -@; mathematical function for referencing an element of a fixed-length -@; data structure: -@; -@; @exact|{\[ -@; \RktMeta{ref} = \lambda (\RktMeta{v}~\RktMeta{i}) -@; \left\{\begin{array}{l l} -@; \RktMeta{v}_{i+1} & \RktMeta{if}~\RktMeta{v} = \vectorvn -@; \\ ~&\RktMeta{and}~\RktMeta{i} \in \mathbb{N} -@; ~\RktMeta{and}~i < n -@; \\[4pt] -@; \bot & \RktMeta{otherwise} -@; \end{array}\right. -@; \]}| -@; -@; @;Here we use the notation @exact|{$\langle \RktMeta{v}_0 \ldots \RktMeta{v}_{n-1} \rangle$}| -@; @; to describe a vector of @exact{$n$} elements. -@; Whether the implementation is a labeled sequence of instructions or a C function -@; is not important; what matters is the precise specification -@; and our ability to compute the function via machine. -@; Memory-safe functions with clear semantics are the core building blocks for -@; sane programs. -@; -@; As programs grow in size, complexity, and importance, it is desirable to have -@; some guarantees about how a program will run before actually running the -@; program. -@; Ideally, we would statically prove that @racket[ref] is -@; never applied to arguments that are not vectors or to out-of-bounds indices. -@; @;Such a proof would imply that every call @racket[(ref v i)] in the program -@; @; would yield a non-@|bot| value and help show that the overall program works as intended. -@; But proving such properties is difficult and there are often many -@; functions in a program for which we seek guarantees, so we settle for -@; approximate results. -@; Instead of statically ruling out all calls @racket[(ref v i)] that produce -@; @|bot|, we strive to eliminate a large subset automatically. -@; -@; Type systems have emerged as a powerful and convenient way of statically detecting -@; errors. -@; Instead of tracking the flow of exact values through a program, a type system -@; tracks the flow of @emph{types} denoting a range of possible runtime values. -@; Depending on the type system and programmers' discipline using it, the range -@; might be limited enough to show that @racket[ref] never returns @|bot| -@; in a program; however, the tradeoff is always the time and energy programmers -@; are willing to invest in writing down and maintaining type information. -@; As it stands, the most widely used type systems are those that require -@; minimal annotations from the programmer and catch only shallow, common errors. -@; -@; -@; @section{Problem Statement and Promises} -@; -@; @let*[([all-w 150] -@; [lang-w (/ all-w 2)] -@; [lang-h 20] -@; [lang-rectangle -@; (lambda (brush-style -@; fill-color -@; #:width [lang-w lang-w] ;; getting lazy there ben -@; #:height [lang-h lang-h] -@; #:slant-left [slant-left #f] -@; #:border-color [maybe-border-color #f] -@; #:border-width [border-width 3]) -@; (dc (lambda (dc dx dy) -@; (define old-brush (send dc get-brush)) -@; (define old-pen (send dc get-pen)) -@; (define border-color (or maybe-border-color fill-color)) -@; (send dc set-brush -@; (new brush% [style brush-style] [color fill-color])) -@; (send dc set-pen -@; (new pen% [width border-width] [color border-color])) -@; ;; -- -@; (define path (new dc-path%)) -@; (send path move-to 0 0) -@; (if slant-left -@; (begin (send path line-to (- 0 slant-left) (/ lang-h 2)) -@; (send path line-to 0 lang-h)) -@; (send path line-to 0 lang-h)) -@; (send path line-to lang-w lang-h) -@; (send path line-to lang-w 0) -@; (send path close) -@; (send dc draw-path path dx dy) -@; ;; -- -@; (send dc set-brush old-brush) -@; (send dc set-pen old-pen)) lang-w lang-h))] -@; [all-e (rectangle all-w lang-h)] -@; [hshim (blank (/ all-w 6) lang-h)] -@; [untyped-e (hc-append hshim (lang-rectangle 'vertical-hatch "CornflowerBlue" #:border-width 4 #:height (+ lang-h 2)))] -@; [typed-e (hc-append (lang-rectangle 'horizontal-hatch "Coral" #:border-width 2) hshim)] -@; [reality-of-ml (rc-superimpose (lc-superimpose all-e untyped-e) typed-e)] -@; [value-e -@; (let ([s 'solid] -@; [c "LimeGreen"] -@; [w (- lang-w lang-h)] -@; [b 1]) -@; (hc-append (blank (* lang-h 0.8) lang-h) -@; (lang-rectangle s c #:width (/ lang-w 3) #:height (/ lang-h 3) #:border-width b) -@; (lang-rectangle s c #:border-width b)))] -@; [reality-of-tr (lc-superimpose reality-of-ml value-e)] -@; )]{@list{ -@; No matter how precise the type system, static analysis cannot recognize -@; all correct programs and reject all incorrect programs. -@; Layering a type system on top of an untyped programming language therefore -@; leads to a world where the space of all syntactically valid programs -@; (drawn as a white box, of course) is partitioned into two overlapping sets. -@; -@; @centered[reality-of-ml] -@; -@; On the left, outlined in blue with vertical hatches, we have the set of all untyped programs -@; that produce meaningful results (read: non-error, non-diverging terms). -@; On the right, outlined in orange with horizontal hatches, we have the set of all programs -@; approved by the type system. -@; Some type-correct programs raise runtime errors when evaluated, and some -@; meaningful programs are rejected by the type checker. -@; -@; Again, the impossible goal is for the highlighted boxes to overlap. -@; Our argument is that adding a @emph{syntactic elaboration} phase before -@; typechecking can yield a language described by the -@; solid green area shown below, capturing more of the meaningful untyped -@; programs and fewer of the typed programs that go wrong, though rejecting -@; some typed programs that run to completion but contain a subterm that would -@; raise an exception if evaluated (Theorem @exact|{\ref{thm:relevance}}|). -@; -@; @centered[reality-of-tr] -@; }} -@; -@; Starting from an untyped programming language built from a grammar of terms -@; @exact{$e$} with a reduction relation @exact{$e \Downarrow v$} -@; and a type judgment @exact{$\vdash e : \tau$} relating terms to -@; values @exact{$v$} and types @exact{$\tau$}, we derive an extended language -@; by inserting a function @exact{$\elaborate(e)$} before type checking. -@; The extended language has a type system @exact{$\vdashe$} consisting of a single -@; rule: -@; -@; @exact|{ -@; \begin{mathpar} -@; \inferrule*{ -@; \vdash \elaborate(e) : \tau -@; }{ -@; \vdashe e : \tau -@; } -@; \end{mathpar} -@; }| -@; -@; We also derive a reduction relation on typed terms @exact{$\vdashe e : \tau$} -@; as @exact{$\elaborate(e) \Downarrow v$}. -@; Type soundness for @exact{$\vdashe$} is thus a corollary of -@; type soundness for the @exact{$\vdash$} judgment. -@; -@; The output of @exact{$\elaborate(e)$} is a term @exact{$e'$}, -@; though @exact{$e'$} may be a labeled error term -@; @exact|{$\bot^{e''}$}| meaning the elaborator detected an error in the program. -@; Terms @exact{$e$} such that @exact{$\elaborate(e)$} type checks but @exact{$e$} -@; does not are newly expressible in the extended system. -@; Terms @exact{$e$} such that @exact{$\vdash e : \tau$} but -@; @exact|{$\elaborate(e) = \bot^{e'}$}| are programs validated by the typechecker -@; but rejected by the extended system because the subterm @exact{$e'$} of @exact{$e$} -@; would raise a runtime error if evaluated. -@; -@; Our main contribution is defining a useful collection of @emph{local} -@; transformations to perform during elaboration. -@; Each local elaboration is proven correct in isolation. -@; We then define @exact{$\elaborate(e)$} as the union of these local cases -@; and a default elaboration that applies @exact{$\elaborate$} to all -@; subterms of @exact{$e$}. -@; -@; To avoid reasoning about terms post-elaboration we will describe -@; our local elaborations with type rules of the form: @todo{maybe drop this} -@; -@; @exact|{ -@; \begin{mathpar} -@; \inferrule*{ -@; \prope(f, e) -@; \\\\ -@; \vdash f : \tau_f -@; \\ -@; \vdash e : \tau_e -@; }{ -@; \vdashe f~e : \tau -@; } -@; \end{mathpar} -@; }| -@; -@; where @exact{$\prope$} is a proposition about syntactic terms -@; @exact{$f$} and @exact{$e$} -@; used to justify the transformation @exact|{$\elaborate(f~e)$}|. -@; Despite this inference rule shorthand, elaborations and @exact{$\prope$} -@; conditions run before typechecking and have no access to type information -@; in the program. -@; -@; The relationship between terms @exact{$e$} and @exact{$\elaborate(e)$} is given -@; by three theorems: @todo{see next section for now}. -@; -@; Correct elaborations obey these theorems. -@; @; Note: there is no guarantee that elaboration produces a well-typed term -@; @; because calls like @exact{$\elaborate($}@racket[ref 0 0]@exact{$)$} are allowed. -@; -@; @; We refrain from making stronger statements about elaborate because -@; @; real systems will have lots of other elaborations. +@; TODO +@; - can we not use "Typed Racket" as the first word of this section? +@; to me that's a huge turn-off after reading the introduction. +@; - missing answer: why are we using TR + +Typed Racket does not have a type elaboration API; however, it inherits a rich + syntax extension system from its host language, Racket. +Experience with Racket syntax extensions (aka macros) motives our proposal for + a similar type elaboration system, presented in @Secref{subsec:api}. +Moreover, we have implemented the transformations described in @Secref{sec:define} + as a Typed Racket package + and this section is intended to prepare the way for later code snippets. -@section{Syntax Extensions} +@section{Typed Racket, today} -Syntax extension systems have been incorporated in - a number of mainstream programming languages. -Essentially, syntax extensions let the programmer write code that generates code. -In practice this gives programmers the ability to extend the syntax of their - language, abstract over textual patterns, and control the evaluation order - of expressions. +Typed Racket is an ongoing experiment in language design the success of which + is a testament to the usefulness and versatility of syntax extensions. +The entire language is implemented as a Racket library; in particular, a + library of syntax extensions. +Types in Typed Racket are distinguished Racket terms, given special meaning by + the type checker. +The checker itself is nothing more than a Racket function defined over + type-annotated programs and run before the program is compiled. +When typechecking succeeds, the annotations are erased and the resulting + program is fed to the Racket compiler. +As such, there is no need for a dedicated ``Typed Racket compiler''. +Type-driven optimizations occur on core Racket forms just after type checking + and the result feeds in to the existing compiler toolchain. -Racket's syntax extension API is particularly mature and has inspired - similar APIs in at least two other languages, so we adopt it here to introduce - syntax extensions. -As a first example, suppose we want to assert that expressions - @racket[e_1], @racket[e_2], @racket[e_3], and @racket[e_4] all evaluate to - the same value. -By hand, we might write a sequence of unit tests: -@racketblock[ - (begin (check-equal? e_2 e_1) - (check-equal? e_3 e_1) - (check-equal? e_4 e_1)) -] -but these tests follow a simple pattern that we can abstract as a @emph{syntax rule}, - using ellipses (@racket[...]) to capture an arbitrary number of expressions. -@racketblock[ - (define-syntax-rule (check-all-equal? e_1 e_rest ...) - (begin (check-equal? e_rest e_1) ...)) -] -Our tests can now be written more concisely: -@racketblock[ - (check-all-equal? e_1 e_2 e_3 e_4) -] -making them easier to read and maintain. -Moreover, we can easily improve the rule to evaluate @racket[e_1] only once - instead of @math{n-1} times: -@racketblock[ - (define-syntax-rule (check-all-equal? e_1 e_rest ...) - (let ([v e_1]) - (check-equal? e_rest v) ...)) +@; One might expect this is kinda slow. Maybe that's true. But has benefits and users. + +For a concrete example, have a left-leaning factorial function: + + @codeblock{ + #lang typed/racket + (define (fact (n : Natural)) : Natural + (foldl * n (range 1 n))) + } + +Aside from the type annotations, the function is ordinary Racket code + and behaves as such when run. +But the syntax accepted by Typed Racket's @racket[define] is a superset of + valid, untyped Racket. +Furthermore, this @racket[define] has an extended semantics. +When the above program is compiled, @racket[define] registers the identifier + @racket[fact] in a type environment with the signature @racket[(Natural -> Natural)]. +The definition then expands to an annotation-free Racket @racket[define] and + the same expansion-and-type-collection process is repeated on the body of @racket[fact]. +So just as Typed Racket re-uses the Racket compiler, Typed Racket's @racket[define] + re-uses the semantics of Racket's. +This sleight of hand is accomplished by shadowing @racket[define] with a syntax + extension that moves types from the program syntax to a type environment---and + crucially, does nothing more. + +Note, however, that no types are checked at this point. +It is only after the entire program is expanded to core Racket and all type + definitions collected that types are checked and e.g. the type variables + for @racket[foldl] are instantiated. +Waiting until all syntax extensions are expanded is both a pragmatic choice + and allows extensions to create and refine types written in a program without + subverting the authoritarian type checker. + +@; not really liking 'pragmatic' but I guess it should be obvious, that's the +@; easiest way to implement a TC for Racket + + +@section{Racket Macros, quickly} + +Having built some intuition for how Typed Racket's @racket[define] operates, + we use its definition to introduce Racket macros.@note{After this section, + we will stop using the term @emph{macro} in favor of the more general + phrase @emph{syntax extension}.} +The following is paraphrased from Typed Racket and elaborates a type-annotated + @racket[define] to an unannotated one. + + @racketblock[ + (define-syntax (-define stx) + (syntax-parse stx #:literals (:) + [(-define (nm:id (arg:id : ty)) : ret-ty body ...) + (define type #`(ty -> ret-ty)) + (register-type #`nm type) + #`(define (nm arg) body ...)])) + ] + +Going line-by-line, we have: + +@; TODO what is the point of each? +@; TODO what is the bottom line? + +@itemlist[ + @item{ + @racket[define-syntax] declares a function on code; in other words, + a macro. + The formal parameter @racket[stx] is so named because it always binds + a @emph{syntax object} representing the context of a call to the + @racket[-define] macro. + } + @item{ + @racket[syntax-parse] is pattern matching for syntax objects. + The optional argument @racket[#:literals (:)] causes any @racket[:] characters + in a @racket[syntax-parse] pattern to match only against the @racket[:] + identifier bound in the current lexical scope. + Normally the variable @racket[:] is no different from the pattern @racket[x] + or the pattern @racket[any-variable]. + } + @item{ + The third line of the macro is a pattern. + The remaining lines are instructions to perform if the pattern matches. + Within the pattern: + @itemlist[ + @item{ + @racket[-define], @racket[nm], @racket[arg], @racket[ty], + @racket[ret-ty], and @racket[body] are @emph{pattern variables} matched to + sub-expressions of @racket[stx]. + } + @item{ + The ellipses (@racket[...]) are part of the grammar of @racket[syntax-parse] + and match zero or more occurrences of the previous pattern, + to the effect that @racket[body ...] matches a list of consecutive expressions. + } + @item{ + @racket[:id], as in @racket[nm:id] and @racket[arg:id], is a + @emph{syntax class} annotation. + Using the @racket[id] syntax class causes @racket[nm] and @racket[arg] + to only match identifiers and not, for instance, integer constants or + parenthesized expressions. + } + ] + } + @item{ + @|stx| creates a syntax object from an expression. + In this case, the syntax object is a function type, built from our pattern + variables and Typed Racket's @racket[->] constructor. + } + @item{ + @racket[register-type] binds an identifier to a type in a global type environment. + Since the pattern variable @racket[nm] is only bound to a symbol, we use + the syntax constructor @|stx| to associate the symbol with + the lexical scope of @racket[stx]. + } + @item{ + The result of any @racket[syntax-parse] pattern must be a syntax object. + Here we build a Racket @racket[define] statement from the relevant pattern + variables. + On this line, the ellipses (@racket[...]) are used in a dual sense to + splice the contents of the list @racket[body] into the body of the new + @racket[define] statement. + } ] -Intuitively, syntax rules perform a search-and-replace before the program - is compiled; however, the replacement process is careful to preserve the - lexical structure of programs. -For instance, a program that uses the same variable name @racket[v] as the syntax rule: -@racketblock[ - (define v 5) - (check-all-equal? (+ 2 2) v) -] -will expand to code that introduces a fresh variable @racket[v1]: -@racketblock[ - (define v 5) - (let ([v1 (+ 2 2)]) - (check-equal? v v1)) -] -to avoid shadowing the programmer's variable with the identifier used inside - the syntax rule. -Details on how this so-called @emph{hygenic} expansion is implemented and - its benefits to extension writers and users are explained by Flatt@~cite[f-popl-2016 fcdb-jfp-2012]. +A call to @racket[syntax-parse] can contain multiple patterns. +Indeed, + the actual definition of Typed Racket's define has patterns for + non-function definitions @racket[(define n : Integer 42)] + and unannotated @racket[define] statements. +Finally, the module that implements @racket[-define] exports it as @racket[define] + to change the behavior of definitions only in external modules. -In addition to pattern-based syntax rules, one can extend Racket with - arbitrary functions defined over @emph{syntax objects}. -For instance, we can write an extension that formats log messages to a port - @racket[log-output] at runtime when the program is compiled with a flag - @racket[DEBUG] enabled. If the flag is disabled, we perform a no-op. -The expansion of @racket[log] calls happens during compilation: -@(begin -#reader scribble/comment-reader -@codeblock|{ - ;; With DEBUG enabled - (log "everything ok") - ==> (displayln "everything ok" log-output) - ;; With DEBUG disabled - (log "everything still ok") - ==> (void) -}|) +@; Include a jab about parenthesized syntax making metaprogramming life easier? +@; @subsection{Oh, the Parentheses} -The @racket[syntax-parse] form is a convenient way to implement @racket[log]. -In this case, we use @racket[syntax-parse] to deconstruct a syntax object - @racket[stx] representing an application of @racket[log]. -The output of syntax parse is a new syntax object built using the constructor - @racket[syntax]. -@codeblock{ - (define-syntax (log stx) - (syntax-parse stx - [(log message) - (if DEBUG - (syntax (displayln message log-output)) - (syntax (void)))])) -} -We can further enhance our implementation with a compile-time check that - the value bound to the pattern variable @racket[message] is a string literal. -@codeblock{ - (define-syntax (log stx) - (syntax-parse stx - [(log message) - (unless (string? (syntax->datum (syntax message))) - (error "log: expected a string literal")) - (if DEBUG - (syntax (displayln message log-output)) - (syntax (void)))])) -} -@; Alternatively, we can use Racket's @emph{syntax classes} to the same effect. -@; The @racket[str] syntax class recognizes literal strings. -@; Binding it to the pattern variable @racket[message] causes calls like -@; @racket[(log 61)] to raise a compile error. -@; @codeblock{ -@; (define-syntax (log stx) -@; (syntax-parse stx -@; [(log message:str) -@; (if DEBUG -@; (syntax (displayln message log-output)) -@; (syntax (void)))])) -@; } -With this enhancement, calls like @racket[(log 61)] are rejected statically. -Unfortunately, arguments that @emph{evaluate} to string literals - are also rejected - because the syntax extension cannot statically predict what value an arbitrary - expression will reduce to. -This is a fundamental limitation, but we can make a small improvement by accepting - any @racket[message] produced by another (trusted) syntax extension. -Suppose @racket[++] is an extension for concatenating two strings. -If we assign a unique @emph{syntax property} to the syntax object produced - by @racket[++], we can later retrieve the property in the @racket[log] extension. -First, we give an implementation of @racket[++] in terms of Racket's - built-in @racket[string-append] function, crucially using @racket[syntax-property] - to associate the value @racket['string] with the key @racket['static-type]. -@codeblock{ - (define-syntax (++ stx) - (syntax-parse stx - [(++ s1 s2) - (syntax-property - (syntax (string-append s1 s2)) - 'static-type 'string)])) -} -Assuming now that the key @racket['static-type] accurately describes the value - contained in a syntax object, @racket[log] can accept both string - literals and tagged syntax objects. -@codeblock{ - (define-syntax (log stx) - (syntax-parse stx - [(log message) - (define is-string? - (string? (syntax->datum (syntax message)))) - (define expands-to-string? - (eq? 'string - (syntax-property - (local-expand message) - 'static-type))) - (unless (or is-string? expands-to-string?) - (error "log: expected a compile-time string")) - (if DEBUG - (syntax (displayln message log-output)) - (syntax (void)))])) -} +@section[#:tag "subsec:api"]{Implementing a Type Elaborator} -Our syntax extensions @racket[check-all-equal?], @racket[log], and @racket[++] - are indistinguishable from user-defined functions or core language forms, - yet they perform useful transformations before a program is typechecked or - compiled. -This seamless integration gives Racket programmers the ability to grow the - language and tailor it to their specific needs. +Just as Typed Racket parses the syntax of a program and extracts breadcrumbs + for the type environment, a type elaborator transforms + a program leaving hints to guide the type checker. +Syntax extensions are a low-level way to achieve this behavior. + +@Figure-ref{fig:printf} demonstrates a type-elaborated variant of Racket's + @racket[printf]. +When called with a string literal @racket[fmt] as its first argument, the + elaborator @racket[-printf] + reads @racket[fmt] for type and arity constraints using the function @racket[format-types]. +For instance, the format string @racket["~b"] would produce the type constraint + @racket[(Exact-Number)], implying that the arguments @racket[args] must be a + list with one element of type @racket[Exact-Number]. +This constraint is used twice in @racket[-printf]: + first to check the length of @racket[args] against the length of @racket[types] + and second to add explicit type annotations (via @racket[ann]) around each + argument to the format string. +Whereas Typed Racket accepts any number of values with any type and + lets Racket's @racket[printf] raise runtime errors, type elaboration reports + both arity and type errors statically. +@; arity error = caught directly +@; type error = implied + + @figure["fig:printf" "Type elaboration for format strings" + @racketblock[ + (define-syntax (-printf stx) + (syntax-parse stx + [(-printf fmt:str args ...) + #:with (types ...) (format-types #`fmt) + (if (= (stx-length #`(args ...)) + (stx-length #`(types ...))) + #`(printf fmt (ann args types) ...) + (error 'printf "arity mismatch in ~s" stx))])) + ] + @; Include the default branch? I just don't know what to say about it. + @; [(-printf fmt args ...) + @; #`(printf fmt args ...)])) + ] + +In general, the high-level goals of such type elaborations are: + @itemlist[ + @item{ (@goal{refinement}) + Refine type signatures using latent, syntactic @emph{value information}, + such as the characters in a string constant. + } + @item{ (@goal{reuse}) + Rely on the existing type checker. + Avoid duplicating its work and never resort to proofs by assertion.@note[@elem{ + Inspired by the @emph{translation validation} approach to + compiler correctness @~cite[pss-tacas-1998].}] + @; Just trying to say, always typecheck things + } + @item{ (@goal{relevance}) + Report errors in terms of the programmer's code, not in terms + of elaborated code.@note[@elem{Inspired by SoundX @~cite[le-popl-2016].}] + } + ] + +The @racket[-printf] elaborator meets these goals by producing Typed Racket code + that only adds type annotations to the original program. +If these annotations fail, they report a type error relative to an element + of @racket[args]. +As for refining the types, @racket[-printf] is best described with a quasi-dependent + type in terms of a proof theory @exact{$\Sigma$}. + + @exact|{\begin{mathpar} + \inferrule{ + \Sigma \vdash (\RktMeta{format-types}~\RktMeta{fmt}) = \tau_0 \ldots \tau_{n-1} + \\\\ + \typestogen{\penv;\,\tenv}{\RktMeta{arg}_0}{\tau_0} + \\ + \ldots + \\ + \typestogen{\penv;\,\tenv}{\RktMeta{arg}_{n-1}}{\tau_{n-1}} + }{ + \typestogen{\penv;\,\tenv}{\RktMeta{-printf fmt}~\RktMeta{arg}_0 \ldots \RktMeta{arg}_{n-1}}{\mathsf{Unit}} + } + \end{mathpar}}| + +We chose @racket[printf] as an introductory example because its correctness + and type soundness follow directly from the soundness of @racket[format-types]. +Correctness is not generally so simple to "prove", so @Secref{sec:segfault} and @Secref{sec:regexp} + show how type elaboration can justify a potentially unsafe optimizing transformation + and give library authors a technique for implementing a precise API without + changing their language's type system. -@; NOTE: we will use similar 'typed macros' in a coming section... right? diff --git a/popl-2017/bib.rkt b/popl-2017/bib.rkt index 5f689d8..8baf7a4 100644 --- a/popl-2017/bib.rkt +++ b/popl-2017/bib.rkt @@ -5,7 +5,7 @@ ;; FIXME: this doesn't have all the papers from the README yet -(require racket/format +(require racket/format scriblib/autobib) (provide (all-defined-out)) @@ -77,6 +77,7 @@ (define/short sigmod "SIGMOD" (string-append ACM "SIGMOD " International Conference "on Management of Data")) (define/short sigplan-notices "SIGPLAN Notices" (string-append ACM "SIGPLAN Notices")) (define/short tacs (string-append International Symposium "Theoretical Aspects of Computer Science")) + (define/short tacas (string-append International Conference "on Tools and Algorithms for the Construction and Analysis of Systems")) (define/short tcs "Theoretical Computer Science") (define/short tfp "TFP" (string-append Symposium "Trends in Functional Programming")) (define/short tlca "TLCA" (string-append International Conference "Typed Lambda Calculi and Applications")) @@ -1725,9 +1726,23 @@ #:location (proceedings-location icfp #:pages '(235 246)) #:date 2010)) +(define pss-tacas-1998 + (make-bib + #:title "Translation Validation" + #:author (authors "Amir Pnueli" "Michael Siegel" "Eli Singerman") + #:location (proceedings-location tacas #:pages '(151 166)) + #:date 1998)) + (define le-popl-2016 (make-bib #:title "Sound Type-Dependent Syntactic Language Extension" #:author (authors "Florian Lorenzen" "Sebastian Erdweg") #:location (proceedings-location popl #:pages '(204 216)) #:date 2016)) + +(define gr-cup-2004 + (make-bib + #:title "The Standard ML Base Library" + #:author (authors "Emden R. Gansner" "John H. Reppy") + #:location (book-location #:edition "1" #:publisher "Cambridge University Press") + #:date 2004)) diff --git a/popl-2017/common.rkt b/popl-2017/common.rkt index 053c4d1..a43f735 100644 --- a/popl-2017/common.rkt +++ b/popl-2017/common.rkt @@ -21,6 +21,10 @@ id todo proof + warning + goal + + stx ) (require "bib.rkt" @@ -117,7 +121,7 @@ (define (sf x) (elem #:style "sfstyle" x)) -(define (sc x) (exact "\\textsc{" x "}")) +(define (sc x) (exact "\\textsc{\\small " x "}")) (define (parag . x) (apply elem #:style "paragraph" x)) @@ -174,3 +178,14 @@ txt (exact "\\hfill\\qed")))) +(define (warning sym txt . arg*) + (printf "[WARNING] ~a: " sym) + (apply printf txt arg*) + (newline)) + +(define (goal str) + (bold (emph str))) + +(define stx + (exact "\\RktRdr{\\#{\\textasciigrave}}")) + diff --git a/popl-2017/conclusion.scrbl b/popl-2017/conclusion.scrbl index 2327c93..57e435a 100644 --- a/popl-2017/conclusion.scrbl +++ b/popl-2017/conclusion.scrbl @@ -13,3 +13,6 @@ and only tells eternity. @; type systems change slowly. That's OK @; syntax extensions can change much faster and YOU can control the @; changes. That's YOU joe programmer or YOU jane street capital. Do you. + +@; just one way of doing this, can imagine other Typed Racekt +@; in fact Stephen Chang ..... diff --git a/popl-2017/fig-reality-of-tr.tex b/popl-2017/fig-reality-of-tr.tex deleted file mode 100644 index 57d88ab..0000000 --- a/popl-2017/fig-reality-of-tr.tex +++ /dev/null @@ -1,3 +0,0 @@ -\begin{center} - to do -\end{center} diff --git a/popl-2017/fig-stlc-core.tex b/popl-2017/fig-stlc-core.tex index 886bd8a..319435a 100644 --- a/popl-2017/fig-stlc-core.tex +++ b/popl-2017/fig-stlc-core.tex @@ -18,13 +18,9 @@ } \inferrule*[left=T-Unsafe]{ - \typestoclosed{\vectorvn}{\tarray} + \typesto{\vectorvn}{\tarray} \\ - \typestoclosed{i}{\tint} - \\\\ - i \in \ints - \\ - 0 \leq i < n + \typesto{i}{\tint} }{ \typesto{\unsaferef~\vectorvn~i}{\tint} } diff --git a/popl-2017/intro.scrbl b/popl-2017/intro.scrbl index 45615ee..e16df1b 100644 --- a/popl-2017/intro.scrbl +++ b/popl-2017/intro.scrbl @@ -1,6 +1,6 @@ #lang scribble/sigplan @onecolumn -@require["common.rkt" (only-in scribble/base nested)] +@require["common.rkt" (only-in scribble/base nested) "bib.rkt"] @title[#:tag "sec:intro"]{Type Eleborators Need APIs} @@ -13,11 +13,11 @@ consistency according to the underlying type theory, replaces many of the surface-syntax constructs with constructs from the kernel language, and inserts type information to create an annotated representation. -Some (implementations of such) programming languages also support a way to +Some programming languages also support a meta-API, that is, a way to programmatically direct the elaborator. For example, Rust and Scala come with compiler plug-ins. Typed Racket and Types Clojure inherit the macro -mechanisms of their parent languages. Here we refer to such mechanisms as -@defterm{elaborator API}s. +mechanisms of their parents. This paper refers to all such mechanisms as +@defterm{elaborator API}s. Equipping type-checking elaborators with APIs---or using the existing APIs---promises new ways to expand the power of type checking at a @@ -29,21 +29,17 @@ Consider the example of tailoring the API of a library that implements a string-based, domain-specific languages. Examples of such libraries abound: formatting, regular-expression matching, database queries, and so on. The creators of these DSLs tend to know a lot about how programs written in -these DSLs relate to the host program, but they cannot express this -knowledge in API types without resorting to such rich systems as -dependent type theory. - -Type tailoring allows such programmers to refine the typing rules for the -APIs in a relatively straightforward way. Recall the API of the -regular-expression library. In a typical typed language, the matching -function is exported with a type like this: -@; -@verbatim[#:indent 4]{ - -reg_exp_match: String -> Option [Listof String] - -} +these DSLs relate to the host program, but they (usually) cannot express +this knowledge in API types. Type tailoring allows such programmers to +refine the typing rules for the APIs in a relatively straightforward +way. Recall the API of the regular-expression library. In a typical typed +language, the matching function is exported with a type like this: @; +@exact|{ +\begin{mathpar} +\mbox{regexp-match} : \mbox{String} \rightarrow \mbox{Opt[Listof[String]]} +\end{mathpar}\hspace{-.3em}}| +@; the above is an ***incredibly disgusting hack to work around a scribble bug*** When the result is a list of strings, the length of the list depends on the argument string---but the API for the regular-expression library cannot express this relationship between the regular expression and the @@ -51,69 +47,87 @@ surrounding host program. As a result, the author of the host program must inject additional complexity, often in the form of (hidden) run-time checks. If type tailoring is available, the creator of the library can refine the -type of @tt{reg_exp_match} with rules such as these: -@; -@verbatim[#:indent 4]{ - - Program |- s : does not contain grouping ------------------------------------------------- - G |- reg_exp_match(s) : Option [List String] - - -Program |- s : contains one grouping w/o alternative ------------------------------------------------------ - G |- reg_exp_match(s) : Option [List String String] - -} +type of @tt{regexp-match} with rules such as this one: @; +@exact|{ +\begin{mathpar} + \inferrule*[]{ + \elabsto{e}{e'}{\mbox{String}}, \\\\ + \mbox{\it Program} \vdash e' \mbox{ does not contain a grouping} + }{ + \elabsto{\mbox{regexp-match}~e}{\mbox{regexp-match}~e'}{\mbox{Opt[List[String]]}} + } +\end{mathpar}\hspace{-.3em}}| That is, when the extended elaborator can use (a fragment of) the program -to prove that the given string for a specific use of @tt{reg_exp_match} +to prove that the given string for a specific use of @tt{regexp-match} does not contain grouping specifications---ways to extract a sub-string via -matching---then the @tt{Some} result type is a list that contains a single -string. Similarly, if the string contains exactly one such grouping (and no -alternative), then a match produces a list of two strings. In all other -cases, the type checker uses the default type for the function. +matching---the @tt{Some} result type is a list of a single +string. The original specifications remains the default rule, which the +type checker uses when it cannot use the specific ones. -A vector library is another familiar example that offers opportunities for -type tailoring. Often such a library exports a dereferencing construct with +A similar rule would say if the string contains exactly one such grouping +(and no alternative), a match produces a list of two strings. Critically, +this refined type for the result of applying @tt{regexp-match} enables +further type refinements, just like constant folding enables additional +compiler optimizations. In this specific case, the program context of the +application of @tt{regexp-match} may dereference the list with unsafe---and +thus faster---versions of @tt{first} and @tt{second} once it has confirmed +a match. + +Vectors offer a similar opportunity for unsafe dereferencing via +type tailoring. Typically, such a library exports a dereferencing construct with the following type rule: -@; -@verbatim[#:indent 4]{ - - G |- v : Vector[X] G |-e : Integer ----------------------------------------- - G |- v[e] ~~> checked_ref(v,e) : X -} +@exact|{ +\begin{mathpar} + \inferrule*[]{ + \elabsto{e_1}{e_1'}{\tarray} + \\ + \elabsto{e_2}{e_2'}{\tint} + }{ + \elabsto{{e_1}[{e_2}]}{\checkedref~e_1'~e_2'}{\tint} + } +\end{mathpar}\hspace{-.3em}}| @; If the elaborator can prove, however, that the indexing expression @tt{e} is equal to or evaluates to a specific integer @tt{i}, the rule can be strengthened as follows: @; -@verbatim[#:indent 4]{ +@exact|{ +\begin{mathpar} - Program |- e = i for some i in Natural - && Program |- v is vector of length n - && i < n + \inferrule*[]{ + \elabsto{e_1}{\vectorvn}{\tarray}, + \\ + \elabsto{e_2}{e'_2}{\tint}, + \\\\ + \mbox{\it Program} \vdash e'_2 = i, \quad + i \in \ints, \quad + 0 \le i < n + }{ + \elabsto{{e_1}[{e_2}]}{\unsaferef~\vectorvn~i}{\tint} + } +\end{mathpar}\hspace{-.3em}}| - G |- v : Vector[X] G |-e : Integer ----------------------------------------- - G |- v[e] ~~> unsafe-ref(v,e) : X -} +This paper introduces and evaluates the novel idea of type tailoring. It +uses Typed Racket and its API to the elaborator (section 2) because the +language already supports appropriate type and run-time systems and because +it is relatively straightforward to program the type +elaborator---@emph{without modifying it}. To make type tailoring concrete +and to demonstrate its usefulness, the paper presents two case studies +(sections 3 and 4). Each report on a case study consists of three parts: a +type soundness argument assuming a type soundness argument for the complete +language exists; the actual @; - That is, the elaborator can then eliminate a possibly expensive run-time - check. - -This paper demonstrates the idea with concrete case studies of type -tailoring (section 2) in the context of Typed Racket and its API to the -elaborator (section 3). To illustrate the usefulness of the idea, we -implement two tailorings. The first one---chosen for the wide familiarity -of the domain---enriches the type-checking of vector-referencing operations -(section 4). The second example explains how the implementor of a -string-based embedded DSL---a regexp-matching DSL---may tailor the types of -the interpretation function (section 5). Our evaluations confirm that these -tailorings reduce the number of run-time checks that the programmer or the -elaborator have to insert into the host program. In addition, we sketch +@margin-note*{BEN: this evaluation is missing an idea} +@; +implementation; and an evaluation that reports how often the revised type +elaborator can improve the code. The first type tailoring---chosen for the +wide familiarity of the domain---enriches the type-checking of +vector-referencing operations (section 3). The second example explains how +the implementor of a string-based embedded DSL---a regexp-matching DSL, +also widely familiar to programming researchers---may tailor the types of +the interpretation function (section 4). In addition, the paper sketches several other applications of type tailoring in Typed Racket (section -6). We also explain how the creator of such libraries can refine the -existing type soundness proof of the host language to argue the correctness -of the tailoring. +5). The final two sections compare programmability of the type elaborator +to work on dependent types and sketch how such an API could be implemented +for other languages. diff --git a/popl-2017/paper.scrbl b/popl-2017/paper.scrbl index 004a22a..b5a1353 100644 --- a/popl-2017/paper.scrbl +++ b/popl-2017/paper.scrbl @@ -7,23 +7,23 @@ @abstract{ -Many typed APIs implicitly acknowledge the @emph{diktat} that the host type - system imposes on the creators of libraries. When a library implements a - string-based domain-specific language, the problem is particularly obvious. - The interpretation functions for the programs in this embedded language - come with the uninformative type that maps a string to some other host - type. Only dependently typed languages can improve on this scenario at the - moment, but they impose a steep learning curve on programmers. +Typed APIs suffer from the @emph{diktat} of the host type system. + Libraries that implement string-based domain-specific languages make this + clash particularly obvious. The interpretation functions for the programs + in embedded languages come with the rather uninformative type that maps a + string to some other host type. Only dependently typed languages can + improve on this scenario at the moment, but they impose a steep learning + curve on programmers. -This paper proposes to tackle this problem with APIs for type - checkers. Specifically, it observes that most typed languages already - employ an elaboration pass to type-check programs. If this elaborator - comes with a sufficiently rich API, the author of a library can supplement - the default types of the library's API with typing rules that improve the - collaboration between host programs and uses of the library. The - evaluation uses a prototype for Typed Racket and illustrates how useful - the idea is for widely available libraries. Also the paper sketches how - the authors of such ``tailored'' rules can argue their soundness. +This paper proposes to tackle this problem with a meta-API for the type + checker. Specifically, it observes that most typed languages already + employ an elaboration pass to type-check programs. If this elaborator came + with a sufficiently rich API, the author of a library could supplement the + default types of the API with typing rules that improve the collaboration + between host programs and uses of the library. To demonstrate the + feasibility and effectiveness of this idea, the paper presents a prototype + for Typed Racket and two case studies. It also sketches how the authors of + such ``tailored'' typing rules can argue their soundness. } @@ -34,12 +34,14 @@ This paper proposes to tackle this problem with APIs for type @; See OUTLINE.md for explanation @include-section{intro.scrbl} @include-section{background.scrbl} +@;@include-section{elaborators.scrbl} @include-section{segfault.scrbl} -@;@include-section{examples.scrbl} +@include-section{regexp.scrbl} +@include-section{define.scrbl} @;@include-section{discussion.scrbl} @;@include-section{friends.scrbl} -@;@include-section{related-work.scrbl} -@;@include-section{conclusion.scrbl} +@include-section{related-work.scrbl} +@include-section{conclusion.scrbl} @section[#:style 'unnumbered]{Acknowledgments} diff --git a/popl-2017/related-work.scrbl b/popl-2017/related-work.scrbl index 41de363..9ba159d 100644 --- a/popl-2017/related-work.scrbl +++ b/popl-2017/related-work.scrbl @@ -4,6 +4,8 @@ @title[#:tag "sec:related-work"]{Experts} +@section{Macros} + @section{SoundX} SoundX is a system for modeling programming languages and defining type-sound extensions, e.g. defining a type derivation for @tt{let} in terms of a type @@ -40,6 +42,12 @@ Our general approach and outlook on type soundness is informed by Cousot. @section{Compiler Plugings} +@; -- HASKELL +@; https://ghc.haskell.org/trac/ghc/wiki/Plugins/TypeChecker +@; http://christiaanb.github.io/posts/type-checker-plugin/ +@; http://adam.gundry.co.uk/pub/typechecker-plugins/typechecker-plugins-2015-07-17.pdf +@; https://github.com/yav/type-nat-solver (copy of paper in src/ folder here) + GHC (constraint solvers) Rust (macros) Scala (macros) diff --git a/popl-2017/segfault.scrbl b/popl-2017/segfault.scrbl index 2afb7ae..6e5a198 100644 --- a/popl-2017/segfault.scrbl +++ b/popl-2017/segfault.scrbl @@ -1,8 +1,54 @@ #lang scribble/sigplan @onecolumn +@; 1. MODEL +@; 2. IMPLEMENTATION +@; 3. EVALUATION -@require["common.rkt"] +@; Q. remove , and only have vector values? +@; because the syntax extension rules expect only values (expressions ruin the proofs) -@title[#:tag "sec:segfault"]{Well Typed Programs do not go SEGFAULT} +@require[ + "common.rkt" + "evaluation.rkt" + glob + racket/sequence + (only-in racket/list take) + (only-in trivial/private/raco-command collect-and-summarize) +] + +@title[#:tag "sec:segfault"]{When using the API can cause a segfault} + +Every programming languages comes with arrays. In Typed Racket, array +facilities come as a library that essentially exports constructors, +dereferencing functions, and mutation operations. SML similarly provides +them from a run-time library@~cite[gr-cup-2004]. +The API for array libraries tends to come with highly conservative +signatures. For example, an array indexing operation calls for an array +and an integer and then produces the designated element from the given +array; run-time checks ensure that the integer is in the interval +@math{[0,n)} where @math{n} is the length of the array. + +To speed up program execution, Typed Racket has access to an unsafe array +indexing operation. Like array indexing in C, this operation +retrieves the bits at the specified location without checking the size of +the index. If used inappropriately, such an unsafe operation can cause the +program to print random results or to segfault. + +In this section we show that the creator of the Typed Racket array library +can replace checked array indexing with unsafe indexing. While this use of +type tailoring is quite simple, it supplies a great case study. To begin +with, it requires an innovation on the standard progress and preservation +method for showing type soundness. Specifically, the author of the array +library must show that the evaluator remains a function and does not +introduce segfaults into typed programs without run-time checks for +indexing (@secref{sec:segfault:model}). The Typed Racket implementation is quite +straightforward; the core consists of two dozen lines +(@secref{sec:segfault:implementation}). Finally, an evaluation on the Racket code +base indicates that the prototype is highly effective for a certain style of +programming. + + +@; ----------------------------------------------------------------------------- +@section[#:tag "sec:segfault:model"]{Elaborating array indexing} @Figure-ref{fig:stlc} describes a simply typed @exact{$\lambda$} calculus with integers and integer arrays. @@ -18,10 +64,10 @@ The operational semantics for the core language are given in @Figure-ref{fig:stl along with type judgments for two primitive operations: @exact{$\checkedref$} and @exact{$\unsaferef$}. Intuitively, @exact{$\checkedref$} is a memory-safe function that performs a bounds check before - dereferencing an array and gives a runtime error when called with incompatible values. + dereferencing an array and raises a runtime error when called with incompatible values. On the other hand, @exact{$\unsaferef$} does not perform a bounds check and therefore may return arbitrary data or raise a memory error when called with an invalid index. -These negative consequences are modeled in the (family of) rule(s) @exact|{\textsc{E-UnsafeFail}}|. +These negative consequences are modeled in the (family of) rule(s) @sc{E-UnsafeFail}. Consequently, the judgment @exact{$\smallstep{e}{e}$} and its transitive closure @exact{$\smallstepstar{e}{e}$} are relations, whereas typing and elaboration are proper functions. @@ -56,6 +102,11 @@ Evaluation of the surface language in @Figure-ref{fig:stlc}, however, } @proof{ + @; (shorter, but repeats following paragraph) + @; The surface language does not allow @exact{$\unsaferef$} and elaboration + @; does not introduce unsafe references, therefore the non-deterministic + @; rule @sc{E-UnsafeFail} is never used. + By induction on terms of the core language, if @exact{$\smallstep{e}{e'}$} and @exact{$e$} is not of the form @exact{$\unsaferef~e_1~e_2$} then @exact{$e'$} is unique. @@ -85,8 +136,8 @@ Additionally, the surface language is type safe. } @proof{ - The interesting cases involve @exact{$\checkedref$}. - First, @exact|{$\elabstoclosed{\aref{e_1}{e_2}}{\checkedref~e_1~e_2}{\tint}$}| + The interesting cases are for array references, for which we observe that + @exact|{$\elabstoclosed{\aref{e_1}{e_2}}{\checkedref~e_1~e_2}{\tint}$}| implies @exact{$\typestoclosed{e_1}{\tarray}$} and @exact{$\typestoclosed{e_2}{\tint}$}. Depending on the values of @exact{$e_1$} and @exact{$e_2$}, @@ -95,8 +146,6 @@ Additionally, the surface language is type safe. } -@section{Extending the Elaborator} - Building on the foundation of type soundness, we extend the elaborator with the @emph{value-directed} rules in @Figure-ref{fig:elab1}. When the elaborator can prove that an array reference is in bounds based on @@ -110,7 +159,7 @@ Conversely, the elaborator raises a compile-time index error if it can prove Our thesis is that adding such rules does not undermine the safety guarantees of the original language. -In this case, we recover the original guarantees by extending the proofs of +In this case, we recover determinism and soundness by extending the proofs of type and memory safety to address the @sc{S-RefPass} and @sc{S-RefFail} elaboration rules. @definition[@exact{$\elabarrowplus$}]{ @@ -126,43 +175,38 @@ In this case, we recover the original guarantees by extending the proofs of @proof{ With the original elaboration scheme, all array references @exact{$\aref{e_1}{e_2}$} - elaborate to @exact{$\checkedref~e_1'~e_2'$}, where @exact{$e_1'$} + elaborate via @sc{S-Ref} to @exact{$\checkedref~e_1'~e_2'$}, where @exact{$e_1'$} and @exact{$e_2'$} are the elaboration of @exact{$e_1$} and @exact{$e_2$}. - There are now three possibilities: - @itemize[ - @item{ - @exact{$e_1'$} is an array value @exact{$\vectorvn$} - and @exact{$e_2'$} is an integer value @exact{$i \in \ints$} - and @exact{$0 \le i < n$}. - Both @sc{S-Ref} and @sc{S-RefPass} are possible elaborations, - therefore @exact{$e'$} is either @exact{$\checkedref~\vectorvn~i$} - or @exact{$\unsaferef~\vectorvn~i$}. - Because @exact{$0 \le i < n$}, evaluation must proceed with - @sc{E-CheckPass} or @sc{E-UnsafePass} in each case, respectively. - These rules have the same result, @exact{$v_i$}. - } - @item{ - @exact{$e_1'$} is an array value @exact{$\vectorvn$} - and @exact{$e_2'$} is an integer value @exact{$i \in \ints$} - and either @exact{$i < 0$} or @exact{$i \ge n$}. - The rules @sc{S-Ref} and @sc{S-RefFail} are possible elaborations. - If @sc{S-Ref} is chosen, @exact{$e'$} is @exact{$\checkedref~\vectorvn~i$} - and evaluates to @exact{$\indexerror$} because @exact{$i$} is outside - the bounds of the array. - If @sc{S-RefFail} is chosen an index error is raised immediately. - } - @item{ - Otherwise, either @exact{$e_1'$} or @exact{$e_2'$} is not a value - form and we rely on the existing proof of determinism. - } - ] + References for which only @sc{S-Ref} applies remain determinsitic, but + we have two cases where a new rule may also be used: + @itemize[ + @item{ Case @sc{S-RefFail}: + @exact{$e_1'$} is an array literal @exact{$\vectorvn$} + and @exact{$e_2'$} is an integer literal @exact{$i \in \ints$} + and either @exact{$i < 0$} or @exact{$i \ge n$}. + If @sc{S-Ref} is chosen, @exact{$e'$} is @exact{$\checkedref~\vectorvn~i$} + and evaluates to @exact{$\indexerror$} because @exact{$i$} is outside + the bounds of the array. + On the other hand, @sc{S-RefFail} raises the index error immediately. + } + @item{ Case @sc{S-RefPass}: + @exact{$e_1'$} is an array literal @exact{$\vectorvn$} + and @exact{$e_2'$} is an integer literal @exact{$i \in \ints$} + and @exact{$0 \le i < n$}. + If @sc{S-Ref} is chosen then @exact{$e'$} is @exact{$\checkedref~\vectorvn~i$}. + If @sc{Ref-RefPass} is chosen then @exact{$e'$} is @exact{$\unsaferef~\vectorvn~i$}. + Because @exact{$0 \le i < n$}, evaluation must proceed with + @sc{E-CheckPass} or @sc{E-UnsafePass} respectively, producing + @exact{$v_i$} in any event. + } + ] } For practical purposes, non-determinism in the elaborator should be - resolved giving the rules in @Figure-ref{fig:elab1} preference over the rules - in @Figure-ref{fig:elab0}. - But the result will be the same in any event. - + resolved by giving the rules in @Figure-ref{fig:elab1} preference over the + rules in @Figure-ref{fig:elab0}. + But knowing that the result is the same for the non-identity elaborations + gives us confidence in their correctness. @theorem["soundness"]{ If @exact{$e$} is a closed term then @exact|{$\elabstoclosedplus{e}{e'}{\tau}$}| @@ -174,7 +218,7 @@ In this case, we recover the original guarantees by extending the proofs of We extend the existing proof with cases for @exact{$\unsaferef$}. @itemize[ @item{ - If @sc{S-RefPass} is applied by the elaborator then @exact{$e'$} is + Case @sc{S-RefPass}: @exact{$e'$} is the call @exact{$\unsaferef~\vectorvn~i$} and @exact{$0 \le i < n$}. By assumption, @exact|{$\typestoclosed{\vectorvn}{\tarray}$}| and @exact{$\typestoclosed{i}{\tint}$}. @@ -182,24 +226,194 @@ In this case, we recover the original guarantees by extending the proofs of to the integer value @exact{$v_i$} by @sc{E-UnsafePass}. } @item{ - If @sc{S-RefFail} is applied by the elaborator then @exact{$e'$} is + Case @sc{S-RefFail}: then @exact{$e'$} is @exact{$\indexerror$} and type soundness follows trivially. } ] } -The key step in both theorems was that elaboration supplied the proposition - @exact{$0 \le i < n$}. -Both the type rules and evaluation rules depended on this premise. +The key step in both theorems is that elaboration supplies the proposition + @exact{$0 \le i < n$} required for safe evaluation. +So long as assumptions like the above are properly stated by language implementors, + type tailoring library authors can help to meet them. -@section{In Practice} +@; ----------------------------------------------------------------------------- +@section[#:tag "sec:segfault:implementation"]{Implementing the elaborator} -Have implemented for typed racket -- library -- with enhancements from @todo{secref} -- via expander, so typechecked afterwards +@; WHY ... WHY ARE THEY ALWAYS PLAYING JAZZ IN THE LOBBY OF THIS HOTEL ??? +@; (Asked the concierge: it's always on, on a loop. For Christmas it's Christmas jazz) -Checked in -- standard distro (includes plot,math,stats) -- pict3d +We have implemented the rules in @Figure-ref{fig:elab1} as a syntax extension for Typed Racket. +Including imports and exports, the implementation is an 18 line module (@Figure-ref{fig:vector-ref-extension}). + + @figure["fig:vector-ref-extension" "Syntax extension for array references" + @codeblock{ + #lang typed/racket ;; vector-ref-extension.rkt + (require + (only-in racket/unsafe/ops unsafe-vector-ref) + (for-syntax racket/base syntax/parse)) + + (define-syntax (-vector-ref stx) + (syntax-parse stx + [(_ #(e* ...) i) + #:when (integer? (syntax->datum #`i)) + ;; Case 1: constant args + (define v-val (syntax->datum #`(e* ...))) + (define i-val (syntax->datum #`i)) + (if (< -1 i-val v-val) + ;; then S-RefPass + #`(unsafe-vector-ref '#(e* ...) i) + ;; else S-RefFail + (error 'vector-ref "~a ~a" v-val i-val))] + [(_ args ...) + ;; Case 2: S-Ref + #`(vector-ref args ...)])) + + (provide (rename-out [-vector-ref vector-ref])) + } + ] + +First, the @racket[require] statement imports the @racket[unsafe-vector-ref] + function to the runtime environment and the libraries @racket[racket/base] + and @racket[syntax/parse] to the compile-time environment. +We name the extension @racket[-vector-ref] to avoid shadowing the uses of + Typed Racket's @racket[vector-ref] in Case 2 of the extension. +At the end of the module, the @racket[provide] statement renames @racket[-vector-ref] + to replace the default @racket[vector-ref] in importing modules. + +Code within @racket[-vector-ref] is evaluated at compile-time to + transform all calls and uses of the extension in an importing module. +For instance, calling @racket[(vector-ref x 4)] will invoke + @racket[-vector-ref] with the expression @racket[(vector-ref x 4)] bound to the + formal parameter @racket[stx]. + @; A higher-order use like @racket[(map vector-ref vs is)] will invoke @racket[-vector-ref] + @; with @racket[stx] bound to the identifier @racket[vector-ref] before evaluating + @; the call to @racket[map]. +In any event, the first task of @racket[-vector-ref] is to destructure its argument + @racket[stx] using the @racket[syntax-parse] form. +We consider three cases. +The first matches expressions with three elements, like @racket[(vector-ref #(0) 0)], + where the first element is anything, the second is a vector literal, and the third + is an integer. +Integer literals are recognized by the @racket[#:when] clause, which extracts + the value from a @emph{pattern variable} @racket[i] and tests whether this value + is an integer. +If this first match is successful, then we implement the @sc{S-RefPass} + and @sc{S-RefFail} rules by comparing the value of the integer literal + contained in @racket[i] against the number of elements captured by the zero-or-more + pattern @racket[(e ...)]. +When the reference is in bounds, we use the constructor @|stx| + to produce code.@note[@elem{If it helps, you can mentally replace all @|stx| + with the arrow @exact{$\elabarrow$} from our model.}] +The second and third cases are simpler. +The second elaborates any call to @racket[-vector-ref] into a @racket[vector-ref] + call---even calls made with zero or seven arguments. +We let the type checker deal with such erroneous cases. + @; The third case replaces higher-order calls of @racket[-vector-ref] with + @; higher-order calls to Typed Racket's @racket[vector-ref] function. + +Existing programs can use the extension by adding a 1-line import statement. + + @codeblock{ + #lang typed/racket + (require vector-ref-extension) + .... + } + +The import shadows @racket[vector-ref] from the @racket[typed/racket] language, + replacing all occurrences with calls to our syntax extension. +As Typed Racket processes the code abbreviated as @racket[....] above, + each vector reference is expanded to either a Typed Racket (checked) + @racket[vector-ref], an @racket[unsafe-vector-ref], or a compile-time @racket[error]. + + +@; ----------------------------------------------------------------------------- +@section[#:tag "sec:segfault:evaluation"]{Evaluation} + +@(let* ([vrx #rx"\\(vector-ref "] + [vr-file* (for/list ([fn (sequence-append + (in-glob "benchmark/vector/*.rkt") + (in-glob "benchmark/vector/*/*.rkt"))] + #:when (with-input-from-file fn + (lambda () + (regexp-match vrx (current-input-port))))) + fn)] + ;; : (Listof (List Symbol Num-Hit Num-Miss)) + [all-file+optz* (with-cache "vector-ref-optz" + (lambda () + (profile-point "counting vector-ref optz") + (let-values ([(_in _out) (make-pipe)]) + (parameterize ([current-output-port _out]) + (displayln "(") + (for-each collect-and-summarize vr-file*) + (displayln ")")) + (close-output-port _out) + (let ([v (read _in)]) + (close-input-port _in) + (begin0 + (for/list ([d (in-list v)]) + (cons (car d) + (or (for/first ([kvvv (in-list (cdr d))] + #:when (eq? (car kvvv) 'vector-ref)) + (list (cadr kvvv) (caddr kvvv))) + (list 0 0)))) + (profile-point "done w/ vector-ref optz"))))) + #:read (lambda (f+o*) + (and + (for/and ([f+o (in-list f+o*)] + [fn (in-list vr-file*)]) + (eq? (car f+o) (string->symbol fn))) + f+o*)))] + [file+optz* (filter (compose1 positive? cadr) all-file+optz*)] + [hit-count (length file+optz*)] + [miss-count (- (length all-file+optz*) hit-count)] + [num-optz (for/sum ([f+t (in-list file+optz*)]) (cadr f+t))]) + ;; -- for missed optz, take `caddr` of element of `file+optz*` + @list[ + @figure["fig:vector-table" @elem{Summary of @racket[vector-ref] evaluation} + @graphical-summary[ + #:hit-count hit-count + #:miss-count miss-count + #:bar-data (for/list ([fhm (in-list (sort file+optz* < #:key cadr))]) + (define h (cadr fhm)) + (define m (caddr fhm)) + (list (format "~a" h) (* 100 (/ h (+ h m))))) + #:bar-title "Percent of Array References Optimized" + #:bar-x (format "# Optimized refs by module") ; (~a total)" num-optz + #:bar-y "%" + #:y-max 100 + ] + ] + @elem{ + The above implementation works for toy examples, but useful programs often + give constant values a name. + For example, the implementation of @racket[gzip] used in the main Racket distribution + declares array constants to implement a Huffman tree and heap. + To accomodate this and similar idioms, we added syntax extensions for binding + data to identifiers and constant folding for arithmetic operations. + These additions are justified in @Secref{sec:define}, but they are straightforward + extensions of the technique described in this section. + + With name-tracking, we were able to optimize + two static array references in the @racket[gzip] implementation, + one in @racket[hangman], + 12 references in a @racket[minesweeper] game, and + 480 references in an implementation of @racket[parcheesi]. + All three programs follow a general pattern of declaring a fixed-size vector + in the scope of a module and implementing helper functions to manipulate the vector. + The unusual success of @racket[parcheesi] was due to an inlining macro that + would unroll a loop over a vector of pawns to straight-line array + references at constant offsets. + + These successes are modest,@note{Especially since we analyzed over 120 files using + arrays for places to optimize.} + but encouraging since the target audience for these + extensions are script writers. + In prototype and throwaway code, we expect programmers to use more constant + values and make more index errors. + Our extensions catch some of these errors without imposing any annotation burden + or increasing compile times --- in the files we analyzed, we observed no + statistically significant difference compiling with and without our extensions + enabled. +}]) diff --git a/popl-2017/texstyle.tex b/popl-2017/texstyle.tex index c284636..75b2a87 100644 --- a/popl-2017/texstyle.tex +++ b/popl-2017/texstyle.tex @@ -37,23 +37,35 @@ \newcommand{\tvar}[1]{\mathsf{#1}} \newcommand{\tnat}{\tvar{Natural}} \newcommand{\tarray}{\tvar{Array}} +\newcommand{\tdictgen}[1]{\llparenthesis #1 \rrparenthesis} %% yuck man +\newcommand{\tdictn}{\tdictgen{l_0 : \tau_0, \ldots,l_{n-1} : \tau_{n-1}}} \newcommand{\tint}{\tvar{Int}} +\newcommand{\toption}[1]{\tvar{Option}\,#1} \newcommand{\tnum}{\tvar{Num}} +\newcommand{\tstring}{\tvar{String}} \newcommand{\tlist}[1]{\tvar{List~#1}} \newcommand{\naturals}{\mathbb{N}} \newcommand{\ints}{\mathbb{Z}} %% -- terms +\newcommand{\dictgen}[1]{\{ #1 \}} +\newcommand{\dictvn}{\dictgen{l_0\!=\!v_0, \ldots, l_{n-1}\!=\!v_{n-1}}} \newcommand{\vectorgen}[1]{\langle #1 \rangle} \newcommand{\vectoren}{\vectorgen{e_0, \ldots, e_{n-1}}} \newcommand{\vectorvn}{\vectorgen{v_0, \ldots, v_{n-1}}} \newcommand{\vectorxn}{\vectorgen{x_0, \ldots, x_{n-1}}} \newcommand{\vlam}[2]{\lambda\,#1\,.\,#2} +\newcommand{\vlet}[3]{\mathsf{let}\,#1 = #2\,\mathsf{in}\,#3} +\newcommand{\vnone}{\mathsf{None}} +\newcommand{\vsome}[1]{\mathsf{Some}\,#1} \newcommand{\aref}[2]{#1@#2} -\newcommand{\checkedref}{\tvar{checked\mbox{-}ref}} -\newcommand{\unsaferef}{\tvar{unsafe\mbox{-}ref}} + +\newcommand{\checkedref}{\RktMeta{checked-ref}} +\newcommand{\unsaferef}{\RktMeta{unsafe-ref}} +\newcommand{\rxm}{\RktMeta{rx-match}} \newcommand{\segfault}{\mathsf{segfault}} \newcommand{\indexerror}{\mathsf{IndexError}} +\newcommand{\syntaxerror}{\mathsf{SyntaxError}} %% -- evaluation contexts \newcommand{\ectx}{E} @@ -64,6 +76,11 @@ \newcommand{\tenvempty}{\cdot} \newcommand{\tenvcons}[3]{#1:#2,#3} +%% -- proof environments +\newcommand{\penv}{\Sigma} +\newcommand{\penvempty}{\tenvempty} +\newcommand{\penvcons}[3]{\tenvcons{#1}{#2}{#3}} + %% -- typing \newcommand{\typestogen}[3]{#1 \vdash #2 : #3} \newcommand{\typestoclosed}[2]{\typestogen{\tenvempty}{#1}{#2}} @@ -77,11 +94,37 @@ \newcommand{\elabstoclosed}[3]{\elabstogen{\tenvempty}{#1}{\elabarrow}{#2}{#3}} \newcommand{\elabsto}[3]{\elabstogen{\tenv}{#1}{\elabarrow}{#2}{#3}} +%% -- elaboration II +\newcommand{\provestogen}[7]{\typestogen{#1;#2}{#3 #4 #5}{#6 \dashv #7}} +\newcommand{\provesto}[4]{\provestogen{\penv}{\tenv}{#1}{\elabarrow}{#2}{#3}{#4}} + +%% -- elaboration types +\newcommand{\pmap}{\phi} +\newcommand{\pmapcons}[3]{#1[#2 \mapsto #3]} +\newcommand{\pdom}{\kappa} +\newcommand{\parrow}{\mapsto} +\newcommand{\pvec}{\mathcal{V}} +\newcommand{\prx}{\mathcal{R}} +\newcommand{\pint}{\mathcal{I}} + %% -- evaluation \newcommand{\bigstep}[2]{#1 \Downarrow #2} \newcommand{\bigstepplus}[2]{#1 \Downarrow^+ #2} -\newcommand{\smallstepstar}[2]{#1 \rightarrow^* #2} -\newcommand{\smallstep}[2]{#1 \rightarrow #2} +\newcommand{\smallsteparrow}{\rightarrow} +\newcommand{\smallstepstar}[2]{#1 \smallsteparrow^* #2} +\newcommand{\smallstep}[2]{#1 \smallsteparrow #2} + +%% -- subtyping +\newcommand{\subt}{\le:} +\newcommand{\subtypesto}[2]{#1 \subt #2} + +%% -- metafunctions +%\newcommand{\mgroups}{\textsf{groups}} +\newcommand{\msup}{\mathsf{sup}} +\newcommand{\mlang}{\mathcal{L}} +\newcommand{\msubstrings}{\mathcal{S}} +\newcommand{\mset}[1]{\{\,#1\,\}} +\newcommand{\mrset}[2]{\{\,#1 \mid #2\,\}} %% -- misc \newcommand{\esubst}[2]{[#2/#1]} diff --git a/trivial/untyped.rkt b/trivial/untyped.rkt index e087ea0..124e424 100644 --- a/trivial/untyped.rkt +++ b/trivial/untyped.rkt @@ -64,18 +64,18 @@ (define-syntax define: (make-keyword-alias 'define (lambda (stx) - (or (format-define stx) + (or (rx-define stx) + (format-define stx) (num-define stx) (lst-define stx) - (rx-define stx) ;(fun-define stx) (vec-define stx))))) (define-syntax let: (make-keyword-alias 'let (lambda (stx) - (or (format-let stx) + (or (rx-let stx) + (format-let stx) ;(fun-let stx) (num-let stx) (lst-let stx) - (rx-let stx) (vec-let stx)))))