diff --git a/popl-2017/Makefile b/popl-2017/Makefile index b7bb608..35b2ed7 100644 --- a/popl-2017/Makefile +++ b/popl-2017/Makefile @@ -6,7 +6,13 @@ compiled/pearl_scrbl.zo: *.rkt *.scrbl raco make -v $(PAPER).scrbl ${PAPER}.pdf: pkg setup texstyle.tex - scribble ++extra mathpartir.sty ++style texstyle.tex --pdf $(PAPER).scrbl + scribble ++extra fig-stlc-surface.tex \ + ++extra fig-stlc-core.tex \ + ++extra fig-elab0.tex \ + ++extra fig-elab1.tex \ + ++extra mathpartir.sty \ + ++style texstyle.tex \ + --pdf $(PAPER).scrbl ${PAPER}.tex: pkg setup texstyle.tex scribble ++style texstyle.tex --latex $(PAPER).scrbl diff --git a/popl-2017/OUTLINE.md b/popl-2017/OUTLINE.md index 66be4db..55af05f 100644 --- a/popl-2017/OUTLINE.md +++ b/popl-2017/OUTLINE.md @@ -1,6 +1,18 @@ Outline of paper --- +- λ ∈ ℕ Σ +- expand is NOT guaranteed to make well-typed terms; you can call it on any expr. +- idea; values carry latent "type" information + (but ya know, this is kind of just demonstrating that expand is useful) +- (afterwards) add set! + nevermind how \tau handles it ... here are rules for expand to handle it +- note: OCaml is firm about maintaining a fast compiler, NOT a big standard library +- thats a totally reasonable separation of concerns/labor + +TODO +- use more RACKET, less math (@racket[...] for identifiers) + 0 intro.scrbl --- @@ -12,10 +24,8 @@ other promises 1 background.scrbl --- -define typed racket, -explain macro API without the word "macro", -goals for extensions -correctness for extensions +define types +define macros (without the word 'macro') 2. examples.scrbl @@ -32,6 +42,14 @@ limits of the extensions (need a value), suggestions to overcome limits (assertions, better analysis), even more extensions + implications +""" +Our technique is implemented as a library of local transformations that + compose to form a function @exact{$\elaborate$} defined over syntactically + well-formed terms. +Using the library is a one-line change for existing programs; however, the + user may wish to remove type casts made redundant by the elaboration. +""" + 4. friends.scrbl --- diff --git a/popl-2017/background.scrbl b/popl-2017/background.scrbl index e8701d7..cb5a8c3 100644 --- a/popl-2017/background.scrbl +++ b/popl-2017/background.scrbl @@ -1,21 +1,381 @@ #lang scribble/sigplan @onecolumn -@require["common.rkt"] +@require["common.rkt" pict racket/class racket/draw] @title[#:tag "sec:background"]{If You Know What I Mean} +@; A poet should be of the +@; old-fahioned meaningless brand: +@; obscure, esoteric, symbolic, +@; -- the critics demand it; +@; so if there's a poem of mine +@; that you do understand +@; I'll gladly explain what it means +@; till you don't understand it. -% - language = untyped, in the end we're doing machine instructions -% - here is what a function does, here is my notation for describing such things -% - easy to make mistakes, so we have type systems to detect errors -% - run type system statically but it's not as good as running the program ya know -% - so, abstraction -% - we want to refine, here is what refinement means - -% this section will die soon but until then here's ideas +@; Computers understand a very limited set of instructions. +@; It is tedious and error-prone to describe even simple thoughts in terms of these +@; instructions, but programming languages provide abstraction mechanisms +@; that can make the process bearable. +@; For example, low-level or assembly languages can implement the following +@; mathematical function for referencing an element of a fixed-length +@; data structure: +@; +@; @exact|{\[ +@; \RktMeta{ref} = \lambda (\RktMeta{v}~\RktMeta{i}) +@; \left\{\begin{array}{l l} +@; \RktMeta{v}_{i+1} & \RktMeta{if}~\RktMeta{v} = \vectorvn +@; \\ ~&\RktMeta{and}~\RktMeta{i} \in \mathbb{N} +@; ~\RktMeta{and}~i < n +@; \\[4pt] +@; \bot & \RktMeta{otherwise} +@; \end{array}\right. +@; \]}| +@; +@; @;Here we use the notation @exact|{$\langle \RktMeta{v}_0 \ldots \RktMeta{v}_{n-1} \rangle$}| +@; @; to describe a vector of @exact{$n$} elements. +@; Whether the implementation is a labeled sequence of instructions or a C function +@; is not important; what matters is the precise specification +@; and our ability to compute the function via machine. +@; Memory-safe functions with clear semantics are the core building blocks for +@; sane programs. +@; +@; As programs grow in size, complexity, and importance, it is desirable to have +@; some guarantees about how a program will run before actually running the +@; program. +@; Ideally, we would statically prove that @racket[ref] is +@; never applied to arguments that are not vectors or to out-of-bounds indices. +@; @;Such a proof would imply that every call @racket[(ref v i)] in the program +@; @; would yield a non-@|bot| value and help show that the overall program works as intended. +@; But proving such properties is difficult and there are often many +@; functions in a program for which we seek guarantees, so we settle for +@; approximate results. +@; Instead of statically ruling out all calls @racket[(ref v i)] that produce +@; @|bot|, we strive to eliminate a large subset automatically. +@; +@; Type systems have emerged as a powerful and convenient way of statically detecting +@; errors. +@; Instead of tracking the flow of exact values through a program, a type system +@; tracks the flow of @emph{types} denoting a range of possible runtime values. +@; Depending on the type system and programmers' discipline using it, the range +@; might be limited enough to show that @racket[ref] never returns @|bot| +@; in a program; however, the tradeoff is always the time and energy programmers +@; are willing to invest in writing down and maintaining type information. +@; As it stands, the most widely used type systems are those that require +@; minimal annotations from the programmer and catch only shallow, common errors. +@; +@; +@; @section{Problem Statement and Promises} +@; +@; @let*[([all-w 150] +@; [lang-w (/ all-w 2)] +@; [lang-h 20] +@; [lang-rectangle +@; (lambda (brush-style +@; fill-color +@; #:width [lang-w lang-w] ;; getting lazy there ben +@; #:height [lang-h lang-h] +@; #:slant-left [slant-left #f] +@; #:border-color [maybe-border-color #f] +@; #:border-width [border-width 3]) +@; (dc (lambda (dc dx dy) +@; (define old-brush (send dc get-brush)) +@; (define old-pen (send dc get-pen)) +@; (define border-color (or maybe-border-color fill-color)) +@; (send dc set-brush +@; (new brush% [style brush-style] [color fill-color])) +@; (send dc set-pen +@; (new pen% [width border-width] [color border-color])) +@; ;; -- +@; (define path (new dc-path%)) +@; (send path move-to 0 0) +@; (if slant-left +@; (begin (send path line-to (- 0 slant-left) (/ lang-h 2)) +@; (send path line-to 0 lang-h)) +@; (send path line-to 0 lang-h)) +@; (send path line-to lang-w lang-h) +@; (send path line-to lang-w 0) +@; (send path close) +@; (send dc draw-path path dx dy) +@; ;; -- +@; (send dc set-brush old-brush) +@; (send dc set-pen old-pen)) lang-w lang-h))] +@; [all-e (rectangle all-w lang-h)] +@; [hshim (blank (/ all-w 6) lang-h)] +@; [untyped-e (hc-append hshim (lang-rectangle 'vertical-hatch "CornflowerBlue" #:border-width 4 #:height (+ lang-h 2)))] +@; [typed-e (hc-append (lang-rectangle 'horizontal-hatch "Coral" #:border-width 2) hshim)] +@; [reality-of-ml (rc-superimpose (lc-superimpose all-e untyped-e) typed-e)] +@; [value-e +@; (let ([s 'solid] +@; [c "LimeGreen"] +@; [w (- lang-w lang-h)] +@; [b 1]) +@; (hc-append (blank (* lang-h 0.8) lang-h) +@; (lang-rectangle s c #:width (/ lang-w 3) #:height (/ lang-h 3) #:border-width b) +@; (lang-rectangle s c #:border-width b)))] +@; [reality-of-tr (lc-superimpose reality-of-ml value-e)] +@; )]{@list{ +@; No matter how precise the type system, static analysis cannot recognize +@; all correct programs and reject all incorrect programs. +@; Layering a type system on top of an untyped programming language therefore +@; leads to a world where the space of all syntactically valid programs +@; (drawn as a white box, of course) is partitioned into two overlapping sets. +@; +@; @centered[reality-of-ml] +@; +@; On the left, outlined in blue with vertical hatches, we have the set of all untyped programs +@; that produce meaningful results (read: non-error, non-diverging terms). +@; On the right, outlined in orange with horizontal hatches, we have the set of all programs +@; approved by the type system. +@; Some type-correct programs raise runtime errors when evaluated, and some +@; meaningful programs are rejected by the type checker. +@; +@; Again, the impossible goal is for the highlighted boxes to overlap. +@; Our argument is that adding a @emph{syntactic elaboration} phase before +@; typechecking can yield a language described by the +@; solid green area shown below, capturing more of the meaningful untyped +@; programs and fewer of the typed programs that go wrong, though rejecting +@; some typed programs that run to completion but contain a subterm that would +@; raise an exception if evaluated (Theorem @exact|{\ref{thm:relevance}}|). +@; +@; @centered[reality-of-tr] +@; }} +@; +@; Starting from an untyped programming language built from a grammar of terms +@; @exact{$e$} with a reduction relation @exact{$e \Downarrow v$} +@; and a type judgment @exact{$\vdash e : \tau$} relating terms to +@; values @exact{$v$} and types @exact{$\tau$}, we derive an extended language +@; by inserting a function @exact{$\elaborate(e)$} before type checking. +@; The extended language has a type system @exact{$\vdashe$} consisting of a single +@; rule: +@; +@; @exact|{ +@; \begin{mathpar} +@; \inferrule*{ +@; \vdash \elaborate(e) : \tau +@; }{ +@; \vdashe e : \tau +@; } +@; \end{mathpar} +@; }| +@; +@; We also derive a reduction relation on typed terms @exact{$\vdashe e : \tau$} +@; as @exact{$\elaborate(e) \Downarrow v$}. +@; Type soundness for @exact{$\vdashe$} is thus a corollary of +@; type soundness for the @exact{$\vdash$} judgment. +@; +@; The output of @exact{$\elaborate(e)$} is a term @exact{$e'$}, +@; though @exact{$e'$} may be a labeled error term +@; @exact|{$\bot^{e''}$}| meaning the elaborator detected an error in the program. +@; Terms @exact{$e$} such that @exact{$\elaborate(e)$} type checks but @exact{$e$} +@; does not are newly expressible in the extended system. +@; Terms @exact{$e$} such that @exact{$\vdash e : \tau$} but +@; @exact|{$\elaborate(e) = \bot^{e'}$}| are programs validated by the typechecker +@; but rejected by the extended system because the subterm @exact{$e'$} of @exact{$e$} +@; would raise a runtime error if evaluated. +@; +@; Our main contribution is defining a useful collection of @emph{local} +@; transformations to perform during elaboration. +@; Each local elaboration is proven correct in isolation. +@; We then define @exact{$\elaborate(e)$} as the union of these local cases +@; and a default elaboration that applies @exact{$\elaborate$} to all +@; subterms of @exact{$e$}. +@; +@; To avoid reasoning about terms post-elaboration we will describe +@; our local elaborations with type rules of the form: @todo{maybe drop this} +@; +@; @exact|{ +@; \begin{mathpar} +@; \inferrule*{ +@; \prope(f, e) +@; \\\\ +@; \vdash f : \tau_f +@; \\ +@; \vdash e : \tau_e +@; }{ +@; \vdashe f~e : \tau +@; } +@; \end{mathpar} +@; }| +@; +@; where @exact{$\prope$} is a proposition about syntactic terms +@; @exact{$f$} and @exact{$e$} +@; used to justify the transformation @exact|{$\elaborate(f~e)$}|. +@; Despite this inference rule shorthand, elaborations and @exact{$\prope$} +@; conditions run before typechecking and have no access to type information +@; in the program. +@; +@; The relationship between terms @exact{$e$} and @exact{$\elaborate(e)$} is given +@; by three theorems: @todo{see next section for now}. +@; +@; Correct elaborations obey these theorems. +@; @; Note: there is no guarantee that elaboration produces a well-typed term +@; @; because calls like @exact{$\elaborate($}@racket[ref 0 0]@exact{$)$} are allowed. +@; +@; @; We refrain from making stronger statements about elaborate because +@; @; real systems will have lots of other elaborations. -A programming language is used to describe a sequence of instructions that - may be carried out by a machine. +@section{Syntax Extensions} +Syntax extension systems have been incorporated in + a number of mainstream programming languages. +Essentially, syntax extensions let the programmer write code that generates code. +In practice this gives programmers the ability to extend the syntax of their + language, abstract over textual patterns, and control the evaluation order + of expressions. +Racket's syntax extension API is particularly mature and has inspired + similar APIs in at least two other languages, so we adopt it here to introduce + syntax extensions. +As a first example, suppose we want to assert that expressions + @racket[e_1], @racket[e_2], @racket[e_3], and @racket[e_4] all evaluate to + the same value. +By hand, we might write a sequence of unit tests: +@racketblock[ + (begin (check-equal? e_2 e_1) + (check-equal? e_3 e_1) + (check-equal? e_4 e_1)) +] +but these tests follow a simple pattern that we can abstract as a @emph{syntax rule}, + using ellipses (@racket[...]) to capture an arbitrary number of expressions. +@racketblock[ + (define-syntax-rule (check-all-equal? e_1 e_rest ...) + (begin (check-equal? e_rest e_1) ...)) +] +Our tests can now be written more concisely: +@racketblock[ + (check-all-equal? e_1 e_2 e_3 e_4) +] +making them easier to read and maintain. +Moreover, we can easily improve the rule to evaluate @racket[e_1] only once + instead of @math{n-1} times: +@racketblock[ + (define-syntax-rule (check-all-equal? e_1 e_rest ...) + (let ([v e_1]) + (check-equal? e_rest v) ...)) +] +Intuitively, syntax rules perform a search-and-replace before the program + is compiled; however, the replacement process is careful to preserve the + lexical structure of programs. +For instance, a program that uses the same variable name @racket[v] as the syntax rule: +@racketblock[ + (define v 5) + (check-all-equal? (+ 2 2) v) +] +will expand to code that introduces a fresh variable @racket[v1]: +@racketblock[ + (define v 5) + (let ([v1 (+ 2 2)]) + (check-equal? v v1)) +] +to avoid shadowing the programmer's variable with the identifier used inside + the syntax rule. +Details on how this so-called @emph{hygenic} expansion is implemented and + its benefits to extension writers and users are explained by Flatt@~cite[f-popl-2016 fcdb-jfp-2012]. + +In addition to pattern-based syntax rules, one can extend Racket with + arbitrary functions defined over @emph{syntax objects}. +For instance, we can write an extension that formats log messages to a port + @racket[log-output] at runtime when the program is compiled with a flag + @racket[DEBUG] enabled. If the flag is disabled, we perform a no-op. +The expansion of @racket[log] calls happens during compilation: +@(begin +#reader scribble/comment-reader +@codeblock|{ + ;; With DEBUG enabled + (log "everything ok") + ==> (displayln "everything ok" log-output) + ;; With DEBUG disabled + (log "everything still ok") + ==> (void) +}|) + +The @racket[syntax-parse] form is a convenient way to implement @racket[log]. +In this case, we use @racket[syntax-parse] to deconstruct a syntax object + @racket[stx] representing an application of @racket[log]. +The output of syntax parse is a new syntax object built using the constructor + @racket[syntax]. +@codeblock{ + (define-syntax (log stx) + (syntax-parse stx + [(log message) + (if DEBUG + (syntax (displayln message log-output)) + (syntax (void)))])) +} +We can further enhance our implementation with a compile-time check that + the value bound to the pattern variable @racket[message] is a string literal. +@codeblock{ + (define-syntax (log stx) + (syntax-parse stx + [(log message) + (unless (string? (syntax->datum (syntax message))) + (error "log: expected a string literal")) + (if DEBUG + (syntax (displayln message log-output)) + (syntax (void)))])) +} +@; Alternatively, we can use Racket's @emph{syntax classes} to the same effect. +@; The @racket[str] syntax class recognizes literal strings. +@; Binding it to the pattern variable @racket[message] causes calls like +@; @racket[(log 61)] to raise a compile error. +@; @codeblock{ +@; (define-syntax (log stx) +@; (syntax-parse stx +@; [(log message:str) +@; (if DEBUG +@; (syntax (displayln message log-output)) +@; (syntax (void)))])) +@; } +With this enhancement, calls like @racket[(log 61)] are rejected statically. +Unfortunately, arguments that @emph{evaluate} to string literals + are also rejected + because the syntax extension cannot statically predict what value an arbitrary + expression will reduce to. +This is a fundamental limitation, but we can make a small improvement by accepting + any @racket[message] produced by another (trusted) syntax extension. +Suppose @racket[++] is an extension for concatenating two strings. +If we assign a unique @emph{syntax property} to the syntax object produced + by @racket[++], we can later retrieve the property in the @racket[log] extension. + +First, we give an implementation of @racket[++] in terms of Racket's + built-in @racket[string-append] function, crucially using @racket[syntax-property] + to associate the value @racket['string] with the key @racket['static-type]. +@codeblock{ + (define-syntax (++ stx) + (syntax-parse stx + [(++ s1 s2) + (syntax-property + (syntax (string-append s1 s2)) + 'static-type 'string)])) +} +Assuming now that the key @racket['static-type] accurately describes the value + contained in a syntax object, @racket[log] can accept both string + literals and tagged syntax objects. +@codeblock{ + (define-syntax (log stx) + (syntax-parse stx + [(log message) + (define is-string? + (string? (syntax->datum (syntax message)))) + (define expands-to-string? + (eq? 'string + (syntax-property + (local-expand message) + 'static-type))) + (unless (or is-string? expands-to-string?) + (error "log: expected a compile-time string")) + (if DEBUG + (syntax (displayln message log-output)) + (syntax (void)))])) +} + +Our syntax extensions @racket[check-all-equal?], @racket[log], and @racket[++] + are indistinguishable from user-defined functions or core language forms, + yet they perform useful transformations before a program is typechecked or + compiled. +This seamless integration gives Racket programmers the ability to grow the + language and tailor it to their specific needs. + +@; NOTE: we will use similar 'typed macros' in a coming section... right? diff --git a/popl-2017/common.rkt b/popl-2017/common.rkt index a50cd37..053c4d1 100644 --- a/popl-2017/common.rkt +++ b/popl-2017/common.rkt @@ -16,8 +16,11 @@ nrightarrow parag sf + sc + bot id todo + proof ) (require "bib.rkt" @@ -114,6 +117,8 @@ (define (sf x) (elem #:style "sfstyle" x)) +(define (sc x) (exact "\\textsc{" x "}")) + (define (parag . x) (apply elem #:style "paragraph" x)) (define (exact . items) @@ -130,6 +135,9 @@ (cons (if term (element #f (list " (" (defterm term) ") ")) " ") x) (mt-line)))) +(define bot (exact "$\\bot$")) + + ;; Format an identifier ;; Usage: @id[x] (define (id x) @@ -141,3 +149,28 @@ (define (second) (make-element (make-style "relax" '(exact-chars)) "$2^\\emph{nd}$")) + +(define (defthing title #:thing thing #:tag tag . txt) + ;; TODO use tag + (nested #:style 'inset + (list (bold thing " (" (emph title) ")" ":") + (exact "~~") + txt))) + ;;(list txt)))) + +(define-syntax-rule (define-defthing* id* ...) + (begin + (begin + (define (id* title #:tag [tag #f] . txt) + (defthing title #:thing (string-titlecase (symbol->string (object-name id*))) #:tag tag txt)) + (provide id*)) ...)) + +(define-defthing* definition theorem lemma) + +(define (proof . txt) + (nested + (list (emph "Proof:") + (exact "~~") + txt + (exact "\\hfill\\qed")))) + diff --git a/popl-2017/conclusion.scrbl b/popl-2017/conclusion.scrbl index 5fad408..2327c93 100644 --- a/popl-2017/conclusion.scrbl +++ b/popl-2017/conclusion.scrbl @@ -8,3 +8,8 @@ My old clock used to tell the time and subdivide diurnity; but now it's lost both hands and chime and only tells eternity. + + +@; type systems change slowly. That's OK +@; syntax extensions can change much faster and YOU can control the +@; changes. That's YOU joe programmer or YOU jane street capital. Do you. diff --git a/popl-2017/correctness.scrbl b/popl-2017/correctness.scrbl new file mode 100644 index 0000000..6fe96a5 --- /dev/null +++ b/popl-2017/correctness.scrbl @@ -0,0 +1,121 @@ +#lang scribble/sigplan @onecolumn + +@; TODO +@; - revoke the AbsInt strategy +@; - work out the "ICFP style" correctness +@; - use objects in Haskell / ML for regexp (it's the only subtyping case?) +@; ML row polymorphism +@; Haskell ... ? +@; - + +@require["common.rkt"] + +@title[#:tag "sec:correctness"]{Making Sense} + +The thesis of this paper is that syntax extensions are an effective + tool for refining an existing type system. +Ultimately we aim to show that given a fixed program and fixed type system, + the type system will be able to correctly reject more programs that + produce an error at runtime and correctly accept more programs that it + could not previously express @emph{if} a layer of syntax extensions may be + inserted between parsing and typechecking a program. +If library authors are permitted to write such extensions, then the language's + type theory will become amenable to domain-specific typing rules without + changes to the core language. + +To make our goal precise, we define a simple, low-level programming language in @todo{Figure-1}. +On top of this untyped core language we impose the polymorphic type system + defined in @todo{Figure-2}. +The idea is that user code is validated by the type system before it is compiled + to the untyped language and run. +Um, we take the untyped language in @todo{Figure-1} as an abstract machine code. + +Consider again the function @racket[vector-ref]. +In @Secref{sec:background} we claimed that a type system could approximate the + specification of @racket[vector-ref] in order to statically detect bugs. +We formalize this claim by interpreting functions as relations between their + input and output values. + +Start with the untyped function @exact{$\RktMeta{vector-ref}_\lambda$}. +For vectors @exact|{$\langle \RktMeta{v}_0 \ldots \RktMeta{v}_n \rangle$}| and + indices @exact|{$0 \leq i < n$}|, @racket[vector-ref] returns @exact|{$\RktMeta{v}_i$}|. +On all other inputs, @racket[vector-ref] returns @|bot|. + + @exact|{ + \[ \RktMeta{vector-ref} = \big\{ (\RktMeta{v}, \RktMeta{i}, \RktMeta{v}_i) + \mid \RktMeta{vector? v} + ~~ + \RktMeta{natural? i} + ~~ + \RktMeta{i} < \RktMeta{n} + \big\} \] + }| + +The typed version of @exact|{$@RktMeta{vector-ref}$}| understands parametericity and + drop side conditions + + @exact|{ + \[ \RktMeta{vector-ref} = \big\{ (\RktMeta{v}, \RktMeta{i}, \RktMeta{v}_i) + \mid \RktMeta{vector? v} + ~~ + \RktMeta{natural? i} + \big\} \] + }| + +Sound overapprox because typed is superert + +@; @; Programming language: +@; @; vectors strings integers booleans +@; @; lambdas +@; @; map printf regexp-match +@; +@; Types provide sound overapproximation +@; +@; our goal: refine approximation +@; +@; type soundness is separate, we inherit soundness as corrollary of +@; ourtype <= origtype +@; + + +@section{Implementation} +The transformations described in this section have been implemented as a Racket package. +Our @emph{expand} metafunction is implemented by the Racket macro expander. +Each transformation is a macro wrapping a standard library function. +It's good. + +Proofs are mechanized as unit tests within the package. +Readers are invited to critique our proofs by downloading and using the package. + + +@section{Comments} +Maybe you're wondering why we didn't prove contextual equivalence of our transformations. +A few reasons: +@itemlist[ + @item{ + Contextual equivalence is @emph{hard}, we don't expect library writers + to do full proofs of that for every transformation they make. + } + @item{ + Our @emph{expand} metafunction is very simple, but in realistic languages + it will be only part of a comprehensive syntax extension system. + As Wand proved way back when, contextual equivalence for languages with + syntax extensions is trivial in the sense that only syntactically equivalent + terms are equal. + + Given a two terms @racket[e_1] and @racket[e_2], here is a Racket + syntax extension which that distinguishes them. + @racketblock[ + (define-syntax (distinguisher stx) + (syntax-parse stx + [(_ e_1 e_2) + (if (equal? (syntax->datum #'e_1) + (syntax->datum #'e_2)) + #'((λ (x) (x x)) (λ (x) (x x))) + #'(λ (x) x))])) + ] + For non-identical terms, @racket[(distinguisher e_1 ???)] is the context + @racket[C] that separates them. + Specifically @racket[(C e_1)] will diverge and @racket[(C e_2)] will not. + } +] diff --git a/popl-2017/examples.scrbl b/popl-2017/examples.scrbl index 4af196f..1cf9aa0 100644 --- a/popl-2017/examples.scrbl +++ b/popl-2017/examples.scrbl @@ -3,8 +3,181 @@ @require["common.rkt"] @title[#:tag "sec:examples"]{Small Things and Great} +@; He that lets +@; the small things bind him +@; leaves the great +@; undone behind him. -He that lets -the small things bind him -leaves the great -undone behind him. +@Figure-ref{fig:expr} defines an untyped, call-by-value @racket[λ] calculus + augmented with boolean, character, natural number, and vector literals. +The language also includes a set @exact{$\primop$} of primitive operations, + described in @Figure-ref{fig:primop} and a labeled error term @exact{$\bot$}. +For the moment we focus on two primitive operations: @racket[ref], for dereferencing + a vector, and @racket[map], for mapping a function of arity @exact{$m$} over + the columns of an @exact{$m \times n$} matrix. +We specify these operations with an informal mathematical syntax using + @racket[∈] and @racket[=] to dynamically test and pattern match value forms. + + @figure["fig:expr" "Term language" + @exact|{\input{fig-language}}| + + ] + + @figure["fig:primop" "Primitive operations" + @exact|{\input{fig-primops}}| + ] + +This language is intended to model the untyped core of realistic + languages like ML, Haskell, and Typed Racket; albiet using @exact{$\lambda$} + notation instead of a machine language. +Likewise, the polymorphic type system in @Figure-ref{fig:types} is a standard, + implicitly quantified system. +@; These types are CLEARLY LIMITED in what they can express, +@; but even TR, ML+1stclassmodules, GHC+extensions have limitations +@; tho less obvious + + @figure["fig:types" "Type System" + @exact|{\input{fig-types}}| + ] + + @figure["fig:typed-primops" "Typed Primitive Operations" + @exact|{\input{fig-typed-primops}}| + ] + +Using this type system, we can approximate the behavior of @racket[ref] and + @racket[map] with the signatures in @Figure-ref{fig:typed-primops}. +The type @exact|{$\tau_{\RktMeta{ref}}$}| captures the essence of @racket[ref], + but does not express the requirement that @emph{value} of the number + @racket[i] in a call @exact|{\RktMeta{(ref~v~i)}}| + must be strictly less than the length of the vector @racket[v]. +The type @exact|{$\tau_{\RktMeta{map}}$}|, on the other hand, + is limited compared to the untyped @racket[map] because there is no way + to apply a typed function to an unknown number of arguments.@note{Curried functions + run into the same limitation.} + @;Try implementing @racket[apply f (x1 \ldots xn) = f x1 \ldots xn}. +Realistic languages would account for at least the 2-arity case with a second + function @exact|{$\RktMeta{map}_2$}| and ask programmers + to choose between @racket[map] and @exact|{$\RktMeta{map}_2$}| at each call site. +This inability of the type system to express our specifications is a problem. + + +@section{Refining the Imprecise Types} + +There is no reason we could not revise our type system to make + it possible to track the length of vectors and express polymorphism over + the arity of functions. +In fact, GHC can express the type of fixed-length vectors@~cite[lb-sigplan-2014] and Typed Racket + supports variable-arity polymorphism@~cite[stf-esop-2009]. +But such revisions are sweeping changes to the type system and therefore the + language. +Moreover, programmers who use the language to create embedded DSLs may + wish to encode novel type constraints, as we demonstrate in @Secref{sec:regexp}. + +Rather than alter the type system or accept the status quo, + we add the syntax extension system shown in @Figure-ref{fig:syntax-base}. +To underscore the fact that syntax extensions are run on syntactic terms + rather than runtime values, we format the system as a collection of + inference rules. +In practice, we could implement these rules with a pattern matcher like + Racket's @racket[syntax-parse]. +@; Users extend by: +@; - inhertance, modifying definition of a "parser" object +@; - use parameter for recursive call, users update parameter + +Define @exact{$\elaborate(e) = e'$} if and only if @exact{$e \expandsto e'$}. +As presented in @Figure-ref{fig:syntax-base}, @exact{$\elaborate$} is a structurally + recursive identity function. +Still, it satisfies three key properties (the 3 R's of reasonable syntax extensions). +Let @exact{$\Downarrow$} + implicitly quantify over closing substitutions @exact{$\gamma$}. + + @figure["fig:syntax-base" "Default Syntax Extensions" + @exact|{\input{fig-syntax-base.tex}}| + ] + + @theorem["Refinement"]{ + If @exact{$\vdash e : \tau$} then @exact{$\vdash \elaborate(e) : \tau'$} + and @exact{$\tau' <: \tau$}. + Furthermore @exact{$e \Downarrow v$} if and only if @exact{$\elaborate(e) \Downarrow v$}. + } + + In other words, elaboration may make the type of a well-typed term more precise. + (Use type equality in place of @exact{$<:$} for now) + + + @theorem["Retraction"]{ + If @exact{$\not \exists \tau~.~\vdash e : \tau$} but @exact{$\vdash \elaborate(e) : \tau'$} + then @exact{$e \Downarrow v$} if and only if @exact{$\elaborate(e) \Downarrow v$}. + } + + Desugaring a term to make it type check does not change its dynamic behavior. + (Holds trivially because its premise is never met.) + + + @theorem["Relevance"]{ + If @exact|{$\elaborate(e) = \bot^{e'}$}| then @exact{$e'$} is a subterm of @exact{$e$} + and evaluating @exact{$e'$} (but not necessarily @exact{$e$}) raises a runtime error. + } + + Elaboration errors always reflect defects in the user's code and are presented + using source terms rather than expanded terms. + (Holds because the current version of @exact{$\elaborate$} never yields an error term.) + +We claim that these theorems enable reasoning about un-elaborated terms + using the familiar type system and semantics from @Figure-ref["fig:expr" "fig:types"]. +Henceforth, we say that a term @racket[e] typechecks in the extended language + (written @exact{$\vdashe e : \tau$}) + if there exists a type @exact{$\tau$} such that @exact|{$\Gamma_{\primop} \vdash \elaborate(e) : \tau$}|. +A term @racket[e] evaluates to a value @racket[v] in the extended language + (@exact{$e \Downarrowe v$}) if and only if @exact{$\elaborate(e) \Downarrow v$}. + +The crucial feature of @exact{$\elaborate$} is that language users can add + cases to it. +Changing roles from language designers to language users, we leverage this + ability in the next sections based on the intuition that the syntax + of value forms carries information. + + +@section{Bounds Checking for @racket[ref]} + +For a first extension, we can check vector references at compile-time when + @racket[ref] is called with a vector literal and a natural number. +To accomodate future syntax extensions, we recursively expand the arguments to + our @racket[ref] extension before checking. + + @exact|{ + \begin{mathpar} + \inferrule{ + v \expandsto \vectoren + \\ + i \expandsto i' + \\\\ + i' \in \naturals + \\ + i' < n + }{ + \RktMeta{ref}~v~i \expandsto \RktMeta{ref}~\vectoren~i + } + \end{mathpar} + }| + +When this condition is not met, + + +@section{Typed, Generalized @racket[map]} + + + +@section[#:tag "sec:regexp"]{Adding Regular Expressions} + +@; - string-embedded DSL +@; - NOT part of core language, just ideas from crazy library writer +@; - refine codomain of an operation + +We assume @exact{$\Sigma$} is just the lowercase characters @exact{$a \ldots z$}. +To keep the language small, we define strings as vectors of characters + and use e.g. @racket{abc} as shorthand for @exact|{$\vectorgen{a, b, c}$}|. + + +@section[#:tag "sec:define"]{Handling Variables} +@; Add a type system --- thanks Alex! diff --git a/popl-2017/fig-elab0.tex b/popl-2017/fig-elab0.tex new file mode 100644 index 0000000..74930d6 --- /dev/null +++ b/popl-2017/fig-elab0.tex @@ -0,0 +1,49 @@ +\fbox{$\elabsto{e}{e}{\tau}$} + +\begin{mathpar} + \inferrule*[left=S-Var]{ + \Gamma(x) = \tau + }{ + \elabsto{x}{x}{\tau} + } + + \inferrule*[left=S-Int]{ + n \in \ints + }{ + \elabsto{n}{n}{\tint} + } + + \inferrule*[left=S-Lam]{ + \elabsto{e}{e'}{\tau'} + }{ + \elabsto{\vlam{x}{e}}{\vlam{x}{e'}}{\tau \rightarrow \tau'} + } + + \inferrule*[left=S-Arr]{ + \elabsto{e_0}{e_0'}{\tint} + \\ + \ldots + \\ + \elabsto{e_{n-1}}{e_{n-1}'}{\tint} + }{ + \elabsto{\vectoren}{\vectorgen{e_0',\ldots,e_{n-1}'}}{\tarray} + } + + \inferrule*[left=S-App]{ + \elabsto{e_1}{e_1'}{\tau \rightarrow \tau'} + \\ + \elabsto{e_2}{e_2'}{\tau} + }{ + \elabsto{e_1~e_2}{e_1'~e_2'}{\tau'} + } + + \inferrule*[left=S-Ref]{ + \elabsto{e_1}{e_1'}{\tarray} + \\ + \elabsto{e_2}{e_2'}{\tint} + }{ + \elabsto{\aref{e_1}{e_2}}{\checkedref~e_1'~e_2'}{\tint} + } + +\end{mathpar} + diff --git a/popl-2017/fig-elab1.tex b/popl-2017/fig-elab1.tex new file mode 100644 index 0000000..ab7dc9c --- /dev/null +++ b/popl-2017/fig-elab1.tex @@ -0,0 +1,32 @@ +\fbox{$\elabsto{e}{e}{\tau}$} + +\begin{mathpar} + + \inferrule*[left=S-RefPass]{ + \elabsto{e_1}{\vectorvn}{\tarray} + \\ + \elabsto{e_2}{i}{\tint} + \\\\ + i \in \ints + \\ + 0 \le i < n + }{ + \elabsto{\aref{e_1}{e_2}}{\unsaferef~\vectorvn~i}{\tint} + } + + + \inferrule*[left=S-RefFail]{ + \elabsto{e_1}{\vectorvn}{\tarray} + \\ + \elabsto{e_2}{i}{\tint} + \\\\ + i \in \ints + \\ + i < 0 ~~\vee~~ i \ge n + }{ + \elabsto{\aref{e_1}{e_2}}{\indexerror}{\tint} + } + +\end{mathpar} + + diff --git a/popl-2017/fig-reality-of-tr.tex b/popl-2017/fig-reality-of-tr.tex new file mode 100644 index 0000000..57d88ab --- /dev/null +++ b/popl-2017/fig-reality-of-tr.tex @@ -0,0 +1,3 @@ +\begin{center} + to do +\end{center} diff --git a/popl-2017/fig-regexp.tex b/popl-2017/fig-regexp.tex new file mode 100644 index 0000000..df4557a --- /dev/null +++ b/popl-2017/fig-regexp.tex @@ -0,0 +1,11 @@ +\[ + \begin{array}{r l l} + \RktMeta{\++} & = & \lambda\,\vectorgen{v_0~v_1}\,.\, + \left\{\begin{array}{l l} + \vectorgen{v_{00}, \ldots, v_{0m}, v_{10}, \ldots, v_{1n}} + & \RktMeta{if}~v_0 = \vectorgen{v_{00}, \ldots, v_{0m}} + ~\RktMeta{and}~v_1 = \vectorgen{v_{10}, \ldots, v_{1n}} + \\\bot & \RktMeta{otherwise} + \end{array}\right. + \end{array} +\] diff --git a/popl-2017/fig-stlc-core.tex b/popl-2017/fig-stlc-core.tex new file mode 100644 index 0000000..886bd8a --- /dev/null +++ b/popl-2017/fig-stlc-core.tex @@ -0,0 +1,73 @@ +\[ + \begin{array}{l l l} + \delta & \coloneq & \checkedref \mid \unsaferef + \\[8pt] + \ectx & \coloneq & \ehole \mid \ectx~e \mid (\vlam{x}{e})~\ectx \mid \vectorgen{v,\ldots,\ectx,e,\ldots} \mid + \\ & & \delta~E~e \mid \delta~v~E + \end{array} +\] + +\fbox{$\typesto{\delta~e~e}{\tau}$} +\begin{mathpar} + \inferrule*[left=T-Check]{ + \typesto{e_1}{\tarray} + \\ + \typesto{e_2}{\tint} + }{ + \typesto{\checkedref~e_1~e_2}{\tint} + } + + \inferrule*[left=T-Unsafe]{ + \typestoclosed{\vectorvn}{\tarray} + \\ + \typestoclosed{i}{\tint} + \\\\ + i \in \ints + \\ + 0 \leq i < n + }{ + \typesto{\unsaferef~\vectorvn~i}{\tint} + } +\end{mathpar} + + +\fbox{$\smallstep{e}{e}$} +\begin{mathpar} + \inferrule*[left=E-Ctx]{ + \smallstep{e}{e'} + }{ + \smallstep{\ectx[e]}{\ectx[e']} + } + + \inferrule*[left=E-Lam]{ + }{ + \smallstep{(\vlam{x}{e})~v}{e\esubst{x}{v}} + } + + \inferrule*[left=E-CheckPass]{ + 0 \le i < n + }{ + \smallstep{\checkedref~\vectorvn~i}{v_i} + } + + \inferrule*[left=E-CheckFail]{ + i < 0 ~~\vee~~ i \ge n + }{ + \smallstep{\checkedref~\vectorvn~i}{\indexerror} + } + + \inferrule*[left=E-UnsafePass]{ + 0 \le i < n + }{ + \smallstep{\unsaferef~\vectorvn~i}{v_i} + } + + \inferrule*[left=E-UnsafeFail]{ + i < 0 ~~\vee~~ i \ge n + \\ + x \in \ints \cup \{ \segfault \} + }{ + \smallstep{\unsaferef~\vectorvn~i}{x} + } +\end{mathpar} + diff --git a/popl-2017/fig-stlc-surface.tex b/popl-2017/fig-stlc-surface.tex new file mode 100644 index 0000000..331b2e7 --- /dev/null +++ b/popl-2017/fig-stlc-surface.tex @@ -0,0 +1,61 @@ +\[ + \begin{array}{l l l} + v & \coloneq & x \mid \vlam{x}{e} \mid \ints \mid \vectorvn + \\[8pt] + e & \coloneq & v \mid e~e \mid \vectoren \mid \aref{e}{e} + \\[8pt] + \tau & \coloneq & \tint \mid \tarray \mid \tau \rightarrow \tau + \\[8pt] + \tenv & \coloneq & \tenvempty \mid x:\tau,\tenv + \end{array} +\] + + +\fbox{$\typesto{e}{\tau}$} +\begin{mathpar} + \inferrule*[left=T-Var]{ + \tenv(x) = \tau + }{ + \typesto{x}{\tau} + } + + \inferrule*[left=T-Lam]{ + \typestogen{x:\tau,\tenv}{e}{\tau'} + }{ + \typesto{\vlam{x}{e}}{\tau \rightarrow \tau'} + } + + \inferrule*[left=T-App]{ + \typesto{e_1}{\tau \rightarrow \tau'} + \\ + \typesto{e_2}{\tau} + }{ + \typesto{e_1~e_2}{\tau'} + } + + \inferrule*[left=T-Int]{ + i \in \ints + }{ + \typesto{i}{\tint} + } + + \inferrule*[left=T-Arr]{ + \typesto{e_0}{\tint} + \\ + \ldots + \\ + \typesto{e_{n-1}}{\tint} + }{ + \typesto{\vectoren}{\tarray} + } + + \inferrule*[left=T-Ref]{ + \typesto{e_1}{\tarray} + \\ + \typesto{e_2}{\tint} + }{ + \typesto{\aref{e_1}{e_2}}{\tint} + } + +\end{mathpar} + diff --git a/popl-2017/intro.scrbl b/popl-2017/intro.scrbl index ca42932..45615ee 100644 --- a/popl-2017/intro.scrbl +++ b/popl-2017/intro.scrbl @@ -4,174 +4,116 @@ @title[#:tag "sec:intro"]{Type Eleborators Need APIs} -The implementations of richhly typed programming languages tend to employ -an elaboration pass. As the elaborator traverses the (parsed) synatx, it -simultaneously reconstructs types, checks their consistency according to -the underlying type theory, replaces many of the surface-syntax constructs -with constructs from the kernel language, and inserts type information to -create a (n often) fully annotated representation. +Type systems such as ML's, Haskell's, or Scala's help developers convey +their thinking to their successors in a sound and concise manner. The +implementations of such type systems tend to employ elaboration passes to +check the types of a specific program. As an elaborator traverses the +(parsed) synatx, it simultaneously reconstructs types, checks their +consistency according to the underlying type theory, replaces many of the +surface-syntax constructs with constructs from the kernel language, and +inserts type information to create an annotated representation. -Some (implementations of) programming languages also support a way to +Some (implementations of such) programming languages also support a way to programmatically direct the elaborator. For example, Rust and Scala come with compiler plug-ins. Typed Racket and Types Clojure inherit the macro -mechanisms of their parent languages. +mechanisms of their parent languages. Here we refer to such mechanisms as +@defterm{elaborator API}s. -In this paper, we show that such APIs allow programmers to tailor the -underlying type theories to their needs and that such tailorings look -highly promising (section 2). The ``tailor'' can ... correctness ... +Equipping type-checking elaborators with APIs---or using the existing +APIs---promises new ways to expand the power of type checking at a +relatively low cost. Specifically, the developers of libraries can +@defterm{tailor} typing rules to the exported functions and linguistic +constructs. - For convenience, we use Typed Racket and its -API to the elaborator (section 3). To illustrate the usefulness of the -idea, we implement two tailorings. The first one---chosen for the wide -familiarity of the domain---enriches the type-checking of -vector-referencing operations (section 4). The second example explains how -the implementor of a string-based embedded DSL---a regexp-matching -DSL---may tailor the types of the interpretation function (section 5). Our -evaluation validate that both tailorings reduce the number of casts that -the programmer or the elaborator have to insert. +Consider the example of tailoring the API of a library that implements a +string-based, domain-specific languages. Examples of such libraries abound: +formatting, regular-expression matching, database queries, and so on. The +creators of these DSLs tend to know a lot about how programs written in +these DSLs relate to the host program, but they cannot express this +knowledge in API types without resorting to such rich systems as +dependent type theory. + +Type tailoring allows such programmers to refine the typing rules for the +APIs in a relatively straightforward way. Recall the API of the +regular-expression library. In a typical typed language, the matching +function is exported with a type like this: +@; +@verbatim[#:indent 4]{ + +reg_exp_match: String -> Option [Listof String] + +} +@; +When the result is a list of strings, the length of the list depends on the +argument string---but the API for the regular-expression library cannot +express this relationship between the regular expression and the +surrounding host program. As a result, the author of the host program must +inject additional complexity, often in the form of (hidden) run-time checks. + +If type tailoring is available, the creator of the library can refine the +type of @tt{reg_exp_match} with rules such as these: +@; +@verbatim[#:indent 4]{ + + Program |- s : does not contain grouping +------------------------------------------------ + G |- reg_exp_match(s) : Option [List String] +Program |- s : contains one grouping w/o alternative +----------------------------------------------------- + G |- reg_exp_match(s) : Option [List String String] +} +@; +That is, when the extended elaborator can use (a fragment of) the program +to prove that the given string for a specific use of @tt{reg_exp_match} +does not contain grouping specifications---ways to extract a sub-string via +matching---then the @tt{Some} result type is a list that contains a single +string. Similarly, if the string contains exactly one such grouping (and no +alternative), then a match produces a list of two strings. In all other +cases, the type checker uses the default type for the function. +A vector library is another familiar example that offers opportunities for +type tailoring. Often such a library exports a dereferencing construct with +the following type rule: +@; +@verbatim[#:indent 4]{ + G |- v : Vector[X] G |-e : Integer +---------------------------------------- + G |- v[e] ~~> checked_ref(v,e) : X +} +@; +If the elaborator can prove, however, that the indexing expression @tt{e} +is equal to or evaluates to a specific integer @tt{i}, the rule can be +strengthened as follows: +@; +@verbatim[#:indent 4]{ + Program |- e = i for some i in Natural + && Program |- v is vector of length n + && i < n + G |- v : Vector[X] G |-e : Integer +---------------------------------------- + G |- v[e] ~~> unsafe-ref(v,e) : X +} +@; + That is, the elaborator can then eliminate a possibly expensive run-time + check. - - - -Typecheckers for polymorphic languages like Haskell incredibly wasteful. -Given an AST, they immediately throw away the rich @emph{syntax} of expression - and value forms in favor of a @emph{type} abstraction. -So whereas any novice programmer can tell that the expression @tt{(/ 1 0)} - will go wrong, the typechecker cannot because all it sees is @tt{(/ @emph{Num} @emph{Num})}. - -These typecheckers are also incredibly stubborn, and cannot handle basic - forms of polymorphism that are not part of their type algebra. -For instance, the behavior of the function @tt{map} can be stated in clear - English: - - @nested[@elem[#:style 'italic]{ - @tt{(map proc lst ...+) → list?} - Applies @tt{proc} to the elements of the @tt{lsts} from the first elements to the last. - The @tt{proc} argument must accept the same number of arguments as the number - of supplied @tt{lsts}, and all @tt{lsts} must have the same number of elements. - The result is a list containing each result of @tt{proc} in order. - }] - -But the Haskell typechecker can only validate indexed versions of @tt{map} - defined for a fixed number of lists. -Programmers are thus forced to choose between @tt{map}, @tt{zipWith}, and @tt{zipWith3} - depending on the number of lists they want to map over.@note{Maybe need a better - example than @tt{map} because polydots handle it. @tt{curry}?} - -There are a few solutions to these problems. Blah blah. -They all require migrating to a new programming language or instrumenting call - sites across the program, neither of which are desirable solutions for the - working programmer. - -We propose @emph{extending} a language's type system with - @emph{introspective} typing rules that capture value information as well as type - information. -Given a proof theory @exact|{$\Progn \vDash {\tt P}$}| for inferring propositions - @tt{P} from the program text @exact{$\Progn$}, the rule for division would be: - - @exact|{ - \begin{mathpar} - \inferrule*[]{ - \Gamma; \Progn \vdash n_1 : \tnum - \\ - \Gamma; \Progn \vdash n_2 : \tnum - \\\\ - \Progn \vDash n_2 \neq 0 - }{ - \Gamma; \Progn \vdash {\tt /}~n_1~n_2 : \tnum - } - \end{mathpar} - }| - -and the rule for @tt{map} would be: - - @exact|{ - \begin{mathpar} - \inferrule*{ - \Gamma; \Progn \vdash f : \overline{\tvar{A}} \rightarrow \tvar{B} - \\ - \Gamma; \Progn \vdash \overline{x} : \overline{\tlist{A}} - \\\\ - \Progn \vDash {\tt length}(\overline{\tvar{A}}) = {\tt length}(\overline{\tlist{A}}) - }{ - \Gamma; \Progn \vdash {\tt map}~f~\overline{x} : \tlist{B} - } - \end{mathpar} - }| - -These rules would augment---not replace---the existing rules for division - and @tt{map}. -In the case that no information about subterms can be inferred from the program - text, we defer to the division rule that admits runtime errors and the @tt{map} - rule that can only be called with a single list. - - -@; ============================================================================= -@parag{Contributions} -@itemlist[ - @item{ - We propose ``typechecking modulo theories'' (or something): - using the text of the program to refine typechecking judgments. - } - @item{ - As a proof of concept, we implement a simple theory within the Racket - macro expander and use it enhance the user experience of Typed Racket. - The implementation is just a library; we suspect Typed Clojure and TypeScript - programmers could build similar libraries using Clojure's macros and sweet.js. - } - @item{ - Our Racket implementation leverages a novel class of macros. - We define the class, state proof conditions for a new macro - to enter the class, and prove our macros are in the class. - } -] - - -@; ============================================================================= -@parag{Guarantees} -@itemlist[ - @item{ - Refinement of the existing type system. - Adding @exact{$\Progn \vDash$} only rejects programs that would go - wrong dynamically and/or accept programs that are ill-typed, but go - right dynamically. - } - @item{ - Errors reported by our implementation are in terms of the source program, - not of expanded/transformed code. - } -] - - -@; ============================================================================= -@parag{Applications} - -This table summarizes the ICFP pearl. -We infer values from various syntactic domains and use the inferred data to - justify small program transformations. - -@tabular[#:style 'boxed - #:sep @hspace[2] - #:row-properties '(bottom-border ()) - #:column-properties '(right right right) - (list (list @bold{Domain} @bold{Infers} @bold{Use}) - (list "format strings" "arity + type" "guard input") - (list "regexp strings" "# groups" "prove output") - (list "λ" "arity" "implement curry_i") - (list "numbers" "value" "constant folding") - (list "vectors" "size" "guard, optimize") - (list "sql schema" "types" "guard input, prove output"))] - -@; ============================================================================= -@parag{Lineage} - -Herman and Meunier used macros for partial evaluation of string DSLs@~cite[hm-icfp-2004]. -Lorenzen and Erdweg defined criteria for sane deguarings@~cite[le-popl-2016]. - +This paper demonstrates the idea with concrete case studies of type +tailoring (section 2) in the context of Typed Racket and its API to the +elaborator (section 3). To illustrate the usefulness of the idea, we +implement two tailorings. The first one---chosen for the wide familiarity +of the domain---enriches the type-checking of vector-referencing operations +(section 4). The second example explains how the implementor of a +string-based embedded DSL---a regexp-matching DSL---may tailor the types of +the interpretation function (section 5). Our evaluations confirm that these +tailorings reduce the number of run-time checks that the programmer or the +elaborator have to insert into the host program. In addition, we sketch +several other applications of type tailoring in Typed Racket (section +6). We also explain how the creator of such libraries can refine the +existing type soundness proof of the host language to argue the correctness +of the tailoring. diff --git a/popl-2017/paper.scrbl b/popl-2017/paper.scrbl index 4762140..004a22a 100644 --- a/popl-2017/paper.scrbl +++ b/popl-2017/paper.scrbl @@ -3,22 +3,27 @@ @(require "common.rkt") @title{Tailoring Type Theories (T.T.T)} -@authorinfo["Piet Hein" "Dutchman" "piet at hein.com"] +@authorinfo["Piet Hein" "Funen, Denmark" "gruk at piethein.com"] @abstract{ -A static type system is a compromise between precision and usability. - Improving the ability of a type system to distinguish correct and - erroneous programs typically requires that programmers restructure their - code or provide more type annotations, neither of which are desirable - tasks. +Many typed APIs implicitly acknowledge the @emph{diktat} that the host type + system imposes on the creators of libraries. When a library implements a + string-based domain-specific language, the problem is particularly obvious. + The interpretation functions for the programs in this embedded language + come with the uninformative type that maps a string to some other host + type. Only dependently typed languages can improve on this scenario at the + moment, but they impose a steep learning curve on programmers. -This paper presents an elaboration-based technique for refining the - analysis of an existing type system on existing code without changing the - type system or the code. As a proof of concept, we have implemented the - technique as a Typed Racket library. From the programmers' viewpoint, - simply importing the library makes the type system more perceptive---no - annotations or new syntax are required. +This paper proposes to tackle this problem with APIs for type + checkers. Specifically, it observes that most typed languages already + employ an elaboration pass to type-check programs. If this elaborator + comes with a sufficiently rich API, the author of a library can supplement + the default types of the library's API with typing rules that improve the + collaboration between host programs and uses of the library. The + evaluation uses a prototype for Typed Racket and illustrates how useful + the idea is for widely available libraries. Also the paper sketches how + the authors of such ``tailored'' rules can argue their soundness. } @@ -29,11 +34,12 @@ This paper presents an elaboration-based technique for refining the @; See OUTLINE.md for explanation @include-section{intro.scrbl} @include-section{background.scrbl} -@include-section{examples.scrbl} -@include-section{discussion.scrbl} -@include-section{friends.scrbl} -@include-section{related-work.scrbl} -@include-section{conclusion.scrbl} +@include-section{segfault.scrbl} +@;@include-section{examples.scrbl} +@;@include-section{discussion.scrbl} +@;@include-section{friends.scrbl} +@;@include-section{related-work.scrbl} +@;@include-section{conclusion.scrbl} @section[#:style 'unnumbered]{Acknowledgments} diff --git a/popl-2017/related-work.scrbl b/popl-2017/related-work.scrbl index bdec2e1..41de363 100644 --- a/popl-2017/related-work.scrbl +++ b/popl-2017/related-work.scrbl @@ -4,6 +4,7 @@ @title[#:tag "sec:related-work"]{Experts} +@section{SoundX} SoundX is a system for modeling programming languages and defining type-sound extensions, e.g. defining a type derivation for @tt{let} in terms of a type derivation for @tt{λ} expressions. @@ -32,3 +33,35 @@ We propose a deeper notion of correctness for our syntactic transformations, but ] Though we fail on the third point. + +@section{Wepa} + +Our general approach and outlook on type soundness is informed by Cousot. + +@section{Compiler Plugings} + +GHC (constraint solvers) +Rust (macros) +Scala (macros) + + +@section{Parsec} + +Haskell / ML don't really use regular expressions. +They use parser combinators. + +Matthias: give up ENTIRE TOOLCHAIN if you go external + + +@section{Typechecker Plugins} +OutsideIn +Adam Gundry units plugin +Fsharp packs? +extensible records for haskell + +@section{Why no types?} +Could save work in map if we got function arity from type. +Clearly strong argument for mixing types and syntax extensions. +But that's research; requires careful design and definitely not a drop-in + solution like we propose here. +(Would apparaently require sweeping changes) diff --git a/popl-2017/segfault.scrbl b/popl-2017/segfault.scrbl new file mode 100644 index 0000000..2afb7ae --- /dev/null +++ b/popl-2017/segfault.scrbl @@ -0,0 +1,205 @@ +#lang scribble/sigplan @onecolumn + +@require["common.rkt"] + +@title[#:tag "sec:segfault"]{Well Typed Programs do not go SEGFAULT} + +@Figure-ref{fig:stlc} describes a simply typed @exact{$\lambda$} calculus + with integers and integer arrays. +@Figure-ref{fig:elab0} gives a type-directed elaboration of terms written + in this surface language to a typed, executable core language. +The main difference between surface and core is that array references + @exact{$\aref{e}{e}$} are replaced with calls to a primitive operation + @exact{$\checkedref$}. +For example, the term @exact|{$\aref{x}{3}$}| elaborates to @exact|{$\checkedref~x~3$}| + when @exact{$x$} is a variable with type @exact{$\tarray$}. + +The operational semantics for the core language are given in @Figure-ref{fig:stlc-core}, + along with type judgments for two primitive operations: + @exact{$\checkedref$} and @exact{$\unsaferef$}. +Intuitively, @exact{$\checkedref$} is a memory-safe function that performs a bounds check before + dereferencing an array and gives a runtime error when called with incompatible values. +On the other hand, @exact{$\unsaferef$} does not perform a bounds check and therefore + may return arbitrary data or raise a memory error when called with an invalid index. +These negative consequences are modeled in the (family of) rule(s) @exact|{\textsc{E-UnsafeFail}}|. +Consequently, the judgment @exact{$\smallstep{e}{e}$} and its transitive closure + @exact{$\smallstepstar{e}{e}$} are relations, whereas + typing and elaboration are proper functions. + + @figure["fig:stlc" "Simply-Typed λ Calculus" #:style left-figure-style + @exact|{\input{fig-stlc-surface}}| + ] + + @figure["fig:elab0" "Elaborating typed terms" #:style left-figure-style + @exact|{\input{fig-elab0}}| + ] + + @figure["fig:stlc-core" "Core Language" #:style left-figure-style + @exact|{\input{fig-stlc-core}}| + ] + +Evaluation of the surface language in @Figure-ref{fig:stlc}, however, + is deterministic. + + @definition["evaluation"]{ + @exact{$\bigstep{e}{v}$} if and only if @exact{$e$} is closed and there + exists @exact{$e'$}, @exact{$\tau$} such that + @exact|{$\elabstoclosed{e}{e'}{\tau}$}| + and @exact{$\smallstepstar{e'}{v}$}. + } + + @theorem["determinism"]{ + Evaluation is a proper function. + In other words, if @exact{$\bigstep{e}{v}$} + and @exact{$\bigstep{e}{v'}$} + then @exact{$v = v'$}. + } + + @proof{ + By induction on terms of the core language, if + @exact{$\smallstep{e}{e'}$} and @exact{$e$} is not of the form + @exact{$\unsaferef~e_1~e_2$} then @exact{$e'$} is unique. + The theorem follows because the surface language does not allow + @exact{$\unsaferef$} and elaboration does not introduce unsafe references. + } + +Determinism implies that evaluation never invokes the @sc{E-UnsafeFail} rule; + therefore, evaluation of surface terms is memory safe. +Additionally, the surface language is type safe. + + @theorem["soundness"]{ + If @exact{$e$} is closed and @exact|{$\elabstoclosed{e}{e'}{\tau}$}| + then one of the following holds: + @itemlist[ + @item{ + @exact{$e'$} is a value + } + @item{ + @exact{$\smallstep{e'}{\indexerror}$} + } + @item{ + @exact{$\smallstep{e'}{e''}$} + and @exact{$\typestoclosed{e''}{\tau}$} + } + ] + } + + @proof{ + The interesting cases involve @exact{$\checkedref$}. + First, @exact|{$\elabstoclosed{\aref{e_1}{e_2}}{\checkedref~e_1~e_2}{\tint}$}| + implies @exact{$\typestoclosed{e_1}{\tarray}$} + and @exact{$\typestoclosed{e_2}{\tint}$}. + Depending on the values of @exact{$e_1$} and @exact{$e_2$}, + the application steps to either @exact{$\indexerror$} or the @exact{$e_2$}-th + element of the array @exact{$e_1$}---which, by assumption, has type @exact{$\tint$}. + } + + +@section{Extending the Elaborator} + +Building on the foundation of type soundness, we extend the elaborator with + the @emph{value-directed} rules in @Figure-ref{fig:elab1}. +When the elaborator can prove that an array reference is in bounds based on + the syntax of terms, it produces an unsafe reference. +Conversely, the elaborator raises a compile-time index error if it can prove + the index is outside the bounds of the array. + + @figure["fig:elab1" "Value-directed elaboration" #:style left-figure-style + @exact|{\input{fig-elab1}}| + ] + +Our thesis is that adding such rules does not undermine the safety guarantees of the + original language. +In this case, we recover the original guarantees by extending the proofs of + type and memory safety to address the @sc{S-RefPass} and @sc{S-RefFail} elaboration rules. + + @definition[@exact{$\elabarrowplus$}]{ + The relation @exact{$\elabarrowplus$} is the union of the rules in + @Figure-ref["fig:elab0" "fig:elab1"]. + } + + @theorem["determinism"]{ + @exact|{$\bigstepplus{e}{v}$}| is a proper function, where @exact|{$\bigstepplus{e}{v}$}| + if and only if @exact|{$\elabstoclosedplus{e}{e'}{\tau}$}| + and @exact|{$\smallstepstar{e'}{v}$}|. + } + + @proof{ + With the original elaboration scheme, all array references @exact{$\aref{e_1}{e_2}$} + elaborate to @exact{$\checkedref~e_1'~e_2'$}, where @exact{$e_1'$} + and @exact{$e_2'$} are the elaboration of @exact{$e_1$} and @exact{$e_2$}. + There are now three possibilities: + @itemize[ + @item{ + @exact{$e_1'$} is an array value @exact{$\vectorvn$} + and @exact{$e_2'$} is an integer value @exact{$i \in \ints$} + and @exact{$0 \le i < n$}. + Both @sc{S-Ref} and @sc{S-RefPass} are possible elaborations, + therefore @exact{$e'$} is either @exact{$\checkedref~\vectorvn~i$} + or @exact{$\unsaferef~\vectorvn~i$}. + Because @exact{$0 \le i < n$}, evaluation must proceed with + @sc{E-CheckPass} or @sc{E-UnsafePass} in each case, respectively. + These rules have the same result, @exact{$v_i$}. + } + @item{ + @exact{$e_1'$} is an array value @exact{$\vectorvn$} + and @exact{$e_2'$} is an integer value @exact{$i \in \ints$} + and either @exact{$i < 0$} or @exact{$i \ge n$}. + The rules @sc{S-Ref} and @sc{S-RefFail} are possible elaborations. + If @sc{S-Ref} is chosen, @exact{$e'$} is @exact{$\checkedref~\vectorvn~i$} + and evaluates to @exact{$\indexerror$} because @exact{$i$} is outside + the bounds of the array. + If @sc{S-RefFail} is chosen an index error is raised immediately. + } + @item{ + Otherwise, either @exact{$e_1'$} or @exact{$e_2'$} is not a value + form and we rely on the existing proof of determinism. + } + ] + } + + For practical purposes, non-determinism in the elaborator should be + resolved giving the rules in @Figure-ref{fig:elab1} preference over the rules + in @Figure-ref{fig:elab0}. + But the result will be the same in any event. + + + @theorem["soundness"]{ + If @exact{$e$} is a closed term then @exact|{$\elabstoclosedplus{e}{e'}{\tau}$}| + implies that @exact{$e'$} is either a value or steps to an index error or + steps to a term @exact{$e''$} with type @exact{$\tau$}. + } + + @proof{ + We extend the existing proof with cases for @exact{$\unsaferef$}. + @itemize[ + @item{ + If @sc{S-RefPass} is applied by the elaborator then @exact{$e'$} is + the call @exact{$\unsaferef~\vectorvn~i$} and @exact{$0 \le i < n$}. + By assumption, @exact|{$\typestoclosed{\vectorvn}{\tarray}$}| and + @exact{$\typestoclosed{i}{\tint}$}. + Therefore @exact{$e'$} is well-typed by the rule @sc{T-Unsafe} and steps + to the integer value @exact{$v_i$} by @sc{E-UnsafePass}. + } + @item{ + If @sc{S-RefFail} is applied by the elaborator then @exact{$e'$} is + @exact{$\indexerror$} and type soundness follows trivially. + } + ] + } + +The key step in both theorems was that elaboration supplied the proposition + @exact{$0 \le i < n$}. +Both the type rules and evaluation rules depended on this premise. + + +@section{In Practice} + +Have implemented for typed racket +- library +- with enhancements from @todo{secref} +- via expander, so typechecked afterwards + +Checked in +- standard distro (includes plot,math,stats) +- pict3d diff --git a/popl-2017/src/distinguisher.rkt b/popl-2017/src/distinguisher.rkt new file mode 100644 index 0000000..b86d636 --- /dev/null +++ b/popl-2017/src/distinguisher.rkt @@ -0,0 +1,13 @@ +#lang racket/base +(require (for-syntax racket/base syntax/parse)) + +(define-syntax (distinguisher stx) + (syntax-parse stx + [(_ e_1 e_2) + (if (equal? (syntax->datum #'e_1) + (syntax->datum #'e_2)) + #'((λ (x) (x x)) (λ (x) (x x))) + #'(λ (x) x))])) + +;(distinguisher '() '()) +(distinguisher 'A 'B) diff --git a/popl-2017/src/matrix_map.ml b/popl-2017/src/matrix_map.ml new file mode 100644 index 0000000..27b4e45 --- /dev/null +++ b/popl-2017/src/matrix_map.ml @@ -0,0 +1,40 @@ +(* Trying to implment variable arity map in OCaml *) + +let empty xs = + match xs with + | [] -> true + | _ -> false + +let rec ormap f xs = + match xs with + | [] -> true + | x::xs -> (f x) || (ormap f xs) + +(* Error *) +let rec apply f xs = + match xs with + | [] -> failwith "nope" + | [x] -> f x + | x::xs -> + let f2 = f x in + apply f2 xs + +let rec mapN f xss = + match xss with + | [] -> [] + | _ -> + if ormap empty xss + then [] + else apply f (map List.hd xss) :: mapN f (map List.tl xss) + +let _ = + let a1 = mapN (fun x y -> x + y) + [[1, 2, 3] + [0, 4, 5]] + in + let a2 = mapN (fun x y z -> x + y + z) + [[1, 2, 3] + [9, 4, 2] + [0, 4, 5]] + in + () diff --git a/popl-2017/src/racket/check-all-equal.rkt b/popl-2017/src/racket/check-all-equal.rkt new file mode 100644 index 0000000..3e6def7 --- /dev/null +++ b/popl-2017/src/racket/check-all-equal.rkt @@ -0,0 +1,12 @@ +#lang racket/base + +(module+ test + (require rackunit) + + (define-syntax-rule (check-all-eq? e_1 e_rest ...) + (begin (check-equal? e_1 e_1) + (check-equal? e_rest e_1) ...)) + + (check-all-eq? 1 1 1 1 1) + (check-all-eq? 1 2 1 3) +) diff --git a/popl-2017/src/racket/log.rkt b/popl-2017/src/racket/log.rkt new file mode 100644 index 0000000..03ae833 --- /dev/null +++ b/popl-2017/src/racket/log.rkt @@ -0,0 +1,40 @@ +#lang racket/base +(require (for-syntax racket/base syntax/parse)) + +(define-for-syntax DEBUG #f) + +(define log-file (open-output-file "log.txt" #:exists 'append)) + +(define-syntax (log stx) + (syntax-parse stx + [(_ message) + (unless (string? (syntax->datum (syntax message))) + (error 'nope)) + (if DEBUG + #'(displayln (string-append "LOG " message) log-file) + #'(void))])) + + +(log "wepa") +;(log 22) + +(define-syntax (++ stx) + (syntax-parse stx + [(++ s1 s2) + (syntax-property + (syntax (string-append s1 s2)) + 'mytype 'string)])) + +(define-syntax (log1 stx) + (syntax-parse stx + [(log1 msg) + (define e (local-expand #'msg 'expression '())) + (printf "asd ~a\n" e) + (if (or (string? (syntax->datum #'msg)) + (syntax-property e 'mytype)) + #'"SAFE" + #'"NOTSAFE")])) + +(log1 "yolasdf") +(log1 "adsf") +(log1 (++ "asdf" "cde")) diff --git a/popl-2017/texstyle.tex b/popl-2017/texstyle.tex index 082ec54..c284636 100644 --- a/popl-2017/texstyle.tex +++ b/popl-2017/texstyle.tex @@ -3,6 +3,7 @@ \usepackage{listings} \usepackage{stmaryrd} \usepackage{mathpartir} +\usepackage{fourier-orns} \lstset{language=ML} \usepackage[usenames,dvipsnames]{xcolor} \usepackage{multicol} @@ -23,7 +24,7 @@ \overfullrule=1mm %% balance last page columns -\usepackage{flushend} +%\usepackage{flushend} %% for figure 2 \let\ulcorner\relax @@ -32,20 +33,57 @@ \let\lrcorner\relax \usepackage{amssymb} -\newcommand{\interp}{\mathcal{I}} -\newcommand{\untyped}[1]{{\,#1}_\flat} -\newcommand{\trans}{\mathcal{T}} -\newcommand{\elab}{\mathcal{E}} -\newcommand{\elabfe}[1]{\llbracket #1 \rrbracket} -\newcommand{\elabf}{\elabfe{\cdot}} -\newcommand{\subt}{\mathbf{<:\,}} -\newcommand{\tos}{\mathsf{types_of_spec}} -\newcommand{\trt}[1]{\emph{#1}} -\newcommand{\tprintf}{\mathsf{t_printf}} -\newcommand{\mod}[1]{$\mathsf{#1}$} -\newcommand{\evalsto}{\RktMeta{}\RktSym{=={\Stttextmore}}\RktMeta{}\mbox{\hphantom{\Scribtexttt{x}}}} +%% -- types \newcommand{\tvar}[1]{\mathsf{#1}} +\newcommand{\tnat}{\tvar{Natural}} +\newcommand{\tarray}{\tvar{Array}} \newcommand{\tint}{\tvar{Int}} \newcommand{\tnum}{\tvar{Num}} \newcommand{\tlist}[1]{\tvar{List~#1}} -\newcommand{\Progn}{\mathcal{P}} +\newcommand{\naturals}{\mathbb{N}} +\newcommand{\ints}{\mathbb{Z}} + +%% -- terms +\newcommand{\vectorgen}[1]{\langle #1 \rangle} +\newcommand{\vectoren}{\vectorgen{e_0, \ldots, e_{n-1}}} +\newcommand{\vectorvn}{\vectorgen{v_0, \ldots, v_{n-1}}} +\newcommand{\vectorxn}{\vectorgen{x_0, \ldots, x_{n-1}}} +\newcommand{\vlam}[2]{\lambda\,#1\,.\,#2} +\newcommand{\aref}[2]{#1@#2} +\newcommand{\checkedref}{\tvar{checked\mbox{-}ref}} +\newcommand{\unsaferef}{\tvar{unsafe\mbox{-}ref}} +\newcommand{\segfault}{\mathsf{segfault}} +\newcommand{\indexerror}{\mathsf{IndexError}} + +%% -- evaluation contexts +\newcommand{\ectx}{E} +\newcommand{\ehole}{[\cdot]} + +%% -- type environments +\newcommand{\tenv}{\Gamma} +\newcommand{\tenvempty}{\cdot} +\newcommand{\tenvcons}[3]{#1:#2,#3} + +%% -- typing +\newcommand{\typestogen}[3]{#1 \vdash #2 : #3} +\newcommand{\typestoclosed}[2]{\typestogen{\tenvempty}{#1}{#2}} +\newcommand{\typesto}[2]{\typestogen{\tenv}{#1}{#2}} + +%% -- elaboration +\newcommand{\elabarrow}{\rightsquigarrow} +\newcommand{\elabarrowplus}{\elabarrow^{+}} +\newcommand{\elabstogen}[5]{\typestogen{#1}{#2 #3 #4}{#5}} +\newcommand{\elabstoclosedplus}[3]{\elabstogen{\tenvempty}{#1}{\elabarrowplus}{#2}{#3}} +\newcommand{\elabstoclosed}[3]{\elabstogen{\tenvempty}{#1}{\elabarrow}{#2}{#3}} +\newcommand{\elabsto}[3]{\elabstogen{\tenv}{#1}{\elabarrow}{#2}{#3}} + +%% -- evaluation +\newcommand{\bigstep}[2]{#1 \Downarrow #2} +\newcommand{\bigstepplus}[2]{#1 \Downarrow^+ #2} +\newcommand{\smallstepstar}[2]{#1 \rightarrow^* #2} +\newcommand{\smallstep}[2]{#1 \rightarrow #2} + +%% -- misc +\newcommand{\esubst}[2]{[#2/#1]} +\newcommand{\qed}{$\square$} +