stxparse-info/6-12/racket/collects/syntax/parse/private/opt.rkt

431 lines
18 KiB
Racket

#lang racket/base
(require racket/syntax
racket/pretty
syntax/parse/private/residual-ct ;; keep abs. path
syntax/parse/private/minimatch
syntax/parse/private/rep-patterns
syntax/parse/private/kws)
(provide (struct-out pk1)
(rename-out [optimize-matrix0 optimize-matrix]))
;; controls debugging output for optimization successes and failures
(define DEBUG-OPT-SUCCEED #f)
(define DEBUG-OPT-FAIL #f)
;; ----
;; A Matrix is a (listof PK) where each PK has same number of columns
;; A PK is one of
;; - (pk1 (listof pattern) expr) -- a simple row in a parsing matrix
;; - (pk/same pattern Matrix) -- a submatrix with a common first column factored out
;; - (pk/pair Matrix) -- a submatrix with pair patterns in the first column unfolded
;; - (pk/and Matrix) -- a submatrix with and patterns in the first column unfolded
(struct pk1 (patterns k) #:prefab)
(struct pk/same (pattern inner) #:prefab)
(struct pk/pair (inner) #:prefab)
(struct pk/and (inner) #:prefab)
(define (pk-columns pk)
(match pk
[(pk1 patterns k) (length patterns)]
[(pk/same p inner) (add1 (pk-columns inner))]
[(pk/pair inner) (sub1 (pk-columns inner))]
[(pk/and inner) (sub1 (pk-columns inner))]))
;; Can factor pattern P given clauses like
;; [ P P1 ... | e1] [ | [P1 ... | e1] ]
;; [ P ⋮ | ⋮] => [P | [ ⋮ | ⋮] ]
; [ P PN ... | eN] [ | [PN ... | eN] ]
;; if P cannot cut and P succeeds at most once (otherwise may reorder backtracking)
;; Can unfold pair patterns as follows:
;; [ (P11 . P12) P1 ... | e1 ] [ P11 P12 P1 ... | e1 ]
;; [ ⋮ ⋮ | ⋮ ] => check pair, [ ⋮ | ⋮ ]
;; [ (PN1 . PN2) PN ... | eN ] [ PN1 PN2 PN ... | eN ]
;; Can unfold ~and patterns similarly; ~and patterns can hide
;; factoring opportunities.
;; ----
(define (optimize-matrix0 rows)
(define now (current-inexact-milliseconds))
(when (and DEBUG-OPT-SUCCEED (> (length rows) 1))
(eprintf "\n%% optimizing (~s):\n" (length rows))
(pretty-write (matrix->sexpr rows) (current-error-port)))
(define result (optimize-matrix rows))
(define then (current-inexact-milliseconds))
(when (and DEBUG-OPT-SUCCEED (> (length rows) 1))
(cond [(= (length result) (length rows))
(eprintf "%% !! FAILED !! (~s ms)\n\n" (floor (- then now)))]
[else
(eprintf "==> (~s ms)\n" (floor (- then now)))
(pretty-write (matrix->sexpr result) (current-error-port))
(eprintf "\n")]))
result)
;; optimize-matrix : (listof pk1) -> Matrix
(define (optimize-matrix rows)
(cond [(null? rows) null]
[(null? (cdr rows)) rows] ;; no opportunities for 1 row
[(null? (pk1-patterns (car rows))) rows]
[else
;; first unfold and-patterns
(let-values ([(col1 col2)
(for/lists (col1 col2) ([row (in-list rows)])
(unfold-and (car (pk1-patterns row)) null))])
(cond [(ormap pair? col2)
(list
(pk/and
(optimize-matrix*
(for/list ([row (in-list rows)]
[col1 (in-list col1)]
[col2 (in-list col2)])
(pk1 (list* col1
(make-and-pattern col2)
(cdr (pk1-patterns row)))
(pk1-k row))))))]
[else (optimize-matrix* rows)]))]))
;; optimize-matrix* : (listof pk1) -> Matrix
;; The matrix is nonempty, and first column has no unfoldable pat:and.
;; Split into submatrixes (sequences of rows) starting with similar patterns,
;; handle according to similarity, then recursively optimize submatrixes.
(define (optimize-matrix* rows)
(define row1 (car rows))
(define pat1 (car (pk1-patterns row1)))
(define k1 (pk1-k row1))
;; Now accumulate rows starting with patterns like pat1
(define-values (like? combine) (pattern->partitioner pat1))
(let loop ([rows (cdr rows)] [rrows (list row1)])
(cond [(null? rows)
(cons (combine (reverse rrows)) null)]
[else
(define row1 (car rows))
(define pat1 (car (pk1-patterns row1)))
(cond [(like? pat1)
(loop (cdr rows) (cons row1 rrows))]
[else
(cons (combine (reverse rrows))
(optimize-matrix* rows))])])))
;; pattern->partitioner : pattern -> (values (pattern -> boolean) ((listof pk1) -> PK))
(define (pattern->partitioner pat1)
(match pat1
[(pat:pair head tail)
(values (lambda (p) (pat:pair? p))
(lambda (rows)
(when DEBUG-OPT-SUCCEED
(eprintf "-- accumulated ~s rows like ~e\n" (length rows) (pattern->sexpr pat1)))
(cond [(> (length rows) 1)
(pk/pair (optimize-matrix
(for/list ([row (in-list rows)])
(let* ([patterns (pk1-patterns row)]
[pat1 (car patterns)])
(pk1 (list* (pat:pair-head pat1)
(pat:pair-tail pat1)
(cdr patterns))
(pk1-k row))))))]
[else (car rows)])))]
[(? pattern-factorable?)
(values (lambda (pat2) (pattern-equal? pat1 pat2))
(lambda (rows)
(when DEBUG-OPT-SUCCEED
(eprintf "-- accumulated ~s rows like ~e\n" (length rows) (pattern->sexpr pat1)))
(cond [(> (length rows) 1)
(pk/same pat1
(optimize-matrix
(for/list ([row (in-list rows)])
(pk1 (cdr (pk1-patterns row)) (pk1-k row)))))]
[else (car rows)])))]
[_
(values (lambda (pat2)
(when DEBUG-OPT-FAIL
(when (pattern-equal? pat1 pat2)
(eprintf "** cannot factor: ~e\n" (syntax->datum #`#,pat2))))
#f)
(lambda (rows)
;; (length rows) = 1
(car rows)))]))
;; unfold-and : pattern (listof pattern) -> (values pattern (listof pattern))
(define (unfold-and p onto)
(match p
[(pat:and subpatterns)
;; pat:and is worth unfolding if first subpattern is not pat:action
;; if first subpattern is also pat:and, keep unfolding
(let* ([first-sub (car subpatterns)]
[rest-subs (cdr subpatterns)])
(cond [(not (pat:action? first-sub))
(when #f ;; DEBUG-OPT-SUCCEED
(eprintf ">> unfolding: ~e\n" p))
(unfold-and first-sub (*append rest-subs onto))]
[else (values p onto)]))]
[_ (values p onto)]))
(define (pattern-factorable? p)
;; Can factor out p if p can succeed at most once, does not cut
;; - if p can succeed multiple times, then factoring changes success order
;; - if p can cut, then factoring changes which choice points are discarded (too few)
(match p
[(pat:any) #t]
[(pat:svar _n) #t]
[(pat:var/p _ _ _ _ _ (scopts _ commit? _ _))
;; commit? implies delimit-cut
commit?]
[(? pat:integrated?) #t]
[(pat:literal _lit _ip _lp) #t]
[(pat:datum _datum) #t]
[(pat:action _act _pat) #f]
[(pat:head head tail)
(and (pattern-factorable? head)
(pattern-factorable? tail))]
[(pat:dots heads tail)
;; Conservative approximation for common case: one head pattern
;; In general, check if heads don't overlap, don't overlap with tail.
(and (= (length heads) 1)
(let ([head (car heads)])
(and (pattern-factorable? head)))
(equal? tail (pat:datum '())))]
[(pat:and patterns)
(andmap pattern-factorable? patterns)]
[(pat:or patterns) #f]
[(pat:not pattern) #f] ;; FIXME: ?
[(pat:pair head tail)
(and (pattern-factorable? head)
(pattern-factorable? tail))]
[(pat:vector pattern)
(pattern-factorable? pattern)]
[(pat:box pattern)
(pattern-factorable? pattern)]
[(pat:pstruct key pattern)
(pattern-factorable? pattern)]
[(pat:describe pattern _desc _trans _role)
(pattern-factorable? pattern)]
[(pat:delimit pattern)
(pattern-factorable? pattern)]
[(pat:commit pattern) #t]
[(? pat:reflect?) #f]
[(pat:ord pattern _ _)
(pattern-factorable? pattern)]
[(pat:post pattern)
(pattern-factorable? pattern)]
;; ----
[(hpat:var/p _ _ _ _ _ (scopts _ commit? _ _))
commit?]
[(hpat:seq inner)
(pattern-factorable? inner)]
[(hpat:commit inner) #t]
;; ----
[(ehpat head repc)
(and (equal? repc #f)
(pattern-factorable? head))]
;; ----
[else #f]))
(define (subpatterns-equal? as bs)
(and (= (length as) (length bs))
(for/and ([a (in-list as)]
[b (in-list bs)])
(pattern-equal? a b))))
(define (pattern-equal? a b)
(define result
(cond [(and (pat:any? a) (pat:any? b)) #t]
[(and (pat:svar? a) (pat:svar? b))
(bound-identifier=? (pat:svar-name a) (pat:svar-name b))]
[(and (pat:var/p? a) (pat:var/p? b))
(and (free-id/f-equal? (pat:var/p-parser a) (pat:var/p-parser b))
(bound-id/f-equal? (pat:var/p-name a) (pat:var/p-name b))
(equal-iattrs? (pat:var/p-nested-attrs a) (pat:var/p-nested-attrs b))
(equal-argu? (pat:var/p-argu a) (pat:var/p-argu b))
(expr-equal? (pat:var/p-role a) (pat:var/p-role b)))]
[(and (pat:integrated? a) (pat:integrated? b))
(and (bound-id/f-equal? (pat:integrated-name a) (pat:integrated-name b))
(free-identifier=? (pat:integrated-predicate a)
(pat:integrated-predicate b))
(expr-equal? (pat:integrated-role a) (pat:integrated-role b)))]
[(and (pat:literal? a) (pat:literal? b))
;; literals are hard to compare, so compare gensyms attached to
;; literal ids (see rep.rkt) instead
(let ([ka (syntax-property (pat:literal-id a) 'literal)]
[kb (syntax-property (pat:literal-id b) 'literal)])
(and ka kb (eq? ka kb)))]
[(and (pat:datum? a) (pat:datum? b))
(equal? (pat:datum-datum a)
(pat:datum-datum b))]
[(and (pat:head? a) (pat:head? b))
(and (pattern-equal? (pat:head-head a) (pat:head-head b))
(pattern-equal? (pat:head-tail a) (pat:head-tail b)))]
[(and (pat:dots? a) (pat:dots? b))
(and (subpatterns-equal? (pat:dots-heads a) (pat:dots-heads b))
(pattern-equal? (pat:dots-tail a) (pat:dots-tail b)))]
[(and (pat:and? a) (pat:and? b))
(subpatterns-equal? (pat:and-patterns a) (pat:and-patterns b))]
[(and (pat:or? a) (pat:or? b))
(subpatterns-equal? (pat:or-patterns a) (pat:or-patterns b))]
[(and (pat:not? a) (pat:not? b))
(pattern-equal? (pat:not-pattern a) (pat:not-pattern b))]
[(and (pat:pair? a) (pat:pair? b))
(and (pattern-equal? (pat:pair-head a) (pat:pair-head b))
(pattern-equal? (pat:pair-tail a) (pat:pair-tail b)))]
[(and (pat:vector? a) (pat:vector? b))
(pattern-equal? (pat:vector-pattern a) (pat:vector-pattern b))]
[(and (pat:box? a) (pat:box? b))
(pattern-equal? (pat:box-pattern a) (pat:box-pattern b))]
[(and (pat:pstruct? a) (pat:pstruct? b))
(and (equal? (pat:pstruct-key a)
(pat:pstruct-key b))
(pattern-equal? (pat:pstruct-pattern a)
(pat:pstruct-pattern b)))]
[(and (pat:describe? a) (pat:describe? b)) #f] ;; can't compare desc exprs
[(and (pat:delimit? a) (pat:delimit? b))
(pattern-equal? (pat:delimit-pattern a) (pat:delimit-pattern b))]
[(and (pat:commit? a) (pat:commit? b))
(pattern-equal? (pat:commit-pattern a) (pat:commit-pattern b))]
[(and (pat:reflect? a) (pat:reflect? b)) #f] ;; FIXME: ?
[(and (pat:ord? a) (pat:ord? b))
(and (pattern-equal? (pat:ord-pattern a) (pat:ord-pattern b))
(equal? (pat:ord-group a) (pat:ord-group b))
(equal? (pat:ord-index a) (pat:ord-index b)))]
[(and (pat:post? a) (pat:post? b))
(pattern-equal? (pat:post-pattern a) (pat:post-pattern b))]
;; ---
[(and (hpat:var/p? a) (hpat:var/p? b))
(and (free-id/f-equal? (hpat:var/p-parser a) (hpat:var/p-parser b))
(bound-id/f-equal? (hpat:var/p-name a) (hpat:var/p-name b))
(equal-iattrs? (hpat:var/p-nested-attrs a) (hpat:var/p-nested-attrs b))
(equal-argu? (hpat:var/p-argu a) (hpat:var/p-argu b))
(expr-equal? (hpat:var/p-role a) (hpat:var/p-role b)))]
[(and (hpat:seq? a) (hpat:seq? b))
(pattern-equal? (hpat:seq-inner a) (hpat:seq-inner b))]
;; ---
[(and (ehpat? a) (ehpat? b))
(and (equal? (ehpat-repc a) #f)
(equal? (ehpat-repc b) #f)
(pattern-equal? (ehpat-head a) (ehpat-head b)))]
;; FIXME: more?
[else #f]))
(when DEBUG-OPT-FAIL
(when (and (eq? result #f)
(equal? (syntax->datum #`#,a) (syntax->datum #`#,b)))
(eprintf "** pattern-equal? failed on ~e\n" a)))
result)
(define (equal-iattrs? as bs)
(and (= (length as) (length bs))
;; assumes attrs in same order
(for/and ([aa (in-list as)]
[ba (in-list bs)])
(and (bound-identifier=? (attr-name aa) (attr-name ba))
(equal? (attr-depth aa) (attr-depth ba))
(equal? (attr-syntax? aa) (attr-syntax? ba))))))
(define (expr-equal? a b)
;; Expression equality is undecidable in general. Especially difficult for unexpanded
;; code, but it would be very difficult to set up correct env for local-expand because of
;; attr binding rules. So, do *very* conservative approx: simple variables and literals.
;; FIXME: any other common cases?
(cond [(not (and (syntax? a) (syntax? b)))
(equal? a b)]
[(and (identifier? a) (identifier? b))
;; note: "vars" might be identifier macros (unsafe to consider equal),
;; so check var has no compile-time binding
(and (free-identifier=? a b)
(let/ec k (syntax-local-value a (lambda () (k #t))) #f))]
[(syntax-case (list a b) (quote)
[((quote ad) (quote bd))
(cons (syntax->datum #'ad) (syntax->datum #'bd))]
[_ #f])
=> (lambda (ad+bd)
(equal? (car ad+bd) (cdr ad+bd)))]
[else
;; approx: equal? only if both simple data (bool, string, etc), no inner stx
(let ([ad (syntax-e a)]
[bd (syntax-e b)])
(and (equal? ad bd)
(free-identifier=? (datum->syntax a '#%datum) #'#%datum)
(free-identifier=? (datum->syntax b '#%datum) #'#%datum)))]))
(define (equal-argu? a b)
(define (unwrap-arguments x)
(match x
[(arguments pargs kws kwargs)
(values pargs kws kwargs)]))
(define (list-equal? as bs inner-equal?)
(and (= (length as) (length bs))
(andmap inner-equal? as bs)))
(let-values ([(apargs akws akwargs) (unwrap-arguments a)]
[(bpargs bkws bkwargs) (unwrap-arguments b)])
(and (list-equal? apargs bpargs expr-equal?)
(equal? akws bkws)
(list-equal? akwargs bkwargs expr-equal?))))
(define (free-id/f-equal? a b)
(or (and (eq? a #f)
(eq? b #f))
(and (identifier? a)
(identifier? b)
(free-identifier=? a b))))
(define (bound-id/f-equal? a b)
(or (and (eq? a #f)
(eq? b #f))
(and (identifier? a)
(identifier? b)
(bound-identifier=? a b))))
(define (make-and-pattern subs)
(cond [(null? subs) (pat:any)] ;; shouldn't happen
[(null? (cdr subs)) (car subs)]
[else (pat:and subs)]))
(define (*append a b) (if (null? b) a (append a b)))
(define (stx-e x) (if (syntax? x) (syntax-e x) x))
;; ----
(define (matrix->sexpr rows)
(cond [(null? rows) ;; shouldn't happen
'(FAIL)]
[(null? (cdr rows))
(pk->sexpr (car rows))]
[else
(cons 'TRY (map pk->sexpr rows))]))
(define (pk->sexpr pk)
(match pk
[(pk1 pats k)
(cons 'MATCH (map pattern->sexpr pats))]
[(pk/same pat inner)
(list 'SAME (pattern->sexpr pat) (matrix->sexpr inner))]
[(pk/pair inner)
(list 'PAIR (matrix->sexpr inner))]
[(pk/and inner)
(list 'AND (matrix->sexpr inner))]))
(define (pattern->sexpr p)
(match p
[(pat:any) '_]
[(pat:integrated name pred desc _)
(format-symbol "~a:~a" (or name '_) desc)]
[(pat:svar name)
(syntax-e name)]
[(pat:var/p name parser _ _ _ _)
(cond [(and parser (regexp-match #rx"^parse-(.*)$" (symbol->string (syntax-e parser))))
=> (lambda (m)
(format-symbol "~a:~a" (or name '_) (cadr m)))]
[else
(if name (syntax-e name) '_)])]
[(? pat:literal?) `(quote ,(syntax->datum (pat:literal-id p)))]
[(pat:datum datum) datum]
[(? pat:action?) 'ACTION]
[(pat:pair head tail)
(cons (pattern->sexpr head) (pattern->sexpr tail))]
[(pat:head head tail)
(cons (pattern->sexpr head) (pattern->sexpr tail))]
[(pat:dots (list eh) tail)
(list* (pattern->sexpr eh) '... (pattern->sexpr tail))]
[(ehpat _as hpat '#f _cn)
(pattern->sexpr hpat)]
[_ 'PATTERN]))