follow up to William J. Bowman's commit

- put the tests together with the other define-union-language tests
- fix the docs
- add a release note
- construct the merged language a bit more directly
- properly deal with this kind of thing:
  (define-language L1 (e f ::= 1 2 3))
  (define-language L2 (e g ::= 4 5 6))
  (define-union-language L L1 L2)
This commit is contained in:
Robby Findler 2013-03-01 08:20:35 -06:00
parent b0db8798b6
commit 42847ea523
8 changed files with 178 additions and 137 deletions

View File

@ -1,40 +0,0 @@
#lang racket
(provide LBase L1 L2 LMergeUntagged LMergeTagged)
(require redex/reduction-semantics)
;; ------------------------------------------------------------------------
(define-language LBase
(e (+ e e) number))
(define-extended-language L1 LBase
(e .... (- e e)))
(define-extended-language L2 LBase
(e .... (* e e)))
;; Untagged union of two languages that define the same nonterminal
(define-union-language LMergeUntagged L1 L2)
;; Tagged merge of two extended languages that define the same
;; nonterminal
(define-union-language LMergeTagged (f. L1) (d. L2))
;; ------------------------------------------------------------------------
(module+ test
(for ([t (list (term 1) (term (* 1 1)) (term (+ 1 1)) (term (- 1 1)))])
(test-equal (redex-match? LMergeUntagged e t) #t))
(test-equal (redex-match? LMergeTagged f.e 1) #t)
(test-equal (redex-match? LMergeTagged d.e 1) #t)
(test-equal (redex-match? LMergeTagged f.e (term (+ 1 1))) #t)
(test-equal (redex-match? LMergeTagged f.e (term (- 1 1))) #t)
(test-equal (redex-match? LMergeTagged f.e (term (* 1 1))) #f)
(test-equal (redex-match? LMergeTagged d.e (term (+ 1 1))) #t)
(test-equal (redex-match? LMergeTagged d.e (term (* 1 1))) #t)
(test-equal (redex-match? LMergeTagged d.e (term (- 1 1))) #f))

View File

@ -42,6 +42,8 @@ See match-a-pattern.rkt for more details
racket/match
racket/contract
racket/promise
racket/set
data/union-find
racket/performance-hint
(for-syntax racket/base)
"underscore-allowed.rkt"
@ -144,7 +146,7 @@ See match-a-pattern.rkt for more details
(bind-exp rib)
(loop (cdr ribs))))]))))
;; compile-language : language-pict-info[see pict.rkt] (listof nt) (listof (listof sym)) -> compiled-lang
;; compile-language : language-pict-info[see pict.rkt] (listof nt) (listof (uf-set/c symbol?)) -> compiled-lang
(define (compile-language pict-info lang nt-map)
(let* ([clang-ht (make-hasheq)]
[clang-list-ht (make-hasheq)]
@ -210,6 +212,19 @@ See match-a-pattern.rkt for more details
(do-compilation clang-ht clang-list-ht lang)
(struct-copy compiled-lang clang [delayed-cclang compatible-context-language])))
;; mk-uf-sets : (listof (listof sym)) -> (hash[symbol -o> uf-set?])
;; in the result hash, each nt maps to a uf-set that represents
;; the set of non-terminals that are coming from the same group
(define (mk-uf-sets args)
(for/fold ([iht (hash)]) ([same-nts (in-list args)])
(define main-name (car same-nts))
(define main (uf-new main-name))
(for/fold ([iht (hash-set iht main-name main)])
([other (in-list (cdr same-nts))])
(define next (uf-new other))
(uf-union! main next)
(hash-set iht other next))))
;; extract-collapsible-nts : (listof nt) -> (listof any)
(define (extract-collapsible-nts nts)
(define nt-hash (for/hasheq ([nt nts])
@ -2006,7 +2021,7 @@ See match-a-pattern.rkt for more details
(bind? predicate/c)
(bind-name (bind? . -> . symbol?))
(bind-exp (bind? . -> . any/c))
(compile-language (-> any/c (listof nt?) (listof (listof symbol?)) compiled-lang?)))
(compile-language (-> any/c (listof nt?) (hash/c symbol? uf-set?) compiled-lang?)))
(provide compiled-pattern?
print-stats)
@ -2037,4 +2052,5 @@ See match-a-pattern.rkt for more details
build-compatible-context-language
caching-enabled?
check-redudancy
prefix-nts)
prefix-nts
mk-uf-sets)

View File

@ -14,6 +14,8 @@
racket/trace
racket/contract
racket/list
racket/set
data/union-find
mzlib/etc)
(require (for-syntax syntax/name
@ -1745,7 +1747,8 @@
(compile-language (list (list '(uniform-names ...) rhs/lw ...) ...)
(list (make-nt 'first-names (list (make-rhs `r-rhs) ...)) ...
(make-nt 'new-name (list (make-rhs '(nt orig-name)))) ...)
'((uniform-names ...) ...)))))))))]))
(mk-uf-sets '((uniform-names ...) ...))))))))))]))
(define-syntax (define-extended-language stx)
(syntax-case stx ()
@ -1902,7 +1905,7 @@
(define normalized-orig-langs
(for/list ([orig-lang (in-list (syntax->list #'(orig-langs ...)))])
(syntax-case orig-lang ()
[x (identifier? #'x) (list "" #'x (language-id-nts #'x 'define-union-language) orig-lang)]
[x (identifier? #'x) (list #f #'x (language-id-nts #'x 'define-union-language) orig-lang)]
[(prefix lang)
(and (identifier? #'prefix)
(identifier? #'lang))
@ -1912,19 +1915,18 @@
stx orig-lang)])))
;; ht : sym -o> (listof stx)
;; maps each non-terminal (with its prefix) to the
;; list syntax object that they comes from in the original
;; maps each non-terminal (with its prefix) to a
;; list of syntax objects that they come from in the original
;; define-union-language declaration
(define names-table (make-hash))
(for ([normalized-orig-lang (in-list normalized-orig-langs)])
(define prefix (list-ref normalized-orig-lang 0))
(for ([no-prefix-nt (in-list (list-ref normalized-orig-lang 2))])
(define nt (string->symbol (string-append prefix (symbol->string no-prefix-nt))))
(let ([prev (hash-ref names-table nt #f)])
(if prev
(hash-set! names-table nt (cons (list-ref normalized-orig-lang 3) prev))
(hash-set! names-table nt (list (list-ref normalized-orig-lang 3)))))))
(define nt (string->symbol (string-append (or prefix "")
(symbol->string no-prefix-nt))))
(let ([prev (hash-ref names-table nt '())])
(hash-set! names-table nt (cons (list-ref normalized-orig-lang 3) prev)))))
(with-syntax ([(all-names ...) (sort (hash-map names-table (λ (x y) x)) string<=? #:key symbol->string)]
[((prefix old-lang _1 _2) ...) normalized-orig-langs]
@ -1944,62 +1946,72 @@
'(all-names ...)))))))]))
(define (union-language old-langs/prefixes)
(define new-nt-map
(apply
append
(for/list ([old-pr (in-list old-langs/prefixes)])
(define prefix (list-ref old-pr 0))
(define nt-map (compiled-lang-nt-map (list-ref old-pr 1)))
(for/list ([lst (in-list nt-map)])
(for/list ([sym (in-list lst)])
(string->symbol (string-append prefix (symbol->string sym))))))))
(define new-nts
(apply
append
(for/list ([old-lang/prefix (in-list old-langs/prefixes)])
(define prefix (list-ref old-lang/prefix 0))
(define lang (compiled-lang-lang (list-ref old-lang/prefix 1)))
(for/list ([nt (in-list lang)])
(make-nt (string->symbol (string-append prefix (symbol->string (nt-name nt))))
(for/list ([rhs (in-list (nt-rhs nt))])
(make-rhs (prefix-nts prefix (rhs-pattern rhs)))))))))
;; Each language in the union might define the same nt, and might
;; even define the same rhs for the same nt, so merge them to get a
;; proper union.
;; NOTE: This could probably be done when defining new-nts to
;; eliminate a second pass over all the nts.
(define merge-nts
(let ()
(define names-table (make-hash))
(for/list ([nt (in-list new-nts)])
(let* ([name (nt-name nt)]
[prev (hash-ref names-table name #f)])
(if prev
(hash-set! names-table name (make-nt name (remove-duplicates (append (nt-rhs prev) (nt-rhs nt)))))
(hash-set! names-table name nt))))
(hash-map names-table (lambda (x y) y))))
(define (add-prefix prefix sym)
(if prefix
(string->symbol
(string-append prefix
(symbol->string sym)))
sym))
;; nt-maps-with-prefixes : (listof hash[symbol -o> uf-set?])
;; add prefixes on the canonical elements and in the
;; hash-table domains
(define nt-maps-with-prefixes
(for/list ([old-pr (in-list old-langs/prefixes)])
(define prefix (list-ref old-pr 0))
(define nt-map (compiled-lang-nt-map (list-ref old-pr 1)))
(cond
[prefix
(define already-prefixed '())
(for/hash ([(nt a-uf-set) (in-hash nt-map)])
(define already-prefixed?
(for/or ([x (in-list already-prefixed)])
(uf-same-set? x a-uf-set)))
(unless already-prefixed?
(uf-set-canonical! a-uf-set (add-prefix prefix (uf-find a-uf-set)))
(set! already-prefixed (cons a-uf-set already-prefixed)))
(values (add-prefix prefix nt) a-uf-set))]
[else nt-map])))
;; combine all of the nt-maps into a single one,
;; unioning the sets as approrpriate
(define new-nt-map (car nt-maps-with-prefixes))
(for ([nt-map (in-list (cdr nt-maps-with-prefixes))])
(for ([(k this-uf-set) (in-hash nt-map)])
(define final-uf-set (hash-ref new-nt-map k #f))
(cond
[final-uf-set
(uf-union! final-uf-set this-uf-set)]
[else
(set! new-nt-map (hash-set new-nt-map k this-uf-set))])))
(define names-table (make-hash))
(for ([old-lang/prefix (in-list old-langs/prefixes)])
(define prefix (list-ref old-lang/prefix 0))
(define lang (compiled-lang-lang (list-ref old-lang/prefix 1)))
(for ([nt (in-list lang)])
(define name (add-prefix prefix (nt-name nt)))
(define new-rhses
(for/set ([rhs (in-list (nt-rhs nt))])
(if prefix
(make-rhs (prefix-nts prefix (rhs-pattern rhs)))
rhs)))
(hash-set! names-table
name
(set-union new-rhses (hash-ref names-table name (set))))))
(compile-language #f
merge-nts
(remove-duplicates new-nt-map)))
(hash-map names-table (λ (name set) (make-nt name (set->list set))))
new-nt-map))
;; find-primary-nt : symbol lang -> symbol or #f
;; returns the primary non-terminal for a given nt, or #f if `nt' isn't bound in the language.
(define (find-primary-nt nt lang)
(let ([combined (find-combined-nts nt lang)])
(and combined
(car combined))))
;; find-combined-nts : symbol lang -> (listof symbol) or #f
;; returns the combined set of non-terminals for 'nt' from lang
(define (find-combined-nts nt lang)
(ormap (λ (nt-line)
(and (member nt nt-line)
nt-line))
(compiled-lang-nt-map lang)))
(define uf/f (hash-ref (compiled-lang-nt-map lang) nt #f))
(and uf/f
(uf-find uf/f)))
(define (apply-reduction-relation* reductions exp
#:cache-all? [cache-all? (current-cache-all?)]

View File

@ -737,11 +737,32 @@ extends all of them.
Constructs a language that is the union of all of the
languages listed in the @racket[base/prefix-lang].
If the two languages have non-terminals in common, then
@racket[define-union-language] will combine all of the productions
of the common non-terminals. For example, this definition of @racket[L]:
@racketblock[(define-language L1
(e ::=
(+ e e)
number))
(define-language L2
(e ::=
(if e e e)
true
false))
(define-union-language L L1 L2)]
is equivalent to this one:
@racketblock[(define-language L
(e ::=
(+ e e)
number
(if e e e)
true
false))]
If a language has a prefix, then all of the non-terminals
from that language have the corresponding prefix in
the union language. The prefix helps avoid collisions
between the constituent language's non-terminals
(which is illegal).
the union language. The prefix helps avoid unintended collisions
between the constituent language's non-terminals.
For example, with two these two languages:
@racketblock[(define-language UT

View File

@ -831,23 +831,20 @@
(define (test-empty/proc line pat exp ans)
(run-match-test
line
`(match-pattern (compile-pattern (compile-language 'pict-stuff-not-used '() '()) ',pat #t) ',exp)
`(match-pattern (compile-pattern (compile-language 'pict-stuff-not-used '() (hash)) ',pat #t) ',exp)
(match-pattern
(compile-pattern (compile-language 'pict-stuff-not-used '() '()) pat #t)
(compile-pattern (compile-language 'pict-stuff-not-used '() (hash)) pat #t)
exp)
ans))
;; make-nt-map : (listof nt) -> (listof (listof symbol))
(define (make-nt-map nts)
(map (λ (x) (list (nt-name x))) nts))
;; test-lang : sexp[pattern] sexp[term] answer (list/c nt) -> void
;; returns #t if pat matching exp with the language defined by the given nts
(define (test-lang line pat exp ans nts)
(let ([nt-map (make-nt-map nts)])
(let ([nt-map (mk-uf-sets (map (λ (x) (list (nt-name x)))
nts))])
(run-match-test
line
`(match-pattern (compile-pattern (compile-language 'pict-stuff-not-used ',nts ',nt-map) ',pat #t) ',exp)
`(match-pattern (compile-pattern (compile-language 'pict-stuff-not-used ',nts ,nt-map) ',pat #t) ',exp)
(match-pattern
(compile-pattern (compile-language 'pict-stuff-not-used nts nt-map) pat #t)
exp)
@ -909,7 +906,7 @@
(set! xab-lang
(compile-language 'pict-stuff-not-used
nts
(map (λ (x) (list (nt-name x))) nts)))))
(mk-uf-sets (map (λ (x) (list (nt-name x))) nts))))))
(run-match-test
line
`(match-pattern (compile-pattern xab-lang ',pat #t) ',exp)
@ -928,7 +925,7 @@
(list (make-rhs 'a)))
(make-nt 'bb
(list (make-rhs 'b))))
'((aa) (bb)))))
(mk-uf-sets '((aa) (bb))))))
(run-match-test
line
`(match-pattern (compile-pattern ab-lang ',pat #t) ',exp)
@ -971,7 +968,9 @@
`(test-ellipsis-binding ,pat)
(let ([mtch ((compiled-pattern-cp
(let ([nts (map (λ (nt-def) (nt (car nt-def) (map rhs (cdr nt-def)))) nts/sexp)])
(compile-pattern (compile-language 'pict-stuff-not-used nts (make-nt-map nts)) pat #t)))
(compile-pattern (compile-language 'pict-stuff-not-used nts
(mk-uf-sets (map (λ (x) (list (nt-name x))) nts)))
pat #t)))
exp
#t)])
(if mtch

View File

@ -27,21 +27,3 @@
(#rx"expected production" ([not-prod ::=]) (define-language L (x ::= y not-prod z)))
(#rx"expected non-terminal definition" ([not-def q]) (define-language L not-def))
(#rx"expected non-terminal definition" ([not-def ()]) (define-language L not-def))
("define-union-language: two sublanguages both contribute the non-terminal: e"
([L1r L1] [L2r L2]) ([L1 L1] [L2 L2])
(let ()
(define-language L1
(e any))
(define-language L2
(e any))
(define-union-language L L1r L2r)))
("define-union-language: two sublanguages both contribute the non-terminal: -e"
([L1r L1] [L2r (- L2)]) ([L1 L1] [L2 L2])
(let ()
(define-language L1
(-e any))
(define-language L2
(e any))
(define-union-language L L1r L2r)))

View File

@ -456,26 +456,71 @@
(for ([t (list (term 1) (term (* 1 1)) (term (+ 1 1)) (term (- 1 1)))])
(test (redex-match? LMergeUntagged e t) #t)))
;; test that define-union-language properly merges non-terminals
(let ()
(define-language LBase
(e (+ e e) number))
(define-extended-language L1 LBase
(e .... (- e e)))
(define-extended-language L2 LBase
(e .... (* e e)))
;; Untagged union of two languages that define the same nonterminal
(define-union-language LMergeUntagged L1 L2)
;; Tagged merge of two extended languages that define the same
;; nonterminal
(define-union-language LMergeTagged (f. L1) (d. L2))
(test (redex-match? LMergeUntagged e (term 1)) #t)
(test (redex-match? LMergeUntagged e (term (* 1 1))) #t)
(test (redex-match? LMergeUntagged e (term (+ 1 1))) #t)
(test (redex-match? LMergeUntagged e (term (- 1 1))) #t)
(test (redex-match? LMergeTagged f.e 1) #t)
(test (redex-match? LMergeTagged d.e 1) #t)
(test (redex-match? LMergeTagged f.e (term (+ 1 1))) #t)
(test (redex-match? LMergeTagged f.e (term (- 1 1))) #t)
(test (redex-match? LMergeTagged f.e (term (* 1 1))) #f)
(test (redex-match? LMergeTagged d.e (term (+ 1 1))) #t)
(test (redex-match? LMergeTagged d.e (term (* 1 1))) #t)
(test (redex-match? LMergeTagged d.e (term (- 1 1))) #f))
(let ()
(define-language L1 (e f ::= 1))
(define-language L2 (e g ::= 2))
(define-union-language Lc L1 L2)
(test (redex-match? Lc e 1) #t)
(test (redex-match? Lc e 2) #t)
(test (redex-match? Lc f 1) #t)
(test (redex-match? Lc f 2) #t)
(test (redex-match? Lc g 1) #t)
(test (redex-match? Lc g 2) #t))
(let ()
(define-language UT
(e (e e)
(λ (x) e)
x))
(define-language WT
(e (e e)
(λ (x t) e)
x)
(t ( t t)
num))
(define-extended-language UT+ UT
(e ....
(foo e e)))
(define-union-language B (ut. UT+) (wt. WT))
(test (and (redex-match B ut.e (term (foo x x))) #t) #t)
(test (redex-match B wt.e (term (foo x x))) #f))

View File

@ -1,3 +1,9 @@
v5.3.4
* adjusted define-union-language to allow the unioned languages to
have overlapping non-terminals; in that case the productions are
combined
v5.3.3
No changes