diff --git a/collects/redex/examples/union-lang.rkt b/collects/redex/examples/union-lang.rkt deleted file mode 100644 index cd3010d459..0000000000 --- a/collects/redex/examples/union-lang.rkt +++ /dev/null @@ -1,40 +0,0 @@ -#lang racket - -(provide LBase L1 L2 LMergeUntagged LMergeTagged) -(require redex/reduction-semantics) - -;; ------------------------------------------------------------------------ - -(define-language LBase - (e (+ e e) number)) - -(define-extended-language L1 LBase - (e .... (- e e))) - -(define-extended-language L2 LBase - (e .... (* e e))) - -;; Untagged union of two languages that define the same nonterminal -(define-union-language LMergeUntagged L1 L2) - -;; Tagged merge of two extended languages that define the same -;; nonterminal -(define-union-language LMergeTagged (f. L1) (d. L2)) - -;; ------------------------------------------------------------------------ - -(module+ test - - (for ([t (list (term 1) (term (* 1 1)) (term (+ 1 1)) (term (- 1 1)))]) - (test-equal (redex-match? LMergeUntagged e t) #t)) - - (test-equal (redex-match? LMergeTagged f.e 1) #t) - (test-equal (redex-match? LMergeTagged d.e 1) #t) - - (test-equal (redex-match? LMergeTagged f.e (term (+ 1 1))) #t) - (test-equal (redex-match? LMergeTagged f.e (term (- 1 1))) #t) - (test-equal (redex-match? LMergeTagged f.e (term (* 1 1))) #f) - - (test-equal (redex-match? LMergeTagged d.e (term (+ 1 1))) #t) - (test-equal (redex-match? LMergeTagged d.e (term (* 1 1))) #t) - (test-equal (redex-match? LMergeTagged d.e (term (- 1 1))) #f)) diff --git a/collects/redex/private/matcher.rkt b/collects/redex/private/matcher.rkt index c3486ffef7..808d23252a 100644 --- a/collects/redex/private/matcher.rkt +++ b/collects/redex/private/matcher.rkt @@ -42,6 +42,8 @@ See match-a-pattern.rkt for more details racket/match racket/contract racket/promise + racket/set + data/union-find racket/performance-hint (for-syntax racket/base) "underscore-allowed.rkt" @@ -144,7 +146,7 @@ See match-a-pattern.rkt for more details (bind-exp rib) (loop (cdr ribs))))])))) -;; compile-language : language-pict-info[see pict.rkt] (listof nt) (listof (listof sym)) -> compiled-lang +;; compile-language : language-pict-info[see pict.rkt] (listof nt) (listof (uf-set/c symbol?)) -> compiled-lang (define (compile-language pict-info lang nt-map) (let* ([clang-ht (make-hasheq)] [clang-list-ht (make-hasheq)] @@ -210,6 +212,19 @@ See match-a-pattern.rkt for more details (do-compilation clang-ht clang-list-ht lang) (struct-copy compiled-lang clang [delayed-cclang compatible-context-language]))) +;; mk-uf-sets : (listof (listof sym)) -> (hash[symbol -o> uf-set?]) +;; in the result hash, each nt maps to a uf-set that represents +;; the set of non-terminals that are coming from the same group +(define (mk-uf-sets args) + (for/fold ([iht (hash)]) ([same-nts (in-list args)]) + (define main-name (car same-nts)) + (define main (uf-new main-name)) + (for/fold ([iht (hash-set iht main-name main)]) + ([other (in-list (cdr same-nts))]) + (define next (uf-new other)) + (uf-union! main next) + (hash-set iht other next)))) + ;; extract-collapsible-nts : (listof nt) -> (listof any) (define (extract-collapsible-nts nts) (define nt-hash (for/hasheq ([nt nts]) @@ -2006,7 +2021,7 @@ See match-a-pattern.rkt for more details (bind? predicate/c) (bind-name (bind? . -> . symbol?)) (bind-exp (bind? . -> . any/c)) - (compile-language (-> any/c (listof nt?) (listof (listof symbol?)) compiled-lang?))) + (compile-language (-> any/c (listof nt?) (hash/c symbol? uf-set?) compiled-lang?))) (provide compiled-pattern? print-stats) @@ -2037,4 +2052,5 @@ See match-a-pattern.rkt for more details build-compatible-context-language caching-enabled? check-redudancy - prefix-nts) + prefix-nts + mk-uf-sets) diff --git a/collects/redex/private/reduction-semantics.rkt b/collects/redex/private/reduction-semantics.rkt index 19e281a10d..67d698d0fa 100644 --- a/collects/redex/private/reduction-semantics.rkt +++ b/collects/redex/private/reduction-semantics.rkt @@ -14,6 +14,8 @@ racket/trace racket/contract racket/list + racket/set + data/union-find mzlib/etc) (require (for-syntax syntax/name @@ -1745,7 +1747,8 @@ (compile-language (list (list '(uniform-names ...) rhs/lw ...) ...) (list (make-nt 'first-names (list (make-rhs `r-rhs) ...)) ... (make-nt 'new-name (list (make-rhs '(nt orig-name)))) ...) - '((uniform-names ...) ...)))))))))])) + (mk-uf-sets '((uniform-names ...) ...))))))))))])) + (define-syntax (define-extended-language stx) (syntax-case stx () @@ -1902,7 +1905,7 @@ (define normalized-orig-langs (for/list ([orig-lang (in-list (syntax->list #'(orig-langs ...)))]) (syntax-case orig-lang () - [x (identifier? #'x) (list "" #'x (language-id-nts #'x 'define-union-language) orig-lang)] + [x (identifier? #'x) (list #f #'x (language-id-nts #'x 'define-union-language) orig-lang)] [(prefix lang) (and (identifier? #'prefix) (identifier? #'lang)) @@ -1912,19 +1915,18 @@ stx orig-lang)]))) ;; ht : sym -o> (listof stx) - ;; maps each non-terminal (with its prefix) to the - ;; list syntax object that they comes from in the original + ;; maps each non-terminal (with its prefix) to a + ;; list of syntax objects that they come from in the original ;; define-union-language declaration (define names-table (make-hash)) (for ([normalized-orig-lang (in-list normalized-orig-langs)]) (define prefix (list-ref normalized-orig-lang 0)) (for ([no-prefix-nt (in-list (list-ref normalized-orig-lang 2))]) - (define nt (string->symbol (string-append prefix (symbol->string no-prefix-nt)))) - (let ([prev (hash-ref names-table nt #f)]) - (if prev - (hash-set! names-table nt (cons (list-ref normalized-orig-lang 3) prev)) - (hash-set! names-table nt (list (list-ref normalized-orig-lang 3))))))) + (define nt (string->symbol (string-append (or prefix "") + (symbol->string no-prefix-nt)))) + (let ([prev (hash-ref names-table nt '())]) + (hash-set! names-table nt (cons (list-ref normalized-orig-lang 3) prev))))) (with-syntax ([(all-names ...) (sort (hash-map names-table (λ (x y) x)) string<=? #:key symbol->string)] [((prefix old-lang _1 _2) ...) normalized-orig-langs] @@ -1944,62 +1946,72 @@ '(all-names ...)))))))])) (define (union-language old-langs/prefixes) - (define new-nt-map - (apply - append - (for/list ([old-pr (in-list old-langs/prefixes)]) - (define prefix (list-ref old-pr 0)) - (define nt-map (compiled-lang-nt-map (list-ref old-pr 1))) - (for/list ([lst (in-list nt-map)]) - (for/list ([sym (in-list lst)]) - (string->symbol (string-append prefix (symbol->string sym)))))))) - (define new-nts - (apply - append - (for/list ([old-lang/prefix (in-list old-langs/prefixes)]) - (define prefix (list-ref old-lang/prefix 0)) - (define lang (compiled-lang-lang (list-ref old-lang/prefix 1))) - (for/list ([nt (in-list lang)]) - (make-nt (string->symbol (string-append prefix (symbol->string (nt-name nt)))) - (for/list ([rhs (in-list (nt-rhs nt))]) - (make-rhs (prefix-nts prefix (rhs-pattern rhs))))))))) - - ;; Each language in the union might define the same nt, and might - ;; even define the same rhs for the same nt, so merge them to get a - ;; proper union. - ;; NOTE: This could probably be done when defining new-nts to - ;; eliminate a second pass over all the nts. - (define merge-nts - (let () - (define names-table (make-hash)) - (for/list ([nt (in-list new-nts)]) - (let* ([name (nt-name nt)] - [prev (hash-ref names-table name #f)]) - (if prev - (hash-set! names-table name (make-nt name (remove-duplicates (append (nt-rhs prev) (nt-rhs nt))))) - (hash-set! names-table name nt)))) - (hash-map names-table (lambda (x y) y)))) + (define (add-prefix prefix sym) + (if prefix + (string->symbol + (string-append prefix + (symbol->string sym))) + sym)) + + ;; nt-maps-with-prefixes : (listof hash[symbol -o> uf-set?]) + ;; add prefixes on the canonical elements and in the + ;; hash-table domains + (define nt-maps-with-prefixes + (for/list ([old-pr (in-list old-langs/prefixes)]) + (define prefix (list-ref old-pr 0)) + (define nt-map (compiled-lang-nt-map (list-ref old-pr 1))) + (cond + [prefix + (define already-prefixed '()) + (for/hash ([(nt a-uf-set) (in-hash nt-map)]) + (define already-prefixed? + (for/or ([x (in-list already-prefixed)]) + (uf-same-set? x a-uf-set))) + (unless already-prefixed? + (uf-set-canonical! a-uf-set (add-prefix prefix (uf-find a-uf-set))) + (set! already-prefixed (cons a-uf-set already-prefixed))) + (values (add-prefix prefix nt) a-uf-set))] + [else nt-map]))) + ;; combine all of the nt-maps into a single one, + ;; unioning the sets as approrpriate + (define new-nt-map (car nt-maps-with-prefixes)) + (for ([nt-map (in-list (cdr nt-maps-with-prefixes))]) + (for ([(k this-uf-set) (in-hash nt-map)]) + (define final-uf-set (hash-ref new-nt-map k #f)) + (cond + [final-uf-set + (uf-union! final-uf-set this-uf-set)] + [else + (set! new-nt-map (hash-set new-nt-map k this-uf-set))]))) + + (define names-table (make-hash)) + (for ([old-lang/prefix (in-list old-langs/prefixes)]) + (define prefix (list-ref old-lang/prefix 0)) + (define lang (compiled-lang-lang (list-ref old-lang/prefix 1))) + (for ([nt (in-list lang)]) + (define name (add-prefix prefix (nt-name nt))) + (define new-rhses + (for/set ([rhs (in-list (nt-rhs nt))]) + (if prefix + (make-rhs (prefix-nts prefix (rhs-pattern rhs))) + rhs))) + (hash-set! names-table + name + (set-union new-rhses (hash-ref names-table name (set)))))) + (compile-language #f - merge-nts - (remove-duplicates new-nt-map))) + (hash-map names-table (λ (name set) (make-nt name (set->list set)))) + new-nt-map)) ;; find-primary-nt : symbol lang -> symbol or #f ;; returns the primary non-terminal for a given nt, or #f if `nt' isn't bound in the language. (define (find-primary-nt nt lang) - (let ([combined (find-combined-nts nt lang)]) - (and combined - (car combined)))) - -;; find-combined-nts : symbol lang -> (listof symbol) or #f -;; returns the combined set of non-terminals for 'nt' from lang -(define (find-combined-nts nt lang) - (ormap (λ (nt-line) - (and (member nt nt-line) - nt-line)) - (compiled-lang-nt-map lang))) + (define uf/f (hash-ref (compiled-lang-nt-map lang) nt #f)) + (and uf/f + (uf-find uf/f))) (define (apply-reduction-relation* reductions exp #:cache-all? [cache-all? (current-cache-all?)] diff --git a/collects/redex/scribblings/ref.scrbl b/collects/redex/scribblings/ref.scrbl index 1054f06ac1..fb01f411b8 100644 --- a/collects/redex/scribblings/ref.scrbl +++ b/collects/redex/scribblings/ref.scrbl @@ -737,11 +737,32 @@ extends all of them. Constructs a language that is the union of all of the languages listed in the @racket[base/prefix-lang]. + If the two languages have non-terminals in common, then + @racket[define-union-language] will combine all of the productions + of the common non-terminals. For example, this definition of @racket[L]: + @racketblock[(define-language L1 + (e ::= + (+ e e) + number)) + (define-language L2 + (e ::= + (if e e e) + true + false)) + (define-union-language L L1 L2)] + is equivalent to this one: + @racketblock[(define-language L + (e ::= + (+ e e) + number + (if e e e) + true + false))] + If a language has a prefix, then all of the non-terminals from that language have the corresponding prefix in - the union language. The prefix helps avoid collisions - between the constituent language's non-terminals - (which is illegal). + the union language. The prefix helps avoid unintended collisions + between the constituent language's non-terminals. For example, with two these two languages: @racketblock[(define-language UT diff --git a/collects/redex/tests/matcher-test.rkt b/collects/redex/tests/matcher-test.rkt index cb7ec44d53..24f0281422 100644 --- a/collects/redex/tests/matcher-test.rkt +++ b/collects/redex/tests/matcher-test.rkt @@ -831,23 +831,20 @@ (define (test-empty/proc line pat exp ans) (run-match-test line - `(match-pattern (compile-pattern (compile-language 'pict-stuff-not-used '() '()) ',pat #t) ',exp) + `(match-pattern (compile-pattern (compile-language 'pict-stuff-not-used '() (hash)) ',pat #t) ',exp) (match-pattern - (compile-pattern (compile-language 'pict-stuff-not-used '() '()) pat #t) + (compile-pattern (compile-language 'pict-stuff-not-used '() (hash)) pat #t) exp) ans)) - ;; make-nt-map : (listof nt) -> (listof (listof symbol)) - (define (make-nt-map nts) - (map (λ (x) (list (nt-name x))) nts)) - ;; test-lang : sexp[pattern] sexp[term] answer (list/c nt) -> void ;; returns #t if pat matching exp with the language defined by the given nts (define (test-lang line pat exp ans nts) - (let ([nt-map (make-nt-map nts)]) + (let ([nt-map (mk-uf-sets (map (λ (x) (list (nt-name x))) + nts))]) (run-match-test line - `(match-pattern (compile-pattern (compile-language 'pict-stuff-not-used ',nts ',nt-map) ',pat #t) ',exp) + `(match-pattern (compile-pattern (compile-language 'pict-stuff-not-used ',nts ,nt-map) ',pat #t) ',exp) (match-pattern (compile-pattern (compile-language 'pict-stuff-not-used nts nt-map) pat #t) exp) @@ -909,7 +906,7 @@ (set! xab-lang (compile-language 'pict-stuff-not-used nts - (map (λ (x) (list (nt-name x))) nts))))) + (mk-uf-sets (map (λ (x) (list (nt-name x))) nts)))))) (run-match-test line `(match-pattern (compile-pattern xab-lang ',pat #t) ',exp) @@ -928,7 +925,7 @@ (list (make-rhs 'a))) (make-nt 'bb (list (make-rhs 'b)))) - '((aa) (bb))))) + (mk-uf-sets '((aa) (bb)))))) (run-match-test line `(match-pattern (compile-pattern ab-lang ',pat #t) ',exp) @@ -971,7 +968,9 @@ `(test-ellipsis-binding ,pat) (let ([mtch ((compiled-pattern-cp (let ([nts (map (λ (nt-def) (nt (car nt-def) (map rhs (cdr nt-def)))) nts/sexp)]) - (compile-pattern (compile-language 'pict-stuff-not-used nts (make-nt-map nts)) pat #t))) + (compile-pattern (compile-language 'pict-stuff-not-used nts + (mk-uf-sets (map (λ (x) (list (nt-name x))) nts))) + pat #t))) exp #t)]) (if mtch diff --git a/collects/redex/tests/syn-err-tests/language-definition.rktd b/collects/redex/tests/syn-err-tests/language-definition.rktd index 1a740b0d65..a7e881721f 100644 --- a/collects/redex/tests/syn-err-tests/language-definition.rktd +++ b/collects/redex/tests/syn-err-tests/language-definition.rktd @@ -27,21 +27,3 @@ (#rx"expected production" ([not-prod ::=]) (define-language L (x ::= y not-prod z))) (#rx"expected non-terminal definition" ([not-def q]) (define-language L not-def)) (#rx"expected non-terminal definition" ([not-def ()]) (define-language L not-def)) - -("define-union-language: two sublanguages both contribute the non-terminal: e" - ([L1r L1] [L2r L2]) ([L1 L1] [L2 L2]) - (let () - (define-language L1 - (e any)) - (define-language L2 - (e any)) - (define-union-language L L1r L2r))) - -("define-union-language: two sublanguages both contribute the non-terminal: -e" - ([L1r L1] [L2r (- L2)]) ([L1 L1] [L2 L2]) - (let () - (define-language L1 - (-e any)) - (define-language L2 - (e any)) - (define-union-language L L1r L2r))) diff --git a/collects/redex/tests/tl-test.rkt b/collects/redex/tests/tl-test.rkt index 7c3611a1a4..98d059f672 100644 --- a/collects/redex/tests/tl-test.rkt +++ b/collects/redex/tests/tl-test.rkt @@ -456,26 +456,71 @@ (for ([t (list (term 1) (term (* 1 1)) (term (+ 1 1)) (term (- 1 1)))]) (test (redex-match? LMergeUntagged e t) #t))) - + + ;; test that define-union-language properly merges non-terminals + (let () + (define-language LBase + (e (+ e e) number)) + + (define-extended-language L1 LBase + (e .... (- e e))) + + (define-extended-language L2 LBase + (e .... (* e e))) + + ;; Untagged union of two languages that define the same nonterminal + (define-union-language LMergeUntagged L1 L2) + + ;; Tagged merge of two extended languages that define the same + ;; nonterminal + (define-union-language LMergeTagged (f. L1) (d. L2)) + + (test (redex-match? LMergeUntagged e (term 1)) #t) + (test (redex-match? LMergeUntagged e (term (* 1 1))) #t) + (test (redex-match? LMergeUntagged e (term (+ 1 1))) #t) + (test (redex-match? LMergeUntagged e (term (- 1 1))) #t) + + (test (redex-match? LMergeTagged f.e 1) #t) + (test (redex-match? LMergeTagged d.e 1) #t) + + (test (redex-match? LMergeTagged f.e (term (+ 1 1))) #t) + (test (redex-match? LMergeTagged f.e (term (- 1 1))) #t) + (test (redex-match? LMergeTagged f.e (term (* 1 1))) #f) + + (test (redex-match? LMergeTagged d.e (term (+ 1 1))) #t) + (test (redex-match? LMergeTagged d.e (term (* 1 1))) #t) + (test (redex-match? LMergeTagged d.e (term (- 1 1))) #f)) + + (let () + (define-language L1 (e f ::= 1)) + (define-language L2 (e g ::= 2)) + (define-union-language Lc L1 L2) + (test (redex-match? Lc e 1) #t) + (test (redex-match? Lc e 2) #t) + (test (redex-match? Lc f 1) #t) + (test (redex-match? Lc f 2) #t) + (test (redex-match? Lc g 1) #t) + (test (redex-match? Lc g 2) #t)) + (let () (define-language UT (e (e e) (λ (x) e) x)) - + (define-language WT (e (e e) (λ (x t) e) x) (t (→ t t) num)) - + (define-extended-language UT+ UT (e .... (foo e e))) - + (define-union-language B (ut. UT+) (wt. WT)) - + (test (and (redex-match B ut.e (term (foo x x))) #t) #t) (test (redex-match B wt.e (term (foo x x))) #f)) diff --git a/doc/release-notes/redex/HISTORY.txt b/doc/release-notes/redex/HISTORY.txt index ba10bef654..a037257f10 100644 --- a/doc/release-notes/redex/HISTORY.txt +++ b/doc/release-notes/redex/HISTORY.txt @@ -1,3 +1,9 @@ +v5.3.4 + + * adjusted define-union-language to allow the unioned languages to + have overlapping non-terminals; in that case the productions are + combined + v5.3.3 No changes