[bg] huffman trees

This commit is contained in:
ben 2016-03-23 00:06:33 -04:00
parent 04e1cb701f
commit 05df033790
3 changed files with 311 additions and 0 deletions

View File

@ -540,6 +540,7 @@
(define-primop string : ( Char String))
(define-primop sleep : ( Int Unit))
(define-primop string=? : ( String String Bool))
(define-primop string<=? : ( String String Bool))
(define-typed-syntax string-append
[(_ . strs)

View File

@ -45,4 +45,36 @@ mlish tests by Ben
(map-cps [f : (→ A B)] [x* : (List A)] → (List B))
```
`huffman`
---
```
(empty → Symbol*)
(singleton [s : String] → Symbol*)
(insert [s* : Symbol*] [s1 : String] → Symbol*)
(union [s1 : Symbol*] [s2 : Symbol*] → Symbol*)
(contains [s* : Symbol*] [s : Symbol] → Bool)
(list [x : A] → (List A))
(append [x* : (List A)] [y* : (List A)] → (List A))
(length [x* : (List A)] → Int)
(symbols [h : HTree] → Symbol*)
(weight [h : HTree] → Int)
(make-code-tree [left : HTree] [right : HTree] → HTree)
(decode-aux [bits : Bit*] [root : HTree] [current-branch : HTree] → SymbolList)
(decode [bits : Bit*] [tree : HTree] → SymbolList)
(choose-branch [bit : Bit] [branch : HTree] → HTree)
(adjoin-set [x : HTree] [set : HTreeSet] → HTreeSet)
(make-leaf-set [pair* : (List (× Symbol Int))] → HTreeSet)
sample-tree
sample-message
(encode [message : SymbolList] [tree : HTree] → Bit*)
(contains-symbol [s : Symbol] [tree : HTree] → Bool)
(encode-symbol [s : Symbol] [tree : HTree] → Bit*)
(generate-huffman-tree [pair* : (List (× Symbol Frequency))] → HTree)
(successive-merge [tree* : HTreeSet] → HTree)
rock-pair*
rock-tree (generate-huffman-tree rock-pair*))
rock-message
rock-bit* (encode rock-message rock-tree))
```

View File

@ -0,0 +1,278 @@
#lang s-exp "../../../mlish.rkt"
(require "../../rackunit-typechecking.rkt")
;; Huffman trees from SICP
;; =============================================================================
;; === Sets of Symbols
(define-type-alias Symbol String)
;; Set of strings
(define-type Symbol*
[Empty]
[Singleton String]
[Join String Symbol* Symbol*])
(define (empty → Symbol*)
Empty)
(define (singleton [s : String] → Symbol*)
(Singleton s))
(define (insert [s* : Symbol*] [s1 : String] → Symbol*)
(match s* with
[Empty -> (singleton s1)]
[Singleton s2 ->
(if (string<=? s1 s2)
(if (string=? s1 s2)
s*
(Join s2 (singleton s1) (empty)))
(Join s1 (singleton s2) (empty)))]
[Join s2 l* r* ->
(if (string<=? s1 s2)
(if (string=? s1 s2)
s*
(Join s2 (insert l* s1) r*))
(Join s2 l* (insert r* s1)))]))
(define (union [s1 : Symbol*] [s2 : Symbol*] → Symbol*)
(match s1 with
[Empty -> s2]
[Singleton s -> (insert s2 s)]
[Join s l* r* -> (union l* (union r* (insert s2 s)))]))
(define (contains [s* : Symbol*] [s : Symbol] → Bool)
(match s* with
[Empty -> #f]
[Singleton s2 -> (string=? s s2)]
[Join s2 l* r* ->
(if (string<=? s s2)
(if (string=? s s2)
#t
(contains l* s))
(contains r* s))]))
;; -----------------------------------------------------------------------------
(check-type
(insert (empty) "hello")
: Symbol*
⇒ (singleton "hello"))
(check-type
(insert (insert (empty) "a") "b")
: Symbol*
⇒ (Join "b" (singleton "a") (empty)))
(check-type
(insert (insert (empty) "b") "a")
: Symbol*
⇒ (Join "b" (singleton "a") (empty)))
(check-type
(insert (insert (insert (empty) "a") "b") "c")
: Symbol*
⇒ (Join "b" (singleton "a") (singleton "c")))
(check-type
(insert (insert (insert (empty) "c") "b") "a")
: Symbol*
⇒ (Join "c" (Join "b" (singleton "a") (empty)) (empty)))
(check-type
(union
(insert (insert (insert (empty) "c") "b") "a")
(insert (insert (insert (empty) "a") "b") "c"))
: Symbol*
⇒ (Join "b" (singleton "a") (singleton "c")))
;; -----------------------------------------------------------------------------
(define-type (List A)
[⊥]
[∷ A (List A)])
(define-type-alias SymbolList (List Symbol))
(define (list [x : A] → (List A))
(∷ x ⊥))
(define (append [x* : (List A)] [y* : (List A)] → (List A))
(match x* with
[⊥ -> y*]
[∷ x x* ->
(∷ x (append x* y*))]))
(define (length [x* : (List A)] → Int)
(match x* with
[⊥ -> 0]
[∷ x x* -> (+ 1 (length x*))]))
;; -----------------------------------------------------------------------------
(define-type Bit O I)
(define-type-alias Bit* (List Bit))
;; -----------------------------------------------------------------------------
(define-type HTree
[Leaf String Int] ;; Symbol, Weight
[Node HTree HTree Symbol* Int] ;; Left, Right, Symbols, Weight
)
(define (symbols [h : HTree] → Symbol*)
(match h with
[Leaf s w -> (singleton s)]
[Node lh rh s* w -> s*]))
(define (weight [h : HTree] → Int)
(match h with
[Leaf s w -> w]
[Node l r s w -> w]))
(define (make-code-tree [left : HTree] [right : HTree] → HTree)
(Node left right
(union (symbols left) (symbols right))
(+ (weight left) (weight right))))
(define (decode-aux [bits : Bit*] [root : HTree] [current-branch : HTree] → SymbolList)
(match bits with
[⊥ ->
⊥]
[∷ b bit* ->
(match (choose-branch b current-branch) with
[Leaf s w ->
(∷ s (decode-aux bit* root root))]
[Node l r s* w ->
(decode-aux bit* root (Node l r s* w))])]))
(define (decode [bits : Bit*] [tree : HTree] → SymbolList)
(decode-aux bits tree tree))
(define (choose-branch [bit : Bit] [branch : HTree] → HTree)
(match branch with
[Leaf s w ->
;; Error
(Leaf "ERROR" 0)]
[Node l* r* s* w ->
(match bit with
[O -> l*]
[I -> r*])]))
(define-type-alias HTreeSet (List HTree))
(define (adjoin-set [x : HTree] [set : HTreeSet] → HTreeSet)
(match set with
[⊥ -> (list x)]
[∷ y y* ->
(if (< (weight x) (weight y))
(∷ x set)
(∷ y (adjoin-set x y*)))]))
(define (make-leaf-set [pair* : (List (× Symbol Int))] → HTreeSet)
(match pair* with
[⊥ -> ⊥]
[∷ pair pair* ->
(match pair with
[s i ->
(adjoin-set (Leaf s i) (make-leaf-set pair*))])]))
(check-type
(make-leaf-set (∷ (tup "A" 4)
(∷ (tup "B" 2)
(∷ (tup "C" 1)
(∷ (tup "D" 1)
⊥)))))
: HTreeSet
⇒ (∷ (Leaf "D" 1)
(∷ (Leaf "C" 1)
(∷ (Leaf "B" 2)
(∷ (Leaf "A" 4)
⊥)))))
(define sample-tree
(make-code-tree
(Leaf "A" 4)
(make-code-tree
(Leaf "B" 2)
(make-code-tree
(Leaf "D" 1)
(Leaf "C" 1)))))
(define sample-message
(∷ O (∷ I (∷ I (∷ O (∷ O (∷ I (∷ O (∷ I (∷ O (∷ I (∷ I (∷ I (∷ I (∷ O ⊥)))))))))))))))
(check-type
(decode sample-message sample-tree)
: SymbolList
⇒ (∷ "A" (∷ "D" (∷ "A" (∷ "B" (∷ "B" (∷ "C" (∷ "B" ⊥))))))))
(define (encode [message : SymbolList] [tree : HTree] → Bit*)
(match message with
[⊥ -> ⊥]
[∷ m m* ->
(append (encode-symbol m tree) (encode m* tree))]))
(define (contains-symbol [s : Symbol] [tree : HTree] → Bool)
(contains (symbols tree) s))
;; Undefined if symbol is not in tree. Be careful!
(define (encode-symbol [s : Symbol] [tree : HTree] → Bit*)
(match tree with
[Leaf s w -> ⊥]
[Node l* r* s* w ->
(if (contains-symbol s l*)
(∷ O (encode-symbol s l*))
(∷ I (encode-symbol s r*)))]))
(check-type
(encode (decode sample-message sample-tree) sample-tree)
: Bit*
⇒ sample-message)
(define-type-alias Frequency Int)
(define (generate-huffman-tree [pair* : (List (× Symbol Frequency))] → HTree)
(successive-merge (make-leaf-set pair*)))
(define (successive-merge [tree* : HTreeSet] → HTree)
(match tree* with
[⊥ -> (Leaf "ERROR" 0)]
[∷ t t* ->
(match t* with
[⊥ -> t]
[∷ t2 t* ->
(successive-merge (adjoin-set (make-code-tree t t2) t*))])]))
(define rock-pair*
(∷ (tup "A" 2)
(∷ (tup "BOOM" 2)
(∷ (tup "GET" 2)
(∷ (tup "JOB" 2)
(∷ (tup "NA" 16)
(∷ (tup "SHA" 3)
(∷ (tup "YIP" 9)
(∷ (tup "WAH" 1)
⊥)))))))))
(define rock-tree (generate-huffman-tree rock-pair*))
(define rock-message
(∷ "GET" (∷ "A" (∷ "JOB"
(∷ "SHA" (∷ "NA" (∷ "NA" (∷ "NA" (∷ "NA" (∷ "NA" (∷ "NA" (∷ "NA" (∷ "NA"
(∷ "GET" (∷ "A" (∷ "JOB"
(∷ "SHA" (∷ "NA" (∷ "NA" (∷ "NA" (∷ "NA" (∷ "NA" (∷ "NA" (∷ "NA" (∷ "NA"
(∷ "WAH" (∷ "YIP" (∷ "YIP" (∷ "YIP" (∷ "YIP" (∷ "YIP" (∷ "YIP" (∷ "YIP" (∷ "YIP" (∷ "YIP"
(∷ "SHA" (∷ "BOOM" ⊥)))))))))))))))))))))))))))))))))))))
(define rock-bit* (encode rock-message rock-tree))
(check-type
(decode rock-bit* rock-tree)
: SymbolList
⇒ rock-message)
(check-type
(length rock-bit*)
: Int
⇒ 84)