diff --git a/Item.mli b/Item.mli new file mode 100644 index 0000000..27a1f08 --- /dev/null +++ b/Item.mli @@ -0,0 +1,6 @@ +module type S = + sig + type t + val compare : t -> t -> int + val to_string : t -> string + end diff --git a/Partition.mli b/Partition.mli new file mode 100644 index 0000000..cc6aa2d --- /dev/null +++ b/Partition.mli @@ -0,0 +1,64 @@ +(** This module offers the abstract data type of a partition of + classes of equivalent items (Union & Find). *) + +(** The items are of type [PrintOrdType.t], that is, they have to obey + a total order, but also they must be printable to ease + debugging. The signature [PrintOrdType] is the input signature of + the functor {!Partition.Make}. *) +module type PrintOrdType = + sig + (** Type of items *) + type t + + (** Same convention as {!Pervasives.compare} *) + val compare : t -> t -> int + + val to_string : t -> string + end + +(** The module signature [S] is the output signature of the functor + {!Partition.Make}. *) +module type S = + sig + type item + type partition + type t = partition + + (** {1 Creation} *) + + (** The value [empty] is an empty partition. *) + val empty : partition + + (** The value of [equiv i j p] is the partition [p] extended with + the equivalence of items [i] and [j]. If both [i] and [j] are + already known to be equivalent, then [equiv i j p == p]. *) + val equiv : item -> item -> partition -> partition + + (** The value of [alias i j p] is the partition [p] extended with + the fact that item [i] is an alias of item [j]. This is the + same as [equiv i j p], except that it is guaranteed that the + item [i] is not the representative of its equivalence class in + [alias i j p]. *) + val alias : item -> item -> partition -> partition + + (** {1 Projection} *) + + (** The value of the call [repr i p] is the representative of item + [i] in the partition [p]. The built-in exception [Not_found] + is raised if [i] is not in [p]. *) + val repr : item -> partition -> item + + (** The side-effect of the call [print p] is the printing of the + partition [p] on standard output, based on [Ord.to_string]. *) + val print : partition -> unit + + (** {1 Predicates} *) + + (** The value of [is_equiv i j p] is [true] if, and only if, the + items [i] and [j] belong to the same equivalence class in the + partition [p], that is, [i] and [j] have the same + representative. *) + val is_equiv : item -> item -> partition -> bool + end + +module Make (Ord : PrintOrdType) : S with type item = Ord.t diff --git a/Partition0.ml b/Partition0.ml new file mode 100644 index 0000000..691c400 --- /dev/null +++ b/Partition0.ml @@ -0,0 +1,47 @@ +(* Naive persistent implementation of Union/Find: O(n^2) worst case *) + +module Make (Item: Item.S) = + struct + + type item = Item.t + type repr = item (** Class representatives *) + + let equal i j = Item.compare i j = 0 + + module ItemMap = Map.Make (Item) + + type height = int + + type partition = item ItemMap.t + type t = partition + + let empty = ItemMap.empty + + let rec repr item partition = + let parent = ItemMap.find item partition in + if equal parent item + then item + else repr parent partition + + let is_equiv (i: item) (j: item) (p: partition) = + equal (repr i p) (repr j p) + + let get_or_set (i: item) (p: partition) : item * partition = + try repr i p, p with Not_found -> i, ItemMap.add i i p + + let equiv (i: item) (j :item) (p: partition) : partition = + let ri, p = get_or_set i p in + let rj, p = get_or_set j p in + if equal ri rj then p else ItemMap.add ri rj p + + let alias = equiv + + (* Printing *) + + let print p = + let print src dst = + Printf.printf "%s -> %s\n" + (Item.to_string src) (Item.to_string dst) + in ItemMap.iter print p + + end diff --git a/Partition1.ml b/Partition1.ml new file mode 100644 index 0000000..c9eea51 --- /dev/null +++ b/Partition1.ml @@ -0,0 +1,69 @@ +(* Persistent implementation of Union/Find with height-balanced + forests and without path compression: O(n*log(n)). + + In the definition of type [t], the height component is that of the + source, that is, if [ItemMap.find i m = (j,h)], then [h] is the + height of [i] (_not_ [j]). +*) + +module Make (Item: Item.S) = + struct + + type item = Item.t + type repr = item (** Class representatives *) + + let equal i j = Item.compare i j = 0 + + module ItemMap = Map.Make (Item) + + type height = int + + type partition = (item * height) ItemMap.t + type t = partition + + let empty = ItemMap.empty + + let rec seek (i: item) (p: partition) : repr * height = + let j, _ as i' = ItemMap.find i p in + if equal i j then i' else seek j p + + let repr item partition = fst (seek item partition) + + let is_equiv (i: item) (j: item) (p: partition) = + equal (repr i p) (repr j p) + + let get_or_set (i: item) (p: partition) = + try seek i p, p with + Not_found -> let i' = i,0 in (i', ItemMap.add i i' p) + + let equiv (i: item) (j: item) (p: partition) : partition = + let (ri,hi), p = get_or_set i p in + let (rj,hj), p = get_or_set j p in + let add = ItemMap.add in + if equal ri rj + then p + else if hi > hj + then add rj (ri,hj) p + else add ri (rj,hi) (if hi < hj then p else add rj (rj,hj+1) p) + + let alias (i: item) (j: item) (p: partition) : partition = + let (ri,hi), p = get_or_set i p in + let (rj,hj), p = get_or_set j p in + let add = ItemMap.add in + if equal ri rj + then p + else if hi = hj || equal ri i + then add ri (rj,hi) @@ add rj (rj, max hj (hi+1)) p + else if hi < hj then add ri (rj,hi) p + else add rj (ri,hj) p + + (* Printing *) + + let print (p: partition) = + let print i (j,hi) = + let _,hj = ItemMap.find j p in + Printf.printf "%s,%d -> %s,%d\n" + (Item.to_string i) hi (Item.to_string j) hj + in ItemMap.iter print p + + end diff --git a/Partition2.ml b/Partition2.ml new file mode 100644 index 0000000..ae61466 --- /dev/null +++ b/Partition2.ml @@ -0,0 +1,115 @@ +(** Persistent implementation of the Union/Find algorithm with + height-balanced forests and without path compression. *) + +module Make (Item: Item.S) = + struct + + type item = Item.t + type repr = item (** Class representatives *) + + let equal i j = Item.compare i j = 0 + + type height = int + + (** Each equivalence class is implemented by a Catalan tree linked + upwardly and otherwise is a link to another node. Those trees + are height-balanced. The type [node] implements nodes in those + trees. *) + type node = + Root of height + (** The value of [Root h] denotes the root of a tree, that is, + the representative of the associated class. The height [h] + is that of the tree, so a tree reduced to its root alone has + heigh 0. *) + + | Link of item * height + (** If not a root, a node is a link to another node. Because the + links are upward, that is, bottom-up, and we seek a purely + functional implementation, we need to uncouple the nodes and + the items here, so the first component of [Link] is an item, + not a node. That is why the type [node] is not recursive, + and called [node], not [tree]: to become a traversable tree, + it needs to be complemented by the type [partition] below to + associate items back to nodes. In order to follow a path + upward in the tree until the root, we start from a link node + giving us the next item, then find the node corresponding to + the item thanks to [partition], and again until we arrive at + the root. + + The height component is that of the source of the link, that + is, [h] is the height of the node linking to the node [Link + (j,h)], _not_ of [j], except when [equal i j]. *) + + module ItemMap = Map.Make (Item) + + (** The type [partition] implements a partition of classes of + equivalent items by means of a map from items to nodes of type + [node] in trees. *) + type partition = node ItemMap.t + + type t = partition + + let empty = ItemMap.empty + + let root (item, height) = ItemMap.add item (Root height) + + let link (src, height) dst = ItemMap.add src (Link (dst, height)) + + let rec seek (i: item) (p: partition) : repr * height = + match ItemMap.find i p with + Root hi -> i,hi + | Link (j,_) -> seek j p + + let repr item partition = fst (seek item partition) + + let is_equiv (i: item) (j: item) (p: partition) = + equal (repr i p) (repr j p) + + let get_or_set (i: item) (p: partition) = + try seek i p, p with + Not_found -> let n = i,0 in (n, root n p) + + let equiv (i: item) (j: item) (p: partition) : partition = + let (ri,hi as ni), p = get_or_set i p in + let (rj,hj as nj), p = get_or_set j p in + if equal ri rj + then p + else if hi > hj + then link nj ri p + else link ni rj (if hi < hj then p else root (rj, hj+1) p) + + (** The call [alias i j p] results in the same partition as [equiv + i j p], except that [i] is not the representative of its class + in [alias i j p] (whilst it may be in [equiv i j p]). + + This property is irrespective of the heights of the + representatives of [i] and [j], that is, of the trees + implementing their classes. If [i] is not a representative of + its class before calling [alias], then the height criteria is + applied (which, without the constraint above, would yield a + height-balanced new tree). *) + let alias (i: item) (j: item) (p: partition) : partition = + let (ri,hi as ni), p = get_or_set i p in + let (rj,hj as nj), p = get_or_set j p in + if equal ri rj + then p + else if hi = hj || equal ri i + then link ni rj @@ root (rj, max hj (hi+1)) p + else if hi < hj then link ni rj p + else link nj ri p + + (** {1 Printing} *) + + let print (p: partition) = + let print i node = + let hi, hj, j = + match node with + Root hi -> hi,hi,i + | Link (j,hi) -> + match ItemMap.find j p with + Root hj | Link (_,hj) -> hi,hj,j in + Printf.printf "%s,%d -> %s,%d\n" + (Item.to_string i) hi (Item.to_string j) hj + in ItemMap.iter print p + + end diff --git a/Partition3.ml b/Partition3.ml new file mode 100644 index 0000000..1d90536 --- /dev/null +++ b/Partition3.ml @@ -0,0 +1,86 @@ +(* Destructive implementation of union/find with height-balanced + forests but without path compression: O(n*log(n)). *) + +module Make (Item: Item.S) = + struct + + type item = Item.t + type repr = item (** Class representatives *) + + let equal i j = Item.compare i j = 0 + + type height = int + + (** Each equivalence class is implemented by a Catalan tree linked + upwardly and otherwise is a link to another node. Those trees + are height-balanced. The type [node] implements nodes in those + trees. *) + type node = {item: item; mutable height: int; mutable parent: node} + + module ItemMap = Map.Make (Item) + + (** The type [partition] implements a partition of classes of + equivalent items by means of a map from items to nodes of type + [node] in trees. *) + type partition = node ItemMap.t + + type t = partition + + let empty = ItemMap.empty + + (** The function [repr] is faster than a persistent implementation + in the worst case because, in the latter case, the cost is O(log n) + for accessing each node in the path to the root, whereas, in the + former, only the access to the first node in the path incurs a cost + of O(log n) -- the other nodes are accessed in constant time by + following the [next] field of type [node]. *) + let seek (i: item) (p: partition) : node = + let rec find_root node = + if node.parent == node then node else find_root node.parent + in find_root (ItemMap.find i p) + + let repr item partition = (seek item partition).item + + let is_equiv (i: item) (j: item) (p: partition) = + equal (repr i p) (repr j p) + + let get_or_set item (p: partition) = + try seek item p, p with + Not_found -> let rec loop = {item; height=0; parent=loop} + in loop, ItemMap.add item loop p + + let link src dst = src.parent <- dst + + let equiv (i: item) (j: item) (p: partition) : partition = + let ni,p = get_or_set i p in + let nj,p = get_or_set j p in + let hi,hj = ni.height, nj.height in + let () = + if not (equal ni.item nj.item) + then if hi > hj + then link nj ni + else (link ni nj; nj.height <- max hj (hi+1)) + in p + + let alias (i: item) (j: item) (p: partition) : partition = + let ni,p = get_or_set i p in + let nj,p = get_or_set j p in + let hi,hj = ni.height, nj.height in + let () = + if not (equal ni.item nj.item) + then if hi = hj || equal ni.item i + then (link ni nj; nj.height <- max hj (hi+1)) + else if hi < hj then link ni nj + else link nj ni + in p + + (* Printing *) + + let print p = + let print _ node = + Printf.printf "%s,%d -> %s,%d\n" + (Item.to_string node.item) node.height + (Item.to_string node.parent.item) node.parent.height + in ItemMap.iter print p + + end diff --git a/PartitionMain.ml b/PartitionMain.ml new file mode 100644 index 0000000..4e69dbd --- /dev/null +++ b/PartitionMain.ml @@ -0,0 +1,40 @@ +module Int = + struct + type t = int + let compare (i: int) (j: int) = Pervasives.compare i j + let to_string = string_of_int + end + +module Test (Part: Partition.S with type item = Int.t) = + struct + open Part + + let () = empty + |> equiv 4 3 + |> equiv 3 8 + |> equiv 6 5 + |> equiv 9 4 + |> equiv 2 1 + |> equiv 8 9 + |> equiv 5 0 + |> equiv 7 2 + |> equiv 6 1 + |> equiv 1 0 + |> equiv 6 7 + |> equiv 8 0 + |> equiv 7 7 + |> equiv 10 10 + |> print + end + + +module Test0 = Test (Partition0.Make(Int)) +let () = print_newline () + +module Test1 = Test (Partition1.Make(Int)) +let () = print_newline () + +module Test2 = Test (Partition2.Make(Int)) +let () = print_newline () + +module Test3 = Test (Partition3.Make(Int)) diff --git a/README.md b/README.md new file mode 100644 index 0000000..0f7010c --- /dev/null +++ b/README.md @@ -0,0 +1,40 @@ +# Some implementations in OCaml of the Union/Find algorithm + +All modules implementing Union/Find can be coerced by the same +signature `Partition.S`. + +Note the function `alias` which is equivalent to `equiv`, but not +symmetric: `alias x y` means that `x` is an alias of `y`, which +translates in the present context as `x` not being the representative +of the equivalence class containing the equivalence between `x` and +`y`. The function `alias` is useful when managing aliases during the +static analyses of programmning languages, so the representatives of +the classes are always the original object. + +The module `PartitionMain` tests each with the same equivalence +relations. + +# `Partition0.ml` + +This is a naive, persistent implementation of Union/Find featuring an +asymptotic worst case cost of O(n^2). + +# `Partition1.ml` + +This is a persistent implementation of Union/Find with height-balanced +forests and without path compression, featuring an asymptotic worst +case cost of O(n*log(n)). + +# `Partition2.ml` + +This is an alternate version of `Partition1.ml`, using a different +data type. + +# `Partition3.ml` + +This is a destructive implementation of Union/Find with +height-balanced forests but without path compression, featuring an +asymptotic worst case of O(n*log(n)). In practice, though, this +implementation should be faster than the previous ones, due to a +smaller multiplicative constant term. + diff --git a/build.sh b/build.sh new file mode 100755 index 0000000..dbc9804 --- /dev/null +++ b/build.sh @@ -0,0 +1,15 @@ +#!/bin/sh +set -x +ocamlfind ocamlc -strict-sequence -w +A-48-4 -c Item.mli +ocamlfind ocamlc -strict-sequence -w +A-48-4 -c Partition.mli +ocamlfind ocamlopt -strict-sequence -w +A-48-4 -c Partition0.ml +ocamlfind ocamlopt -strict-sequence -w +A-48-4 -c Partition2.ml +ocamlfind ocamlopt -strict-sequence -w +A-48-4 -c Partition1.ml +ocamlfind ocamlopt -strict-sequence -w +A-48-4 -c Partition3.ml +ocamlfind ocamlopt -strict-sequence -w +A-48-4 -c Partition1.ml +ocamlfind ocamlopt -strict-sequence -w +A-48-4 -c Partition3.ml +ocamlfind ocamlopt -strict-sequence -w +A-48-4 -c Partition0.ml +ocamlfind ocamlopt -strict-sequence -w +A-48-4 -c Partition2.ml +ocamlfind ocamlopt -strict-sequence -w +A-48-4 -c PartitionMain.ml +ocamlfind ocamlopt -strict-sequence -w +A-48-4 -c PartitionMain.ml +ocamlfind ocamlopt -o PartitionMain.opt Partition0.cmx Partition1.cmx Partition2.cmx Partition3.cmx PartitionMain.cmx diff --git a/clean.sh b/clean.sh new file mode 100755 index 0000000..75ded7c --- /dev/null +++ b/clean.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +\rm -f *.cmi *.cmo *.cmx *.o *.byte *.opt