From 2d994efcd28f85b4849645204dde032f93aa1753 Mon Sep 17 00:00:00 2001
From: Christian Rinderknecht <crinderknecht@numalis.com>
Date: Sun, 12 Aug 2018 15:04:27 +0200
Subject: [PATCH] First import of some implementations of the Union/Find
 algorithm in OCaml.

---
 Item.mli         |   6 +++
 Partition.mli    |  64 ++++++++++++++++++++++++++
 Partition0.ml    |  47 +++++++++++++++++++
 Partition1.ml    |  69 ++++++++++++++++++++++++++++
 Partition2.ml    | 115 +++++++++++++++++++++++++++++++++++++++++++++++
 Partition3.ml    |  86 +++++++++++++++++++++++++++++++++++
 PartitionMain.ml |  40 +++++++++++++++++
 README.md        |  40 +++++++++++++++++
 build.sh         |  15 +++++++
 clean.sh         |   3 ++
 10 files changed, 485 insertions(+)
 create mode 100644 Item.mli
 create mode 100644 Partition.mli
 create mode 100644 Partition0.ml
 create mode 100644 Partition1.ml
 create mode 100644 Partition2.ml
 create mode 100644 Partition3.ml
 create mode 100644 PartitionMain.ml
 create mode 100644 README.md
 create mode 100755 build.sh
 create mode 100755 clean.sh

diff --git a/Item.mli b/Item.mli
new file mode 100644
index 0000000..27a1f08
--- /dev/null
+++ b/Item.mli
@@ -0,0 +1,6 @@
+module type S =
+  sig
+    type t
+    val compare   : t -> t -> int
+    val to_string : t -> string
+  end
diff --git a/Partition.mli b/Partition.mli
new file mode 100644
index 0000000..cc6aa2d
--- /dev/null
+++ b/Partition.mli
@@ -0,0 +1,64 @@
+(** This module offers the abstract data type of a partition of
+    classes of equivalent items (Union & Find). *)
+
+(** The items are of type [PrintOrdType.t], that is, they have to obey
+    a total order, but also they must be printable to ease
+    debugging. The signature [PrintOrdType] is the input signature of
+    the functor {!Partition.Make}. *)
+module type PrintOrdType =
+  sig
+    (** Type of items *)
+    type t
+
+    (** Same convention as {!Pervasives.compare} *)
+    val compare : t -> t -> int
+
+    val to_string : t -> string
+  end
+
+(** The module signature [S] is the output signature of the functor
+    {!Partition.Make}. *)
+module type S =
+  sig
+    type item
+    type partition
+    type t = partition
+
+    (** {1 Creation} *)
+
+    (** The value [empty] is an empty partition. *)
+    val empty : partition
+
+    (** The value of [equiv i j p] is the partition [p] extended with
+        the equivalence of items [i] and [j]. If both [i] and [j] are
+        already known to be equivalent, then [equiv i j p == p]. *)
+    val equiv : item -> item -> partition -> partition
+
+    (** The value of [alias i j p] is the partition [p] extended with
+        the fact that item [i] is an alias of item [j]. This is the
+        same as [equiv i j p], except that it is guaranteed that the
+        item [i] is not the representative of its equivalence class in
+        [alias i j p]. *)
+    val alias : item -> item -> partition -> partition
+
+    (** {1 Projection} *)
+
+    (** The value of the call [repr i p] is the representative of item
+        [i] in the partition [p]. The built-in exception [Not_found]
+        is raised if [i] is not in [p]. *)
+    val repr : item -> partition -> item
+
+    (** The side-effect of the call [print p] is the printing of the
+        partition [p] on standard output, based on [Ord.to_string]. *)
+    val print : partition -> unit
+
+    (** {1 Predicates} *)
+
+    (** The value of [is_equiv i j p] is [true] if, and only if, the
+        items [i] and [j] belong to the same equivalence class in the
+        partition [p], that is, [i] and [j] have the same
+        representative. *)
+    val is_equiv : item -> item -> partition -> bool
+  end
+
+module Make (Ord : PrintOrdType) : S with type item = Ord.t
diff --git a/Partition0.ml b/Partition0.ml
new file mode 100644
index 0000000..691c400
--- /dev/null
+++ b/Partition0.ml
@@ -0,0 +1,47 @@
+(* Naive persistent implementation of Union/Find: O(n^2) worst case *)
+
+module Make (Item: Item.S) =
+  struct
+
+    type item = Item.t
+    type repr = item   (** Class representatives *)
+
+    let equal i j = Item.compare i j = 0
+
+    module ItemMap = Map.Make (Item)
+
+    type height = int
+
+    type partition = item ItemMap.t
+    type t = partition
+
+    let empty = ItemMap.empty
+
+    let rec repr item partition =
+      let parent = ItemMap.find item partition in
+      if   equal parent item
+      then item
+      else repr parent partition
+
+    let is_equiv (i: item) (j: item) (p: partition) =
+      equal (repr i p) (repr j p)
+
+    let get_or_set (i: item) (p: partition) : item * partition =
+      try repr i p, p with Not_found -> i, ItemMap.add i i p
+
+    let equiv (i: item) (j :item) (p: partition) : partition =
+      let ri, p = get_or_set i p in
+      let rj, p = get_or_set j p in
+      if equal ri rj then p else ItemMap.add ri rj p
+
+    let alias = equiv
+
+   (* Printing *)
+
+    let print p =
+      let print src dst =
+        Printf.printf "%s -> %s\n"
+          (Item.to_string src) (Item.to_string dst)
+      in ItemMap.iter print p
+
+  end
diff --git a/Partition1.ml b/Partition1.ml
new file mode 100644
index 0000000..c9eea51
--- /dev/null
+++ b/Partition1.ml
@@ -0,0 +1,69 @@
+(* Persistent implementation of Union/Find with height-balanced
+   forests and without path compression: O(n*log(n)).
+
+   In the definition of type [t], the height component is that of the
+   source, that is, if [ItemMap.find i m = (j,h)], then [h] is the
+   height of [i] (_not_ [j]).
+*)
+
+module Make (Item: Item.S) =
+  struct
+
+    type item = Item.t
+    type repr = item   (** Class representatives *)
+
+    let equal i j = Item.compare i j = 0
+
+    module ItemMap = Map.Make (Item)
+
+    type height = int
+
+    type partition = (item * height) ItemMap.t
+    type t = partition
+
+    let empty = ItemMap.empty
+
+    let rec seek (i: item) (p: partition) : repr * height =
+      let j, _ as i' = ItemMap.find i p in
+      if equal i j then i' else seek j p
+
+    let repr item partition = fst (seek item partition)
+
+    let is_equiv (i: item) (j: item) (p: partition) =
+      equal (repr i p) (repr j p)
+
+    let get_or_set (i: item) (p: partition) =
+      try seek i p, p with
+        Not_found -> let i' = i,0 in (i', ItemMap.add i i' p)
+
+    let equiv (i: item) (j: item) (p: partition) : partition =
+      let (ri,hi), p = get_or_set i p in
+      let (rj,hj), p = get_or_set j p in
+      let add = ItemMap.add in
+      if   equal ri rj
+      then p
+      else if   hi > hj
+           then add rj (ri,hj) p
+           else add ri (rj,hi) (if hi < hj then p else add rj (rj,hj+1) p)
+
+    let alias (i: item) (j: item) (p: partition) : partition =
+      let (ri,hi), p = get_or_set i p in
+      let (rj,hj), p = get_or_set j p in
+      let add = ItemMap.add in
+      if   equal ri rj
+      then p
+      else if   hi = hj || equal ri i
+           then add ri (rj,hi) @@ add rj (rj, max hj (hi+1)) p
+           else if hi < hj then add ri (rj,hi) p
+                           else add rj (ri,hj) p
+
+    (* Printing *)
+
+    let print (p: partition) =
+      let print i (j,hi) =
+        let _,hj = ItemMap.find j p in
+        Printf.printf "%s,%d -> %s,%d\n"
+          (Item.to_string i) hi (Item.to_string j) hj
+      in ItemMap.iter print p
+
+  end
diff --git a/Partition2.ml b/Partition2.ml
new file mode 100644
index 0000000..ae61466
--- /dev/null
+++ b/Partition2.ml
@@ -0,0 +1,115 @@
+(** Persistent implementation of the Union/Find algorithm with
+    height-balanced forests and without path compression. *)
+
+module Make (Item: Item.S) =
+  struct
+
+    type item = Item.t
+    type repr = item   (** Class representatives *)
+
+    let equal i j = Item.compare i j = 0
+
+    type height = int
+
+    (** Each equivalence class is implemented by a Catalan tree linked
+        upwardly and otherwise is a link to another node. Those trees
+        are height-balanced. The type [node] implements nodes in those
+        trees. *)
+    type node =
+      Root of height
+      (** The value of [Root h] denotes the root of a tree, that is,
+          the representative of the associated class.  The height [h]
+          is that of the tree, so a tree reduced to its root alone has
+          heigh 0. *)
+
+    | Link of item * height
+      (** If not a root, a node is a link to another node. Because the
+          links are upward, that is, bottom-up, and we seek a purely
+          functional implementation, we need to uncouple the nodes and
+          the items here, so the first component of [Link] is an item,
+          not a node. That is why the type [node] is not recursive,
+          and called [node], not [tree]: to become a traversable tree,
+          it needs to be complemented by the type [partition] below to
+          associate items back to nodes. In order to follow a path
+          upward in the tree until the root, we start from a link node
+          giving us the next item, then find the node corresponding to
+          the item thanks to [partition], and again until we arrive at
+          the root.
+
+          The height component is that of the source of the link, that
+          is, [h] is the height of the node linking to the node [Link
+          (j,h)], _not_ of [j], except when [equal i j]. *)
+
+    module ItemMap = Map.Make (Item)
+
+    (** The type [partition] implements a partition of classes of
+        equivalent items by means of a map from items to nodes of type
+        [node] in trees. *)
+    type partition = node ItemMap.t
+
+    type t = partition
+
+    let empty = ItemMap.empty
+
+    let root (item, height) = ItemMap.add item (Root height)
+
+    let link (src, height) dst = ItemMap.add src (Link (dst, height))
+
+    let rec seek (i: item) (p: partition) : repr * height =
+      match ItemMap.find i p with
+           Root hi -> i,hi
+      | Link (j,_) -> seek j p
+
+    let repr item partition = fst (seek item partition)
+
+    let is_equiv (i: item) (j: item) (p: partition) =
+      equal (repr i p) (repr j p)
+
+    let get_or_set (i: item) (p: partition) =
+      try seek i p, p with
+        Not_found -> let n = i,0 in (n, root n p)
+
+    let equiv (i: item) (j: item) (p: partition) : partition =
+      let (ri,hi as ni), p = get_or_set i p in
+      let (rj,hj as nj), p = get_or_set j p in
+      if   equal ri rj
+      then p
+      else if   hi > hj
+           then link nj ri p
+           else link ni rj (if hi < hj then p else root (rj, hj+1) p)
+
+    (** The call [alias i j p] results in the same partition as [equiv
+        i j p], except that [i] is not the representative of its class
+        in [alias i j p] (whilst it may be in [equiv i j p]).
+
+        This property is irrespective of the heights of the
+        representatives of [i] and [j], that is, of the trees
+        implementing their classes. If [i] is not a representative of
+        its class before calling [alias], then the height criteria is
+        applied (which, without the constraint above, would yield a
+        height-balanced new tree). *)
+    let alias (i: item) (j: item) (p: partition) : partition =
+      let (ri,hi as ni), p = get_or_set i p in
+      let (rj,hj as nj), p = get_or_set j p in
+      if   equal ri rj
+      then p
+      else if   hi = hj || equal ri i
+           then link ni rj @@ root (rj, max hj (hi+1)) p
+           else if hi < hj then link ni rj p
+                           else link nj ri p
+
+    (** {1 Printing} *)
+
+    let print (p: partition) =
+      let print i node =
+        let hi, hj, j =
+          match node with
+            Root hi -> hi,hi,i
+          | Link (j,hi) ->
+              match ItemMap.find j p with
+                Root hj | Link (_,hj) -> hi,hj,j in
+        Printf.printf "%s,%d -> %s,%d\n"
+          (Item.to_string i) hi (Item.to_string j) hj
+      in ItemMap.iter print p
+
+  end
diff --git a/Partition3.ml b/Partition3.ml
new file mode 100644
index 0000000..1d90536
--- /dev/null
+++ b/Partition3.ml
@@ -0,0 +1,86 @@
+(* Destructive implementation of union/find with height-balanced
+   forests but without path compression: O(n*log(n)). *)
+
+module Make (Item: Item.S) =
+  struct
+
+    type item = Item.t
+    type repr = item   (** Class representatives *)
+
+    let equal i j = Item.compare i j = 0
+
+    type height = int
+
+    (** Each equivalence class is implemented by a Catalan tree linked
+        upwardly and otherwise is a link to another node. Those trees
+        are height-balanced. The type [node] implements nodes in those
+        trees. *)
+    type node = {item: item; mutable height: int; mutable parent: node}
+
+    module ItemMap = Map.Make (Item)
+
+    (** The type [partition] implements a partition of classes of
+        equivalent items by means of a map from items to nodes of type
+        [node] in trees. *)
+    type partition = node ItemMap.t
+
+    type t = partition
+
+    let empty = ItemMap.empty
+
+    (** The function [repr] is faster than a persistent implementation
+        in the worst case because, in the latter case, the cost is O(log n)
+        for accessing each node in the path to the root, whereas, in the
+        former, only the access to the first node in the path incurs a cost
+        of O(log n) -- the other nodes are accessed in constant time by
+        following the [next] field of type [node]. *)
+   let seek (i: item) (p: partition) : node =
+     let rec find_root node =
+       if node.parent == node then node else find_root node.parent
+     in find_root (ItemMap.find i p)
+
+   let repr item partition = (seek item partition).item
+
+    let is_equiv (i: item) (j: item) (p: partition) =
+      equal (repr i p) (repr j p)
+
+   let get_or_set item (p: partition) =
+     try seek item p, p with
+       Not_found -> let rec loop = {item; height=0; parent=loop}
+                   in loop, ItemMap.add item loop p
+
+   let link src dst = src.parent <- dst
+
+   let equiv (i: item) (j: item) (p: partition) : partition =
+     let ni,p  = get_or_set i p in
+     let nj,p  = get_or_set j p in
+     let hi,hj = ni.height, nj.height in
+     let () =
+       if   not (equal ni.item nj.item)
+       then if   hi > hj
+            then link nj ni
+            else (link ni nj; nj.height <- max hj (hi+1))
+     in p
+
+   let alias (i: item) (j: item) (p: partition) : partition =
+     let ni,p  = get_or_set i p in
+     let nj,p  = get_or_set j p in
+     let hi,hj = ni.height, nj.height in
+     let () =
+      if   not (equal ni.item nj.item)
+      then if   hi = hj || equal ni.item i
+           then (link ni nj; nj.height <- max hj (hi+1))
+           else if hi < hj then link ni nj
+                           else link nj ni
+     in p
+
+   (* Printing *)
+
+    let print p =
+      let print _ node =
+        Printf.printf "%s,%d -> %s,%d\n"
+          (Item.to_string node.item) node.height
+          (Item.to_string node.parent.item) node.parent.height
+      in ItemMap.iter print p
+
+  end
diff --git a/PartitionMain.ml b/PartitionMain.ml
new file mode 100644
index 0000000..4e69dbd
--- /dev/null
+++ b/PartitionMain.ml
@@ -0,0 +1,40 @@
+module Int =
+  struct
+    type t = int
+    let compare (i: int) (j: int) = Pervasives.compare i j
+    let to_string = string_of_int
+  end
+
+module Test (Part: Partition.S with type item = Int.t) =
+  struct
+    open Part
+
+    let () = empty
+             |> equiv 4 3
+             |> equiv 3 8
+             |> equiv 6 5
+             |> equiv 9 4
+             |> equiv 2 1
+             |> equiv 8 9
+             |> equiv 5 0
+             |> equiv 7 2
+             |> equiv 6 1
+             |> equiv 1 0
+             |> equiv 6 7
+             |> equiv 8 0
+             |> equiv 7 7
+             |> equiv 10 10
+             |> print
+  end
+
+
+module Test0 = Test (Partition0.Make(Int))
+let () = print_newline ()
+
+module Test1 = Test (Partition1.Make(Int))
+let () = print_newline ()
+
+module Test2 = Test (Partition2.Make(Int))
+let () = print_newline ()
+
+module Test3 = Test (Partition3.Make(Int))
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..0f7010c
--- /dev/null
+++ b/README.md
@@ -0,0 +1,40 @@
+# Some implementations in OCaml of the Union/Find algorithm
+
+All modules implementing Union/Find can be coerced by the same
+signature `Partition.S`.
+
+Note the function `alias` which is equivalent to `equiv`, but not
+symmetric: `alias x y` means that `x` is an alias of `y`, which
+translates in the present context as `x` not being the representative
+of the equivalence class containing the equivalence between `x` and
+`y`. The function `alias` is useful when managing aliases during the
+static analyses of programmning languages, so the representatives of
+the classes are always the original object.
+
+The module `PartitionMain` tests each with the same equivalence
+relations.
+
+# `Partition0.ml`
+
+This is a naive, persistent implementation of Union/Find featuring an
+asymptotic worst case cost of O(n^2).
+
+# `Partition1.ml`
+
+This is a persistent implementation of Union/Find with height-balanced
+forests and without path compression, featuring an asymptotic worst
+case cost of O(n*log(n)).
+
+# `Partition2.ml`
+
+This is an alternate version of `Partition1.ml`, using a different
+data type.
+
+# `Partition3.ml`
+
+This is a destructive implementation of Union/Find with
+height-balanced forests but without path compression, featuring an
+asymptotic worst case of O(n*log(n)). In practice, though, this
+implementation should be faster than the previous ones, due to a
+smaller multiplicative constant term.
+
diff --git a/build.sh b/build.sh
new file mode 100755
index 0000000..dbc9804
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+set -x
+ocamlfind ocamlc -strict-sequence -w +A-48-4 -c Item.mli
+ocamlfind ocamlc -strict-sequence -w +A-48-4 -c Partition.mli
+ocamlfind ocamlopt -strict-sequence -w +A-48-4 -c Partition0.ml
+ocamlfind ocamlopt -strict-sequence -w +A-48-4 -c Partition2.ml
+ocamlfind ocamlopt -strict-sequence -w +A-48-4 -c Partition1.ml
+ocamlfind ocamlopt -strict-sequence -w +A-48-4 -c Partition3.ml
+ocamlfind ocamlopt -strict-sequence -w +A-48-4 -c Partition1.ml
+ocamlfind ocamlopt -strict-sequence -w +A-48-4 -c Partition3.ml
+ocamlfind ocamlopt -strict-sequence -w +A-48-4 -c Partition0.ml
+ocamlfind ocamlopt -strict-sequence -w +A-48-4 -c Partition2.ml
+ocamlfind ocamlopt -strict-sequence -w +A-48-4 -c PartitionMain.ml
+ocamlfind ocamlopt -strict-sequence -w +A-48-4 -c PartitionMain.ml
+ocamlfind ocamlopt -o PartitionMain.opt Partition0.cmx Partition1.cmx Partition2.cmx Partition3.cmx PartitionMain.cmx
diff --git a/clean.sh b/clean.sh
new file mode 100755
index 0000000..75ded7c
--- /dev/null
+++ b/clean.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+\rm -f *.cmi *.cmo *.cmx *.o *.byte *.opt