From 1edc151c5dc787f8b56c094218302d33f56dc335 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lorenz=20H=C3=BCdepohl?=
Date: Fri, 6 Jan 2017 13:20:18 +0100
Subject: [PATCH] PathHelix: Drop requirement of scipy.spatial

It was deemed that scipy is too heavy a requirement for FreeCAD just
for this little feature. Fortunately it was possible to extract the
k-d tree module from scipy with just minor modifications - it is quite
self-contained. Now, only numpy is required.
---
 src/Mod/Path/CMakeLists.txt           |   1 +
 src/Mod/Path/PathScripts/PathHelix.py |   8 +-
 src/Mod/Path/PathScripts/kdtree.py    | 943 ++++++++++++++++++++++++++
 3 files changed, 950 insertions(+), 2 deletions(-)
 create mode 100644 src/Mod/Path/PathScripts/kdtree.py

diff --git a/src/Mod/Path/CMakeLists.txt b/src/Mod/Path/CMakeLists.txt
index a130190b0..1e996f47e 100644
--- a/src/Mod/Path/CMakeLists.txt
+++ b/src/Mod/Path/CMakeLists.txt
@@ -53,6 +53,7 @@ SET(PathScripts_SRCS
     PathScripts/PathStock.py
     PathScripts/PathStop.py
     PathScripts/PathHelix.py
+    PathScripts/kdtree.py
     PathScripts/PathSurface.py
     PathScripts/PathToolLenOffset.py
     PathScripts/PathToolLibraryManager.py
diff --git a/src/Mod/Path/PathScripts/PathHelix.py b/src/Mod/Path/PathScripts/PathHelix.py
index da2e63a72..b4d7da67e 100644
--- a/src/Mod/Path/PathScripts/PathHelix.py
+++ b/src/Mod/Path/PathScripts/PathHelix.py
@@ -198,7 +198,11 @@ def helix_cut(center, r_out, r_in, dr, zmax, zmin, dz, safe_z, tool_diameter, vf
     return out

 def features_by_centers(base, features):
-    import scipy.spatial
+    try:
+        from scipy.spatial import KDTree
+    except ImportError:
+        from PathScripts.kdtree import KDTree
+
     features = sorted(features,
                       key = lambda feature : getattr(base.Shape, feature).Surface.Radius,
                       reverse = True)
@@ -206,7 +210,7 @@ def features_by_centers(base, features):
     coordinates = [(cylinder.Surface.Center.x, cylinder.Surface.Center.y) for cylinder in
                    [getattr(base.Shape, feature) for feature in features]]

-    tree = scipy.spatial.KDTree(coordinates)
+    tree = KDTree(coordinates)

     seen = {}
     by_centers = {}
diff --git a/src/Mod/Path/PathScripts/kdtree.py b/src/Mod/Path/PathScripts/kdtree.py
new file mode 100644
index 000000000..82158d3c9
--- /dev/null
+++ b/src/Mod/Path/PathScripts/kdtree.py
@@ -0,0 +1,943 @@
+# Copyright Anne M. Archibald 2008
+# Released under the scipy license
+#
+# Copyright (c) 2001, 2002 Enthought, Inc.
+# All rights reserved.
+#
+# Copyright (c) 2003-2016 SciPy Developers.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# a. Redistributions of source code must retain the above copyright notice,
+#    this list of conditions and the following disclaimer.
+# b. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+# c. Neither the name of Enthought nor the names of the SciPy Developers
+#    may be used to endorse or promote products derived from this software
+#    without specific prior written permission.
+#
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.
+# IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS
+# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+# OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+
+from __future__ import division, print_function, absolute_import
+
+import sys
+import numpy as np
+from heapq import heappush, heappop
+
+__all__ = ['minkowski_distance_p', 'minkowski_distance',
+           'distance_matrix',
+           'Rectangle', 'KDTree']
+
+
+def minkowski_distance_p(x, y, p=2):
+    """
+    Compute the p-th power of the L**p distance between two arrays.
+
+    For efficiency, this function computes the L**p distance but does
+    not extract the pth root. If `p` is 1 or infinity, this is equal to
+    the actual L**p distance.
+
+    Parameters
+    ----------
+    x : (M, K) array_like
+        Input array.
+    y : (N, K) array_like
+        Input array.
+    p : float, 1 <= p <= infinity
+        Which Minkowski p-norm to use.
+
+    Examples
+    --------
+    >>> minkowski_distance_p([[0,0],[0,0]], [[1,1],[0,1]])
+    array([2, 1])
+
+    """
+    x = np.asarray(x)
+    y = np.asarray(y)
+    if p == np.inf:
+        return np.amax(np.abs(y-x), axis=-1)
+    elif p == 1:
+        return np.sum(np.abs(y-x), axis=-1)
+    else:
+        return np.sum(np.abs(y-x)**p, axis=-1)
+
+
+def minkowski_distance(x, y, p=2):
+    """
+    Compute the L**p distance between two arrays.
+
+    Parameters
+    ----------
+    x : (M, K) array_like
+        Input array.
+    y : (N, K) array_like
+        Input array.
+    p : float, 1 <= p <= infinity
+        Which Minkowski p-norm to use.
+
+    Examples
+    --------
+    >>> minkowski_distance([[0,0],[0,0]], [[1,1],[0,1]])
+    array([ 1.41421356,  1.        ])
+
+    """
+    x = np.asarray(x)
+    y = np.asarray(y)
+    if p == np.inf or p == 1:
+        return minkowski_distance_p(x, y, p)
+    else:
+        return minkowski_distance_p(x, y, p)**(1./p)
+
+
+class Rectangle(object):
+    """Hyperrectangle class.
+
+    Represents a Cartesian product of intervals.
+    """
+    def __init__(self, maxes, mins):
+        """Construct a hyperrectangle."""
+        self.maxes = np.maximum(maxes,mins).astype(np.float)
+        self.mins = np.minimum(maxes,mins).astype(np.float)
+        self.m, = self.maxes.shape
+
+    def __repr__(self):
+        return "<Rectangle %s>" % list(zip(self.mins, self.maxes))
+
+    def volume(self):
+        """Total volume."""
+        return np.prod(self.maxes-self.mins)
+
+    def split(self, d, split):
+        """
+        Produce two hyperrectangles by splitting.
+
+        In general, if you need to compute maximum and minimum
+        distances to the children, it can be done more efficiently
+        by updating the maximum and minimum distances to the parent.
+
+        Parameters
+        ----------
+        d : int
+            Axis to split hyperrectangle along.
+        split :
+            Input.
+
+        """
+        mid = np.copy(self.maxes)
+        mid[d] = split
+        less = Rectangle(self.mins, mid)
+        mid = np.copy(self.mins)
+        mid[d] = split
+        greater = Rectangle(mid, self.maxes)
+        return less, greater
+
+    def min_distance_point(self, x, p=2.):
+        """
+        Return the minimum distance between input and points in the hyperrectangle.
+
+        Parameters
+        ----------
+        x : array_like
+            Input.
+        p : float, optional
+            Input.
+
+        """
+        return minkowski_distance(0, np.maximum(0,np.maximum(self.mins-x,x-self.maxes)),p)
+
+    def max_distance_point(self, x, p=2.):
+        """
+        Return the maximum distance between input and points in the hyperrectangle.
+ + Parameters + ---------- + x : array_like + Input array. + p : float, optional + Input. + + """ + return minkowski_distance(0, np.maximum(self.maxes-x,x-self.mins),p) + + def min_distance_rectangle(self, other, p=2.): + """ + Compute the minimum distance between points in the two hyperrectangles. + + Parameters + ---------- + other : hyperrectangle + Input. + p : float + Input. + + """ + return minkowski_distance(0, np.maximum(0,np.maximum(self.mins-other.maxes,other.mins-self.maxes)),p) + + def max_distance_rectangle(self, other, p=2.): + """ + Compute the maximum distance between points in the two hyperrectangles. + + Parameters + ---------- + other : hyperrectangle + Input. + p : float, optional + Input. + + """ + return minkowski_distance(0, np.maximum(self.maxes-other.mins,other.maxes-self.mins),p) + + +class KDTree(object): + """ + kd-tree for quick nearest-neighbor lookup + + This class provides an index into a set of k-dimensional points which + can be used to rapidly look up the nearest neighbors of any point. + + Parameters + ---------- + data : (N,K) array_like + The data points to be indexed. This array is not copied, and + so modifying this data will result in bogus results. + leafsize : int, optional + The number of points at which the algorithm switches over to + brute-force. Has to be positive. + + Raises + ------ + RuntimeError + The maximum recursion limit can be exceeded for large data + sets. If this happens, either increase the value for the `leafsize` + parameter or increase the recursion limit by:: + + >>> import sys + >>> sys.setrecursionlimit(10000) + + Notes + ----- + The algorithm used is described in Maneewongvatana and Mount 1999. + The general idea is that the kd-tree is a binary tree, each of whose + nodes represents an axis-aligned hyperrectangle. Each node specifies + an axis and splits the set of points based on whether their coordinate + along that axis is greater than or less than a particular value. + + During construction, the axis and splitting point are chosen by the + "sliding midpoint" rule, which ensures that the cells do not all + become long and thin. + + The tree can be queried for the r closest neighbors of any given point + (optionally returning only those within some maximum distance of the + point). It can also be queried, with a substantial gain in efficiency, + for the r approximate closest neighbors. + + For large dimensions (20 is already large) do not expect this to run + significantly faster than brute force. High-dimensional nearest-neighbor + queries are a substantial open problem in computer science. + + The tree also supports all-neighbors queries, both with arrays of points + and with other kd-trees. These do use a reasonably efficient algorithm, + but the kd-tree is not necessarily the best data structure for this + sort of calculation. 
+ + """ + def __init__(self, data, leafsize=10): + self.data = np.asarray(data) + self.n, self.m = np.shape(self.data) + self.leafsize = int(leafsize) + if self.leafsize < 1: + raise ValueError("leafsize must be at least 1") + self.maxes = np.amax(self.data,axis=0) + self.mins = np.amin(self.data,axis=0) + + self.tree = self.__build(np.arange(self.n), self.maxes, self.mins) + + class node(object): + if sys.version_info[0] >= 3: + def __lt__(self, other): + return id(self) < id(other) + + def __gt__(self, other): + return id(self) > id(other) + + def __le__(self, other): + return id(self) <= id(other) + + def __ge__(self, other): + return id(self) >= id(other) + + def __eq__(self, other): + return id(self) == id(other) + + class leafnode(node): + def __init__(self, idx): + self.idx = idx + self.children = len(idx) + + class innernode(node): + def __init__(self, split_dim, split, less, greater): + self.split_dim = split_dim + self.split = split + self.less = less + self.greater = greater + self.children = less.children+greater.children + + def __build(self, idx, maxes, mins): + if len(idx) <= self.leafsize: + return KDTree.leafnode(idx) + else: + data = self.data[idx] + # maxes = np.amax(data,axis=0) + # mins = np.amin(data,axis=0) + d = np.argmax(maxes-mins) + maxval = maxes[d] + minval = mins[d] + if maxval == minval: + # all points are identical; warn user? + return KDTree.leafnode(idx) + data = data[:,d] + + # sliding midpoint rule; see Maneewongvatana and Mount 1999 + # for arguments that this is a good idea. + split = (maxval+minval)/2 + less_idx = np.nonzero(data <= split)[0] + greater_idx = np.nonzero(data > split)[0] + if len(less_idx) == 0: + split = np.amin(data) + less_idx = np.nonzero(data <= split)[0] + greater_idx = np.nonzero(data > split)[0] + if len(greater_idx) == 0: + split = np.amax(data) + less_idx = np.nonzero(data < split)[0] + greater_idx = np.nonzero(data >= split)[0] + if len(less_idx) == 0: + # _still_ zero? 
all must have the same value + if not np.all(data == data[0]): + raise ValueError("Troublesome data array: %s" % data) + split = data[0] + less_idx = np.arange(len(data)-1) + greater_idx = np.array([len(data)-1]) + + lessmaxes = np.copy(maxes) + lessmaxes[d] = split + greatermins = np.copy(mins) + greatermins[d] = split + return KDTree.innernode(d, split, + self.__build(idx[less_idx],lessmaxes,mins), + self.__build(idx[greater_idx],maxes,greatermins)) + + def __query(self, x, k=1, eps=0, p=2, distance_upper_bound=np.inf): + + side_distances = np.maximum(0,np.maximum(x-self.maxes,self.mins-x)) + if p != np.inf: + side_distances **= p + min_distance = np.sum(side_distances) + else: + min_distance = np.amax(side_distances) + + # priority queue for chasing nodes + # entries are: + # minimum distance between the cell and the target + # distances between the nearest side of the cell and the target + # the head node of the cell + q = [(min_distance, + tuple(side_distances), + self.tree)] + # priority queue for the nearest neighbors + # furthest known neighbor first + # entries are (-distance**p, i) + neighbors = [] + + if eps == 0: + epsfac = 1 + elif p == np.inf: + epsfac = 1/(1+eps) + else: + epsfac = 1/(1+eps)**p + + if p != np.inf and distance_upper_bound != np.inf: + distance_upper_bound = distance_upper_bound**p + + while q: + min_distance, side_distances, node = heappop(q) + if isinstance(node, KDTree.leafnode): + # brute-force + data = self.data[node.idx] + ds = minkowski_distance_p(data,x[np.newaxis,:],p) + for i in range(len(ds)): + if ds[i] < distance_upper_bound: + if len(neighbors) == k: + heappop(neighbors) + heappush(neighbors, (-ds[i], node.idx[i])) + if len(neighbors) == k: + distance_upper_bound = -neighbors[0][0] + else: + # we don't push cells that are too far onto the queue at all, + # but since the distance_upper_bound decreases, we might get + # here even if the cell's too far + if min_distance > distance_upper_bound*epsfac: + # since this is the nearest cell, we're done, bail out + break + # compute minimum distances to the children and push them on + if x[node.split_dim] < node.split: + near, far = node.less, node.greater + else: + near, far = node.greater, node.less + + # near child is at the same distance as the current node + heappush(q,(min_distance, side_distances, near)) + + # far child is further by an amount depending only + # on the split value + sd = list(side_distances) + if p == np.inf: + min_distance = max(min_distance, abs(node.split-x[node.split_dim])) + elif p == 1: + sd[node.split_dim] = np.abs(node.split-x[node.split_dim]) + min_distance = min_distance - side_distances[node.split_dim] + sd[node.split_dim] + else: + sd[node.split_dim] = np.abs(node.split-x[node.split_dim])**p + min_distance = min_distance - side_distances[node.split_dim] + sd[node.split_dim] + + # far child might be too far, if so, don't bother pushing it + if min_distance <= distance_upper_bound*epsfac: + heappush(q,(min_distance, tuple(sd), far)) + + if p == np.inf: + return sorted([(-d,i) for (d,i) in neighbors]) + else: + return sorted([((-d)**(1./p),i) for (d,i) in neighbors]) + + def query(self, x, k=1, eps=0, p=2, distance_upper_bound=np.inf): + """ + Query the kd-tree for nearest neighbors + + Parameters + ---------- + x : array_like, last dimension self.m + An array of points to query. + k : integer + The number of nearest neighbors to return. 
+ eps : nonnegative float + Return approximate nearest neighbors; the kth returned value + is guaranteed to be no further than (1+eps) times the + distance to the real kth nearest neighbor. + p : float, 1<=p<=infinity + Which Minkowski p-norm to use. + 1 is the sum-of-absolute-values "Manhattan" distance + 2 is the usual Euclidean distance + infinity is the maximum-coordinate-difference distance + distance_upper_bound : nonnegative float + Return only neighbors within this distance. This is used to prune + tree searches, so if you are doing a series of nearest-neighbor + queries, it may help to supply the distance to the nearest neighbor + of the most recent point. + + Returns + ------- + d : array of floats + The distances to the nearest neighbors. + If x has shape tuple+(self.m,), then d has shape tuple if + k is one, or tuple+(k,) if k is larger than one. Missing + neighbors are indicated with infinite distances. If k is None, + then d is an object array of shape tuple, containing lists + of distances. In either case the hits are sorted by distance + (nearest first). + i : array of integers + The locations of the neighbors in self.data. i is the same + shape as d. + + Examples + -------- + >>> from PathScripts import kdtree + >>> x, y = np.mgrid[0:5, 2:8] + >>> tree = kdtree.KDTree(zip(x.ravel(), y.ravel())) + >>> tree.data + array([[0, 2], + [0, 3], + [0, 4], + [0, 5], + [0, 6], + [0, 7], + [1, 2], + [1, 3], + [1, 4], + [1, 5], + [1, 6], + [1, 7], + [2, 2], + [2, 3], + [2, 4], + [2, 5], + [2, 6], + [2, 7], + [3, 2], + [3, 3], + [3, 4], + [3, 5], + [3, 6], + [3, 7], + [4, 2], + [4, 3], + [4, 4], + [4, 5], + [4, 6], + [4, 7]]) + >>> pts = np.array([[0, 0], [2.1, 2.9]]) + >>> tree.query(pts) + (array([ 2. , 0.14142136]), array([ 0, 13])) + + """ + x = np.asarray(x) + if np.shape(x)[-1] != self.m: + raise ValueError("x must consist of vectors of length %d but has shape %s" % (self.m, np.shape(x))) + if p < 1: + raise ValueError("Only p-norms with 1<=p<=infinity permitted") + retshape = np.shape(x)[:-1] + if retshape != (): + if k is None: + dd = np.empty(retshape,dtype=np.object) + ii = np.empty(retshape,dtype=np.object) + elif k > 1: + dd = np.empty(retshape+(k,),dtype=np.float) + dd.fill(np.inf) + ii = np.empty(retshape+(k,),dtype=np.int) + ii.fill(self.n) + elif k == 1: + dd = np.empty(retshape,dtype=np.float) + dd.fill(np.inf) + ii = np.empty(retshape,dtype=np.int) + ii.fill(self.n) + else: + raise ValueError("Requested %s nearest neighbors; acceptable numbers are integers greater than or equal to one, or None") + for c in np.ndindex(retshape): + hits = self.__query(x[c], k=k, eps=eps, p=p, distance_upper_bound=distance_upper_bound) + if k is None: + dd[c] = [d for (d,i) in hits] + ii[c] = [i for (d,i) in hits] + elif k > 1: + for j in range(len(hits)): + dd[c+(j,)], ii[c+(j,)] = hits[j] + elif k == 1: + if len(hits) > 0: + dd[c], ii[c] = hits[0] + else: + dd[c] = np.inf + ii[c] = self.n + return dd, ii + else: + hits = self.__query(x, k=k, eps=eps, p=p, distance_upper_bound=distance_upper_bound) + if k is None: + return [d for (d,i) in hits], [i for (d,i) in hits] + elif k == 1: + if len(hits) > 0: + return hits[0] + else: + return np.inf, self.n + elif k > 1: + dd = np.empty(k,dtype=np.float) + dd.fill(np.inf) + ii = np.empty(k,dtype=np.int) + ii.fill(self.n) + for j in range(len(hits)): + dd[j], ii[j] = hits[j] + return dd, ii + else: + raise ValueError("Requested %s nearest neighbors; acceptable numbers are integers greater than or equal to one, or None") + + def 
__query_ball_point(self, x, r, p=2., eps=0): + R = Rectangle(self.maxes, self.mins) + + def traverse_checking(node, rect): + if rect.min_distance_point(x, p) > r / (1. + eps): + return [] + elif rect.max_distance_point(x, p) < r * (1. + eps): + return traverse_no_checking(node) + elif isinstance(node, KDTree.leafnode): + d = self.data[node.idx] + return node.idx[minkowski_distance(d, x, p) <= r].tolist() + else: + less, greater = rect.split(node.split_dim, node.split) + return traverse_checking(node.less, less) + \ + traverse_checking(node.greater, greater) + + def traverse_no_checking(node): + if isinstance(node, KDTree.leafnode): + return node.idx.tolist() + else: + return traverse_no_checking(node.less) + \ + traverse_no_checking(node.greater) + + return traverse_checking(self.tree, R) + + def query_ball_point(self, x, r, p=2., eps=0): + """Find all points within distance r of point(s) x. + + Parameters + ---------- + x : array_like, shape tuple + (self.m,) + The point or points to search for neighbors of. + r : positive float + The radius of points to return. + p : float, optional + Which Minkowski p-norm to use. Should be in the range [1, inf]. + eps : nonnegative float, optional + Approximate search. Branches of the tree are not explored if their + nearest points are further than ``r / (1 + eps)``, and branches are + added in bulk if their furthest points are nearer than + ``r * (1 + eps)``. + + Returns + ------- + results : list or array of lists + If `x` is a single point, returns a list of the indices of the + neighbors of `x`. If `x` is an array of points, returns an object + array of shape tuple containing lists of neighbors. + + Notes + ----- + If you have many points whose neighbors you want to find, you may save + substantial amounts of time by putting them in a KDTree and using + query_ball_tree. + + Examples + -------- + >>> from PathScripts import kdtree + >>> x, y = np.mgrid[0:4, 0:4] + >>> points = zip(x.ravel(), y.ravel()) + >>> tree = kdtree.KDTree(points) + >>> tree.query_ball_point([2, 0], 1) + [4, 8, 9, 12] + + """ + x = np.asarray(x) + if x.shape[-1] != self.m: + raise ValueError("Searching for a %d-dimensional point in a " + "%d-dimensional KDTree" % (x.shape[-1], self.m)) + if len(x.shape) == 1: + return self.__query_ball_point(x, r, p, eps) + else: + retshape = x.shape[:-1] + result = np.empty(retshape, dtype=np.object) + for c in np.ndindex(retshape): + result[c] = self.__query_ball_point(x[c], r, p=p, eps=eps) + return result + + def query_ball_tree(self, other, r, p=2., eps=0): + """Find all pairs of points whose distance is at most r + + Parameters + ---------- + other : KDTree instance + The tree containing points to search against. + r : float + The maximum distance, has to be positive. + p : float, optional + Which Minkowski norm to use. `p` has to meet the condition + ``1 <= p <= infinity``. + eps : float, optional + Approximate search. Branches of the tree are not explored + if their nearest points are further than ``r/(1+eps)``, and + branches are added in bulk if their furthest points are nearer + than ``r * (1+eps)``. `eps` has to be non-negative. + + Returns + ------- + results : list of lists + For each element ``self.data[i]`` of this tree, ``results[i]`` is a + list of the indices of its neighbors in ``other.data``. 
+ + """ + results = [[] for i in range(self.n)] + + def traverse_checking(node1, rect1, node2, rect2): + if rect1.min_distance_rectangle(rect2, p) > r/(1.+eps): + return + elif rect1.max_distance_rectangle(rect2, p) < r*(1.+eps): + traverse_no_checking(node1, node2) + elif isinstance(node1, KDTree.leafnode): + if isinstance(node2, KDTree.leafnode): + d = other.data[node2.idx] + for i in node1.idx: + results[i] += node2.idx[minkowski_distance(d,self.data[i],p) <= r].tolist() + else: + less, greater = rect2.split(node2.split_dim, node2.split) + traverse_checking(node1,rect1,node2.less,less) + traverse_checking(node1,rect1,node2.greater,greater) + elif isinstance(node2, KDTree.leafnode): + less, greater = rect1.split(node1.split_dim, node1.split) + traverse_checking(node1.less,less,node2,rect2) + traverse_checking(node1.greater,greater,node2,rect2) + else: + less1, greater1 = rect1.split(node1.split_dim, node1.split) + less2, greater2 = rect2.split(node2.split_dim, node2.split) + traverse_checking(node1.less,less1,node2.less,less2) + traverse_checking(node1.less,less1,node2.greater,greater2) + traverse_checking(node1.greater,greater1,node2.less,less2) + traverse_checking(node1.greater,greater1,node2.greater,greater2) + + def traverse_no_checking(node1, node2): + if isinstance(node1, KDTree.leafnode): + if isinstance(node2, KDTree.leafnode): + for i in node1.idx: + results[i] += node2.idx.tolist() + else: + traverse_no_checking(node1, node2.less) + traverse_no_checking(node1, node2.greater) + else: + traverse_no_checking(node1.less, node2) + traverse_no_checking(node1.greater, node2) + + traverse_checking(self.tree, Rectangle(self.maxes, self.mins), + other.tree, Rectangle(other.maxes, other.mins)) + return results + + def query_pairs(self, r, p=2., eps=0): + """ + Find all pairs of points within a distance. + + Parameters + ---------- + r : positive float + The maximum distance. + p : float, optional + Which Minkowski norm to use. `p` has to meet the condition + ``1 <= p <= infinity``. + eps : float, optional + Approximate search. Branches of the tree are not explored + if their nearest points are further than ``r/(1+eps)``, and + branches are added in bulk if their furthest points are nearer + than ``r * (1+eps)``. `eps` has to be non-negative. + + Returns + ------- + results : set + Set of pairs ``(i,j)``, with ``i < j``, for which the corresponding + positions are close. 
+ + """ + results = set() + + def traverse_checking(node1, rect1, node2, rect2): + if rect1.min_distance_rectangle(rect2, p) > r/(1.+eps): + return + elif rect1.max_distance_rectangle(rect2, p) < r*(1.+eps): + traverse_no_checking(node1, node2) + elif isinstance(node1, KDTree.leafnode): + if isinstance(node2, KDTree.leafnode): + # Special care to avoid duplicate pairs + if id(node1) == id(node2): + d = self.data[node2.idx] + for i in node1.idx: + for j in node2.idx[minkowski_distance(d,self.data[i],p) <= r]: + if i < j: + results.add((i,j)) + else: + d = self.data[node2.idx] + for i in node1.idx: + for j in node2.idx[minkowski_distance(d,self.data[i],p) <= r]: + if i < j: + results.add((i,j)) + elif j < i: + results.add((j,i)) + else: + less, greater = rect2.split(node2.split_dim, node2.split) + traverse_checking(node1,rect1,node2.less,less) + traverse_checking(node1,rect1,node2.greater,greater) + elif isinstance(node2, KDTree.leafnode): + less, greater = rect1.split(node1.split_dim, node1.split) + traverse_checking(node1.less,less,node2,rect2) + traverse_checking(node1.greater,greater,node2,rect2) + else: + less1, greater1 = rect1.split(node1.split_dim, node1.split) + less2, greater2 = rect2.split(node2.split_dim, node2.split) + traverse_checking(node1.less,less1,node2.less,less2) + traverse_checking(node1.less,less1,node2.greater,greater2) + + # Avoid traversing (node1.less, node2.greater) and + # (node1.greater, node2.less) (it's the same node pair twice + # over, which is the source of the complication in the + # original KDTree.query_pairs) + if id(node1) != id(node2): + traverse_checking(node1.greater,greater1,node2.less,less2) + + traverse_checking(node1.greater,greater1,node2.greater,greater2) + + def traverse_no_checking(node1, node2): + if isinstance(node1, KDTree.leafnode): + if isinstance(node2, KDTree.leafnode): + # Special care to avoid duplicate pairs + if id(node1) == id(node2): + for i in node1.idx: + for j in node2.idx: + if i < j: + results.add((i,j)) + else: + for i in node1.idx: + for j in node2.idx: + if i < j: + results.add((i,j)) + elif j < i: + results.add((j,i)) + else: + traverse_no_checking(node1, node2.less) + traverse_no_checking(node1, node2.greater) + else: + # Avoid traversing (node1.less, node2.greater) and + # (node1.greater, node2.less) (it's the same node pair twice + # over, which is the source of the complication in the + # original KDTree.query_pairs) + if id(node1) == id(node2): + traverse_no_checking(node1.less, node2.less) + traverse_no_checking(node1.less, node2.greater) + traverse_no_checking(node1.greater, node2.greater) + else: + traverse_no_checking(node1.less, node2) + traverse_no_checking(node1.greater, node2) + + traverse_checking(self.tree, Rectangle(self.maxes, self.mins), + self.tree, Rectangle(self.maxes, self.mins)) + return results + + def count_neighbors(self, other, r, p=2.): + """ + Count how many nearby pairs can be formed. + + Count the number of pairs (x1,x2) can be formed, with x1 drawn + from self and x2 drawn from `other`, and where + ``distance(x1, x2, p) <= r``. + This is the "two-point correlation" described in Gray and Moore 2000, + "N-body problems in statistical learning", and the code here is based + on their algorithm. + + Parameters + ---------- + other : KDTree instance + The other tree to draw points from. + r : float or one-dimensional array of floats + The radius to produce a count for. Multiple radii are searched with + a single tree traversal. 
+ p : float, 1<=p<=infinity + Which Minkowski p-norm to use + + Returns + ------- + result : int or 1-D array of ints + The number of pairs. Note that this is internally stored in a numpy + int, and so may overflow if very large (2e9). + + """ + def traverse(node1, rect1, node2, rect2, idx): + min_r = rect1.min_distance_rectangle(rect2,p) + max_r = rect1.max_distance_rectangle(rect2,p) + c_greater = r[idx] > max_r + result[idx[c_greater]] += node1.children*node2.children + idx = idx[(min_r <= r[idx]) & (r[idx] <= max_r)] + if len(idx) == 0: + return + + if isinstance(node1,KDTree.leafnode): + if isinstance(node2,KDTree.leafnode): + ds = minkowski_distance(self.data[node1.idx][:,np.newaxis,:], + other.data[node2.idx][np.newaxis,:,:], + p).ravel() + ds.sort() + result[idx] += np.searchsorted(ds,r[idx],side='right') + else: + less, greater = rect2.split(node2.split_dim, node2.split) + traverse(node1, rect1, node2.less, less, idx) + traverse(node1, rect1, node2.greater, greater, idx) + else: + if isinstance(node2,KDTree.leafnode): + less, greater = rect1.split(node1.split_dim, node1.split) + traverse(node1.less, less, node2, rect2, idx) + traverse(node1.greater, greater, node2, rect2, idx) + else: + less1, greater1 = rect1.split(node1.split_dim, node1.split) + less2, greater2 = rect2.split(node2.split_dim, node2.split) + traverse(node1.less,less1,node2.less,less2,idx) + traverse(node1.less,less1,node2.greater,greater2,idx) + traverse(node1.greater,greater1,node2.less,less2,idx) + traverse(node1.greater,greater1,node2.greater,greater2,idx) + + R1 = Rectangle(self.maxes, self.mins) + R2 = Rectangle(other.maxes, other.mins) + if np.shape(r) == (): + r = np.array([r]) + result = np.zeros(1,dtype=int) + traverse(self.tree, R1, other.tree, R2, np.arange(1)) + return result[0] + elif len(np.shape(r)) == 1: + r = np.asarray(r) + n, = r.shape + result = np.zeros(n,dtype=int) + traverse(self.tree, R1, other.tree, R2, np.arange(n)) + return result + else: + raise ValueError("r must be either a single value or a one-dimensional array of values") + + +def distance_matrix(x,y,p=2,threshold=1000000): + """ + Compute the distance matrix. + + Returns the matrix of all pair-wise distances. + + Parameters + ---------- + x : (M, K) array_like + TODO: description needed + y : (N, K) array_like + TODO: description needed + p : float, 1 <= p <= infinity + Which Minkowski p-norm to use. + threshold : positive int + If ``M * N * K`` > `threshold`, algorithm uses a Python loop instead + of large temporary arrays. + + Returns + ------- + result : (M, N) ndarray + Distance matrix. + + Examples + -------- + >>> distance_matrix([[0,0],[0,1]], [[1,0],[1,1]]) + array([[ 1. , 1.41421356], + [ 1.41421356, 1. ]]) + + """ + + x = np.asarray(x) + m, k = x.shape + y = np.asarray(y) + n, kk = y.shape + + if k != kk: + raise ValueError("x contains %d-dimensional vectors but y contains %d-dimensional vectors" % (k, kk)) + + if m*n*k <= threshold: + return minkowski_distance(x[:,np.newaxis,:],y[np.newaxis,:,:],p) + else: + result = np.empty((m,n),dtype=np.float) # FIXME: figure out the best dtype + if m < n: + for i in range(m): + result[i,:] = minkowski_distance(x[i],y,p) + else: + for j in range(n): + result[:,j] = minkowski_distance(x,y[j],p) + return result
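A minimal usage sketch of the fallback import and the vendored KDTree, mirroring what features_by_centers() in PathHelix.py does above. It is not part of the patch: the hole-center coordinates, the probe point, and the radius are made up for illustration; only KDTree, query and query_ball_point come from the module itself.

>>> try:
...     from scipy.spatial import KDTree
... except ImportError:
...     from PathScripts.kdtree import KDTree
>>> centers = [(0.0, 0.0), (10.0, 0.0), (10.0, 10.0)]  # hypothetical drill-hole centers
>>> tree = KDTree(centers)
>>> distance, index = tree.query((9.8, 0.3))           # nearest neighbor of a probe point
>>> int(index)
1
>>> tree.query_ball_point((0.0, 0.0), 1.0)             # indices of all points within radius 1.0
[0]

Both the SciPy class and this extracted copy accept a sequence of k-dimensional points and expose the same query methods, which is what allows the try/except import above to work as a drop-in replacement.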