forked from ukosuagwu/scikit-learn
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgraph.py
More file actions
160 lines (134 loc) · 5.44 KB
/
Copy pathgraph.py
File metadata and controls
160 lines (134 loc) · 5.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
"""
Graph utilities and algorithms
Graphs are represented with their adjacency matrices, preferably using
sparse matrices.
"""
# Authors: Aric Hagberg <hagberg@lanl.gov>
# Gael Varoquaux <gael.varoquaux@normalesup.org>
# Jake Vanderplas <vanderplas@astro.washington.edu>
# License: BSD 3 clause
import numpy as np
from scipy import sparse
from ..metrics.pairwise import pairwise_distances
###############################################################################
# Path and connected component analysis.
# Code adapted from networkx
def single_source_shortest_path_length(graph, source, *, cutoff=None):
"""Return the length of the shortest path from source to all reachable nodes.
Parameters
----------
graph : {sparse matrix, ndarray} of shape (n_nodes, n_nodes)
Adjacency matrix of the graph. Sparse matrix of format LIL is
preferred.
source : int
Start node for path.
cutoff : int, default=None
Depth to stop the search - only paths of length <= cutoff are returned.
Returns
-------
paths : dict
Reachable end nodes mapped to length of path from source,
i.e. `{end: path_length}`.
Examples
--------
>>> from sklearn.utils.graph import single_source_shortest_path_length
>>> import numpy as np
>>> graph = np.array([[ 0, 1, 0, 0],
... [ 1, 0, 1, 0],
... [ 0, 1, 0, 0],
... [ 0, 0, 0, 0]])
>>> single_source_shortest_path_length(graph, 0)
{0: 0, 1: 1, 2: 2}
>>> graph = np.ones((6, 6))
>>> sorted(single_source_shortest_path_length(graph, 2).items())
[(0, 1), (1, 1), (2, 0), (3, 1), (4, 1), (5, 1)]
"""
if sparse.isspmatrix(graph):
graph = graph.tolil()
else:
graph = sparse.lil_matrix(graph)
seen = {} # level (number of hops) when seen in BFS
level = 0 # the current level
next_level = [source] # dict of nodes to check at next level
while next_level:
this_level = next_level # advance to next level
next_level = set() # and start a new list (fringe)
for v in this_level:
if v not in seen:
seen[v] = level # set the level of vertex v
next_level.update(graph.rows[v])
if cutoff is not None and cutoff <= level:
break
level += 1
return seen # return all path lengths as dictionary
def _fix_connected_components(
X,
graph,
n_connected_components,
component_labels,
mode="distance",
metric="euclidean",
**kwargs,
):
"""Add connections to sparse graph to connect unconnected components.
For each pair of unconnected components, compute all pairwise distances
from one component to the other, and add a connection on the closest pair
of samples. This is a hacky way to get a graph with a single connected
component, which is necessary for example to compute a shortest path
between all pairs of samples in the graph.
Parameters
----------
X : array of shape (n_samples, n_features) or (n_samples, n_samples)
Features to compute the pairwise distances. If `metric =
"precomputed"`, X is the matrix of pairwise distances.
graph : sparse matrix of shape (n_samples, n_samples)
Graph of connection between samples.
n_connected_components : int
Number of connected components, as computed by
`scipy.sparse.csgraph.connected_components`.
component_labels : array of shape (n_samples)
Labels of connected components, as computed by
`scipy.sparse.csgraph.connected_components`.
mode : {'connectivity', 'distance'}, default='distance'
Type of graph matrix: 'connectivity' corresponds to the connectivity
matrix with ones and zeros, and 'distance' corresponds to the distances
between neighbors according to the given metric.
metric : str
Metric used in `sklearn.metrics.pairwise.pairwise_distances`.
kwargs : kwargs
Keyword arguments passed to
`sklearn.metrics.pairwise.pairwise_distances`.
Returns
-------
graph : sparse matrix of shape (n_samples, n_samples)
Graph of connection between samples, with a single connected component.
"""
if metric == "precomputed" and sparse.issparse(X):
raise RuntimeError(
"_fix_connected_components with metric='precomputed' requires the "
"full distance matrix in X, and does not work with a sparse "
"neighbors graph."
)
for i in range(n_connected_components):
idx_i = np.flatnonzero(component_labels == i)
Xi = X[idx_i]
for j in range(i):
idx_j = np.flatnonzero(component_labels == j)
Xj = X[idx_j]
if metric == "precomputed":
D = X[np.ix_(idx_i, idx_j)]
else:
D = pairwise_distances(Xi, Xj, metric=metric, **kwargs)
ii, jj = np.unravel_index(D.argmin(axis=None), D.shape)
if mode == "connectivity":
graph[idx_i[ii], idx_j[jj]] = 1
graph[idx_j[jj], idx_i[ii]] = 1
elif mode == "distance":
graph[idx_i[ii], idx_j[jj]] = D[ii, jj]
graph[idx_j[jj], idx_i[ii]] = D[ii, jj]
else:
raise ValueError(
"Unknown mode=%r, should be one of ['connectivity', 'distance']."
% mode
)
return graph