forked from prakhar1989/Algorithms
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgraph_algorithms.py
More file actions
265 lines (239 loc) · 8.86 KB
/
graph_algorithms.py
File metadata and controls
265 lines (239 loc) · 8.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
from collections import deque
from copy import deepcopy
from union_find.unionfind import UnionFind
import heapq
def BFS(gr, s):
    """ Breadth first search

    Returns a list of the nodes that are "findable" (reachable) from s,
    in the order they were discovered; s itself is always the first entry.

    gr -- graph object providing has_node(node) and neighbors(node)
    s  -- node the search starts from

    Raises Exception if s is not a node of gr.
    """
    if not gr.has_node(s):
        raise Exception("Node %s not in graph" % s)
    nodes_explored = [s]        # discovery order; this is the return value
    # Same nodes as nodes_explored, kept as a set so the "already seen?"
    # test below is O(1) instead of a scan of the whole list.
    seen = set(nodes_explored)
    q = deque(nodes_explored)
    while q:
        node = q.popleft()
        for each in gr.neighbors(node):
            if each not in seen:
                seen.add(each)
                nodes_explored.append(each)
                q.append(each)
    return nodes_explored
def shortest_hops(gr, s):
    """ Finds the shortest number of hops required
    to reach a node from s. Returns a dict with mapping:
    destination node from s -> no. of hops

    Every node returned by gr.nodes() appears in the result; nodes that
    cannot be reached from s map to float('inf') and s maps to 0.

    gr -- graph object providing has_node, nodes and neighbors
    s  -- source node

    Raises Exception if s is not a node of gr.
    """
    if not gr.has_node(s):
        raise Exception("Node %s is not in graph" % s)
    # Every node starts out unreachable; the source is zero hops away.
    dist = {}
    for n in gr.nodes():
        dist[n] = float('inf')
    dist[s] = 0
    seen = set([s])             # O(1) "already discovered?" test
    q = deque([s])
    while q:
        node = q.popleft()
        for each in gr.neighbors(node):
            if each not in seen:
                seen.add(each)
                q.append(each)
                # BFS discovers nodes in non-decreasing hop count, so the
                # first time a node is seen is via a shortest route.
                dist[each] = dist[node] + 1
    return dist
def undirected_connected_components(gr):
    """ Returns a list of connected components
    in an undirected graph

    Each component is whatever BFS returns for the first not-yet-explored
    node met while walking gr.nodes(), so components appear in that order.

    Raises Exception if gr.DIRECTED is truthy.
    """
    if gr.DIRECTED:
        raise Exception("This method works only with a undirected graph")
    explored = set()            # set: O(1) membership test per node
    con_components = []
    for node in gr.nodes():
        if node not in explored:
            reachable_nodes = BFS(gr, node)
            con_components.append(reachable_nodes)
            explored.update(reachable_nodes)
    return con_components
def DFS(gr, s):
    """ Depth first search wrapper

    Starts depth_first_search at s with a fresh, empty list and returns
    that list once the search has filled it in.
    """
    visited = []
    depth_first_search(gr, s, visited)
    return visited
def depth_first_search(gr, s, path):
    """ Depth first search

    Appends to `path`, in place, every node "findable" from s that is not
    already in `path`, in depth-first pre-order (neighbors are tried in the
    order gr.neighbors returns them).

    gr   -- graph object providing neighbors(node)
    s    -- node to start from
    path -- list of already visited nodes; mutated in place

    Returns False when s is already in `path` (nothing is done),
    otherwise None. The result of the search is the content of `path`.
    """
    # Companion set so membership tests do not scan the list each time.
    seen = set(path)
    if s in seen:
        return False
    path.append(s)
    seen.add(s)
    # Explicit stack of neighbor iterators instead of recursion, so deep
    # graphs cannot exceed the interpreter's recursion limit. The top
    # iterator is resumed where it stopped, which reproduces the recursive
    # visiting order exactly.
    stack = [iter(gr.neighbors(s))]
    while stack:
        for each in stack[-1]:
            if each not in seen:
                seen.add(each)
                path.append(each)
                stack.append(iter(gr.neighbors(each)))
                break           # descend into `each` first
        else:
            stack.pop()         # neighbors exhausted: backtrack
def topological_ordering(digr_ori):
    """ Returns a topological ordering for a
    acyclic directed graph

    The result is a list of (node, position) tuples. Sink nodes are peeled
    off one at a time, so the first tuple carries the highest position
    (the number of nodes) and the last one carries position 1.

    digr_ori -- directed graph; it is deep-copied, so the caller's graph
                is not modified. Must be acyclic.

    Raises Exception if digr_ori.DIRECTED is falsy.
    """
    if not digr_ori.DIRECTED:
        # Format the argument itself: the working copy `digr` is not
        # assigned yet at this point.
        raise Exception("%s is not a directed graph" % digr_ori)
    digr = deepcopy(digr_ori)
    ordering = []
    n = len(digr.nodes())
    while n > 0:
        sink_node = find_sink_node(digr)
        ordering.append((sink_node, n))
        # Removing the sink exposes a new sink for the next round.
        digr.del_node(sink_node)
        n -= 1
    return ordering
def find_sink_node(digr):
    """ Finds a sink node (a node with no outgoing arcs, i.e. one for
    which digr.neighbors returns an empty result) in the directed graph.
    Valid for a acyclic graph only

    Starts at the first node of digr.nodes() and keeps following the first
    neighbor until a node without neighbors is reached.

    Raises ValueError if the walk comes back to a node it has already
    visited: the graph then contains a cycle and the walk would never end.
    An empty graph is not handled (indexing digr.nodes() fails).
    """
    # first node is taken as a default
    node = digr.nodes()[0]
    visited = set([node])
    successors = digr.neighbors(node)
    while successors:
        node = successors[0]
        if node in visited:
            raise ValueError("Graph contains a cycle; no sink node reachable")
        visited.add(node)
        successors = digr.neighbors(node)
    return node
def directed_connected_components(digr):
    """ Returns a list of strongly connected components
    in a directed graph using Kosaraju's two pass algorithm

    Pass 1 runs DFS_loop on digr.get_transpose() to obtain the nodes in
    increasing order of finishing time. Pass 2 walks those nodes in
    decreasing finishing order and runs outer_dfs on the original graph;
    the nodes collected by each non-empty search form one component.

    Raises Exception if digr.DIRECTED is falsy.
    """
    if not digr.DIRECTED:
        raise Exception("%s is not a directed graph" % digr)
    finishing_times = DFS_loop(digr.get_transpose())
    # A set is enough here: outer_dfs only tests membership on it.
    nodes_explored, connected_components = set(), []
    # use finishing_times in descending order
    for node in reversed(finishing_times):
        component = []
        outer_dfs(digr, node, nodes_explored, component)
        if component:
            nodes_explored.update(component)
            connected_components.append(component)
    return connected_components
def outer_dfs(digr, node, nodes_explored, path):
    """ Depth first search that skips nodes already assigned elsewhere.

    Appends to `path`, in place and in depth-first pre-order, every node
    reachable from `node` without passing through a node that is in
    `nodes_explored` or already in `path`.

    digr           -- graph object providing neighbors(node)
    node           -- node to start from
    nodes_explored -- container of nodes to leave alone; only tested
                      with `in`, never modified here
    path           -- list collecting the visited nodes; mutated in place

    Returns False when `node` itself is already in `path` or in
    `nodes_explored` (nothing is done), otherwise None.
    """
    in_path = set(path)         # O(1) lookup companion to `path`
    if node in in_path or node in nodes_explored:
        return False
    path.append(node)
    in_path.add(node)
    # Explicit stack of neighbor iterators instead of recursion, so large
    # components cannot exceed the interpreter's recursion limit.
    stack = [iter(digr.neighbors(node))]
    while stack:
        for each in stack[-1]:
            # A neighbor is visited only if it is in NEITHER collection.
            if each not in in_path and each not in nodes_explored:
                in_path.add(each)
                path.append(each)
                stack.append(iter(digr.neighbors(each)))
                break           # descend into `each` first
        else:
            stack.pop()         # neighbors exhausted: backtrack
def DFS_loop(digr):
    """ Core DFS loop used to find strongly connected components
    in a directed graph

    Runs inner_DFS from every node of digr.nodes() that has not been
    explored yet and returns the list that inner_DFS fills in: the nodes
    in increasing order of finishing time.
    """
    node_explored = []      # list for keeping track of nodes explored
    finishing_times = []    # list for adding nodes based on their finishing times
    for node in digr.nodes():
        if node not in node_explored:
            inner_DFS(digr, node, node_explored, finishing_times)
    return finishing_times
def inner_DFS(digr, node, node_explored, finishing_times):
    """ Inner DFS used in DFS loop method

    Marks `node` and everything newly reachable from it as explored and
    records each of those nodes in `finishing_times` at the moment all of
    its neighbors have been dealt with (post-order).

    digr            -- graph object providing neighbors(node)
    node            -- start node; it is marked explored unconditionally
    node_explored   -- list of explored nodes; appended to in place
    finishing_times -- list of nodes in increasing finishing time;
                       appended to in place

    Returns None.
    """
    # O(1) lookup companion to node_explored, built once per call.
    seen = set(node_explored)
    node_explored.append(node)  # mark explored
    seen.add(node)
    # Explicit stack of (node, neighbor iterator) pairs instead of
    # recursion, so deep graphs cannot exceed the recursion limit.
    stack = [(node, iter(digr.neighbors(node)))]
    while stack:
        current, pending = stack[-1]
        for each in pending:
            if each not in seen:
                node_explored.append(each)
                seen.add(each)
                stack.append((each, iter(digr.neighbors(each))))
                break           # descend into `each` first
        else:
            # adds nodes based on increasing order of finishing times
            stack.pop()
            finishing_times.append(current)
def shortest_path(digr, s):
    """ Finds the shortest path from s to every other vertex findable
    from s using Dijkstra's algorithm. Uses a heap with lazy deletion:
    every edge leaving a settled node is pushed once and stale entries
    are skipped when popped.

    digr -- graph object providing neighbors(node) and
            get_edge_weight((u, v)); edge weights are assumed to be
            non-negative, as Dijkstra's algorithm requires
    s    -- source node

    Returns a dict mapping each node reachable from s (s included, at
    distance 0) to the length of its shortest path. Unreachable nodes are
    absent from the dict.

    Heap entries are (distance, node) tuples, so nodes at equal distance
    are compared with each other by the heap.
    """
    dist = {}
    frontier = [(0, s)]
    while frontier:
        d, node = heapq.heappop(frontier)
        if node in dist:
            # Stale entry: node was already settled via a shorter path.
            continue
        dist[node] = d
        for v in digr.neighbors(node):
            if v not in dist:
                heapq.heappush(frontier, (d + digr.get_edge_weight((node, v)), v))
    return dist
def compute_min_dist(digr, n, nodes_explored, dist):
    """ Computes the min dist of node n from a set of
    nodes explored in digr, using dist dict. Used in shortest path

    For every explored node v that has an edge (v, n), the candidate is
    dist[v] + weight of (v, n); the smallest candidate is returned.

    digr           -- graph providing has_edge((u, v)) and
                      get_edge_weight((u, v))
    n              -- node whose tentative distance is wanted
    nodes_explored -- iterable of nodes whose distance is final
    dist           -- dict node -> final distance, for explored nodes

    Returns float('inf') when no explored node has an edge to n.
    """
    candidates = [
        dist[v] + digr.get_edge_weight((v, n))
        for v in nodes_explored
        if digr.has_edge((v, n))
    ]
    if not candidates:
        return float('inf')
    return min(candidates)
def minimum_spanning_tree(gr):
    """ Uses prim's algorithm to return the minimum
    cost spanning tree in a undirected connected graph.
    Works only with undirected and connected graphs

    Only the total cost of the tree is returned, not its edges.

    gr -- graph object providing nodes(), neighbors(node) and
          get_edge_weight((u, v))

    Returns the sum of the weights of the chosen edges; 0 for an empty
    or single-node graph; float('inf') when some node cannot be reached
    from the start node (graph not connected).

    Heap entries are (cost, node) tuples, so nodes offered at equal cost
    are compared with each other by the heap.
    """
    # Work on a copy: the list handed out by gr.nodes() is never modified.
    nodes = list(gr.nodes())
    if not nodes:
        return 0
    explored = set()
    min_cost = 0
    # Lazy Prim: the heap holds (cost to attach, node) offers; the start
    # node is offered at cost 0. Offers for nodes that were attached in
    # the meantime are skipped when popped.
    frontier = [(0, nodes[0])]
    while frontier and len(explored) < len(nodes):
        node_cost, min_node = heapq.heappop(frontier)
        if min_node in explored:
            continue
        # adds the cheapest to "explored"
        explored.add(min_node)
        min_cost += node_cost
        for v in gr.neighbors(min_node):
            if v not in explored:
                heapq.heappush(frontier, (gr.get_edge_weight((v, min_node)), v))
    if len(explored) < len(nodes):
        # Some node has no edge into the tree: no spanning tree exists.
        return float('inf')
    return min_cost
def compute_key(gr, n, nodes_explored):
    """ computes minimum key for node n from a set of nodes_explored
    in graph gr. Used in Prim's implementation

    The key is the smallest weight among the edges (n, v) whose other
    endpoint v is a neighbor of n contained in nodes_explored.

    gr             -- graph providing neighbors(node) and
                      get_edge_weight((u, v))
    n              -- node whose key is wanted
    nodes_explored -- container of nodes already in the tree

    Returns float('inf') when n has no neighbor in nodes_explored.
    """
    weights = [
        gr.get_edge_weight((n, v))
        for v in gr.neighbors(n)
        if v in nodes_explored
    ]
    if not weights:
        return float('inf')
    return min(weights)
def kruskal_MST(gr):
    """ computes minimum cost spanning tree in a undirected,
    connected graph using Kruskal's MST. Uses union-find data structure
    for running times of O(mlogn)

    Walks the (weight, (u, v)) entries of gr.get_edge_weights() in sorted
    order and keeps an edge whenever its endpoints are not yet known to
    the union-find or have different leaders. Returns the summed weight
    of the kept edges.
    """
    uf = UnionFind()
    total_cost = 0
    for weight, (u, v) in sorted(gr.get_edge_weights()):
        leader_u = uf.get_leader(u)
        leader_v = uf.get_leader(v)
        # NOTE(review): "unknown node" is detected by a falsy leader; if a
        # valid leader can itself be falsy (e.g. node 0) this test would
        # misfire -- confirm against UnionFind.get_leader.
        neither_known = not leader_u and not leader_v
        if neither_known or leader_u != leader_v:
            uf.insert(u, v)
            total_cost += weight
    return total_cost
def max_k_clustering(gr, k):
    """ Groups the nodes of gr into at most k clusters by merging the
    clusters joined by the cheapest remaining edge, one edge at a time.

    gr -- graph object providing nodes() and get_edge_weights(), the
          latter yielding (weight, (u, v)) entries
    k  -- number of clusters to stop at

    Returns uf.get_sets() as soon as the number of groups is <= k.
    Returns None when the edges run out while there are still more than
    k groups.
    """
    sorted_edges = sorted(gr.get_edge_weights())
    uf = UnionFind()
    #initialize each node as its cluster
    for n in gr.nodes():
        uf.insert(n)
    for (w, (u, v)) in sorted_edges:
        if uf.count_groups() <= k:
            break
        leader_u = uf.get_leader(u)
        leader_v = uf.get_leader(v)
        if leader_u != leader_v:
            uf.make_union(leader_u, leader_v)
    # Checked after the loop as well: the union that reaches k groups may
    # be triggered by the very last edge (or there may be no edges at all).
    if uf.count_groups() <= k:
        return uf.get_sets()
    return None
def compute_spacing(c1, c2, graph=None):
    """ Returns the spacing between clusters c1 and c2: the smallest value
    of get_edge_weight((n, v)) over all pairs with n in c1 and v in c2.

    c1, c2 -- iterables of nodes
    graph  -- graph object providing get_edge_weight((u, v)). When left
              out, the module-level name `gr` is used, which must then
              exist (NameError otherwise).

    Returns float('inf') when either cluster is empty.
    """
    if graph is None:
        # Backward-compatible fallback for callers that rely on a
        # module-level graph called `gr`.
        graph = gr
    smallest = float('inf')
    for n in c1:
        for v in c2:
            cost = graph.get_edge_weight((n, v))
            if cost < smallest:
                smallest = cost
    return smallest


def get_max_spacing(clusters, graph=None):
    """ Returns the smallest compute_spacing value over all ordered pairs
    of clusters that compare unequal, i.e. the distance between the two
    closest clusters.

    clusters -- iterable of clusters (each an iterable of nodes); it is
                iterated twice, so it must be re-iterable
    graph    -- passed through to compute_spacing

    Returns float('inf') when there are fewer than two distinct clusters.
    """
    smallest = float('inf')
    for u in clusters:
        for v in clusters:
            if u != v:
                spacing = compute_spacing(u, v, graph)
                if spacing < smallest:
                    smallest = spacing
    return smallest