-
Notifications
You must be signed in to change notification settings - Fork 266
Expand file tree
/
Copy pathobjects.py
More file actions
854 lines (706 loc) · 31.7 KB
/
objects.py
File metadata and controls
854 lines (706 loc) · 31.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
from typing import Union, Sequence
from collections import defaultdict
from itertools import repeat
from warnings import warn
from igraph.datatypes import UniqueIdGenerator
def _construct_graph_from_dict_list(
    cls,
    vertices,
    edges,
    directed: bool = False,
    vertex_name_attr: str = "name",
    edge_foreign_keys=("source", "target"),
    iterative: bool = False,
):
    """Builds a graph from two collections of dictionaries.

    One collection describes the vertices and the other one the edges; every
    dictionary carries the attributes of a single vertex or edge (e.g. name,
    weight). Each edge dictionary must additionally contain the two keys
    named in C{edge_foreign_keys}, which identify the endpoints of the edge.

    Any iterable is accepted as long as it yields dictionaries or dict-like
    objects providing C{items} and C{__getitem__}; the rows of a database
    query result are a typical example.

    @param vertices: the dictionaries describing the vertices, or C{None}
      (or any other falsy value) when the vertex names should be inferred
      from the edge dictionaries alone.
    @param edges: the dictionaries describing the edges. Every item of a
      dictionary, including the two endpoint keys, is stored as an edge
      attribute.
    @param directed: whether the constructed graph will be directed
    @param vertex_name_attr: the key of the vertex dictionaries that holds
      the vertex name. When no vertex dictionaries are given, it is only
      used as the name of the vertex attribute that stores the names.
    @param edge_foreign_keys: pair of keys that hold the source (1st) and
      the target (2nd) vertex name in each edge dictionary.
    @param iterative: whether to add the edges to the graph one by one
      instead of collecting a complete edge list first and constructing the
      graph in a single step. According to the original documentation the
      batch approach is faster but may be unsuitable for large datasets.
      The default is the batch approach.
    @return: the graph that was constructed
    @raise AttributeError: if vertex dictionaries were given but none of
      them has the C{vertex_name_attr} key
    @raise ValueError: if the vertex names are not unique

    Example:

    >>> vertices = [{'name': 'apple'}, {'name': 'pear'}, {'name': 'peach'}]
    >>> edges = [{'source': 'apple', 'target': 'pear', 'weight': 1.2},
    ...          {'source': 'apple', 'target': 'peach', 'weight': 0.9}]
    >>> g = Graph.DictList(vertices, edges)

    The graph has three vertices with names and two edges with weights.
    """

    def to_dense_column(pairs, length):
        # Expand sparse (position, value) pairs into a list of the given
        # length; positions that were never mentioned stay None.
        column = [None] * length
        for position, value in pairs:
            column[position] = value
        return column

    # Gather the vertex attributes sparsely first, because not every vertex
    # dictionary has to define every key.
    vertex_attrs = {}
    n = 0
    if vertices:
        for record in vertices:
            for key, value in record.items():
                vertex_attrs.setdefault(key, []).append((n, value))
            n += 1
        vertex_attrs = {
            key: to_dense_column(pairs, n) for key, pairs in vertex_attrs.items()
        }
    else:
        vertex_attrs[vertex_name_attr] = []

    if vertex_name_attr not in vertex_attrs:
        raise AttributeError(
            f"{vertex_name_attr} is not a key of your vertex dictionaries",
        )

    vertex_names = vertex_attrs[vertex_name_attr]
    if len(set(vertex_names)) != len(vertex_names):
        raise ValueError("vertex names are not unique")

    # Maps vertex names to indices; the iterative branch relies on a name
    # that was not seen before receiving the next free index.
    vertex_name_map = UniqueIdGenerator(initial=vertex_names)

    efk_src, efk_dest = edge_foreign_keys

    if not iterative:
        edge_list = []
        sparse_edge_attrs = {}
        m = 0
        for record in edges:
            source_id = vertex_name_map[record[efk_src]]
            target_id = vertex_name_map[record[efk_dest]]
            edge_list.append((source_id, target_id))
            for key, value in record.items():
                sparse_edge_attrs.setdefault(key, []).append((m, value))
            m += 1
        edge_attrs = {
            key: to_dense_column(pairs, m)
            for key, pairs in sparse_edge_attrs.items()
        }

        # Edges may have referred to vertices that were not listed up front:
        # pad every vertex attribute and refresh the name column.
        known = len(vertex_name_map)
        if known > n:
            padding = [None] * (known - n)
            for column in vertex_attrs.values():
                column.extend(padding)
            vertex_attrs[vertex_name_attr] = list(vertex_name_map.values())
            n = known

        return cls(n, edge_list, directed, {}, vertex_attrs, edge_attrs)

    # Iterative construction: start from the known vertices and grow the
    # graph edge by edge.
    g = cls(n, [], directed, {}, vertex_attrs)
    for edge_index, record in enumerate(edges):
        endpoint_ids = []
        for endpoint_name in (record[efk_src], record[efk_dest]):
            vertex_id = vertex_name_map[endpoint_name]
            if vertex_id == n:
                # First time this name is seen: create its vertex.
                g.add_vertices(1)
                g.vs[n][vertex_name_attr] = endpoint_name
                n += 1
            endpoint_ids.append(vertex_id)
        g.add_edge(*endpoint_ids)
        for key, value in record.items():
            g.es[edge_index][key] = value
    return g
def _construct_graph_from_tuple_list(
    cls,
    edges,
    directed: bool = False,
    vertex_name_attr: str = "name",
    edge_attrs=None,
    weights=False,
):
    """Builds a graph from a list of tuples, one tuple per edge.

    Every item of C{edges} is a tuple (or list) whose first two elements
    name the source and the target vertex of an edge. Any further elements
    are edge attribute values; their attribute names come, in order, from
    C{edge_attrs}. The vertex names are stored in the vertex attribute
    given by C{vertex_name_attr}.

    With the default arguments an unweighted graph is created from
    (source, target) pairs. For a weighted graph, put the weight into the
    third element of each item and set C{edge_attrs} to C{"weight"} or
    C{["weight"]}. More attributes can be appended to each item as long as
    their names are listed in C{edge_attrs}.

    @param edges: the data source for the edges: an iterable of tuples (or
      lists) with at least two elements each, the names of the source and
      the target vertex. The names are stored in the C{name} vertex
      attribute (or in C{vertex_name_attr} if given) even when all of them
      are numbers.
    @param directed: whether the constructed graph will be directed
    @param vertex_name_attr: the name of the vertex attribute that will
      contain the vertex names.
    @param edge_attrs: the names of the edge attributes filled from the
      extra elements of each item, starting at index 2. If C{None} or an
      empty sequence, only the endpoints are read and extra elements are
      ignored. A string is interpreted as a single attribute name. Items
      that are too short to provide a value get C{None} for that attribute.
    @param weights: alternative way to request a weighted graph. If truthy
      while C{edge_attrs} is C{None}, C{edge_attrs} becomes C{["weight"]},
      or a list holding C{weights} itself when C{weights} is a string.
    @return: the graph that was constructed
    @raise ValueError: if both C{edge_attrs} and a truthy C{weights} are
      given
    """
    # Normalise the two ways of naming the edge attributes.
    if edge_attrs is not None:
        if weights:
            raise ValueError("`weights` must be False if `edge_attrs` is " "not None")
        if isinstance(edge_attrs, str):
            edge_attrs = [edge_attrs]
    elif weights:
        edge_attrs = [weights if isinstance(weights, str) else "weight"]
    else:
        edge_attrs = ()

    # Hands out consecutive vertex IDs to names in order of first appearance
    idgen = UniqueIdGenerator()

    edge_list = []
    edge_attributes = {name: [] for name in edge_attrs}
    for item in edges:
        edge_list.append((idgen[item[0]], idgen[item[1]]))
        for position, name in enumerate(edge_attrs, 2):
            try:
                value = item[position]
            except IndexError:
                # The tuple is too short to carry this attribute
                value = None
            edge_attributes[name].append(value)

    vertex_attributes = {vertex_name_attr: list(idgen.values())}
    return cls(
        len(idgen), edge_list, directed, {}, vertex_attributes, edge_attributes
    )
def _construct_graph_from_list_dict(
    cls,
    edges,
    directed: bool = False,
    vertex_name_attr: str = "name",
):
    """Constructs a graph from a dict-of-lists representation.

    This function is used to construct a graph from a dictionary of
    lists. Other, non-list sequences (e.g. tuples) and lazy iterators are
    accepted. For each key x, its corresponding value must be an iterable of
    values y: the edge (x,y) will be created in the graph. x and y
    must be either one of:

      - two integers: the vertices with those ids will be connected
      - two strings:  the vertices with those names will be connected

    Whether the keys are treated as integers or as names is decided by the
    type of the first key only. If names are used, each vertex will be given
    the vertex_name_attr attribute. An empty dictionary produces a graph
    without vertices.

    @param edges: the dict of sequences describing the edges
    @param directed: whether to create a directed graph
    @param vertex_name_attr: vertex attribute that will store the names
    @return: a Graph object
    @raise ValueError: if the first key is neither an integer nor a string

    Example:

    >>> mydict = {'apple': ['pear', 'peach'], 'pear': ['peach']}
    >>> g = Graph.ListDict(mydict)

    The graph has three vertices with names and three edges connecting
    each pair.
    """
    first_item = next(iter(edges), 0)
    if not isinstance(first_item, (int, str)):
        raise ValueError("Keys must be integers or strings")

    vertex_attributes = {}
    edge_list = []

    if isinstance(first_item, str):
        name_map = UniqueIdGenerator()
        for source, sequence in edges.items():
            source_id = name_map[source]
            edge_list.extend((source_id, name_map[target]) for target in sequence)
        vertex_attributes[vertex_name_attr] = list(name_map.values())
        n = len(name_map)
    else:
        n = -1
        for source, sequence in edges.items():
            # The targets are needed twice (largest ID and edge list), so
            # materialise them once; a lazy iterator would otherwise be
            # exhausted by the first use and its edges silently dropped.
            targets = list(sequence)
            n = max(n, source, max(targets, default=-1))
            edge_list.extend((source, target) for target in targets)
        # Vertex IDs are zero-based, hence the count is the largest ID plus 1
        n += 1

    # Construct the graph
    return cls(n, edge_list, directed, {}, vertex_attributes, {})
def _construct_graph_from_dict_dict(
    cls,
    edges,
    directed: bool = False,
    vertex_name_attr: str = "name",
):
    """Builds a graph from a dict-of-dicts representation.

    Every key of the outer dictionary is an integer or a string that stands
    for a vertex. Its value is a dictionary describing the edges leaving
    that vertex (outgoing edges if the graph is directed): the keys are the
    target vertices and the values are dictionaries with the attributes of
    the respective edge. Integers are interpreted as zero-based vertex IDs
    (as used in igraph); strings are interpreted as vertex names, in which
    case the vertices receive separate numeric IDs. The type of the first
    outer key decides which interpretation is used.

    Example:

      >>> {'Alice': {'Bob': {'weight': 1.5}, 'David': {'weight': 2}}}

    creates a graph with three vertices (Alice, Bob, and David) and two edges:

      - Alice - Bob (with weight 1.5)
      - Alice - David (with weight 2)

    @param edges: the dict of dict of dicts specifying the edges and their
      attributes
    @param directed: whether to create a directed graph
    @param vertex_name_attr: vertex attribute that will store the names
    @return: a Graph object
    @raise ValueError: if the first key is neither an integer nor a string
    """
    first_item = next(iter(edges), 0)
    if not isinstance(first_item, (int, str)):
        raise ValueError("Keys must be integers or strings")

    vertex_attributes = {}
    edge_list = []
    # Attribute dictionaries, kept parallel to edge_list
    edge_attribute_list = []

    if not isinstance(first_item, str):
        # Integer keys are vertex IDs; track the largest one to size the graph
        n = -1
        for source, target_dict in edges.items():
            n = max([n, source, *target_dict])
            for target, attrs in target_dict.items():
                edge_list.append((source, target))
                edge_attribute_list.append(attrs)
        n += 1
    else:
        name_map = UniqueIdGenerator()
        for source, target_dict in edges.items():
            source_id = name_map[source]
            for target, attrs in target_dict.items():
                edge_list.append((source_id, name_map[target]))
                edge_attribute_list.append(attrs)
        vertex_attributes[vertex_name_attr] = name_map.values()
        n = len(name_map)

    # The graph is created without edge attributes; they are attached
    # edge by edge afterwards.
    graph = cls(n, edge_list, directed, {}, vertex_attributes, {})
    for edge, attrs in zip(graph.es, edge_attribute_list):
        for key, val in attrs.items():
            edge[key] = val

    return graph
def _construct_graph_from_dataframe(
    cls,
    edges,
    directed: bool = True,
    vertices=None,
    use_vids: bool = True,
):
    """Generates a graph from one or two dataframes.

    @param edges: pandas DataFrame containing edges and metadata. The first
      two columns of this DataFrame contain the source and target vertices
      for each edge. These indicate the vertex IDs as nonnegative integers
      rather than vertex names unless C{use_vids} is False. Further columns
      may contain edge attributes.
    @param directed: whether the graph is directed
    @param vertices: None (default) or pandas DataFrame containing vertex
      metadata. The DataFrame's index must contain the vertex IDs as a
      sequence of integers from 0 to C{len(vertices) - 1}. If C{use_vids}
      is C{False}, the first column must contain the unique vertex names.
      Vertex names should be strings for full compatibility, but many functions
      will work if you set the name with any hashable object. All other columns
      will be added as vertex attributes by column name.
    @param use_vids: whether to interpret the first two columns of the C{edges}
      argument as vertex ids (0-based integers) instead of vertex names.
      If this argument is set to True and the first two columns of C{edges}
      are not integers, an error is thrown.
    @return: the graph
    @raise ImportError: if pandas is not installed
    @raise TypeError: if C{use_vids} is True and the first two columns of
      C{edges} or the index of C{vertices} are not integers
    @raise ValueError: if a DataFrame has too few columns, if vertex IDs are
      null, negative or not a contiguous range starting at 0, if vertex
      names are not unique, if C{vertices} has a conflicting C{name}
      column, or if C{edges} refers to vertices missing from C{vertices}

    Vertex names in either the C{edges} or C{vertices} arguments that are set
    to NaN (not a number) will be set to the string "NA". That might lead
    to unexpected behaviour: fill your NaNs with values before calling this
    function to mitigate. The DataFrames passed in are never modified.
    """
    try:
        import pandas as pd
    except ImportError:
        raise ImportError(
            "You should install pandas in order to use this function"
        ) from None

    if edges.shape[1] < 2:
        raise ValueError("The 'edges' DataFrame must contain at least two columns")
    if vertices is not None and vertices.shape[1] < 1:
        raise ValueError("The 'vertices' DataFrame must contain at least one column")

    if use_vids:
        if str(edges.dtypes.iloc[0]).startswith(("int", "Int")) and str(
            edges.dtypes.iloc[1]
        ).startswith(("int", "Int")):
            # Check pandas nullable integer data type:
            # https://pandas.pydata.org/docs/user_guide/integer_na.html
            if (edges.iloc[:, :2].isna()).any(axis=None):
                raise ValueError("Source and target IDs must not be null")
            if (edges.iloc[:, :2] < 0).any(axis=None):
                raise ValueError("Source and target IDs must not be negative")
        else:
            raise TypeError(
                f"Source and target IDs must be 0-based integers, found types {edges.dtypes.tolist()[:2]}"
            )

        if vertices is not None:
            vertices = vertices.sort_index()
            if not vertices.index.equals(
                pd.RangeIndex.from_range(range(vertices.shape[0]))
            ):
                # Pick the most specific explanation of what is wrong
                if not str(vertices.index.dtype).startswith("int"):
                    raise TypeError(
                        f"Vertex IDs must be 0-based integers, found type {vertices.index.dtype}"
                    )
                elif (vertices.index < 0).any(axis=None):
                    raise ValueError("Vertex IDs must not be negative")
                else:
                    raise ValueError(
                        f"Vertex IDs must be an integer sequence from 0 to {vertices.shape[0] - 1}"
                    )
    else:
        # Handle if some source and target names in 'edges' are 'NA'
        if edges.iloc[:, :2].isna().any(axis=None):
            warn(
                "In the first two columns of 'edges' NA elements were replaced with string \"NA\"",
                stacklevel=1,
            )
            # Fill on a copy and assign the result back. An in-place fillna
            # on an .iloc selection only touches a temporary object (always
            # so with pandas Copy-on-Write), leaving the NAs in place.
            edges = edges.copy()
            edges[edges.columns[0]] = edges.iloc[:, 0].fillna("NA")
            edges[edges.columns[1]] = edges.iloc[:, 1].fillna("NA")

        # Bring DataFrame(s) into same format as with 'use_vids=True'
        if vertices is None:
            vertices = pd.DataFrame({"name": pd.unique(edges.values[:, :2].ravel())})

        if vertices.iloc[:, 0].isna().any():
            warn(
                "In the first column of 'vertices' NA elements were replaced with string \"NA\"",
                stacklevel=1,
            )
            # Same reasoning as for 'edges' above: assign instead of
            # filling a temporary selection in place.
            vertices = vertices.copy()
            vertices[vertices.columns[0]] = vertices.iloc[:, 0].fillna("NA")

        if vertices.iloc[:, 0].duplicated().any():
            raise ValueError("Vertex names must be unique")

        if vertices.shape[1] > 1 and "name" in vertices.columns[1:]:
            raise ValueError(
                "Vertex attribute conflict: DataFrame already contains column 'name'"
            )

        vertices = vertices.rename({vertices.columns[0]: "name"}, axis=1).reset_index(
            drop=True
        )

        # Map source and target names in 'edges' to IDs
        vid_map = pd.Series(vertices.index, index=vertices.iloc[:, 0])
        edges = edges.copy()
        edges[edges.columns[0]] = edges.iloc[:, 0].map(vid_map)
        edges[edges.columns[1]] = edges.iloc[:, 1].map(vid_map)

    # Create graph
    if vertices is None:
        if edges.shape[0] == 0:
            # No edges and no vertex table: the maximum of an empty column
            # is NaN, which is not a usable vertex count.
            nv = 0
        else:
            nv = int(edges.iloc[:, :2].max().max()) + 1
        g = cls(n=nv, directed=directed)
    else:
        if not edges.iloc[:, :2].isin(vertices.index).all(axis=None):
            raise ValueError(
                "Some vertices in the edge DataFrame are missing from vertices DataFrame"
            )
        nv = vertices.shape[0]
        g = cls(n=nv, directed=directed)
        # Add vertex attributes
        for col in vertices.columns:
            g.vs[col] = vertices[col].tolist()

    # add edges including optional attributes
    e_list = list(edges.iloc[:, :2].itertuples(index=False, name=None))
    e_attr = edges.iloc[:, 2:].to_dict(orient="list") if edges.shape[1] > 2 else None
    g.add_edges(e_list, e_attr)

    return g
def _export_graph_to_dict_list(
    graph,
    use_vids: bool = True,
    skip_none: bool = False,
    vertex_name_attr: str = "name",
):
    """Export graph as two lists of dictionaries, for vertices and edges.

    This function is the reverse of Graph.DictList.

    Example:

      >>> g = Graph([(0, 1), (1, 2)])
      >>> g.vs["name"] = ["apple", "pear", "peach"]
      >>> g.es["name"] = ["first_edge", "second"]

      >>> g.to_dict_list()
      ([{"name": "apple"}, {"name": "pear"}, {"name": "peach"}],
       [{"name": "first_edge", "source": 0, "target": 1},
        {"name": "second", "source": 1, "target": 2}])

      >>> g.to_dict_list(use_vids=False)
      ([{"name": "apple"}, {"name": "pear"}, {"name": "peach"}],
       [{"name": "first_edge", "source": "apple", "target": "pear"},
        {"name": "second", "source": "pear", "target": "peach"}])

    @param use_vids: whether to label vertices in the output data
      structure by their ids or their vertex_name_attr attribute. If
      use_vids=False but vertices lack a vertex_name_attr attribute, an
      AttributeError is raised.
    @param skip_none: whether to skip, for each vertex and each edge,
      attributes that have a value of None. This is useful if only some
      vertices or edges are expected to possess an attribute.
    @param vertex_name_attr: only used with use_vids=False to choose what
      vertex attribute to use to name your vertices in the output data
      structure.

    @return: a tuple with two lists of dictionaries, representing the vertices
      and the edges, respectively, with their attributes. Every edge
      dictionary also has the keys C{"source"} and C{"target"}; these
      replace edge attributes of the same name.
    @raise AttributeError: if use_vids=False and the graph has no
      vertex_name_attr vertex attribute
    """
    # Output data structures
    res_vs, res_es = [], []

    if not use_vids:
        if vertex_name_attr not in graph.vertex_attributes():
            raise AttributeError(f"No vertex attribute {vertex_name_attr}")
        vs_names = graph.vs[vertex_name_attr]

    for vertex in graph.vs:
        attrdic = vertex.attributes()
        if skip_none:
            # Iterate over the items: looping over the dictionary itself
            # would yield the keys only.
            attrdic = {k: v for k, v in attrdic.items() if v is not None}
        res_vs.append(attrdic)

    for edge in graph.es:
        source, target = edge.tuple
        if not use_vids:
            source, target = vs_names[source], vs_names[target]
        attrdic = edge.attributes()
        if skip_none:
            attrdic = {k: v for k, v in attrdic.items() if v is not None}
        # The endpoints are added last, after the None filtering
        attrdic["source"] = source
        attrdic["target"] = target
        res_es.append(attrdic)

    return (res_vs, res_es)
def _export_graph_to_tuple_list(
    graph,
    use_vids: bool = True,
    edge_attrs: Union[str, Sequence[str]] = None,
    vertex_name_attr: str = "name",
):
    """Export graph to a list of edge tuples

    This function is the reverse of Graph.TupleList.

    Example:

      >>> g = Graph.Full(3)
      >>> g.vs["name"] = ["apple", "pear", "peach"]
      >>> g.es["name"] = ["first_edge", "second", "third"]

      >>> # Get name of the edge
      >>> g.to_tuple_list(edge_attrs=["name"])
      [(0, 1, "first_edge"), (0, 2, "second"), (1, 2, "third")]

      >>> # Use vertex names, no edge attributes
      >>> g.to_tuple_list(use_vids=False)
      [("apple", "pear"), ("apple", "peach"), ("pear", "peach")]

    @param use_vids: whether to label vertices in the output data
      structure by their ids or their vertex_name_attr attribute. If
      use_vids=False but vertices lack a vertex_name_attr attribute, an
      AttributeError is raised. Any falsy value is treated like False.
    @param edge_attrs: list of edge attributes to export
      in addition to source and target vertex, which are always the first two
      elements of each tuple. None (default) is equivalent to an empty list. A
      string is acceptable to signify a single attribute and will be wrapped in
      a list internally.
    @param vertex_name_attr: only used with use_vids=False to choose what
      vertex attribute to use to name your vertices in the output data
      structure.

    @return: a list of tuples, each representing an edge of the graph.
    @raise AttributeError: if one of the requested edge attributes does not
      exist, or if use_vids is falsy and the graph has no vertex_name_attr
      vertex attribute
    """
    if edge_attrs is None:
        edge_attrs = []
    else:
        # Materialise once: the names are used for validation and again
        # for every edge.
        edge_attrs = [edge_attrs] if isinstance(edge_attrs, str) else list(edge_attrs)
        available = set(graph.edge_attributes())
        # Report missing names in the order they were requested
        missing_attrs = [name for name in edge_attrs if name not in available]
        if missing_attrs:
            raise AttributeError(f"Missing attributes: {missing_attrs}")

    # Test with the same truthiness check that the loop below uses, so that
    # vs_names is always defined whenever it is needed.
    if not use_vids:
        if vertex_name_attr not in graph.vertex_attributes():
            raise AttributeError(f"No vertex attribute {vertex_name_attr}")
        vs_names = graph.vs[vertex_name_attr]

    res = []
    for edge in graph.es:
        source, target = edge.tuple
        if not use_vids:
            source, target = vs_names[source], vs_names[target]
        res.append((source, target, *(edge[attrname] for attrname in edge_attrs)))

    return res
def _export_graph_to_list_dict(
    graph,
    use_vids: bool = True,
    sequence_constructor: callable = list,
    vertex_name_attr: str = "name",
):
    """Export graph to a dictionary of lists (or other sequences).

    This function is the reverse of Graph.ListDict.

    Example: for C{g = Graph.Full(3)} the default arguments give

      {0: [1, 2], 1: [2]}

    while C{sequence_constructor=tuple} gives

      {0: (1, 2), 1: (2,)}

    and, after C{g.vs['name'] = ['apple', 'pear', 'peach']}, passing
    C{use_vids=False} gives

      {'apple': ['pear', 'peach'], 'pear': ['peach']}

    @param use_vids: whether to label vertices in the output data
      structure by their ids or their vertex_name_attr attribute. If
      use_vids=False but vertices lack a vertex_name_attr attribute, an
      AttributeError is raised.
    @param sequence_constructor: callable applied to the list of targets
      collected for each key to produce the dictionary value. The default
      (list) makes a dict of lists.
    @param vertex_name_attr: only used with use_vids=False to choose what
      vertex attribute to use to name your vertices in the output data
      structure.

    @return: dictionary keyed by the first endpoint of the edges, with each
      value holding the second endpoints of the edges that start there.
      Vertices that are never the first endpoint of an edge have no key.
    @raise AttributeError: if use_vids=False and the graph has no
      vertex_name_attr vertex attribute
    """
    if not use_vids:
        if vertex_name_attr not in graph.vertex_attributes():
            raise AttributeError(f"Vertices do not have a {vertex_name_attr} attribute")
        vs_names = graph.vs[vertex_name_attr]

    # Group the second endpoints by the first endpoint, in edge order
    grouped = {}
    for edge in graph.es:
        source, target = edge.tuple
        if not use_vids:
            source, target = vs_names[source], vs_names[target]
        grouped.setdefault(source, []).append(target)

    return {key: sequence_constructor(val) for key, val in grouped.items()}
def _export_graph_to_dict_dict(
    graph,
    use_vids: bool = True,
    edge_attrs: Union[str, Sequence[str]] = None,
    skip_none: bool = False,
    vertex_name_attr: str = "name",
):
    """Export graph to dictionary of dicts of edge attributes

    This function is the reverse of Graph.DictDict.

    Example:

      >>> g = Graph.Full(3)
      >>> g.es['name'] = ['first_edge', 'second', 'third']
      >>> g.to_dict_dict()
      {0: {1: {'name': 'first_edge'}, 2: {'name': 'second'}}, 1: {2: {'name': 'third'}}}

    @param use_vids: whether to label vertices in the output data
      structure by their ids or their vertex_name_attr attribute. If
      use_vids=False but vertices lack a vertex_name_attr attribute, an
      AttributeError is raised.
    @param edge_attrs: list of edge attributes to export.
      None (default) signifies all attributes (unlike Graph.to_tuple_list). A
      string is acceptable to signify a single attribute and will be wrapped
      in a list internally.
    @param skip_none: whether to skip, for each edge, attributes that
      have a value of None. This is useful if only some edges are expected to
      possess an attribute.
    @param vertex_name_attr: only used with use_vids=False to choose what
      vertex attribute to use to name your vertices in the output data
      structure.

    @return: dictionary of dictionaries of dictionaries, with the outer keys
      vertex ids/names, the middle keys ids/names of their neighbors, and the
      innermost dictionary representing attributes of that edge. If several
      edges share the same pair of endpoints, the attributes of the last one
      are kept.
    @raise AttributeError: if a requested edge attribute does not exist, or
      if use_vids=False and the graph has no vertex_name_attr vertex
      attribute
    """
    if edge_attrs is not None:
        if isinstance(edge_attrs, str):
            edge_attrs = [edge_attrs]
        missing_attrs = list(set(edge_attrs) - set(graph.edge_attributes()))
        if missing_attrs:
            raise AttributeError(f"Missing attributes: {missing_attrs}")

    if not use_vids:
        if vertex_name_attr not in graph.vertex_attributes():
            raise AttributeError(f"Vertices do not have a {vertex_name_attr} attribute")
        vs_names = graph.vs[vertex_name_attr]

    # source -> target -> attribute dictionary of the edge
    nested = {}
    for edge in graph.es:
        source, target = edge.tuple
        if not use_vids:
            source, target = vs_names[source], vs_names[target]

        attrdic = edge.attributes()
        if edge_attrs is not None:
            # Keep only the requested attributes, in the requested order
            attrdic = {name: attrdic[name] for name in edge_attrs}
        if skip_none:
            attrdic = {
                name: value for name, value in attrdic.items() if value is not None
            }

        nested.setdefault(source, {})[target] = attrdic

    return nested
def _export_vertex_dataframe(graph):
    """Export vertices with attributes to pandas.DataFrame

    The DataFrame has one column per vertex attribute and one row per
    vertex. If you want to use vertex names as index, you can do:

      >>> from string import ascii_letters
      >>> graph = Graph.GRG(25, 0.4)
      >>> graph.vs["name"] = ascii_letters[:graph.vcount()]
      >>> df = graph.get_vertex_dataframe()
      >>> df.set_index('name', inplace=True)

    @return: a pandas.DataFrame representing vertices and their attributes.
      The index uses vertex IDs, from 0 to N - 1 where N is the number of
      vertices, and is named C{"vertex ID"}.
    @raise ImportError: if pandas is not installed
    """
    try:
        import pandas as pd
    except ImportError:
        raise ImportError(
            "You should install pandas in order to use this function"
        ) from None

    # One column per vertex attribute
    columns = {}
    for attr in graph.vertex_attributes():
        columns[attr] = graph.vs[attr]

    vertex_ids = list(range(graph.vcount()))
    df = pd.DataFrame(columns, index=vertex_ids)
    df.index.name = "vertex ID"

    return df
def _export_edge_dataframe(graph):
    """Export edges with attributes to pandas.DataFrame

    If you want to use source and target vertex IDs as index, you can do:

      >>> from string import ascii_letters
      >>> graph = Graph.GRG(25, 0.4)
      >>> graph.vs["name"] = ascii_letters[:graph.vcount()]
      >>> df = graph.get_edge_dataframe()
      >>> df.set_index(['source', 'target'], inplace=True)

    The index will be a pandas.MultiIndex. You can use the C{drop=False}
    option to keep the C{source} and C{target} columns.

    If you want to use vertex names in the source and target columns:

      >>> df = graph.get_edge_dataframe()
      >>> df_vert = graph.get_vertex_dataframe()
      >>> df['source'] = df['source'].replace(df_vert['name'])
      >>> df['target'] = df['target'].replace(df_vert['name'])
      >>> df_vert.set_index('name', inplace=True)  # Optional

    @return: a pandas.DataFrame representing edges and their attributes.
      The index uses edge IDs, from 0 to M - 1 where M is the number of
      edges, and is named C{"edge ID"}. The first two columns of the
      dataframe represent the IDs of source and target vertices for each
      edge. These columns have names "source" and "target". If your edges
      have attributes with the same names, they will be present in the
      dataframe, but not in the first two columns.
    @raise ImportError: if pandas is not installed
    """
    try:
        import pandas as pd
    except ImportError:
        raise ImportError(
            "You should install pandas in order to use this function"
        ) from None

    attribute_columns = {name: graph.es[name] for name in graph.edge_attributes()}
    edge_ids = list(range(graph.ecount()))
    df = pd.DataFrame(attribute_columns, index=edge_ids)
    df.index.name = "edge ID"

    # Collect both endpoints of every edge
    sources, targets = [], []
    for edge in graph.es:
        sources.append(edge.source)
        targets.append(edge.target)

    # Duplicates are allowed because an edge attribute may itself be called
    # "source" or "target"
    df.insert(0, "source", sources, allow_duplicates=True)
    df.insert(1, "target", targets, allow_duplicates=True)

    return df