-
Notifications
You must be signed in to change notification settings - Fork 266
Expand file tree
/
Copy pathformula.py
More file actions
221 lines (192 loc) · 7.87 KB
/
formula.py
File metadata and controls
221 lines (192 loc) · 7.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
# vim:ts=4:sw=4:sts=4:et
# -*- coding: utf-8 -*-
"""
Implementation of L{igraph.Graph.Formula()}.
You should use this module directly only if you have a very strong reason
to do so. In almost all cases, you are better off with calling
L{igraph.Graph.Formula()}.
"""
from io import StringIO
from igraph.datatypes import UniqueIdGenerator
import re
import tokenize
import token
__all__ = ("construct_graph_from_formula",)
def generate_edges(formula):
"""Parses an edge specification from the head of the given
formula part and yields the following:
- startpoint(s) of the edge by vertex names
- endpoint(s) of the edge by names or an empty list if the vertices are
isolated
- a pair of bools to denote whether we had arrowheads at the start and end
vertices
"""
if formula == "":
yield [], [""], [False, False]
return
name_tokens = {token.NAME, token.NUMBER, token.STRING}
edge_chars = "<>-+"
start_names, end_names, arrowheads = [], [], [False, False]
parsing_vertices = True
# Tokenize the formula
token_gen = tokenize.generate_tokens(StringIO(formula).__next__)
for token_info in token_gen:
# Do the state transitions
token_type, tok, _, _, _ = token_info
if parsing_vertices:
if all(ch in edge_chars for ch in tok) and token_type == token.OP:
parsing_vertices = False
# Check the edge we currently have
if start_names and end_names:
# We have a whole edge
yield start_names, end_names, arrowheads
start_names, end_names = end_names, []
arrowheads = [False, False]
else:
if any(ch not in edge_chars for ch in tok):
parsing_vertices = True
if parsing_vertices:
# We are parsing vertex names at the moment
if token_type in name_tokens:
# We found a vertex name
if token_type == token.STRING:
end_names.append(eval(tok))
else:
end_names.append(str(tok))
elif tok == ":" and token_type == token.OP:
# Separating semicolon between vertex names, we just go on
continue
elif token_type == token.NEWLINE:
# Newlines are fine
pass
elif token_type == token.ENDMARKER:
# End markers are fine
pass
else:
msg = (
"invalid token found in edge specification: %s; "
"token_type=%r; tok=%r" % (formula, token_type, tok)
)
raise SyntaxError(msg)
else:
# We are parsing an edge operator
if "<" in tok:
if ">" in tok:
arrowheads = [True, True]
else:
arrowheads[0] = True
elif ">" in tok:
arrowheads[1] = True
elif "+" in tok:
if tok[0] == "+":
arrowheads[0] = True
if len(tok) > 1 and tok[-1] == "+":
arrowheads[1] = True
# The final edge
yield start_names, end_names, arrowheads
def construct_graph_from_formula(cls, formula=None, attr="name", simplify=True):
"""Graph.Formula(formula = None, attr = "name", simplify = True)
Generates a graph from a graph formula
A graph formula is a simple string representation of a graph.
It is very handy for creating small graphs quickly. The string
consists of vertex names separated by edge operators. An edge
operator is a sequence of dashes (C{-}) that may or may not
start with an arrowhead (C{<} at the beginning of the sequence
or C{>} at the end of the sequence). The edge operators can
be arbitrarily long, i.e., you may use as many dashes to draw
them as you like. This makes a total of four different edge
operators:
- C{-----} makes an undirected edge
- C{<----} makes a directed edge pointing from the vertex
on the right hand side of the operator to the vertex on
the left hand side
- C{---->} is the opposite of C{<----}
- C{<--->} creates a mutual directed edge pair between
the two vertices
If you only use the undirected edge operator (C{-----}),
the graph will be undirected. Otherwise it will be directed.
Vertex names used in the formula will be assigned to the
C{name} vertex attribute of the graph.
Some simple examples:
>>> from igraph import Graph
>>> print(Graph.Formula()) # empty graph
IGRAPH UN-- 0 0 --
+ attr: name (v)
>>> g = Graph.Formula("A-B") # undirected graph
>>> g.vs["name"]
['A', 'B']
>>> print(g)
IGRAPH UN-- 2 1 --
+ attr: name (v)
+ edges (vertex names):
A--B
>>> g.get_edgelist()
[(0, 1)]
>>> g2 = Graph.Formula("A-----------B")
>>> g2.isomorphic(g)
True
>>> g = Graph.Formula("A ---> B") # directed graph
>>> g.vs["name"]
['A', 'B']
>>> print(g)
IGRAPH DN-- 2 1 --
+ attr: name (v)
+ edges (vertex names):
A->B
If you have many disconnected componnets, you can separate them
with commas. You can also specify isolated vertices:
>>> g = Graph.Formula("A--B, C--D, E--F, G--H, I, J, K")
>>> print(", ".join(g.vs["name"]))
A, B, C, D, E, F, G, H, I, J, K
>>> g.connected_components().membership
[0, 0, 1, 1, 2, 2, 3, 3, 4, 5, 6]
The colon (C{:}) operator can be used to specify vertex sets.
If an edge operator connects two vertex sets, then every vertex
from the first vertex set will be connected to every vertex in
the second set:
>>> g = Graph.Formula("A:B:C:D --- E:F:G")
>>> g.isomorphic(Graph.Full_Bipartite(4, 3))
True
Note that you have to quote vertex names if they include spaces
or special characters:
>>> g = Graph.Formula('"this is" +- "a silly" -+ "graph here"')
>>> g.vs["name"]
['this is', 'a silly', 'graph here']
@param formula: the formula itself
@param attr: name of the vertex attribute where the vertex names
will be stored
@param simplify: whether to simplify the constructed graph
@return: the constructed graph:
"""
# If we have no formula, return an empty graph
if formula is None:
return cls(0, vertex_attrs={attr: []})
vertex_ids, edges, directed = UniqueIdGenerator(), [], False
# Loop over each part in the formula
for part in re.compile(r"[,\n]").split(formula):
# Strip leading and trailing whitespace in the part
part = part.strip()
# Parse the first vertex specification from the formula
for start_names, end_names, arrowheads in generate_edges(part):
start_ids = [vertex_ids[name] for name in start_names]
end_ids = [vertex_ids[name] for name in end_names]
if not arrowheads[0] and not arrowheads[1]:
# This is an undirected edge. Do we have a directed graph?
if not directed:
# Nope, add the edge
edges.extend((id1, id2) for id1 in start_ids for id2 in end_ids)
else:
# This is a directed edge
directed = True
if arrowheads[1]:
edges.extend((id1, id2) for id1 in start_ids for id2 in end_ids)
if arrowheads[0]:
edges.extend((id2, id1) for id1 in start_ids for id2 in end_ids)
# Grab the vertex names into a list
vertex_attrs = {}
vertex_attrs[attr] = list(vertex_ids.values())
# Construct and return the graph
result = cls(len(vertex_ids), edges, directed, vertex_attrs=vertex_attrs)
if simplify:
result.simplify()
return result