forked from SoftwareDesignXRays/tensorflow
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsource_utils.py
More file actions
315 lines (245 loc) · 11.2 KB
/
source_utils.py
File metadata and controls
315 lines (245 loc) · 11.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Classes and functions that help to inspect Python source w.r.t. TF graphs."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import os
import re
import numpy as np
from tensorflow.python.debug.lib import profiling
_TENSORFLOW_BASEDIR = os.path.dirname(
os.path.dirname(os.path.dirname(os.path.dirname(
os.path.normpath(os.path.abspath(__file__))))))
UNCOMPILED_SOURCE_SUFFIXES = (".py")
COMPILED_SOURCE_SUFFIXES = (".pyc", ".pyo")
def _norm_abs_path(file_path):
return os.path.normpath(os.path.abspath(file_path))
def is_extension_uncompiled_python_source(file_path):
  """Check whether a file path carries an uncompiled Python source suffix."""
  extension = os.path.splitext(file_path)[1]
  return extension.lower() in UNCOMPILED_SOURCE_SUFFIXES
def is_extension_compiled_python_source(file_path):
  """Check whether a file path carries a compiled (byte-code) Python suffix."""
  extension = os.path.splitext(file_path)[1]
  return extension.lower() in COMPILED_SOURCE_SUFFIXES
def _convert_watch_key_to_tensor_name(watch_key):
return watch_key[:watch_key.rfind(":")]
def guess_is_tensorflow_py_library(py_file_path):
  """Guess whether a Python source file is a part of the tensorflow library.

  Special cases:
    1) Returns False for unit-test files in the library (*_test.py),
    2) Returns False for files under python/debug/examples.

  Args:
    py_file_path: full path of the Python source file in question.

  Returns:
    (`bool`) Whether the file is a part of the tensorflow library.

  Raises:
    ValueError: if the extension name of py_file_path does not indicate a
      Python source file (compiled or uncompiled).
  """
  if not (is_extension_uncompiled_python_source(py_file_path) or
          is_extension_compiled_python_source(py_file_path)):
    raise ValueError(
        "Input file path (%s) is not a Python source file." % py_file_path)

  normalized = _norm_abs_path(py_file_path)
  in_tf_tree = normalized.startswith(_TENSORFLOW_BASEDIR)
  is_unit_test = normalized.endswith("_test.py")
  in_examples_dir = os.path.dirname(normalized).endswith(
      os.path.normpath("python/debug/examples"))
  return in_tf_tree and not is_unit_test and not in_examples_dir
def load_source(source_file_path):
  """Load the content of a Python source file.

  Args:
    source_file_path: (`str`) path to the source file.

  Returns:
    source_lines: (`list` of `str`) the lines of the file, split on "\n"
      (so a file ending in a newline yields a trailing empty string).
    line_num_width: (`int`) width to use when displaying 1-based line
      numbers next to the source: the digit count of the line total plus a
      small gutter.
  """
  # The "rU" mode used previously was deprecated in Python 3 and removed in
  # Python 3.11; plain "r" performs the same universal-newline translation.
  with open(source_file_path, "r") as f:
    source_text = f.read()

  source_lines = source_text.split("\n")
  line_num_width = int(np.ceil(np.log10(len(source_lines)))) + 3
  return source_lines, line_num_width
def annotate_source(dump,
                    source_file_path,
                    do_dumped_tensors=False,
                    file_stack_top=False,
                    min_line=None,
                    max_line=None):
  """Annotate a Python source file with a list of ops created at each line.

  (The annotation doesn't change the source file itself.)

  Args:
    dump: (`DebugDumpDir`) A `DebugDumpDir` object of which the Python graph
      has been loaded.
    source_file_path: (`str`) Path to the source file being annotated.
    do_dumped_tensors: (`str`) Whether dumped Tensors, instead of ops are to
      be used to annotate the source file.
    file_stack_top: (`bool`) Whether only the top stack trace in the
      specified source file is to be annotated.
    min_line: (`None` or `int`) The 1-based line to start annotate the source
      file from (inclusive).
    max_line: (`None` or `int`) The 1-based line number to end the annotation
      at (exclusive).

  Returns:
    A `dict` mapping 1-based line number to a list of op name(s) created at
    that line, or tensor names if `do_dumped_tensors` is True.

  Raises:
    ValueError: If the dump object does not have a Python graph set.
  """
  py_graph = dump.python_graph
  if not py_graph:
    raise ValueError("Cannot perform source annotation due to a lack of set "
                     "Python graph in the dump object")

  source_file_path = _norm_abs_path(source_file_path)

  line_to_op_names = {}
  for op in py_graph.get_operations():
    # Walk the op's creation stack from the innermost frame outward.
    for file_path, line_number, _, _ in reversed(dump.node_traceback(op.name)):
      outside_range = (
          (min_line is not None and line_number < min_line) or
          (max_line is not None and line_number >= max_line))
      if outside_range or _norm_abs_path(file_path) != source_file_path:
        continue

      if do_dumped_tensors:
        # Convert watch keys to unique Tensor names.
        keys = dump.debug_watch_keys(op.name)
        new_items = list(set(map(_convert_watch_key_to_tensor_name, keys)))
      else:
        new_items = [op.name]
      line_to_op_names.setdefault(line_number, []).extend(new_items)

      if file_stack_top:
        # Only the innermost matching frame in this file is annotated.
        break

  return line_to_op_names
def list_source_files_against_dump(dump,
                                   path_regex_whitelist=None,
                                   node_name_regex_whitelist=None):
  """Generate a list of source files with information regarding ops and tensors.

  Args:
    dump: (`DebugDumpDir`) A `DebugDumpDir` object of which the Python graph
      has been loaded.
    path_regex_whitelist: A regular-expression filter for source file path.
    node_name_regex_whitelist: A regular-expression filter for node names.

  Returns:
    A list of tuples regarding the Python source files involved in
    constructing the ops and tensors contained in `dump`. Each tuple is:
      (source_file_path, is_tf_library, num_nodes, num_tensors, num_dumps,
       first_line)
      is_tf_library: (`bool`) A guess of whether the file belongs to the
        TensorFlow Python library.
      num_nodes: How many nodes were created by lines of this source file.
        These include nodes with dumps and those without.
      num_tensors: How many Tensors were created by lines of this source file.
        These include Tensors with dumps and those without.
      num_dumps: How many debug Tensor dumps were from nodes (and Tensors)
        that were created by this source file.
      first_line: The first line number (1-based) that created any nodes or
        Tensors in this source file.
    The list is sorted by ascending order of source_file_path.

  Raises:
    ValueError: If the dump object does not have a Python graph set.
  """
  py_graph = dump.python_graph
  if not py_graph:
    raise ValueError("Cannot generate source list due to a lack of set "
                     "Python graph in the dump object")

  # Per-file accumulators, keyed by normalized absolute file path.
  path_to_node_names = collections.defaultdict(set)
  path_to_tensor_names = collections.defaultdict(set)
  path_to_first_line = {}
  # Cache of dump counts per tensor name, so each tensor is queried only once
  # even if it appears in many stack frames.
  tensor_name_to_num_dumps = {}
  path_regex = (re.compile(path_regex_whitelist)
                if path_regex_whitelist else None)
  node_name_regex = (re.compile(node_name_regex_whitelist)
                     if node_name_regex_whitelist else None)
  # Paths already known to be filtered out or nonexistent; avoids repeating
  # the regex match and os.path.isfile check for every stack frame.
  to_skip_file_paths = set()
  for op in py_graph.get_operations():
    if node_name_regex and not node_name_regex.match(op.name):
      continue
    for file_path, line_number, _, _ in dump.node_traceback(op.name):
      file_path = _norm_abs_path(file_path)
      if (file_path in to_skip_file_paths or
          path_regex and not path_regex.match(file_path) or
          not os.path.isfile(file_path)):
        to_skip_file_paths.add(file_path)
        continue
      path_to_node_names[file_path].add(op.name)
      # Track the smallest (earliest) line number seen for this file.
      if file_path in path_to_first_line:
        if path_to_first_line[file_path] > line_number:
          path_to_first_line[file_path] = line_number
      else:
        path_to_first_line[file_path] = line_number
      for output_tensor in op.outputs:
        tensor_name = output_tensor.name
        path_to_tensor_names[file_path].add(tensor_name)
      watch_keys = dump.debug_watch_keys(op.name)
      for watch_key in watch_keys:
        # Watch key format: "<node_name>:<output_slot>:<debug_op>".
        node_name, output_slot, debug_op = watch_key.split(":")
        tensor_name = "%s:%s" % (node_name, output_slot)
        if tensor_name not in tensor_name_to_num_dumps:
          tensor_name_to_num_dumps[tensor_name] = len(
              dump.get_tensors(node_name, int(output_slot), debug_op))
  # Aggregate per-tensor dump counts into per-file totals.
  path_to_num_dumps = {}
  for path in path_to_tensor_names:
    path_to_num_dumps[path] = sum(
        tensor_name_to_num_dumps.get(tensor_name, 0)
        for tensor_name in path_to_tensor_names[path])
  output = []
  for file_path in path_to_node_names:
    output.append((
        file_path,
        guess_is_tensorflow_py_library(file_path),
        len(path_to_node_names.get(file_path, {})),
        len(path_to_tensor_names.get(file_path, {})),
        path_to_num_dumps.get(file_path, 0),
        path_to_first_line[file_path]))
  return sorted(output, key=lambda x: x[0])
def annotate_source_against_profile(profile_data,
                                    source_file_path,
                                    node_name_filter=None,
                                    op_type_filter=None,
                                    min_line=None,
                                    max_line=None):
  """Annotate a Python source file with profiling information at each line.

  (The annotation doesn't change the source file itself.)

  Args:
    profile_data: (`list` of `ProfileDatum`) A list of `ProfileDatum`.
    source_file_path: (`str`) Path to the source file being annotated.
    node_name_filter: Regular expression to filter by node name.
    op_type_filter: Regular expression to filter by op type.
    min_line: (`None` or `int`) The 1-based line to start annotate the source
      file from (inclusive).
    max_line: (`None` or `int`) The 1-based line number to end the annotation
      at (exclusive).

  Returns:
    A `dict` mapping 1-based line number to the namedtuple
    `profiling.LineOrFuncProfileSummary`.
  """
  source_file_path = _norm_abs_path(source_file_path)

  node_name_regex = re.compile(node_name_filter) if node_name_filter else None
  op_type_regex = re.compile(op_type_filter) if op_type_filter else None

  line_to_profile_summary = {}
  for datum in profile_data:
    # Guard clauses: skip data not attributable to a line of this file or
    # filtered out by line range / node name / op type.
    if not datum.file_path:
      continue
    if _norm_abs_path(datum.file_path) != source_file_path:
      continue
    line = datum.line_number
    if min_line is not None and line < min_line:
      continue
    if max_line is not None and line >= max_line:
      continue
    if (node_name_regex and
        not node_name_regex.match(datum.node_exec_stats.node_name)):
      continue
    if op_type_regex and not op_type_regex.match(datum.op_type):
      continue

    # Aggregate all matching data points that share a line number.
    if line in line_to_profile_summary:
      line_to_profile_summary[line].add(datum)
    else:
      line_to_profile_summary[line] = profiling.AggregateProfile(datum)

  return line_to_profile_summary