forked from PaddlePaddle/Paddle
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathplotcurve.py
More file actions
155 lines (138 loc) · 5.07 KB
/
plotcurve.py
File metadata and controls
155 lines (138 loc) · 5.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#!/usr/bin/python
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Plot training and testing curve from paddle log.
It takes input from a file or stdin, and output to a file or stdout.
Note: must have numpy and matplotlib installed in order to use this tool.
usage: Plot training and testing curves from paddle log file.
[-h] [-i INPUT] [-o OUTPUT] [--format FORMAT] [key [key ...]]
positional arguments:
key keys of scores to plot, the default will be AvgCost
optional arguments:
-h, --help show this help message and exit
-i INPUT, --input INPUT
input filename of paddle log, default will be standard
input
-o OUTPUT, --output OUTPUT
output filename of figure, default will be standard
output
--format FORMAT figure format(png|pdf|ps|eps|svg)
The keys must be in the order of paddle output(!!!).
For example, paddle.INFO contains the following log
I0406 21:26:21.325584 3832 Trainer.cpp:601] Pass=0 Batch=7771 AvgCost=0.624935 Eval: error=0.260972
To use this script to generate plot for AvgCost, error:
python plotcurve.py -i paddle.INFO -o figure.png AvgCost error
"""
import six
import sys
import matplotlib
# the following line is added immediately after import matplotlib
# and before import pyplot. The purpose is to ensure the plotting
# works even under remote login (i.e. headless display)
matplotlib.use('Agg')
from matplotlib import cm
import matplotlib.pyplot as pyplot
import numpy
import argparse
import re
import os
def _match_to_row(match):
    """Convert the captured groups of a regex match to a list of floats.

    :param match: a match object, or None when the line did not match
    :return: list of floats, or None if there is no match or any captured
        group is not a valid number (e.g. an empty capture)
    """
    if match is None:
        return None
    try:
        return [float(value) for value in match.groups()]
    except ValueError:
        return None


def plot_paddle_curve(keys, inputfile, outputfile, format='png',
                      show_fig=False):
    """Plot curves from paddle log and save to outputfile.

    Training rows come from lines matching ``Pass=<n> ... <key>=<value>`` and
    testing rows from lines matching ``Test samples=<n> ... <key>=<value>``.
    The keys must appear on the log line in the order given. Lines whose
    captured values cannot be parsed as numbers are skipped. If no training
    row is found, a message is written to stderr and nothing is plotted.

    :param keys: a list of strings to be plotted, e.g. AvgCost; an empty
        value falls back to ``['AvgCost']``
    :param inputfile: a file object (any iterable of lines) for input
    :param outputfile: a file object for output
    :param format: figure format (png|pdf|ps|eps|svg); a leading dot, as
        returned by ``os.path.splitext``, is ignored
    :param show_fig: if true, also call ``pyplot.show()`` after saving
    :return: None
    """
    if not keys:
        keys = ['AvgCost']

    # '+' and 'E' are part of the character class so that values written in
    # exponent notation such as 1.5e+06 are captured whole.
    number = r"([0-9eE+\-\.]*)"
    pass_pattern = r"Pass=([0-9]*)"
    test_pattern = r"Test samples=([0-9]*)"
    for k in keys:
        # Keys are literal score names, so escape any regex metacharacters.
        suffix = r".*?%s=%s" % (re.escape(k), number)
        pass_pattern += suffix
        test_pattern += suffix

    compiled_pattern = re.compile(pass_pattern)
    compiled_test_pattern = re.compile(test_pattern)

    data = []
    test_data = []
    for line in inputfile:
        row = _match_to_row(compiled_pattern.search(line))
        if row is not None:
            data.append(row)
        test_row = _match_to_row(compiled_test_pattern.search(line))
        if test_row is not None:
            test_data.append(test_row)

    x = numpy.array(data)
    x_test = numpy.array(test_data)
    if x.shape[0] <= 0:
        sys.stderr.write("No data to plot. Exiting!\n")
        return

    m = len(keys) + 1
    has_test = x_test.shape[0] > 0
    # Test values are drawn against the training pass ids; clip both to the
    # shorter length so a log with unequal row counts can still be plotted.
    n_test = min(x.shape[0], x_test.shape[0])
    for i, key in enumerate(keys, start=1):
        pyplot.plot(
            x[:, 0],
            x[:, i],
            color=cm.jet(1.0 * (i - 1) / (2 * m)),
            label=key)
        if has_test:
            pyplot.plot(
                x[:n_test, 0],
                x_test[:n_test, i],
                color=cm.jet(1.0 - 1.0 * (i - 1) / (2 * m)),
                label="Test " + key)
    pyplot.xlabel('number of epoch')
    pyplot.legend(loc='best')

    save_kwargs = {'bbox_inches': 'tight'}
    fmt = format.lstrip('.') if format else None
    # A file object carries no extension to infer the format from, so pass it
    # explicitly; for anything else leave the choice to matplotlib.
    if fmt and hasattr(outputfile, 'write'):
        save_kwargs['format'] = fmt
    # Save before showing: on interactive backends the figure may be gone
    # once show() returns.
    pyplot.savefig(outputfile, **save_kwargs)
    if show_fig:
        pyplot.show()
    pyplot.clf()
def main(argv):
    """Parse command-line arguments and plot the requested curves.

    Reads the paddle log from ``--input`` (standard input when omitted) and
    writes the figure to ``--output`` (standard output when omitted). The
    figure format is taken from ``--format``, else from the extension of the
    output filename, else defaults to ``png``.

    :param argv: list of command-line arguments, without the program name
    :return: None
    """
    # The text must be passed as ``description``; the first positional
    # parameter of ArgumentParser is ``prog`` and would replace the program
    # name in the usage line.
    cmdparser = argparse.ArgumentParser(
        description="Plot training and testing curves from paddle log file.")
    cmdparser.add_argument(
        'key', nargs='*', help='keys of scores to plot, the default is AvgCost')
    cmdparser.add_argument(
        '-i',
        '--input',
        help='input filename of paddle log, '
        'default will be standard input')
    cmdparser.add_argument(
        '-o',
        '--output',
        help='output filename of figure, '
        'default will be standard output')
    cmdparser.add_argument('--format', help='figure format(png|pdf|ps|eps|svg)')
    args = cmdparser.parse_args(argv)

    fig_format = args.format
    if not fig_format and args.output:
        fig_format = os.path.splitext(args.output)[1]
    # splitext() returns the extension with its leading dot (".png").
    fig_format = (fig_format or '').lstrip('.') or 'png'

    inputfile = open(args.input) if args.input else sys.stdin
    try:
        if args.output:
            outputfile = open(args.output, 'wb')
        else:
            # Image data is binary: use the underlying byte stream where
            # sys.stdout is a text stream (Python 3).
            outputfile = getattr(sys.stdout, 'buffer', sys.stdout)
        try:
            plot_paddle_curve(args.key, inputfile, outputfile, fig_format)
        finally:
            # Only close streams opened here; leave the standard streams open.
            if args.output:
                outputfile.close()
            else:
                outputfile.flush()
    finally:
        if args.input:
            inputfile.close()
if __name__ == "__main__":
    # Script entry point: hand everything after the program name to main().
    cli_args = sys.argv[1:]
    main(cli_args)