6565up to date with CTest, just make sure the ``*_matches`` and
6666``*_exceptions`` lists are kept up to date with CTest's build handler.
6767"""
68+ from __future__ import print_function
69+ from __future__ import division
70+
6871import re
72+ import math
73+ import multiprocessing
74+ import time
75+ from contextlib import contextmanager
76+
6977from six import StringIO
7078from six import string_types
7179
72-
73- error_matches = [
80+ class prefilter (object ):
81+ """Make regular expressions faster with a simple prefiltering predicate.
82+
83+ Some regular expressions seem to be much more costly than others. In
84+ most cases, we can evaluate a simple precondition, e.g.::
85+
86+ lambda x: "error" in x
87+
88+ to avoid evaluating expensive regexes on all lines in a file. This
89+ can reduce parse time for large files by orders of magnitude when
90+ evaluating lots of expressions.
91+
92+ A ``prefilter`` object is designed to act like a regex,, but
93+ ``search`` and ``match`` check the precondition before bothering to
94+ evaluate the regular expression.
95+
96+ Note that ``match`` and ``search`` just return ``True`` and ``False``
97+ at the moment. Make them return a ``MatchObject`` or ``None`` if it
98+ becomes necessary.
99+ """
100+ def __init__ (self , precondition , * patterns ):
101+ self .patterns = [re .compile (p ) for p in patterns ]
102+ self .pre = precondition
103+ self .pattern = "\n " .join (
104+ ('MERGED:' ,) + patterns )
105+
106+ def search (self , text ):
107+ return self .pre (text ) and any (p .search (text ) for p in self .patterns )
108+
109+ def match (self , text ):
110+ return self .pre (text ) and any (p .match (text ) for p in self .patterns )
111+
112+
113+ _error_matches = [
114+ prefilter (
115+ lambda x : any (s in x for s in (
116+ 'Error:' , 'error' , 'undefined reference' , 'multiply defined' )),
117+ "([^:]+): error[ \\ t]*[0-9]+[ \\ t]*:" ,
118+ "([^:]+): (Error:|error|undefined reference|multiply defined)" ,
119+ "([^ :]+) : (error|fatal error|catastrophic error)" ,
120+ "([^:]+)\\ (([^\\ )]+)\\ ) ?: (error|fatal error|catastrophic error)" ),
121+ prefilter (
122+ lambda s : s .count (':' ) >= 2 ,
123+ "[^ :]+:[0-9]+: [^ \\ t]" ),
74124 "^[Bb]us [Ee]rror" ,
75125 "^[Ss]egmentation [Vv]iolation" ,
76126 "^[Ss]egmentation [Ff]ault" ,
77127 ":.*[Pp]ermission [Dd]enied" ,
78- "([^ :]+):([0-9]+): ([^ \\ t])" ,
79- "([^:]+): error[ \\ t]*[0-9]+[ \\ t]*:" ,
80128 "^Error ([0-9]+):" ,
81129 "^Fatal" ,
82130 "^Error: " ,
86134 "^cc[^C]*CC: ERROR File = ([^,]+), Line = ([0-9]+)" ,
87135 "^ld([^:])*:([ \\ t])*ERROR([^:])*:" ,
88136 "^ild:([ \\ t])*\\ (undefined symbol\\ )" ,
89- "([^ :]+) : (error|fatal error|catastrophic error)" ,
90- "([^:]+): (Error:|error|undefined reference|multiply defined)" ,
91- "([^:]+)\\ (([^\\ )]+)\\ ) ?: (error|fatal error|catastrophic error)" ,
92137 "^fatal error C[0-9]+:" ,
93138 ": syntax error " ,
94139 "^collect2: ld returned 1 exit status" ,
128173 "^Command .* failed with exit code" ,
129174]
130175
131- error_exceptions = [
176+ _error_exceptions = [
132177 "instantiated from " ,
133178 "candidates are:" ,
134179 ": warning" ,
143188]
144189
145190#: Regexes to match file/line numbers in error/warning messages
146- warning_matches = [
147- "([^ :]+):([0-9]+): warning:" ,
148- "([^ :]+):([0-9]+): note:" ,
191+ _warning_matches = [
192+ prefilter (
193+ lambda x : 'warning' in x ,
194+ "([^ :]+):([0-9]+): warning:" ,
195+ "([^:]+): warning ([0-9]+):" ,
196+ "([^:]+): warning[ \\ t]*[0-9]+[ \\ t]*:" ,
197+ "([^ :]+) : warning" ,
198+ "([^:]+): warning" ),
199+ prefilter (
200+ lambda x : 'note:' in x ,
201+ "^([^ :]+):([0-9]+): note:" ),
202+ prefilter (
203+ lambda x : any (s in x for s in ('Warning' , 'Warnung' )),
204+ "^(Warning|Warnung) ([0-9]+):" ,
205+ "^(Warning|Warnung)[ :]" ,
206+ "^cxx: Warning:" ,
207+ "([^ :]+):([0-9]+): (Warning|Warnung)" ,
208+ "^CMake Warning.*:" ),
209+ "file: .* has no symbols" ,
149210 "^cc[^C]*CC: WARNING File = ([^,]+), Line = ([0-9]+)" ,
150211 "^ld([^:])*:([ \\ t])*WARNING([^:])*:" ,
151- "([^:]+): warning ([0-9]+):" ,
152212 "^\" [^\" ]+\" , line [0-9]+: [Ww](arning|arnung)" ,
153- "([^:]+): warning[ \\ t]*[0-9]+[ \\ t]*:" ,
154- "^(Warning|Warnung) ([0-9]+):" ,
155- "^(Warning|Warnung)[ :]" ,
156213 "WARNING: " ,
157- "([^ :]+) : warning" ,
158- "([^:]+): warning" ,
159214 "\" , line [0-9]+\\ .[0-9]+: [0-9]+-[0-9]+ \\ ([WI]\\ )" ,
160- "^cxx: Warning:" ,
161- ".*file: .* has no symbols" ,
162- "([^ :]+):([0-9]+): (Warning|Warnung)" ,
163215 "\\ ([0-9]*\\ ): remark #[0-9]*" ,
164216 "\" .*\" , line [0-9]+: remark\\ ([0-9]*\\ ):" ,
165217 "cc-[0-9]* CC: REMARK File = .*, Line = [0-9]*" ,
166- "^CMake Warning.*:" ,
167218 "^\\ [WARNING\\ ]" ,
168219]
169220
170221#: Regexes to match file/line numbers in error/warning messages
171- warning_exceptions = [
222+ _warning_exceptions = [
172223 "/usr/.*/X11/Xlib\\ .h:[0-9]+: war.*: ANSI C\\ +\\ + forbids declaration" ,
173224 "/usr/.*/X11/Xutil\\ .h:[0-9]+: war.*: ANSI C\\ +\\ + forbids declaration" ,
174225 "/usr/.*/X11/XResource\\ .h:[0-9]+: war.*: ANSI C\\ +\\ + forbids declaration" ,
188239]
189240
190241#: Regexes to match file/line numbers in error/warning messages
191- file_line_matches = [
242+ _file_line_matches = [
192243 "^Warning W[0-9]+ ([a-zA-Z.\\ :/0-9_+ ~-]+) ([0-9]+):" ,
193244 "^([a-zA-Z./0-9_+ ~-]+):([0-9]+):" ,
194245 "^([a-zA-Z.\\ :/0-9_+ ~-]+)\\ (([0-9]+)\\ )" ,
@@ -250,25 +301,120 @@ class BuildWarning(LogEvent):
250301 """LogEvent subclass for build warnings."""
251302
252303
304+ def chunks (l , n ):
305+ """Divide l into n approximately-even chunks."""
306+ chunksize = int (math .ceil (len (l ) / n ))
307+ return [l [i :i + chunksize ] for i in range (0 , len (l ), chunksize )]
308+
309+
310+ @contextmanager
311+ def _time (times , i ):
312+ start = time .time ()
313+ yield
314+ end = time .time ()
315+ times [i ] += end - start
316+
317+
253318def _match (matches , exceptions , line ):
254319 """True if line matches a regex in matches and none in exceptions."""
255320 return (any (m .search (line ) for m in matches ) and
256321 not any (e .search (line ) for e in exceptions ))
257322
258323
259- class CTestLogParser (object ):
260- """Log file parser that extracts errors and warnings."""
261- def __init__ (self ):
262- def compile (regex_array ):
263- return [re .compile (regex ) for regex in regex_array ]
324+ def _profile_match (matches , exceptions , line , match_times , exc_times ):
325+ """Profiled version of match().
326+
327+ Timing is expensive so we have two whole functions. This is much
328+ longer because we have to break up the ``any()`` calls.
329+
330+ """
331+ for i , m in enumerate (matches ):
332+ with _time (match_times , i ):
333+ if m .search (line ):
334+ break
335+ else :
336+ return False
337+
338+ for i , m in enumerate (exceptions ):
339+ with _time (exc_times , i ):
340+ if m .search (line ):
341+ return False
342+ else :
343+ return True
344+
345+
346+ def _parse (lines , offset , profile ):
347+ def compile (regex_array ):
348+ return [regex if isinstance (regex , prefilter ) else re .compile (regex )
349+ for regex in regex_array ]
350+
351+ error_matches = compile (_error_matches )
352+ error_exceptions = compile (_error_exceptions )
353+ warning_matches = compile (_warning_matches )
354+ warning_exceptions = compile (_warning_exceptions )
355+ file_line_matches = compile (_file_line_matches )
356+
357+ matcher , args = _match , []
358+ timings = []
359+ if profile :
360+ matcher = _profile_match
361+ timings = [
362+ [0.0 ] * len (error_matches ), [0.0 ] * len (error_exceptions ),
363+ [0.0 ] * len (warning_matches ), [0.0 ] * len (warning_exceptions )]
364+
365+ errors = []
366+ warnings = []
367+ for i , line in enumerate (lines ):
368+ # use CTest's regular expressions to scrape the log for events
369+ if matcher (error_matches , error_exceptions , line , * timings [:2 ]):
370+ event = BuildError (line .strip (), offset + i + 1 )
371+ errors .append (event )
372+ elif matcher (warning_matches , warning_exceptions , line , * timings [2 :]):
373+ event = BuildWarning (line .strip (), offset + i + 1 )
374+ warnings .append (event )
375+ else :
376+ continue
377+
378+ # get file/line number for each event, if possible
379+ for flm in file_line_matches :
380+ match = flm .search (line )
381+ if match :
382+ event .source_file , source_line_no = match .groups ()
383+
384+ return errors , warnings , timings
385+
386+
387+ def _parse_unpack (args ):
388+ return _parse (* args )
264389
265- self .error_matches = compile (error_matches )
266- self .error_exceptions = compile (error_exceptions )
267- self .warning_matches = compile (warning_matches )
268- self .warning_exceptions = compile (warning_exceptions )
269- self .file_line_matches = compile (file_line_matches )
270390
271- def parse (self , stream , context = 6 ):
391+ class CTestLogParser (object ):
392+ """Log file parser that extracts errors and warnings."""
393+ def __init__ (self , profile = False ):
394+ # whether to record timing information
395+ self .timings = []
396+ self .profile = profile
397+
398+ def print_timings (self ):
399+ """Print out profile of time spent in different regular expressions."""
400+ def stringify (elt ):
401+ return elt if isinstance (elt , str ) else elt .pattern
402+
403+ index = 0
404+ for name , arr in [('error_matches' , _error_matches ),
405+ ('error_exceptions' , _error_exceptions ),
406+ ('warning_matches' , _warning_matches ),
407+ ('warning_exceptions' , _warning_exceptions )]:
408+
409+ print ()
410+ print (name )
411+ for i , elt in enumerate (arr ):
412+ print ("%16.2f %s" % (
413+ self .timings [index ][i ] * 1e6 , stringify (elt )))
414+ index += 1
415+
416+
417+ def parse (self , stream , context = 6 , jobs = None ):
272418 """Parse a log file by searching each line for errors and warnings.
273419
274420 Args:
@@ -281,30 +427,45 @@ def parse(self, stream, context=6):
281427 """
282428 if isinstance (stream , string_types ):
283429 with open (stream ) as f :
284- return self .parse (f )
430+ return self .parse (f , context , jobs )
285431
286432 lines = [line for line in stream ]
287433
288- errors = []
289- warnings = []
290- for i , line in enumerate (lines ):
291- # use CTest's regular expressions to scrape the log for events
292- if _match (self .error_matches , self .error_exceptions , line ):
293- event = BuildError (line .strip (), i + 1 )
294- errors .append (event )
295- elif _match (self .warning_matches , self .warning_exceptions , line ):
296- event = BuildWarning (line .strip (), i + 1 )
297- warnings .append (event )
298- else :
299- continue
300-
301- # get file/line number for each event, if possible
302- for flm in self .file_line_matches :
303- match = flm .search (line )
304- if match :
305- event .source_file , source_line_no = match .groups ()
306-
307- # add log context, as well
434+ if jobs is None :
435+ jobs = multiprocessing .cpu_count ()
436+
437+ # single-thread small logs
438+ if len (lines ) < 10 * jobs :
439+ errors , warnings , self .timings = _parse (lines , 0 , self .profile )
440+
441+ else :
442+ # Build arguments for parallel jobs
443+ args = []
444+ offset = 0
445+ for chunk in chunks (lines , jobs ):
446+ args .append ((chunk , offset , self .profile ))
447+ offset += len (chunk )
448+
449+ # create a pool and farm out the matching job
450+ pool = multiprocessing .Pool (jobs )
451+ try :
452+ # this is a workaround for a Python bug in Pool with ctrl-C
453+ results = pool .map_async (_parse_unpack , args , 1 ).get (9999999 )
454+ errors , warnings , timings = zip (* results )
455+ finally :
456+ pool .terminate ()
457+
458+ # merge results
459+ errors = sum (errors , [])
460+ warnings = sum (warnings , [])
461+
462+ if self .profile :
463+ self .timings = [
464+ [sum (i ) for i in zip (* t )] for t in zip (* timings )]
465+
466+ # add log context to all events
467+ for event in (errors + warnings ):
468+ i = event .line_no - 1
308469 event .pre_context = [
309470 l .rstrip () for l in lines [i - context :i ]]
310471 event .post_context = [
0 commit comments