diff roundup/mailgw.py @ 5151:6bbb6dd97458

Fix subject parsing in mail gateway. The previous parsing routine did not ensure that arguments are at the end of the subject, and when subject_suffix_parsing was configured to be 'loose' it would truncate the subject when encountering a double prefix, e.g. for "Subject: [frobulated] [frobulatedagain] this part would be lost" the text after the second prefix was dropped.
author Ralf Schlatterbeck <rsc@runtux.com>
date Tue, 19 Jul 2016 11:47:53 +0200
parents 14abd0a67207
children 86190260f077
line wrap: on
line diff
--- a/roundup/mailgw.py	Mon Jul 18 13:55:34 2016 +0200
+++ b/roundup/mailgw.py	Tue Jul 19 11:47:53 2016 +0200
@@ -652,33 +652,33 @@
 
             tmpsubject = tmpsubject[m.end():]
 
-        # Match the title of the subject
-        # if we've not found a valid classname prefix then force the
-        # scanning to handle there being a leading delimiter
-        title_re = r'(?P<title>%s[^%s]*)'%(
-            not self.matches['classname'] and '.' or '', delim_open)
-        m = re.match(title_re, tmpsubject.strip(), re.IGNORECASE)
+        # Match any arguments specified *from the end*
+        # Optionally match and strip quote at the end that dumb mailers
+        # may put there, e.g.
+        #      Re: "[issue1] bla blah [<args>]"
+        q = ''
+        if self.matches['quote']:
+            q = '"?'
+        args_re = r'(?P<argswhole>%s(?P<args>[^%s]*)%s)%s$'%(delim_open,
+            delim_close, delim_close, q)
+        m = re.search(args_re, tmpsubject.strip(), re.IGNORECASE|re.VERBOSE)
         if m:
             self.matches.update(m.groupdict())
-            tmpsubject = tmpsubject[len(self.matches['title']):] # Consume title
+            tmpsubject = tmpsubject [:m.start()]
+        else:
+            self.matches['argswhole'] = self.matches['args'] = None
 
-        if self.matches['title']:
-            self.matches['title'] = self.matches['title'].strip()
-        else:
-            self.matches['title'] = ''
+        # The title of the subject is the remaining tmpsubject.
+        self.matches ['title'] = tmpsubject.strip ()
 
         # strip off the quotes that dumb emailers put around the subject, like
         #      Re: "[issue1] bla blah"
-        if self.matches['quote'] and self.matches['title'].endswith('"'):
+        # but only if we didn't match arguments at the end (which would
+        # already have consumed the quote after the subject)
+        if self.matches['quote'] and not self.matches['argswhole'] \
+           and self.matches['title'].endswith('"'):
             self.matches['title'] = self.matches['title'][:-1]
         
-        # Match any arguments specified
-        args_re = r'(?P<argswhole>%s(?P<args>.+?)%s)?'%(delim_open,
-            delim_close)
-        m = re.search(args_re, tmpsubject.strip(), re.IGNORECASE|re.VERBOSE)
-        if m:
-            self.matches.update(m.groupdict())
-
     def rego_confirm(self):
         ''' Check for registration OTK and confirm the registration if found
         '''

Roundup Issue Tracker: http://roundup-tracker.org/