Mercurial > p > roundup > code
diff roundup/mailgw.py @ 3831:14ec78618bd5
Allow customisation of regular expressions used in email parsing...
...thanks Bruno Damour
| author | Richard Jones <richard@users.sourceforge.net> |
|---|---|
| date | Mon, 26 Mar 2007 04:04:42 +0000 |
| parents | 2b63b1689cef |
| children | ee377c7c90fe |
line wrap: on
line diff
--- a/roundup/mailgw.py Sun Mar 25 23:18:25 2007 +0000
+++ b/roundup/mailgw.py Mon Mar 26 04:04:42 2007 +0000
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 #
 # Copyright (c) 2001 Bizar Software Pty Ltd (http://www.bizarsoftware.com.au/)
 # This module is free software, and you may redistribute it and/or modify
@@ -72,7 +73,7 @@
 an exception, the original message is bounced back to the sender with the
 explanatory message given in the exception.
 
-$Id: mailgw.py,v 1.184 2007-02-15 03:09:53 richard Exp $
+$Id: mailgw.py,v 1.185 2007-03-26 04:04:42 richard Exp $
 """
 __docformat__ = 'restructuredtext'
 
@@ -635,11 +636,14 @@
             'argswhole'])
 
         # Look for Re: et. al. Used later on for MAILGW_SUBJECT_CONTENT_MATCH
-        re_re = r'''(?P<refwd>(\s*\W?\s*(fw|fwd|re|aw|sv|ang)\W)+)\s*'''
-        m = re.match(re_re, tmpsubject, re.IGNORECASE|re.VERBOSE)
+        refwd_re = config['MAILGW_REFWD_RE'].decode('iso8859-1')
+        re_re = r'''(?P<refwd>%s)*\s*''' % refwd_re
+        m = re.match(re_re, tmpsubject, re.IGNORECASE|re.VERBOSE|re.UNICODE)
         if m:
-            matches.update(m.groupdict())
-            tmpsubject = tmpsubject[len(matches['refwd']):] # Consume Re:
+            m = m.groupdict()
+            if m['refwd']:
+                matches.update(m)
+                tmpsubject = tmpsubject[len(m['refwd']):] # Consume Re:
 
         # Look for Leading "
         m = re.match(r'(?P<quote>\s*")', tmpsubject,
@@ -1005,10 +1009,14 @@
         # figure how much we should muck around with the email body
         keep_citations = config['MAILGW_KEEP_QUOTED_TEXT']
         keep_body = config['MAILGW_LEAVE_BODY_UNCHANGED']
+        blank_line = re.compile(r'%s' % config['MAILGW_BLANKLINE_RE'])
+        eol = re.compile(r'%s' % config['MAILGW_EOL_RE'])
+        signature = re.compile(r'%s' % config['MAILGW_SIGN_RE'])
+        original_msg = re.compile(r'%s' % config['MAILGW_ORIGMSG_RE'])
 
         # parse the body of the message, stripping out bits as appropriate
         summary, content = parseContent(content, keep_citations,
-            keep_body)
+            keep_body, blank_line, eol, signature, original_msg)
         content = content.strip()
 
         #
@@ -1209,12 +1217,7 @@
     else:
         return 0
 
-
-def parseContent(content, keep_citations, keep_body,
-        blank_line=re.compile(r'[\r\n]+\s*[\r\n]+'),
-        eol=re.compile(r'[\r\n]+'),
-        signature=re.compile(r'^[>|\s]*-- ?$'),
-        original_msg=re.compile(r'^[>|\s]*-----\s?Original Message\s?-----$')):
+def parseContent(content, keep_citations, keep_body, blank_line, eol, signature, original_msg):
     ''' The message body is divided into sections by blank lines.
     Sections where the second and all subsequent lines begin with a ">"
     or "|" character are considered "quoting sections". The first line of
