view roundup/rfc2822.py @ 3696:790363e96852

Sorting/grouping by multiple properties. - Implement sorting/grouping by multiple properties for the web interface. I'm now using @sort0/@sortdir0,@sort1/@sortdir1,... and @group0/@groupdir0,... when generating URLs from a search template. These are converted to a list internally. When saving URLs (e.g. when storing queries) I'm using @sort=prop1,prop2,... and @group=... with optional '-' prepended to individual props. This means saved URLs are backward compatible with existing trackers (and yes, this was a design goal). I need the clumsy version with @sort0,@sort1 etc, because I'm currently using several selectors and checkboxes (as the classic template does, too). I don't think there is a way around that in HTML? - Updated (hopefully all) documentation to reflect the new URL format and the consequences in the web-interface. - I've set the number of sort/group properties in the classic template to two -- this can easily be reverted by changing n_sort to 1. Richard, would you look over these changes? I've set a tag before and (will set) after commit, so that it would be easy to merge out. Don't be too scared about the size of the change, most is documentation, the guts are in cgi/templating.py and small changes in the classic template.
author Ralf Schlatterbeck <schlatterbeck@users.sourceforge.net>
date Wed, 30 Aug 2006 20:28:26 +0000
parents 97f9fc0bc0ea
children
line wrap: on
line source

"""Some rfc822 functions taken from the new (python2.3) "email" module.
"""
__docformat__ = 'restructuredtext'

import re
from string import letters, digits
from binascii import b2a_base64, a2b_base64

# Matches one MIME "encoded-word" of the form =?charset?encoding?text?= .
# Because the pattern has three groups, ecre.split() returns a flat list in
# which every plain-text piece is followed by the charset, encoding and
# encoded text of the next encoded-word.
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<encoded>.*?)      # non-greedy up to the next ?= is the encoded string
  \?=                   # literal ?=
  ''', re.VERBOSE | re.IGNORECASE)

# Matches a non-empty string consisting only of characters that
# encode_header() treats as needing no encoding.  Note that "A-z" is a single
# range that also spans the punctuation between "Z" and "a", and ",-." is a
# range covering ",", "-" and "."; "%%" is simply "%" listed twice.
hqre = re.compile(r'^[A-z0-9!"#$%%&\'()*+,-./:;<=>?@\[\]^_`{|}~ ]+$')

# Line terminator that base64_decode() replaces when convert_eols is given.
CRLF = '\r\n'

def base64_decode(s, convert_eols=None):
    """Return the data obtained by base64-decoding the raw string *s*.

    An empty or None *s* is handed back unchanged.  When *convert_eols* is
    a non-empty string, every CRLF pair in the decoded data is replaced by
    it; os.linesep is a sensible value when decoding a text attachment.

    Only the bare base64 payload is handled here.  A complete MIME header
    value such as =?iso-8859-1?b?bmloISBuaWgh?= must be taken apart first
    (see _decode_header in this module).

    Taken from 'email' module
    """
    if s:
        raw = a2b_base64(s)
        # Line-ending translation is opt-in; by default the decoded data
        # is returned exactly as decoded.
        return raw.replace(CRLF, convert_eols) if convert_eols else raw
    return s

def unquote_match(match):
    """Turn a match in the form ``=AB`` to the ASCII character with value
    0xab.

    *match* is a regular-expression match object whose matched text starts
    with ``=`` followed by two hexadecimal digits.

    Taken from 'email' module
    """
    s = match.group(0)
    # Skip the leading '=' and interpret the next two characters as hex.
    return chr(int(s[1:3], 16))

def qp_decode(s):
    """Decode a string encoded with RFC 2047 MIME header 'Q' encoding.

    Underscores become spaces and every ``=XX`` escape, where XX is two
    hexadecimal digits, becomes the character with that value.  An ``=``
    that is not followed by two hex digits is malformed input and is left
    in place unchanged rather than raising an exception.

    This function does not parse a full MIME header value encoded with
    quoted-printable (like =?iso-8859-1?q?Hello_World?=) -- use
    _decode_header/decode_header for that.

    Taken from 'email' module
    """
    # In header 'Q' encoding an underscore always represents a space.
    s = s.replace('_', ' ')
    # Match hex digits only: the looser \w{2} also matched pairs such as
    # "=zz", which made int(..., 16) in unquote_match raise ValueError.
    return re.sub(r'=[0-9A-Fa-f]{2}', unquote_match, s)

def _decode_header(header):
    """Split a header value into decoded pieces without charset conversion.

    The result is a list of ``(text, charset)`` tuples in header order.
    For plain (non-encoded) text the charset is None; for text that came
    from an encoded-word it is the lower-cased charset name found in that
    encoded-word.  Adjacent encoded-words with the same charset are merged
    into a single tuple.

    Taken from 'email' module
    """
    text = str(header)
    if ecre.search(text) is None:
        # Nothing is encoded: hand the whole value back as one plain piece.
        return [(text, None)]

    pieces = []
    for row in text.splitlines():
        if ecre.search(row) is None:
            # A line without any encoded-word is kept verbatim.
            pieces.append((row, None))
            continue

        # ecre has three groups, so split() yields
        #   [plain, charset, encoding, payload, plain, charset, ...]
        # i.e. a plain-text item at every fourth position.
        fields = ecre.split(row)
        for offset in range(0, len(fields), 4):
            plain = fields[offset]
            # Whitespace-only text around encoded-words is dropped.
            if plain.strip():
                pieces.append((plain, None))

            word = fields[offset + 1:offset + 4]
            if not word:
                # Trailing plain text: no encoded-word follows it.
                continue

            charset = word[0].lower()
            encoding = word[1].lower()
            payload = word[2]

            if encoding == 'b':
                dec = base64_decode(payload)
            elif encoding == 'q':
                dec = qp_decode(payload)
            else:
                dec = payload

            if pieces and pieces[-1][1] == charset:
                # Same charset as the previous piece: extend it in place.
                earlier, same_charset = pieces[-1]
                pieces[-1] = (earlier + dec, same_charset)
            else:
                pieces.append((dec, charset))
    return pieces

def decode_header(hdr):
    """Decode an RFC 2822 header value and return it as a UTF-8 string.

    Each piece produced by _decode_header is converted to unicode using
    its own charset (after unaliasCharset), or iso-8859-1 when the piece
    has none; undecodable bytes are replaced.  The pieces are then joined
    and encoded as UTF-8.

    Returns None when *hdr* is empty or None.
    """
    if not hdr:
        return None
    fragments = [
        unicode(raw, unaliasCharset(name) or 'iso-8859-1', 'replace')
        for raw, name in _decode_header(hdr)
    ]
    return u"".join(fragments).encode('utf-8')

def encode_header(header, charset='utf-8'):
    """Return *header* as a 'Q' encoded-word when it needs encoding.

    A header that is empty, or that consists only of characters accepted
    by hqre, is returned unchanged.  Otherwise the whole value is wrapped
    as ``=?charset?q?...?=`` with *charset* inserted verbatim; the header
    text itself is not converted to that charset here.

    NOTE: the result is a single encoded-word; no length limit is applied.
    """
    # Empty values (None, '') are returned untouched.
    if not header:
        return header

    # Nothing to do when every character is acceptable as-is.
    if hqre.match(header):
        return header

    def quote(char):
        # Space may be written as "_" instead of "=20" for readability.
        if char == ' ':
            return '_'
        # Acceptable characters go through verbatim, except the three that
        # carry special meaning inside an encoded-word.
        if char not in '_=?' and hqre.match(char):
            return char
        # Everything else becomes a hex escape such as "=E2".
        return "=%02X" % ord(char)

    return '=?%s?q?%s?=' % (charset, ''.join(map(quote, header)))

def unaliasCharset(charset):
    """Normalise a charset name for use as a codec name.

    The name is lower-cased and any "windows-" in it is rewritten to "cp"
    (so "Windows-1251" becomes "cp1251").  Returns None when *charset* is
    empty or None.
    """
    if not charset:
        return None
    lowered = charset.lower()
    return lowered.replace("windows-", 'cp')

def test():
    """Print the result of encode_header() for a sample ASCII header."""
    # The parenthesised single-argument form prints exactly the same text
    # under the Python 2 print statement and is also valid Python 3 syntax,
    # so this line no longer stops Python 3 tools from parsing the module.
    print(encode_header("Contrary, Mary"))

# Run the small manual check only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    test()

# vim: et

Roundup Issue Tracker: http://roundup-tracker.org/