changeset 5547:081be318661b

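Do not transcode binary email attachments (issue2551004). Only transcode an attachment's content if a charset is specified or if it has a text/* content-type (falling back to iso8859-1 when a text part specifies no charset); all other content is left as undecoded bytes.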
author Joseph Myers <jsm@polyomino.org.uk>
date Wed, 26 Sep 2018 01:24:19 +0000
parents 52e50ab5dfda
children fea11d05110e
files roundup/mailgw.py test/test_mailgw.py test/test_mailgw_roundupmessage.py test/test_multipart.py
diffstat 4 files changed, 25 insertions(+), 25 deletions(-) [+]
line wrap: on
line diff
--- a/roundup/mailgw.py	Tue Sep 25 17:06:18 2018 +0200
+++ b/roundup/mailgw.py	Wed Sep 26 01:24:19 2018 +0000
@@ -241,7 +241,8 @@
 
         if content is not None:
             charset = self.get_content_charset()
-            content = u2s(content.decode(charset or 'iso8859-1', 'replace'))
+            if charset or self.get_content_maintype() == 'text':
+                content = u2s(content.decode(charset or 'iso8859-1', 'replace'))
 
         return content
 
--- a/test/test_mailgw.py	Tue Sep 25 17:06:18 2018 +0200
+++ b/test/test_mailgw.py	Wed Sep 26 01:24:19 2018 +0000
@@ -832,7 +832,6 @@
 --001485f339f8f361fb049188dbba--
 '''
 
-    @pytest.mark.xfail
     def testOctetStreamTranscoding(self):
         self.doNewIssue()
         self._handle_mail(self.octetstream_msg)
@@ -841,13 +840,13 @@
         msg = self.db.msg.getnode (messages[-1])
         assert(len(msg.files) == 1)
         names = {0 : 'testfile'}
-        content = ['''This is a file containing text
+        content = [b'''This is a file containing text
 in latin-1 format \xE4\xF6\xFC\xC4\xD6\xDC\xDF
 ''']
         for n, id in enumerate (msg.files):
             f = self.db.file.getnode (id)
             self.assertEqual(f.name, names.get (n, 'unnamed'))
-            self.assertEqual(f.content, content [n])
+            self.assertEqual(f.binary_content, content [n])
 
     def testMultipartKeepAlternatives(self):
         self.doNewIssue()
--- a/test/test_mailgw_roundupmessage.py	Tue Sep 25 17:06:18 2018 +0200
+++ b/test/test_mailgw_roundupmessage.py	Wed Sep 26 01:24:19 2018 +0000
@@ -126,7 +126,7 @@
             dGVzdCBlbmNvZGVkIG1lc3NhZ2U=
         """)
 
-        self.assertEqual(msg.get_body(), 'test encoded message')
+        self.assertEqual(msg.get_body(), b'test encoded message')
 
 
 class AsAttachmentRoundupMessageTests(TestCase):
@@ -153,7 +153,7 @@
         self.assertEqual(
             msg.as_attachment(),
             ('message.dat', 'application/octet-stream',
-             'test encoded message'))
+             b'test encoded message'))
 
     def test_rfc822(self):
         msg = message_from_string("""
@@ -221,7 +221,7 @@
 
         self.assertEqual(msg.extract_content(), (
             'foo\n',
-            [('foo.pdf', 'application/pdf', 'foo\n')],
+            [('foo.pdf', 'application/pdf', b'foo\n')],
             False
         ))
 
@@ -257,11 +257,11 @@
 
         self.assertEqual(msg.extract_content(), (
             'foo2\n', [
-                ('foo.pdf', 'application/pdf', 'foo\n'),
+                ('foo.pdf', 'application/pdf', b'foo\n'),
                 ('foo.txt', 'text/plain', 'foo\n'),
                 ('foo.html', 'text/html', '<html>foo</html>\n'),
                 ('foo3.txt', 'text/plain', 'foo3\n'),
-                ('foo2.pdf', 'application/pdf', 'foo2\n'),
+                ('foo2.pdf', 'application/pdf', b'foo2\n'),
             ],
             False
         ))
@@ -286,7 +286,7 @@
         self.assertEqual(msg.extract_content(ignore_alternatives=True), (
             'foo2\n', [
                 ('foo3.txt', 'text/plain', 'foo3\n'),
-                ('foo2.pdf', 'application/pdf', 'foo2\n'),
+                ('foo2.pdf', 'application/pdf', b'foo2\n'),
             ],
             False
         ))
@@ -314,7 +314,7 @@
 
         self.assertEqual(msg.extract_content(), (
             'foo\n',
-            [('foo.pdf', 'application/pdf', 'foo\n')],
+            [('foo.pdf', 'application/pdf', b'foo\n')],
             False
         ))
 
@@ -328,7 +328,7 @@
 
         self.assertEqual(msg.extract_content(), (
             'foo\n',
-            [('foo.gpg', 'application/pgp-signature', 'foo\n')],
+            [('foo.gpg', 'application/pgp-signature', b'foo\n')],
             False
         ))
 
--- a/test/test_multipart.py	Tue Sep 25 17:06:18 2018 +0200
+++ b/test/test_multipart.py	Wed Sep 26 01:24:19 2018 +0000
@@ -186,7 +186,7 @@
     text/plain
     application/pdf""",
                   ('foo\n',
-                   [('foo.pdf', 'application/pdf', 'foo\n')], False))
+                   [('foo.pdf', 'application/pdf', b'foo\n')], False))
 
     def testMultipartMixedHtml(self):
         # test with html conversion enabled
@@ -197,7 +197,7 @@
                   ('bar >\n',
                    [('bar.html', 'text/html',
                       '<html><body>bar &gt;</body></html>\n'),
-                   ('foo.pdf', 'application/pdf', 'foo\n')], False),
+                   ('foo.pdf', 'application/pdf', b'foo\n')], False),
                             convert_html_with='dehtml')
 
         # test with html conversion disabled
@@ -208,7 +208,7 @@
                   (None,
                    [('bar.html', 'text/html',
                       '<html><body>bar &gt;</body></html>\n'),
-                    ('foo.pdf', 'application/pdf', 'foo\n')], False),
+                    ('foo.pdf', 'application/pdf', b'foo\n')], False),
                             convert_html_with=False)
 
     def testMultipartAlternative(self):
@@ -216,7 +216,7 @@
 multipart/alternative
     text/plain
     application/pdf
-        """, ('foo\n', [('foo.pdf', 'application/pdf', 'foo\n')], False))
+        """, ('foo\n', [('foo.pdf', 'application/pdf', b'foo\n')], False))
 
     def testMultipartAlternativeHtml(self):
         self.TestExtraction("""
@@ -226,7 +226,7 @@
                   ('bar >\n',
                    [('bar.html', 'text/html',
                       '<html><body>bar &gt;</body></html>\n'),
-                   ('foo.pdf', 'application/pdf', 'foo\n')], False),
+                   ('foo.pdf', 'application/pdf', b'foo\n')], False),
                             convert_html_with='dehtml')
 
         self.TestExtraction("""
@@ -236,7 +236,7 @@
                   (None,
                    [('bar.html', 'text/html',
                       '<html><body>bar &gt;</body></html>\n'),
-                    ('foo.pdf', 'application/pdf', 'foo\n')], False),
+                    ('foo.pdf', 'application/pdf', b'foo\n')], False),
                             convert_html_with=False)
 
     def testMultipartAlternativeHtmlText(self):
@@ -249,7 +249,7 @@
                   ('foo\n',
                    [('bar.html', 'text/html',
                       '<html><body>bar &gt;</body></html>\n'),
-                    ('foo.pdf', 'application/pdf', 'foo\n')], False),
+                    ('foo.pdf', 'application/pdf', b'foo\n')], False),
                             convert_html_with='dehtml')
 
         # text should take priority over html when text is first
@@ -261,7 +261,7 @@
                   ('foo\n',
                    [('bar.html', 'text/html',
                       '<html><body>bar &gt;</body></html>\n'),
-                    ('foo.pdf', 'application/pdf', 'foo\n')], False),
+                    ('foo.pdf', 'application/pdf', b'foo\n')], False),
                             convert_html_with='dehtml')
 
         # text should take priority over html when text is second and
@@ -274,7 +274,7 @@
                   ('foo\n',
                    [('bar.html', 'text/html',
                       '<html><body>bar &gt;</body></html>\n'),
-                    ('foo.pdf', 'application/pdf', 'foo\n')], False),
+                    ('foo.pdf', 'application/pdf', b'foo\n')], False),
                             convert_html_with=False)
 
         # text should take priority over html when text is first and
@@ -287,7 +287,7 @@
                   ('foo\n',
                    [('bar.html', 'text/html',
                       '<html><body>bar &gt;</body></html>\n'),
-                    ('foo.pdf', 'application/pdf', 'foo\n')], False),
+                    ('foo.pdf', 'application/pdf', b'foo\n')], False),
                             convert_html_with=False)
 
     def testDeepMultipartAlternative(self):
@@ -296,7 +296,7 @@
     multipart/alternative
         text/plain
         application/pdf
-        """, ('foo\n', [('foo.pdf', 'application/pdf', 'foo\n')], False))
+        """, ('foo\n', [('foo.pdf', 'application/pdf', b'foo\n')], False))
 
     def testSignedText(self):
         self.TestExtraction("""
@@ -312,7 +312,7 @@
         application/pdf
     application/pgp-signature""",
                   ('foo\n',
-                   [('foo.pdf', 'application/pdf', 'foo\n')], False))
+                   [('foo.pdf', 'application/pdf', b'foo\n')], False))
 
     def testAttachedSignature(self):
         self.TestExtraction("""
@@ -320,7 +320,7 @@
     text/plain
     application/pgp-signature""",
                   ('foo\n',
-                   [('foo.gpg', 'application/pgp-signature', 'foo\n')], False))
+                   [('foo.gpg', 'application/pgp-signature', b'foo\n')], False))
 
     def testMessageRfc822(self):
         self.TestExtraction("""

Roundup Issue Tracker: http://roundup-tracker.org/