Mercurial > p > roundup > code
comparison test/test_multipart.py @ 5305:e20f472fde7d
issue2550799: provide basic support for handling HTML-only emails
Initial implementation and testing with the dehtml HTML converter
are done.
The use of Beautiful Soup 4 is not tested. My test system breaks when
running dehtml.py using Beautiful Soup. I don't get the failures when
running under the test harness, but the text output is significantly
different (different line breaks, number of newlines, etc.).
The tests for dehtml need to be generated for Beautiful Soup and the
expected output changed. Since I have a wonky install of Beautiful
Soup, I don't trust my output as the standard to test against. Also,
since Beautiful Soup is optional, the test harness needs to skip the
Beautiful Soup tests if "import bs4" fails. Again, this is outside of my
expertise. I deleted the work I had done to implement that. I could
not get it working and wanted to get this feature in, in some form.
| author | John Rouillard <rouilj@ieee.org> |
|---|---|
| date | Fri, 13 Oct 2017 21:46:59 -0400 |
| parents | 364c54991861 |
| children | 91354bf0b683 |
comparison
equal
deleted
inserted
replaced
| 5304:ae32f082e623 | 5305:e20f472fde7d |
|---|---|
| 35 # part of the email header. | 35 # part of the email header. |
| 36 table = {'multipart/signed': ' boundary="boundary-%(indent)s";\n', | 36 table = {'multipart/signed': ' boundary="boundary-%(indent)s";\n', |
| 37 'multipart/mixed': ' boundary="boundary-%(indent)s";\n', | 37 'multipart/mixed': ' boundary="boundary-%(indent)s";\n', |
| 38 'multipart/alternative': ' boundary="boundary-%(indent)s";\n', | 38 'multipart/alternative': ' boundary="boundary-%(indent)s";\n', |
| 39 'text/plain': ' name="foo.txt"\nfoo\n', | 39 'text/plain': ' name="foo.txt"\nfoo\n', |
| 40 'text/html': ' name="bar.html"\n<html><body>bar</body></html>\n', | |
| 40 'application/pgp-signature': ' name="foo.gpg"\nfoo\n', | 41 'application/pgp-signature': ' name="foo.gpg"\nfoo\n', |
| 41 'application/pdf': ' name="foo.pdf"\nfoo\n', | 42 'application/pdf': ' name="foo.pdf"\nfoo\n', |
| 42 'message/rfc822': '\nSubject: foo\n\nfoo\n'} | 43 'message/rfc822': '\nSubject: foo\n\nfoo\n'} |
| 43 | 44 |
| 44 def __init__(self, spec): | 45 def __init__(self, spec): |
| 158 # end | 159 # end |
| 159 p = m.getpart() | 160 p = m.getpart() |
| 160 self.assert_(p is None) | 161 self.assert_(p is None) |
| 161 | 162 |
| 162 def TestExtraction(self, spec, expected): | 163 def TestExtraction(self, spec, expected): |
| 163 self.assertEqual(ExampleMessage(spec).extract_content(), expected) | 164 from roundup.dehtml import dehtml |
| 165 | |
| 166 self.assertEqual(ExampleMessage(spec).extract_content( | |
| 167 html2text=dehtml('dhtml').html2text), expected) | |
| 164 | 168 |
| 165 def testTextPlain(self): | 169 def testTextPlain(self): |
| 166 self.TestExtraction('text/plain', ('foo\n', [])) | 170 self.TestExtraction('text/plain', ('foo\n', [], False)) |
| 167 | 171 |
| 168 def testAttachedTextPlain(self): | 172 def testAttachedTextPlain(self): |
| 169 self.TestExtraction(""" | 173 self.TestExtraction(""" |
| 170 multipart/mixed | 174 multipart/mixed |
| 171 text/plain | 175 text/plain |
| 172 text/plain""", | 176 text/plain""", |
| 173 ('foo\n', | 177 ('foo\n', |
| 174 [('foo.txt', 'text/plain', 'foo\n')])) | 178 [('foo.txt', 'text/plain', 'foo\n')], False)) |
| 175 | 179 |
| 176 def testMultipartMixed(self): | 180 def testMultipartMixed(self): |
| 177 self.TestExtraction(""" | 181 self.TestExtraction(""" |
| 178 multipart/mixed | 182 multipart/mixed |
| 179 text/plain | 183 text/plain |
| 180 application/pdf""", | 184 application/pdf""", |
| 181 ('foo\n', | 185 ('foo\n', |
| 182 [('foo.pdf', 'application/pdf', 'foo\n')])) | 186 [('foo.pdf', 'application/pdf', 'foo\n')], False)) |
| 187 | |
| 188 def testMultipartMixedHtml(self): | |
| 189 self.TestExtraction(""" | |
| 190 multipart/mixed | |
| 191 text/html | |
| 192 application/pdf""", | |
| 193 ('bar\n', | |
| 194 [('bar.html', 'text/html', | |
| 195 '<html><body>bar</body></html>\n'), | |
| 196 ('foo.pdf', 'application/pdf', 'foo\n')], False)) | |
| 183 | 197 |
| 184 def testMultipartAlternative(self): | 198 def testMultipartAlternative(self): |
| 185 self.TestExtraction(""" | 199 self.TestExtraction(""" |
| 186 multipart/alternative | 200 multipart/alternative |
| 187 text/plain | 201 text/plain |
| 188 application/pdf | 202 application/pdf |
| 189 """, ('foo\n', [('foo.pdf', 'application/pdf', 'foo\n')])) | 203 """, ('foo\n', [('foo.pdf', 'application/pdf', 'foo\n')], False)) |
| 204 | |
| 205 def testMultipartAlternativeHtml(self): | |
| 206 self.TestExtraction(""" | |
| 207 multipart/alternative | |
| 208 text/html | |
| 209 application/pdf""", | |
| 210 ('bar\n', | |
| 211 [('bar.html', 'text/html', | |
| 212 '<html><body>bar</body></html>\n'), | |
| 213 ('foo.pdf', 'application/pdf', 'foo\n')], False)) | |
| 190 | 214 |
| 191 def testDeepMultipartAlternative(self): | 215 def testDeepMultipartAlternative(self): |
| 192 self.TestExtraction(""" | 216 self.TestExtraction(""" |
| 193 multipart/mixed | 217 multipart/mixed |
| 194 multipart/alternative | 218 multipart/alternative |
| 195 text/plain | 219 text/plain |
| 196 application/pdf | 220 application/pdf |
| 197 """, ('foo\n', [('foo.pdf', 'application/pdf', 'foo\n')])) | 221 """, ('foo\n', [('foo.pdf', 'application/pdf', 'foo\n')], False)) |
| 198 | 222 |
| 199 def testSignedText(self): | 223 def testSignedText(self): |
| 200 self.TestExtraction(""" | 224 self.TestExtraction(""" |
| 201 multipart/signed | 225 multipart/signed |
| 202 text/plain | 226 text/plain |
| 203 application/pgp-signature""", ('foo\n', [])) | 227 application/pgp-signature""", ('foo\n', [], False)) |
| 204 | 228 |
| 205 def testSignedAttachments(self): | 229 def testSignedAttachments(self): |
| 206 self.TestExtraction(""" | 230 self.TestExtraction(""" |
| 207 multipart/signed | 231 multipart/signed |
| 208 multipart/mixed | 232 multipart/mixed |
| 209 text/plain | 233 text/plain |
| 210 application/pdf | 234 application/pdf |
| 211 application/pgp-signature""", | 235 application/pgp-signature""", |
| 212 ('foo\n', | 236 ('foo\n', |
| 213 [('foo.pdf', 'application/pdf', 'foo\n')])) | 237 [('foo.pdf', 'application/pdf', 'foo\n')], False)) |
| 214 | 238 |
| 215 def testAttachedSignature(self): | 239 def testAttachedSignature(self): |
| 216 self.TestExtraction(""" | 240 self.TestExtraction(""" |
| 217 multipart/mixed | 241 multipart/mixed |
| 218 text/plain | 242 text/plain |
| 219 application/pgp-signature""", | 243 application/pgp-signature""", |
| 220 ('foo\n', | 244 ('foo\n', |
| 221 [('foo.gpg', 'application/pgp-signature', 'foo\n')])) | 245 [('foo.gpg', 'application/pgp-signature', 'foo\n')], False)) |
| 222 | 246 |
| 223 def testMessageRfc822(self): | 247 def testMessageRfc822(self): |
| 224 self.TestExtraction(""" | 248 self.TestExtraction(""" |
| 225 multipart/mixed | 249 multipart/mixed |
| 226 message/rfc822""", | 250 message/rfc822""", |
| 227 (None, | 251 (None, |
| 228 [('foo.eml', 'message/rfc822', 'Subject: foo\n\nfoo\n')])) | 252 [('foo.eml', 'message/rfc822', 'Subject: foo\n\nfoo\n')], False)) |
| 229 | 253 |
| 230 # vim: set filetype=python ts=4 sw=4 et si | 254 # vim: set filetype=python ts=4 sw=4 et si |
