comparison test/test_multipart.py @ 5305:e20f472fde7d

issue2550799: provide basic support for handling HTML-only emails. Initial implementation and testing with the dehtml HTML converter is done. The use of Beautiful Soup 4 is not tested. My test system breaks when running dehtml.py using Beautiful Soup. I don't get the failures when running under the test harness, but the text output is significantly different (different line breaks, number of newlines, etc.). The tests for dehtml need to be generated for Beautiful Soup and the expected output changed. Since I have a wonky install of Beautiful Soup, I don't trust my output as the standard to test against. Also, since Beautiful Soup is optional, the test harness needs to skip the Beautiful Soup tests if `import bs4` fails. Again, this is something outside of my expertise. I deleted the work I had done to implement that: I could not get it working and wanted to get this feature in, in some form.
author John Rouillard <rouilj@ieee.org>
date Fri, 13 Oct 2017 21:46:59 -0400
parents 364c54991861
children 91354bf0b683
comparison
equal deleted inserted replaced
5304:ae32f082e623 5305:e20f472fde7d
35 # part of the email header. 35 # part of the email header.
36 table = {'multipart/signed': ' boundary="boundary-%(indent)s";\n', 36 table = {'multipart/signed': ' boundary="boundary-%(indent)s";\n',
37 'multipart/mixed': ' boundary="boundary-%(indent)s";\n', 37 'multipart/mixed': ' boundary="boundary-%(indent)s";\n',
38 'multipart/alternative': ' boundary="boundary-%(indent)s";\n', 38 'multipart/alternative': ' boundary="boundary-%(indent)s";\n',
39 'text/plain': ' name="foo.txt"\nfoo\n', 39 'text/plain': ' name="foo.txt"\nfoo\n',
40 'text/html': ' name="bar.html"\n<html><body>bar</body></html>\n',
40 'application/pgp-signature': ' name="foo.gpg"\nfoo\n', 41 'application/pgp-signature': ' name="foo.gpg"\nfoo\n',
41 'application/pdf': ' name="foo.pdf"\nfoo\n', 42 'application/pdf': ' name="foo.pdf"\nfoo\n',
42 'message/rfc822': '\nSubject: foo\n\nfoo\n'} 43 'message/rfc822': '\nSubject: foo\n\nfoo\n'}
43 44
44 def __init__(self, spec): 45 def __init__(self, spec):
158 # end 159 # end
159 p = m.getpart() 160 p = m.getpart()
160 self.assert_(p is None) 161 self.assert_(p is None)
161 162
162 def TestExtraction(self, spec, expected): 163 def TestExtraction(self, spec, expected):
163 self.assertEqual(ExampleMessage(spec).extract_content(), expected) 164 from roundup.dehtml import dehtml
165
166 self.assertEqual(ExampleMessage(spec).extract_content(
167 html2text=dehtml('dhtml').html2text), expected)
164 168
165 def testTextPlain(self): 169 def testTextPlain(self):
166 self.TestExtraction('text/plain', ('foo\n', [])) 170 self.TestExtraction('text/plain', ('foo\n', [], False))
167 171
168 def testAttachedTextPlain(self): 172 def testAttachedTextPlain(self):
169 self.TestExtraction(""" 173 self.TestExtraction("""
170 multipart/mixed 174 multipart/mixed
171 text/plain 175 text/plain
172 text/plain""", 176 text/plain""",
173 ('foo\n', 177 ('foo\n',
174 [('foo.txt', 'text/plain', 'foo\n')])) 178 [('foo.txt', 'text/plain', 'foo\n')], False))
175 179
176 def testMultipartMixed(self): 180 def testMultipartMixed(self):
177 self.TestExtraction(""" 181 self.TestExtraction("""
178 multipart/mixed 182 multipart/mixed
179 text/plain 183 text/plain
180 application/pdf""", 184 application/pdf""",
181 ('foo\n', 185 ('foo\n',
182 [('foo.pdf', 'application/pdf', 'foo\n')])) 186 [('foo.pdf', 'application/pdf', 'foo\n')], False))
187
188 def testMultipartMixedHtml(self):
189 self.TestExtraction("""
190 multipart/mixed
191 text/html
192 application/pdf""",
193 ('bar\n',
194 [('bar.html', 'text/html',
195 '<html><body>bar</body></html>\n'),
196 ('foo.pdf', 'application/pdf', 'foo\n')], False))
183 197
184 def testMultipartAlternative(self): 198 def testMultipartAlternative(self):
185 self.TestExtraction(""" 199 self.TestExtraction("""
186 multipart/alternative 200 multipart/alternative
187 text/plain 201 text/plain
188 application/pdf 202 application/pdf
189 """, ('foo\n', [('foo.pdf', 'application/pdf', 'foo\n')])) 203 """, ('foo\n', [('foo.pdf', 'application/pdf', 'foo\n')], False))
204
205 def testMultipartAlternativeHtml(self):
206 self.TestExtraction("""
207 multipart/alternative
208 text/html
209 application/pdf""",
210 ('bar\n',
211 [('bar.html', 'text/html',
212 '<html><body>bar</body></html>\n'),
213 ('foo.pdf', 'application/pdf', 'foo\n')], False))
190 214
191 def testDeepMultipartAlternative(self): 215 def testDeepMultipartAlternative(self):
192 self.TestExtraction(""" 216 self.TestExtraction("""
193 multipart/mixed 217 multipart/mixed
194 multipart/alternative 218 multipart/alternative
195 text/plain 219 text/plain
196 application/pdf 220 application/pdf
197 """, ('foo\n', [('foo.pdf', 'application/pdf', 'foo\n')])) 221 """, ('foo\n', [('foo.pdf', 'application/pdf', 'foo\n')], False))
198 222
199 def testSignedText(self): 223 def testSignedText(self):
200 self.TestExtraction(""" 224 self.TestExtraction("""
201 multipart/signed 225 multipart/signed
202 text/plain 226 text/plain
203 application/pgp-signature""", ('foo\n', [])) 227 application/pgp-signature""", ('foo\n', [], False))
204 228
205 def testSignedAttachments(self): 229 def testSignedAttachments(self):
206 self.TestExtraction(""" 230 self.TestExtraction("""
207 multipart/signed 231 multipart/signed
208 multipart/mixed 232 multipart/mixed
209 text/plain 233 text/plain
210 application/pdf 234 application/pdf
211 application/pgp-signature""", 235 application/pgp-signature""",
212 ('foo\n', 236 ('foo\n',
213 [('foo.pdf', 'application/pdf', 'foo\n')])) 237 [('foo.pdf', 'application/pdf', 'foo\n')], False))
214 238
215 def testAttachedSignature(self): 239 def testAttachedSignature(self):
216 self.TestExtraction(""" 240 self.TestExtraction("""
217 multipart/mixed 241 multipart/mixed
218 text/plain 242 text/plain
219 application/pgp-signature""", 243 application/pgp-signature""",
220 ('foo\n', 244 ('foo\n',
221 [('foo.gpg', 'application/pgp-signature', 'foo\n')])) 245 [('foo.gpg', 'application/pgp-signature', 'foo\n')], False))
222 246
223 def testMessageRfc822(self): 247 def testMessageRfc822(self):
224 self.TestExtraction(""" 248 self.TestExtraction("""
225 multipart/mixed 249 multipart/mixed
226 message/rfc822""", 250 message/rfc822""",
227 (None, 251 (None,
228 [('foo.eml', 'message/rfc822', 'Subject: foo\n\nfoo\n')])) 252 [('foo.eml', 'message/rfc822', 'Subject: foo\n\nfoo\n')], False))
229 253
230 # vim: set filetype=python ts=4 sw=4 et si 254 # vim: set filetype=python ts=4 sw=4 et si

Roundup Issue Tracker: http://roundup-tracker.org/