comparison test/test_multipart.py @ 5307:5b4931cfc182

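Test the HTML entity code path in dehtml: the text/html test fixture now contains the entity `&gt;`, and the tests that convert HTML with dehtml expect it to be decoded to `>` in the extracted text.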
author John Rouillard <rouilj@ieee.org>
date Fri, 13 Oct 2017 23:03:46 -0400
parents 91354bf0b683
children 2120f77554d5
comparison
Legend: equal, deleted, inserted, replaced
5306:91354bf0b683 5307:5b4931cfc182
35 # part of the email header. 35 # part of the email header.
36 table = {'multipart/signed': ' boundary="boundary-%(indent)s";\n', 36 table = {'multipart/signed': ' boundary="boundary-%(indent)s";\n',
37 'multipart/mixed': ' boundary="boundary-%(indent)s";\n', 37 'multipart/mixed': ' boundary="boundary-%(indent)s";\n',
38 'multipart/alternative': ' boundary="boundary-%(indent)s";\n', 38 'multipart/alternative': ' boundary="boundary-%(indent)s";\n',
39 'text/plain': ' name="foo.txt"\nfoo\n', 39 'text/plain': ' name="foo.txt"\nfoo\n',
40 'text/html': ' name="bar.html"\n<html><body>bar</body></html>\n', 40 'text/html': ' name="bar.html"\n<html><body>bar &gt;</body></html>\n',
41 'application/pgp-signature': ' name="foo.gpg"\nfoo\n', 41 'application/pgp-signature': ' name="foo.gpg"\nfoo\n',
42 'application/pdf': ' name="foo.pdf"\nfoo\n', 42 'application/pdf': ' name="foo.pdf"\nfoo\n',
43 'message/rfc822': '\nSubject: foo\n\nfoo\n'} 43 'message/rfc822': '\nSubject: foo\n\nfoo\n'}
44 44
45 def __init__(self, spec): 45 def __init__(self, spec):
193 # test with html conversion enabled 193 # test with html conversion enabled
194 self.TestExtraction(""" 194 self.TestExtraction("""
195 multipart/mixed 195 multipart/mixed
196 text/html 196 text/html
197 application/pdf""", 197 application/pdf""",
198 ('bar\n', 198 ('bar >\n',
199 [('bar.html', 'text/html', 199 [('bar.html', 'text/html',
200 '<html><body>bar</body></html>\n'), 200 '<html><body>bar &gt;</body></html>\n'),
201 ('foo.pdf', 'application/pdf', 'foo\n')], False), 201 ('foo.pdf', 'application/pdf', 'foo\n')], False),
202 convert_html_with='dehtml') 202 convert_html_with='dehtml')
203 203
204 # test with html conversion disabled 204 # test with html conversion disabled
205 self.TestExtraction(""" 205 self.TestExtraction("""
206 multipart/mixed 206 multipart/mixed
207 text/html 207 text/html
208 application/pdf""", 208 application/pdf""",
209 (None, 209 (None,
210 [('bar.html', 'text/html', 210 [('bar.html', 'text/html',
211 '<html><body>bar</body></html>\n'), 211 '<html><body>bar &gt;</body></html>\n'),
212 ('foo.pdf', 'application/pdf', 'foo\n')], False), 212 ('foo.pdf', 'application/pdf', 'foo\n')], False),
213 convert_html_with=False) 213 convert_html_with=False)
214 214
215 def testMultipartAlternative(self): 215 def testMultipartAlternative(self):
216 self.TestExtraction(""" 216 self.TestExtraction("""
222 def testMultipartAlternativeHtml(self): 222 def testMultipartAlternativeHtml(self):
223 self.TestExtraction(""" 223 self.TestExtraction("""
224 multipart/alternative 224 multipart/alternative
225 text/html 225 text/html
226 application/pdf""", 226 application/pdf""",
227 ('bar\n', 227 ('bar >\n',
228 [('bar.html', 'text/html', 228 [('bar.html', 'text/html',
229 '<html><body>bar</body></html>\n'), 229 '<html><body>bar &gt;</body></html>\n'),
230 ('foo.pdf', 'application/pdf', 'foo\n')], False), 230 ('foo.pdf', 'application/pdf', 'foo\n')], False),
231 convert_html_with='dehtml') 231 convert_html_with='dehtml')
232 232
233 self.TestExtraction(""" 233 self.TestExtraction("""
234 multipart/alternative 234 multipart/alternative
235 text/html 235 text/html
236 application/pdf""", 236 application/pdf""",
237 (None, 237 (None,
238 [('bar.html', 'text/html', 238 [('bar.html', 'text/html',
239 '<html><body>bar</body></html>\n'), 239 '<html><body>bar &gt;</body></html>\n'),
240 ('foo.pdf', 'application/pdf', 'foo\n')], False), 240 ('foo.pdf', 'application/pdf', 'foo\n')], False),
241 convert_html_with=False) 241 convert_html_with=False)
242 242
243 def testMultipartAlternativeHtmlText(self): 243 def testMultipartAlternativeHtmlText(self):
244 # text should take priority over html when html is first 244 # text should take priority over html when html is first
247 text/html 247 text/html
248 text/plain 248 text/plain
249 application/pdf""", 249 application/pdf""",
250 ('foo\n', 250 ('foo\n',
251 [('bar.html', 'text/html', 251 [('bar.html', 'text/html',
252 '<html><body>bar</body></html>\n'), 252 '<html><body>bar &gt;</body></html>\n'),
253 ('foo.pdf', 'application/pdf', 'foo\n')], False), 253 ('foo.pdf', 'application/pdf', 'foo\n')], False),
254 convert_html_with='dehtml') 254 convert_html_with='dehtml')
255 255
256 # text should take priority over html when text is first 256 # text should take priority over html when text is first
257 self.TestExtraction(""" 257 self.TestExtraction("""
259 text/plain 259 text/plain
260 text/html 260 text/html
261 application/pdf""", 261 application/pdf""",
262 ('foo\n', 262 ('foo\n',
263 [('bar.html', 'text/html', 263 [('bar.html', 'text/html',
264 '<html><body>bar</body></html>\n'), 264 '<html><body>bar &gt;</body></html>\n'),
265 ('foo.pdf', 'application/pdf', 'foo\n')], False), 265 ('foo.pdf', 'application/pdf', 'foo\n')], False),
266 convert_html_with='dehtml') 266 convert_html_with='dehtml')
267 267
268 # text should take priority over html when text is second and 268 # text should take priority over html when text is second and
269 # html is disabled 269 # html is disabled
272 text/html 272 text/html
273 text/plain 273 text/plain
274 application/pdf""", 274 application/pdf""",
275 ('foo\n', 275 ('foo\n',
276 [('bar.html', 'text/html', 276 [('bar.html', 'text/html',
277 '<html><body>bar</body></html>\n'), 277 '<html><body>bar &gt;</body></html>\n'),
278 ('foo.pdf', 'application/pdf', 'foo\n')], False), 278 ('foo.pdf', 'application/pdf', 'foo\n')], False),
279 convert_html_with=False) 279 convert_html_with=False)
280 280
281 # text should take priority over html when text is first and 281 # text should take priority over html when text is first and
282 # html is disabled 282 # html is disabled
285 text/plain 285 text/plain
286 text/html 286 text/html
287 application/pdf""", 287 application/pdf""",
288 ('foo\n', 288 ('foo\n',
289 [('bar.html', 'text/html', 289 [('bar.html', 'text/html',
290 '<html><body>bar</body></html>\n'), 290 '<html><body>bar &gt;</body></html>\n'),
291 ('foo.pdf', 'application/pdf', 'foo\n')], False), 291 ('foo.pdf', 'application/pdf', 'foo\n')], False),
292 convert_html_with=False) 292 convert_html_with=False)
293 293
294 def testDeepMultipartAlternative(self): 294 def testDeepMultipartAlternative(self):
295 self.TestExtraction(""" 295 self.TestExtraction("""

Roundup Issue Tracker: http://roundup-tracker.org/