comparison test/test_multipart.py @ 5547:081be318661b

Do not transcode binary email attachments (issue2551004). Only transcode attachments if a charset is specified or if they have a text/* content-type.
author Joseph Myers <jsm@polyomino.org.uk>
date Wed, 26 Sep 2018 01:24:19 +0000
parents 725266c03eab
children f8893e1cde0d
comparison
Legend: equal | deleted | inserted | replaced
5546:52e50ab5dfda 5547:081be318661b
184 self.TestExtraction(""" 184 self.TestExtraction("""
185 multipart/mixed 185 multipart/mixed
186 text/plain 186 text/plain
187 application/pdf""", 187 application/pdf""",
188 ('foo\n', 188 ('foo\n',
189 [('foo.pdf', 'application/pdf', 'foo\n')], False)) 189 [('foo.pdf', 'application/pdf', b'foo\n')], False))
190 190
191 def testMultipartMixedHtml(self): 191 def testMultipartMixedHtml(self):
192 # test with html conversion enabled 192 # test with html conversion enabled
193 self.TestExtraction(""" 193 self.TestExtraction("""
194 multipart/mixed 194 multipart/mixed
195 text/html 195 text/html
196 application/pdf""", 196 application/pdf""",
197 ('bar >\n', 197 ('bar >\n',
198 [('bar.html', 'text/html', 198 [('bar.html', 'text/html',
199 '<html><body>bar &gt;</body></html>\n'), 199 '<html><body>bar &gt;</body></html>\n'),
200 ('foo.pdf', 'application/pdf', 'foo\n')], False), 200 ('foo.pdf', 'application/pdf', b'foo\n')], False),
201 convert_html_with='dehtml') 201 convert_html_with='dehtml')
202 202
203 # test with html conversion disabled 203 # test with html conversion disabled
204 self.TestExtraction(""" 204 self.TestExtraction("""
205 multipart/mixed 205 multipart/mixed
206 text/html 206 text/html
207 application/pdf""", 207 application/pdf""",
208 (None, 208 (None,
209 [('bar.html', 'text/html', 209 [('bar.html', 'text/html',
210 '<html><body>bar &gt;</body></html>\n'), 210 '<html><body>bar &gt;</body></html>\n'),
211 ('foo.pdf', 'application/pdf', 'foo\n')], False), 211 ('foo.pdf', 'application/pdf', b'foo\n')], False),
212 convert_html_with=False) 212 convert_html_with=False)
213 213
214 def testMultipartAlternative(self): 214 def testMultipartAlternative(self):
215 self.TestExtraction(""" 215 self.TestExtraction("""
216 multipart/alternative 216 multipart/alternative
217 text/plain 217 text/plain
218 application/pdf 218 application/pdf
219 """, ('foo\n', [('foo.pdf', 'application/pdf', 'foo\n')], False)) 219 """, ('foo\n', [('foo.pdf', 'application/pdf', b'foo\n')], False))
220 220
221 def testMultipartAlternativeHtml(self): 221 def testMultipartAlternativeHtml(self):
222 self.TestExtraction(""" 222 self.TestExtraction("""
223 multipart/alternative 223 multipart/alternative
224 text/html 224 text/html
225 application/pdf""", 225 application/pdf""",
226 ('bar >\n', 226 ('bar >\n',
227 [('bar.html', 'text/html', 227 [('bar.html', 'text/html',
228 '<html><body>bar &gt;</body></html>\n'), 228 '<html><body>bar &gt;</body></html>\n'),
229 ('foo.pdf', 'application/pdf', 'foo\n')], False), 229 ('foo.pdf', 'application/pdf', b'foo\n')], False),
230 convert_html_with='dehtml') 230 convert_html_with='dehtml')
231 231
232 self.TestExtraction(""" 232 self.TestExtraction("""
233 multipart/alternative 233 multipart/alternative
234 text/html 234 text/html
235 application/pdf""", 235 application/pdf""",
236 (None, 236 (None,
237 [('bar.html', 'text/html', 237 [('bar.html', 'text/html',
238 '<html><body>bar &gt;</body></html>\n'), 238 '<html><body>bar &gt;</body></html>\n'),
239 ('foo.pdf', 'application/pdf', 'foo\n')], False), 239 ('foo.pdf', 'application/pdf', b'foo\n')], False),
240 convert_html_with=False) 240 convert_html_with=False)
241 241
242 def testMultipartAlternativeHtmlText(self): 242 def testMultipartAlternativeHtmlText(self):
243 # text should take priority over html when html is first 243 # text should take priority over html when html is first
244 self.TestExtraction(""" 244 self.TestExtraction("""
247 text/plain 247 text/plain
248 application/pdf""", 248 application/pdf""",
249 ('foo\n', 249 ('foo\n',
250 [('bar.html', 'text/html', 250 [('bar.html', 'text/html',
251 '<html><body>bar &gt;</body></html>\n'), 251 '<html><body>bar &gt;</body></html>\n'),
252 ('foo.pdf', 'application/pdf', 'foo\n')], False), 252 ('foo.pdf', 'application/pdf', b'foo\n')], False),
253 convert_html_with='dehtml') 253 convert_html_with='dehtml')
254 254
255 # text should take priority over html when text is first 255 # text should take priority over html when text is first
256 self.TestExtraction(""" 256 self.TestExtraction("""
257 multipart/alternative 257 multipart/alternative
259 text/html 259 text/html
260 application/pdf""", 260 application/pdf""",
261 ('foo\n', 261 ('foo\n',
262 [('bar.html', 'text/html', 262 [('bar.html', 'text/html',
263 '<html><body>bar &gt;</body></html>\n'), 263 '<html><body>bar &gt;</body></html>\n'),
264 ('foo.pdf', 'application/pdf', 'foo\n')], False), 264 ('foo.pdf', 'application/pdf', b'foo\n')], False),
265 convert_html_with='dehtml') 265 convert_html_with='dehtml')
266 266
267 # text should take priority over html when text is second and 267 # text should take priority over html when text is second and
268 # html is disabled 268 # html is disabled
269 self.TestExtraction(""" 269 self.TestExtraction("""
272 text/plain 272 text/plain
273 application/pdf""", 273 application/pdf""",
274 ('foo\n', 274 ('foo\n',
275 [('bar.html', 'text/html', 275 [('bar.html', 'text/html',
276 '<html><body>bar &gt;</body></html>\n'), 276 '<html><body>bar &gt;</body></html>\n'),
277 ('foo.pdf', 'application/pdf', 'foo\n')], False), 277 ('foo.pdf', 'application/pdf', b'foo\n')], False),
278 convert_html_with=False) 278 convert_html_with=False)
279 279
280 # text should take priority over html when text is first and 280 # text should take priority over html when text is first and
281 # html is disabled 281 # html is disabled
282 self.TestExtraction(""" 282 self.TestExtraction("""
285 text/html 285 text/html
286 application/pdf""", 286 application/pdf""",
287 ('foo\n', 287 ('foo\n',
288 [('bar.html', 'text/html', 288 [('bar.html', 'text/html',
289 '<html><body>bar &gt;</body></html>\n'), 289 '<html><body>bar &gt;</body></html>\n'),
290 ('foo.pdf', 'application/pdf', 'foo\n')], False), 290 ('foo.pdf', 'application/pdf', b'foo\n')], False),
291 convert_html_with=False) 291 convert_html_with=False)
292 292
293 def testDeepMultipartAlternative(self): 293 def testDeepMultipartAlternative(self):
294 self.TestExtraction(""" 294 self.TestExtraction("""
295 multipart/mixed 295 multipart/mixed
296 multipart/alternative 296 multipart/alternative
297 text/plain 297 text/plain
298 application/pdf 298 application/pdf
299 """, ('foo\n', [('foo.pdf', 'application/pdf', 'foo\n')], False)) 299 """, ('foo\n', [('foo.pdf', 'application/pdf', b'foo\n')], False))
300 300
301 def testSignedText(self): 301 def testSignedText(self):
302 self.TestExtraction(""" 302 self.TestExtraction("""
303 multipart/signed 303 multipart/signed
304 text/plain 304 text/plain
310 multipart/mixed 310 multipart/mixed
311 text/plain 311 text/plain
312 application/pdf 312 application/pdf
313 application/pgp-signature""", 313 application/pgp-signature""",
314 ('foo\n', 314 ('foo\n',
315 [('foo.pdf', 'application/pdf', 'foo\n')], False)) 315 [('foo.pdf', 'application/pdf', b'foo\n')], False))
316 316
317 def testAttachedSignature(self): 317 def testAttachedSignature(self):
318 self.TestExtraction(""" 318 self.TestExtraction("""
319 multipart/mixed 319 multipart/mixed
320 text/plain 320 text/plain
321 application/pgp-signature""", 321 application/pgp-signature""",
322 ('foo\n', 322 ('foo\n',
323 [('foo.gpg', 'application/pgp-signature', 'foo\n')], False)) 323 [('foo.gpg', 'application/pgp-signature', b'foo\n')], False))
324 324
325 def testMessageRfc822(self): 325 def testMessageRfc822(self):
326 self.TestExtraction(""" 326 self.TestExtraction("""
327 multipart/mixed 327 multipart/mixed
328 message/rfc822""", 328 message/rfc822""",

Roundup Issue Tracker: http://roundup-tracker.org/