Mercurial > p > roundup > code
annotate roundup/cgi/TAL/XMLParser.py @ 5305:e20f472fde7d
issue2550799: provide basic support for handling html only emails
Initial implementation and testing with the dehtml html converter
done.
The use of beautifulsoup 4 is not tested. My test system breaks when
running dehtml.py using beautiful soup. I don't get the failures when
running under the test harness, but the text output is significantly
different (different line breaks, number of newlines etc.)
The tests for dehtml need to be generated for beautiful soup and the
expected output changed. Since I have a wonky install of beautiful
soup, I don't trust my output as the standard to test against. Also
since beautiful soup is optional, the test harness needs to skip the
beautifulsoup tests if import bs4 fails. Again something outside of my
expertise. I deleted the work I had done to implement that. I could
not get it working and wanted to get this feature in, in some form.
| author | John Rouillard <rouilj@ieee.org> |
|---|---|
| date | Fri, 13 Oct 2017 21:46:59 -0400 |
| parents | 8c2402a78bb0 |
| children | 88dbacd11cd1 |
| rev | line source |
|---|---|
| 1049 | 1 ############################################################################## |
| 2 # | |
| 3 # Copyright (c) 2001, 2002 Zope Corporation and Contributors. | |
| 4 # All Rights Reserved. | |
|
2348
8c2402a78bb0
beginning getting ZPT up to date: TAL first
Richard Jones <richard@users.sourceforge.net>
parents:
2005
diff
changeset
|
5 # |
| 1049 | 6 # This software is subject to the provisions of the Zope Public License, |
| 7 # Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution. | |
| 8 # THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED | |
| 9 # WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
| 10 # WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS | |
| 11 # FOR A PARTICULAR PURPOSE | |
|
2348
8c2402a78bb0
beginning getting ZPT up to date: TAL first
Richard Jones <richard@users.sourceforge.net>
parents:
2005
diff
changeset
|
12 # |
| 1049 | 13 ############################################################################## |
|
2348
8c2402a78bb0
beginning getting ZPT up to date: TAL first
Richard Jones <richard@users.sourceforge.net>
parents:
2005
diff
changeset
|
14 # Modifications for Roundup: |
|
8c2402a78bb0
beginning getting ZPT up to date: TAL first
Richard Jones <richard@users.sourceforge.net>
parents:
2005
diff
changeset
|
15 # 1. commented out zLOG references |
|
8c2402a78bb0
beginning getting ZPT up to date: TAL first
Richard Jones <richard@users.sourceforge.net>
parents:
2005
diff
changeset
|
16 """ |
|
8c2402a78bb0
beginning getting ZPT up to date: TAL first
Richard Jones <richard@users.sourceforge.net>
parents:
2005
diff
changeset
|
17 Generic expat-based XML parser base class. |
|
8c2402a78bb0
beginning getting ZPT up to date: TAL first
Richard Jones <richard@users.sourceforge.net>
parents:
2005
diff
changeset
|
18 """ |
|
1071
c08b3820edd1
Adhering to ZPL
Richard Jones <richard@users.sourceforge.net>
parents:
1049
diff
changeset
|
19 |
|
2348
8c2402a78bb0
beginning getting ZPT up to date: TAL first
Richard Jones <richard@users.sourceforge.net>
parents:
2005
diff
changeset
|
20 #import zLOG |
| 1049 | 21 |
class XMLParser:
    """Generic expat-based XML parser base class.

    Subclasses define methods named after expat handler attributes (see
    ``handler_names``); every such method found on the instance is
    installed on the underlying expat parser when the instance is created.
    """

    # When true, __init__ tries to set the same flag on the expat parser;
    # it is reset to 0 on the instance if the parser rejects the attribute.
    ordered_attributes = 0

    # Names of the expat parser attributes that may be bound to
    # same-named methods of this object.
    handler_names = [
        "StartElementHandler",
        "EndElementHandler",
        "ProcessingInstructionHandler",
        "CharacterDataHandler",
        "UnparsedEntityDeclHandler",
        "NotationDeclHandler",
        "StartNamespaceDeclHandler",
        "EndNamespaceDeclHandler",
        "CommentHandler",
        "StartCdataSectionHandler",
        "EndCdataSectionHandler",
        "DefaultHandler",
        "DefaultHandlerExpand",
        "NotStandaloneHandler",
        "ExternalEntityRefHandler",
        "XmlDeclHandler",
        "StartDoctypeDeclHandler",
        "EndDoctypeDeclHandler",
        "ElementDeclHandler",
        "AttlistDeclHandler"
        ]

    def __init__(self, encoding=None):
        """Create the expat parser and bind the handler methods.

        encoding -- optional encoding name handed to createParser();
                    None leaves the choice to expat.
        """
        # Forward the encoding; it used to be accepted but silently dropped.
        self.parser = p = self.createParser(encoding)
        if self.ordered_attributes:
            try:
                self.parser.ordered_attributes = self.ordered_attributes
            except AttributeError:
                # The parser cannot deliver ordered attributes (upstream
                # logged this through zLOG); remember that it is off.
                self.ordered_attributes = 0
        for name in self.handler_names:
            method = getattr(self, name, None)
            if method is not None:
                try:
                    setattr(p, name, method)
                except AttributeError:
                    # Deliberate best effort: a handler this expat build
                    # does not accept is skipped (upstream logged this
                    # through zLOG).
                    pass

    def createParser(self, encoding=None):
        """Return a new expat parser using ' ' as the namespace separator.

        Side effect: binds the module-level name ``XMLParseError`` to the
        ExpatError class of whichever expat implementation was imported.
        """
        global XMLParseError
        try:
            from Products.ParsedXML.Expat import pyexpat
            XMLParseError = pyexpat.ExpatError
            return pyexpat.ParserCreate(encoding, ' ')
        except ImportError:
            from xml.parsers import expat
            XMLParseError = expat.ExpatError
            return expat.ParserCreate(encoding, ' ')

    def parseFile(self, filename):
        """Parse the XML document stored in the file named *filename*."""
        # Binary mode: on Python 3 expat's ParseFile() needs read() to
        # return bytes.  The with-block closes the file even on errors.
        with open(filename, "rb") as stream:
            self.parseStream(stream)

    def parseString(self, s):
        """Parse *s* as one complete document (final chunk)."""
        self.parser.Parse(s, 1)

    def parseURL(self, url):
        """Fetch *url* and parse the response body as an XML document."""
        try:
            from urllib.request import urlopen  # Python 3
        except ImportError:
            from urllib import urlopen  # Python 2
        stream = urlopen(url)
        try:
            self.parseStream(stream)
        finally:
            stream.close()

    def parseStream(self, stream):
        """Parse a document from *stream*, an object with a read() method."""
        self.parser.ParseFile(stream)

    def parseFragment(self, s, end=0):
        """Feed the chunk *s* to the parser; a true *end* marks the last one."""
        self.parser.Parse(s, end)
