|
19 | 19 | import gettext |
20 | 20 | _ = gettext.gettext |
21 | 21 |
|
| 22 | +from xml.dom import XHTML_NAMESPACE |
| 23 | + |
22 | 24 | class XMLParser(html5parser.HTMLParser): |
23 | 25 | """ liberal XML parser """ |
24 | 26 |
|
@@ -66,16 +68,21 @@ def normalizeToken(self, token): |
66 | 68 |
|
67 | 69 | # ensure that non-void XHTML elements have content so that separate |
68 | 70 | # open and close tags are emitted |
69 | | - if token["type"] == "EndTag" and \ |
70 | | - token["name"] not in voidElements and \ |
71 | | - token["name"] == self.tree.openElements[-1].name and \ |
72 | | - not self.tree.openElements[-1].hasContent(): |
73 | | - for e in self.tree.openElements: |
74 | | - if 'xmlns' in e.attributes.keys(): |
75 | | - if e.attributes['xmlns'] <> 'http://www.w3.org/1999/xhtml': |
76 | | - break |
| 71 | + if token["type"] == "EndTag": |
| 72 | + if token["name"] in voidElements: |
| 73 | + if not self.tree.openElements or \ |
| 74 | + self.tree.openElements[-1].name != token["name"]: |
| 75 | + token["type"] = "EmptyTag" |
| 76 | + if not token.has_key("data"): token["data"] = {} |
77 | 77 | else: |
78 | | - self.tree.insertText('') |
| 78 | + if token["name"] == self.tree.openElements[-1].name and \ |
| 79 | + not self.tree.openElements[-1].hasContent(): |
| 80 | + for e in self.tree.openElements: |
| 81 | + if 'xmlns' in e.attributes.keys(): |
| 82 | + if e.attributes['xmlns'] != XHTML_NAMESPACE: |
| 83 | + break |
| 84 | + else: |
| 85 | + self.tree.insertText('') |
79 | 86 |
|
80 | 87 | return token |
81 | 88 |
|
|
0 commit comments