diff test/html_norm.py @ 7560:5cadcaa13bed

prevent <newline tag mangling: remove charref and entityref handlers; change where the newline is inserted after a start tag. A test under python2 ended up with a newline between the opening '<' and the tag. This was caused by a string that had escaped &gt; and &lt;, for example: embedded code block &lt;pre&gt\n\n<pre> python\nline 1\nline 2\n</pre> The code was mapping &lt; etc. back to < and > and confusing the parser as to where the tag really started. It interpreted the real pre tag as data and inserted a newline.
author John Rouillard <rouilj@ieee.org>
date Sun, 23 Jul 2023 16:11:23 -0400
parents 5bc36b65d06b
children
line wrap: on
line diff
--- a/test/html_norm.py	Sat Jul 22 22:05:44 2023 -0400
+++ b/test/html_norm.py	Sun Jul 23 16:11:23 2023 -0400
@@ -40,7 +40,7 @@
 
     Note that using this rewrites all attributes parsed by HTMLParser
     into attr="value" form even though HTMLParser accepts other
-    attribute specifiction forms.
+    attribute specification forms.
     """
 
     debug = False  # set to true to enable more verbose output
@@ -63,7 +63,7 @@
             if self.debug: print("     attr:", attr)
             self.current_normalized_string += ' %s="%s"' % attr
 
-        self.current_normalized_string += ">"
+        self.current_normalized_string += ">\n"
 
         if tag == 'pre':
             self.preserve_data = True
@@ -83,26 +83,11 @@
             data = " ".join(data.strip().split())
 
         if data:
-            self.current_normalized_string += "\n%s" % data
+            self.current_normalized_string += "%s" % data
 
     def handle_comment(self, data):
         print("Comment  :", data)
 
-    def handle_entityref(self, name):
-        c = chr(name2codepoint[name])
-        if self.debug: print("Named ent:", c)
-
-        self.current_normalized_string += "%s" % c
-
-    def handle_charref(self, name):
-        if name.startswith('x'):
-            c = chr(int(name[1:], 16))
-        else:
-            c = chr(int(name))
-        if self.debug: print("Num ent  :", c)
-
-        self.current_normalized_string += "%s" % c
-
     def handle_decl(self, data):
         print("Decl     :", data)
 

Roundup Issue Tracker: http://roundup-tracker.org/