forked from html5lib/html5lib-python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsupport.py
More file actions
128 lines (109 loc) · 4.07 KB
/
Copy pathsupport.py
File metadata and controls
128 lines (109 loc) · 4.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
from __future__ import absolute_import, division, unicode_literals
import os
import sys
import codecs
import glob
# Directory containing this file; only needed while the paths below are set up.
base_path = os.path.split(__file__)[0]
# Directory of test data files, located next to this module.
test_dir = os.path.join(base_path, 'testdata')
# Put the directory two levels above this file at the front of sys.path so the
# html5lib imports below are resolved against it first.
# NOTE(review): presumably this makes the tests exercise the local checkout
# rather than an installed html5lib -- confirm.
sys.path.insert(0, os.path.abspath(os.path.join(base_path,
os.path.pardir,
os.path.pardir)))
import html5lib
from html5lib import html5parser, treebuilders
# base_path was only a temporary; remove it from the module namespace.
del base_path
# Map each available tree type name to its tree builder.
treeTypes = {}
treeTypes["simpletree"] = treebuilders.getTreeBuilder("simpletree")
treeTypes["DOM"] = treebuilders.getTreeBuilder("dom")

# Register whichever etree implementations can be imported, trying the
# candidates that are "supposed" to work in order of preference.

# Pure-Python ElementTree: standard-library location first, then the
# standalone ``elementtree`` package.
try:
    import xml.etree.ElementTree as ElementTree
    treeTypes["ElementTree"] = treebuilders.getTreeBuilder(
        "etree", ElementTree, fullTree=True)
except ImportError:
    try:
        import elementtree.ElementTree as ElementTree
        treeTypes["ElementTree"] = treebuilders.getTreeBuilder(
            "etree", ElementTree, fullTree=True)
    except ImportError:
        # Neither variant is importable: leave the entry out.
        pass

# cElementTree: standard-library location first, then the standalone module.
try:
    import xml.etree.cElementTree as cElementTree
    treeTypes["cElementTree"] = treebuilders.getTreeBuilder(
        "etree", cElementTree, fullTree=True)
except ImportError:
    try:
        import cElementTree
        treeTypes["cElementTree"] = treebuilders.getTreeBuilder(
            "etree", cElementTree, fullTree=True)
    except ImportError:
        # Neither variant is importable: leave the entry out.
        pass

# lxml is optional; register its tree builder only when it is importable.
try:
    import lxml.etree as lxml
    treeTypes["lxml"] = treebuilders.getTreeBuilder("lxml")
except ImportError:
    pass
def get_data_files(subdirectory, files='*.dat'):
    """Return the paths in ``test_dir/subdirectory`` that match the glob *files*.

    subdirectory -- directory name relative to the module-level ``test_dir``
    files -- glob pattern applied inside that directory (default ``*.dat``)

    The result is the list produced by ``glob.glob``.
    """
    pattern = os.path.join(test_dir, subdirectory, files)
    matches = glob.glob(pattern)
    return matches
class DefaultDict(dict):
    """A dict whose item lookup yields a fixed default for missing keys.

    Looking up a missing key returns ``default`` without storing anything, so
    the key stays absent from the dictionary.
    """

    def __init__(self, default, *args, **kwargs):
        """Store *default*; remaining arguments are passed on to ``dict``."""
        dict.__init__(self, *args, **kwargs)
        self.default = default

    def __getitem__(self, key):
        """Return the value stored under *key*, or the default if it is absent."""
        try:
            return dict.__getitem__(self, key)
        except KeyError:
            return self.default
class TestData(object):
    """Iterate over the tests stored in a sectioned test data file.

    A line starting with ``#`` opens a section whose name is the rest of that
    line with surrounding whitespace stripped; the following lines form the
    content of that section. Iterating over the object yields one mapping of
    section name to section content per test.
    """

    def __init__(self, filename, newTestHeading="data", encoding="utf8"):
        """Open *filename* for reading.

        filename -- path of the test data file
        newTestHeading -- name of the section that marks the start of a test
        encoding -- text encoding used to decode the file; pass ``None`` to
            read the file in binary mode, in which case lines, headings and
            section contents are bytes
        """
        # Assign the attributes before opening the file so that __del__ can
        # rely on them even when opening raises.
        self.f = None
        self.encoding = encoding
        self.newTestHeading = newTestHeading
        if encoding is None:
            self.f = open(filename, mode="rb")
        else:
            self.f = codecs.open(filename, encoding=encoding)

    def __del__(self):
        """Close the underlying file if one was opened."""
        # getattr guards against instances whose __init__ never ran.
        f = getattr(self, "f", None)
        if f is not None:
            f.close()

    def __iter__(self):
        """Yield one normalised section mapping per test in the file.

        Lines that appear before the first section heading are ignored.
        """
        empty = "" if self.encoding else b""
        data = DefaultDict(None)
        key = None
        for line in self.f:
            heading = self.isSectionHeading(line)
            if heading:
                if data and heading == self.newTestHeading:
                    # A new test begins: drop the trailing newline of the
                    # section collected last, then hand out the finished test.
                    data[key] = data[key][:-1]
                    yield self.normaliseOutput(data)
                    data = DefaultDict(None)
                key = heading
                data[key] = empty
            elif key is not None:
                data[key] += line
        if data:
            # The final test has no following heading to trigger the yield.
            yield self.normaliseOutput(data)

    def isSectionHeading(self, line):
        """If the current line is a test section heading return the heading,
        otherwise return False"""
        marker = "#" if self.encoding else b"#"
        if line.startswith(marker):
            return line[1:].strip()
        return False

    def normaliseOutput(self, data):
        """Strip one trailing newline from every value of *data*.

        *data* is modified in place and also returned.
        """
        newline = "\n" if self.encoding else b"\n"
        # Iterate over a snapshot because values are reassigned in the loop.
        for key, value in list(data.items()):
            if value.endswith(newline):
                data[key] = value[:-1]
        return data
def convert(stripChars):
    """Return a function that reformats serialised tree output.

    The returned function removes the first *stripChars* characters from every
    line that starts with "|" and leaves all other lines unchanged.
    """
    def convertData(data):
        """Convert the output of str(document) to the format used in the testcases."""
        return "\n".join(
            line[stripChars:] if line.startswith("|") else line
            for line in data.split("\n"))
    return convertData
# Converter that drops the first two characters of every line starting with "|".
convertExpected = convert(2)
def errorMessage(input, expected, actual):
    """Build the message describing a mismatch between expected and actual output.

    input -- the test input
    expected -- the output the test expected
    actual -- the output that was actually produced

    Each value is rendered with ``repr``. On Python 2 the message is encoded
    to ASCII, with non-ASCII characters written as backslash escapes.
    """
    msg = ("Input:\n%s\nExpected:\n%s\nReceived:\n%s\n" %
           (repr(input), repr(expected), repr(actual)))
    # Index access works on every Python version; the named ``major``
    # attribute is only available from Python 2.7 on.
    if sys.version_info[0] == 2:
        msg = msg.encode("ascii", "backslashreplace")
    return msg