forked from html5lib/html5lib-python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsupport.py
More file actions
127 lines (110 loc) · 4.02 KB
/
Copy pathsupport.py
File metadata and controls
127 lines (110 loc) · 4.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import os
import sys
import codecs
import glob
base_path = os.path.split(__file__)[0]
if os.path.exists(os.path.join(base_path, 'testdata')):
#release
test_dir = os.path.join(base_path, 'testdata')
else:
#development
test_dir = os.path.abspath(
os.path.join(base_path,
os.path.pardir, os.path.pardir,
os.path.pardir, 'testdata'))
assert os.path.exists(test_dir), "Test data not found"
#import the development html5lib
sys.path.insert(0, os.path.abspath(os.path.join(base_path,
os.path.pardir,
os.path.pardir)))
import html5lib
from html5lib import html5parser, treebuilders
del base_path
#Build a dict of avaliable trees
treeTypes = {"simpletree":treebuilders.getTreeBuilder("simpletree"),
"DOM":treebuilders.getTreeBuilder("dom")}
#Try whatever etree implementations are avaliable from a list that are
#"supposed" to work
try:
import xml.etree.ElementTree as ElementTree
treeTypes['ElementTree'] = treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True)
except ImportError:
try:
import elementtree.ElementTree as ElementTree
treeTypes['ElementTree'] = treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True)
except ImportError:
pass
try:
import xml.etree.cElementTree as cElementTree
treeTypes['cElementTree'] = treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True)
except ImportError:
try:
import cElementTree
treeTypes['cElementTree'] = treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True)
except ImportError:
pass
try:
import lxml.etree as lxml
treeTypes['lxml'] = treebuilders.getTreeBuilder("etree", lxml, fullTree=True)
except ImportError:
pass
try:
import BeautifulSoup
treeTypes["beautifulsoup"] = treebuilders.getTreeBuilder("beautifulsoup", fullTree=True)
except ImportError:
pass
def html5lib_test_files(subdirectory, files='*.dat'):
return glob.glob(os.path.join(test_dir,subdirectory,files))
class DefaultDict(dict):
def __init__(self, default, *args, **kwargs):
self.default = default
dict.__init__(self, *args, **kwargs)
def __getitem__(self, key):
return dict.get(self, key, self.default)
class TestData(object):
def __init__(self, filename, newTestHeading="data"):
self.f = codecs.open(filename, encoding="utf8")
self.newTestHeading = newTestHeading
def __iter__(self):
data = DefaultDict(None)
key=None
for line in self.f:
heading = self.isSectionHeading(line)
if heading:
if data and heading == self.newTestHeading:
#Remove trailing newline
data[key] = data[key][:-1]
yield self.normaliseOutput(data)
data = DefaultDict(None)
key = heading
data[key]=""
elif key is not None:
data[key] += line
if data:
yield self.normaliseOutput(data)
def isSectionHeading(self, line):
"""If the current heading is a test section heading return the heading,
otherwise return False"""
if line.startswith("#"):
return line[1:].strip()
else:
return False
def normaliseOutput(self, data):
#Remove trailing newlines
for key,value in data.iteritems():
if value.endswith("\n"):
data[key] = value[:-1]
return data
def convert(stripChars):
def convertData(data):
"""convert the output of str(document) to the format used in the testcases"""
data = data.split("\n")
rv = []
for line in data:
if line.startswith("|"):
rv.append(line[stripChars:])
else:
rv.append(line)
return "\n".join(rv)
return convertData
convertExpected = convert(2)