1919#from html5lib import inputstream
2020#END RELEASE
2121
def parseTestcase(testString):
    """Split one encoding test case into its input text and expected encoding.

    The test case must start with a ``#data`` line, followed by the input
    lines, then a ``#encoding`` line, followed by the expected encoding.

    Empty lines, and any further lines starting with ``#data``, are dropped.
    The first non-empty line after ``#encoding`` is taken as the encoding.

    Returns a ``(data, encoding)`` tuple where ``data`` is the retained input
    lines joined with newlines.

    Raises ValueError when the case does not start with ``#data`` or has no
    encoding line.
    """
    lines = testString.split("\n")
    # Validate explicitly: an ``assert`` would be stripped under ``python -O``.
    if lines[0] != "#data":
        raise ValueError(
            "test case must start with '#data', got %r" % (lines[0],))

    dataLines = []
    encodingLines = []
    # Lines are collected into the data list until the "#encoding" marker is
    # seen, and into the encoding list afterwards.
    currentList = dataLines
    for line in lines:
        if line.startswith("#encoding"):
            currentList = encodingLines
        elif line and not line.startswith("#data"):
            currentList.append(line)

    if not encodingLines:
        raise ValueError("test case has no '#encoding' value: %r" % (testString,))
    return "\n".join(dataLines), encodingLines[0]
40-
class TestCase(unittest.TestCase):
    """Test case class that encoding checks are run against."""

    def runEncodingTest(self, input, encoding):
        """Check that the stream built from ``input`` reports ``encoding``.

        ``input`` is the raw test data and ``encoding`` the expected encoding
        name. The comparison is case-insensitive.
        """
        # chardet=False is passed through to HTMLInputStream; presumably this
        # turns off chardet-based guessing -- confirm against inputstream.
        stream = inputstream.HTMLInputStream(input, chardet=False)

        errorMsg = "\n".join(["\n\nInput", input, "\nExpected:", encoding,
                              "\nReceived:", stream.charEncoding])
        # assertEqual rather than the deprecated assertEquals alias.
        self.assertEqual(encoding.lower(), stream.charEncoding.lower(),
                         errorMsg)
51-
class ChardetTest(unittest.TestCase):
    """Check the encoding reported for the Big5 fixture with chardet enabled."""

    def testChardet(self):
        """The Big5 fixture file must be reported as ``big5``."""
        # Close the fixture deterministically instead of leaking the handle.
        with open("encoding/chardet/test_big5.txt") as f:
            data = f.read()
        stream = inputstream.HTMLInputStream(data, chardet=True)
        detected = stream.charEncoding.lower()
        # assertEqual rather than the deprecated assertEquals alias.
        self.assertEqual("big5", detected,
                         "Chardet failed: expected big5 got " + detected)
22+ import re , glob , unittest , inputstream
5923
def test_encoding():
    """Yield one ``(test function, input, encoding)`` triple per test case.

    Every ``encoding/*.dat`` file is split on ``#data`` headers; each
    non-empty chunk is parsed with ``parseTestcase`` and yielded together
    with ``TestCase.runEncodingTest``.
    """
    # Sorted so that the order in which cases are produced (and therefore any
    # numbering derived from it) does not depend on directory listing order.
    for filename in sorted(glob.glob('encoding/*.dat')):
        # Read eagerly and close the file before yielding anything.
        with open(filename) as f:
            contents = f.read()
        for chunk in contents.split("#data\n"):
            if chunk == "":
                continue
            # split() consumed the header; restore it for parseTestcase.
            data, encoding = parseTestcase("#data\n" + chunk)
            yield TestCase.runEncodingTest, data, encoding
class Html5EncodingTestCase(unittest.TestCase):
    """Empty test case class; test methods are attached to it at runtime."""
7025
def _make_encoding_test(data, encoding):
    """Return a test method checking that ``data`` is reported as ``encoding``.

    A factory is used so each generated method keeps its own ``data`` and
    ``encoding``. A function defined directly inside the loop would look the
    loop variables up when called, so every test would check the last case.
    """
    def encodingTest(self):
        stream = inputstream.HTMLInputStream(data, chardet=False)
        # Compare case-insensitively and report both values on failure.
        self.assertEqual(encoding.lower(), stream.charEncoding.lower())
    return encodingTest


def buildTestSuite():
    """Attach the generated encoding tests and return this module's suite.

    For every ``encoding/*.dat`` file, one ``test_<file>_<n>`` method is added
    to ``Html5EncodingTestCase`` per ``#data`` / ``#encoding`` pair found.
    When ``chardet`` can be imported, a ``test_chardet`` method is added too.

    Returns the suite that the default loader builds from this module.
    """
    # Compiled once for all files. The pattern only matches an encoding line
    # that is terminated by a newline.
    case_re = re.compile(r"^#data\s*\n(.*?)\n#encoding\s*\n(.*?)\n",
                         re.DOTALL | re.MULTILINE)

    for filename in glob.glob("encoding/*.dat"):
        # Derive a valid identifier fragment from the file name.
        test_name = os.path.splitext(os.path.basename(filename))[0]
        test_name = test_name.replace('-', '')
        with open(filename) as f:
            contents = f.read()
        for idx, (data, encoding) in enumerate(case_re.findall(contents)):
            setattr(Html5EncodingTestCase,
                    'test_%s_%d' % (test_name, idx + 1),
                    _make_encoding_test(data, encoding))

    try:
        # Imported only to find out whether chardet is available.
        import chardet
    except ImportError:
        # Parenthesised single argument prints identically on Python 2 and 3.
        print("chardet not found, skipping chardet tests")
    else:
        def test_chardet(self):
            with open("encoding/chardet/test_big5.txt") as f:
                data = f.read()
            encoding = inputstream.HTMLInputStream(data).charEncoding
            self.assertEqual("big5", encoding.lower())
        setattr(Html5EncodingTestCase, 'test_chardet', test_chardet)

    return unittest.defaultTestLoader.loadTestsFromName(__name__)
8850
def main():
    """Generate the encoding tests, then hand control to ``unittest.main()``."""
    # The returned suite is not used here; the call is made for its side
    # effect of attaching the generated test methods.
    buildTestSuite()
    unittest.main()
9154
# Run the tests only when this file is executed as a script.
if __name__ == "__main__":
    main()
0 commit comments