forked from html5lib/html5lib-python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_stream.py
More file actions
104 lines (82 loc) · 3.94 KB
/
test_stream.py
File metadata and controls
104 lines (82 loc) · 3.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
from __future__ import absolute_import, division, unicode_literals
from . import support # flake8: noqa
import unittest
import codecs
from html5lib.inputstream import HTMLInputStream, HTMLUnicodeInputStream, HTMLBinaryInputStream
class HTMLUnicodeInputStreamShortChunk(HTMLUnicodeInputStream):
_defaultChunkSize = 2
class HTMLBinaryInputStreamShortChunk(HTMLBinaryInputStream):
_defaultChunkSize = 2
class HTMLInputStreamTest(unittest.TestCase):
def test_char_ascii(self):
stream = HTMLInputStream(b"'", encoding='ascii')
self.assertEqual(stream.charEncoding[0], 'ascii')
self.assertEqual(stream.char(), "'")
def test_char_utf8(self):
stream = HTMLInputStream('\u2018'.encode('utf-8'), encoding='utf-8')
self.assertEqual(stream.charEncoding[0], 'utf-8')
self.assertEqual(stream.char(), '\u2018')
def test_char_win1252(self):
stream = HTMLInputStream("\xa9\xf1\u2019".encode('windows-1252'))
self.assertEqual(stream.charEncoding[0], 'windows-1252')
self.assertEqual(stream.char(), "\xa9")
self.assertEqual(stream.char(), "\xf1")
self.assertEqual(stream.char(), "\u2019")
def test_bom(self):
stream = HTMLInputStream(codecs.BOM_UTF8 + b"'")
self.assertEqual(stream.charEncoding[0], 'utf-8')
self.assertEqual(stream.char(), "'")
def test_utf_16(self):
stream = HTMLInputStream((' ' * 1025).encode('utf-16'))
self.assertTrue(stream.charEncoding[0] in ['utf-16-le', 'utf-16-be'], stream.charEncoding)
self.assertEqual(len(stream.charsUntil(' ', True)), 1025)
def test_newlines(self):
stream = HTMLBinaryInputStreamShortChunk(codecs.BOM_UTF8 + b"a\nbb\r\nccc\rddddxe")
self.assertEqual(stream.position(), (1, 0))
self.assertEqual(stream.charsUntil('c'), "a\nbb\n")
self.assertEqual(stream.position(), (3, 0))
self.assertEqual(stream.charsUntil('x'), "ccc\ndddd")
self.assertEqual(stream.position(), (4, 4))
self.assertEqual(stream.charsUntil('e'), "x")
self.assertEqual(stream.position(), (4, 5))
def test_newlines2(self):
size = HTMLUnicodeInputStream._defaultChunkSize
stream = HTMLInputStream("\r" * size + "\n")
self.assertEqual(stream.charsUntil('x'), "\n" * size)
def test_position(self):
stream = HTMLBinaryInputStreamShortChunk(codecs.BOM_UTF8 + b"a\nbb\nccc\nddde\nf\ngh")
self.assertEqual(stream.position(), (1, 0))
self.assertEqual(stream.charsUntil('c'), "a\nbb\n")
self.assertEqual(stream.position(), (3, 0))
stream.unget("\n")
self.assertEqual(stream.position(), (2, 2))
self.assertEqual(stream.charsUntil('c'), "\n")
self.assertEqual(stream.position(), (3, 0))
stream.unget("\n")
self.assertEqual(stream.position(), (2, 2))
self.assertEqual(stream.char(), "\n")
self.assertEqual(stream.position(), (3, 0))
self.assertEqual(stream.charsUntil('e'), "ccc\nddd")
self.assertEqual(stream.position(), (4, 3))
self.assertEqual(stream.charsUntil('h'), "e\nf\ng")
self.assertEqual(stream.position(), (6, 1))
def test_position2(self):
stream = HTMLUnicodeInputStreamShortChunk("abc\nd")
self.assertEqual(stream.position(), (1, 0))
self.assertEqual(stream.char(), "a")
self.assertEqual(stream.position(), (1, 1))
self.assertEqual(stream.char(), "b")
self.assertEqual(stream.position(), (1, 2))
self.assertEqual(stream.char(), "c")
self.assertEqual(stream.position(), (1, 3))
self.assertEqual(stream.char(), "\n")
self.assertEqual(stream.position(), (2, 0))
self.assertEqual(stream.char(), "d")
self.assertEqual(stream.position(), (2, 1))
def buildTestSuite():
return unittest.defaultTestLoader.loadTestsFromName(__name__)
def main():
buildTestSuite()
unittest.main()
if __name__ == '__main__':
main()