-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathStream.php
More file actions
100 lines (86 loc) · 2.75 KB
/
Stream.php
File metadata and controls
100 lines (86 loc) · 2.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
<?php
declare(strict_types=1);
namespace JustHTML;
/**
 * Minimal placeholder object that StreamSink pushes onto its open-element
 * list whenever it sees a start tag.
 *
 * It carries nothing except a namespace label, which starts out as 'html'.
 */
final class StreamDummyNode
{
    /** Namespace label of the placeholder; initialised to 'html'. */
    public string $namespace = 'html';
}
/**
 * Token sink that records everything it is fed as a flat list of
 * [event, payload] pairs.
 *
 * Events recorded: 'start', 'end', 'comment', 'doctype' and 'text'.
 */
final class StreamSink
{
    /**
     * Recorded events in arrival order, each stored as [event name, payload].
     *
     * @var array<int, array{0:string,1:mixed}>
     */
    public array $tokens = [];

    /**
     * Placeholder stack: one entry is pushed per start tag and the most
     * recent entry (if any) is removed per end tag.
     * NOTE(review): presumably read by the tokenizer driving this sink — confirm.
     *
     * @var array<int, StreamDummyNode>
     */
    public array $openElements = [];

    /**
     * Records a tag, comment or doctype token; any other token is ignored.
     *
     * Payload shapes:
     *  - 'start'   => [name, attrs] (a falsy attrs value becomes [])
     *  - 'end'     => name
     *  - 'comment' => data
     *  - 'doctype' => [name, publicId, systemId]
     *
     * @param mixed $token Token object handed over by the caller.
     *
     * @return int Always TokenSinkResult::Continue.
     */
    public function processToken($token): int
    {
        if ($token instanceof Tag) {
            if ($token->kind === Tag::START) {
                $this->record('start', [$token->name, $token->attrs ?: []]);
                $this->openElements[] = new StreamDummyNode();
            } else {
                // Every non-start tag is treated as an end tag.
                $this->record('end', $token->name);
                if ($this->openElements !== []) {
                    array_pop($this->openElements);
                }
            }
        } elseif ($token instanceof CommentToken) {
            $this->record('comment', $token->data);
        } elseif ($token instanceof DoctypeToken) {
            $doctype = $token->doctype;
            $this->record('doctype', [$doctype->name, $doctype->publicId, $doctype->systemId]);
        }

        return TokenSinkResult::Continue;
    }

    /**
     * Records a run of character data as a 'text' event.
     *
     * @param string $data Character data to record verbatim.
     */
    public function processCharacters(string $data): void
    {
        $this->record('text', $data);
    }

    /**
     * Appends a single [event, payload] pair to the recorded token list.
     *
     * @param string $event   Event name.
     * @param mixed  $payload Event payload, stored as given.
     */
    private function record(string $event, $payload): void
    {
        $this->tokens[] = [$event, $payload];
    }
}
/**
 * Entry point that turns HTML input into a lazily produced sequence of events.
 */
final class Stream
{
    /**
     * Tokenizes the input step by step and yields [event, payload] pairs.
     *
     * After each tokenizer step, the events the sink collected during that
     * step are yielded in order. Consecutive 'text' events inside one such
     * batch are merged into a single 'text' event; text is not merged across
     * separate steps.
     *
     * @param mixed       $html     Input document. null is treated as an empty
     *                              string; anything else is cast to string.
     * @param string|null $encoding Passed to Encoding::decodeHtml(); only used
     *                              when $bytes is true.
     * @param bool        $bytes    When true, the input is first run through
     *                              Encoding::decodeHtml() and the first element
     *                              of its result is tokenized.
     *
     * @return \Generator Yields array{0:string,1:mixed} pairs.
     */
    public static function stream($html, ?string $encoding = null, bool $bytes = false): \Generator
    {
        $htmlStr = '';
        if ($html !== null) {
            if ($bytes) {
                // Only the first element of the decode result is needed here.
                [$decoded, $ignored] = Encoding::decodeHtml((string)$html, $encoding);
                $htmlStr = $decoded;
            } else {
                $htmlStr = (string)$html;
            }
        }

        $sink = new StreamSink();
        $tokenizer = new Tokenizer($sink);
        $tokenizer->initialize($htmlStr);

        do {
            $finished = $tokenizer->step();

            if ($sink->tokens !== []) {
                // null means "no text pending"; an empty string is still
                // pending text and gets emitted.
                $pendingText = null;

                foreach ($sink->tokens as [$kind, $payload]) {
                    if ($kind === 'text') {
                        $pendingText = ($pendingText ?? '') . $payload;
                        continue;
                    }

                    // Flush accumulated text before any non-text event.
                    if ($pendingText !== null) {
                        yield ['text', $pendingText];
                        $pendingText = null;
                    }

                    yield [$kind, $payload];
                }

                if ($pendingText !== null) {
                    yield ['text', $pendingText];
                }

                // Start the next step with an empty batch.
                $sink->tokens = [];
            }
        } while (!$finished);
    }
}