-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDocumentNode.php
More file actions
161 lines (139 loc) · 4.4 KB
/
DocumentNode.php
File metadata and controls
161 lines (139 loc) · 4.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
<?php
namespace OneOffTech\Parse\Client\DocumentFormat;
use Countable;
use JsonSerializable;
use OneOffTech\Parse\Client\Exceptions\EmptyDocumentException;
use OneOffTech\Parse\Client\Exceptions\InvalidDocumentFormatException;
use RecursiveArrayIterator;
use RecursiveIteratorIterator;
use ReturnTypeWillChange;
/**
 * Root node of a parsed document.
 *
 * Holds the top-level content entries (the pages) in their array form,
 * together with optional document-level attributes. Instances are immutable.
 */
class DocumentNode implements Countable, JsonSerializable
{
    /**
     * @param  array  $content  Top-level content entries (pages) in array form.
     * @param  array  $attributes  Document-level attributes, empty when none were given.
     */
    public function __construct(
        public readonly array $content,
        public readonly array $attributes = [],
    ) {}

    /**
     * The node category identifier of a document node.
     */
    public function type(): string
    {
        return 'doc';
    }

    /**
     * The number of pages in this document as extracted by the parser.
     */
    public function count(): int
    {
        return count($this->content);
    }

    /**
     * Test if the document is empty, i.e. contains no pages or has no textual content on any of the pages
     */
    public function isEmpty(): bool
    {
        return $this->count() === 0 || ! $this->hasContent();
    }

    /**
     * Test if the document has discernible textual content on any of the pages
     */
    public function hasContent(): bool
    {
        // The generator is lazy: valid() only walks the tree up to the first text leaf.
        return $this->textLeaves()->valid();
    }

    /**
     * The pages in this document
     *
     * @return \OneOffTech\Parse\Client\DocumentFormat\PageNode[]
     */
    public function pages(): array
    {
        return array_map(fn ($page) => PageNode::fromArray($page), $this->content);
    }

    /**
     * The textual content of the document.
     *
     * Every non-empty `text` or `content` leaf found in the content tree is
     * collected in traversal order and joined using PHP_EOL.
     */
    public function text(): string
    {
        return implode(PHP_EOL, iterator_to_array($this->textLeaves(), false));
    }

    /**
     * Throw exception if document has no textual content
     *
     * @throws EmptyDocumentException when document has no textual content
     */
    public function throwIfNoContent(): self
    {
        if (! $this->hasContent()) {
            throw new EmptyDocumentException('Document has no textual content.');
        }

        return $this;
    }

    /**
     * Return an array representation of the document node
     *
     * The `attributes` entry is null when the document has no attributes,
     * otherwise it carries the attributes so they are not lost on export.
     */
    public function toArray(): array
    {
        return [
            'category' => 'doc',
            // Keep emitting null for attribute-less documents so existing output is unchanged.
            'attributes' => $this->attributes === [] ? null : $this->attributes,
            'content' => $this->content,
        ];
    }

    /**
     * Return a JSON serialization of the document node
     *
     * @throws \JsonException when the document cannot be encoded as JSON
     */
    public function toJson(): string
    {
        // Without JSON_THROW_ON_ERROR a failed encode returns false and violates the string return type.
        return json_encode($this, JSON_THROW_ON_ERROR);
    }

    /**
     * Data to serialize when the node is passed to json_encode.
     */
    #[ReturnTypeWillChange]
    public function jsonSerialize(): mixed
    {
        return $this->toArray();
    }

    /**
     * Create a document node from associative array
     *
     * @param  array  $data  Must contain a `category` equal to `doc` and an array `content`; `attributes` is optional.
     *
     * @throws InvalidDocumentFormatException when the array does not describe a document node
     */
    public static function fromArray(array $data): DocumentNode
    {
        if (! isset($data['category'], $data['content'])) {
            throw new InvalidDocumentFormatException('Unexpected document structure. Missing category or content.');
        }

        $category = $data['category'];

        if ($category !== 'doc') {
            // Non-scalar categories cannot be interpolated safely, report their type instead.
            $found = is_scalar($category) ? (string) $category : get_debug_type($category);

            throw new InvalidDocumentFormatException("Unexpected node category. Expecting [doc] found [{$found}].");
        }

        if (! is_array($data['content'])) {
            throw new InvalidDocumentFormatException('Unexpected content format. Expecting [array].');
        }

        $attributes = $data['attributes'] ?? [];

        // Report malformed attributes as a format problem instead of a constructor TypeError.
        if (! is_array($attributes)) {
            throw new InvalidDocumentFormatException('Unexpected attributes format. Expecting [array].');
        }

        return new DocumentNode($data['content'], $attributes);
    }

    /**
     * Create a single-page document node whose only body block is the given text.
     */
    public static function fromString(string $data): DocumentNode
    {
        return static::fromArray([
            'category' => 'doc',
            'attributes' => null,
            'content' => [
                [
                    'category' => 'page',
                    'attributes' => [
                        'page' => 1,
                    ],
                    'content' => [
                        [
                            'category' => 'body',
                            'content' => $data,
                            'marks' => [],
                            'attributes' => [],
                        ],
                    ],
                ],
            ],
        ]);
    }

    /**
     * Lazily walk the content tree and yield every non-empty `text` or `content` leaf.
     *
     * @return \Generator<int, mixed>
     */
    private function textLeaves(): \Generator
    {
        $leaves = new RecursiveIteratorIterator(
            new RecursiveArrayIterator($this->content),
            RecursiveIteratorIterator::LEAVES_ONLY,
        );

        foreach ($leaves as $key => $value) {
            if ($key !== 'text' && $key !== 'content') {
                continue;
            }

            // empty() treats the string '0' as empty, which would drop legitimate text.
            $hasValue = is_string($value) ? $value !== '' : ! empty($value);

            if ($hasValue) {
                yield $value;
            }
        }
    }
}