-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_docx_parser.cpp
More file actions
393 lines (333 loc) · 14 KB
/
test_docx_parser.cpp
File metadata and controls
393 lines (333 loc) · 14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
/**
* Self-contained test suite for docx_comment_parser.
*
* Creates a synthetic .docx in memory (a ZIP archive whose XML parts are
* stored uncompressed; zlib is used only for crc32()), writes it to a
* temp file, then exercises the parser API.
*
* No external test framework required.
*/
#include "docx_comment_parser.h"
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
// On MSVC the test's in-memory ZIP builder uses crc32(), Bytef, and uInt.
// The vendored header supplies all three without requiring a system zlib.
// On Linux/macOS/MinGW the system <zlib.h> is used (no IMPLEMENTATION define,
// so the function bodies live only in zip_reader.cpp's TU).
#ifdef _MSC_VER
# include "../vendor/zlib/zlib.h"
#else
# include <zlib.h>
#endif
// ─── Minimal ZIP writer (stored entries only – good enough for testing) ───────
/// Appends the 16-bit value `x` to `v` as two bytes, low byte first
/// (little-endian).
static void write_le16(std::vector<uint8_t>& v, uint16_t x) {
    const uint8_t low_byte  = static_cast<uint8_t>(x & 0xFF);
    const uint8_t high_byte = static_cast<uint8_t>(x >> 8);
    v.insert(v.end(), {low_byte, high_byte});
}
/// Appends the 32-bit value `x` to `v` as four bytes, least-significant
/// byte first (little-endian).
static void write_le32(std::vector<uint8_t>& v, uint32_t x) {
    for (int shift = 0; shift < 32; shift += 8) {
        v.push_back(static_cast<uint8_t>((x >> shift) & 0xFF));
    }
}
struct ZipBuilder {
struct Entry {
std::string name;
std::vector<uint8_t> data;
uint32_t offset{0};
uint32_t crc{0};
};
std::vector<Entry> entries;
void add(const std::string& name, const std::string& content) {
Entry e;
e.name.assign(name);
e.data.assign(content.begin(), content.end());
e.crc = crc32(0, reinterpret_cast<const Bytef*>(content.data()),
static_cast<uInt>(content.size()));
entries.push_back(std::move(e));
}
std::vector<uint8_t> build() {
std::vector<uint8_t> out;
out.reserve(8192);
for (auto& e : entries) {
e.offset = static_cast<uint32_t>(out.size());
// Local file header
write_le32(out, 0x04034b50u); // sig
write_le16(out, 20); // version needed
write_le16(out, 0); // flags
write_le16(out, 0); // method: stored
write_le16(out, 0); // mod time
write_le16(out, 0); // mod date
write_le32(out, e.crc);
write_le32(out, static_cast<uint32_t>(e.data.size())); // comp size
write_le32(out, static_cast<uint32_t>(e.data.size())); // uncomp size
write_le16(out, static_cast<uint16_t>(e.name.size()));
write_le16(out, 0); // extra len
for (char c : e.name) out.push_back(static_cast<uint8_t>(c));
out.insert(out.end(), e.data.begin(), e.data.end());
}
uint32_t cd_start = static_cast<uint32_t>(out.size());
uint16_t cd_count = static_cast<uint16_t>(entries.size());
for (const auto& e : entries) {
write_le32(out, 0x02014b50u); // CD sig
write_le16(out, 20); // version made by
write_le16(out, 20); // version needed
write_le16(out, 0); // flags
write_le16(out, 0); // method: stored
write_le16(out, 0);
write_le16(out, 0);
write_le32(out, e.crc);
write_le32(out, static_cast<uint32_t>(e.data.size()));
write_le32(out, static_cast<uint32_t>(e.data.size()));
write_le16(out, static_cast<uint16_t>(e.name.size()));
write_le16(out, 0); write_le16(out, 0); // extra, comment
write_le16(out, 0); write_le16(out, 0); // disk, int attrib
write_le32(out, 0); // ext attrib
write_le32(out, e.offset);
for (char c : e.name) out.push_back(static_cast<uint8_t>(c));
}
uint32_t cd_size = static_cast<uint32_t>(out.size()) - cd_start;
// EOCD
write_le32(out, 0x06054b50u);
write_le16(out, 0); write_le16(out, 0); // disk numbers
write_le16(out, cd_count);
write_le16(out, cd_count);
write_le32(out, cd_size);
write_le32(out, cd_start);
write_le16(out, 0); // comment len
return out;
}
};
// ─── XML fixtures ────────────────────────────────────────────────────────────
// Fixture for the "[Content_Types].xml" part. Declares default content types
// for the "rels" and "xml" extensions plus explicit overrides for
// /word/document.xml and /word/comments.xml. There is no override for
// word/commentsExtended.xml, so that part falls under the "xml" default.
static const char* CONTENT_TYPES = R"(<?xml version="1.0" encoding="UTF-8"?>
<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
<Default Extension="xml" ContentType="application/xml"/>
<Override PartName="/word/document.xml"
ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>
<Override PartName="/word/comments.xml"
ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml"/>
</Types>)";
// Fixture for the package-level "_rels/.rels" part: a single officeDocument
// relationship (rId1) pointing at word/document.xml.
static const char* RELS = R"(<?xml version="1.0" encoding="UTF-8"?>
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
<Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument"
Target="word/document.xml"/>
</Relationships>)";
// Fixture for "word/_rels/document.xml.rels": a single comments relationship
// (rId1) whose target is comments.xml.
// NOTE(review): there is no relationship for commentsExtended.xml even though
// the fixture archive contains that part - presumably the parser locates it
// by path rather than by relationship; confirm against the parser.
static const char* WORD_RELS = R"(<?xml version="1.0" encoding="UTF-8"?>
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
<Relationship Id="rId1"
Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments"
Target="comments.xml"/>
</Relationships>)";
// comments.xml: three comments (ids 0, 1, 2) by two authors. Each comment has
// one paragraph carrying a w14:paraId (AAAA0001..AAAA0003); the reply linkage
// between them is expressed only in commentsExtended.xml via those para ids.
// Only comment 0 carries a paragraph style (CommentText).
static const char* COMMENTS_XML = R"(<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<w:comments xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml">
<w:comment w:id="0" w:author="Alice Tester" w:date="2024-03-01T10:00:00Z" w:initials="AT">
<w:p w14:paraId="AAAA0001">
<w:pPr><w:pStyle w:val="CommentText"/></w:pPr>
<w:r><w:t>This sentence needs revision.</w:t></w:r>
</w:p>
</w:comment>
<w:comment w:id="1" w:author="Bob Reviewer" w:date="2024-03-02T14:30:00Z" w:initials="BR">
<w:p w14:paraId="AAAA0002">
<w:r><w:t>Agreed, let me fix this.</w:t></w:r>
</w:p>
</w:comment>
<w:comment w:id="2" w:author="Alice Tester" w:date="2024-03-03T09:00:00Z" w:initials="AT">
<w:p w14:paraId="AAAA0003">
<w:r><w:t>Thanks for the quick fix!</w:t></w:r>
</w:p>
</w:comment>
</w:comments>)";
// commentsExtended.xml: comments 1 and 2 are replies to comment 0.
// Entries are keyed by paraId: AAAA0002 and AAAA0003 name AAAA0001 as their
// paraIdParent. Only AAAA0003 (comment 2) has done="1"; the others are "0".
static const char* COMMENTS_EXT_XML = R"(<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<w16cex:commentsEx xmlns:w16cex="http://schemas.microsoft.com/office/word/2018/wordml/cex">
<w16cex:commentEx w16cex:paraId="AAAA0001" w16cex:done="0"/>
<w16cex:commentEx w16cex:paraId="AAAA0002" w16cex:paraIdParent="AAAA0001" w16cex:done="0"/>
<w16cex:commentEx w16cex:paraId="AAAA0003" w16cex:paraIdParent="AAAA0001" w16cex:done="1"/>
</w16cex:commentsEx>)";
// document.xml: a paragraph anchored to comment 0.
// The paragraph's two text runs ("The quick brown fox " and "jumps over the
// lazy dog.") sit between commentRangeStart and commentRangeEnd for id 0,
// followed by a run holding the commentReference for the same id.
static const char* DOCUMENT_XML = R"(<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<w:body>
<w:p>
<w:commentRangeStart w:id="0"/>
<w:r><w:t xml:space="preserve">The quick brown fox </w:t></w:r>
<w:r><w:t>jumps over the lazy dog.</w:t></w:r>
<w:commentRangeEnd w:id="0"/>
<w:r><w:commentReference w:id="0"/></w:r>
</w:p>
</w:body>
</w:document>)";
// ─── Helpers ─────────────────────────────────────────────────────────────────
/// Builds the synthetic .docx fixture (all XML parts above, packed with
/// ZipBuilder) and writes it to a file in the temporary directory.
///
/// The directory is taken from the first non-empty environment variable among
/// TMPDIR, TEMP and TMP, falling back to "/tmp". The file name is fixed.
///
/// @return Path of the written fixture file.
/// @throws std::runtime_error if the file cannot be created or fully written.
static std::string make_temp_docx() {
    ZipBuilder zb;
    zb.add("[Content_Types].xml", CONTENT_TYPES);
    zb.add("_rels/.rels", RELS);
    zb.add("word/_rels/document.xml.rels", WORD_RELS);
    zb.add("word/comments.xml", COMMENTS_XML);
    zb.add("word/commentsExtended.xml", COMMENTS_EXT_XML);
    zb.add("word/document.xml", DOCUMENT_XML);
    const std::vector<uint8_t> bytes = zb.build();

    // A hard-coded "/tmp" does not exist on a typical Windows host, so honour
    // the platform's temp-directory variables first.
    std::string dir = "/tmp";
    const char* const env_names[] = {"TMPDIR", "TEMP", "TMP"};
    for (const char* env_name : env_names) {
        const char* value = std::getenv(env_name);
        if (value != nullptr && *value != '\0') {
            dir = value;
            break;
        }
    }
    const char last = dir.back();  // dir is never empty here
    if (last != '/' && last != '\\') dir += '/';
    const std::string path = dir + "test_docx_parser_fixture.docx";

    // Write to temp file
    std::ofstream f(path, std::ios::binary);
    if (!f) throw std::runtime_error("Cannot create temp file: " + path);
    f.write(reinterpret_cast<const char*>(bytes.data()),
            static_cast<std::streamsize>(bytes.size()));
    // Close explicitly so buffered-write errors surface here rather than
    // being lost in the destructor.
    f.close();
    if (!f) {
        std::remove(path.c_str());  // do not leave a truncated fixture behind
        throw std::runtime_error("Cannot write temp file: " + path);
    }
    return path;
}
// ─── Assertion helpers ────────────────────────────────────────────────────────
// Running totals of checks that passed / failed across the whole program.
static int g_passed = 0;
static int g_failed = 0;

// CHECK(expr): evaluates `expr` once. If it is false, prints
// "FAIL [<line>] <expr text>" to std::cerr and increments g_failed;
// otherwise increments g_passed. Execution continues either way.
#define CHECK(expr) \
    do { \
        if (!(expr)) { \
            std::cerr << "FAIL [" << __LINE__ << "] " #expr "\n"; \
            ++g_failed; \
        } else { \
            ++g_passed; \
        } \
    } while(0)

// CHECK_EQ(a, b): compares `a` and `b` with operator!=. On mismatch prints
// the expression text and both values to std::cerr and increments g_failed;
// otherwise increments g_passed. Each operand is evaluated exactly once: the
// results are bound to const references (temporaries have their lifetime
// extended) so the failure message reuses them instead of re-running the
// expressions.
#define CHECK_EQ(a, b) \
    do { \
        const auto& check_eq_lhs_ = (a); \
        const auto& check_eq_rhs_ = (b); \
        if (check_eq_lhs_ != check_eq_rhs_) { \
            std::cerr << "FAIL [" << __LINE__ << "] " #a " == " #b \
                      << " (got: " << check_eq_lhs_ << " vs " \
                      << check_eq_rhs_ << ")\n"; \
            ++g_failed; \
        } else { \
            ++g_passed; \
        } \
    } while(0)
// ─── Tests ────────────────────────────────────────────────────────────────────
/// Parses the fixture at `path` and checks the total comment count plus the
/// author, initials, date, text and paragraph style of comment 0.
void test_basic_parsing(const std::string& path) {
    std::cout << "\n=== test_basic_parsing ===\n";
    docx::DocxParser p;
    p.parse(path);
    const auto& comments = p.comments();
    CHECK_EQ(comments.size(), 3u);
    const auto* c0 = p.find_by_id(0);
    CHECK(c0 != nullptr);
    // The failure is already recorded above; stop here instead of
    // dereferencing a null pointer in the field checks below.
    if (c0 == nullptr) return;
    CHECK_EQ(c0->author, std::string("Alice Tester"));
    CHECK_EQ(c0->initials, std::string("AT"));
    CHECK_EQ(c0->date, std::string("2024-03-01T10:00:00Z"));
    CHECK(c0->text.find("revision") != std::string::npos);
    CHECK_EQ(c0->paragraph_style, std::string("CommentText"));
}
/// Parses the fixture at `path` and checks the reply structure: comment 0 is
/// a root, comments 1 and 2 are replies whose parent is 0, the root lists two
/// replies, and its thread contains three ids starting with 0.
void test_threading(const std::string& path) {
    std::cout << "\n=== test_threading ===\n";
    docx::DocxParser p;
    p.parse(path);
    const auto* c0 = p.find_by_id(0);
    const auto* c1 = p.find_by_id(1);
    const auto* c2 = p.find_by_id(2);
    CHECK(c0 && !c0->is_reply);
    CHECK(c1 && c1->is_reply);
    // A missing comment is already reported by the CHECK above; the guards
    // below only prevent dereferencing a null pointer afterwards.
    if (c1) {
        CHECK_EQ(c1->parent_id, 0);
    }
    CHECK(c2 && c2->is_reply);
    if (c2) {
        CHECK_EQ(c2->parent_id, 0);
    }
    if (c0) {
        // Replies on root
        CHECK_EQ(c0->replies.size(), 2u);
        // Thread chain
        CHECK_EQ(c0->thread_ids.size(), 3u);
        // Only index into the chain when it has a first element; an empty
        // chain has already failed the size check.
        if (!c0->thread_ids.empty()) {
            CHECK_EQ(c0->thread_ids[0], 0);
        }
    }
    auto thread = p.thread(0);
    CHECK_EQ(thread.size(), 3u);
}
/// Parses the fixture at `path` and checks the `done` flag: set on comment 2,
/// clear on comment 0.
void test_done_flag(const std::string& path) {
    std::cout << "\n=== test_done_flag ===\n";
    docx::DocxParser p;
    p.parse(path);
    const auto* c2 = p.find_by_id(2);
    CHECK(c2 != nullptr);
    // Skip the field check when the lookup failed (already reported) so the
    // test does not dereference a null pointer.
    if (c2 != nullptr) {
        CHECK(c2->done);
    }
    const auto* c0 = p.find_by_id(0);
    CHECK(c0 && !c0->done);
}
/// Parses the fixture at `path` and checks that comment 0 has non-empty
/// referenced text containing "quick brown fox".
void test_anchor_text(const std::string& path) {
    std::cout << "\n=== test_anchor_text ===\n";
    docx::DocxParser p;
    p.parse(path);
    const auto* c0 = p.find_by_id(0);
    CHECK(c0 != nullptr);
    // Already counted as a failure; bail out rather than dereference null.
    if (c0 == nullptr) return;
    CHECK(!c0->referenced_text.empty());
    CHECK(c0->referenced_text.find("quick brown fox") != std::string::npos);
}
// Parses the fixture at `path` and checks by_author(): two results for
// "Alice Tester", one for "Bob Reviewer", and an empty result for an author
// name that does not occur in the fixture.
void test_by_author(const std::string& path) {
std::cout << "\n=== test_by_author ===\n";
docx::DocxParser p;
p.parse(path);
auto alice = p.by_author("Alice Tester");
CHECK_EQ(alice.size(), 2u);
auto bob = p.by_author("Bob Reviewer");
CHECK_EQ(bob.size(), 1u);
// Unknown author: expected to yield an empty result.
auto nobody = p.by_author("Nobody");
CHECK(nobody.empty());
}
// Parses the fixture at `path` and checks the aggregate statistics returned
// by stats(): comment/reply/root/resolved counts, the number of unique
// authors, and the earliest and latest comment dates.
void test_stats(const std::string& path) {
std::cout << "\n=== test_stats ===\n";
docx::DocxParser p;
p.parse(path);
const auto& s = p.stats();
// Fixture: 3 comments, of which 2 are replies to the single root.
CHECK_EQ(s.total_comments, 3u);
CHECK_EQ(s.total_replies, 2u);
CHECK_EQ(s.total_root_comments,1u);
// Only one commentEx entry in the fixture has done="1".
CHECK_EQ(s.total_resolved, 1u);
CHECK_EQ(s.unique_authors.size(), 2u);
CHECK_EQ(s.earliest_date, std::string("2024-03-01T10:00:00Z"));
CHECK_EQ(s.latest_date, std::string("2024-03-03T09:00:00Z"));
}
/// Parses the fixture at `path` and checks that root_comments() yields
/// exactly one entry whose id is 0.
void test_root_comments(const std::string& path) {
    std::cout << "\n=== test_root_comments ===\n";
    docx::DocxParser p;
    p.parse(path);
    auto roots = p.root_comments();
    CHECK_EQ(roots.size(), 1u);
    // An empty result has already failed the size check; do not index into it.
    if (!roots.empty()) {
        CHECK_EQ(roots[0]->id, 0);
    }
}
// Runs BatchParser over the fixture at `path` (listed twice) and checks that
// no errors were recorded and that the per-path comments and stats both
// report three comments, then releases all results.
void test_batch_parser(const std::string& path) {
std::cout << "\n=== test_batch_parser ===\n";
// NOTE(review): the constructor argument 2 is presumably a worker/thread
// count - confirm against the BatchParser declaration.
docx::BatchParser bp(2);
bp.parse_all({path, path}); // same file twice — valid
CHECK_EQ(bp.errors().size(), 0u);
CHECK_EQ(bp.comments(path).size(), 3u);
CHECK_EQ(bp.stats(path).total_comments, 3u);
bp.release_all();
}
/// Checks that parsing a path that does not exist throws
/// docx::DocxFileError. Any other exception type counts as a failure and is
/// logged so the cause is visible in the test output.
void test_missing_file() {
    std::cout << "\n=== test_missing_file ===\n";
    docx::DocxParser p;
    bool threw = false;
    try {
        p.parse("/nonexistent/path/file.docx");
    } catch (const docx::DocxFileError&) {
        threw = true;
    } catch (const std::exception& ex) {
        // Wrong exception type: leave `threw` false so the CHECK below
        // fails, but say what was actually thrown.
        std::cerr << "unexpected exception type: " << ex.what() << "\n";
    } catch (...) {
        std::cerr << "unexpected non-standard exception\n";
    }
    CHECK(threw);
}
// ─── main ─────────────────────────────────────────────────────────────────────
/// Records an exception that escaped a test as one failure and logs it.
/// Must be called from inside a catch handler: it rethrows the in-flight
/// exception to find out its type.
static void report_escaped_exception(const char* test_name) {
    ++g_failed;
    try {
        throw;
    } catch (const std::exception& ex) {
        std::cerr << "FAIL " << test_name << ": unhandled exception: "
                  << ex.what() << "\n";
    } catch (...) {
        std::cerr << "FAIL " << test_name << ": unhandled non-standard exception\n";
    }
}

/// Creates the fixture file, runs every test, deletes the fixture and prints
/// the pass/fail totals.
///
/// Each test runs inside its own try block so that an exception escaping one
/// test is counted as a failure without preventing the remaining tests, the
/// fixture cleanup, or the summary.
///
/// @return 0 when no check failed, 1 otherwise (including when the fixture
///         could not be created).
int main() {
    std::string path;
    try {
        path = make_temp_docx();
        std::cout << "Test fixture: " << path << "\n";
    } catch (const std::exception& ex) {
        std::cerr << "Failed to create fixture: " << ex.what() << "\n";
        return 1;
    }

    struct PathTest {
        const char* name;
        void (*run)(const std::string&);
    };
    const PathTest path_tests[] = {
        {"test_basic_parsing", test_basic_parsing},
        {"test_threading",     test_threading},
        {"test_done_flag",     test_done_flag},
        {"test_anchor_text",   test_anchor_text},
        {"test_by_author",     test_by_author},
        {"test_stats",         test_stats},
        {"test_root_comments", test_root_comments},
        {"test_batch_parser",  test_batch_parser},
    };
    for (const PathTest& t : path_tests) {
        try {
            t.run(path);
        } catch (...) {
            report_escaped_exception(t.name);
        }
    }
    try {
        test_missing_file();
    } catch (...) {
        report_escaped_exception("test_missing_file");
    }

    std::remove(path.c_str());
    std::cout << "\n──────────────────────────────\n";
    std::cout << "Results: " << g_passed << " passed, " << g_failed << " failed\n";
    return (g_failed == 0) ? 0 : 1;
}