-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparse.test.js
More file actions
51 lines (38 loc) · 1.43 KB
/
parse.test.js
File metadata and controls
51 lines (38 loc) · 1.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
const fs = require('fs');
const path = require('path');
const util = require('util');
const readFile = util.promisify(fs.readFile);
const { SimplePDFParser } = require('../lib');
/**
 * Loads a fixture from the `pdfs` directory next to this test file and
 * feeds its contents to a new SimplePDFParser.
 *
 * @param {string} fileName - File name inside the `pdfs` fixture directory.
 * @param {object} [options] - Forwarded unchanged to the SimplePDFParser constructor.
 * @returns {Promise<*>} Whatever `parse()` yields for the given file.
 */
async function runParser(fileName, options) {
  const fixturePath = path.join(__dirname, './pdfs/', fileName);
  const contents = await readFile(fixturePath);
  return new SimplePDFParser(contents, options).parse();
}
// With image extraction switched off, the fixture is expected to produce
// exactly 19 entries whose type is 'text'.
test('extracts all text lines', async () => {
  const options = { extractImages: false };
  const parsed = await runParser('images-and-formatting.pdf', options);

  expect(Array.isArray(parsed)).toBe(true);

  const textItems = parsed.filter(({ type }) => type === 'text');
  expect(textItems).toHaveLength(19);
});
// Parsing with no options passed: the fixture is expected to produce
// exactly 2 entries whose type is 'image'.
test('extracts all images', async () => {
  const parsed = await runParser('images-and-formatting.pdf');

  expect(Array.isArray(parsed)).toBe(true);

  const imageItems = parsed.filter(({ type }) => type === 'image');
  expect(imageItems).toHaveLength(2);
});
// With joinParagraphs enabled (and images off), the fixture is expected to
// produce exactly 5 entries whose type is 'text'.
test('joins paragraphs', async () => {
  const options = {
    extractImages: false,
    joinParagraphs: true,
  };
  const parsed = await runParser('images-and-formatting.pdf', options);

  expect(Array.isArray(parsed)).toBe(true);

  const textItems = parsed.filter(({ type }) => type === 'text');
  expect(textItems).toHaveLength(5);
});
// With ignoreEmptyText turned off (and images off), the fixture is expected
// to produce exactly 24 entries whose type is 'text'.
test('includes empty texts', async () => {
  const options = {
    extractImages: false,
    ignoreEmptyText: false,
  };
  const parsed = await runParser('images-and-formatting.pdf', options);

  expect(Array.isArray(parsed)).toBe(true);

  const textItems = parsed.filter(({ type }) => type === 'text');
  expect(textItems).toHaveLength(24);
});