Skip to content

Commit b6ce78e

Browse files
author
haniyakonain
committed
Add multi-extractor combination tests with format validation
- Added test-extraction-combinations.sh script to test different extractor combinations
- Tests mappings-only, all-enabled, and PageId+Label combinations
- Validates both N-Triples and Turtle output formats
- Integrated into GitHub Actions workflow
- TriX format warnings already implemented in Extraction.scala

How to run manually:

    cd scripts/src/main/bash
    chmod +x test-extraction-combinations.sh
    ./test-extraction-combinations.sh

Note: The server must be running on localhost:9999 before running the tests.
1 parent 4731988 commit b6ce78e

File tree

3 files changed

+39
-6
lines changed

3 files changed

+39
-6
lines changed

.github/workflows/server-web-api-test.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,10 @@ jobs:
5151
run: |
5252
cd scripts/src/main/bash
5353
./coords-integration-test.sh
54+
- name: Run Multi-Extractor Combination Tests
55+
run: |
56+
cd scripts/src/main/bash
57+
./test-extraction-combinations.sh
5458
- name: Stop DBpedia Server
5559
run: |
5660
if [ -f server.pid ]; then
scripts/src/main/bash/test-extraction-combinations.sh

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#!/bin/bash
2+
# Test Multi-Extractor API (PR #794)
3+
4+
API="http://localhost:9999/extraction/en"
5+
TITLE="Albert_Einstein"
6+
7+
echo "Testing Multi-Extractor API"
8+
9+
# Test 1: Mappings-only + N-Triples
10+
curl -s "$API/extract?title=$TITLE&format=n-triples&extractors=mappings" | grep -q "^<" && echo "✓ Mappings N-Triples" || echo "✗ FAIL"
11+
12+
# Test 2: All-enabled + Turtle
13+
curl -s "$API/extract?title=$TITLE&format=turtle-triples&extractors=custom" | grep -qE "(@prefix|^<)" && echo "✓ All-enabled Turtle" || echo "✗ FAIL"
14+
15+
# Test 3: PageId + Label + N-Triples
16+
curl -s "$API/extract?title=$TITLE&format=n-triples&extractors=PageIdExtractor&extractors=LabelExtractor" | grep -q "^<" && echo "✓ PageId+Label N-Triples" || echo "✗ FAIL"
17+
18+
# Test 4: PageId + Label + Turtle
19+
curl -s "$API/extract?title=$TITLE&format=turtle-triples&extractors=PageIdExtractor&extractors=LabelExtractor" | grep -qE "(@prefix|^<)" && echo "✓ PageId+Label Turtle" || echo "✗ FAIL"
20+
21+
echo "Done"

server/src/main/scala/org/dbpedia/extraction/server/resources/Extraction.scala

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -302,14 +302,22 @@ name match {
302302
val writer = new StringWriter
303303
val formatter = createFormatter(finalFormat, writer)
304304

305-
if (errors.nonEmpty && (finalFormat.contains("turtle") || finalFormat.contains("n-triples") || finalFormat.contains("n-quads"))) {
306-
writer.write("# Warning: Partial results - some extractors failed:\n")
307-
errors.foreach(err => writer.write(s"# - $err\n"))
308-
writer.write("\n")
309-
}
310-
311305
val finalDestination = new DeduplicatingDestination(new WriterDestination(() => writer, formatter))
312306
finalDestination.open()
307+
308+
if (errors.nonEmpty) {
309+
if (finalFormat.contains("turtle") || finalFormat.contains("n-triples") || finalFormat.contains("n-quads")) { writer.write("# Warning: Partial results - some extractors failed:\n")
310+
errors.foreach(err => writer.write(s"# - $err\n"))
311+
writer.write("\n") // Show extractor errors as comments (RDF-safe, won't break parsing)
312+
} else if (finalFormat == "trix") {
313+
// XML comments for TriX format
314+
writer.write("<!-- WARNING: Partial extraction results\n")
315+
writer.write(" Some extractors failed:\n")
316+
errors.foreach(err => writer.write(s" - $err\n"))
317+
writer.write("-->\n")
318+
}
319+
}
320+
313321
finalDestination.write(collector.quads)
314322
finalDestination.close()
315323

0 commit comments

Comments
 (0)