@@ -2,72 +2,63 @@ package org.dbpedia.extraction.util
22
33import org .scalatest .{BeforeAndAfter , FlatSpec , Matchers }
44
5- /**
6- * Test suite for DataQualityMonitor
7- */
85class DataQualityMonitorTest extends FlatSpec with Matchers with BeforeAndAfter {
96
107 before {
11- // Reset metrics before each test
128 DataQualityMonitor .reset()
139 }
1410
15- " DataQualityMonitor" should " create a monitor for an extractor " in {
11+ " DataQualityMonitor" should " create monitor" in {
1612 val monitor = DataQualityMonitor .forExtractor(" TestExtractor" )
1713 monitor should not be null
1814 monitor.extractorName should be(" TestExtractor" )
1915 }
2016
21- it should " record invalid data errors" in {
17+ it should " record errors" in {
2218 val monitor = DataQualityMonitor .forExtractor(" TestExtractor" )
23-
2419 monitor.logInvalidData(
2520 " Albert_Einstein" ,
2621 " Invalid IRI syntax" ,
2722 Some (new IllegalArgumentException (" Test exception" )),
2823 Some (" http://malformed url" )
2924 )
3025
31- val metrics = monitor.getMetrics()
32- metrics should not be empty
26+ monitor.getMetrics() should not be empty
3327 monitor.getTotalErrors() should be(1 )
3428 }
3529
36- it should " categorize errors correctly " in {
30+ it should " categorize errors" in {
3731 val monitor = DataQualityMonitor .forExtractor(" TestExtractor" )
3832
39- // Log different types of errors
4033 monitor.logInvalidData(" Page1" , " Invalid data" , exception = Some (new IllegalArgumentException ()))
4134 monitor.logInvalidData(" Page2" , " Malformed URL" , exception = None )
4235 monitor.logInvalidData(" Page3" , " Missing property" , exception = None )
4336
44- val metrics = monitor.getMetrics()
45- metrics.size should be >= 1
37+ monitor.getMetrics().size should be >= 1
4638 monitor.getTotalErrors() should be(3 )
4739 }
4840
49- it should " track multiple extractors independently" in {
50- val monitor1 = DataQualityMonitor .forExtractor(" ExtractorA" )
51- val monitor2 = DataQualityMonitor .forExtractor(" ExtractorB" )
41+ it should " track extractors independently" in {
42+ val m1 = DataQualityMonitor .forExtractor(" ExtractorA" )
43+ val m2 = DataQualityMonitor .forExtractor(" ExtractorB" )
5244
53- monitor1 .logInvalidData(" Page1" , " Error in A" )
54- monitor1 .logInvalidData(" Page2" , " Another error in A" )
55- monitor2 .logInvalidData(" Page3" , " Error in B" )
45+ m1 .logInvalidData(" Page1" , " Error in A" )
46+ m1 .logInvalidData(" Page2" , " Another error in A" )
47+ m2 .logInvalidData(" Page3" , " Error in B" )
5648
57- monitor1 .getTotalErrors() should be(2 )
58- monitor2 .getTotalErrors() should be(1 )
49+ m1 .getTotalErrors() should be(2 )
50+ m2 .getTotalErrors() should be(1 )
5951 }
6052
6153 it should " provide global metrics" in {
62- val monitor1 = DataQualityMonitor .forExtractor(" ExtractorA" )
63- val monitor2 = DataQualityMonitor .forExtractor(" ExtractorB" )
54+ val m1 = DataQualityMonitor .forExtractor(" ExtractorA" )
55+ val m2 = DataQualityMonitor .forExtractor(" ExtractorB" )
6456
65- monitor1 .logInvalidData(" Page1" , " Error 1" )
66- monitor2 .logInvalidData(" Page2" , " Error 2" )
67- monitor2 .logInvalidData(" Page3" , " Error 3" )
57+ m1 .logInvalidData(" Page1" , " Error 1" )
58+ m2 .logInvalidData(" Page2" , " Error 2" )
59+ m2 .logInvalidData(" Page3" , " Error 3" )
6860
69- val globalMetrics = DataQualityMonitor .getGlobalMetrics()
70- globalMetrics.values.sum should be(3 )
61+ DataQualityMonitor .getGlobalMetrics().values.sum should be(3 )
7162 }
7263
7364 it should " retrieve error details" in {
@@ -81,60 +72,41 @@ class DataQualityMonitorTest extends FlatSpec with Matchers with BeforeAndAfter
8172 details.head.extractorName should be(" TestExtractor" )
8273 }
8374
84- it should " export errors to CSV format " in {
75+ it should " export to CSV" in {
8576 val monitor = DataQualityMonitor .forExtractor(" TestExtractor" )
8677
8778 monitor.logInvalidData(" Einstein" , " Invalid IRI" )
8879 monitor.logInvalidData(" Tesla" , " Malformed URL" )
8980
9081 val csv = DataQualityMonitor .exportToCsv(" TestExtractor:InvalidData" , limit = 100 )
91-
92- csv should include(" Extractor,PageTitle,ErrorMessage,Timestamp" )
93- csv should include(" TestExtractor" )
82+ csv should include(" Extractor,PageTitle,ErrorMessage" )
9483 csv should include(" Einstein" )
9584 }
9685
97- it should " limit stored error details to prevent memory issues " in {
86+ it should " limit stored details" in {
9887 val monitor = DataQualityMonitor .forExtractor(" TestExtractor" )
88+ (1 to 11000 ).foreach(i => monitor.logInvalidData(s " Page $i" , " Error" ))
9989
100- // Try to log more than the limit (10000)
101- for (i <- 1 to 11000 ) {
102- monitor.logInvalidData(s " Page $i" , " Error" )
103- }
104-
105- // Should still work without memory issues
106- val metrics = monitor.getMetrics()
10790 monitor.getTotalErrors() should be(11000 )
108-
109- // But details should be limited
11091 val details = DataQualityMonitor .getErrorDetails(" TestExtractor:InvalidData" , limit = 20000 )
11192 details.size should be <= 10000
11293 }
11394
11495 it should " log skipped extractions" in {
11596 val monitor = DataQualityMonitor .forExtractor(" TestExtractor" )
116-
117- // Should not throw exception
11897 monitor.logSkipped(" TestPage" , " Not in main namespace" )
119-
120- // Skipped pages don't count as errors
12198 monitor.getTotalErrors() should be(0 )
12299 }
123100
124101 it should " log successful extractions" in {
125102 val monitor = DataQualityMonitor .forExtractor(" TestExtractor" )
126-
127- // Should not throw exception
128103 monitor.logSuccess(" Einstein" , 5 )
129-
130- // Successful extractions don't count as errors
131104 monitor.getTotalErrors() should be(0 )
132105 }
133106
134- it should " handle concurrent logging safely " in {
107+ it should " handle concurrent logging" in {
135108 val monitor = DataQualityMonitor .forExtractor(" ConcurrentExtractor" )
136109
137- // Simulate concurrent logging from multiple threads
138110 val threads = (1 to 10 ).map { i =>
139111 new Thread {
140112 override def run (): Unit = {
@@ -147,12 +119,10 @@ class DataQualityMonitorTest extends FlatSpec with Matchers with BeforeAndAfter
147119
148120 threads.foreach(_.start())
149121 threads.foreach(_.join())
150-
151- // Should have logged all errors without data corruption
152122 monitor.getTotalErrors() should be(1000 )
153123 }
154124
155- " Error details" should " include all required fields" in {
125+ " Error details" should " include required fields" in {
156126 val monitor = DataQualityMonitor .forExtractor(" TestExtractor" )
157127
158128 monitor.logInvalidData(
0 commit comments