Skip to content

Commit 73cbddb

Browse files
committed
Merge pull request dbpedia#142 from jimkont/live_features
Various enhancements for Live
2 parents 3014472 + aa3ce43 commit 73cbddb

File tree

5 files changed

+95
-19
lines changed

5 files changed

+95
-19
lines changed

live/live.default.ini

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,27 @@ mappingsOAIUri = http://mappings.dbpedia.org/index.php/Special:OAIRepository
4141
mappingsOaiPrefix = oai:en.wikipedia.org:enwiki:
4242
mappingsBaseWikiUri = http://mappings.dbpedia.org/wiki/
4343

44+
;*********************
45+
; FEEDERS
46+
;*********************
47+
48+
feeder.live.enabled=true
49+
feeder.live.pollInterval=3000
50+
feeder.live.sleepInterval=1000
51+
52+
feeder.mappings.enabled=true
53+
feeder.mappings.pollInterval=2000
54+
feeder.mappings.sleepInterval=1000
55+
56+
feeder.unmodified.enabled=true
57+
feeder.unmodified.pollInterval=2000
58+
feeder.unmodified.sleepInterval=1000
59+
60+
feeder.unmodified.minDaysAgo=30
61+
feeder.unmodified.chunk=5000
62+
feeder.unmodified.threshold=500
63+
feeder.unmodified.sleepTime=30000
64+
4465
;*********************
4566
; OPTIONS FOR LANGUAGE
4667
;*********************

live/src/main/SQL/dbstructure.sql

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22
-- Table structure for table `DBPEDIALIVE_CACHE`
33
--
44

5+
SET SESSION innodb_file_per_table=1;
6+
SET SESSION innodb_file_format=Barracuda;
7+
58
DROP TABLE IF EXISTS `DBPEDIALIVE_CACHE`;
69
CREATE TABLE IF NOT EXISTS `DBPEDIALIVE_CACHE` (
710
`pageID` int(11) NOT NULL DEFAULT '0' COMMENT 'The wikipedia page ID',
@@ -15,4 +18,9 @@ CREATE TABLE IF NOT EXISTS `DBPEDIALIVE_CACHE` (
1518

1619
PRIMARY KEY (`pageID`),
1720
KEY `updated_index` (`updated`)
18-
) DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
21+
) DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci ENGINE = InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4;
22+
23+
-- We use innodb_file_per_table=1; innodb_file_format=Barracuda; ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4
24+
-- because for the English Wikipedia the cache can grow to as much as 200 GB.
25+
-- This greatly reduces I/O and disk usage. It makes the database a little slower, but recovery is also easier
26+
-- when tables are stored in separate files.

live/src/main/java/org/dbpedia/extraction/live/main/Main.java

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -55,21 +55,33 @@ protected PasswordAuthentication getPasswordAuthentication() {
5555
public static void initLive() {
5656

5757
PropertyConfigurator.configure("log4j.live.properties");
58-
59-
feeders .add( new OAIFeederMappings("FeederMappings", LiveQueuePriority.MappingPriority,
58+
if (Boolean.parseBoolean(LiveOptions.options.get("feeder.mappings.enabled")) == true) {
59+
long pollInterval = Long.parseLong(LiveOptions.options.get("feeder.mappings.pollInterval"));
60+
long sleepInterval = Long.parseLong(LiveOptions.options.get("feeder.mappings.sleepInterval"));
61+
feeders .add( new OAIFeederMappings("FeederMappings", LiveQueuePriority.MappingPriority,
6062
LiveOptions.options.get("mappingsOAIUri"), LiveOptions.options.get("mappingsBaseWikiUri"), LiveOptions.options.get("mappingsOaiPrefix"),
61-
2000, 1000, LiveOptions.options.get("uploaded_dump_date"),
63+
pollInterval, sleepInterval, LiveOptions.options.get("uploaded_dump_date"),
6264
LiveOptions.options.get("working_directory")));
65+
}
6366

64-
65-
feeders .add( new OAIFeeder("FeederLive", LiveQueuePriority.LivePriority,
67+
if (Boolean.parseBoolean(LiveOptions.options.get("feeder.live.enabled")) == true) {
68+
long pollInterval = Long.parseLong(LiveOptions.options.get("feeder.live.pollInterval"));
69+
long sleepInterval = Long.parseLong(LiveOptions.options.get("feeder.live.sleepInterval"));
70+
feeders .add( new OAIFeeder("FeederLive", LiveQueuePriority.LivePriority,
6671
LiveOptions.options.get("oaiUri"), LiveOptions.options.get("baseWikiUri"), LiveOptions.options.get("oaiPrefix"),
67-
3000, 1000, LiveOptions.options.get("uploaded_dump_date"),
72+
pollInterval, sleepInterval, LiveOptions.options.get("uploaded_dump_date"),
6873
LiveOptions.options.get("working_directory")));
74+
}
6975

70-
feeders .add( new UnmodifiedFeeder("FeederUnmodified", LiveQueuePriority.UnmodifiedPagePriority,
71-
30, 5000,500,30000,
76+
if (Boolean.parseBoolean(LiveOptions.options.get("feeder.unmodified.enabled")) == true) {
77+
int minDaysAgo = Integer.parseInt(LiveOptions.options.get("feeder.unmodified.minDaysAgo"));
78+
int chunk = Integer.parseInt(LiveOptions.options.get("feeder.unmodified.chunk"));
79+
int threshold = Integer.parseInt(LiveOptions.options.get("feeder.unmodified.threshold"));
80+
long sleepTime = Long.parseLong(LiveOptions.options.get("feeder.unmodified.sleepTime"));
81+
feeders .add( new UnmodifiedFeeder("FeederUnmodified", LiveQueuePriority.UnmodifiedPagePriority,
82+
minDaysAgo, chunk, threshold, sleepTime,
7283
LiveOptions.options.get("uploaded_dump_date"), LiveOptions.options.get("working_directory")));
84+
}
7385

7486
int threads = Integer.parseInt(LiveOptions.options.get("ProcessingThreads"));
7587
for (int i=0; i < threads ; i++){

live/src/main/java/org/dbpedia/extraction/live/processor/PageProcessor.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ public void run(){
7474
processPage(page);
7575
}
7676
catch (Exception exp){
77-
logger.error("Failed to process page");
77+
logger.error("Failed to process page: " + exp.getMessage());
7878
}
7979
}
8080
}

live/src/main/java/org/dbpedia/extraction/live/storage/JDBCUtil.java

Lines changed: 44 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,43 @@ public static boolean execSQL(String query) {
4040
/*
4141
* Executes an SQL query and returns true if it succeeded, or false if an exception occurred
4242
* */
43+
4344
public static boolean execSQL(String query, boolean sparql) {
45+
try {
46+
_execSQL(query,sparql);
47+
return true;
48+
} catch (Exception e) {
49+
//When Virtuoso commits a CHECKPOINT we fail to insert anything
50+
//and get a Transaction deadlock exception
51+
//so we serialize on JDBCUtil.class and retry up to 9 times, sleeping 30 seconds after each failed retry
52+
if (e.toString().contains("Transaction deadlock")) {
53+
synchronized (JDBCUtil.class) {
54+
//The checkpoint lasts around 2-3 minutes
55+
int attempts = 10;
56+
int sleep = 30000;
57+
for (int i = 1; i<attempts; i++) {
58+
try {
59+
logger.warn("Transaction Deadlock, retrying query: " + i + "/" + attempts);
60+
_execSQL(query,sparql);
61+
//When no exception return true
62+
return true;
63+
} catch (Exception e1) {
64+
logger.warn("Transaction Deadlock, retrying query: " + i + "/" + attempts + "(FAILED)");
65+
try {
66+
Thread.sleep(sleep);
67+
} catch (InterruptedException e2) {
68+
//do nothing
69+
}
70+
}
71+
}
72+
}
73+
}
74+
logger.warn(e.getMessage());
75+
}
76+
return false;
77+
}
78+
79+
private static void _execSQL(String query, boolean sparql) throws Exception {
4480

4581
Connection conn = null;
4682
Statement stmt = null;
@@ -49,32 +85,31 @@ public static boolean execSQL(String query, boolean sparql) {
4985
conn = (sparql == false) ? JDBCPoolConnection.getCachePoolConnection() : JDBCPoolConnection.getStorePoolConnection();
5086
stmt = conn.createStatement();
5187
result = stmt.executeQuery(query);
52-
53-
return true;
5488
} catch (Exception e) {
5589
logger.warn(e.getMessage());
56-
String message = e.getMessage();
57-
if (message.contains("datetime"))
58-
return true;
59-
return false;
90+
//TODO Hack until Virtuoso fixes its datetime bug
91+
//see http://sourceforge.net/mailarchive/forum.php?thread_name=CA%2Bu4%2Ba0RacpXoABoHL9wZJmxoTvAazwtbn3EKtay5a3%3DS7O96g%40mail.gmail.com&forum_name=virtuoso-users
92+
String message = e.toString();
93+
if (!message.contains("datetime"))
94+
throw new Exception(e.getMessage());
6095
} finally {
6196
try {
6297
if (result != null)
6398
result.close();
6499
} catch (Exception e) {
65-
logger.warn(e.getMessage());
100+
throw new Exception(e.getMessage());
66101
}
67102
try {
68103
if (stmt != null)
69104
stmt.close();
70105
} catch (Exception e) {
71-
logger.warn(e.getMessage());
106+
throw new Exception(e.getMessage());
72107
}
73108
try {
74109
if (conn != null)
75110
conn.close();
76111
} catch (Exception e) {
77-
logger.warn(e.getMessage());
112+
throw new Exception(e.getMessage());
78113
}
79114
}
80115
}

0 commit comments

Comments
 (0)