@@ -21,6 +21,8 @@ def __init__(self):
2121
2222 self .create_content_models ()
2323 self .model_ids = self .get_content_models ()
24+ self .create_wbt_types ()
25+ self .wbt_types = self .get_wbt_types ()
2426
2527 def mysql_connect (self ):
2628 """Helper function connecting to SQL database."""
@@ -112,6 +114,24 @@ def bot_schema(self):
112114 cur .close ()
113115 return out
114116
117+ def create_wbt_types (self ):
118+ """Creates three wbt_types: label, description and alias"""
119+ cur = self .conn .cursor ()
120+ q = "INSERT IGNORE INTO wbt_type (wby_name ) VALUES('label'),('description'),('alias')"
121+ cur .execute (q )
122+ self .conn .commit ()
123+ cur .close ()
124+
125+ def get_wbt_types (self ):
126+ """Returns the existing wbt_types from wbt_type-table"""
127+ cur = self .conn .cursor ()
128+ q = "SELECT * FROM wbt_type"
129+ cur .execute (q )
130+ out = dict ((y .decode ('utf-8' ), x ) for x , y in cur .fetchall ())
131+ self .conn .commit ()
132+ cur .close ()
133+ return out
134+
115135 def create_content_models (self ):
116136 """Creates a few content models"""
117137 cur = self .conn .cursor ()
@@ -195,6 +215,53 @@ def get_content_id(self):
195215 cur .close ()
196216 return content_id
197217
218+ def get_wbtl_id (self , wbtl_type_id = None , wbxl_language = None , wbx_text = None , wbxl_id = None ):
219+ """Returns wbtl_id (int) in wbt_term_in_lang-table for given wbtl_type_id (int),
220+ wbxl_language (language code), wbx_text (str) and wbxl_id (int)"""
221+ cur = self .conn .cursor ()
222+ try :
223+ cur .execute ("""SELECT wbtl_id FROM wbt_term_in_lang, wbt_text_in_lang,
224+ wbt_text WHERE wbtl_text_in_lang_id=wbxl_id AND wbtl_type_id=%s
225+ AND wbxl_language=%s AND wbxl_text_id=wbx_id AND wbx_text=%s""" ,
226+ [wbtl_type_id , wbxl_language , wbx_text ])
227+ wbtl_id = cur .fetchone ()[0 ]
228+ except :
229+ cur .execute ("INSERT INTO wbt_term_in_lang VALUES(NULL,%s,%s)" , [wbtl_type_id , wbxl_id ])
230+ cur .execute ("SELECT LAST_INSERT_ID()" )
231+ wbtl_id = cur .fetchone ()[0 ]
232+ cur .close ()
233+ return wbtl_id
234+
235+ def get_wbxl_id (self , wbxl_language = None , wbx_text = None , wbx_id = None ):
236+ """Returns wbxl_id (int) in wbt_text_in_lang-table for given
237+ wbxl_language (language code), wbx_text (str) and wbx_id (int)"""
238+ cur = self .conn .cursor ()
239+ try :
240+ cur .execute ("""SELECT wbxl_id FROM wbt_text_in_lang, wbt_text
241+ WHERE wbxl_language=%s AND wbxl_text_id=wbx_id AND wbx_text=%s""" ,
242+ [wbxl_language , wbx_text ])
243+ wbxl_id = cur .fetchone ()[0 ]
244+ except :
245+ cur .execute ("INSERT INTO wbt_text_in_lang VALUES(NULL,%s,%s)" ,
246+ [wbxl_language , wbx_id ])
247+ cur .execute ("SELECT LAST_INSERT_ID()" )
248+ wbxl_id = cur .fetchone ()[0 ]
249+ cur .close ()
250+ return wbxl_id
251+
252+ def get_wbx_id (self , wbx_text = None ):
253+ """Returns wbx_id (int) in wbt_text-table for given wbx_text (str)"""
254+ cur = self .conn .cursor ()
255+ try :
256+ cur .execute ("SELECT wbx_id FROM wbt_text WHERE wbx_text=%s" , [wbx_text ])
257+ wbx_id = cur .fetchone ()[0 ]
258+ except :
259+ cur .execute ("INSERT INTO wbt_text VALUES(NULL,%s)" , [wbx_text ])
260+ cur .execute ("SELECT LAST_INSERT_ID()" )
261+ wbx_id = cur .fetchone ()[0 ]
262+ cur .close ()
263+ return wbx_id
264+
198265 def get_model_id (self , content_model = None ):
199266 """Returns model_id (int) for the given content_model in content_models-table"""
200267 cur = self .conn .cursor ()
@@ -382,3 +449,22 @@ def insert(self, text_id=None, text=None, page_id=None, page_title=None,
382449 cur .execute (q9 )
383450 cur .close ()
384451 pass
452+
453+ def insert_secondary (self , fingerprint = None , new_eid = None , content_model = None ):
454+ """Inserts fingerprint data into 5 (4 per item or property) secondary tables"""
455+ cur = self .conn .cursor ()
456+ for key , v in fingerprint .items ():
457+ wby_id = self .wbt_types .get (key ) # key is 'label', 'alias' or 'description'; wby_id is the corresponding id
458+ for lang , values in v .items ():
459+ values = [values ] if isinstance (values , dict ) else values # for labels and descriptions only, to make them consistent with aliases
460+ for value in values :
461+ wbx_text = self .conn .escape_string (value ['value' ])[:255 ] # escaping & truncating
462+ wbx_id = self .get_wbx_id (wbx_text = wbx_text ) # wbt_text
463+ wbxl_id = self .get_wbxl_id (wbxl_language = lang , wbx_text = wbx_text , wbx_id = wbx_id ) # wbt_text_in_lang
464+ wbtl_id = self .get_wbtl_id (wbtl_type_id = wby_id , wbxl_language = lang , wbx_text = wbx_text , wbxl_id = wbxl_id ) # wbt_term_in_lang
465+ if content_model == 'wikibase-item' :
466+ cur .execute ("INSERT INTO wbt_item_terms VALUES(NULL,%s,%s)" , [new_eid , wbtl_id ])
467+ if content_model == 'wikibase-property' :
468+ cur .execute ("INSERT IGNORE INTO wbt_property_terms VALUES(NULL,%s,%s)" , [new_eid , wbtl_id ])
469+ cur .close ()
470+ pass
0 commit comments