Mercurial > p > roundup > code
diff roundup/backends/indexer_rdbms.py @ 6911:8bda74ee7070
flake8 updates
| author | John Rouillard <rouilj@ieee.org> |
|---|---|
| date | Sat, 03 Sep 2022 22:29:34 -0400 |
| parents | 56c9bcdea47f |
| children |
line wrap: on
line diff
--- a/roundup/backends/indexer_rdbms.py Sat Sep 03 21:41:48 2022 -0400 +++ b/roundup/backends/indexer_rdbms.py Sat Sep 03 22:29:34 2022 -0400 @@ -7,6 +7,7 @@ from roundup.backends.indexer_common import Indexer as IndexerBase from roundup.anypy.strings import us2u, u2s + class Indexer(IndexerBase): def __init__(self, db): IndexerBase.__init__(self, db) @@ -46,19 +47,19 @@ # first, find the id of the (classname, itemid, property) a = self.db.arg sql = 'select _textid from __textids where _class=%s and '\ - '_itemid=%s and _prop=%s'%(a, a, a) + '_itemid=%s and _prop=%s' % (a, a, a) self.db.cursor.execute(sql, identifier) r = self.db.cursor.fetchone() if not r: # not previously indexed id = self.db.newid('__textids') sql = 'insert into __textids (_textid, _class, _itemid, _prop)'\ - ' values (%s, %s, %s, %s)'%(a, a, a, a) + ' values (%s, %s, %s, %s)' % (a, a, a, a) self.db.cursor.execute(sql, (id, ) + identifier) else: id = int(r[0]) # clear out any existing indexed values - sql = 'delete from __words where _textid=%s'%a + sql = 'delete from __words where _textid=%s' % a self.db.cursor.execute(sql, (id, )) # ok, find all the unique words in the text @@ -66,14 +67,16 @@ text = text.upper() wordlist = [u2s(w) for w in re.findall(r'(?u)\b\w{%d,%d}\b' - % (self.minlength, self.maxlength), text)] + % (self.minlength, self.maxlength), + text)] words = set() for word in wordlist: - if self.is_stopword(word): continue + if self.is_stopword(word): + continue words.add(word) # for each word, add an entry in the db - sql = 'insert into __words (_word, _textid) values (%s, %s)'%(a, a) + sql = 'insert into __words (_word, _textid) values (%s, %s)' % (a, a) words = [(word, id) for word in words] self.db.cursor.executemany(sql, words) @@ -85,28 +88,30 @@ if not wordlist: return [] - l = [word.upper() for word in wordlist - if self.minlength <= len(word) <= self.maxlength] - l = [word for word in l if not self.is_stopword(word)] + cap_wl = [word.upper() for word in wordlist + if self.minlength <= len(word) <= self.maxlength] + clean_wl = [word for word in cap_wl if not self.is_stopword(word)] - if not l: + if not clean_wl: return [] if self.db.implements_intersect: # simple AND search - sql = 'select distinct(_textid) from __words where _word=%s'%self.db.arg - sql = '\nINTERSECT\n'.join([sql]*len(l)) - self.db.cursor.execute(sql, tuple(l)) + sql = 'select distinct(_textid) from __words where _word=%s' % ( + self.db.arg) + sql = '\nINTERSECT\n'.join([sql]*len(clean_wl)) + self.db.cursor.execute(sql, tuple(clean_wl)) r = self.db.cursor.fetchall() if not r: return [] a = ','.join([self.db.arg] * len(r)) sql = 'select _class, _itemid, _prop from __textids '\ - 'where _textid in (%s)'%a + 'where _textid in (%s)' % a self.db.cursor.execute(sql, tuple([int(row[0]) for row in r])) else: - # A more complex version for MySQL since it doesn't implement INTERSECT + # A more complex version for MySQL since it doesn't + # implement INTERSECT # Construct SQL statement to join __words table to itself # multiple times. @@ -119,12 +124,13 @@ join_list = [] match_list = [] - for n in range(len(l) - 1): + for n in range(len(clean_wl) - 1): join_list.append(join_tmpl % (n + 2)) match_list.append(match_tmpl % (n + 2, self.db.arg)) - sql = sql%(' '.join(join_list), self.db.arg, ' '.join(match_list)) - self.db.cursor.execute(sql, l) + sql = sql % (' '.join(join_list), self.db.arg, + ' '.join(match_list)) + self.db.cursor.execute(sql, clean_wl) r = [x[0] for x in self.db.cursor.fetchall()] if not r: @@ -132,9 +138,8 @@ a = ','.join([self.db.arg] * len(r)) sql = 'select _class, _itemid, _prop from __textids '\ - 'where _textid in (%s)'%a + 'where _textid in (%s)' % a self.db.cursor.execute(sql, tuple(map(int, r))) return self.db.cursor.fetchall() -
