comparison roundup/hyperdb.py @ 5867:ee2e8f8d6648

Implement exact string search in the 'filter' method of hyperdb.Class (and the corresponding backend implementations).
author Ralf Schlatterbeck <rsc@runtux.com>
date Mon, 26 Aug 2019 18:18:02 +0200
parents 936275dfe1fa
children 16e1255b16cf
comparison
equal deleted inserted replaced
5865:04deafac71ab 5867:ee2e8f8d6648
339 """ 339 """
340 m = dre.match(designator) 340 m = dre.match(designator)
341 if m is None: 341 if m is None:
342 raise DesignatorError(_('"%s" not a node designator')%designator) 342 raise DesignatorError(_('"%s" not a node designator')%designator)
343 return m.group(1), m.group(2) 343 return m.group(1), m.group(2)
344
345 class Exact_Match(object):
346 """ Used to encapsulate exact match semantics search values
347 """
348 def __init__(self, value):
349 self.value = value
344 350
345 class Proptree(object): 351 class Proptree(object):
346 """ Simple tree data structure for property lookup. Each node in 352 """ Simple tree data structure for property lookup. Each node in
347 the tree is a roundup Class Property that has to be navigated to 353 the tree is a roundup Class Property that has to be navigated to
348 find given property. The need_for attribute is used to mark nodes 354 find given property. The need_for attribute is used to mark nodes
459 """ Recursively search for the given properties in a proptree. 465 """ Recursively search for the given properties in a proptree.
460 Once all properties are non-transitive, the search generates a 466 Once all properties are non-transitive, the search generates a
461 simple _filter call which does the real work 467 simple _filter call which does the real work
462 """ 468 """
463 filterspec = {} 469 filterspec = {}
470 exact_match_spec = {}
464 for p in self.children: 471 for p in self.children:
465 if 'search' in p.need_for: 472 if 'search' in p.need_for:
466 if p.children: 473 if p.children:
467 p.search(sort = False) 474 p.search(sort = False)
468 filterspec[p.name] = p.val 475 if isinstance(p.val, type([])):
476 exact = []
477 subst = []
478 for v in p.val:
479 if isinstance(v, Exact_Match):
480 exact.append(v.value)
481 else:
482 subst.append(v)
483 if exact:
484 exact_match_spec[p.name] = exact
485 if subst:
486 filterspec[p.name] = subst
487 else:
488 assert not isinstance(p.val, Exact_Match)
489 filterspec[p.name] = p.val
469 self.val = self.cls._filter(search_matches, filterspec, sort and self, 490 self.val = self.cls._filter(search_matches, filterspec, sort and self,
470 retired=retired) 491 retired=retired,
492 exact_match_spec=exact_match_spec)
471 return self.val 493 return self.val
472 494
473 def sort (self, ids=None): 495 def sort (self, ids=None):
474 """ Sort ids by the order information stored in self. With 496 """ Sort ids by the order information stored in self. With
475 optimisations: Some order attributes may be precomputed (by the 497 optimisations: Some order attributes may be precomputed (by the
553 for pt in self.sortattr: 575 for pt in self.sortattr:
554 pt.sort_result = None 576 pt.sort_result = None
555 return ids 577 return ids
556 578
557 def _set_val(self, val): 579 def _set_val(self, val):
558 """Check if self._val is already defined. If yes, we compute the 580 """ Check if self._val is already defined. If yes, we compute the
559 intersection of the old and the new value(s) 581 intersection of the old and the new value(s)
582 Note: If self is a Leaf node we need to compute a
583 union: Normally we intersect (logical and) different
584 subqueries into a Link or Multilink property. But for
585 leaves we might have a part of a query in a filterspec and
586 in an exact_match_spec. These have to be all there, the
587 generated search will ensure a logical and of all tests for
588 equality/substring search.
560 """ 589 """
561 if self.has_values: 590 if self.has_values:
562 v = self._val 591 v = self._val
563 if not isinstance(self._val, type([])): 592 if not isinstance(self._val, type([])):
564 v = [self._val] 593 v = [self._val]
565 vals = set(v) 594 vals = set(v)
566 vals.intersection_update(val) 595 if not isinstance(val, type([])):
596 val = [val]
597 # if cls is None we're a leaf
598 if self.cls:
599 vals.intersection_update(val)
600 else:
601 vals.update(val)
567 self._val = [v for v in vals] 602 self._val = [v for v in vals]
568 else: 603 else:
569 self._val = val 604 self._val = val
570 self.has_values = True 605 self.has_values = True
571 606
1259 db.issue.find(messages={'1':1,'3':1}, files={'7':1}) 1294 db.issue.find(messages={'1':1,'3':1}, files={'7':1})
1260 """ 1295 """
1261 raise NotImplementedError 1296 raise NotImplementedError
1262 1297
1263 def _filter(self, search_matches, filterspec, sort=(None,None), 1298 def _filter(self, search_matches, filterspec, sort=(None,None),
1264 group=(None,None), retired=False): 1299 group=(None,None), retired=False, exact_match_spec={}):
1265 """For some backends this implements the non-transitive 1300 """For some backends this implements the non-transitive
1266 search, for more information see the filter method. 1301 search, for more information see the filter method.
1267 """ 1302 """
1268 raise NotImplementedError 1303 raise NotImplementedError
1269 1304
1270 def _proptree(self, filterspec, sortattr=[], retr=False): 1305 def _proptree(self, exact_match_spec, filterspec, sortattr=[], retr=False):
1271 """Build a tree of all transitive properties in the given 1306 """Build a tree of all transitive properties in the given
1272 filterspec. 1307 exact_match_spec/filterspec.
1273 If we retrieve (retr is True) linked items we don't follow 1308 If we retrieve (retr is True) linked items we don't follow
1274 across multilinks. We also don't follow if the searched value 1309 across multilinks. We also don't follow if the searched value
1275 can contain NULL values. 1310 can contain NULL values.
1276 """ 1311 """
1277 proptree = Proptree(self.db, self, '', self.getprops(), retr=retr) 1312 proptree = Proptree(self.db, self, '', self.getprops(), retr=retr)
1278 for key, v in filterspec.items(): 1313 for exact, spec in enumerate((filterspec, exact_match_spec)):
1279 keys = key.split('.') 1314 for key, v in spec.items():
1280 p = proptree 1315 keys = key.split('.')
1281 mlseen = False 1316 p = proptree
1282 for k in keys: 1317 mlseen = False
1283 if isinstance (p.propclass, Multilink): 1318 for k in keys:
1284 mlseen = True 1319 if isinstance (p.propclass, Multilink):
1285 isnull = v == '-1' or v is None 1320 mlseen = True
1286 nullin = isinstance(v, type([])) and ('-1' in v or None in v) 1321 isnull = v == '-1' or v is None
1287 r = retr and not mlseen and not isnull and not nullin 1322 islist = isinstance(v, type([]))
1288 p = p.append(k, retr=r) 1323 nullin = islist and ('-1' in v or None in v)
1289 p.val = v 1324 r = retr and not mlseen and not isnull and not nullin
1325 p = p.append(k, retr=r)
1326 if exact:
1327 if isinstance(v, type([])):
1328 vv = []
1329 for x in v:
1330 vv.append(Exact_Match(x))
1331 p.val = vv
1332 else:
1333 p.val = [Exact_Match(v)]
1334 else:
1335 p.val = v
1290 multilinks = {} 1336 multilinks = {}
1291 for s in sortattr: 1337 for s in sortattr:
1292 keys = s[1].split('.') 1338 keys = s[1].split('.')
1293 p = proptree 1339 p = proptree
1294 mlseen = False 1340 mlseen = False
1351 if 'id' not in seen : 1397 if 'id' not in seen :
1352 sortattr.append(('+', 'id')) 1398 sortattr.append(('+', 'id'))
1353 return sortattr 1399 return sortattr
1354 1400
1355 def filter(self, search_matches, filterspec, sort=[], group=[], 1401 def filter(self, search_matches, filterspec, sort=[], group=[],
1356 retired=False): 1402 retired=False, exact_match_spec={}):
1357 """Return a list of the ids of the active nodes in this class that 1403 """Return a list of the ids of the active nodes in this class that
1358 match the 'filter' spec, sorted by the group spec and then the 1404 match the 'filter' spec, sorted by the group spec and then the
1359 sort spec. 1405 sort spec.
1360 1406
1407 "search_matches" is a container type which by default is None
1408 and optionally contains IDs of items to match. If non-empty only
1409 IDs of the initial set are returned.
1410
1361 "filterspec" is {propname: value(s)} 1411 "filterspec" is {propname: value(s)}
1412 "exact_match_spec" is the same format as "filterspec" but
1413 specifies exact match for the given propnames. This only makes a
1414 difference for String properties, these specify case insensitive
1415 substring search when in "filterspec" and exact match when in
1416 exact_match_spec.
1362 1417
1363 "sort" and "group" are [(dir, prop), ...] where dir is '+', '-' 1418 "sort" and "group" are [(dir, prop), ...] where dir is '+', '-'
1364 or None and prop is a prop name or None. Note that for 1419 or None and prop is a prop name or None. Note that for
1365 backward-compatibility reasons a single (dir, prop) tuple is 1420 backward-compatibility reasons a single (dir, prop) tuple is
1366 also allowed. 1421 also allowed.
1367 1422
1368 "search_matches" is a container type 1423 The parameter retired when set to False, returns only live
1424 (un-retired) results. When setting it to True, only retired
1425 items are returned. If None, both retired and unretired items
1426 are returned. The default is False, i.e. only live items are
1427 returned by default.
1369 1428
1370 The filter must match all properties specificed. If the property 1429 The filter must match all properties specificed. If the property
1371 value to match is a list: 1430 value to match is a list:
1372 1431
1373 1. String properties must match all elements in the list, and 1432 1. String properties must match all elements in the list, and
1374 2. Other properties must match any of the elements in the list. 1433 2. Other properties must match any of the elements in the list.
1375 1434
1376 Note that now the propname in filterspec and prop in a 1435 This also means that for strings in exact_match_spec it doesn't
1377 sort/group spec may be transitive, i.e., it may contain 1436 make sense to specify multiple values because those cannot all
1378 properties of the form link.link.link.name, e.g. you can search 1437 be matched exactly.
1379 for all issues where a message was added by a certain user in 1438
1380 the last week with a filterspec of 1439 The propname in filterspec and prop in a sort/group spec may be
1440 transitive, i.e., it may contain properties of the form
1441 link.link.link.name, e.g. you can search for all issues where a
1442 message was added by a certain user in the last week with a
1443 filterspec of
1381 {'messages.author' : '42', 'messages.creation' : '.-1w;'} 1444 {'messages.author' : '42', 'messages.creation' : '.-1w;'}
1382 1445
1383 Implementation note: 1446 Implementation note:
1384 This implements a non-optimized version of Transitive search 1447 This implements a non-optimized version of Transitive search
1385 using _filter implemented in a backend class. A more efficient 1448 using _filter implemented in a backend class. A more efficient
1386 version can be implemented in the individual backends -- e.g., 1449 version can be implemented in the individual backends -- e.g.,
1387 an SQL backend will want to create a single SQL statement and 1450 an SQL backend will want to create a single SQL statement and
1388 override the filter method instead of implementing _filter. 1451 override the filter method instead of implementing _filter.
1389 """ 1452 """
1390 sortattr = self._sortattr(sort = sort, group = group) 1453 sortattr = self._sortattr(sort = sort, group = group)
1391 proptree = self._proptree(filterspec, sortattr) 1454 proptree = self._proptree(exact_match_spec, filterspec, sortattr)
1392 proptree.search(search_matches, retired=retired) 1455 proptree.search(search_matches, retired=retired)
1393 return proptree.sort() 1456 return proptree.sort()
1394 1457
1395 # non-optimized filter_iter, a backend may chose to implement a 1458 # non-optimized filter_iter, a backend may chose to implement a
1396 # better version that provides a real iterator that pre-fills the 1459 # better version that provides a real iterator that pre-fills the

Roundup Issue Tracker: http://roundup-tracker.org/