Mercurial > p > roundup > code
comparison roundup/hyperdb.py @ 5867:ee2e8f8d6648
Implement exact string search
.. in the 'filter' method of hyperdb.Class (and the corresponding
backend implementations).
| author | Ralf Schlatterbeck <rsc@runtux.com> |
|---|---|
| date | Mon, 26 Aug 2019 18:18:02 +0200 |
| parents | 936275dfe1fa |
| children | 16e1255b16cf |
comparison
equal
deleted
inserted
replaced
| 5865:04deafac71ab | 5867:ee2e8f8d6648 |
|---|---|
| 339 """ | 339 """ |
| 340 m = dre.match(designator) | 340 m = dre.match(designator) |
| 341 if m is None: | 341 if m is None: |
| 342 raise DesignatorError(_('"%s" not a node designator')%designator) | 342 raise DesignatorError(_('"%s" not a node designator')%designator) |
| 343 return m.group(1), m.group(2) | 343 return m.group(1), m.group(2) |
| 344 | |
| 345 class Exact_Match(object): | |
| 346 """ Used to encapsulate exact match semantics search values | |
| 347 """ | |
| 348 def __init__(self, value): | |
| 349 self.value = value | |
| 344 | 350 |
| 345 class Proptree(object): | 351 class Proptree(object): |
| 346 """ Simple tree data structure for property lookup. Each node in | 352 """ Simple tree data structure for property lookup. Each node in |
| 347 the tree is a roundup Class Property that has to be navigated to | 353 the tree is a roundup Class Property that has to be navigated to |
| 348 find given property. The need_for attribute is used to mark nodes | 354 find given property. The need_for attribute is used to mark nodes |
| 459 """ Recursively search for the given properties in a proptree. | 465 """ Recursively search for the given properties in a proptree. |
| 460 Once all properties are non-transitive, the search generates a | 466 Once all properties are non-transitive, the search generates a |
| 461 simple _filter call which does the real work | 467 simple _filter call which does the real work |
| 462 """ | 468 """ |
| 463 filterspec = {} | 469 filterspec = {} |
| 470 exact_match_spec = {} | |
| 464 for p in self.children: | 471 for p in self.children: |
| 465 if 'search' in p.need_for: | 472 if 'search' in p.need_for: |
| 466 if p.children: | 473 if p.children: |
| 467 p.search(sort = False) | 474 p.search(sort = False) |
| 468 filterspec[p.name] = p.val | 475 if isinstance(p.val, type([])): |
| 476 exact = [] | |
| 477 subst = [] | |
| 478 for v in p.val: | |
| 479 if isinstance(v, Exact_Match): | |
| 480 exact.append(v.value) | |
| 481 else: | |
| 482 subst.append(v) | |
| 483 if exact: | |
| 484 exact_match_spec[p.name] = exact | |
| 485 if subst: | |
| 486 filterspec[p.name] = subst | |
| 487 else: | |
| 488 assert not isinstance(p.val, Exact_Match) | |
| 489 filterspec[p.name] = p.val | |
| 469 self.val = self.cls._filter(search_matches, filterspec, sort and self, | 490 self.val = self.cls._filter(search_matches, filterspec, sort and self, |
| 470 retired=retired) | 491 retired=retired, |
| 492 exact_match_spec=exact_match_spec) | |
| 471 return self.val | 493 return self.val |
| 472 | 494 |
| 473 def sort (self, ids=None): | 495 def sort (self, ids=None): |
| 474 """ Sort ids by the order information stored in self. With | 496 """ Sort ids by the order information stored in self. With |
| 475 optimisations: Some order attributes may be precomputed (by the | 497 optimisations: Some order attributes may be precomputed (by the |
| 553 for pt in self.sortattr: | 575 for pt in self.sortattr: |
| 554 pt.sort_result = None | 576 pt.sort_result = None |
| 555 return ids | 577 return ids |
| 556 | 578 |
| 557 def _set_val(self, val): | 579 def _set_val(self, val): |
| 558 """Check if self._val is already defined. If yes, we compute the | 580 """ Check if self._val is already defined. If yes, we compute the |
| 559 intersection of the old and the new value(s) | 581 intersection of the old and the new value(s) |
| 582 Note: If self is a Leaf node we need to compute a | |
| 583 union: Normally we intersect (logical and) different | |
| 584 subqueries into a Link or Multilink property. But for | |
| 585 leaves we might have a part of a query in a filterspec and | |
| 586 in an exact_match_spec. These have to be all there, the | |
| 587 generated search will ensure a logical and of all tests for | |
| 588 equality/substring search. | |
| 560 """ | 589 """ |
| 561 if self.has_values: | 590 if self.has_values: |
| 562 v = self._val | 591 v = self._val |
| 563 if not isinstance(self._val, type([])): | 592 if not isinstance(self._val, type([])): |
| 564 v = [self._val] | 593 v = [self._val] |
| 565 vals = set(v) | 594 vals = set(v) |
| 566 vals.intersection_update(val) | 595 if not isinstance(val, type([])): |
| 596 val = [val] | |
| 597 # if cls is None we're a leaf | |
| 598 if self.cls: | |
| 599 vals.intersection_update(val) | |
| 600 else: | |
| 601 vals.update(val) | |
| 567 self._val = [v for v in vals] | 602 self._val = [v for v in vals] |
| 568 else: | 603 else: |
| 569 self._val = val | 604 self._val = val |
| 570 self.has_values = True | 605 self.has_values = True |
| 571 | 606 |
| 1259 db.issue.find(messages={'1':1,'3':1}, files={'7':1}) | 1294 db.issue.find(messages={'1':1,'3':1}, files={'7':1}) |
| 1260 """ | 1295 """ |
| 1261 raise NotImplementedError | 1296 raise NotImplementedError |
| 1262 | 1297 |
| 1263 def _filter(self, search_matches, filterspec, sort=(None,None), | 1298 def _filter(self, search_matches, filterspec, sort=(None,None), |
| 1264 group=(None,None), retired=False): | 1299 group=(None,None), retired=False, exact_match_spec={}): |
| 1265 """For some backends this implements the non-transitive | 1300 """For some backends this implements the non-transitive |
| 1266 search, for more information see the filter method. | 1301 search, for more information see the filter method. |
| 1267 """ | 1302 """ |
| 1268 raise NotImplementedError | 1303 raise NotImplementedError |
| 1269 | 1304 |
| 1270 def _proptree(self, filterspec, sortattr=[], retr=False): | 1305 def _proptree(self, exact_match_spec, filterspec, sortattr=[], retr=False): |
| 1271 """Build a tree of all transitive properties in the given | 1306 """Build a tree of all transitive properties in the given |
| 1272 filterspec. | 1307 exact_match_spec/filterspec. |
| 1273 If we retrieve (retr is True) linked items we don't follow | 1308 If we retrieve (retr is True) linked items we don't follow |
| 1274 across multilinks. We also don't follow if the searched value | 1309 across multilinks. We also don't follow if the searched value |
| 1275 can contain NULL values. | 1310 can contain NULL values. |
| 1276 """ | 1311 """ |
| 1277 proptree = Proptree(self.db, self, '', self.getprops(), retr=retr) | 1312 proptree = Proptree(self.db, self, '', self.getprops(), retr=retr) |
| 1278 for key, v in filterspec.items(): | 1313 for exact, spec in enumerate((filterspec, exact_match_spec)): |
| 1279 keys = key.split('.') | 1314 for key, v in spec.items(): |
| 1280 p = proptree | 1315 keys = key.split('.') |
| 1281 mlseen = False | 1316 p = proptree |
| 1282 for k in keys: | 1317 mlseen = False |
| 1283 if isinstance (p.propclass, Multilink): | 1318 for k in keys: |
| 1284 mlseen = True | 1319 if isinstance (p.propclass, Multilink): |
| 1285 isnull = v == '-1' or v is None | 1320 mlseen = True |
| 1286 nullin = isinstance(v, type([])) and ('-1' in v or None in v) | 1321 isnull = v == '-1' or v is None |
| 1287 r = retr and not mlseen and not isnull and not nullin | 1322 islist = isinstance(v, type([])) |
| 1288 p = p.append(k, retr=r) | 1323 nullin = islist and ('-1' in v or None in v) |
| 1289 p.val = v | 1324 r = retr and not mlseen and not isnull and not nullin |
| 1325 p = p.append(k, retr=r) | |
| 1326 if exact: | |
| 1327 if isinstance(v, type([])): | |
| 1328 vv = [] | |
| 1329 for x in v: | |
| 1330 vv.append(Exact_Match(x)) | |
| 1331 p.val = vv | |
| 1332 else: | |
| 1333 p.val = [Exact_Match(v)] | |
| 1334 else: | |
| 1335 p.val = v | |
| 1290 multilinks = {} | 1336 multilinks = {} |
| 1291 for s in sortattr: | 1337 for s in sortattr: |
| 1292 keys = s[1].split('.') | 1338 keys = s[1].split('.') |
| 1293 p = proptree | 1339 p = proptree |
| 1294 mlseen = False | 1340 mlseen = False |
| 1351 if 'id' not in seen : | 1397 if 'id' not in seen : |
| 1352 sortattr.append(('+', 'id')) | 1398 sortattr.append(('+', 'id')) |
| 1353 return sortattr | 1399 return sortattr |
| 1354 | 1400 |
| 1355 def filter(self, search_matches, filterspec, sort=[], group=[], | 1401 def filter(self, search_matches, filterspec, sort=[], group=[], |
| 1356 retired=False): | 1402 retired=False, exact_match_spec={}): |
| 1357 """Return a list of the ids of the active nodes in this class that | 1403 """Return a list of the ids of the active nodes in this class that |
| 1358 match the 'filter' spec, sorted by the group spec and then the | 1404 match the 'filter' spec, sorted by the group spec and then the |
| 1359 sort spec. | 1405 sort spec. |
| 1360 | 1406 |
| 1407 "search_matches" is a container type which by default is None | |
| 1408 and optionally contains IDs of items to match. If non-empty only | |
| 1409 IDs of the initial set are returned. | |
| 1410 | |
| 1361 "filterspec" is {propname: value(s)} | 1411 "filterspec" is {propname: value(s)} |
| 1412 "exact_match_spec" is the same format as "filterspec" but | |
| 1413 specifies exact match for the given propnames. This only makes a | |
| 1414 difference for String properties, these specify case insensitive | |
| 1415 substring search when in "filterspec" and exact match when in | |
| 1416 exact_match_spec. | |
| 1362 | 1417 |
| 1363 "sort" and "group" are [(dir, prop), ...] where dir is '+', '-' | 1418 "sort" and "group" are [(dir, prop), ...] where dir is '+', '-' |
| 1364 or None and prop is a prop name or None. Note that for | 1419 or None and prop is a prop name or None. Note that for |
| 1365 backward-compatibility reasons a single (dir, prop) tuple is | 1420 backward-compatibility reasons a single (dir, prop) tuple is |
| 1366 also allowed. | 1421 also allowed. |
| 1367 | 1422 |
| 1368 "search_matches" is a container type | 1423 The parameter retired when set to False, returns only live |
| 1424 (un-retired) results. When setting it to True, only retired | |
| 1425 items are returned. If None, both retired and unretired items | |
| 1426 are returned. The default is False, i.e. only live items are | |
| 1427 returned by default. | |
| 1369 | 1428 |
| 1370 The filter must match all properties specified. If the property | 1429 The filter must match all properties specified. If the property |
| 1371 value to match is a list: | 1430 value to match is a list: |
| 1372 | 1431 |
| 1373 1. String properties must match all elements in the list, and | 1432 1. String properties must match all elements in the list, and |
| 1374 2. Other properties must match any of the elements in the list. | 1433 2. Other properties must match any of the elements in the list. |
| 1375 | 1434 |
| 1376 Note that now the propname in filterspec and prop in a | 1435 This also means that for strings in exact_match_spec it doesn't |
| 1377 sort/group spec may be transitive, i.e., it may contain | 1436 make sense to specify multiple values because those cannot all |
| 1378 properties of the form link.link.link.name, e.g. you can search | 1437 be matched exactly. |
| 1379 for all issues where a message was added by a certain user in | 1438 |
| 1380 the last week with a filterspec of | 1439 The propname in filterspec and prop in a sort/group spec may be |
| 1440 transitive, i.e., it may contain properties of the form | |
| 1441 link.link.link.name, e.g. you can search for all issues where a | |
| 1442 message was added by a certain user in the last week with a | |
| 1443 filterspec of | |
| 1381 {'messages.author' : '42', 'messages.creation' : '.-1w;'} | 1444 {'messages.author' : '42', 'messages.creation' : '.-1w;'} |
| 1382 | 1445 |
| 1383 Implementation note: | 1446 Implementation note: |
| 1384 This implements a non-optimized version of Transitive search | 1447 This implements a non-optimized version of Transitive search |
| 1385 using _filter implemented in a backend class. A more efficient | 1448 using _filter implemented in a backend class. A more efficient |
| 1386 version can be implemented in the individual backends -- e.g., | 1449 version can be implemented in the individual backends -- e.g., |
| 1387 an SQL backend will want to create a single SQL statement and | 1450 an SQL backend will want to create a single SQL statement and |
| 1388 override the filter method instead of implementing _filter. | 1451 override the filter method instead of implementing _filter. |
| 1389 """ | 1452 """ |
| 1390 sortattr = self._sortattr(sort = sort, group = group) | 1453 sortattr = self._sortattr(sort = sort, group = group) |
| 1391 proptree = self._proptree(filterspec, sortattr) | 1454 proptree = self._proptree(exact_match_spec, filterspec, sortattr) |
| 1392 proptree.search(search_matches, retired=retired) | 1455 proptree.search(search_matches, retired=retired) |
| 1393 return proptree.sort() | 1456 return proptree.sort() |
| 1394 | 1457 |
| 1395 # non-optimized filter_iter, a backend may choose to implement a | 1458 # non-optimized filter_iter, a backend may choose to implement a |
| 1396 # better version that provides a real iterator that pre-fills the | 1459 # better version that provides a real iterator that pre-fills the |
