Mercurial > p > roundup > code
comparison roundup/backends/rdbms_common.py @ 5867:ee2e8f8d6648
Implement exact string search
.. in the 'filter' method of hyperdb.Class (and the corresponding
backend implementations).
| author | Ralf Schlatterbeck <rsc@runtux.com> |
|---|---|
| date | Mon, 26 Aug 2019 18:18:02 +0200 |
| parents | 936275dfe1fa |
| children | 16e1255b16cf |
comparison
equal
deleted
inserted
replaced
| 5865:04deafac71ab | 5867:ee2e8f8d6648 |
|---|---|
| 1512 """ | 1512 """ |
| 1513 # For many databases the LIKE operator ignores case. | 1513 # For many databases the LIKE operator ignores case. |
| 1514 # Postgres and Oracle have an ILIKE operator to support this. | 1514 # Postgres and Oracle have an ILIKE operator to support this. |
| 1515 # We define the default here, can be changed in derivative class | 1515 # We define the default here, can be changed in derivative class |
| 1516 case_insensitive_like = 'LIKE' | 1516 case_insensitive_like = 'LIKE' |
| 1517 | |
| 1518 # For some databases (mysql) the = operator for strings ignores case. | |
| 1519 # We define the default here, can be changed in derivative class | |
| 1520 case_sensitive_equal = '=' | |
| 1517 | 1521 |
| 1518 def schema(self): | 1522 def schema(self): |
| 1519 """ A dumpable version of the schema that we can store in the | 1523 """ A dumpable version of the schema that we can store in the |
| 1520 database | 1524 database |
| 1521 """ | 1525 """ |
| 2397 where = "%s.linkid in (%s)" % ( | 2401 where = "%s.linkid in (%s)" % ( |
| 2398 multilink_table, ','.join([self.db.arg] * len(v))) | 2402 multilink_table, ','.join([self.db.arg] * len(v))) |
| 2399 return where, v, True # True to indicate original | 2403 return where, v, True # True to indicate original |
| 2400 | 2404 |
| 2401 def _filter_sql (self, search_matches, filterspec, srt=[], grp=[], retr=0, | 2405 def _filter_sql (self, search_matches, filterspec, srt=[], grp=[], retr=0, |
| 2402 retired=False): | 2406 retired=False, exact_match_spec={}): |
| 2403 """ Compute the proptree and the SQL/ARGS for a filter. | 2407 """ Compute the proptree and the SQL/ARGS for a filter. |
| 2404 For argument description see filter below. | 2408 For argument description see filter below. |
| 2405 We return a 3-tuple, the proptree, the sql and the sql-args | 2409 We return a 3-tuple, the proptree, the sql and the sql-args |
| 2406 or None if no SQL is necessary. | 2410 or None if no SQL is necessary. |
| 2407 The flag retr serves to retrieve *all* non-Multilink properties | 2411 The flag retr serves to retrieve *all* non-Multilink properties |
| 2421 a = self.db.arg | 2425 a = self.db.arg |
| 2422 | 2426 |
| 2423 # figure the WHERE clause from the filterspec | 2427 # figure the WHERE clause from the filterspec |
| 2424 mlfilt = 0 # are we joining with Multilink tables? | 2428 mlfilt = 0 # are we joining with Multilink tables? |
| 2425 sortattr = self._sortattr (group = grp, sort = srt) | 2429 sortattr = self._sortattr (group = grp, sort = srt) |
| 2426 proptree = self._proptree(filterspec, sortattr, retr) | 2430 proptree = self._proptree(exact_match_spec, filterspec, sortattr, retr) |
| 2427 mlseen = 0 | 2431 mlseen = 0 |
| 2428 for pt in reversed(proptree.sortattr): | 2432 for pt in reversed(proptree.sortattr): |
| 2429 p = pt | 2433 p = pt |
| 2430 while p.parent: | 2434 while p.parent: |
| 2431 if isinstance (p.propclass, Multilink): | 2435 if isinstance (p.propclass, Multilink): |
| 2466 else: | 2470 else: |
| 2467 frum.append(tn) | 2471 frum.append(tn) |
| 2468 gen_join = True | 2472 gen_join = True |
| 2469 | 2473 |
| 2470 if p.has_values and isinstance(v, type([])): | 2474 if p.has_values and isinstance(v, type([])): |
| 2471 result = self._filter_multilink_expression(pln, tn, v) | 2475 result = self._filter_multilink_expression(pln, |
| 2476 tn, v) | |
| 2472 # XXX: We dont need an id join if we used the filter | 2477 # XXX: We dont need an id join if we used the filter |
| 2473 gen_join = len(result) == 3 | 2478 gen_join = len(result) == 3 |
| 2474 | 2479 |
| 2475 if gen_join: | 2480 if gen_join: |
| 2476 where.append('_%s.id=%s.nodeid'%(pln,tn)) | 2481 where.append('_%s.id=%s.nodeid'%(pln,tn)) |
| 2504 args.append(v) | 2509 args.append(v) |
| 2505 if 'sort' in p.need_for or 'retrieve' in p.need_for: | 2510 if 'sort' in p.need_for or 'retrieve' in p.need_for: |
| 2506 rc = oc = ac = '_%s.id'%pln | 2511 rc = oc = ac = '_%s.id'%pln |
| 2507 elif isinstance(propclass, String): | 2512 elif isinstance(propclass, String): |
| 2508 if 'search' in p.need_for: | 2513 if 'search' in p.need_for: |
| 2514 exact = [] | |
| 2509 if not isinstance(v, type([])): | 2515 if not isinstance(v, type([])): |
| 2510 v = [v] | 2516 v = [v] |
| 2511 | 2517 new_v = [] |
| 2512 # Quote special search characters '%' and '_' for | 2518 for x in v: |
| 2513 # correct matching with LIKE/ILIKE | 2519 if isinstance(x, hyperdb.Exact_Match): |
| 2514 # Note that we now pass the elements of v as query | 2520 exact.append(True) |
| 2515 # arguments and don't interpolate the quoted string | 2521 new_v.append(x.value) |
| 2516 # into the sql statement. Should be safer. | 2522 else: |
| 2517 v = [self.db.search_stringquote(s) for s in v] | 2523 exact.append(False) |
| 2524 # Quote special search characters '%' and '_' for | |
| 2525 # correct matching with LIKE/ILIKE | |
| 2526 # Note that we now pass the elements of v as query | |
| 2527 # arguments and don't interpolate the quoted string | |
| 2528 # into the sql statement. Should be safer. | |
| 2529 new_v.append(self.db.search_stringquote(x)) | |
| 2530 v = new_v | |
| 2518 | 2531 |
| 2519 # now add to the where clause | 2532 # now add to the where clause |
| 2520 where.append('(' | 2533 w = [] |
| 2521 +' and '.join(["_%s._%s %s %s ESCAPE %s"%( | 2534 for vv, ex in zip(v, exact): |
| 2522 pln, | 2535 if ex: |
| 2523 k, | 2536 w.append("_%s._%s %s %s"%( |
| 2524 self.case_insensitive_like, | 2537 pln, k, self.case_sensitive_equal, a)) |
| 2525 a, | 2538 args.append(vv) |
| 2526 a) for s in v]) | 2539 else: |
| 2527 +')') | 2540 w.append("_%s._%s %s %s ESCAPE %s"%( |
| 2528 for vv in v: | 2541 pln, k, self.case_insensitive_like, a, a)) |
| 2529 args.extend((vv, '\\')) | 2542 args.extend((vv, '\\')) |
| 2543 where.append ('(' + ' and '.join(w) + ')') | |
| 2530 if 'sort' in p.need_for: | 2544 if 'sort' in p.need_for: |
| 2531 oc = ac = 'lower(_%s._%s)'%(pln, k) | 2545 oc = ac = 'lower(_%s._%s)'%(pln, k) |
| 2532 elif isinstance(propclass, Link): | 2546 elif isinstance(propclass, Link): |
| 2533 if 'search' in p.need_for: | 2547 if 'search' in p.need_for: |
| 2534 if p.children: | 2548 if p.children: |
| 2709 args = tuple(args) | 2723 args = tuple(args) |
| 2710 __traceback_info__ = (sql, args) | 2724 __traceback_info__ = (sql, args) |
| 2711 return proptree, sql, args | 2725 return proptree, sql, args |
| 2712 | 2726 |
| 2713 def filter(self, search_matches, filterspec, sort=[], group=[], | 2727 def filter(self, search_matches, filterspec, sort=[], group=[], |
| 2714 retired=False): | 2728 retired=False, exact_match_spec={}): |
| 2715 """Return a list of the ids of the active nodes in this class that | 2729 """Return a list of the ids of the active nodes in this class that |
| 2716 match the 'filter' spec, sorted by the group spec and then the | 2730 match the 'filter' spec, sorted by the group spec and then the |
| 2717 sort spec | 2731 sort spec |
| 2718 | 2732 |
| 2719 "filterspec" is {propname: value(s)} | 2733 "filterspec" is {propname: value(s)} |
| 2733 """ | 2747 """ |
| 2734 if __debug__: | 2748 if __debug__: |
| 2735 start_t = time.time() | 2749 start_t = time.time() |
| 2736 | 2750 |
| 2737 sq = self._filter_sql (search_matches, filterspec, sort, group, | 2751 sq = self._filter_sql (search_matches, filterspec, sort, group, |
| 2738 retired=retired) | 2752 retired=retired, |
| 2753 exact_match_spec=exact_match_spec) | |
| 2739 # nothing to match? | 2754 # nothing to match? |
| 2740 if sq is None: | 2755 if sq is None: |
| 2741 return [] | 2756 return [] |
| 2742 proptree, sql, args = sq | 2757 proptree, sql, args = sq |
| 2743 | 2758 |
| 2757 if __debug__: | 2772 if __debug__: |
| 2758 self.db.stats['filtering'] += (time.time() - start_t) | 2773 self.db.stats['filtering'] += (time.time() - start_t) |
| 2759 return l | 2774 return l |
| 2760 | 2775 |
| 2761 def filter_iter(self, search_matches, filterspec, sort=[], group=[], | 2776 def filter_iter(self, search_matches, filterspec, sort=[], group=[], |
| 2762 retired=False): | 2777 retired=False, exact_match_spec={}): |
| 2763 """Iterator similar to filter above with same args. | 2778 """Iterator similar to filter above with same args. |
| 2764 Limitation: We don't sort on multilinks. | 2779 Limitation: We don't sort on multilinks. |
| 2765 This uses an optimisation: We put all nodes that are in the | 2780 This uses an optimisation: We put all nodes that are in the |
| 2766 current row into the node cache. Then we return the node id. | 2781 current row into the node cache. Then we return the node id. |
| 2767 That way a fetch of a node won't create another sql-fetch (with | 2782 That way a fetch of a node won't create another sql-fetch (with |
| 2768 a join) from the database because the nodes are already in the | 2783 a join) from the database because the nodes are already in the |
| 2769 cache. We're using our own temporary cursor. | 2784 cache. We're using our own temporary cursor. |
| 2770 """ | 2785 """ |
| 2771 sq = self._filter_sql(search_matches, filterspec, sort, group, retr=1, | 2786 sq = self._filter_sql(search_matches, filterspec, sort, group, retr=1, |
| 2772 retired=retired) | 2787 retired=retired, |
| 2788 exact_match_spec=exact_match_spec) | |
| 2773 # nothing to match? | 2789 # nothing to match? |
| 2774 if sq is None: | 2790 if sq is None: |
| 2775 return | 2791 return |
| 2776 proptree, sql, args = sq | 2792 proptree, sql, args = sq |
| 2777 cursor = self.db.conn.cursor() | 2793 cursor = self.db.conn.cursor() |
