comparison roundup/backends/rdbms_common.py @ 5867:ee2e8f8d6648

Implement exact string search .. in the 'filter' method of hyperdb.Class (and the corresponding backend implementations).
author Ralf Schlatterbeck <rsc@runtux.com>
date Mon, 26 Aug 2019 18:18:02 +0200
parents 936275dfe1fa
children 16e1255b16cf
comparison
equal deleted inserted replaced
5865:04deafac71ab 5867:ee2e8f8d6648
1512 """ 1512 """
1513 # For many databases the LIKE operator ignores case. 1513 # For many databases the LIKE operator ignores case.
1514 # Postgres and Oracle have an ILIKE operator to support this. 1514 # Postgres and Oracle have an ILIKE operator to support this.
1515 # We define the default here, can be changed in derivative class 1515 # We define the default here, can be changed in derivative class
1516 case_insensitive_like = 'LIKE' 1516 case_insensitive_like = 'LIKE'
1517
1518 # For some databases (mysql) the = operator for strings ignores case.
1519 # We define the default here, can be changed in derivative class
1520 case_sensitive_equal = '='
1517 1521
1518 def schema(self): 1522 def schema(self):
1519 """ A dumpable version of the schema that we can store in the 1523 """ A dumpable version of the schema that we can store in the
1520 database 1524 database
1521 """ 1525 """
2397 where = "%s.linkid in (%s)" % ( 2401 where = "%s.linkid in (%s)" % (
2398 multilink_table, ','.join([self.db.arg] * len(v))) 2402 multilink_table, ','.join([self.db.arg] * len(v)))
2399 return where, v, True # True to indicate original 2403 return where, v, True # True to indicate original
2400 2404
2401 def _filter_sql (self, search_matches, filterspec, srt=[], grp=[], retr=0, 2405 def _filter_sql (self, search_matches, filterspec, srt=[], grp=[], retr=0,
2402 retired=False): 2406 retired=False, exact_match_spec={}):
2403 """ Compute the proptree and the SQL/ARGS for a filter. 2407 """ Compute the proptree and the SQL/ARGS for a filter.
2404 For argument description see filter below. 2408 For argument description see filter below.
2405 We return a 3-tuple, the proptree, the sql and the sql-args 2409 We return a 3-tuple, the proptree, the sql and the sql-args
2406 or None if no SQL is necessary. 2410 or None if no SQL is necessary.
2407 The flag retr serves to retrieve *all* non-Multilink properties 2411 The flag retr serves to retrieve *all* non-Multilink properties
2421 a = self.db.arg 2425 a = self.db.arg
2422 2426
2423 # figure the WHERE clause from the filterspec 2427 # figure the WHERE clause from the filterspec
2424 mlfilt = 0 # are we joining with Multilink tables? 2428 mlfilt = 0 # are we joining with Multilink tables?
2425 sortattr = self._sortattr (group = grp, sort = srt) 2429 sortattr = self._sortattr (group = grp, sort = srt)
2426 proptree = self._proptree(filterspec, sortattr, retr) 2430 proptree = self._proptree(exact_match_spec, filterspec, sortattr, retr)
2427 mlseen = 0 2431 mlseen = 0
2428 for pt in reversed(proptree.sortattr): 2432 for pt in reversed(proptree.sortattr):
2429 p = pt 2433 p = pt
2430 while p.parent: 2434 while p.parent:
2431 if isinstance (p.propclass, Multilink): 2435 if isinstance (p.propclass, Multilink):
2466 else: 2470 else:
2467 frum.append(tn) 2471 frum.append(tn)
2468 gen_join = True 2472 gen_join = True
2469 2473
2470 if p.has_values and isinstance(v, type([])): 2474 if p.has_values and isinstance(v, type([])):
2471 result = self._filter_multilink_expression(pln, tn, v) 2475 result = self._filter_multilink_expression(pln,
2476 tn, v)
2472 # XXX: We dont need an id join if we used the filter 2477 # XXX: We dont need an id join if we used the filter
2473 gen_join = len(result) == 3 2478 gen_join = len(result) == 3
2474 2479
2475 if gen_join: 2480 if gen_join:
2476 where.append('_%s.id=%s.nodeid'%(pln,tn)) 2481 where.append('_%s.id=%s.nodeid'%(pln,tn))
2504 args.append(v) 2509 args.append(v)
2505 if 'sort' in p.need_for or 'retrieve' in p.need_for: 2510 if 'sort' in p.need_for or 'retrieve' in p.need_for:
2506 rc = oc = ac = '_%s.id'%pln 2511 rc = oc = ac = '_%s.id'%pln
2507 elif isinstance(propclass, String): 2512 elif isinstance(propclass, String):
2508 if 'search' in p.need_for: 2513 if 'search' in p.need_for:
2514 exact = []
2509 if not isinstance(v, type([])): 2515 if not isinstance(v, type([])):
2510 v = [v] 2516 v = [v]
2511 2517 new_v = []
2512 # Quote special search characters '%' and '_' for 2518 for x in v:
2513 # correct matching with LIKE/ILIKE 2519 if isinstance(x, hyperdb.Exact_Match):
2514 # Note that we now pass the elements of v as query 2520 exact.append(True)
2515 # arguments and don't interpolate the quoted string 2521 new_v.append(x.value)
2516 # into the sql statement. Should be safer. 2522 else:
2517 v = [self.db.search_stringquote(s) for s in v] 2523 exact.append(False)
2524 # Quote special search characters '%' and '_' for
2525 # correct matching with LIKE/ILIKE
2526 # Note that we now pass the elements of v as query
2527 # arguments and don't interpolate the quoted string
2528 # into the sql statement. Should be safer.
2529 new_v.append(self.db.search_stringquote(x))
2530 v = new_v
2518 2531
2519 # now add to the where clause 2532 # now add to the where clause
2520 where.append('(' 2533 w = []
2521 +' and '.join(["_%s._%s %s %s ESCAPE %s"%( 2534 for vv, ex in zip(v, exact):
2522 pln, 2535 if ex:
2523 k, 2536 w.append("_%s._%s %s %s"%(
2524 self.case_insensitive_like, 2537 pln, k, self.case_sensitive_equal, a))
2525 a, 2538 args.append(vv)
2526 a) for s in v]) 2539 else:
2527 +')') 2540 w.append("_%s._%s %s %s ESCAPE %s"%(
2528 for vv in v: 2541 pln, k, self.case_insensitive_like, a, a))
2529 args.extend((vv, '\\')) 2542 args.extend((vv, '\\'))
2543 where.append ('(' + ' and '.join(w) + ')')
2530 if 'sort' in p.need_for: 2544 if 'sort' in p.need_for:
2531 oc = ac = 'lower(_%s._%s)'%(pln, k) 2545 oc = ac = 'lower(_%s._%s)'%(pln, k)
2532 elif isinstance(propclass, Link): 2546 elif isinstance(propclass, Link):
2533 if 'search' in p.need_for: 2547 if 'search' in p.need_for:
2534 if p.children: 2548 if p.children:
2709 args = tuple(args) 2723 args = tuple(args)
2710 __traceback_info__ = (sql, args) 2724 __traceback_info__ = (sql, args)
2711 return proptree, sql, args 2725 return proptree, sql, args
2712 2726
2713 def filter(self, search_matches, filterspec, sort=[], group=[], 2727 def filter(self, search_matches, filterspec, sort=[], group=[],
2714 retired=False): 2728 retired=False, exact_match_spec={}):
2715 """Return a list of the ids of the active nodes in this class that 2729 """Return a list of the ids of the active nodes in this class that
2716 match the 'filter' spec, sorted by the group spec and then the 2730 match the 'filter' spec, sorted by the group spec and then the
2717 sort spec 2731 sort spec
2718 2732
2719 "filterspec" is {propname: value(s)} 2733 "filterspec" is {propname: value(s)}
2733 """ 2747 """
2734 if __debug__: 2748 if __debug__:
2735 start_t = time.time() 2749 start_t = time.time()
2736 2750
2737 sq = self._filter_sql (search_matches, filterspec, sort, group, 2751 sq = self._filter_sql (search_matches, filterspec, sort, group,
2738 retired=retired) 2752 retired=retired,
2753 exact_match_spec=exact_match_spec)
2739 # nothing to match? 2754 # nothing to match?
2740 if sq is None: 2755 if sq is None:
2741 return [] 2756 return []
2742 proptree, sql, args = sq 2757 proptree, sql, args = sq
2743 2758
2757 if __debug__: 2772 if __debug__:
2758 self.db.stats['filtering'] += (time.time() - start_t) 2773 self.db.stats['filtering'] += (time.time() - start_t)
2759 return l 2774 return l
2760 2775
2761 def filter_iter(self, search_matches, filterspec, sort=[], group=[], 2776 def filter_iter(self, search_matches, filterspec, sort=[], group=[],
2762 retired=False): 2777 retired=False, exact_match_spec={}):
2763 """Iterator similar to filter above with same args. 2778 """Iterator similar to filter above with same args.
2764 Limitation: We don't sort on multilinks. 2779 Limitation: We don't sort on multilinks.
2765 This uses an optimisation: We put all nodes that are in the 2780 This uses an optimisation: We put all nodes that are in the
2766 current row into the node cache. Then we return the node id. 2781 current row into the node cache. Then we return the node id.
2767 That way a fetch of a node won't create another sql-fetch (with 2782 That way a fetch of a node won't create another sql-fetch (with
2768 a join) from the database because the nodes are already in the 2783 a join) from the database because the nodes are already in the
2769 cache. We're using our own temporary cursor. 2784 cache. We're using our own temporary cursor.
2770 """ 2785 """
2771 sq = self._filter_sql(search_matches, filterspec, sort, group, retr=1, 2786 sq = self._filter_sql(search_matches, filterspec, sort, group, retr=1,
2772 retired=retired) 2787 retired=retired,
2788 exact_match_spec=exact_match_spec)
2773 # nothing to match? 2789 # nothing to match?
2774 if sq is None: 2790 if sq is None:
2775 return 2791 return
2776 proptree, sql, args = sq 2792 proptree, sql, args = sq
2777 cursor = self.db.conn.cursor() 2793 cursor = self.db.conn.cursor()

Roundup Issue Tracker: http://roundup-tracker.org/