comparison roundup/hyperdb.py @ 4472:34dce76bb202

Multilink fixes and optimizations:
- Optimization: Late evaluation of Multilinks (only in rdbms backends): previously we materialized each multilink in a Node -- this created an SQL query for each multilink (e.g. 'files' and 'messages' for each line in the issue index display) -- even if the multilinks weren't displayed. Now we compute multilinks only if they're accessed (and keep them cached).
- Add a filter_iter similar to the existing filter call. This feature is considered experimental. It is currently not used in the web interface but passes all tests for the filter call except sorting by Multilinks (which isn't supported by SQL and isn't a sane concept anyway). Using filter_iter instead of filter saves a *lot* of SQL queries: filter returns only the IDs of Nodes in the database, so the additional content of each Node has to be fetched in a separate SQL call. The new filter_iter also returns the IDs of Nodes (one by one, since it is an iterator) but pre-seeds the cache with the content of each Node. The information needed for seeding the cache is retrieved in the same SQL query as the IDs.
author Ralf Schlatterbeck <schlatterbeck@users.sourceforge.net>
date Mon, 21 Mar 2011 20:44:39 +0000
parents 9b619dcb030a
children 1613754d2646
comparison
equal deleted inserted replaced
4471:4f353d71d716 4472:34dce76bb202
282 282
283 class Proptree(object): 283 class Proptree(object):
284 """ Simple tree data structure for optimizing searching of 284 """ Simple tree data structure for optimizing searching of
285 properties. Each node in the tree represents a roundup Class 285 properties. Each node in the tree represents a roundup Class
286 Property that has to be navigated for finding the given search 286 Property that has to be navigated for finding the given search
287 or sort properties. The sort_type attribute is used for 287 or sort properties. The need_for attribute is used for
288 distinguishing nodes in the tree used for sorting or searching: If 288 distinguishing nodes in the tree used for sorting, searching or
289 it is 0 for a node, that node is not used for sorting. If it is 1, 289 retrieval: The attribute is a dictionary containing one or several
290 it is used for both, sorting and searching. If it is 2 it is used 290 of the values 'sort', 'search', 'retrieve'.
291 for sorting only.
292 291
293 The Proptree is also used for transitively searching attributes for 292 The Proptree is also used for transitively searching attributes for
294 backends that do not support transitive search (e.g. anydbm). The 293 backends that do not support transitive search (e.g. anydbm). The
295 _val attribute with set_val is used for this. 294 _val attribute with set_val is used for this.
296 """ 295 """
297 296
298 def __init__(self, db, cls, name, props, parent = None): 297 def __init__(self, db, cls, name, props, parent=None, retr=False):
299 self.db = db 298 self.db = db
300 self.name = name 299 self.name = name
301 self.props = props 300 self.props = props
302 self.parent = parent 301 self.parent = parent
303 self._val = None 302 self._val = None
306 self.classname = None 305 self.classname = None
307 self.uniqname = None 306 self.uniqname = None
308 self.children = [] 307 self.children = []
309 self.sortattr = [] 308 self.sortattr = []
310 self.propdict = {} 309 self.propdict = {}
311 self.sort_type = 0 310 self.need_for = {'search' : True}
312 self.sort_direction = None 311 self.sort_direction = None
313 self.sort_ids = None 312 self.sort_ids = None
314 self.sort_ids_needed = False 313 self.sort_ids_needed = False
315 self.sort_result = None 314 self.sort_result = None
316 self.attr_sort_done = False 315 self.attr_sort_done = False
317 self.tree_sort_done = False 316 self.tree_sort_done = False
318 self.propclass = None 317 self.propclass = None
319 self.orderby = [] 318 self.orderby = []
319 self.sql_idx = None # index of retrieved column in sql result
320 if parent: 320 if parent:
321 self.root = parent.root 321 self.root = parent.root
322 self.depth = parent.depth + 1 322 self.depth = parent.depth + 1
323 else: 323 else:
324 self.root = self 324 self.root = self
325 self.seqno = 1 325 self.seqno = 1
326 self.depth = 0 326 self.depth = 0
327 self.sort_type = 1 327 self.need_for['sort'] = True
328 self.id = self.root.seqno 328 self.id = self.root.seqno
329 self.root.seqno += 1 329 self.root.seqno += 1
330 if self.cls: 330 if self.cls:
331 self.classname = self.cls.classname 331 self.classname = self.cls.classname
332 self.uniqname = '%s%s' % (self.cls.classname, self.id) 332 self.uniqname = '%s%s' % (self.cls.classname, self.id)
333 if not self.parent: 333 if not self.parent:
334 self.uniqname = self.cls.classname 334 self.uniqname = self.cls.classname
335 335 if retr:
336 def append(self, name, sort_type = 0): 336 self.append_retr_props()
337
338 def append(self, name, need_for='search', retr=False):
337 """Append a property to self.children. Will create a new 339 """Append a property to self.children. Will create a new
338 propclass for the child. 340 propclass for the child.
339 """ 341 """
340 if name in self.propdict: 342 if name in self.propdict:
341 pt = self.propdict[name] 343 pt = self.propdict[name]
342 if sort_type and not pt.sort_type: 344 pt.need_for[need_for] = True
343 pt.sort_type = 1 345 if retr and isinstance(pt.propclass, Link):
346 pt.append_retr_props()
344 return pt 347 return pt
345 propclass = self.props[name] 348 propclass = self.props[name]
346 cls = None 349 cls = None
347 props = None 350 props = None
348 if isinstance(propclass, (Link, Multilink)): 351 if isinstance(propclass, (Link, Multilink)):
349 cls = self.db.getclass(propclass.classname) 352 cls = self.db.getclass(propclass.classname)
350 props = cls.getprops() 353 props = cls.getprops()
351 child = self.__class__(self.db, cls, name, props, parent = self) 354 child = self.__class__(self.db, cls, name, props, parent = self)
352 child.sort_type = sort_type 355 child.need_for = {need_for : True}
353 child.propclass = propclass 356 child.propclass = propclass
354 self.children.append(child) 357 self.children.append(child)
355 self.propdict[name] = child 358 self.propdict[name] = child
359 if retr and isinstance(child.propclass, Link):
360 child.append_retr_props()
356 return child 361 return child
362
363 def append_retr_props(self):
364 """Append properties for retrieval."""
365 for name, prop in self.cls.getprops(protected=1).iteritems():
366 if isinstance(prop, Multilink):
367 continue
368 self.append(name, need_for='retrieve')
357 369
358 def compute_sort_done(self, mlseen=False): 370 def compute_sort_done(self, mlseen=False):
359 """ Recursively check if attribute is needed for sorting 371 """ Recursively check if attribute is needed for sorting
360 (self.sort_type > 0) or all children have tree_sort_done set and 372 ('sort' in self.need_for) or all children have tree_sort_done set and
361 sort_ids_needed unset: set self.tree_sort_done if one of the conditions 373 sort_ids_needed unset: set self.tree_sort_done if one of the conditions
362 holds. Also remove sort_ids_needed recursively once having seen a 374 holds. Also remove sort_ids_needed recursively once having seen a
363 Multilink. 375 Multilink.
364 """ 376 """
365 if isinstance (self.propclass, Multilink): 377 if isinstance (self.propclass, Multilink):
369 self.tree_sort_done = True 381 self.tree_sort_done = True
370 for p in self.children: 382 for p in self.children:
371 p.compute_sort_done(mlseen) 383 p.compute_sort_done(mlseen)
372 if not p.tree_sort_done: 384 if not p.tree_sort_done:
373 self.tree_sort_done = False 385 self.tree_sort_done = False
374 if not self.sort_type: 386 if 'sort' not in self.need_for:
375 self.tree_sort_done = True 387 self.tree_sort_done = True
376 if mlseen: 388 if mlseen:
377 self.tree_sort_done = False 389 self.tree_sort_done = False
378 390
379 def ancestors(self): 391 def ancestors(self):
387 Once all properties are non-transitive, the search generates a 399 Once all properties are non-transitive, the search generates a
388 simple _filter call which does the real work 400 simple _filter call which does the real work
389 """ 401 """
390 filterspec = {} 402 filterspec = {}
391 for p in self.children: 403 for p in self.children:
392 if p.sort_type < 2: 404 if 'search' in p.need_for:
393 if p.children: 405 if p.children:
394 p.search(sort = False) 406 p.search(sort = False)
395 filterspec[p.name] = p.val 407 filterspec[p.name] = p.val
396 self.val = self.cls._filter(search_matches, filterspec, sort and self) 408 self.val = self.cls._filter(search_matches, filterspec, sort and self)
397 return self.val 409 return self.val
411 """ All children needed for sorting. If intermediate is True, 423 """ All children needed for sorting. If intermediate is True,
412 intermediate nodes (not being a sort attribute) are returned, 424 intermediate nodes (not being a sort attribute) are returned,
413 too. 425 too.
414 """ 426 """
415 return [p for p in self.children 427 return [p for p in self.children
416 if p.sort_type > 0 and (intermediate or p.sort_direction)] 428 if 'sort' in p.need_for and (intermediate or p.sort_direction)]
417 429
418 def __iter__(self): 430 def __iter__(self):
419 """ Yield nodes in depth-first order -- visited nodes first """ 431 """ Yield nodes in depth-first order -- visited nodes first """
420 for p in self.children: 432 for p in self.children:
421 yield p 433 yield p
532 dir_idx.append (idx) 544 dir_idx.append (idx)
533 directions.append (sa.sort_direction) 545 directions.append (sa.sort_direction)
534 curdir = sa.sort_direction 546 curdir = sa.sort_direction
535 idx += 1 547 idx += 1
536 sortattr.append (val) 548 sortattr.append (val)
537 #print >> sys.stderr, "\nsortattr", sortattr
538 sortattr = zip (*sortattr) 549 sortattr = zip (*sortattr)
539 for dir, i in reversed(zip(directions, dir_idx)): 550 for dir, i in reversed(zip(directions, dir_idx)):
540 rev = dir == '-' 551 rev = dir == '-'
541 sortattr = sorted (sortattr, key = lambda x:x[i:idx], reverse = rev) 552 sortattr = sorted (sortattr, key = lambda x:x[i:idx], reverse = rev)
542 idx = i 553 idx = i
1053 """For some backends this implements the non-transitive 1064 """For some backends this implements the non-transitive
1054 search, for more information see the filter method. 1065 search, for more information see the filter method.
1055 """ 1066 """
1056 raise NotImplementedError 1067 raise NotImplementedError
1057 1068
1058 def _proptree(self, filterspec, sortattr=[]): 1069 def _proptree(self, filterspec, sortattr=[], retr=False):
1059 """Build a tree of all transitive properties in the given 1070 """Build a tree of all transitive properties in the given
1060 filterspec. 1071 filterspec.
1061 """ 1072 If we retrieve (retr is True) linked items we don't follow
1062 proptree = Proptree(self.db, self, '', self.getprops()) 1073 across multilinks. We also don't follow if the searched value
1074 can contain NULL values.
1075 """
1076 proptree = Proptree(self.db, self, '', self.getprops(), retr=retr)
1063 for key, v in filterspec.iteritems(): 1077 for key, v in filterspec.iteritems():
1064 keys = key.split('.') 1078 keys = key.split('.')
1065 p = proptree 1079 p = proptree
1080 mlseen = False
1066 for k in keys: 1081 for k in keys:
1067 p = p.append(k) 1082 if isinstance (p.propclass, Multilink):
1083 mlseen = True
1084 isnull = v == '-1' or v is None
1085 nullin = isinstance(v, type([])) and ('-1' in v or None in v)
1086 r = retr and not mlseen and not isnull and not nullin
1087 p = p.append(k, retr=r)
1068 p.val = v 1088 p.val = v
1069 multilinks = {} 1089 multilinks = {}
1070 for s in sortattr: 1090 for s in sortattr:
1071 keys = s[1].split('.') 1091 keys = s[1].split('.')
1072 p = proptree 1092 p = proptree
1093 mlseen = False
1073 for k in keys: 1094 for k in keys:
1074 p = p.append(k, sort_type = 2) 1095 if isinstance (p.propclass, Multilink):
1096 mlseen = True
1097 r = retr and not mlseen
1098 p = p.append(k, need_for='sort', retr=r)
1075 if isinstance (p.propclass, Multilink): 1099 if isinstance (p.propclass, Multilink):
1076 multilinks[p] = True 1100 multilinks[p] = True
1077 if p.cls: 1101 if p.cls:
1078 p = p.append(p.cls.orderprop(), sort_type = 2) 1102 p = p.append(p.cls.orderprop(), need_for='sort')
1079 if p.sort_direction: # if an orderprop is also specified explicitly 1103 if p.sort_direction: # if an orderprop is also specified explicitly
1080 continue 1104 continue
1081 p.sort_direction = s[0] 1105 p.sort_direction = s[0]
1082 proptree.sortattr.append (p) 1106 proptree.sortattr.append (p)
1083 for p in multilinks.iterkeys(): 1107 for p in multilinks.iterkeys():
1156 1180
1157 Implementation note: 1181 Implementation note:
1158 This implements a non-optimized version of Transitive search 1182 This implements a non-optimized version of Transitive search
1159 using _filter implemented in a backend class. A more efficient 1183 using _filter implemented in a backend class. A more efficient
1160 version can be implemented in the individual backends -- e.g., 1184 version can be implemented in the individual backends -- e.g.,
1161 an SQL backen will want to create a single SQL statement and 1185 an SQL backend will want to create a single SQL statement and
1162 override the filter method instead of implementing _filter. 1186 override the filter method instead of implementing _filter.
1163 """ 1187 """
1164 sortattr = self._sortattr(sort = sort, group = group) 1188 sortattr = self._sortattr(sort = sort, group = group)
1165 proptree = self._proptree(filterspec, sortattr) 1189 proptree = self._proptree(filterspec, sortattr)
1166 proptree.search(search_matches) 1190 proptree.search(search_matches)
1167 return proptree.sort() 1191 return proptree.sort()
1192
1193 # non-optimized filter_iter, a backend may choose to implement a
1194 # better version that provides a real iterator that pre-fills the
1195 # cache for each id returned. Note that the filter_iter doesn't
1196 # promise to correctly sort by multilink (which isn't sane to do
1197 # anyway).
1198 filter_iter = filter
1168 1199
1169 def count(self): 1200 def count(self):
1170 """Get the number of nodes in this class. 1201 """Get the number of nodes in this class.
1171 1202
1172 If the returned integer is 'numnodes', the ids of all the nodes 1203 If the returned integer is 'numnodes', the ids of all the nodes

Roundup Issue Tracker: http://roundup-tracker.org/