view roundup/support.py @ 5096:e74c3611b138

- issue2550636, issue2550909: Added support for Whoosh indexer. Also adds new config.ini setting called indexer to select indexer. See ``doc/upgrading.txt`` for details. Initial patch done by David Wolever. Patch modified (see ticket or below for changes), docs updated and committed. I have an outstanding issue with test/test_indexer.py. I have to comment out all imports and tests for indexers I don't have (i.e. mysql, postgres) otherwise no tests run. With that change made, dbm, sqlite (rdbms), xapian and whoosh indexes are all passing the indexer tests. Changes summary: 1) support native back ends dbm and rdbms. (original patch only fell through to dbm) 2) Developed whoosh stopfilter to not index stopwords or words outside the the maxlength and minlength limits defined in index_common.py. Required to pass the extremewords test_indexer test. Also I removed a call to .lower on the input text as the tokenizer I chose automatically does the lowercase. 3) Added support for max/min length to find. This was needed to pass extremewords test. 4) Added back a call to save_index in add_text. This allowed all but two tests to pass. 5) Fixed a call to: results = searcher.search(query.Term("identifier", identifier)) which had an extra parameter that is an error under current whoosh. 6) Set limit=None in search call for find() otherwise it only return 10 items. This allowed it to pass manyresults test Also due to changes in the roundup code removed the call in indexer_whoosh to from roundup.anypy.sets_ import set since we use the python builtin set.
author John Rouillard <rouilj@ieee.org>
date Sat, 25 Jun 2016 20:10:03 -0400
parents ac0c117cd24c
children 64b05e24dbd8
line wrap: on
line source

"""Implements various support classes and functions used in a number of
places in Roundup code.
"""

__docformat__ = 'restructuredtext'

import os, time, sys, re

class TruthDict:
    '''Returns True for valid keys, False for others.
    '''
    def __init__(self, keys):
        if keys:
            self.keys = {}
            for col in keys:
                self.keys[col] = 1
        else:
            self.__getitem__ = lambda name: 1

    def __getitem__(self, name):
        return self.keys.has_key(name)

def ensureParentsExist(dest):
    if not os.path.exists(os.path.dirname(dest)):
        os.makedirs(os.path.dirname(dest))

class PrioList:
    '''Manages a sorted list.

    Currently only implements method 'append' and iteration from a
    full list interface.
    Implementation: We manage a "sorted" status and sort on demand.
    Appending to the list will require re-sorting before use.
    >>> p = PrioList()
    >>> for i in 5,7,1,-1:
    ...  p.append(i)
    ...
    >>> for k in p:
    ...  print k
    ...
    -1
    1
    5
    7

    '''
    def __init__(self):
        self.list   = []
        self.sorted = True

    def append(self, item):
        self.list.append(item)
        self.sorted = False

    def __iter__(self):
        if not self.sorted:
            self.list.sort()
            self.sorted = True
        return iter(self.list)

class Progress:
    '''Progress display for console applications.

    See __main__ block at end of file for sample usage.
    '''
    def __init__(self, info, sequence):
        self.info = info
        self.sequence = iter(sequence)
        self.total = len(sequence)
        self.start = self.now = time.time()
        self.num = 0
        self.stepsize = self.total / 100 or 1
        self.steptimes = []
        self.display()

    def __iter__(self): return self

    def next(self):
        self.num += 1

        if self.num > self.total:
            print self.info, 'done', ' '*(75-len(self.info)-6)
            sys.stdout.flush()
            return self.sequence.next()

        if self.num % self.stepsize:
            return self.sequence.next()

        self.display()
        return self.sequence.next()

    def display(self):
        # figure how long we've spent - guess how long to go
        now = time.time()
        steptime = now - self.now
        self.steptimes.insert(0, steptime)
        if len(self.steptimes) > 5:
            self.steptimes.pop()
        steptime = sum(self.steptimes) / len(self.steptimes)
        self.now = now
        eta = steptime * ((self.total - self.num)/self.stepsize)

        # tell it like it is (or might be)
        if now - self.start > 3:
            M = eta / 60
            H = M / 60
            M = M % 60
            S = eta % 60
            if self.total:
                s = '%s %2d%% (ETA %02d:%02d:%02d)'%(self.info,
                    self.num * 100. / self.total, H, M, S)
            else:
                s = '%s 0%% (ETA %02d:%02d:%02d)'%(self.info, H, M, S)
        elif self.total:
            s = '%s %2d%%'%(self.info, self.num * 100. / self.total)
        else:
            s = '%s %d done'%(self.info, self.num)
        sys.stdout.write(s + ' '*(75-len(s)) + '\r')
        sys.stdout.flush()

# vim: set et sts=4 sw=4 :

Roundup Issue Tracker: http://roundup-tracker.org/