view scripts/contributors.py @ 5096:e74c3611b138

- issue2550636, issue2550909: Added support for Whoosh indexer. Also adds new config.ini setting called indexer to select indexer. See ``doc/upgrading.txt`` for details. Initial patch done by David Wolever. Patch modified (see ticket or below for changes), docs updated and committed. I have an outstanding issue with test/test_indexer.py. I have to comment out all imports and tests for indexers I don't have (i.e. mysql, postgres) otherwise no tests run. With that change made, dbm, sqlite (rdbms), xapian and whoosh indexes are all passing the indexer tests. Changes summary: 1) support native back ends dbm and rdbms. (original patch only fell through to dbm) 2) Developed whoosh stopfilter to not index stopwords or words outside the the maxlength and minlength limits defined in index_common.py. Required to pass the extremewords test_indexer test. Also I removed a call to .lower on the input text as the tokenizer I chose automatically does the lowercase. 3) Added support for max/min length to find. This was needed to pass extremewords test. 4) Added back a call to save_index in add_text. This allowed all but two tests to pass. 5) Fixed a call to: results = searcher.search(query.Term("identifier", identifier)) which had an extra parameter that is an error under current whoosh. 6) Set limit=None in search call for find() otherwise it only return 10 items. This allowed it to pass manyresults test Also due to changes in the roundup code removed the call in indexer_whoosh to from roundup.anypy.sets_ import set since we use the python builtin set.
author John Rouillard <rouilj@ieee.org>
date Sat, 25 Jun 2016 20:10:03 -0400
parents 64daaa4bf816
children 3fa026621f69
line wrap: on
line source

"""
Get Mercurial history data and output list of contributors with years.

Public domain work by:

  anatoly techtonik <techtonik@gmail.com>

"""

from subprocess import check_output

# --- output settings
contributors_by_year = True
years_for_contributors = True
verbose = True
# /--

# --- project specific configuration
ALIASES = {
  'Richard Jones <richard@mechanicalcat.net>':
      ['richard',
       'Richard Jones <richard@users.sourceforge.net>'],
  'Bernhard Reiter <bernhard@intevation.de>':
      ['Bernhard Reiter <ber@users.sourceforge.net>',
       'Bernhard Reiter <Bernhard.Reiter@intevation.de>'],
  'Ralf Schlatterbeck <rsc@runtux.com>':
      ['Ralf Schlatterbeck <schlatterbeck@users.sourceforge.net>'],
  'Stefan Seefeld <stefan@seefeld.name>':
      ['Stefan Seefeld <stefan@users.sourceforge.net>'],
  'John P. Rouillard <rouilj@cs.umb.edu>':
      ['rouilj'],
}
ROBOTS = ['No Author <no-author@users.sourceforge.net>']
# /-- 


def compress(years):
  """
  Given a list of years like [2003, 2004, 2007],
  compress it into string like '2003-2004, 2007'

  >>> compress([2002])
  '2002'
  >>> compress([2003, 2002])
  '2002-2003'
  >>> compress([2009, 2004, 2005, 2006, 2007])
  '2004-2007, 2009'
  >>> compress([2001, 2003, 2004, 2005])
  '2001, 2003-2005'
  >>> compress([2009, 2011])
  '2009, 2011'
  >>> compress([2009, 2010, 2011, 2006, 2007])
  '2006-2007, 2009-2011'
  >>> compress([2002, 2003, 2004, 2005, 2006, 2009, 2012])
  '2002-2006, 2009, 2012'
  """
  years = sorted(years)
  # compress years into string
  comma = ', '
  yearstr = ''
  for i in range(0,len(years)-1):
    if years[i+1]-years[i] == 1:
      if not yearstr or yearstr.endswith(comma):
        yearstr += '%s' % years[i]
      if yearstr.endswith('-'):
        pass
      else:
        yearstr += '-'
    else:
      yearstr += '%s, ' % years[i]

  if len(years) == 1:
    yearstr += str(years[0])
  else:
    yearstr += '%s' % years[-1]
  return yearstr


if __name__ == '__main__':
  if verbose:
    print("Getting HG log...")
  authorship = check_output('hg log --template "{date(date,\\"%Y\\")},{author}\n"')
  # authorship are strings like
  # 2003,Richard Jones <richard@users.sourceforge.net>
  # ...

  if verbose:
    print("Splitting...")
  # transform to a list of tuples
  authorship = [line.split(',', 1) for line in authorship.splitlines()]

  if verbose:
    print("Sorting...")
  years = {}  # year -> set(author1, author2, ...)
  names = {}  # author -> set(years)
  for year, author in authorship:
    if author in ROBOTS:
      continue
    # process aliases
    for name, aliases in ALIASES.items():
      if author in aliases:
        author = name
        break
    author = author.replace('<', '(')
    author = author.replace('>', ')')
    # years
    if not year in years:
      years[year] = set()
    years[year].add(author)
    # names
    if not author in names:
      names[author] = set()
    names[author].add(int(year))


  if contributors_by_year:
    if verbose:
      print("Contributors by year...")
    print('')
    for year in sorted(years, reverse=True):
      print(year)
      for author in sorted(years[year]):
        print("  " + author)
    print('')

  if years_for_contributors:
    if verbose:
      print("Years for each contributor...")
    print('')
    
    def last_year(name):
      """Return year of the latest contribution for a given name"""
      return sorted(list(names[name]))[-1]

    def first_year(name):
      """Return year of the first contribution"""
      return sorted(list(names[name]))[0]

    def year_cmp(name1, name2):
      """
      Year comparison function. First sort by latest contribution year (desc).
      If it matches, compare first contribution year (asc). This ensures that
      the most recent and long-term contributors are at the top.
      """
      if last_year(name1) != last_year(name2):
        return last_year(name1) - last_year(name2)
      else:
        return first_year(name2) - first_year(name1)
    
    print("Copyright (c)")
    for author in sorted(list(names), cmp=year_cmp, reverse=True):
      years = list(names[author])
      yearstr = compress(years)

      if 0: #DEBUG
        print(years, yearstr, author)
      else:
        print("    %s %s" % (yearstr, author))
    print('')

Roundup Issue Tracker: http://roundup-tracker.org/