diff scripts/import_sf.py @ 3534:ad9f06e32f7c

scripts/import_sf.py will import a tracker from Sourceforge.NET
author Richard Jones <richard@users.sourceforge.net>
date Fri, 03 Feb 2006 03:49:24 +0000
parents
children 00a7983b4666
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/import_sf.py	Fri Feb 03 03:49:24 2006 +0000
@@ -0,0 +1,409 @@
+import sys, sets, os, csv, time, urllib2, httplib, mimetypes
+from elementtree import ElementTree
+
+from roundup import instance, hyperdb, date, support, password
+
+def get_url(aid):
+    """ so basically we have to jump through hoops, given an artifact id, to
+    figure what the URL should be to access that artifact, and hence any
+    attached files."""
+    conn = httplib.HTTPConnection("sourceforge.net")
+    # sf.net answers this generic support URL with a redirect to the
+    # artifact's real tracker page; the Location header is what we want
+    conn.request("GET", "/support/tracker.php?aid=%s"%aid)
+    response = conn.getresponse()
+    assert response.status == 302, 'response code was %s'%response.status
+    # the Location header is site-relative, so re-attach scheme and host
+    return 'http://sourceforge.net' + response.getheader('location')
+
+def fetch_files(xml_file, file_dir):
+    """ Fetch files referenced in the xml_file into the dir file_dir.
+
+    Files that were later deleted, or that are already present in
+    file_dir, are skipped.  Resolved artifact URLs are cached in
+    file_dir/urls.txt so re-runs avoid the redirect round-trip.
+    """
+    root = ElementTree.parse(xml_file).getroot()
+    to_fetch = sets.Set()   # (artifact id, file id) pairs still attached
+    deleted = sets.Set()    # (artifact id, file id) pairs later removed
+    for artifact in root.find('artifacts'):
+        # first pass over the fields: find this artifact's id
+        for field in artifact.findall('field'):
+            if field.get('name') == 'artifact_id':
+                aid = field.text
+        # second pass: scan the history for File Added/Deleted events
+        for field in artifact.findall('field'):
+            if field.get('name') != 'artifact_history': continue
+            for event in field.findall('history'):
+                d = {}
+                for field in event.findall('field'):
+                    d[field.get('name')] = field.text
+                # old_value is "<file id>: <file name>"
+                if d['field_name'] == 'File Added':
+                    fid = d['old_value'].split(':')[0]
+                    to_fetch.add((aid, fid))
+                if d['field_name'] == 'File Deleted':
+                    fid = d['old_value'].split(':')[0]
+                    deleted.add((aid, fid))
+    to_fetch = to_fetch - deleted
+
+    # NOTE(review): 'got' holds bare filenames while 'to_fetch' holds
+    # (aid, fid) tuples, so this difference removes nothing -- the real
+    # skip is the "if fid in got" test in the loop below.
+    got = sets.Set(os.listdir(file_dir))
+    to_fetch = to_fetch - got
+
+    # load cached urls (sigh)
+    urls = {}
+    if os.path.exists(os.path.join(file_dir, 'urls.txt')):
+        for line in open(os.path.join(file_dir, 'urls.txt')):
+            aid, url = line.strip().split()
+            urls[aid] = url
+
+    for aid, fid in Progress('Fetching files', list(to_fetch)):
+        if fid in got: continue
+        if not urls.has_key(aid):
+            # resolve and immediately cache the artifact URL so a crash
+            # mid-run doesn't lose the work already done
+            urls[aid] = get_url(aid)
+            f = open(os.path.join(file_dir, 'urls.txt'), 'a')
+            f.write('%s %s\n'%(aid, urls[aid]))
+            f.close()
+        url = urls[aid] + '&file_id=' + fid
+        f = urllib2.urlopen(url)
+        data = f.read()
+        # saved under the sf.net file id, not the original file name
+        # NOTE(review): mode 'w' is text mode -- on Windows this would
+        # corrupt binary attachments; 'wb' looks intended
+        n = open(os.path.join(file_dir, fid), 'w')
+        n.write(data)
+        f.close()
+        n.close()
+
+def import_xml(tracker_home, xml_file, file_dir):
+    """ Generate Roundup tracker import files based on the tracker schema,
+    sf.net xml export and downloaded files from sf.net.
+
+    Writes CSV import files (plus file content and journal files) under
+    /tmp/imported via write_csv().
+    """
+    tracker = instance.open(tracker_home)
+    db = tracker.open('admin')
+
+    # resolve ids of the schema values that the sf.net data maps onto
+    resolved = db.status.lookup('resolved')
+    unread = db.status.lookup('unread')
+    # NOTE(review): looks like this was meant to be lookup('chatting');
+    # as written 'chatting' aliases 'unread', so the "first message moves
+    # status to chatting" logic below is a no-op -- confirm and fix
+    chatting = db.status.lookup('unread')
+    critical = db.priority.lookup('critical')
+    urgent = db.priority.lookup('urgent')
+    bug = db.priority.lookup('bug')
+    feature = db.priority.lookup('feature')
+    wish = db.priority.lookup('wish')
+    adminuid = db.user.lookup('admin')
+    anonuid = db.user.lookup('anonymous')
+
+    root = ElementTree.parse(xml_file).getroot()
+
+    def to_date(ts):
+        # sf.net timestamps are seconds-since-epoch strings (UTC)
+        return date.Date(time.gmtime(float(ts)))
+
+    # parse out the XML
+    artifacts = []
+    categories = sets.Set()
+    users = sets.Set()
+    add_files = sets.Set()      # file ids added over all artifacts
+    remove_files = sets.Set()   # file ids later deleted
+    for artifact in root.find('artifacts'):
+        d = {}      # flattened artifact fields
+        op = {}     # the "original post" -- becomes the first message
+        artifacts.append(d)
+        for field in artifact.findall('field'):
+            name = field.get('name')
+            if name == 'artifact_messages':
+                for message in field.findall('message'):
+                    l = d.setdefault('messages', [])
+                    m = {}
+                    l.append(m)
+                    for field in message.findall('field'):
+                        name = field.get('name')
+                        if name == 'adddate':
+                            m[name] = to_date(field.text)
+                        else:
+                            m[name] = field.text
+                        if name == 'user_name': users.add(field.text)
+            elif name == 'artifact_history':
+                for event in field.findall('history'):
+                    l = d.setdefault('history', [])
+                    e = {}
+                    l.append(e)
+                    for field in event.findall('field'):
+                        name = field.get('name')
+                        if name == 'entrydate':
+                            e[name] = to_date(field.text)
+                        else:
+                            e[name] = field.text
+                        if name == 'mod_by': users.add(field.text)
+                    # old_value is "<file id>: <file name>"
+                    if e['field_name'] == 'File Added':
+                        add_files.add(e['old_value'].split(':')[0])
+                    elif e['field_name'] == 'File Deleted':
+                        remove_files.add(e['old_value'].split(':')[0])
+            elif name == 'details':
+                op['body'] = field.text
+            elif name == 'submitted_by':
+                op['user_name'] = field.text
+                d[name] = field.text
+                users.add(field.text)
+            elif name == 'open_date':
+                thedate = to_date(field.text)
+                op['adddate'] = thedate
+                d[name] = thedate
+            else:
+                d[name] = field.text
+
+        categories.add(d['category'])
+
+        # the submission text becomes the issue's first message
+        if op.has_key('body'):
+            l = d.setdefault('messages', [])
+            l.insert(0, op)
+
+    # only import files that still existed at export time
+    add_files -= remove_files
+
+    # create users
+    userd = {'nobody': '2'}     # sf.net "nobody" maps to anonymous (id 2)
+    users.remove('nobody')
+    data = [
+        {'id': '1', 'username': 'admin', 'password': password.Password('admin'),
+            'roles': 'Admin', 'address': 'richard@python.org'},
+        {'id': '2', 'username': 'anonymous', 'roles': 'Anonymous'},
+    ]
+    for n, user in enumerate(list(users)):
+        userd[user] = n+3
+        data.append({'id': str(n+3), 'username': user, 'roles': 'User',
+            'address': '%s@users.sourceforge.net'%user})
+    write_csv(db.user, data)
+    users=userd     # now maps sf.net username -> roundup user id
+
+    # create categories
+    categoryd = {'None': None}
+    categories.remove('None')
+    data = []
+    for n, category in enumerate(list(categories)):
+        categoryd[category] = n
+        data.append({'id': str(n), 'name': category})
+    write_csv(db.keyword, data)
+    categories = categoryd      # now maps sf.net category -> keyword id
+
+    # create issues
+    issue_data = []
+    file_data = []
+    message_data = []
+    issue_journal = []
+    message_id = 0
+    for artifact in artifacts:
+        d = {}
+        d['id'] = artifact['artifact_id']
+        d['title'] = artifact['summary']
+        d['assignedto'] = users[artifact['assigned_to']]
+        if d['assignedto'] == '2':
+            d['assignedto'] = None      # never assign to anonymous
+        d['creation'] = artifact['open_date']
+        activity = artifact['open_date']
+        d['creator'] = users[artifact['submitted_by']]
+        actor = d['creator']
+        if categories[artifact['category']]:
+            d['topic'] = [categories[artifact['category']]]
+        # every imported issue needs a 'create' journal entry
+        issue_journal.append((
+            d['id'], d['creation'].get_tuple(), d['creator'], "'create'", {}
+        ))
+
+        # map the sf.net 1..9 numeric priority onto tracker priorities;
+        # feature requests get a narrower mapping than bugs
+        p = int(artifact['priority'])
+        if artifact['artifact_type'] == 'Feature Requests':
+            if p > 3:
+                d['priority'] = feature
+            else:
+                d['priority'] = wish
+        else:
+            if p > 7:
+                d['priority'] = critical
+            elif p > 5:
+                d['priority'] = urgent
+            elif p > 3:
+                d['priority'] = bug
+            else:
+                d['priority'] = feature
+
+        s = artifact['status']
+        if s == 'Closed':
+            d['status'] = resolved
+        elif s == 'Deleted':
+            d['status'] = resolved
+            d['is retired'] = True      # deleted artifacts become retired issues
+        else:
+            d['status'] = unread
+
+        messages = []
+        nosy = sets.Set()
+        for message in artifact.get('messages', []):
+            message_id += 1
+            authid = users[message['user_name']]
+            body = convert_message(message['body'], message_id)
+            # skip messages that were nothing but the sf.net header
+            if not body: continue
+            m = {'content': body, 'author': authid,
+                'date': message['adddate'], 'id': str(message_id),
+                'creation': message['adddate'], }
+            message_data.append(m)
+            messages.append(message_id)
+            # message authors (except anonymous) join the nosy list
+            if authid not in (None, '2'):
+                nosy.add(authid)
+            activity = message['adddate']
+            actor = authid
+            if d['status'] == unread:
+                d['status'] = chatting
+        d['messages'] = messages
+        d['nosy'] = list(nosy)
+
+        files = []
+        for event in artifact.get('history', []):
+            if event['field_name'] == 'File Added':
+                fid, name = event['old_value'].split(':')
+                if fid in add_files:
+                    files.append(fid)
+                    name = name.strip()
+                    try:
+                        f = open(os.path.join(file_dir, fid))
+                        content = f.read()
+                        f.close()
+                    except:
+                        # best-effort: the file wasn't fetched; keep a
+                        # placeholder rather than abort the whole import
+                        content = 'content missing'
+                    file_data.append({
+                        'id': fid,
+                        'creation': event['entrydate'],
+                        'creator': users[event['mod_by']],
+                        'name': name,
+                        'type': mimetypes.guess_type(name)[0],
+                        'content': content,
+                    })
+                continue
+            elif event['field_name'] == 'close_date':
+                action = "'set'"
+                info = { 'status': 'unread' }
+            elif event['field_name'] == 'summary':
+                action = "'set'"
+                info = { 'title': event['old_value'] }
+            else:
+                # all other history events are not journalled
+                continue
+            row = [ d['id'], event['entrydate'].get_tuple(),
+                users[event['mod_by']], action, info ]
+            # track the most recent event as the issue's activity date
+            if event['entrydate'] > activity:
+                activity = event['entrydate']
+            issue_journal.append(row)
+        d['files'] = files
+
+        d['activity'] = activity
+        d['actor'] = actor
+        issue_data.append(d)
+
+    write_csv(db.issue, issue_data)
+    write_csv(db.msg, message_data)
+    write_csv(db.file, file_data)
+
+    # issue journals were collected above, so overwrite the empty
+    # journal file write_csv(db.issue, ...) just created
+    f = open('/tmp/imported/issue-journals.csv', 'w')
+    writer = csv.writer(f, colon_separated)
+    writer.writerows(issue_journal)
+    f.close()
+
+def convert_message(content, id):
+    ''' Strip off the useless sf message header crap.
+
+    Drops the first three lines ("Logged In: YES", user id, blank) when
+    present.  May return an empty string if the message was header-only.
+    The 'id' parameter is currently unused.
+    '''
+    if content[:14] == 'Logged In: YES':
+        return '\n'.join(content.splitlines()[3:]).strip()
+    return content
+
+class colon_separated(csv.excel):
+    """ CSV dialect used by Roundup's import format: excel rules but
+    with ':' as the field delimiter. """
+    delimiter = ':'
+
+today = date.Date('.')     # import-run timestamp, used for missing dates
+def write_csv(klass, data):
+    """ Write the entries in 'data' (a list of property dicts) to
+    /tmp/imported/<classname>.csv in Roundup's colon-separated import
+    format, plus an empty journal file.  For FileClass entries, the
+    'content' value is written to a separate per-id content file.
+
+    Missing properties are filled with sensible defaults ('[]' for
+    multilinks, admin for creator/actor, now for created/activity,
+    None otherwise).
+    """
+    props = klass.getprops()
+    if not os.path.exists('/tmp/imported'):
+        os.mkdir('/tmp/imported')
+    f = open('/tmp/imported/%s.csv'%klass.classname, 'w')
+    writer = csv.writer(f, colon_separated)
+    propnames = klass.export_propnames()
+    # 'is retired' is appended as the final column of every row
+    propnames.append('is retired')
+    writer.writerow(propnames)
+    for entry in data:
+        row = []
+        for name in propnames:
+            if name == 'is retired':
+                continue        # handled after the loop, below
+            prop = props[name]
+            if entry.has_key(name):
+                # dates/intervals export as their tuple form
+                if isinstance(prop, hyperdb.Date) or \
+                        isinstance(prop, hyperdb.Interval):
+                    row.append(repr(entry[name].get_tuple()))
+                elif isinstance(prop, hyperdb.Password):
+                    row.append(repr(str(entry[name])))
+                else:
+                    row.append(repr(entry[name]))
+            elif isinstance(prop, hyperdb.Multilink):
+                row.append('[]')
+            elif name in ('creator', 'actor'):
+                row.append("'1'")       # default to the admin user
+            elif name in ('created', 'activity'):
+                row.append(repr(today.get_tuple()))
+            else:
+                row.append('None')
+        row.append(entry.get('is retired', False))
+        writer.writerow(row)
+
+        # file content lives in its own file, not in the CSV
+        if isinstance(klass, hyperdb.FileClass) and entry.get('content'):
+            fname = klass.exportFilename('/tmp/imported/', entry['id'])
+            support.ensureParentsExist(fname)
+            c = open(fname, 'w')
+            if isinstance(entry['content'], unicode):
+                c.write(entry['content'].encode('utf8'))
+            else:
+                c.write(entry['content'])
+            c.close()
+
+    f.close()
+    # the importer expects a journal file to exist, even if empty;
+    # import_xml overwrites the issue one with real journal data
+    f = open('/tmp/imported/%s-journals.csv'%klass.classname, 'w')
+    f.close()
+
+class Progress:
+    '''Progress display for console applications.
+
+    Wraps a sequence; iterating the wrapper yields the sequence's items
+    while rendering a single-line percentage/ETA display on stdout.
+
+    See __main__ block at end of file for sample usage.
+    '''
+    def __init__(self, info, sequence):
+        # info: label shown before the percentage
+        # sequence: must support len() as well as iteration
+        self.info = info
+        self.sequence = iter(sequence)
+        self.total = len(sequence)
+        self.start = self.now = time.time()
+        self.num = 0
+        # update the display roughly every 1% (at least every item)
+        self.stepsize = self.total / 100 or 1
+        self.steptimes = []
+        self.display()
+
+    def __iter__(self): return self
+
+    def next(self):
+        self.num += 1
+
+        # past the end: print the "done" line, then let the underlying
+        # iterator raise StopIteration
+        if self.num > self.total:
+            print self.info, 'done', ' '*(75-len(self.info)-6)
+            sys.stdout.flush()
+            return self.sequence.next()
+
+        # only refresh the display every stepsize items
+        if self.num % self.stepsize:
+            return self.sequence.next()
+
+        self.display()
+        return self.sequence.next()
+
+    def display(self):
+        # figure how long we've spent - guess how long to go
+        now = time.time()
+        steptime = now - self.now
+        # keep a moving average over the last 5 display steps
+        self.steptimes.insert(0, steptime)
+        if len(self.steptimes) > 5:
+            self.steptimes.pop()
+        steptime = sum(self.steptimes) / len(self.steptimes)
+        self.now = now
+        eta = steptime * ((self.total - self.num)/self.stepsize)
+
+        # tell it like it is (or might be)
+        # only show the ETA once we have a few seconds of history
+        if now - self.start > 3:
+            M = eta / 60
+            H = M / 60
+            M = M % 60
+            S = eta % 60
+            s = '%s %2d%% (ETA %02d:%02d:%02d)'%(self.info,
+                self.num * 100. / self.total, H, M, S)
+        else:
+            s = '%s %2d%%'%(self.info, self.num * 100. / self.total)
+        # '\r' (no newline) so the next update overwrites this line
+        sys.stdout.write(s + ' '*(75-len(s)) + '\r')
+        sys.stdout.flush()
+
+if __name__ == '__main__':
+    # usage:
+    #   import_sf.py files  <xml_file> <file_dir>        (fetch attachments)
+    #   import_sf.py import <tracker_home> <xml_file> <file_dir>
+    # run 'files' first so 'import' can find the downloaded attachments
+    if sys.argv[1] == 'import':
+        import_xml(*sys.argv[2:])
+    elif sys.argv[1] == 'files':
+        fetch_files(*sys.argv[2:])
+

Roundup Issue Tracker: http://roundup-tracker.org/