diff scripts/import_sf.py @ 3534:ad9f06e32f7c
scripts/import_sf.py will import a tracker from SourceForge.net
| author | Richard Jones <richard@users.sourceforge.net> |
|---|---|
| date | Fri, 03 Feb 2006 03:49:24 +0000 |
| parents | |
| children | 00a7983b4666 |
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/import_sf.py	Fri Feb 03 03:49:24 2006 +0000
@@ -0,0 +1,409 @@
+import sys, sets, os, csv, time, urllib2, httplib, mimetypes
+from elementtree import ElementTree
+
+from roundup import instance, hyperdb, date, support, password
+
+def get_url(aid):
+    """ So basically we have to jump through hoops, given an artifact id, to
+        figure what the URL should be to access that artifact, and hence any
+        attached files."""
+    conn = httplib.HTTPConnection("sourceforge.net")
+    conn.request("GET", "/support/tracker.php?aid=%s"%aid)
+    response = conn.getresponse()
+    assert response.status == 302, 'response code was %s'%response.status
+    return 'http://sourceforge.net' + response.getheader('location')
+
+def fetch_files(xml_file, file_dir):
+    """ Fetch files referenced in the xml_file into the dir file_dir. """
+    root = ElementTree.parse(xml_file).getroot()
+    to_fetch = sets.Set()
+    deleted = sets.Set()
+    for artifact in root.find('artifacts'):
+        for field in artifact.findall('field'):
+            if field.get('name') == 'artifact_id':
+                aid = field.text
+        for field in artifact.findall('field'):
+            if field.get('name') != 'artifact_history': continue
+            for event in field.findall('history'):
+                d = {}
+                for field in event.findall('field'):
+                    d[field.get('name')] = field.text
+                if d['field_name'] == 'File Added':
+                    fid = d['old_value'].split(':')[0]
+                    to_fetch.add((aid, fid))
+                if d['field_name'] == 'File Deleted':
+                    fid = d['old_value'].split(':')[0]
+                    deleted.add((aid, fid))
+    to_fetch = to_fetch - deleted
+
+    # skip files we have already downloaded (to_fetch holds (aid, fid)
+    # pairs, so filter on the file id)
+    got = sets.Set(os.listdir(file_dir))
+    to_fetch = sets.Set([(aid, fid) for aid, fid in to_fetch
+        if fid not in got])
+
+    # load cached urls (sigh)
+    urls = {}
+    if os.path.exists(os.path.join(file_dir, 'urls.txt')):
+        for line in open(os.path.join(file_dir, 'urls.txt')):
+            aid, url = line.strip().split()
+            urls[aid] = url
+
+    for aid, fid in Progress('Fetching files', list(to_fetch)):
+        if fid in got: continue
+        if not urls.has_key(aid):
+            urls[aid] = get_url(aid)
+            f = open(os.path.join(file_dir, 'urls.txt'), 'a')
+            f.write('%s %s\n'%(aid, urls[aid]))
+            f.close()
+        url = urls[aid] + '&file_id=' + fid
+        f = urllib2.urlopen(url)
+        data = f.read()
+        n = open(os.path.join(file_dir, fid), 'w')
+        n.write(data)
+        f.close()
+        n.close()
+
+def import_xml(tracker_home, xml_file, file_dir):
+    """ Generate Roundup tracker import files based on the tracker schema,
+        sf.net xml export and downloaded files from sf.net.
+    """
+    tracker = instance.open(tracker_home)
+    db = tracker.open('admin')
+
+    resolved = db.status.lookup('resolved')
+    unread = db.status.lookup('unread')
+    chatting = db.status.lookup('chatting')
+    critical = db.priority.lookup('critical')
+    urgent = db.priority.lookup('urgent')
+    bug = db.priority.lookup('bug')
+    feature = db.priority.lookup('feature')
+    wish = db.priority.lookup('wish')
+    adminuid = db.user.lookup('admin')
+    anonuid = db.user.lookup('anonymous')
+
+    root = ElementTree.parse(xml_file).getroot()
+
+    def to_date(ts):
+        return date.Date(time.gmtime(float(ts)))
+
+    # parse out the XML
+    artifacts = []
+    categories = sets.Set()
+    users = sets.Set()
+    add_files = sets.Set()
+    remove_files = sets.Set()
+    for artifact in root.find('artifacts'):
+        d = {}
+        op = {}
+        artifacts.append(d)
+        for field in artifact.findall('field'):
+            name = field.get('name')
+            if name == 'artifact_messages':
+                for message in field.findall('message'):
+                    l = d.setdefault('messages', [])
+                    m = {}
+                    l.append(m)
+                    for field in message.findall('field'):
+                        name = field.get('name')
+                        if name == 'adddate':
+                            m[name] = to_date(field.text)
+                        else:
+                            m[name] = field.text
+                        if name == 'user_name': users.add(field.text)
+            elif name == 'artifact_history':
+                for event in field.findall('history'):
+                    l = d.setdefault('history', [])
+                    e = {}
+                    l.append(e)
+                    for field in event.findall('field'):
+                        name = field.get('name')
+                        if name == 'entrydate':
+                            e[name] = to_date(field.text)
+                        else:
+                            e[name] = field.text
+                        if name == 'mod_by': users.add(field.text)
+                    if e['field_name'] == 'File Added':
+                        add_files.add(e['old_value'].split(':')[0])
+                    elif e['field_name'] == 'File Deleted':
+                        remove_files.add(e['old_value'].split(':')[0])
+            elif name == 'details':
+                op['body'] = field.text
+            elif name == 'submitted_by':
+                op['user_name'] = field.text
+                d[name] = field.text
+                users.add(field.text)
+            elif name == 'open_date':
+                thedate = to_date(field.text)
+                op['adddate'] = thedate
+                d[name] = thedate
+            else:
+                d[name] = field.text
+
+        categories.add(d['category'])
+
+        # the "details" field is really the original submission message
+        if op.has_key('body'):
+            l = d.setdefault('messages', [])
+            l.insert(0, op)
+
+    add_files -= remove_files
+
+    # create users
+    userd = {'nobody': '2'}
+    users.remove('nobody')
+    data = [
+        {'id': '1', 'username': 'admin', 'password': password.Password('admin'),
+         'roles': 'Admin', 'address': 'richard@python.org'},
+        {'id': '2', 'username': 'anonymous', 'roles': 'Anonymous'},
+    ]
+    for n, user in enumerate(list(users)):
+        userd[user] = str(n+3)
+        data.append({'id': str(n+3), 'username': user, 'roles': 'User',
+            'address': '%s@users.sourceforge.net'%user})
+    write_csv(db.user, data)
+    users = userd
+
+    # create categories (keyword ids start at 1)
+    categoryd = {'None': None}
+    categories.remove('None')
+    data = []
+    for n, category in enumerate(list(categories)):
+        categoryd[category] = str(n+1)
+        data.append({'id': str(n+1), 'name': category})
+    write_csv(db.keyword, data)
+    categories = categoryd
+
+    # create issues
+    issue_data = []
+    file_data = []
+    message_data = []
+    issue_journal = []
+    message_id = 0
+    for artifact in artifacts:
+        d = {}
+        d['id'] = artifact['artifact_id']
+        d['title'] = artifact['summary']
+        d['assignedto'] = users[artifact['assigned_to']]
+        if d['assignedto'] == '2':
+            d['assignedto'] = None
+        d['creation'] = artifact['open_date']
+        activity = artifact['open_date']
+        d['creator'] = users[artifact['submitted_by']]
+        actor = d['creator']
+        if categories[artifact['category']]:
+            d['topic'] = [categories[artifact['category']]]
+        issue_journal.append((
+            d['id'], d['creation'].get_tuple(), d['creator'], "'create'", {}
+        ))
+
+        # map the sf.net numeric priority onto the tracker's priorities
+        p = int(artifact['priority'])
+        if artifact['artifact_type'] == 'Feature Requests':
+            if p > 3:
+                d['priority'] = feature
+            else:
+                d['priority'] = wish
+        else:
+            if p > 7:
+                d['priority'] = critical
+            elif p > 5:
+                d['priority'] = urgent
+            elif p > 3:
+                d['priority'] = bug
+            else:
+                d['priority'] = feature
+
+        s = artifact['status']
+        if s == 'Closed':
+            d['status'] = resolved
+        elif s == 'Deleted':
+            d['status'] = resolved
+            d['is retired'] = True
+        else:
+            d['status'] = unread
+
+        messages = []
+        nosy = sets.Set()
+        for message in artifact.get('messages', []):
+            message_id += 1
+            authid = users[message['user_name']]
+            body = convert_message(message['body'], message_id)
+            if not body: continue
+            m = {'content': body, 'author': authid,
+                'date': message['adddate'], 'id': str(message_id),
+                'creation': message['adddate'], }
+            message_data.append(m)
+            messages.append(str(message_id))
+            if authid not in (None, '2'):
+                nosy.add(authid)
+            activity = message['adddate']
+            actor = authid
+            if d['status'] == unread:
+                d['status'] = chatting
+        d['messages'] = messages
+        d['nosy'] = list(nosy)
+
+        files = []
+        for event in artifact.get('history', []):
+            if event['field_name'] == 'File Added':
+                fid, name = event['old_value'].split(':', 1)
+                if fid in add_files:
+                    files.append(fid)
+                    name = name.strip()
+                    try:
+                        f = open(os.path.join(file_dir, fid))
+                        content = f.read()
+                        f.close()
+                    except:
+                        content = 'content missing'
+                    file_data.append({
+                        'id': fid,
+                        'creation': event['entrydate'],
+                        'creator': users[event['mod_by']],
+                        'name': name,
+                        'type': mimetypes.guess_type(name)[0],
+                        'content': content,
+                    })
+                continue
+            elif event['field_name'] == 'close_date':
+                action = "'set'"
+                info = { 'status': 'unread' }
+            elif event['field_name'] == 'summary':
+                action = "'set'"
+                info = { 'title': event['old_value'] }
+            else:
+                continue
+            row = [ d['id'], event['entrydate'].get_tuple(),
+                users[event['mod_by']], action, info ]
+            if event['entrydate'] > activity:
+                activity = event['entrydate']
+            issue_journal.append(row)
+        d['files'] = files
+
+        d['activity'] = activity
+        d['actor'] = actor
+        issue_data.append(d)
+
+    write_csv(db.issue, issue_data)
+    write_csv(db.msg, message_data)
+    write_csv(db.file, file_data)
+
+    f = open('/tmp/imported/issue-journals.csv', 'w')
+    writer = csv.writer(f, colon_separated)
+    writer.writerows(issue_journal)
+    f.close()
+
+def convert_message(content, id):
+    ''' Strip off the useless sf message header crap '''
+    if content[:14] == 'Logged In: YES':
+        return '\n'.join(content.splitlines()[3:]).strip()
+    return content
+
+class colon_separated(csv.excel):
+    delimiter = ':'
+
+today = date.Date('.')
+def write_csv(klass, data):
+    props = klass.getprops()
+    if not os.path.exists('/tmp/imported'):
+        os.mkdir('/tmp/imported')
+    f = open('/tmp/imported/%s.csv'%klass.classname, 'w')
+    writer = csv.writer(f, colon_separated)
+    propnames = klass.export_propnames()
+    propnames.append('is retired')
+    writer.writerow(propnames)
+    for entry in data:
+        row = []
+        for name in propnames:
+            if name == 'is retired':
+                continue
+            prop = props[name]
+            if entry.has_key(name):
+                if isinstance(prop, hyperdb.Date) or \
+                        isinstance(prop, hyperdb.Interval):
+                    row.append(repr(entry[name].get_tuple()))
+                elif isinstance(prop, hyperdb.Password):
+                    row.append(repr(str(entry[name])))
+                else:
+                    row.append(repr(entry[name]))
+            elif isinstance(prop, hyperdb.Multilink):
+                row.append('[]')
+            elif name in ('creator', 'actor'):
+                row.append("'1'")
+            elif name in ('creation', 'activity'):
+                row.append(repr(today.get_tuple()))
+            else:
+                row.append('None')
+        row.append(entry.get('is retired', False))
+        writer.writerow(row)
+
+        if isinstance(klass, hyperdb.FileClass) and entry.get('content'):
+            fname = klass.exportFilename('/tmp/imported/', entry['id'])
+            support.ensureParentsExist(fname)
+            c = open(fname, 'w')
+            if isinstance(entry['content'], unicode):
+                c.write(entry['content'].encode('utf8'))
+            else:
+                c.write(entry['content'])
+            c.close()
+
+    f.close()
+    f = open('/tmp/imported/%s-journals.csv'%klass.classname, 'w')
+    f.close()
+
+class Progress:
+    '''Progress display for console applications.
+
+    See __main__ block at end of file for sample usage.
+    '''
+    def __init__(self, info, sequence):
+        self.info = info
+        self.sequence = iter(sequence)
+        self.total = len(sequence)
+        self.start = self.now = time.time()
+        self.num = 0
+        self.stepsize = self.total / 100 or 1
+        self.steptimes = []
+        self.display()
+
+    def __iter__(self): return self
+
+    def next(self):
+        self.num += 1
+
+        if self.num > self.total:
+            print self.info, 'done', ' '*(75-len(self.info)-6)
+            sys.stdout.flush()
+            return self.sequence.next()
+
+        if self.num % self.stepsize:
+            return self.sequence.next()
+
+        self.display()
+        return self.sequence.next()
+
+    def display(self):
+        # figure how long we've spent - guess how long to go
+        now = time.time()
+        steptime = now - self.now
+        self.steptimes.insert(0, steptime)
+        if len(self.steptimes) > 5:
+            self.steptimes.pop()
+        steptime = sum(self.steptimes) / len(self.steptimes)
+        self.now = now
+        eta = steptime * ((self.total - self.num)/self.stepsize)
+
+        # tell it like it is (or might be)
+        if now - self.start > 3:
+            M = eta / 60
+            H = M / 60
+            M = M % 60
+            S = eta % 60
+            s = '%s %2d%% (ETA %02d:%02d:%02d)'%(self.info,
+                self.num * 100. / self.total, H, M, S)
+        else:
+            s = '%s %2d%%'%(self.info, self.num * 100. / self.total)
+        sys.stdout.write(s + ' '*(75-len(s)) + '\r')
+        sys.stdout.flush()
+
+if __name__ == '__main__':
+    if sys.argv[1] == 'import':
+        import_xml(*sys.argv[2:])
+    elif sys.argv[1] == 'files':
+        fetch_files(*sys.argv[2:])
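
The __main__ block implies a two-pass workflow: fetch the attachments first, then generate the Roundup import files. Below is a minimal sketch of driving both passes programmatically; it assumes the script is importable as import_sf, and the paths export.xml, sf-files and /path/to/tracker are placeholders, not names from the changeset.

    # Python 2, matching the script above; all paths are illustrative.
    import import_sf

    # Pass 1: download every attachment referenced in the XML export
    # into sf-files/ (a urls.txt cache is also written there).
    import_sf.fetch_files('export.xml', 'sf-files')

    # Pass 2: read the export plus the downloaded files and write
    # colon-separated CSV import files under /tmp/imported/.
    import_sf.import_xml('/path/to/tracker', 'export.xml', 'sf-files')

The fetch pass has to run first: import_xml opens each referenced attachment from file_dir while building the file-class data, and falls back to 'content missing' when a download is absent.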
