Mercurial > p > roundup > code
comparison scripts/import_sf.py @ 3534:ad9f06e32f7c
scripts/import_sf.py will import a tracker from Sourceforge.NET
| author | Richard Jones <richard@users.sourceforge.net> |
|---|---|
| date | Fri, 03 Feb 2006 03:49:24 +0000 |
| parents | |
| children | 00a7983b4666 |
comparison
equal
deleted
inserted
replaced
| 3533:f0432dc11c2d | 3534:ad9f06e32f7c |
|---|---|
| 1 import sys, sets, os, csv, time, urllib2, httplib, mimetypes | |
| 2 from elementtree import ElementTree | |
| 3 | |
| 4 from roundup import instance, hyperdb, date, support, password | |
| 5 | |
def get_url(aid):
    """ Return the tracker URL for the given sf.net artifact id.

    sf.net's tracker.php answers with a 302 redirect to the real artifact
    page; we want that Location header (plus host) so attached files can
    be fetched relative to it.

    Raises AssertionError if sf.net doesn't answer with a redirect.
    """
    conn = httplib.HTTPConnection("sourceforge.net")
    try:
        conn.request("GET", "/support/tracker.php?aid=%s"%aid)
        response = conn.getresponse()
        assert response.status == 302, 'response code was %s'%response.status
        return 'http://sourceforge.net' + response.getheader('location')
    finally:
        # BUG FIX: the connection was previously never closed
        conn.close()
| 15 | |
def fetch_files(xml_file, file_dir):
    """ Fetch files referenced in the xml_file into the dir file_dir.

    Scans each artifact's history for "File Added" / "File Deleted"
    events, then downloads every still-attached file not already present
    in file_dir.  Artifact URLs are cached in file_dir/urls.txt so
    re-runs don't repeat sf.net's redirect lookup.
    """
    root = ElementTree.parse(xml_file).getroot()
    to_fetch = sets.Set()
    deleted = sets.Set()
    for artifact in root.find('artifacts'):
        # find the artifact id first; the file events reference it
        for field in artifact.findall('field'):
            if field.get('name') == 'artifact_id':
                aid = field.text
        for field in artifact.findall('field'):
            if field.get('name') != 'artifact_history': continue
            for event in field.findall('history'):
                d = {}
                for field in event.findall('field'):
                    d[field.get('name')] = field.text
                # old_value is "<file_id>: <file name>"
                if d['field_name'] == 'File Added':
                    fid = d['old_value'].split(':')[0]
                    to_fetch.add((aid, fid))
                if d['field_name'] == 'File Deleted':
                    fid = d['old_value'].split(':')[0]
                    deleted.add((aid, fid))
    to_fetch = to_fetch - deleted

    # don't re-fetch files we already have on disk
    got = sets.Set(os.listdir(file_dir))
    # BUG FIX: to_fetch holds (aid, fid) pairs while got holds bare
    # filenames, so the original "to_fetch - got" removed nothing.
    to_fetch = sets.Set([(aid, fid) for aid, fid in to_fetch
                         if fid not in got])

    # load cached urls (sigh)
    urls = {}
    if os.path.exists(os.path.join(file_dir, 'urls.txt')):
        for line in open(os.path.join(file_dir, 'urls.txt')):
            aid, url = line.strip().split()
            urls[aid] = url

    for aid, fid in Progress('Fetching files', list(to_fetch)):
        if fid in got: continue
        if aid not in urls:
            urls[aid] = get_url(aid)
            # append to the cache immediately so an aborted run keeps it
            f = open(os.path.join(file_dir, 'urls.txt'), 'a')
            f.write('%s %s\n'%(aid, urls[aid]))
            f.close()
        url = urls[aid] + '&file_id=' + fid
        f = urllib2.urlopen(url)
        data = f.read()
        f.close()
        # BUG FIX: write in binary mode -- attachments may be binary data
        n = open(os.path.join(file_dir, fid), 'wb')
        n.write(data)
        n.close()
| 63 | |
def import_xml(tracker_home, xml_file, file_dir):
    """ Generate Roundup tracker import files based on the tracker schema,
    sf.net xml export and downloaded files from sf.net.

    Writes CSV import files under /tmp/imported/ (via write_csv) plus an
    issue-journals.csv containing the issue change history.
    """
    tracker = instance.open(tracker_home)
    db = tracker.open('admin')

    # resolve the roundup ids of the statuses / priorities / users we map to
    resolved = db.status.lookup('resolved')
    unread = db.status.lookup('unread')
    # BUG FIX: this originally looked up 'unread' again (copy-paste error),
    # which made the unread -> chatting transition below a no-op.
    chatting = db.status.lookup('chatting')
    critical = db.priority.lookup('critical')
    urgent = db.priority.lookup('urgent')
    bug = db.priority.lookup('bug')
    feature = db.priority.lookup('feature')
    wish = db.priority.lookup('wish')
    adminuid = db.user.lookup('admin')
    anonuid = db.user.lookup('anonymous')

    root = ElementTree.parse(xml_file).getroot()

    def to_date(ts):
        # sf.net timestamps are epoch seconds; convert to a roundup Date
        return date.Date(time.gmtime(float(ts)))

    # parse out the XML
    artifacts = []
    categories = sets.Set()
    users = sets.Set()
    add_files = sets.Set()
    remove_files = sets.Set()
    for artifact in root.find('artifacts'):
        d = {}    # this artifact's fields
        op = {}   # synthesised "original post" message from the details field
        artifacts.append(d)
        for field in artifact.findall('field'):
            name = field.get('name')
            if name == 'artifact_messages':
                for message in field.findall('message'):
                    l = d.setdefault('messages', [])
                    m = {}
                    l.append(m)
                    for field in message.findall('field'):
                        name = field.get('name')
                        if name == 'adddate':
                            m[name] = to_date(field.text)
                        else:
                            m[name] = field.text
                        if name == 'user_name': users.add(field.text)
            elif name == 'artifact_history':
                for event in field.findall('history'):
                    l = d.setdefault('history', [])
                    e = {}
                    l.append(e)
                    for field in event.findall('field'):
                        name = field.get('name')
                        if name == 'entrydate':
                            e[name] = to_date(field.text)
                        else:
                            e[name] = field.text
                        if name == 'mod_by': users.add(field.text)
                    # old_value is "<file_id>: <file name>"
                    if e['field_name'] == 'File Added':
                        add_files.add(e['old_value'].split(':')[0])
                    elif e['field_name'] == 'File Deleted':
                        remove_files.add(e['old_value'].split(':')[0])
            elif name == 'details':
                op['body'] = field.text
            elif name == 'submitted_by':
                op['user_name'] = field.text
                d[name] = field.text
                users.add(field.text)
            elif name == 'open_date':
                thedate = to_date(field.text)
                op['adddate'] = thedate
                d[name] = thedate
            else:
                d[name] = field.text

        categories.add(d['category'])

        # the details field becomes the first message of the issue
        if 'body' in op:
            l = d.setdefault('messages', [])
            l.insert(0, op)

    add_files -= remove_files

    # create users
    userd = {'nobody': '2'}
    users.remove('nobody')
    data = [
        {'id': '1', 'username': 'admin', 'password': password.Password('admin'),
         'roles': 'Admin', 'address': 'richard@python.org'},
        {'id': '2', 'username': 'anonymous', 'roles': 'Anonymous'},
    ]
    for n, user in enumerate(list(users)):
        userd[user] = n+3
        data.append({'id': str(n+3), 'username': user, 'roles': 'User',
            'address': '%s@users.sourceforge.net'%user})
    write_csv(db.user, data)
    users=userd

    # create categories (imported as roundup keywords)
    categoryd = {'None': None}
    categories.remove('None')
    data = []
    for n, category in enumerate(list(categories)):
        categoryd[category] = n
        data.append({'id': str(n), 'name': category})
    write_csv(db.keyword, data)
    categories = categoryd

    # create issues
    issue_data = []
    file_data = []
    message_data = []
    issue_journal = []
    message_id = 0
    for artifact in artifacts:
        d = {}
        d['id'] = artifact['artifact_id']
        d['title'] = artifact['summary']
        d['assignedto'] = users[artifact['assigned_to']]
        if d['assignedto'] == '2':
            # assigned to "nobody" -> unassigned
            d['assignedto'] = None
        d['creation'] = artifact['open_date']
        activity = artifact['open_date']
        d['creator'] = users[artifact['submitted_by']]
        actor = d['creator']
        if categories[artifact['category']]:
            d['topic'] = [categories[artifact['category']]]
        issue_journal.append((
            d['id'], d['creation'].get_tuple(), d['creator'], "'create'", {}
        ))

        # map sf.net's numeric priority onto roundup priorities
        p = int(artifact['priority'])
        if artifact['artifact_type'] == 'Feature Requests':
            if p > 3:
                d['priority'] = feature
            else:
                d['priority'] = wish
        else:
            if p > 7:
                d['priority'] = critical
            elif p > 5:
                d['priority'] = urgent
            elif p > 3:
                d['priority'] = bug
            else:
                d['priority'] = feature

        s = artifact['status']
        if s == 'Closed':
            d['status'] = resolved
        elif s == 'Deleted':
            d['status'] = resolved
            d['is retired'] = True
        else:
            d['status'] = unread

        messages = []
        nosy = sets.Set()
        for message in artifact.get('messages', []):
            message_id += 1
            authid = users[message['user_name']]
            body = convert_message(message['body'], message_id)
            if not body: continue
            m = {'content': body, 'author': authid,
                'date': message['adddate'], 'id': str(message_id),
                'creation': message['adddate'], }
            message_data.append(m)
            messages.append(message_id)
            if authid not in (None, '2'):
                nosy.add(authid)
            activity = message['adddate']
            actor = authid
            # any follow-up message bumps an unread issue to chatting
            if d['status'] == unread:
                d['status'] = chatting
        d['messages'] = messages
        d['nosy'] = list(nosy)

        files = []
        for event in artifact.get('history', []):
            if event['field_name'] == 'File Added':
                fid, name = event['old_value'].split(':')
                if fid in add_files:
                    files.append(fid)
                    name = name.strip()
                    try:
                        f = open(os.path.join(file_dir, fid))
                        content = f.read()
                        f.close()
                    except IOError:
                        # file wasn't fetched (or fetch failed): keep a stub
                        # (was a bare except:, which hid unrelated errors)
                        content = 'content missing'
                    file_data.append({
                        'id': fid,
                        'creation': event['entrydate'],
                        'creator': users[event['mod_by']],
                        'name': name,
                        'type': mimetypes.guess_type(name)[0],
                        'content': content,
                    })
                continue
            elif event['field_name'] == 'close_date':
                action = "'set'"
                info = { 'status': 'unread' }
            elif event['field_name'] == 'summary':
                action = "'set'"
                info = { 'title': event['old_value'] }
            else:
                continue
            row = [ d['id'], event['entrydate'].get_tuple(),
                users[event['mod_by']], action, info ]
            if event['entrydate'] > activity:
                activity = event['entrydate']
            issue_journal.append(row)
        d['files'] = files

        d['activity'] = activity
        d['actor'] = actor
        issue_data.append(d)

    write_csv(db.issue, issue_data)
    write_csv(db.msg, message_data)
    write_csv(db.file, file_data)

    # the issue journal rows are written directly, not via write_csv
    f = open('/tmp/imported/issue-journals.csv', 'w')
    writer = csv.writer(f, colon_separated)
    writer.writerows(issue_journal)
    f.close()
| 290 | |
def convert_message(content, id):
    ''' Strip off the useless sf message header crap '''
    if not content.startswith('Logged In: YES'):
        return content
    # drop the three header lines sf.net prepends to each message
    lines = content.splitlines()
    return '\n'.join(lines[3:]).strip()
| 296 | |
class colon_separated(csv.excel):
    # CSV dialect for the Roundup import files: identical to the Excel
    # dialect except colon-delimited.
    delimiter = ':'
| 299 | |
| 300 today = date.Date('.') | |
def write_csv(klass, data):
    """ Write the entries in 'data' (a list of property-name -> value
    dicts) as a Roundup import CSV for class 'klass' under /tmp/imported/.

    Values are repr()'d for the importer; Date/Interval values are
    written as their tuple form, Passwords as their string form.
    Also writes any FileClass content out to separate files and touches
    an empty <classname>-journals.csv.
    """
    props = klass.getprops()
    if not os.path.exists('/tmp/imported'):
        os.mkdir('/tmp/imported')
    f = open('/tmp/imported/%s.csv'%klass.classname, 'w')
    writer = csv.writer(f, colon_separated)
    propnames = klass.export_propnames()
    # extra column flagging retired entries (appended last, below)
    propnames.append('is retired')
    writer.writerow(propnames)
    for entry in data:
        row = []
        for name in propnames:
            if name == 'is retired':
                # handled after the real properties
                continue
            prop = props[name]
            if entry.has_key(name):
                if isinstance(prop, hyperdb.Date) or \
                        isinstance(prop, hyperdb.Interval):
                    row.append(repr(entry[name].get_tuple()))
                elif isinstance(prop, hyperdb.Password):
                    row.append(repr(str(entry[name])))
                else:
                    row.append(repr(entry[name]))
            # defaults for properties the entry doesn't supply:
            elif isinstance(prop, hyperdb.Multilink):
                row.append('[]')
            elif name in ('creator', 'actor'):
                # default to the admin user (id '1')
                row.append("'1'")
            elif name in ('created', 'activity'):
                # NOTE(review): 'created' looks like it should be
                # 'creation' (the property name used elsewhere) -- confirm
                row.append(repr(today.get_tuple()))
            else:
                row.append('None')
        row.append(entry.get('is retired', False))
        writer.writerow(row)

        if isinstance(klass, hyperdb.FileClass) and entry.get('content'):
            # file content lives next to the CSV, one file per entry
            fname = klass.exportFilename('/tmp/imported/', entry['id'])
            support.ensureParentsExist(fname)
            c = open(fname, 'w')
            if isinstance(entry['content'], unicode):
                c.write(entry['content'].encode('utf8'))
            else:
                c.write(entry['content'])
            c.close()

    f.close()
    # touch an empty journals file -- no per-class history is imported
    f = open('/tmp/imported/%s-journals.csv'%klass.classname, 'w')
    f.close()
| 348 | |
class Progress:
    '''Progress display for console applications.

    Wraps a sized iterable; while iterated it writes a single updating
    line (percentage and, after a few seconds, an ETA) to stdout.

    See __main__ block at end of file for sample usage.
    '''
    def __init__(self, info, sequence):
        # info: label shown before the percentage
        # sequence: must support len() and iteration
        self.info = info
        self.sequence = iter(sequence)
        self.total = len(sequence)
        self.start = self.now = time.time()
        self.num = 0
        # redisplay roughly every 1% of items (at least every item)
        self.stepsize = self.total / 100 or 1
        self.steptimes = []
        self.display()

    def __iter__(self): return self

    def next(self):
        # Python 2 iterator protocol
        self.num += 1

        if self.num > self.total:
            # past the expected total: print the final "done" line,
            # then let the underlying iterator raise StopIteration
            print self.info, 'done', ' '*(75-len(self.info)-6)
            sys.stdout.flush()
            return self.sequence.next()

        if self.num % self.stepsize:
            # not a display step -- just pass the item through
            return self.sequence.next()

        self.display()
        return self.sequence.next()

    def display(self):
        # figure how long we've spent - guess how long to go
        now = time.time()
        steptime = now - self.now
        # smooth the ETA over the last five display steps
        self.steptimes.insert(0, steptime)
        if len(self.steptimes) > 5:
            self.steptimes.pop()
        steptime = sum(self.steptimes) / len(self.steptimes)
        self.now = now
        eta = steptime * ((self.total - self.num)/self.stepsize)

        # tell it like it is (or might be)
        if now - self.start > 3:
            # running a while now -- include an ETA estimate
            M = eta / 60
            H = M / 60
            M = M % 60
            S = eta % 60
            s = '%s %2d%% (ETA %02d:%02d:%02d)'%(self.info,
                self.num * 100. / self.total, H, M, S)
        else:
            s = '%s %2d%%'%(self.info, self.num * 100. / self.total)
        # pad to 75 columns and return carriage so the line overwrites
        sys.stdout.write(s + ' '*(75-len(s)) + '\r')
        sys.stdout.flush()
| 403 | |
if __name__ == '__main__':
    # Usage:
    #   import_sf.py import <tracker_home> <xml_file> <file_dir>
    #   import_sf.py files <xml_file> <file_dir>
    if len(sys.argv) < 2:
        # previously an IndexError; fail with a usage message instead
        sys.exit('usage: %s import|files [args]'%sys.argv[0])
    if sys.argv[1] == 'import':
        import_xml(*sys.argv[2:])
    elif sys.argv[1] == 'files':
        fetch_files(*sys.argv[2:])
    else:
        # previously an unknown command was silently ignored
        sys.exit('unknown command %r'%sys.argv[1])
