comparison scripts/import_sf.py @ 3534:ad9f06e32f7c

scripts/import_sf.py will import a tracker from Sourceforge.NET
author Richard Jones <richard@users.sourceforge.net>
date Fri, 03 Feb 2006 03:49:24 +0000
parents
children 00a7983b4666
comparison
equal deleted inserted replaced
3533:f0432dc11c2d 3534:ad9f06e32f7c
1 import sys, sets, os, csv, time, urllib2, httplib, mimetypes
2 from elementtree import ElementTree
3
4 from roundup import instance, hyperdb, date, support, password
5
def get_url(aid):
    """ so basically we have to jump through hoops, given an artifact id, to
    figure what the URL should be to access that artifact, and hence any
    attached files.

    sourceforge.net answers /support/tracker.php?aid=NNN with a 302
    redirect to the real tracker page; we capture the Location header
    rather than following it.  Raises AssertionError if the response
    is not a redirect.
    """
    conn = httplib.HTTPConnection("sourceforge.net")
    try:
        conn.request("GET", "/support/tracker.php?aid=%s"%aid)
        response = conn.getresponse()
        assert response.status == 302, 'response code was %s'%response.status
        return 'http://sourceforge.net' + response.getheader('location')
    finally:
        # don't leak the socket, even when the assertion fails
        conn.close()
15
def fetch_files(xml_file, file_dir):
    """ Fetch files referenced in the xml_file into the dir file_dir.

    The sf.net XML export doesn't list attachments directly -- they only
    appear as 'File Added' / 'File Deleted' events in each artifact's
    history, so the set of live files is reconstructed from those events.
    Files already present in file_dir are skipped, and resolved artifact
    URLs are cached in file_dir/urls.txt across runs.
    """
    root = ElementTree.parse(xml_file).getroot()
    to_fetch = sets.Set()
    deleted = sets.Set()
    for artifact in root.find('artifacts'):
        # first pass: find this artifact's id
        for field in artifact.findall('field'):
            if field.get('name') == 'artifact_id':
                aid = field.text
        # second pass: collect file add / delete events
        for field in artifact.findall('field'):
            if field.get('name') != 'artifact_history': continue
            for event in field.findall('history'):
                d = {}
                for field in event.findall('field'):
                    d[field.get('name')] = field.text
                # old_value is "<file_id>: <file name>"
                if d['field_name'] == 'File Added':
                    fid = d['old_value'].split(':')[0]
                    to_fetch.add((aid, fid))
                if d['field_name'] == 'File Deleted':
                    fid = d['old_value'].split(':')[0]
                    deleted.add((aid, fid))
    to_fetch = to_fetch - deleted

    # skip files already downloaded; note to_fetch holds (aid, fid)
    # tuples while the directory listing holds bare fids, so a plain
    # set subtraction would never remove anything
    got = sets.Set(os.listdir(file_dir))
    to_fetch = sets.Set([(aid, fid) for aid, fid in to_fetch
        if fid not in got])

    # load cached urls (sigh)
    urls = {}
    if os.path.exists(os.path.join(file_dir, 'urls.txt')):
        for line in open(os.path.join(file_dir, 'urls.txt')):
            aid, url = line.strip().split()
            urls[aid] = url

    for aid, fid in Progress('Fetching files', list(to_fetch)):
        if not urls.has_key(aid):
            urls[aid] = get_url(aid)
            # append to the cache immediately so an interrupted run
            # doesn't have to re-resolve this artifact
            f = open(os.path.join(file_dir, 'urls.txt'), 'a')
            f.write('%s %s\n'%(aid, urls[aid]))
            f.close()
        url = urls[aid] + '&file_id=' + fid
        f = urllib2.urlopen(url)
        data = f.read()
        f.close()
        # binary mode -- attachments are not necessarily text
        n = open(os.path.join(file_dir, fid), 'wb')
        n.write(data)
        n.close()
63
def import_xml(tracker_home, xml_file, file_dir):
    """ Generate Roundup tracker import files based on the tracker schema,
    sf.net xml export and downloaded files from sf.net.

    Output goes to /tmp/imported/ as CSV import files for the user,
    keyword, issue, msg and file classes, plus an issue journal.
    """
    tracker = instance.open(tracker_home)
    db = tracker.open('admin')

    # resolve tracker schema ids we'll need while mapping artifacts
    resolved = db.status.lookup('resolved')
    unread = db.status.lookup('unread')
    # BUG FIX: this used to look up 'unread', which made the
    # unread -> chatting transition further down a no-op
    chatting = db.status.lookup('chatting')
    critical = db.priority.lookup('critical')
    urgent = db.priority.lookup('urgent')
    bug = db.priority.lookup('bug')
    feature = db.priority.lookup('feature')
    wish = db.priority.lookup('wish')
    adminuid = db.user.lookup('admin')
    anonuid = db.user.lookup('anonymous')

    root = ElementTree.parse(xml_file).getroot()

    def to_date(ts):
        # sf.net exports timestamps as seconds since the epoch (UTC)
        return date.Date(time.gmtime(float(ts)))

    # parse out the XML
    artifacts = []
    categories = sets.Set()
    users = sets.Set()
    add_files = sets.Set()
    remove_files = sets.Set()
    for artifact in root.find('artifacts'):
        d = {}      # flat artifact properties
        op = {}     # the "original post" pseudo-message
        artifacts.append(d)
        for field in artifact.findall('field'):
            name = field.get('name')
            if name == 'artifact_messages':
                for message in field.findall('message'):
                    l = d.setdefault('messages', [])
                    m = {}
                    l.append(m)
                    for field in message.findall('field'):
                        name = field.get('name')
                        if name == 'adddate':
                            m[name] = to_date(field.text)
                        else:
                            m[name] = field.text
                        if name == 'user_name': users.add(field.text)
            elif name == 'artifact_history':
                for event in field.findall('history'):
                    l = d.setdefault('history', [])
                    e = {}
                    l.append(e)
                    for field in event.findall('field'):
                        name = field.get('name')
                        if name == 'entrydate':
                            e[name] = to_date(field.text)
                        else:
                            e[name] = field.text
                        if name == 'mod_by': users.add(field.text)
                    # old_value is "<file_id>: <file name>"
                    if e['field_name'] == 'File Added':
                        add_files.add(e['old_value'].split(':')[0])
                    elif e['field_name'] == 'File Deleted':
                        remove_files.add(e['old_value'].split(':')[0])
            elif name == 'details':
                op['body'] = field.text
            elif name == 'submitted_by':
                op['user_name'] = field.text
                d[name] = field.text
                users.add(field.text)
            elif name == 'open_date':
                thedate = to_date(field.text)
                op['adddate'] = thedate
                d[name] = thedate
            else:
                d[name] = field.text

        categories.add(d['category'])

        # the original description becomes the first message
        if op.has_key('body'):
            l = d.setdefault('messages', [])
            l.insert(0, op)

    # a file that was later deleted shouldn't be imported at all
    add_files -= remove_files

    # create users (ids '1' and '2' are reserved for admin / anonymous)
    userd = {'nobody': '2'}
    users.discard('nobody')   # discard: the export may lack a 'nobody'
    data = [
        {'id': '1', 'username': 'admin', 'password': password.Password('admin'),
         'roles': 'Admin', 'address': 'richard@python.org'},
        {'id': '2', 'username': 'anonymous', 'roles': 'Anonymous'},
    ]
    for n, user in enumerate(list(users)):
        # keep ids as strings so they repr() the same way as the
        # hard-coded '1' / '2' defaults used by write_csv
        userd[user] = str(n+3)
        data.append({'id': str(n+3), 'username': user, 'roles': 'User',
            'address': '%s@users.sourceforge.net'%user})
    write_csv(db.user, data)
    users=userd

    # create categories ('None' means "no category")
    categoryd = {'None': None}
    categories.discard('None')
    data = []
    for n, category in enumerate(list(categories)):
        categoryd[category] = str(n)
        data.append({'id': str(n), 'name': category})
    write_csv(db.keyword, data)
    categories = categoryd

    # create issues
    issue_data = []
    file_data = []
    message_data = []
    issue_journal = []
    message_id = 0
    for artifact in artifacts:
        d = {}
        d['id'] = artifact['artifact_id']
        d['title'] = artifact['summary']
        d['assignedto'] = users[artifact['assigned_to']]
        if d['assignedto'] == '2':
            # assigned to 'nobody' really means unassigned
            d['assignedto'] = None
        d['creation'] = artifact['open_date']
        activity = artifact['open_date']
        d['creator'] = users[artifact['submitted_by']]
        actor = d['creator']
        # explicit None test: category id '0' is a real category
        # (a plain truth test used to silently drop it)
        if categories[artifact['category']] is not None:
            d['topic'] = [categories[artifact['category']]]
        issue_journal.append((
            d['id'], d['creation'].get_tuple(), d['creator'], "'create'", {}
        ))

        # map sf.net's 1..9 priority onto the tracker's priorities;
        # feature trackers only distinguish feature vs. wish
        p = int(artifact['priority'])
        if artifact['artifact_type'] == 'Feature Requests':
            if p > 3:
                d['priority'] = feature
            else:
                d['priority'] = wish
        else:
            if p > 7:
                d['priority'] = critical
            elif p > 5:
                d['priority'] = urgent
            elif p > 3:
                d['priority'] = bug
            else:
                d['priority'] = feature

        s = artifact['status']
        if s == 'Closed':
            d['status'] = resolved
        elif s == 'Deleted':
            d['status'] = resolved
            d['is retired'] = True
        else:
            d['status'] = unread

        messages = []
        nosy = sets.Set()
        for message in artifact.get('messages', []):
            message_id += 1
            authid = users[message['user_name']]
            body = convert_message(message['body'], message_id)
            if not body: continue
            m = {'content': body, 'author': authid,
                 'date': message['adddate'], 'id': str(message_id),
                 'creation': message['adddate'], }
            message_data.append(m)
            messages.append(message_id)
            if authid not in (None, '2'):
                nosy.add(authid)
            activity = message['adddate']
            actor = authid
            # any follow-up message moves an unread issue to chatting
            if d['status'] == unread:
                d['status'] = chatting
        d['messages'] = messages
        d['nosy'] = list(nosy)

        files = []
        for event in artifact.get('history', []):
            if event['field_name'] == 'File Added':
                # split on the first colon only -- the file name itself
                # may contain colons
                fid, name = event['old_value'].split(':', 1)
                if fid in add_files:
                    files.append(fid)
                    name = name.strip()
                    try:
                        f = open(os.path.join(file_dir, fid))
                        content = f.read()
                        f.close()
                    except (IOError, OSError):
                        # fetch_files may not have fetched this one
                        content = 'content missing'
                    file_data.append({
                        'id': fid,
                        'creation': event['entrydate'],
                        'creator': users[event['mod_by']],
                        'name': name,
                        'type': mimetypes.guess_type(name)[0],
                        'content': content,
                    })
                continue
            elif event['field_name'] == 'close_date':
                action = "'set'"
                info = { 'status': 'unread' }
            elif event['field_name'] == 'summary':
                action = "'set'"
                info = { 'title': event['old_value'] }
            else:
                # not an event we map into the journal
                continue
            row = [ d['id'], event['entrydate'].get_tuple(),
                users[event['mod_by']], action, info ]
            if event['entrydate'] > activity:
                activity = event['entrydate']
            issue_journal.append(row)
        d['files'] = files

        d['activity'] = activity
        d['actor'] = actor
        issue_data.append(d)

    write_csv(db.issue, issue_data)
    write_csv(db.msg, message_data)
    write_csv(db.file, file_data)

    f = open('/tmp/imported/issue-journals.csv', 'w')
    writer = csv.writer(f, colon_separated)
    writer.writerows(issue_journal)
    f.close()
290
def convert_message(content, id):
    ''' Strip off the useless sf message header crap.

    sf.net prepends a three-line header to messages:

        Logged In: YES
        user_id=1234
        <blank line>
        actual message text ...

    When that header is present, drop the first three lines and strip
    surrounding whitespace.  A None or empty body is passed through
    unchanged (the XML export may contain empty <field> elements).
    *id* is unused but kept for interface compatibility.
    '''
    if not content:
        return content
    if content.startswith('Logged In: YES'):
        return '\n'.join(content.splitlines()[3:]).strip()
    return content
296
class colon_separated(csv.excel):
    # CSV dialect for roundup's import format: excel conventions,
    # but with ':' as the field delimiter
    delimiter = ':'
299
300 today = date.Date('.')
def write_csv(klass, data):
    """ Write the roundup import CSV file for *klass* under /tmp/imported/.

    *data* is a list of dicts mapping property name -> value.  Values
    are written as their repr() (dates via get_tuple()) so the roundup
    importer can evaluate them.  Missing properties get a per-type
    default.  For file-class entries the file content is written out
    alongside the CSV, and an empty <class>-journals.csv is always
    created.
    """
    props = klass.getprops()
    if not os.path.exists('/tmp/imported'):
        os.mkdir('/tmp/imported')
    f = open('/tmp/imported/%s.csv'%klass.classname, 'w')
    writer = csv.writer(f, colon_separated)
    propnames = klass.export_propnames()
    # the retired flag goes in the last column
    propnames.append('is retired')
    writer.writerow(propnames)
    for entry in data:
        row = []
        for name in propnames:
            if name == 'is retired':
                # appended after the loop so it lands in the last column
                continue
            prop = props[name]
            if entry.has_key(name):
                if isinstance(prop, hyperdb.Date) or \
                        isinstance(prop, hyperdb.Interval):
                    row.append(repr(entry[name].get_tuple()))
                elif isinstance(prop, hyperdb.Password):
                    row.append(repr(str(entry[name])))
                else:
                    row.append(repr(entry[name]))
            elif isinstance(prop, hyperdb.Multilink):
                row.append('[]')
            elif name in ('creator', 'actor'):
                # default unknown creator/actor to the admin user
                row.append("'1'")
            elif name in ('created', 'activity'):
                # no timestamp in the export: fall back to "today"
                row.append(repr(today.get_tuple()))
            else:
                row.append('None')
        row.append(entry.get('is retired', False))
        writer.writerow(row)

        if isinstance(klass, hyperdb.FileClass) and entry.get('content'):
            # write the actual file content next to the CSV
            fname = klass.exportFilename('/tmp/imported/', entry['id'])
            support.ensureParentsExist(fname)
            c = open(fname, 'w')
            if isinstance(entry['content'], unicode):
                c.write(entry['content'].encode('utf8'))
            else:
                c.write(entry['content'])
            c.close()

    f.close()
    # the importer expects a journals file to exist, even if empty
    f = open('/tmp/imported/%s-journals.csv'%klass.classname, 'w')
    f.close()
348
class Progress:
    '''Progress display for console applications.

    Wraps a sized sequence and, while iterated, paints a one-line
    percentage / ETA display on stdout (Python 2 iterator protocol,
    hence next() rather than __next__).

    See __main__ block at end of file for sample usage.
    '''
    def __init__(self, info, sequence):
        # info: label text; sequence: any iterable with a len()
        self.info = info
        self.sequence = iter(sequence)
        self.total = len(sequence)
        self.start = self.now = time.time()
        self.num = 0
        # repaint roughly every 1% of progress (at least every item)
        self.stepsize = self.total / 100 or 1
        self.steptimes = []
        self.display()

    def __iter__(self): return self

    def next(self):
        self.num += 1

        if self.num > self.total:
            # finished: overwrite the progress line with 'done'
            print self.info, 'done', ' '*(75-len(self.info)-6)
            sys.stdout.flush()
            return self.sequence.next()

        if self.num % self.stepsize:
            # not on a display step -- just pass the item through
            return self.sequence.next()

        self.display()
        return self.sequence.next()

    def display(self):
        # figure how long we've spent - guess how long to go
        now = time.time()
        steptime = now - self.now
        self.steptimes.insert(0, steptime)
        if len(self.steptimes) > 5:
            # smooth the ETA over the last five steps only
            self.steptimes.pop()
        steptime = sum(self.steptimes) / len(self.steptimes)
        self.now = now
        eta = steptime * ((self.total - self.num)/self.stepsize)

        # tell it like it is (or might be)
        if now - self.start > 3:
            # after a few seconds there's enough data to show an ETA
            M = eta / 60
            H = M / 60
            M = M % 60
            S = eta % 60
            s = '%s %2d%% (ETA %02d:%02d:%02d)'%(self.info,
                self.num * 100. / self.total, H, M, S)
        else:
            s = '%s %2d%%'%(self.info, self.num * 100. / self.total)
        sys.stdout.write(s + ' '*(75-len(s)) + '\r')
        sys.stdout.flush()
403
if __name__ == '__main__':
    # usage: import_sf.py import <tracker_home> <xml_file> <file_dir>
    #        import_sf.py files <xml_file> <file_dir>
    if len(sys.argv) < 2 or sys.argv[1] not in ('import', 'files'):
        # previously unknown/missing arguments silently did nothing
        # (or raised IndexError); print a usage message instead
        sys.stderr.write(
            'Usage: %s import <tracker_home> <xml_file> <file_dir>\n'
            '       %s files <xml_file> <file_dir>\n'
            % (sys.argv[0], sys.argv[0]))
        sys.exit(1)
    if sys.argv[1] == 'import':
        import_xml(*sys.argv[2:])
    else:
        fetch_files(*sys.argv[2:])
409

Roundup Issue Tracker: http://roundup-tracker.org/