forked from adamlaska/datatracker
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathiana.py
More file actions
304 lines (242 loc) · 11 KB
/
iana.py
File metadata and controls
304 lines (242 loc) · 11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
# Copyright The IETF Trust 2012-2020, All Rights Reserved
# -*- coding: utf-8 -*-
import base64
import datetime
import email
import json
import re
import requests
from django.conf import settings
from django.utils.encoding import smart_bytes, force_str
from django.utils.http import urlquote
import debug # pyflakes:ignore
from ietf.doc.mails import email_state_changed
from ietf.doc.models import Document, DocEvent, State, StateDocEvent, StateType
from ietf.doc.utils import add_state_change_event
from ietf.person.models import Person
from ietf.utils.log import log
from ietf.utils.mail import parseaddr, get_payload_text
from ietf.utils.timezone import local_timezone_to_utc, email_time_to_local_timezone, utc_to_local_timezone
#PROTOCOLS_URL = "https://www.iana.org/protocols/"
#CHANGES_URL = "https://datatracker.dev.icann.org:8080/data-tracker/changes"
def parse_protocol_page(text):
    """Extract the RFCs referenced on the IANA protocols page.

    Every ``RFC <digits>`` occurrence in *text* is turned into an
    ``rfcXXXX`` document name.  The result is a list without duplicates,
    in no particular order.
    """
    marker = "RFC "
    names = {
        "rfc" + hit[len(marker):]
        for hit in re.findall('RFC [0-9]+', force_str(text))
    }
    return list(names)
def update_rfc_log_from_protocol_page(rfc_names, rfc_must_published_later_than):
    """Note in each RFC's history log that IANA is now referencing it.

    Looks up the documents whose alias is in *rfc_names*, drops those that
    already have an "rfc_in_iana_registry" event, and keeps only those with
    a "published_rfc" event at or after *rfc_must_published_later_than*.
    Each remaining document gets a new "rfc_in_iana_registry" event
    attributed to the "(System)" person.

    Returns the list of documents that received a new event.
    """
    system = Person.objects.get(name="(System)")

    candidates = Document.objects.filter(docalias__name__in=rfc_names)
    # RFCs that already carry the notice are left alone
    candidates = candidates.exclude(docevent__type="rfc_in_iana_registry")
    # only take those that were published after cutoff since we
    # have a big bunch of old RFCs that we unfortunately don't have data for
    candidates = candidates.filter(
        docevent__type="published_rfc",
        docevent__time__gte=rfc_must_published_later_than,
    ).distinct()

    updated = []
    for rfc in candidates:
        notice = DocEvent(
            doc=rfc,
            rev=rfc.rev,
            by=system,
            type="rfc_in_iana_registry",
            desc="IANA registries were updated to include %s" % rfc.display_name(),
        )
        notice.save()
        updated.append(rfc)

    return updated
def fetch_changes_json(url, start, end):
    """Download the IANA changes between *start* and *end* as raw text.

    Both times are passed through ``local_timezone_to_utc``, formatted as
    ``YYYY-MM-DD HH:MM:SS`` and appended to *url* as the ``start`` and
    ``end`` query parameters.  The request authenticates with HTTP basic
    auth as user "ietfsync" using ``settings.IANA_SYNC_PASSWORD``.

    Returns the body of the response as text.
    Raises RuntimeError if the request times out.
    """
    time_format = "%Y-%m-%d %H:%M:%S"
    start_arg = urlquote(local_timezone_to_utc(start).strftime(time_format))
    end_arg = urlquote(local_timezone_to_utc(end).strftime(time_format))
    url += "?start=%s&end=%s" % (start_arg, end_arg)

    # HTTP basic auth
    username = "ietfsync"
    password = settings.IANA_SYNC_PASSWORD
    credentials = smart_bytes("%s:%s" % (username, password))
    # b64encode emits no line breaks, so there is nothing to strip afterwards
    token = force_str(base64.b64encode(credentials))
    headers = {"Authorization": "Basic %s" % token}

    try:
        response = requests.get(url, headers=headers, timeout=settings.DEFAULT_REQUESTS_TIMEOUT)
    except requests.Timeout as exc:
        log(f'GET request failed for [{url}]: {exc}')
        raise RuntimeError(f'Timeout retrieving [{url}]') from exc

    return response.text
def parse_changes_json(text):
    """Decode the JSON changes document from IANA into a list of changes.

    Each change must contain the fields 'doc', 'type' and 'time'.  The
    'doc' value is stripped of surrounding whitespace and of a leading
    internet-drafts URL, in place.  The list is returned sorted by 'time',
    oldest first.

    Raises Exception if the server reported an error or a change lacks a
    required field.
    """
    response = json.loads(text)
    if "error" in response:
        raise Exception("IANA server returned error: %s" % response["error"])

    changes = response["changes"]
    required_fields = ('doc', 'type', 'time')
    draft_url = "https://www.ietf.org/internet-drafts/"

    for change in changes:
        # do some rudimentary validation
        missing = next((name for name in required_fields if name not in change), None)
        if missing is not None:
            raise Exception('Error in response: Field %s missing in input: %s - %s' % (missing, json.dumps(change), json.dumps(changes)))

        # a little bit of cleaning
        doc = change["doc"].strip()
        if doc.startswith(draft_url):
            doc = doc[len(draft_url):]
        change["doc"] = doc

    # make sure we process oldest entries first
    changes.sort(key=lambda entry: entry["time"])

    return changes
def update_history_with_changes(changes, send_email=True):
    """Apply parsed changes from IANA to the document histories.

    The changes are expected to be chronologically sorted, otherwise the
    generated change descriptions may not be right.  Only changes of type
    "iana_state" and "iana_review" are acted upon.

    When *send_email* is true, a state-change mail is sent for every
    document whose state actually changed.

    Returns a tuple ``(added_events, warnings)``: the events that were
    added, and a list of human-readable messages about changes that had to
    be skipped.
    """
    def used_states_by_slug(state_type_slug):
        # slug -> State, for the states of the given type that are in use
        return {
            s.slug: s
            for s in State.objects.filter(used=True, type=StateType.objects.get(slug=state_type_slug))
        }

    # build up state lookup
    by_slug = used_states_by_slug("draft-iana-action")
    action_states = {
        "": by_slug["newdoc"],
        "In Progress": by_slug["inprog"],
        "Open": by_slug["inprog"],
        "pre-approval In Progress": by_slug["inprog"],
        "Waiting on Authors": by_slug["waitauth"],
        "Author": by_slug["waitauth"],
        "Waiting on ADs": by_slug["waitad"],
        "Waiting on AD": by_slug["waitad"],
        "AD": by_slug["waitad"],
        "Waiting on WGC": by_slug["waitwgc"],
        "WGC": by_slug["waitwgc"],
        "Waiting on RFC-Editor": by_slug["waitrfc"],
        "Waiting on RFC Editor": by_slug["waitrfc"],
        "RFC-Editor": by_slug["waitrfc"],
        "RFC-Ed-ACK": by_slug["rfcedack"],
        "RFC-Editor-ACK": by_slug["rfcedack"],
        "Completed": by_slug["rfcedack"],
        "On Hold": by_slug["onhold"],
        "No IC": by_slug["noic"],
    }

    by_slug = used_states_by_slug("draft-iana-review")
    review_states = {
        "IANA Review Needed": by_slug["need-rev"],
        "IANA - Review Needed": by_slug["need-rev"],
        "IANA OK - Actions Needed": by_slug["ok-act"],
        "IANA OK - No Actions Needed": by_slug["ok-noact"],
        "IANA Not OK": by_slug["not-ok"],
        "IANA - Not OK": by_slug["not-ok"],
        "Version Changed - Review Needed": by_slug["changed"],
    }

    states = {"action": action_states, "review": review_states}

    # so it turns out IANA has made a mistake and are including some
    # wrong states, we'll have to skip those
    wrong_action_states = ("Waiting on Reviewer", "Review Complete", "Last Call",
                           "Last Call - Questions", "Evaluation", "Evaluation - Questions",
                           "With Reviewer", "IESG Notification Received", "Watiing on Last Call",
                           "IANA Comments Submitted", "Waiting on Last Call")

    system = Person.objects.get(name="(System)")

    added_events = []
    warnings = []

    for change in changes:
        docname = change['doc']
        # timestamps are in UTC
        timestamp = utc_to_local_timezone(
            datetime.datetime.strptime(change["time"], "%Y-%m-%d %H:%M:%S"))

        change_type = change['type']
        if change_type not in ("iana_state", "iana_review"):
            continue

        kind = "action" if change_type == "iana_state" else "review"
        iana_state = change["state"]

        if kind == "action" and iana_state in wrong_action_states:
            warnings.append("Wrong action state '%s' encountered in changes from IANA" % iana_state)
            continue

        if iana_state not in states[kind]:
            warnings.append("Unknown IANA %s state %s (%s)" % (kind, iana_state, timestamp))
            continue

        state = states[kind][iana_state]
        state_type = "draft-iana-%s" % kind

        if state.slug in ("need-rev", "changed"):
            # the Datatracker is the ultimate source of these
            # states, so skip them
            continue

        # an event with this time, state type and state already exists:
        # nothing to add
        already_recorded = StateDocEvent.objects.filter(type="changed_state", time=timestamp,
                                                        state_type=state_type, state=state)
        if already_recorded:
            continue

        try:
            doc = Document.objects.get(docalias__name=docname)
        except Document.DoesNotExist:
            warnings.append("Document %s not found" % docname)
            continue

        # the naive way of extracting prev_state here means
        # that we assume these changes are chronologically
        # applied
        prev_state = doc.get_state(state_type)
        event = add_state_change_event(doc, system, prev_state, state, timestamp=timestamp)

        if event:
            # for logging purposes
            event.json = change
            added_events.append(event)

        # only touch the document's state when no later event of this
        # state type exists for it
        newer_events = StateDocEvent.objects.filter(doc=doc, time__gt=timestamp, state_type=state_type)
        if newer_events:
            continue

        doc.set_state(state)

        if event:
            doc.save_with_history([event])

        if send_email and (state != prev_state):
            email_state_changed(None, doc, "IANA %s state changed to \"%s\"" % (kind, state.name),'doc_iana_state_changed')

    return added_events, warnings
def find_document_name(text):
    """Return the first document name found in *text*, or None.

    A document name starts with one of the prefixes "draft",
    "conflict-review", "status-change" or "charter", followed by one or
    more ``-word`` parts and optionally a ``-NN.txt`` revision suffix.  The
    name must not be directly preceded or followed by a letter, digit or
    hyphen.  The matched text is returned lower-cased.

    None is returned when *text* is None or empty, or contains no name.
    """
    if not text:
        # A missing email header is reported as None by the email package;
        # treat it like text without a document name instead of letting
        # re.search raise TypeError.
        return None

    prefixes = ['draft','conflict-review','status-change','charter']
    leading_delimiter_re = r'(?<![-a-zA-Z0-9])'
    prefix_re = r'(%s)' % '|'.join(prefixes)
    tail_re = r'(-[a-z0-9]+)+?(-\d\d\.txt)?'
    trailing_delimiter_re = r'((?![-a-zA-Z0-9])|$)'
    name_re = r'%s(%s%s)%s' % (leading_delimiter_re, prefix_re, tail_re, trailing_delimiter_re)

    m = re.search(name_re, text)
    return m.group(0).lower() if m else None
def strip_version_extension(text):
    """Return *text* without a trailing file extension and revision.

    A three-character extension (such as ``.txt``) is removed first, then
    a two-digit ``-NN`` revision, so ``draft-foo-01.txt`` becomes
    ``draft-foo``.  Text with neither suffix is returned unchanged.
    """
    # (pattern, number of trailing characters it stands for), in the order
    # they have to be removed
    suffixes = (
        (r"\.\w{3}$", 4),  # strip off extension
        (r"-\d{2}$", 3),  # strip off revision
    )
    for pattern, width in suffixes:
        if re.search(pattern, text):
            text = text[:-width]
    return text
def parse_review_email(text):
    """Pull the IANA review details out of an email message.

    Returns a tuple ``(doc_name, review_time, by, comment)``:

    * doc_name: the document name found in the Subject, without revision
      and extension; a name embedded in the "(BEGIN IANA ... COMMENTS)"
      marker of the body takes precedence.  Empty if none is found.
    * review_time: the converted Date header, or the current time when the
      message has no Date header.
    * by: the Person matching the sender's name (minus a trailing
      " via RT") within the "iana" group, or the "(System)" person.
    * comment: the text between the BEGIN and END markers with a leading
      "IESG:" and a trailing "Thanks," signature removed, prefixed with
      "(Via <address>): " when the From header has an address in angle
      brackets.
    """
    msg = email.message_from_string(force_str(text))

    # doc
    doc_name = strip_version_extension(find_document_name(msg["Subject"]) or "")

    # date
    if "Date" in msg:
        review_time = email_time_to_local_timezone(msg["Date"])
    else:
        review_time = datetime.datetime.now()

    # by
    rt_suffix = " via RT"
    name, __ = parseaddr(msg["From"])
    if name.endswith(rt_suffix):
        name = name[:-len(rt_suffix)]

    try:
        by = Person.objects.get(alias__name=name, role__group__acronym="iana")
    except Person.DoesNotExist:
        by = None

    if not by:
        by = Person.objects.get(name="(System)")

    # comment
    body = get_payload_text(msg).replace("\r", "")

    begin_search = re.search(r'\(BEGIN\s+IANA\s+(LAST\s+CALL\s+)?COMMENTS?(\s*:\s*[a-zA-Z0-9-\.]*)?\s*\)',body)
    end_search = re.search(r'\(END\s+IANA\s+(LAST\s+CALL\s+)?COMMENTS?\)',body)

    comment = ""
    if begin_search and end_search:
        # the comment is whatever sits between the two markers
        comment = body[begin_search.end():end_search.start()].strip()
        embedded_name = strip_version_extension(find_document_name(begin_search.group(0)) or "")
        if embedded_name:
            doc_name = embedded_name

    # strip leading IESG:
    iesg_prefix = "IESG:"
    if comment.startswith(iesg_prefix):
        comment = comment[len(iesg_prefix):].lstrip()

    # strip ending Thanks, followed by signature
    signature = re.search(r"^Thanks,\n\n", comment, re.MULTILINE)
    if signature:
        comment = comment[:signature.start()].rstrip()

    sender = re.search(r"<(.*)>", msg["From"])
    if sender:
        comment = '(Via %s): %s' % (sender.group(1).strip(), comment)

    return doc_name, review_time, by, comment
def add_review_comment(doc_name, review_time, by, comment):
    """Store an IANA review comment as an "iana_review" event.

    Nothing happens for an empty *comment*.  Otherwise the "iana_review"
    event of document *doc_name* at *review_time* is reused if it exists,
    or a new one is created for the document; its description is set to
    *comment*, its author to *by*, and it is saved.
    """
    if not comment:
        return

    try:
        event = DocEvent.objects.get(doc__name=doc_name, time=review_time, type="iana_review")
    except DocEvent.DoesNotExist:
        doc = Document.objects.get(name=doc_name)
        event = DocEvent(doc=doc, rev=doc.rev, time=review_time, type="iana_review")

    event.desc = comment
    event.by = by
    event.save()