Mercurial > p > roundup > code
annotate website/issues/extensions/spambayes.py @ 5110:87b0358790ed
Adding some tests for admin.py. Specifically for issue2550572: setting
nosy=+foo on multiple issues gives them all the same exact nosy
list.
To make this work had to change the admin.py code to use
"sys.stdout.write" in place of "print". In the test I now hijack
stdout.write following an existing example of this for admin's
import/export command that hijacks sys.stderr.write.
Also I corrected a misspelling in security.py. The word "everything"
was misspelled. It is not inside _() markers so I don't think it's
going to affect translation and grepping the locale subdir doesn't
show the original string.
| author | John Rouillard <rouilj@ieee.org> |
|---|---|
| date | Wed, 29 Jun 2016 18:35:19 -0400 |
| parents | ca692423e401 |
| children | 198b6e810c67 |
| rev | line source |
|---|---|
|
4024
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
1 import re, math |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
2 from roundup.cgi.actions import Action |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
3 from roundup.cgi.exceptions import * |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
4 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
5 import xmlrpclib, socket |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
6 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
7 REVPAT = re.compile(r'(r[0-9]+\b|rev(ision)? [0-9]+\b)') |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
8 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
9 def extract_classinfo(db, classname, nodeid): |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
10 node = db.getnode(classname, nodeid) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
11 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
12 authorage = node['creation'].timestamp() - \ |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
13 db.getnode('user', node.get('author', node.get('creator')))['creation'].timestamp() |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
14 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
15 authorid = node.get('author', node.get('creator')) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
16 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
17 content = db.getclass(classname).get(nodeid, 'content') |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
18 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
19 tokens = ["klass:%s" % classname, |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
20 "author:%s" % authorid, |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
21 "authorage:%d" % int(math.log(authorage)), |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
22 "hasrev:%s" % (REVPAT.search(content) is not None)] |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
23 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
24 return (content, tokens) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
25 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
26 def train_spambayes(db, content, tokens, is_spam): |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
27 spambayes_uri = db.config.detectors['SPAMBAYES_URI'] |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
28 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
29 server = xmlrpclib.ServerProxy(spambayes_uri, verbose=False) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
30 try: |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
31 server.train({'content':content}, tokens, {}, is_spam) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
32 return (True, None) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
33 except (socket.error, xmlrpclib.Error), e: |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
34 return (False, str(e)) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
35 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
36 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
37 class SpambayesClassify(Action): |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
38 permissionType = 'SB: May Classify' |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
39 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
40 def handle(self): |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
41 (content, tokens) = extract_classinfo(self.db, |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
42 self.classname, self.nodeid) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
43 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
44 if self.form.has_key("trainspam"): |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
45 is_spam = True |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
46 elif self.form.has_key("trainham"): |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
47 is_spam = False |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
48 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
49 (status, errmsg) = train_spambayes(self.db, content, tokens, |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
50 is_spam) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
51 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
52 node = self.db.getnode(self.classname, self.nodeid) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
53 props = {} |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
54 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
55 if status: |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
56 if node.get('spambayes_misclassified', False): |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
57 props['spambayes_misclassified'] = True |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
58 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
59 props['spambayes_score'] = 1.0 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
60 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
61 s = " SPAM" |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
62 if not is_spam: |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
63 props['spambayes_score'] = 0.0 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
64 s = " HAM" |
|
4880
ca692423e401
Different approach to fix XSS in issue2550817
Ralf Schlatterbeck <rsc@runtux.com>
parents:
4024
diff
changeset
|
65 self.client.add_ok_message(self._('Message classified as') + s) |
|
4024
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
66 else: |
|
4880
ca692423e401
Different approach to fix XSS in issue2550817
Ralf Schlatterbeck <rsc@runtux.com>
parents:
4024
diff
changeset
|
67 self.client.add_error_message(self._('Unable to classify message, got error:') + errmsg) |
|
4024
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
68 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
69 klass = self.db.getclass(self.classname) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
70 klass.set(self.nodeid, **props) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
71 self.db.commit() |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
72 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
73 def sb_is_spam(obj): |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
74 cutoff_score = float(obj._db.config.detectors['SPAMBAYES_SPAM_CUTOFF']) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
75 try: |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
76 score = obj['spambayes_score'] |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
77 except KeyError: |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
78 return False |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
79 return score >= cutoff_score |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
80 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
81 def init(instance): |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
82 instance.registerAction("spambayes_classify", SpambayesClassify) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
83 instance.registerUtil('sb_is_spam', sb_is_spam) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
84 |
