Mercurial > p > roundup > code
annotate website/issues/extensions/spambayes.py @ 4632:4f25640b5521
Unified all copies of detectors/userauditor.py
The only functional change for the minimal and classic template is that the
error message for invalid addresses now contains the address that caused the
error. For the devel and website trackers the print command probably used
while debugging was removed. Everything else is comment and vim modeline.
| author | Thomas Arendsen Hein <thomas@intevation.de> |
|---|---|
| date | Fri, 01 Jun 2012 17:25:45 +0200 |
| parents | c2d0d3e9099d |
| children | ca692423e401 |
| rev | line source |
|---|---|
|
4024
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
1 import re, math |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
2 from roundup.cgi.actions import Action |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
3 from roundup.cgi.exceptions import * |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
4 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
5 import xmlrpclib, socket |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
6 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
7 REVPAT = re.compile(r'(r[0-9]+\b|rev(ision)? [0-9]+\b)') |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
8 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
9 def extract_classinfo(db, classname, nodeid): |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
10 node = db.getnode(classname, nodeid) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
11 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
12 authorage = node['creation'].timestamp() - \ |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
13 db.getnode('user', node.get('author', node.get('creator')))['creation'].timestamp() |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
14 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
15 authorid = node.get('author', node.get('creator')) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
16 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
17 content = db.getclass(classname).get(nodeid, 'content') |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
18 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
19 tokens = ["klass:%s" % classname, |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
20 "author:%s" % authorid, |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
21 "authorage:%d" % int(math.log(authorage)), |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
22 "hasrev:%s" % (REVPAT.search(content) is not None)] |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
23 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
24 return (content, tokens) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
25 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
26 def train_spambayes(db, content, tokens, is_spam): |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
27 spambayes_uri = db.config.detectors['SPAMBAYES_URI'] |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
28 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
29 server = xmlrpclib.ServerProxy(spambayes_uri, verbose=False) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
30 try: |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
31 server.train({'content':content}, tokens, {}, is_spam) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
32 return (True, None) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
33 except (socket.error, xmlrpclib.Error), e: |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
34 return (False, str(e)) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
35 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
36 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
37 class SpambayesClassify(Action): |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
38 permissionType = 'SB: May Classify' |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
39 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
40 def handle(self): |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
41 (content, tokens) = extract_classinfo(self.db, |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
42 self.classname, self.nodeid) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
43 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
44 if self.form.has_key("trainspam"): |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
45 is_spam = True |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
46 elif self.form.has_key("trainham"): |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
47 is_spam = False |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
48 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
49 (status, errmsg) = train_spambayes(self.db, content, tokens, |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
50 is_spam) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
51 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
52 node = self.db.getnode(self.classname, self.nodeid) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
53 props = {} |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
54 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
55 if status: |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
56 if node.get('spambayes_misclassified', False): |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
57 props['spambayes_misclassified'] = True |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
58 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
59 props['spambayes_score'] = 1.0 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
60 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
61 s = " SPAM" |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
62 if not is_spam: |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
63 props['spambayes_score'] = 0.0 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
64 s = " HAM" |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
65 self.client.ok_message.append(self._('Message classified as') + s) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
66 else: |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
67 self.client.error_message.append(self._('Unable to classify message, got error:') + errmsg) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
68 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
69 klass = self.db.getclass(self.classname) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
70 klass.set(self.nodeid, **props) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
71 self.db.commit() |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
72 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
73 def sb_is_spam(obj): |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
74 cutoff_score = float(obj._db.config.detectors['SPAMBAYES_SPAM_CUTOFF']) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
75 try: |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
76 score = obj['spambayes_score'] |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
77 except KeyError: |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
78 return False |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
79 return score >= cutoff_score |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
80 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
81 def init(instance): |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
82 instance.registerAction("spambayes_classify", SpambayesClassify) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
83 instance.registerUtil('sb_is_spam', sb_is_spam) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
84 |
