comparison website/issues/extensions/spambayes.py @ 4024:c2d0d3e9099d website

svn repository setup
author Stefan Seefeld <stefan@users.sourceforge.net>
date Fri, 06 Feb 2009 13:16:31 +0000
parents
children ca692423e401
comparison
equal deleted inserted replaced
4023:86c38b5aed66 4024:c2d0d3e9099d
1 import re, math
2 from roundup.cgi.actions import Action
3 from roundup.cgi.exceptions import *
4
5 import xmlrpclib, socket
6
# Matches a revision reference in message text: "r" followed by digits
# (e.g. "r1234"), or "rev "/"revision " followed by digits. The match is
# case-sensitive, and the first alternative has no leading word boundary,
# so an "r<digits>" run at the end of a longer word also matches.
REVPAT = re.compile(r'(r[0-9]+\b|rev(ision)? [0-9]+\b)')
8
def extract_classinfo(db, classname, nodeid):
    """Collect the text and the extra classifier tokens for one node.

    Parameters:
        db        -- tracker database; must provide getnode() and getclass().
        classname -- name of the class the node belongs to.
        nodeid    -- id of the node within that class.

    Returns a ``(content, tokens)`` tuple. ``content`` is the node's
    'content' property. ``tokens`` is a list of "name:value" strings giving
    the class name, the author id, the log-scaled difference between the
    node's and the author's creation timestamps, and whether the content
    contains a revision reference (see REVPAT).
    """
    node = db.getnode(classname, nodeid)

    # Prefer an explicit 'author' property and fall back to 'creator'.
    # Resolved once and reused for both the user lookup and the token.
    authorid = node.get('author', node.get('creator'))
    author = db.getnode('user', authorid)

    authorage = node['creation'].timestamp() - author['creation'].timestamp()

    # math.log() raises ValueError for arguments <= 0, which is what we get
    # when both creation timestamps are equal (or the account is the newer
    # of the two). Put those cases in bucket 0 instead of crashing.
    if authorage > 0:
        agebucket = int(math.log(authorage))
    else:
        agebucket = 0

    content = db.getclass(classname).get(nodeid, 'content')

    tokens = ["klass:%s" % classname,
              "author:%s" % authorid,
              "authorage:%d" % agebucket,
              "hasrev:%s" % (REVPAT.search(content) is not None)]

    return (content, tokens)
25
def train_spambayes(db, content, tokens, is_spam):
    """Submit one training example to the SpamBayes XML-RPC server.

    The server address is read from the 'SPAMBAYES_URI' entry of
    ``db.config.detectors``.

    Returns ``(True, None)`` when the remote ``train`` call completes, or
    ``(False, message)`` when it fails with a socket or XML-RPC error,
    where ``message`` is the string form of the exception.
    """
    uri = db.config.detectors['SPAMBAYES_URI']
    proxy = xmlrpclib.ServerProxy(uri, verbose=False)
    payload = {'content': content}

    try:
        proxy.train(payload, tokens, {}, is_spam)
    except (socket.error, xmlrpclib.Error) as err:
        # Report the failure to the caller rather than raising.
        return (False, str(err))

    return (True, None)
35
36
class SpambayesClassify(Action):
    """Web action that trains SpamBayes with a manual spam/ham verdict.

    The verdict is taken from the submitted form: a "trainspam" field means
    spam, a "trainham" field means ham.
    """
    permissionType = 'SB: May Classify'

    def handle(self):
        """Train the classifier on the current node and record the result.

        On successful training the node's 'spambayes_score' is set to 1.0
        (spam) or 0.0 (ham) and a confirmation is added to the client's
        ok messages. On failure the error text is added to the client's
        error messages. If the form carries neither "trainspam" nor
        "trainham", an error message is added and nothing else is done.
        """
        # Decide the verdict first: without one there is nothing to train,
        # and continuing would fail on an unbound 'is_spam'.
        if "trainspam" in self.form:
            is_spam = True
        elif "trainham" in self.form:
            is_spam = False
        else:
            self.client.error_message.append(
                self._('Unable to classify message: no classification '
                       'was requested'))
            return

        (content, tokens) = extract_classinfo(self.db,
                                              self.classname, self.nodeid)

        (status, errmsg) = train_spambayes(self.db, content, tokens,
                                           is_spam)

        node = self.db.getnode(self.classname, self.nodeid)
        props = {}

        if status:
            # NOTE(review): this only re-asserts the flag when it is
            # already set; it never turns it on. Confirm that is intended.
            if node.get('spambayes_misclassified', False):
                props['spambayes_misclassified'] = True

            props['spambayes_score'] = 1.0

            s = " SPAM"
            if not is_spam:
                props['spambayes_score'] = 0.0
                s = " HAM"
            self.client.ok_message.append(self._('Message classified as') + s)
        else:
            self.client.error_message.append(self._('Unable to classify message, got error:') + errmsg)

        klass = self.db.getclass(self.classname)
        klass.set(self.nodeid, **props)
        self.db.commit()
72
def sb_is_spam(obj):
    """Tell whether *obj*'s stored SpamBayes score reaches the spam cutoff.

    The cutoff is the 'SPAMBAYES_SPAM_CUTOFF' entry of
    ``obj._db.config.detectors``, converted to float. Returns False when
    looking up 'spambayes_score' on *obj* raises KeyError; otherwise
    returns the result of ``score >= cutoff``.
    """
    threshold = float(obj._db.config.detectors['SPAMBAYES_SPAM_CUTOFF'])

    try:
        current = obj['spambayes_score']
    except KeyError:
        # No score available for this object: treat it as not spam.
        verdict = False
    else:
        verdict = current >= threshold

    return verdict
80
def init(instance):
    """Register this extension's action and template utility on *instance*.

    Registers SpambayesClassify under the action name "spambayes_classify"
    and sb_is_spam under the utility name 'sb_is_spam'.
    """
    # Action first, then the utility function.
    instance.registerAction(
        "spambayes_classify",
        SpambayesClassify)
    instance.registerUtil(
        'sb_is_spam',
        sb_is_spam)
84

Roundup Issue Tracker: http://roundup-tracker.org/