Mercurial > p > roundup > code
diff website/issues/extensions/spambayes.py @ 4024:c2d0d3e9099d website
svn repository setup
| author | Stefan Seefeld <stefan@users.sourceforge.net> |
|---|---|
| date | Fri, 06 Feb 2009 13:16:31 +0000 |
| parents | |
| children | ca692423e401 |
line wrap: on
line diff
"""Roundup tracker extension for training a SpamBayes XML-RPC server.

Registers the ``spambayes_classify`` action, which trains the server with
the content of a tracker node as spam or ham, and the ``sb_is_spam``
template utility, which compares a node's stored score against the
configured cutoff.

Configuration is read from the ``detectors`` section of the tracker
config: ``SPAMBAYES_URI`` and ``SPAMBAYES_SPAM_CUTOFF``.
"""
import re
import math
from roundup.cgi.actions import Action
from roundup.cgi.exceptions import *

try:
    import xmlrpclib
except ImportError:
    # Python 3 renamed the module; keep the old name used throughout.
    import xmlrpc.client as xmlrpclib
import socket

# Matches revision references such as "r1234", "rev 1234" or "revision 1234".
REVPAT = re.compile(r'(r[0-9]+\b|rev(ision)? [0-9]+\b)')


def extract_classinfo(db, classname, nodeid):
    """Return ``(content, tokens)`` describing a node for SpamBayes.

    ``content`` is the node's ``content`` property.  ``tokens`` is a list
    of extra string tokens: the class name, the author id, the integer
    natural log of the author's account age (node creation timestamp
    minus the author's user-creation timestamp) and whether the content
    contains a revision reference.
    """
    node = db.getnode(classname, nodeid)

    # Fall back to the node's creator when it has no 'author' property.
    authorid = node.get('author', node.get('creator'))
    author = db.getnode('user', authorid)

    authorage = (node['creation'].timestamp()
                 - author['creation'].timestamp())

    # math.log() raises ValueError for values <= 0, which happens when the
    # node and its author carry the same (or out-of-order) creation
    # timestamps; put those in the lowest bucket instead of crashing.
    if authorage > 0:
        age_bucket = int(math.log(authorage))
    else:
        age_bucket = 0

    content = db.getclass(classname).get(nodeid, 'content')

    tokens = ["klass:%s" % classname,
              "author:%s" % authorid,
              "authorage:%d" % age_bucket,
              "hasrev:%s" % (REVPAT.search(content) is not None)]

    return (content, tokens)


def train_spambayes(db, content, tokens, is_spam):
    """Train the configured SpamBayes server with one message.

    Returns ``(True, None)`` on success, or ``(False, message)`` when the
    call fails with a socket or XML-RPC error.
    """
    spambayes_uri = db.config.detectors['SPAMBAYES_URI']

    server = xmlrpclib.ServerProxy(spambayes_uri, verbose=False)
    try:
        server.train({'content': content}, tokens, {}, is_spam)
    except (socket.error, xmlrpclib.Error) as e:
        return (False, str(e))
    return (True, None)


class SpambayesClassify(Action):
    """Web action that trains SpamBayes with the current node."""

    permissionType = 'SB: May Classify'

    def handle(self):
        """Train on the current node as spam or ham and store the score.

        The form must contain ``trainspam`` or ``trainham``; otherwise an
        error message is reported and nothing is trained or stored.
        """
        # Decide the training direction first: without either key there is
        # nothing to do, and the original code failed with an unbound
        # ``is_spam`` in that case.
        if "trainspam" in self.form:
            is_spam = True
        elif "trainham" in self.form:
            is_spam = False
        else:
            self.client.error_message.append(self._(
                'No classification requested: '
                'expected "trainspam" or "trainham"'))
            return

        (content, tokens) = extract_classinfo(self.db,
                                              self.classname, self.nodeid)

        (status, errmsg) = train_spambayes(self.db, content, tokens,
                                           is_spam)

        node = self.db.getnode(self.classname, self.nodeid)
        props = {}

        if status:
            # NOTE(review): this only re-asserts a flag that is already
            # set; kept as in the original - confirm the intent.
            if node.get('spambayes_misclassified', False):
                props['spambayes_misclassified'] = True

            # Manual training pins the score to the extreme for the class.
            if is_spam:
                props['spambayes_score'] = 1.0
                s = " SPAM"
            else:
                props['spambayes_score'] = 0.0
                s = " HAM"
            self.client.ok_message.append(
                self._('Message classified as') + s)
        else:
            self.client.error_message.append(
                self._('Unable to classify message, got error:') + errmsg)

        klass = self.db.getclass(self.classname)
        klass.set(self.nodeid, **props)
        self.db.commit()


def sb_is_spam(obj):
    """Return True when ``obj``'s ``spambayes_score`` reaches the cutoff.

    The cutoff is ``SPAMBAYES_SPAM_CUTOFF`` from the detectors config.
    A missing or unset score counts as not spam.
    """
    cutoff_score = float(obj._db.config.detectors['SPAMBAYES_SPAM_CUTOFF'])
    try:
        score = obj['spambayes_score']
    except KeyError:
        return False
    # An unset score cannot be ordered against a float on Python 3;
    # Python 2 evaluated ``None >= cutoff`` as False, so keep that result.
    if score is None:
        return False
    return score >= cutoff_score


def init(instance):
    """Register the classify action and the ``sb_is_spam`` utility."""
    instance.registerAction("spambayes_classify", SpambayesClassify)
    instance.registerUtil('sb_is_spam', sb_is_spam)
