diff website/issues/extensions/spambayes.py @ 4024:c2d0d3e9099d website

svn repository setup
author Stefan Seefeld <stefan@users.sourceforge.net>
date Fri, 06 Feb 2009 13:16:31 +0000
parents
children ca692423e401
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/website/issues/extensions/spambayes.py	Fri Feb 06 13:16:31 2009 +0000
@@ -0,0 +1,84 @@
+import re, math
+from roundup.cgi.actions import Action
+from roundup.cgi.exceptions import *
+
+import xmlrpclib, socket
+
+# Matches textual revision references such as "r123", "rev 123" or
+# "revision 123"; extract_classinfo() uses it to derive the "hasrev" token.
+REVPAT = re.compile(r'(r[0-9]+\b|rev(ision)? [0-9]+\b)')
+
+def extract_classinfo(db, classname, nodeid):
+    node = db.getnode(classname, nodeid)
+
+    authorage = node['creation'].timestamp() - \
+                db.getnode('user', node.get('author', node.get('creator')))['creation'].timestamp()
+
+    authorid = node.get('author', node.get('creator'))
+
+    content = db.getclass(classname).get(nodeid, 'content')
+
+    tokens = ["klass:%s" % classname,
+              "author:%s" % authorid,
+              "authorage:%d" % int(math.log(authorage)),
+              "hasrev:%s" % (REVPAT.search(content) is not None)]
+
+    return (content, tokens)
+
+def train_spambayes(db, content, tokens, is_spam):
+    spambayes_uri = db.config.detectors['SPAMBAYES_URI']
+
+    server = xmlrpclib.ServerProxy(spambayes_uri, verbose=False)
+    try:
+        server.train({'content':content}, tokens, {}, is_spam)
+        return (True, None)
+    except (socket.error, xmlrpclib.Error), e:
+        return (False, str(e))
+
+
+class SpambayesClassify(Action):
+    permissionType = 'SB: May Classify'
+    
+    def handle(self):
+        (content, tokens) = extract_classinfo(self.db,
+                                              self.classname, self.nodeid)
+
+        if self.form.has_key("trainspam"):
+            is_spam = True
+        elif self.form.has_key("trainham"):
+            is_spam = False
+
+        (status, errmsg) = train_spambayes(self.db, content, tokens,
+                                           is_spam)
+
+        node = self.db.getnode(self.classname, self.nodeid)
+        props = {}
+
+        if status:
+            if node.get('spambayes_misclassified', False):
+                props['spambayes_misclassified'] = True
+
+            props['spambayes_score'] = 1.0
+            
+            s = " SPAM"
+            if not is_spam:
+                props['spambayes_score'] = 0.0
+                s = " HAM"
+            self.client.ok_message.append(self._('Message classified as') + s)
+        else:
+            self.client.error_message.append(self._('Unable to classify message, got error:') + errmsg)
+
+        klass = self.db.getclass(self.classname)
+        klass.set(self.nodeid, **props)
+        self.db.commit()
+
+def sb_is_spam(obj):
+    cutoff_score = float(obj._db.config.detectors['SPAMBAYES_SPAM_CUTOFF'])
+    try:
+        score = obj['spambayes_score']
+    except KeyError:
+        return False
+    return score >= cutoff_score
+
+def init(instance):
+    instance.registerAction("spambayes_classify", SpambayesClassify)
+    instance.registerUtil('sb_is_spam', sb_is_spam)
+    

Roundup Issue Tracker: http://roundup-tracker.org/