view website/issues/extensions/spambayes.py @ 8089:f2b049b49fca

fix: replace hardcoded 'html' as template directory Template directory is now determined by loading the config file from the tracker home and using the template setting. Also fixed error message when polib import fails to indicate extraction is only from thml templates.
author John Rouillard <rouilj@ieee.org>
date Tue, 16 Jul 2024 01:28:15 -0400
parents e46ce04d5bbc
children
line wrap: on
line source

import re, math
from roundup.cgi.actions import Action
from roundup.cgi.exceptions import *
from roundup.anypy import xmlrpc_

import socket

REVPAT = re.compile(r'(r[0-9]+\b|rev(ision)? [0-9]+\b)')

def extract_classinfo(db, classname, nodeid):
    node = db.getnode(classname, nodeid)

    authorage = node['creation'].timestamp() - \
                db.getnode('user', node.get('author', node.get('creator')))['creation'].timestamp()

    authorid = node.get('author', node.get('creator'))

    content = db.getclass(classname).get(nodeid, 'content')

    tokens = ["klass:%s" % classname,
              "author:%s" % authorid,
              "authorage:%d" % int(math.log(authorage)),
              "hasrev:%s" % (REVPAT.search(content) is not None)]

    return (content, tokens)

def train_spambayes(db, content, tokens, is_spam):
    spambayes_uri = db.config.detectors['SPAMBAYES_URI']

    server = xmlrpc_.client.ServerProxy(spambayes_uri, verbose=False)
    try:
        server.train({'content':content}, tokens, {}, is_spam)
        return (True, None)
    except (socket.error, xmlrpc_.client.Error) as e:
        return (False, str(e))


class SpambayesClassify(Action):
    permissionType = 'SB: May Classify'
    
    def handle(self):
        (content, tokens) = extract_classinfo(self.db,
                                              self.classname, self.nodeid)

        if "trainspam" in self.form:
            is_spam = True
        elif "trainham" in self.form:
            is_spam = False

        (status, errmsg) = train_spambayes(self.db, content, tokens,
                                           is_spam)

        node = self.db.getnode(self.classname, self.nodeid)
        props = {}

        if status:
            if node.get('spambayes_misclassified', False):
                props['spambayes_misclassified'] = True

            props['spambayes_score'] = 1.0
            
            s = " SPAM"
            if not is_spam:
                props['spambayes_score'] = 0.0
                s = " HAM"
            self.client.add_ok_message(self._('Message classified as') + s)
        else:
            self.client.add_error_message(self._('Unable to classify message, got error:') + errmsg)

        klass = self.db.getclass(self.classname)
        klass.set(self.nodeid, **props)
        self.db.commit()

def sb_is_spam(obj):
    cutoff_score = float(obj._db.config.detectors['SPAMBAYES_SPAM_CUTOFF'])
    try:
        score = obj['spambayes_score']
    except KeyError:
        return False
    return score >= cutoff_score

def init(instance):
    instance.registerAction("spambayes_classify", SpambayesClassify)
    instance.registerUtil('sb_is_spam', sb_is_spam)
    

Roundup Issue Tracker: http://roundup-tracker.org/