view website/issues/extensions/spambayes.py @ 8323:32a57cde072f

chore(ci): issue2551368 use new gpg for all releases. I was the new version only for 3.10 and newer running on ubuntu 24.04. Paul said it should work on 3.7 and 3.8 which runn on unbuntu 22.04. So enable it and see if it works.
author John Rouillard <rouilj@ieee.org>
date Wed, 04 Jun 2025 19:20:27 -0400
parents e46ce04d5bbc
children
line wrap: on
line source

import re, math
from roundup.cgi.actions import Action
from roundup.cgi.exceptions import *
from roundup.anypy import xmlrpc_

import socket

REVPAT = re.compile(r'(r[0-9]+\b|rev(ision)? [0-9]+\b)')

def extract_classinfo(db, classname, nodeid):
    node = db.getnode(classname, nodeid)

    authorage = node['creation'].timestamp() - \
                db.getnode('user', node.get('author', node.get('creator')))['creation'].timestamp()

    authorid = node.get('author', node.get('creator'))

    content = db.getclass(classname).get(nodeid, 'content')

    tokens = ["klass:%s" % classname,
              "author:%s" % authorid,
              "authorage:%d" % int(math.log(authorage)),
              "hasrev:%s" % (REVPAT.search(content) is not None)]

    return (content, tokens)

def train_spambayes(db, content, tokens, is_spam):
    spambayes_uri = db.config.detectors['SPAMBAYES_URI']

    server = xmlrpc_.client.ServerProxy(spambayes_uri, verbose=False)
    try:
        server.train({'content':content}, tokens, {}, is_spam)
        return (True, None)
    except (socket.error, xmlrpc_.client.Error) as e:
        return (False, str(e))


class SpambayesClassify(Action):
    permissionType = 'SB: May Classify'
    
    def handle(self):
        (content, tokens) = extract_classinfo(self.db,
                                              self.classname, self.nodeid)

        if "trainspam" in self.form:
            is_spam = True
        elif "trainham" in self.form:
            is_spam = False

        (status, errmsg) = train_spambayes(self.db, content, tokens,
                                           is_spam)

        node = self.db.getnode(self.classname, self.nodeid)
        props = {}

        if status:
            if node.get('spambayes_misclassified', False):
                props['spambayes_misclassified'] = True

            props['spambayes_score'] = 1.0
            
            s = " SPAM"
            if not is_spam:
                props['spambayes_score'] = 0.0
                s = " HAM"
            self.client.add_ok_message(self._('Message classified as') + s)
        else:
            self.client.add_error_message(self._('Unable to classify message, got error:') + errmsg)

        klass = self.db.getclass(self.classname)
        klass.set(self.nodeid, **props)
        self.db.commit()

def sb_is_spam(obj):
    cutoff_score = float(obj._db.config.detectors['SPAMBAYES_SPAM_CUTOFF'])
    try:
        score = obj['spambayes_score']
    except KeyError:
        return False
    return score >= cutoff_score

def init(instance):
    instance.registerAction("spambayes_classify", SpambayesClassify)
    instance.registerUtil('sb_is_spam', sb_is_spam)
    

Roundup Issue Tracker: http://roundup-tracker.org/