view website/issues/extensions/spambayes.py @ 6628:2bb6d7baa47d

"Comment" out the meta data - will not process under 1.7.5 sphinx Apparently field names with : fail on 1.7.5 sphinx which is the virtual env version on sourceforge. It works on my 1.6.7 python2 install. Looks like I need to add sphinxext-opengraph to get this to work. However that is python3 only so need to spin up new virtualenv etc. Looks like no python3 on sourceforge which may be an issue. On sourceforge in /home/project-web/roundup/src/docbuilder these packages are used and must be scp'ed as pip has no network access outside of sourceforge: Babel-2.6.0-py2.py3-none-any.whl Jinja2-2.10-py2.py3-none-any.whl MarkupSafe-1.0.tar.gz Pygments-2.2.0-py2.py3-none-any.whl Sphinx-1.7.5 Sphinx-1.7.5-py2.py3-none-any.whl Sphinx-1.7.5.tar.gz alabaster-0.7.11-py2.py3-none-any.whl certifi-2018.4.16-py2.py3-none-any.whl chardet-3.0.4-py2.py3-none-any.whl docutils-0.14-py2-none-any.whl idna-2.7-py2.py3-none-any.whl imagesize-1.0.0-py2.py3-none-any.whl packaging-17.1-py2.py3-none-any.whl pip-10.0.1 pip-10.0.1.tar.gz pyparsing-2.2.0-py2.py3-none-any.whl pytz-2018.5-py2.py3-none-any.whl requests-2.19.1-py2.py3-none-any.whl setuptools-39.2.0-py2.py3-none-any.whl six-1.11.0-py2.py3-none-any.whl snowballstemmer-1.2.1-py2.py3-none-any.whl sphinxcontrib_websupport-1.1.0-py2.py3-none-any.whl typing-3.6.4-py2-none-any.whl urllib3-1.23-py2.py3-none-any.whl
author John Rouillard <rouilj@ieee.org>
date Sun, 27 Mar 2022 13:57:04 -0400
parents e46ce04d5bbc
children
line wrap: on
line source

import re, math
from roundup.cgi.actions import Action
from roundup.cgi.exceptions import *
from roundup.anypy import xmlrpc_

import socket

# Matches a revision reference in message text: "r" followed by digits
# (e.g. "r1234"), or "rev"/"revision", a single space, then digits
# (e.g. "rev 12", "revision 345").  The match is case-sensitive and is not
# anchored on the left, so the "r123" inside a longer word also matches.
REVPAT = re.compile(r'(r[0-9]+\b|rev(ision)? [0-9]+\b)')

def extract_classinfo(db, classname, nodeid):
    """Return a node's content together with extra classifier tokens.

    Arguments:
      db        -- database object providing getnode() and getclass()
      classname -- name of the class the node belongs to
      nodeid    -- id of the node inside that class

    Returns a (content, tokens) tuple.  content is the node's 'content'
    property; tokens is a list of four strings:

      klass:<classname>
      author:<id>     -- the node's 'author', or its 'creator' if the node
                         has no 'author'
      authorage:<n>   -- int(log(age)), where age is the node's creation
                         timestamp() minus the creation timestamp() of the
                         author's user record (presumably seconds); 0 when
                         that difference is not positive
      hasrev:<bool>   -- True when content contains a REVPAT match
    """
    node = db.getnode(classname, nodeid)

    # Resolve the author once; it is needed for both the age and the token.
    authorid = node.get('author', node.get('creator'))
    author = db.getnode('user', authorid)

    authorage = node['creation'].timestamp() - author['creation'].timestamp()

    # math.log() raises ValueError for values <= 0, which happens when the
    # user record and the node carry the same creation time (or the user
    # record is the newer of the two).  Map those cases to bucket 0 instead
    # of failing; positive ages produce exactly the same bucket as before.
    if authorage > 0:
        agebucket = int(math.log(authorage))
    else:
        agebucket = 0

    content = db.getclass(classname).get(nodeid, 'content')

    tokens = ["klass:%s" % classname,
              "author:%s" % authorid,
              "authorage:%d" % agebucket,
              "hasrev:%s" % (REVPAT.search(content) is not None)]

    return (content, tokens)

def train_spambayes(db, content, tokens, is_spam):
    """Send one training sample to the SpamBayes XML-RPC server.

    The server address is read from the SPAMBAYES_URI entry of
    db.config.detectors.  The remote train() method is called with the
    content wrapped in a dict, the extra tokens, an empty dict and the
    is_spam flag.

    Returns (True, None) when the call completes, or (False, message)
    when a socket error or an XML-RPC client error is raised; message is
    the string form of that exception.
    """
    uri = db.config.detectors['SPAMBAYES_URI']
    proxy = xmlrpc_.client.ServerProxy(uri, verbose=False)
    sample = {'content': content}

    try:
        proxy.train(sample, tokens, {}, is_spam)
    except (socket.error, xmlrpc_.client.Error) as err:
        return (False, str(err))

    return (True, None)


class SpambayesClassify(Action):
    """Action that trains SpamBayes with the current node as spam or ham.

    The submitted form selects the direction: a "trainspam" field trains
    the node as spam, a "trainham" field trains it as ham.
    """
    permissionType = 'SB: May Classify'

    def handle(self):
        """Train the classifier on the current node and record the result.

        On successful training the node's spambayes_score is set to 1.0
        (spam) or 0.0 (ham) and an OK message is shown.  If training fails,
        an error message containing the failure text is shown and no
        property is changed.  If the form names neither "trainspam" nor
        "trainham", an error message is shown and nothing else is done.
        """
        # Work out the training direction before touching the database or
        # the network.  Without the final branch is_spam would be unbound
        # and the call below would raise UnboundLocalError.
        if "trainspam" in self.form:
            is_spam = True
        elif "trainham" in self.form:
            is_spam = False
        else:
            self.client.add_error_message(
                self._('Unable to classify message: neither trainspam '
                       'nor trainham was requested'))
            return

        (content, tokens) = extract_classinfo(self.db,
                                              self.classname, self.nodeid)

        (status, errmsg) = train_spambayes(self.db, content, tokens,
                                           is_spam)

        node = self.db.getnode(self.classname, self.nodeid)
        props = {}

        if status:
            # NOTE(review): this only re-asserts a flag that is already
            # true, so it never changes the stored value.  It may have been
            # meant to mark a node whose previous score disagrees with the
            # manual classification -- confirm the intent before changing.
            if node.get('spambayes_misclassified', False):
                props['spambayes_misclassified'] = True

            props['spambayes_score'] = 1.0

            s = " SPAM"
            if not is_spam:
                props['spambayes_score'] = 0.0
                s = " HAM"
            self.client.add_ok_message(self._('Message classified as') + s)
        else:
            # Separate the translated prefix from the error text with a
            # space so the two do not run together.
            self.client.add_error_message(
                self._('Unable to classify message, got error:')
                + ' ' + errmsg)

        # props is empty when training failed; set() and commit() are still
        # called, exactly as before.
        klass = self.db.getclass(self.classname)
        klass.set(self.nodeid, **props)
        self.db.commit()

def sb_is_spam(obj):
    """Tell whether obj's SpamBayes score reaches the spam cutoff.

    The cutoff is the SPAMBAYES_SPAM_CUTOFF entry of
    obj._db.config.detectors, converted to float.  The score is read as
    obj['spambayes_score'].

    Returns False when looking the score up raises KeyError; otherwise
    returns the result of comparing score >= cutoff.
    """
    threshold = float(obj._db.config.detectors['SPAMBAYES_SPAM_CUTOFF'])

    try:
        value = obj['spambayes_score']
    except KeyError:
        # No score available for this object: treat it as not spam.
        verdict = False
    else:
        verdict = value >= threshold

    return verdict

def init(instance):
    """Register this extension's action and template utility.

    SpambayesClassify is registered as the action "spambayes_classify"
    and sb_is_spam as the utility "sb_is_spam", in that order.
    """
    action_name = "spambayes_classify"
    util_name = 'sb_is_spam'

    instance.registerAction(action_name, SpambayesClassify)
    instance.registerUtil(util_name, sb_is_spam)
    

Roundup Issue Tracker: http://roundup-tracker.org/