Mercurial > p > roundup > code
view website/issues/extensions/spambayes.py @ 5331:57caeefb2f81
Work around a line-length limit in poplib
Work around a limitation in python2.7 implementation of poplib (for the
pop3 protocol for fetching emails): It seems poplib applies a
line-length limit not just to the lines involving the pop3 protocol but
to any email content, too. This sometimes leads to tracebacks whenever
an email exceeding this limit is encountered. We "fix" this by
monkey-patching poplib with a larger line-limit. Thanks to Heiko
Stegmann for discovering this.
| author | Ralf Schlatterbeck <rsc@runtux.com> |
|---|---|
| date | Thu, 07 Jun 2018 12:39:31 +0200 |
| parents | 198b6e810c67 |
| children | 0942fe89e82e |
line wrap: on
line source
import re, math from roundup.cgi.actions import Action from roundup.cgi.exceptions import * import xmlrpclib, socket REVPAT = re.compile(r'(r[0-9]+\b|rev(ision)? [0-9]+\b)') def extract_classinfo(db, classname, nodeid): node = db.getnode(classname, nodeid) authorage = node['creation'].timestamp() - \ db.getnode('user', node.get('author', node.get('creator')))['creation'].timestamp() authorid = node.get('author', node.get('creator')) content = db.getclass(classname).get(nodeid, 'content') tokens = ["klass:%s" % classname, "author:%s" % authorid, "authorage:%d" % int(math.log(authorage)), "hasrev:%s" % (REVPAT.search(content) is not None)] return (content, tokens) def train_spambayes(db, content, tokens, is_spam): spambayes_uri = db.config.detectors['SPAMBAYES_URI'] server = xmlrpclib.ServerProxy(spambayes_uri, verbose=False) try: server.train({'content':content}, tokens, {}, is_spam) return (True, None) except (socket.error, xmlrpclib.Error) as e: return (False, str(e)) class SpambayesClassify(Action): permissionType = 'SB: May Classify' def handle(self): (content, tokens) = extract_classinfo(self.db, self.classname, self.nodeid) if self.form.has_key("trainspam"): is_spam = True elif self.form.has_key("trainham"): is_spam = False (status, errmsg) = train_spambayes(self.db, content, tokens, is_spam) node = self.db.getnode(self.classname, self.nodeid) props = {} if status: if node.get('spambayes_misclassified', False): props['spambayes_misclassified'] = True props['spambayes_score'] = 1.0 s = " SPAM" if not is_spam: props['spambayes_score'] = 0.0 s = " HAM" self.client.add_ok_message(self._('Message classified as') + s) else: self.client.add_error_message(self._('Unable to classify message, got error:') + errmsg) klass = self.db.getclass(self.classname) klass.set(self.nodeid, **props) self.db.commit() def sb_is_spam(obj): cutoff_score = float(obj._db.config.detectors['SPAMBAYES_SPAM_CUTOFF']) try: score = obj['spambayes_score'] except KeyError: return False return score >= cutoff_score def init(instance): instance.registerAction("spambayes_classify", SpambayesClassify) instance.registerUtil('sb_is_spam', sb_is_spam)
