annotate website/issues/extensions/spambayes.py @ 7923:29a666d8a70d

issue2551285 - Remove StructuredText support Asked on the users mailing list if anybody was using it. Got no responses. I have never seen CI installing structuredtext packages so it's untested as well.
author John Rouillard <rouilj@ieee.org>
date Tue, 30 Apr 2024 22:27:57 -0400
parents e46ce04d5bbc
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4024
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
1 import re, math
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
2 from roundup.cgi.actions import Action
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
3 from roundup.cgi.exceptions import *
5408
e46ce04d5bbc Python 3 preparation: update xmlrpclib / SimpleXMLRPCServer imports.
Joseph Myers <jsm@polyomino.org.uk>
parents: 5381
diff changeset
4 from roundup.anypy import xmlrpc_
4024
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
5
5408
e46ce04d5bbc Python 3 preparation: update xmlrpclib / SimpleXMLRPCServer imports.
Joseph Myers <jsm@polyomino.org.uk>
parents: 5381
diff changeset
6 import socket
4024
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
7
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
8 REVPAT = re.compile(r'(r[0-9]+\b|rev(ision)? [0-9]+\b)')
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
9
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
10 def extract_classinfo(db, classname, nodeid):
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
11 node = db.getnode(classname, nodeid)
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
12
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
13 authorage = node['creation'].timestamp() - \
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
14 db.getnode('user', node.get('author', node.get('creator')))['creation'].timestamp()
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
15
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
16 authorid = node.get('author', node.get('creator'))
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
17
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
18 content = db.getclass(classname).get(nodeid, 'content')
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
19
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
20 tokens = ["klass:%s" % classname,
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
21 "author:%s" % authorid,
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
22 "authorage:%d" % int(math.log(authorage)),
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
23 "hasrev:%s" % (REVPAT.search(content) is not None)]
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
24
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
25 return (content, tokens)
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
26
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
27 def train_spambayes(db, content, tokens, is_spam):
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
28 spambayes_uri = db.config.detectors['SPAMBAYES_URI']
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
29
5408
e46ce04d5bbc Python 3 preparation: update xmlrpclib / SimpleXMLRPCServer imports.
Joseph Myers <jsm@polyomino.org.uk>
parents: 5381
diff changeset
30 server = xmlrpc_.client.ServerProxy(spambayes_uri, verbose=False)
4024
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
31 try:
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
32 server.train({'content':content}, tokens, {}, is_spam)
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
33 return (True, None)
5408
e46ce04d5bbc Python 3 preparation: update xmlrpclib / SimpleXMLRPCServer imports.
Joseph Myers <jsm@polyomino.org.uk>
parents: 5381
diff changeset
34 except (socket.error, xmlrpc_.client.Error) as e:
4024
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
35 return (False, str(e))
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
36
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
37
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
38 class SpambayesClassify(Action):
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
39 permissionType = 'SB: May Classify'
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
40
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
41 def handle(self):
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
42 (content, tokens) = extract_classinfo(self.db,
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
43 self.classname, self.nodeid)
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
44
5381
0942fe89e82e Python 3 preparation: change "x.has_key(y)" to "y in x".
Joseph Myers <jsm@polyomino.org.uk>
parents: 5248
diff changeset
45 if "trainspam" in self.form:
4024
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
46 is_spam = True
5381
0942fe89e82e Python 3 preparation: change "x.has_key(y)" to "y in x".
Joseph Myers <jsm@polyomino.org.uk>
parents: 5248
diff changeset
47 elif "trainham" in self.form:
4024
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
48 is_spam = False
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
49
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
50 (status, errmsg) = train_spambayes(self.db, content, tokens,
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
51 is_spam)
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
52
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
53 node = self.db.getnode(self.classname, self.nodeid)
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
54 props = {}
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
55
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
56 if status:
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
57 if node.get('spambayes_misclassified', False):
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
58 props['spambayes_misclassified'] = True
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
59
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
60 props['spambayes_score'] = 1.0
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
61
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
62 s = " SPAM"
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
63 if not is_spam:
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
64 props['spambayes_score'] = 0.0
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
65 s = " HAM"
4880
ca692423e401 Different approach to fix XSS in issue2550817
Ralf Schlatterbeck <rsc@runtux.com>
parents: 4024
diff changeset
66 self.client.add_ok_message(self._('Message classified as') + s)
4024
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
67 else:
4880
ca692423e401 Different approach to fix XSS in issue2550817
Ralf Schlatterbeck <rsc@runtux.com>
parents: 4024
diff changeset
68 self.client.add_error_message(self._('Unable to classify message, got error:') + errmsg)
4024
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
69
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
70 klass = self.db.getclass(self.classname)
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
71 klass.set(self.nodeid, **props)
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
72 self.db.commit()
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
73
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
74 def sb_is_spam(obj):
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
75 cutoff_score = float(obj._db.config.detectors['SPAMBAYES_SPAM_CUTOFF'])
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
76 try:
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
77 score = obj['spambayes_score']
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
78 except KeyError:
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
79 return False
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
80 return score >= cutoff_score
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
81
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
82 def init(instance):
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
83 instance.registerAction("spambayes_classify", SpambayesClassify)
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
84 instance.registerUtil('sb_is_spam', sb_is_spam)
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
85

Roundup Issue Tracker: http://roundup-tracker.org/