Mercurial > p > roundup > code
annotate website/issues/extensions/spambayes.py @ 8527:d4a43d9da8ef
chore(build): build(deps): bump anchore/scan-action from 7.3.1 to 7.3.2 pull #82
| author | John Rouillard <rouilj@ieee.org> |
|---|---|
| date | Mon, 23 Feb 2026 20:16:55 -0500 |
| parents | e46ce04d5bbc |
| children |
| rev | line source |
|---|---|
|
4024
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
1 import re, math |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
2 from roundup.cgi.actions import Action |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
3 from roundup.cgi.exceptions import * |
|
5408
e46ce04d5bbc
Python 3 preparation: update xmlrpclib / SimpleXMLRPCServer imports.
Joseph Myers <jsm@polyomino.org.uk>
parents:
5381
diff
changeset
|
4 from roundup.anypy import xmlrpc_ |
|
4024
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
5 |
|
5408
e46ce04d5bbc
Python 3 preparation: update xmlrpclib / SimpleXMLRPCServer imports.
Joseph Myers <jsm@polyomino.org.uk>
parents:
5381
diff
changeset
|
6 import socket |
|
4024
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
7 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
8 REVPAT = re.compile(r'(r[0-9]+\b|rev(ision)? [0-9]+\b)') |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
9 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
10 def extract_classinfo(db, classname, nodeid): |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
11 node = db.getnode(classname, nodeid) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
12 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
13 authorage = node['creation'].timestamp() - \ |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
14 db.getnode('user', node.get('author', node.get('creator')))['creation'].timestamp() |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
15 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
16 authorid = node.get('author', node.get('creator')) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
17 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
18 content = db.getclass(classname).get(nodeid, 'content') |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
19 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
20 tokens = ["klass:%s" % classname, |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
21 "author:%s" % authorid, |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
22 "authorage:%d" % int(math.log(authorage)), |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
23 "hasrev:%s" % (REVPAT.search(content) is not None)] |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
24 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
25 return (content, tokens) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
26 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
27 def train_spambayes(db, content, tokens, is_spam): |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
28 spambayes_uri = db.config.detectors['SPAMBAYES_URI'] |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
29 |
|
5408
e46ce04d5bbc
Python 3 preparation: update xmlrpclib / SimpleXMLRPCServer imports.
Joseph Myers <jsm@polyomino.org.uk>
parents:
5381
diff
changeset
|
30 server = xmlrpc_.client.ServerProxy(spambayes_uri, verbose=False) |
|
4024
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
31 try: |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
32 server.train({'content':content}, tokens, {}, is_spam) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
33 return (True, None) |
|
5408
e46ce04d5bbc
Python 3 preparation: update xmlrpclib / SimpleXMLRPCServer imports.
Joseph Myers <jsm@polyomino.org.uk>
parents:
5381
diff
changeset
|
34 except (socket.error, xmlrpc_.client.Error) as e: |
|
4024
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
35 return (False, str(e)) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
36 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
37 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
38 class SpambayesClassify(Action): |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
39 permissionType = 'SB: May Classify' |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
40 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
41 def handle(self): |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
42 (content, tokens) = extract_classinfo(self.db, |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
43 self.classname, self.nodeid) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
44 |
|
5381
0942fe89e82e
Python 3 preparation: change "x.has_key(y)" to "y in x".
Joseph Myers <jsm@polyomino.org.uk>
parents:
5248
diff
changeset
|
45 if "trainspam" in self.form: |
|
4024
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
46 is_spam = True |
|
5381
0942fe89e82e
Python 3 preparation: change "x.has_key(y)" to "y in x".
Joseph Myers <jsm@polyomino.org.uk>
parents:
5248
diff
changeset
|
47 elif "trainham" in self.form: |
|
4024
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
48 is_spam = False |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
49 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
50 (status, errmsg) = train_spambayes(self.db, content, tokens, |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
51 is_spam) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
52 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
53 node = self.db.getnode(self.classname, self.nodeid) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
54 props = {} |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
55 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
56 if status: |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
57 if node.get('spambayes_misclassified', False): |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
58 props['spambayes_misclassified'] = True |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
59 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
60 props['spambayes_score'] = 1.0 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
61 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
62 s = " SPAM" |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
63 if not is_spam: |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
64 props['spambayes_score'] = 0.0 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
65 s = " HAM" |
|
4880
ca692423e401
Different approach to fix XSS in issue2550817
Ralf Schlatterbeck <rsc@runtux.com>
parents:
4024
diff
changeset
|
66 self.client.add_ok_message(self._('Message classified as') + s) |
|
4024
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
67 else: |
|
4880
ca692423e401
Different approach to fix XSS in issue2550817
Ralf Schlatterbeck <rsc@runtux.com>
parents:
4024
diff
changeset
|
68 self.client.add_error_message(self._('Unable to classify message, got error:') + errmsg) |
|
4024
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
69 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
70 klass = self.db.getclass(self.classname) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
71 klass.set(self.nodeid, **props) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
72 self.db.commit() |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
73 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
74 def sb_is_spam(obj): |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
75 cutoff_score = float(obj._db.config.detectors['SPAMBAYES_SPAM_CUTOFF']) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
76 try: |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
77 score = obj['spambayes_score'] |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
78 except KeyError: |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
79 return False |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
80 return score >= cutoff_score |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
81 |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
82 def init(instance): |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
83 instance.registerAction("spambayes_classify", SpambayesClassify) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
84 instance.registerUtil('sb_is_spam', sb_is_spam) |
|
c2d0d3e9099d
svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff
changeset
|
85 |
