annotate website/issues/extensions/spambayes.py @ 4880:ca692423e401

Different approach to fix XSS in issue2550817 Encapsulate the error/ok message append method as add_ok_message and add_error_message. The new approach escapes the messages when appending -- at a point in the code where we still know where the message comes from. Escaping is the default but can bei turned off. This also fixes issue2550836 where certain messages may contain links. Another advantage of the new fix is that users don't need to change installed trackers and are secure by default.
author Ralf Schlatterbeck <rsc@runtux.com>
date Mon, 31 Mar 2014 18:19:23 +0200
parents c2d0d3e9099d
children 198b6e810c67
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4024
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
1 import re, math
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
2 from roundup.cgi.actions import Action
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
3 from roundup.cgi.exceptions import *
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
4
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
5 import xmlrpclib, socket
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
6
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
7 REVPAT = re.compile(r'(r[0-9]+\b|rev(ision)? [0-9]+\b)')
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
8
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
9 def extract_classinfo(db, classname, nodeid):
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
10 node = db.getnode(classname, nodeid)
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
11
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
12 authorage = node['creation'].timestamp() - \
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
13 db.getnode('user', node.get('author', node.get('creator')))['creation'].timestamp()
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
14
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
15 authorid = node.get('author', node.get('creator'))
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
16
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
17 content = db.getclass(classname).get(nodeid, 'content')
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
18
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
19 tokens = ["klass:%s" % classname,
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
20 "author:%s" % authorid,
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
21 "authorage:%d" % int(math.log(authorage)),
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
22 "hasrev:%s" % (REVPAT.search(content) is not None)]
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
23
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
24 return (content, tokens)
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
25
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
26 def train_spambayes(db, content, tokens, is_spam):
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
27 spambayes_uri = db.config.detectors['SPAMBAYES_URI']
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
28
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
29 server = xmlrpclib.ServerProxy(spambayes_uri, verbose=False)
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
30 try:
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
31 server.train({'content':content}, tokens, {}, is_spam)
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
32 return (True, None)
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
33 except (socket.error, xmlrpclib.Error), e:
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
34 return (False, str(e))
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
35
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
36
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
37 class SpambayesClassify(Action):
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
38 permissionType = 'SB: May Classify'
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
39
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
40 def handle(self):
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
41 (content, tokens) = extract_classinfo(self.db,
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
42 self.classname, self.nodeid)
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
43
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
44 if self.form.has_key("trainspam"):
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
45 is_spam = True
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
46 elif self.form.has_key("trainham"):
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
47 is_spam = False
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
48
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
49 (status, errmsg) = train_spambayes(self.db, content, tokens,
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
50 is_spam)
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
51
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
52 node = self.db.getnode(self.classname, self.nodeid)
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
53 props = {}
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
54
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
55 if status:
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
56 if node.get('spambayes_misclassified', False):
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
57 props['spambayes_misclassified'] = True
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
58
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
59 props['spambayes_score'] = 1.0
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
60
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
61 s = " SPAM"
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
62 if not is_spam:
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
63 props['spambayes_score'] = 0.0
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
64 s = " HAM"
4880
ca692423e401 Different approach to fix XSS in issue2550817
Ralf Schlatterbeck <rsc@runtux.com>
parents: 4024
diff changeset
65 self.client.add_ok_message(self._('Message classified as') + s)
4024
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
66 else:
4880
ca692423e401 Different approach to fix XSS in issue2550817
Ralf Schlatterbeck <rsc@runtux.com>
parents: 4024
diff changeset
67 self.client.add_error_message(self._('Unable to classify message, got error:') + errmsg)
4024
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
68
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
69 klass = self.db.getclass(self.classname)
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
70 klass.set(self.nodeid, **props)
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
71 self.db.commit()
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
72
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
73 def sb_is_spam(obj):
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
74 cutoff_score = float(obj._db.config.detectors['SPAMBAYES_SPAM_CUTOFF'])
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
75 try:
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
76 score = obj['spambayes_score']
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
77 except KeyError:
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
78 return False
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
79 return score >= cutoff_score
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
80
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
81 def init(instance):
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
82 instance.registerAction("spambayes_classify", SpambayesClassify)
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
83 instance.registerUtil('sb_is_spam', sb_is_spam)
c2d0d3e9099d svn repository setup
Stefan Seefeld <stefan@users.sourceforge.net>
parents:
diff changeset
84

Roundup Issue Tracker: http://roundup-tracker.org/