Mercurial > p > roundup > code
comparison website/issues/extensions/spambayes.py @ 4024:c2d0d3e9099d website
svn repository setup
| author | Stefan Seefeld <stefan@users.sourceforge.net> |
|---|---|
| date | Fri, 06 Feb 2009 13:16:31 +0000 |
| parents | |
| children | ca692423e401 |
comparison
equal
deleted
inserted
replaced
| 4023:86c38b5aed66 | 4024:c2d0d3e9099d |
|---|---|
| 1 import re, math | |
| 2 from roundup.cgi.actions import Action | |
| 3 from roundup.cgi.exceptions import * | |
| 4 | |
| 5 import xmlrpclib, socket | |
| 6 | |
# Matches text that looks like a revision reference: "r" followed by digits,
# or "rev"/"revision" followed by one space and digits.  The pattern has no
# leading word boundary, so an "r<digits>" ending a longer word also matches.
# extract_classinfo() uses it to derive the "hasrev" token.
REVPAT = re.compile(r'(r[0-9]+\b|rev(ision)? [0-9]+\b)')
| 8 | |
def extract_classinfo(db, classname, nodeid):
    """Collect the text and the extra classifier tokens for one node.

    Parameters:
      db        -- database handle; getnode() and getclass() are used
      classname -- name of the class the node belongs to
      nodeid    -- id of the node to describe

    Returns a (content, tokens) tuple.  ``content`` is the node's
    'content' property; ``tokens`` is a list of "key:value" strings
    describing the class, the author, the age of the author's account
    when the node was created, and whether the content matches REVPAT.
    """
    node = db.getnode(classname, nodeid)

    # Prefer an explicit 'author' property and fall back to 'creator'.
    authorid = node.get('author', node.get('creator'))
    author = db.getnode('user', authorid)

    # Difference between the node's and the author's creation timestamps.
    authorage = node['creation'].timestamp() - author['creation'].timestamp()

    # math.log() raises ValueError for values <= 0, which happens when the
    # author record and the node carry the same creation timestamp (or the
    # difference is negative).  Use bucket 0 in that case instead of
    # failing; positive ages keep their natural-log bucket.
    if authorage > 0:
        agebucket = int(math.log(authorage))
    else:
        agebucket = 0

    content = db.getclass(classname).get(nodeid, 'content')

    tokens = ["klass:%s" % classname,
              "author:%s" % authorid,
              "authorage:%d" % agebucket,
              "hasrev:%s" % (REVPAT.search(content) is not None)]

    return (content, tokens)
| 25 | |
def train_spambayes(db, content, tokens, is_spam):
    """Send one training example to the SpamBayes XML-RPC server.

    The server address is read from the 'SPAMBAYES_URI' detector setting
    of ``db.config``.  The remote ``train`` method receives the content
    wrapped in a dict, the token list, an empty dict and the spam flag.

    Returns (True, None) when the call succeeds, or (False, message) when
    a socket or XML-RPC error is raised; ``message`` is str() of the error.
    """
    uri = db.config.detectors['SPAMBAYES_URI']
    proxy = xmlrpclib.ServerProxy(uri, verbose=False)
    message = {'content': content}

    try:
        proxy.train(message, tokens, {}, is_spam)
    except (socket.error, xmlrpclib.Error) as err:
        return (False, str(err))

    return (True, None)
| 35 | |
| 36 | |
class SpambayesClassify(Action):
    """Web action that trains SpamBayes on the current node.

    The presence of a "trainspam" or "trainham" form field selects
    whether the node is submitted as spam or as ham.
    """
    permissionType = 'SB: May Classify'

    def handle(self):
        """Train the classifier and record the result on the node.

        On success the node's 'spambayes_score' is set to 1.0 (spam) or
        0.0 (ham) and a confirmation is appended to the client's ok
        messages.  On failure the error text is appended to the client's
        error messages.  If neither form field was submitted, an error
        message is reported and nothing is trained or stored.
        """
        if "trainspam" in self.form:
            is_spam = True
        elif "trainham" in self.form:
            is_spam = False
        else:
            # Without this branch is_spam stayed unbound and the action
            # crashed with UnboundLocalError further down.
            self.client.error_message.append(self._(
                'Unable to classify message: neither "trainspam" nor '
                '"trainham" was submitted'))
            return

        (content, tokens) = extract_classinfo(self.db,
                                              self.classname, self.nodeid)

        (status, errmsg) = train_spambayes(self.db, content, tokens,
                                           is_spam)

        node = self.db.getnode(self.classname, self.nodeid)
        props = {}

        if status:
            # NOTE(review): this only re-sets the flag to True when it is
            # already truthy; confirm whether that is the intended logic.
            if node.get('spambayes_misclassified', False):
                props['spambayes_misclassified'] = True

            if is_spam:
                props['spambayes_score'] = 1.0
                s = " SPAM"
            else:
                props['spambayes_score'] = 0.0
                s = " HAM"
            self.client.ok_message.append(self._('Message classified as') + s)
        else:
            self.client.error_message.append(self._('Unable to classify message, got error:') + errmsg)

        # props is empty when training failed; set() and commit() are
        # still called, as before.
        klass = self.db.getclass(self.classname)
        klass.set(self.nodeid, **props)
        self.db.commit()
| 72 | |
def sb_is_spam(obj):
    """Tell whether obj's stored SpamBayes score reaches the spam cutoff.

    The cutoff is the 'SPAMBAYES_SPAM_CUTOFF' detector setting, read from
    ``obj._db.config`` and converted to float.  An object for which
    looking up 'spambayes_score' raises KeyError is reported as not spam.
    """
    threshold = float(obj._db.config.detectors['SPAMBAYES_SPAM_CUTOFF'])

    try:
        node_score = obj['spambayes_score']
    except KeyError:
        verdict = False
    else:
        verdict = node_score >= threshold
    return verdict
| 80 | |
def init(instance):
    """Register this extension's action and utility with ``instance``.

    SpambayesClassify is registered as the "spambayes_classify" action and
    sb_is_spam as the 'sb_is_spam' utility.
    """
    instance.registerAction("spambayes_classify", SpambayesClassify)
    instance.registerUtil('sb_is_spam', sb_is_spam)
| 84 |
