From 1121b101289774a4e9251542ec0b25624e3b02a7 Mon Sep 17 00:00:00 2001
From: Evan Prodromou
Date: Mon, 5 Mar 2012 09:58:57 -0600
Subject: [PATCH] New SpamFilter class

---
Review notes (text in this region is ignored when the patch is applied):

 * Spam_score::saveNew() is declared static, because onEndNoticeSave()
   invokes it as Spam_score::saveNew($notice, $result). Hunk line counts
   are unchanged by this fix.
 * NOTE(review): this copy of the patch looks damaged. Author e-mail
   addresses are missing, the top of the spamfilter.php header is reduced
   to a lone ".", and original whitespace was collapsed, so indentation
   here is reconstructed. Regenerate from commit 1121b10128 before
   applying - TODO confirm.

 ActivitySpamPlugin.php |  41 ++++-------
 Spam_score.php         |  16 +++++
 spamfilter.php         | 156 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 184 insertions(+), 29 deletions(-)
 create mode 100644 spamfilter.php

diff --git a/ActivitySpamPlugin.php b/ActivitySpamPlugin.php
index 6c1f8df472..fcc6673ce2 100644
--- a/ActivitySpamPlugin.php
+++ b/ActivitySpamPlugin.php
@@ -47,7 +47,6 @@ if (!defined('STATUSNET')) {
 class ActivitySpamPlugin extends Plugin
 {
     public $server = null;
-
     public $username = null;
     public $password = null;
 
@@ -64,6 +63,8 @@ class ActivitySpamPlugin extends Plugin
             }
         }
 
+        $this->filter = new SpamFilter($this->server, $this->username, $this->password);
+
         return true;
     }
 
@@ -103,6 +104,9 @@ class ActivitySpamPlugin extends Plugin
         case 'Spam_score':
             include_once $dir . '/'.$cls.'.php';
             return false;
+        case 'SpamFilter':
+            include_once $dir . '/'.strtolower($cls).'.php';
+            return false;
         default:
             return true;
         }
@@ -118,40 +122,19 @@ class ActivitySpamPlugin extends Plugin
 
     function onEndNoticeSave($notice)
     {
-        // FIXME: need this to autoload ActivityStreamsMediaLink
-        $doc = new ActivityStreamJSONDocument();
+        try {
 
-        $activity = $notice->asActivity(null);
+            $result = $this->filter->test($notice);
 
-        $client = new HTTPClient($this->server . "/is-this-spam");
+            $score = Spam_score::saveNew($notice, $result);
 
-        $client->setMethod('POST');
-        $client->setAuth($this->username, $this->password);
-        $client->setHeader('Content-Type', 'application/json');
-        $client->setBody(json_encode($activity->asArray()));
+            $this->log(LOG_INFO, "Notice " . $notice->id . " has spam score " . $score->score);
 
-        $response = $client->send();
-
-        if (!$response->isOK()) {
-            $this->log(LOG_ERR, "Error " . $response->getStatus() . " checking spam score: " . $response->getBody());
-            return true;
+        } catch (Exception $e) {
+            // Log but continue
+            $this->log(LOG_ERR, $e->getMessage());
         }
 
-        $result = json_decode($response->getBody());
-
-        $score = new Spam_score();
-
-        $score->notice_id = $notice->id;
-        $score->score = $result->probability;
-        $score->is_spam = $result->isSpam;
-        $score->scaled = Spam_score::scale($score->score);
-        $score->created = common_sql_now();
-        $score->notice_created = $notice->created;
-
-        $score->insert();
-
-        $this->log(LOG_INFO, "Notice " . $notice->id . " has spam score " . $score->score);
-
         return true;
     }
 
diff --git a/Spam_score.php b/Spam_score.php
index c815b42e36..08887d06b3 100644
--- a/Spam_score.php
+++ b/Spam_score.php
@@ -67,6 +67,22 @@ class Spam_score extends Managed_DataObject
         return Managed_DataObject::staticGet('Spam_score', $k, $v);
     }
 
+    static function saveNew($notice, $result) {
+
+        $score = new Spam_score();
+
+        $score->notice_id = $notice->id;
+        $score->score = $result->probability;
+        $score->is_spam = $result->isSpam;
+        $score->scaled = Spam_score::scale($score->score);
+        $score->created = common_sql_now();
+        $score->notice_created = $notice->created;
+
+        $score->insert();
+
+        return $score;
+    }
+
     /**
      * The One True Thingy that must be defined and declared.
      */
diff --git a/spamfilter.php b/spamfilter.php
new file mode 100644
index 0000000000..0e321ebc8b
--- /dev/null
+++ b/spamfilter.php
@@ -0,0 +1,156 @@
+.
+ *
+ * @category Spam
+ * @package StatusNet
+ * @author Evan Prodromou
+ * @copyright 2012 StatusNet, Inc.
+ * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0
+ * @link http://status.net/
+ */
+
+if (!defined('STATUSNET')) {
+    // This check helps protect against security problems;
+    // your code file can't be executed directly from the web.
+    exit(1);
+}
+
+/**
+ * Spam filter class
+ *
+ * Local proxy for remote filter
+ *
+ * @category Spam
+ * @package StatusNet
+ * @author Evan Prodromou
+ * @copyright 2012 StatusNet, Inc.
+ * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0
+ * @link http://status.net/
+ */
+
+class SpamFilter {
+
+    const HAM = 'ham';
+    const SPAM = 'spam';
+
+    public $server;
+    public $username;
+    public $password;
+
+    function __construct($server, $username, $password) {
+
+        $this->server = $server;
+        $this->username = $username;
+        $this->password = $password;
+    }
+
+    protected function toActivity($notice) {
+        // FIXME: need this to autoload ActivityStreamsMediaLink
+        $doc = new ActivityStreamJSONDocument();
+
+        $activity = $notice->asActivity(null);
+
+        return $activity;
+    }
+
+    public function test($notice) {
+
+        $activity = $this->toActivity($notice);
+        return $this->testActivity($activity);
+    }
+
+    public function testActivity($activity) {
+
+        $client = new HTTPClient($this->server . "/is-this-spam");
+
+        $client->setMethod('POST');
+        $client->setAuth($this->username, $this->password);
+        $client->setHeader('Content-Type', 'application/json');
+        $client->setBody(json_encode($activity->asArray()));
+
+        $response = $client->send();
+
+        if (!$response->isOK()) {
+            throw new Exception("Error " . $response->getStatus() . " checking spam score: " . $response->getBody());
+        }
+
+        $result = json_decode($response->getBody());
+
+        return $result;
+    }
+
+    public function train($notice, $category) {
+
+        $activity = $this->toActivity($notice);
+        return $this->trainActivity($activity, $category);
+
+    }
+
+    public function trainActivity($activity, $category) {
+
+        switch ($category) {
+        case self::HAM:
+            $endpoint = '/this-is-ham';
+            break;
+        case self::SPAM:
+            $endpoint = '/this-is-spam';
+            break;
+        default:
+            throw new Exception("Unknown category: " . $category);
+        }
+
+        $client = new HTTPClient($this->server . $endpoint);
+
+        $client->setMethod('POST');
+        $client->setAuth($this->username, $this->password);
+        $client->setHeader('Content-Type', 'application/json');
+        $client->setBody(json_encode($activity->asArray()));
+
+        $response = $client->send();
+
+        if (!$response->isOK()) {
+            throw new Exception("Error " . $response->getStatus() . " training spam filter: " . $response->getBody());
+        }
+
+        // We don't do much with the results
+        return true;
+    }
+
+    public function trainOnError($notice, $category) {
+
+        $activity = $this->toActivity($notice);
+
+        return $this->trainActivityOnError($activity, $category);
+    }
+
+    public function trainActivityOnError($activity, $category) {
+
+        $result = $this->testActivity($activity);
+
+        if (($category === self::SPAM && $result->isSpam) ||
+            ($category === self::HAM && !$result->isSpam)) {
+            return true;
+        } else {
+            return $this->trainActivity($activity, $category);
+        }
+    }
+}
-- 
2.39.5