git.mxchange.org Git - quix0rs-gnu-social.git/commitdiff
New SpamFilter class
author Evan Prodromou <evan@status.net>
Mon, 5 Mar 2012 15:58:57 +0000 (09:58 -0600)
committer Evan Prodromou <evan@status.net>
Mon, 5 Mar 2012 15:58:57 +0000 (09:58 -0600)
ActivitySpamPlugin.php
Spam_score.php
spamfilter.php [new file with mode: 0644]

index 6c1f8df472e2d28c453b2ed06ac339040d204657..fcc6673ce2e7ec98ab7285b465ed3780d7d85561 100644 (file)
@@ -47,7 +47,6 @@ if (!defined('STATUSNET')) {
 class ActivitySpamPlugin extends Plugin
 {
     public $server = null;
-
     public $username = null;
     public $password = null;
 
@@ -64,6 +63,8 @@ class ActivitySpamPlugin extends Plugin
             }
         }
 
+        $this->filter = new SpamFilter($this->server, $this->username, $this->password);
+
         return true;
     }
 
@@ -103,6 +104,9 @@ class ActivitySpamPlugin extends Plugin
         case 'Spam_score':
             include_once $dir . '/'.$cls.'.php';
             return false;
+        case 'SpamFilter':
+            include_once $dir . '/'.strtolower($cls).'.php';
+            return false;
         default:
             return true;
         }
@@ -118,40 +122,19 @@ class ActivitySpamPlugin extends Plugin
 
     function onEndNoticeSave($notice)
     {
-        // FIXME: need this to autoload ActivityStreamsMediaLink
-        $doc = new ActivityStreamJSONDocument();
+        try {
 
-        $activity = $notice->asActivity(null);
+            $result = $this->filter->test($notice);
 
-        $client = new HTTPClient($this->server . "/is-this-spam");
+            $score = Spam_score::saveNew($notice, $result);
 
-        $client->setMethod('POST');
-        $client->setAuth($this->username, $this->password);
-        $client->setHeader('Content-Type', 'application/json');
-        $client->setBody(json_encode($activity->asArray()));
+            $this->log(LOG_INFO, "Notice " . $notice->id . " has spam score " . $score->score);
 
-        $response = $client->send();
-
-        if (!$response->isOK()) {
-            $this->log(LOG_ERR, "Error " . $response->getStatus() . " checking spam score: " . $response->getBody());
-            return true;
+        } catch (Exception $e) {
+            // Log but continue 
+            $this->log(LOG_ERR, $e->getMessage());
         }
 
-        $result = json_decode($response->getBody());
-
-        $score = new Spam_score();
-
-        $score->notice_id      = $notice->id;
-        $score->score          = $result->probability;
-        $score->is_spam        = $result->isSpam;
-        $score->scaled         = Spam_score::scale($score->score);
-        $score->created        = common_sql_now();
-        $score->notice_created = $notice->created;
-
-        $score->insert();
-
-        $this->log(LOG_INFO, "Notice " . $notice->id . " has spam score " . $score->score);
-
         return true;
     }
 
index c815b42e368f3cebdb62f1963f7e2b3f34b4c894..08887d06b38a4206db193356043ab50eb84fa0e5 100644 (file)
@@ -67,6 +67,22 @@ class Spam_score extends Managed_DataObject
         return Managed_DataObject::staticGet('Spam_score', $k, $v);
     }
 
+    /**
+     * Build a Spam_score row for a notice from a filter result and insert it.
+     *
+     * Declared static because it is invoked as Spam_score::saveNew(); calling
+     * a non-static method that way is an error on current PHP versions.
+     *
+     * @param object $notice notice being scored; its id and created
+     *                       properties are copied into the score
+     * @param object $result filter result; its probability and isSpam
+     *                       properties are read
+     *
+     * @return Spam_score the score object, after insert() has been called on it
+     */
+    public static function saveNew($notice, $result) {
+
+        $score = new Spam_score();
+
+        $score->notice_id      = $notice->id;
+        $score->score          = $result->probability;
+        $score->is_spam        = $result->isSpam;
+        $score->scaled         = Spam_score::scale($score->score);
+        $score->created        = common_sql_now();
+        $score->notice_created = $notice->created;
+
+        $score->insert();
+
+        return $score;
+    }
+
     /**
      * The One True Thingy that must be defined and declared.
      */
diff --git a/spamfilter.php b/spamfilter.php
new file mode 100644 (file)
index 0000000..0e321eb
--- /dev/null
@@ -0,0 +1,156 @@
+<?php
+/**
+ * StatusNet - the distributed open-source microblogging tool
+ * Copyright (C) 2012, StatusNet, Inc.
+ *
+ * Spam filter class
+ * 
+ * PHP version 5
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * @category  Spam
+ * @package   StatusNet
+ * @author    Evan Prodromou <evan@status.net>
+ * @copyright 2012 StatusNet, Inc.
+ * @license   http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0
+ * @link      http://status.net/
+ */
+
+if (!defined('STATUSNET')) {
+    // This check helps protect against security problems;
+    // your code file can't be executed directly from the web.
+    exit(1);
+}
+
+/**
+ * Spam filter class
+ *
+ * Local proxy for remote filter
+ *
+ * @category  Spam
+ * @package   StatusNet
+ * @author    Evan Prodromou <evan@status.net>
+ * @copyright 2012 StatusNet, Inc.
+ * @license   http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0
+ * @link      http://status.net/
+ */
+
+class SpamFilter {
+
+    // Category names accepted by the train*() methods.
+    const HAM  = 'ham';
+    const SPAM = 'spam';
+
+    /** Base URL of the remote filter; endpoint paths are appended to it. */
+    public $server;
+    /** User name passed to HTTPClient::setAuth(). */
+    public $username;
+    /** Password passed to HTTPClient::setAuth(). */
+    public $password;
+
+    /**
+     * Remember the connection settings for the remote filter.
+     *
+     * @param string $server   base URL of the filter service
+     * @param string $username user name for authentication
+     * @param string $password password for authentication
+     */
+    public function __construct($server, $username, $password) {
+
+        $this->server   = $server;
+        $this->username = $username;
+        $this->password = $password;
+    }
+
+    /**
+     * Convert a notice into the activity that is sent to the filter.
+     *
+     * @param object $notice notice to convert; must provide asActivity()
+     *
+     * @return object result of $notice->asActivity(null)
+     */
+    protected function toActivity($notice) {
+        // FIXME: need this to autoload ActivityStreamsMediaLink
+        $doc = new ActivityStreamJSONDocument();
+
+        $activity = $notice->asActivity(null);
+
+        return $activity;
+    }
+
+    /**
+     * POST an activity as JSON to one endpoint of the filter service.
+     *
+     * Shared by testActivity() and trainActivity() so that both build the
+     * request and report HTTP failures in exactly the same way.
+     *
+     * @param string $endpoint path appended to $this->server
+     * @param object $activity activity to send; must provide asArray()
+     * @param string $action   what was being attempted, for the error message
+     *
+     * @return object the response returned by HTTPClient::send()
+     *
+     * @throws Exception if the response reports !isOK()
+     */
+    private function postActivity($endpoint, $activity, $action) {
+
+        $client = new HTTPClient($this->server . $endpoint);
+
+        $client->setMethod('POST');
+        $client->setAuth($this->username, $this->password);
+        $client->setHeader('Content-Type', 'application/json');
+        $client->setBody(json_encode($activity->asArray()));
+
+        $response = $client->send();
+
+        if (!$response->isOK()) {
+            throw new Exception("Error " . $response->getStatus() . " " . $action . ": " . $response->getBody());
+        }
+
+        return $response;
+    }
+
+    /**
+     * Ask the filter whether a notice is spam.
+     *
+     * @param object $notice notice to test
+     *
+     * @return object decoded filter result, see testActivity()
+     *
+     * @throws Exception on HTTP failure or an undecodable response
+     */
+    public function test($notice) {
+
+        $activity = $this->toActivity($notice);
+        return $this->testActivity($activity);
+    }
+
+    /**
+     * Ask the filter whether an activity is spam.
+     *
+     * @param object $activity activity to test; must provide asArray()
+     *
+     * @return object JSON-decoded response body; callers in this plugin read
+     *                its probability and isSpam properties
+     *
+     * @throws Exception on HTTP failure, or when the body does not decode to
+     *                   an object
+     */
+    public function testActivity($activity) {
+
+        $response = $this->postActivity('/is-this-spam', $activity, 'checking spam score');
+
+        $result = json_decode($response->getBody());
+
+        // json_decode() yields null for a body that is not valid JSON; fail
+        // here rather than let callers read properties of a non-object.
+        if (!is_object($result)) {
+            throw new Exception("Invalid response checking spam score: " . $response->getBody());
+        }
+
+        return $result;
+    }
+
+    /**
+     * Tell the filter which category a notice belongs to.
+     *
+     * @param object $notice   notice to train with
+     * @param string $category self::HAM or self::SPAM
+     *
+     * @return boolean true on success
+     *
+     * @throws Exception on unknown category or HTTP failure
+     */
+    public function train($notice, $category) {
+
+        $activity = $this->toActivity($notice);
+        return $this->trainActivity($activity, $category);
+
+    }
+
+    /**
+     * Tell the filter which category an activity belongs to.
+     *
+     * @param object $activity activity to train with; must provide asArray()
+     * @param string $category self::HAM or self::SPAM
+     *
+     * @return boolean true on success
+     *
+     * @throws Exception on unknown category or HTTP failure
+     */
+    public function trainActivity($activity, $category) {
+
+        switch ($category) {
+        case self::HAM:
+            $endpoint = '/this-is-ham';
+            break;
+        case self::SPAM:
+            $endpoint = '/this-is-spam';
+            break;
+        default:
+            // '.' concatenates; '+' would be arithmetic on the two strings.
+            throw new Exception("Unknown category: " . $category);
+        }
+
+        $this->postActivity($endpoint, $activity, 'training spam filter');
+
+        // We don't do much with the results
+        return true;
+    }
+
+    /**
+     * Train the filter with a notice only if it currently gets it wrong.
+     *
+     * @param object $notice   notice to check and possibly train with
+     * @param string $category self::HAM or self::SPAM
+     *
+     * @return boolean true
+     *
+     * @throws Exception on HTTP failure, undecodable response or unknown
+     *                   category
+     */
+    public function trainOnError($notice, $category) {
+
+        $activity = $this->toActivity($notice);
+
+        return $this->trainActivityOnError($activity, $category);
+    }
+
+    /**
+     * Train the filter with an activity only if it currently gets it wrong.
+     *
+     * The activity is tested first; training is skipped when the filter's
+     * isSpam verdict already agrees with $category.
+     *
+     * @param object $activity activity to check and possibly train with
+     * @param string $category self::HAM or self::SPAM
+     *
+     * @return boolean true
+     *
+     * @throws Exception on HTTP failure, undecodable response or unknown
+     *                   category
+     */
+    public function trainActivityOnError($activity, $category) {
+
+        $result = $this->testActivity($activity);
+
+        if (($category === self::SPAM && $result->isSpam) ||
+            ($category === self::HAM && !$result->isSpam)) {
+            return true;
+        } else {
+            return $this->trainActivity($activity, $category);
+        }
+    }
+}