]> git.mxchange.org Git - friendica.git/blob - library/spam/b8/storage/storage_base.php
Merge branch 'master' into spam
[friendica.git] / library / spam / b8 / storage / storage_base.php
1 <?php
2
3 #   Copyright (C) 2010 Tobias Leupold <tobias.leupold@web.de>
4 #
5 #   This file is part of the b8 package
6 #
7 #   This program is free software; you can redistribute it and/or modify it
8 #   under the terms of the GNU Lesser General Public License as published by
9 #   the Free Software Foundation in version 2.1 of the License.
10 #
11 #   This program is distributed in the hope that it will be useful, but
12 #   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 #   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
14 #   License for more details.
15 #
16 #   You should have received a copy of the GNU Lesser General Public License
17 #   along with this program; if not, write to the Free Software Foundation,
18 #   Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
19
20 /**
21  * Functions used by all storage backends
22  * Copyright (C) 2010 Tobias Leupold <tobias.leupold@web.de>
23  *
24  * @license LGPL
25  * @access public
26  * @package b8
27  * @author Tobias Leupold
28  */
29
abstract class b8_storage_base
{

    /**
     * TRUE while the backend is usable. validate() refuses to work otherwise,
     * and check_database() resets it to FALSE when its check fails.
     */
    public $connected            = FALSE;

    /**
     * Degenerator instance, created anew by every validate() call.
     */
    protected $_degenerator      = NULL;

    # Keys under which the database's internal variables are looked up
    const INTERNALS_TEXTS_HAM    = 'bayes*texts.ham';
    const INTERNALS_TEXTS_SPAM   = 'bayes*texts.spam';
    const INTERNALS_DBVERSION    = 'bayes*dbversion';

    # Error codes that are returned in place of TRUE
    const BACKEND_NOT_CONNECTED  = 'BACKEND_NOT_CONNECTED';
    const DATABASE_WRONG_VERSION = 'DATABASE_WRONG_VERSION';
    const DATABASE_NOT_B8        = 'DATABASE_NOT_B8';

    /**
     * Validates the class has all it needs to work.
     *
     * Reads $this->b8_config['degenerator'], which is not declared in this
     * class (presumably provided by the concrete backend -- confirm there).
     *
     * @access protected
     * @return mixed Returns TRUE if everything is okay, otherwise an error code.
     */

    protected function validate()
    {

        # The degenerator is set up here, as the code would have to be duplicated
        # if it was done in the constructor of each storage backend.
        $class = 'b8_degenerator_' . $this->b8_config['degenerator'];
        $this->_degenerator = new $class();

        if($this->connected !== TRUE)
            return self::BACKEND_NOT_CONNECTED;

        return TRUE;

    }

    /**
     * Checks if a b8 database is used and if its version is okay.
     *
     * On any failure $this->connected is reset to FALSE.
     *
     * @access protected
     * @param mixed $uid Passed through unchanged to get_internals()
     * @return mixed Returns TRUE if everything is okay, otherwise an error code.
     */

    protected function check_database($uid)
    {

        $internals = $this->get_internals($uid);

        # No version entry at all: this is not a b8 database
        if(!isset($internals['dbversion'])) {
            $this->connected = FALSE;
            return self::DATABASE_NOT_B8;
        }

        # A version entry exists, but it is not the one this code works with
        if((int) $internals['dbversion'] !== 2) {
            $this->connected = FALSE;
            return self::DATABASE_WRONG_VERSION;
        }

        return TRUE;

    }

    /**
     * Parses the "count" data of a token.
     *
     * The data is split at spaces; the first field is used as the ham count
     * and the second one as the spam count. Any further field (the lastseen
     * value written by get() and process_text()) is ignored. Missing fields
     * count as 0.
     *
     * @access private
     * @param string $data
     * @return array Returns an array of the parsed data: array('count_ham' => int, 'count_spam' => int).
     */

    private function _parse_count($data)
    {

        $fields = explode(' ', (string) $data);

        return array(
            'count_ham'  => isset($fields[0]) ? (int) $fields[0] : 0,
            'count_spam' => isset($fields[1]) ? (int) $fields[1] : 0
        );

    }

    /**
     * Get the database's internal variables.
     *
     * @access public
     * @param mixed $uid Passed through unchanged to _get_query()
     * @return array Returns array('texts_ham' => int, 'texts_spam' => int, 'dbversion' => int|NULL).
     *               The text counters default to 0 if they are not stored; 'dbversion' is NULL
     *               if no version entry was found, so that check_database() can tell a missing
     *               entry from a wrong one.
     */

    public function get_internals($uid)
    {

        $internals = $this->_get_query(
            array(
                self::INTERNALS_TEXTS_HAM,
                self::INTERNALS_TEXTS_SPAM,
                self::INTERNALS_DBVERSION
            ),
            $uid
        );

        $texts_ham  = 0;
        $texts_spam = 0;
        $dbversion  = NULL;

        if(isset($internals[self::INTERNALS_TEXTS_HAM]))
            $texts_ham = (int) $internals[self::INTERNALS_TEXTS_HAM];

        if(isset($internals[self::INTERNALS_TEXTS_SPAM]))
            $texts_spam = (int) $internals[self::INTERNALS_TEXTS_SPAM];

        if(isset($internals[self::INTERNALS_DBVERSION]))
            $dbversion = (int) $internals[self::INTERNALS_DBVERSION];

        return array(
            'texts_ham'  => $texts_ham,
            'texts_spam' => $texts_spam,
            'dbversion'  => $dbversion
        );

    }

    /**
     * Get all data about a list of tokens from the database.
     *
     * Tokens that are not found directly are degenerated and the degenerated
     * forms are looked up as well. Every entry that is returned also gets its
     * lastseen field rewritten to $this->b8_config['today'].
     *
     * @access public
     * @param array $tokens List of tokens to look up
     * @param mixed $uid Passed through unchanged to _get_query(), _update() and _commit()
     * @return mixed Returns the error code of validate() on failure, otherwise returns array of returned data in the format array('tokens' => array(token => count), 'degenerates' => array(token => array(degenerate => count))).
     */

    public function get($tokens, $uid)
    {

        # Validate the startup

        $started_up = $this->validate();

        if($started_up !== TRUE)
            return $started_up;

        # First we see what we have in the database.
        $token_data = $this->_get_query($tokens, $uid);

        if(!is_array($token_data))
            $token_data = array();

        # Check if we have to degenerate some tokens

        $missing_tokens = array();

        foreach($tokens as $token) {
            if(!isset($token_data[$token]))
                $missing_tokens[] = $token;
        }

        if(count($missing_tokens) > 0) {

            # Generate a list of degenerated tokens for the missing tokens ...
            $degenerates = $this->_degenerator->degenerate($missing_tokens);

            $degenerates_list = array();

            foreach($degenerates as $token_degenerates) {
                foreach($token_degenerates as $degenerate)
                    $degenerates_list[] = $degenerate;
            }

            # ... and look them up, for the same $uid as the tokens themselves
            if(count($degenerates_list) > 0) {

                $degenerate_data = $this->_get_query($degenerates_list, $uid);

                if(is_array($degenerate_data)) {
                    # Copy key by key: array_merge() would renumber integer-like
                    # keys and thereby lose the data of purely numeric tokens.
                    foreach($degenerate_data as $found_token => $found_data)
                        $token_data[$found_token] = $found_data;
                }

            }

        }

        # Here, we have all available data in $token_data.

        $today = $this->b8_config['today'];

        $return_data_tokens = array();
        $return_data_degenerates = array();

        foreach($tokens as $token) {

            if(isset($token_data[$token])) {

                # The token was found in the database: add the data ...
                $counts = $this->_parse_count($token_data[$token]);
                $return_data_tokens[$token] = $counts;

                # ... and update its lastseen parameter
                $this->_update($token, $counts['count_ham'] . ' ' . $counts['count_spam'] . ' ' . $today, $uid);

                continue;

            }

            # The token was not found, so we look if we can return data for
            # degenerated forms of it

            if(!isset($this->_degenerator->degenerates[$token]))
                continue;

            foreach($this->_degenerator->degenerates[$token] as $degenerate) {

                if(!isset($token_data[$degenerate]))
                    continue;

                # A degeneration of the token was found in the database: add the data ...
                $counts = $this->_parse_count($token_data[$degenerate]);
                $return_data_degenerates[$token][$degenerate] = $counts;

                # ... and update its lastseen parameter
                $this->_update($degenerate, $counts['count_ham'] . ' ' . $counts['count_spam'] . ' ' . $today, $uid);

            }

        }

        # Now, all token data directly found in the database is in $return_data_tokens
        # and all data for degenerated versions is in $return_data_degenerates

        # First, we commit the changes to the lastseen parameters
        $this->_commit($uid);

        # Then, we return what we have
        return array(
            'tokens'      => $return_data_tokens,
            'degenerates' => $return_data_degenerates
        );

    }

    /**
     * Stores or deletes a list of tokens from the given category.
     *
     * @access public
     * @param array $tokens Map of token => number by which its counter is changed
     * @param const $category Either b8::HAM or b8::SPAM
     * @param const $action Either b8::LEARN or b8::UNLEARN
     * @param mixed $uid Passed through unchanged to all backend calls
     * @return mixed Returns the error code of validate() on failure, otherwise nothing.
     */

    public function process_text($tokens, $category, $action, $uid)
    {

        # Validate the startup

        $started_up = $this->validate();

        if($started_up !== TRUE)
            return $started_up;

        # No matter what we do, we first have to check what data we have.

        # First get the internals, including the ham texts and spam texts counter
        $internals = $this->get_internals($uid);

        # Then, fetch all data for all tokens we have
        $token_data = $this->_get_query(array_keys($tokens), $uid);

        $today = $this->b8_config['today'];

        # Process all tokens to learn/unlearn

        foreach($tokens as $token => $count) {

            $count = (int) $count;

            if(isset($token_data[$token])) {

                # We already have this token, so update its data

                $counts     = $this->_parse_count($token_data[$token]);
                $count_ham  = $counts['count_ham'];
                $count_spam = $counts['count_spam'];

                # Increase or decrease the right counter

                if($action === b8::LEARN) {
                    if($category === b8::HAM)
                        $count_ham += $count;
                    elseif($category === b8::SPAM)
                        $count_spam += $count;
                }

                elseif($action === b8::UNLEARN) {
                    if($category === b8::HAM)
                        $count_ham -= $count;
                    elseif($category === b8::SPAM)
                        $count_spam -= $count;
                }

                # We don't want to have negative values
                $count_ham  = max(0, $count_ham);
                $count_spam = max(0, $count_spam);

                # Now let's see if we have to update or delete the token
                if($count_ham !== 0 or $count_spam !== 0)
                    $this->_update($token, "$count_ham $count_spam " . $today, $uid);
                else
                    $this->_del($token, $uid);

                continue;

            }

            # We don't have the token. If we unlearn a text, we can't delete it
            # as we don't have it anyway, so just do something if we learn a text

            if($action !== b8::LEARN or $count < 1)
                continue;

            # Store the full $count, exactly as it would have been added to an
            # existing token. Otherwise, unlearning the same text later would
            # subtract more than was ever stored for it.
            if($category === b8::HAM)
                $data = "$count 0 ";
            elseif($category === b8::SPAM)
                $data = "0 $count ";
            else
                continue;

            $this->_put($token, $data . $today, $uid);

        }

        # Now, all tokens have been processed, so let's update the right text counter

        if($action === b8::LEARN)
            $step = 1;
        elseif($action === b8::UNLEARN)
            $step = -1;
        else
            $step = 0;

        if($step !== 0) {

            if($category === b8::HAM) {
                $internals['texts_ham'] = max(0, $internals['texts_ham'] + $step);
                $this->_update(self::INTERNALS_TEXTS_HAM, $internals['texts_ham'], $uid);
            }

            elseif($category === b8::SPAM) {
                $internals['texts_spam'] = max(0, $internals['texts_spam'] + $step);
                $this->_update(self::INTERNALS_TEXTS_SPAM, $internals['texts_spam'], $uid);
            }

        }

        # We're done and can commit all changes to the database now
        $this->_commit($uid);

    }

}
395
396 ?>