3 # Copyright (C) 2010 Tobias Leupold <tobias.leupold@web.de>
5 # This file is part of the b8 package
7 # This program is free software; you can redistribute it and/or modify it
8 # under the terms of the GNU Lesser General Public License as published by
9 # the Free Software Foundation in version 2.1 of the License.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
14 # License for more details.
16 # You should have received a copy of the GNU Lesser General Public License
17 # along with this program; if not, write to the Free Software Foundation,
18 # Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
/**
 * Functions used by all storage backends
 * Copyright (C) 2010 Tobias Leupold <tobias.leupold@web.de>
 *
 * This class only *calls* _get_query(), _put(), _update(), _del() and
 * _commit() and reads $this->b8_config['degenerator'] / ['today']; none of
 * them is defined here, so the concrete storage backend has to provide them.
 *
 * @author Tobias Leupold
 */
abstract class b8_storage_base
{

    # Whether the backend is usable; reset to FALSE by check_database() on failure
    public $connected = FALSE;

    # Degenerator instance, (re)created by validate()
    protected $_degenerator = NULL;

    # Names of the internal bookkeeping records
    const INTERNALS_TEXTS_HAM  = 'bayes*texts.ham';
    const INTERNALS_TEXTS_SPAM = 'bayes*texts.spam';
    const INTERNALS_DBVERSION  = 'bayes*dbversion';

    # Error codes
    const BACKEND_NOT_CONNECTED  = 'BACKEND_NOT_CONNECTED';
    const DATABASE_WRONG_VERSION = 'DATABASE_WRONG_VERSION';
    const DATABASE_NOT_B8        = 'DATABASE_NOT_B8';

    /**
     * Validates the class has all it needs to work.
     *
     * @return mixed Returns TRUE if everything is okay, otherwise an error code.
     */
    protected function validate()
    {

        # We set up the degenerator here, as we would have to duplicate code if it
        # was done in the constructor of the respective storage backend.
        $class = 'b8_degenerator_' . $this->b8_config['degenerator'];
        $this->_degenerator = new $class();

        if($this->connected !== TRUE)
            return self::BACKEND_NOT_CONNECTED;

        return TRUE;

    }

    /**
     * Checks if a b8 database is used and if its version is okay.
     *
     * On any failure $this->connected is reset to FALSE.
     *
     * @param mixed $uid Passed through unchanged to the backend query.
     * @return mixed Returns TRUE if everything is okay, otherwise an error code.
     */
    protected function check_database($uid)
    {

        # Look at the raw record: get_internals() always fills in 'dbversion',
        # so it cannot tell "no version stored" from "wrong version stored".
        $raw = $this->_read_internals($uid);

        if(!isset($raw[self::INTERNALS_DBVERSION])) {
            $this->connected = FALSE;
            return self::DATABASE_NOT_B8;
        }

        if((int) $raw[self::INTERNALS_DBVERSION] !== 2) {
            $this->connected = FALSE;
            return self::DATABASE_WRONG_VERSION;
        }

        return TRUE;

    }

    /**
     * Parses the "count" data of a token.
     *
     * The stored format is "count_ham count_spam lastseen", separated by single
     * spaces. Missing fields are treated as 0; lastseen is not returned.
     *
     * @param string $data
     * @return array Returns array('count_ham' => int, 'count_spam' => int).
     */
    private function _parse_count($data)
    {

        $fields = explode(' ', (string) $data);

        return array(
            'count_ham'  => isset($fields[0]) ? (int) $fields[0] : 0,
            'count_spam' => isset($fields[1]) ? (int) $fields[1] : 0
        );

    }

    /**
     * Fetches the raw internal records from the backend.
     *
     * @param mixed $uid Passed through unchanged to _get_query().
     * @return array Whatever _get_query() found, keyed by record name; an
     *               empty array if the backend did not return an array.
     */
    private function _read_internals($uid)
    {

        $raw = $this->_get_query(
            array(
                self::INTERNALS_TEXTS_HAM,
                self::INTERNALS_TEXTS_SPAM,
                self::INTERNALS_DBVERSION
            ),
            $uid
        );

        return is_array($raw) ? $raw : array();

    }

    /**
     * Get the database's internal variables.
     *
     * Records the backend did not return are reported as 0.
     *
     * @param mixed $uid Passed through unchanged to the backend query.
     * @return array Returns array('texts_ham' => int, 'texts_spam' => int, 'dbversion' => int).
     */
    public function get_internals($uid)
    {

        $raw = $this->_read_internals($uid);

        return array(
            'texts_ham'  => isset($raw[self::INTERNALS_TEXTS_HAM])  ? (int) $raw[self::INTERNALS_TEXTS_HAM]  : 0,
            'texts_spam' => isset($raw[self::INTERNALS_TEXTS_SPAM]) ? (int) $raw[self::INTERNALS_TEXTS_SPAM] : 0,
            'dbversion'  => isset($raw[self::INTERNALS_DBVERSION])  ? (int) $raw[self::INTERNALS_DBVERSION]  : 0
        );

    }

    /**
     * Rewrites a token's record with unchanged counters and today's date.
     *
     * @param string $token
     * @param array $counts As returned by _parse_count().
     * @param mixed $uid
     * @return void
     */
    private function _refresh_lastseen($token, $counts, $uid)
    {
        $this->_update(
            $token,
            "{$counts['count_ham']} {$counts['count_spam']} " . $this->b8_config['today'],
            $uid
        );
    }

    /**
     * Get all data about a list of tokens from the database.
     *
     * Tokens that are not stored are looked up through their degenerated
     * forms. Every record that is returned also gets its lastseen field
     * rewritten to $this->b8_config['today'].
     *
     * @param array $tokens List of tokens to look up.
     * @param mixed $uid Passed through unchanged to every backend call.
     * @return mixed The error code from validate() on failure, otherwise
     *               array('tokens' => array(token => counts),
     *               'degenerates' => array(token => array(degenerate => counts)))
     *               where counts is array('count_ham' => int, 'count_spam' => int).
     */
    public function get($tokens, $uid)
    {

        # Validate the startup
        $started_up = $this->validate();

        if($started_up !== TRUE)
            return $started_up;

        # First we see what we have in the database.
        $token_data = $this->_get_query($tokens, $uid);

        if(!is_array($token_data))
            $token_data = array();

        # Check if we have to degenerate some tokens
        $missing_tokens = array();

        foreach($tokens as $token) {
            if(!isset($token_data[$token]))
                $missing_tokens[] = $token;
        }

        if(count($missing_tokens) > 0) {

            # Generate a list of degenerated tokens for the missing tokens ...
            $degenerates = $this->_degenerator->degenerate($missing_tokens);

            $degenerates_list = array();

            foreach($degenerates as $token_degenerates) {
                foreach($token_degenerates as $degenerate)
                    $degenerates_list[] = $degenerate;
            }

            # ... and look them up (for the same $uid as everything else)
            if(count($degenerates_list) > 0) {

                $degenerate_data = $this->_get_query($degenerates_list, $uid);

                # Union instead of array_merge(): array_merge() renumbers
                # integer-like keys, which would drop purely numeric tokens.
                if(is_array($degenerate_data))
                    $token_data = $token_data + $degenerate_data;

            }

        }

        # Here, we have all available data in $token_data.
        $return_data_tokens = array();
        $return_data_degenerates = array();

        foreach($tokens as $token) {

            if(isset($token_data[$token])) {

                # The token was found in the database: parse its data ...
                $return_data_tokens[$token] = $this->_parse_count($token_data[$token]);

                # ... and update its lastseen parameter
                $this->_refresh_lastseen($token, $return_data_tokens[$token], $uid);

                continue;

            }

            # The token was not found, so we look if we
            # can return data for degenerated tokens
            if(!isset($this->_degenerator->degenerates[$token]))
                continue;

            foreach($this->_degenerator->degenerates[$token] as $degenerate) {

                if(!isset($token_data[$degenerate]))
                    continue;

                # A degeneration of the token was found in the database
                $return_data_degenerates[$token][$degenerate] = $this->_parse_count($token_data[$degenerate]);

                # ... and update its lastseen parameter
                $this->_refresh_lastseen($degenerate, $return_data_degenerates[$token][$degenerate], $uid);

            }

        }

        # Now, all token data directly found in the database is in $return_data_tokens
        # and all data for degenerated versions is in $return_data_degenerates

        # First, we commit the changes to the lastseen parameters
        $this->_commit($uid);

        # Then, we return what we have
        return array(
            'tokens'      => $return_data_tokens,
            'degenerates' => $return_data_degenerates
        );

    }

    /**
     * Stores or deletes a list of tokens from the given category.
     *
     * @param array $tokens Map of token => number of occurrences.
     * @param const $category Either b8::HAM or b8::SPAM
     * @param const $action Either b8::LEARN or b8::UNLEARN
     * @param mixed $uid Passed through unchanged to every backend call.
     * @return mixed The error code from validate() on failure, otherwise nothing.
     */
    public function process_text($tokens, $category, $action, $uid)
    {

        # Validate the startup
        $started_up = $this->validate();

        if($started_up !== TRUE)
            return $started_up;

        # No matter what we do, we first have to check what data we have.

        # First get the internals, including the ham texts and spam texts counter
        $internals = $this->get_internals($uid);

        # Then, fetch all data for all tokens we have
        $token_data = $this->_get_query(array_keys($tokens), $uid);

        if(!is_array($token_data))
            $token_data = array();

        $is_ham     = ($category === b8::HAM);
        $is_spam    = ($category === b8::SPAM);
        $is_learn   = ($action === b8::LEARN);
        $is_unlearn = ($action === b8::UNLEARN);

        # +1 when learning, -1 when unlearning, 0 for anything else
        $direction = $is_learn ? 1 : ($is_unlearn ? -1 : 0);

        # Process all tokens to learn/unlearn
        foreach($tokens as $token => $count) {

            if(isset($token_data[$token])) {

                # We already have this token, so update its data
                $counts = $this->_parse_count($token_data[$token]);

                # Increase or decrease the right counter
                if($is_ham)
                    $counts['count_ham'] += $direction * $count;
                elseif($is_spam)
                    $counts['count_spam'] += $direction * $count;

                # We don't want to have negative values
                $count_ham  = max(0, (int) $counts['count_ham']);
                $count_spam = max(0, (int) $counts['count_spam']);

                # Now let's see if we have to update or delete the token
                if($count_ham !== 0 || $count_spam !== 0)
                    $this->_update($token, "$count_ham $count_spam " . $this->b8_config['today'], $uid);
                else
                    $this->_del($token, $uid);

                continue;

            }

            # We don't have the token. If we unlearn a text, we can't delete it
            # as we don't have it anyway, so just do something if we learn a text
            if(!$is_learn)
                continue;

            if($is_ham)
                $data = "$count 0 ";
            elseif($is_spam)
                $data = "0 $count ";
            else
                continue; # unknown category: there is nothing sensible to store

            $this->_put($token, $data . $this->b8_config['today'], $uid);

        }

        # Now, all tokens have been processed, so let's update the right text counter
        if($direction !== 0 && ($is_ham || $is_spam)) {

            $field  = $is_ham ? 'texts_ham' : 'texts_spam';
            $record = $is_ham ? self::INTERNALS_TEXTS_HAM : self::INTERNALS_TEXTS_SPAM;

            # Never let a text counter drop below zero
            $internals[$field] = max(0, $internals[$field] + $direction);

            $this->_update($record, $internals[$field], $uid);

        }

        # We're done and can commit all changes to the database now
        $this->_commit($uid);

    }

}