git.mxchange.org Git - friendica.git/blobdiff - include/poller.php
Double check for maximum number of workers
[friendica.git] / include / poller.php
index b04c77f74446d80ce627fb41786a50b9aae3ed49..3b348531c5b6ed47806b1fa821189bb7d0262b17 100644 (file)
@@ -1,7 +1,16 @@
 <?php
+if (!file_exists("boot.php") AND (sizeof($_SERVER["argv"]) != 0)) {
+       $directory = dirname($_SERVER["argv"][0]);
 
-require_once("boot.php");
+       if (substr($directory, 0, 1) != "/")
+               $directory = $_SERVER["PWD"]."/".$directory;
+
+       $directory = realpath($directory."/..");
+
+       chdir($directory);
+}
 
+require_once("boot.php");
 
 function poller_run(&$argv, &$argc){
        global $a, $db;
@@ -11,280 +20,150 @@ function poller_run(&$argv, &$argc){
        }
 
        if(is_null($db)) {
-           @include(".htconfig.php");
-       require_once("include/dba.php");
-           $db = new dba($db_host, $db_user, $db_pass, $db_data);
-       unset($db_host, $db_user, $db_pass, $db_data);
-       };
-
-
-       require_once('include/session.php');
-       require_once('include/datetime.php');
-       require_once('library/simplepie/simplepie.inc');
-       require_once('include/items.php');
-       require_once('include/Contact.php');
-       require_once('include/email.php');
-       require_once('include/socgraph.php');
-       require_once('include/pidfile.php');
-
-       load_config('config');
-       load_config('system');
-
-       $maxsysload = intval(get_config('system','maxloadavg'));
-       if($maxsysload < 1)
-               $maxsysload = 50;
+               @include(".htconfig.php");
+               require_once("include/dba.php");
+               $db = new dba($db_host, $db_user, $db_pass, $db_data);
+               unset($db_host, $db_user, $db_pass, $db_data);
+       };
+
        if(function_exists('sys_getloadavg')) {
+               $maxsysload = intval(get_config('system','maxloadavg'));
+               if($maxsysload < 1)
+                       $maxsysload = 50;
+
                $load = sys_getloadavg();
                if(intval($load[0]) > $maxsysload) {
-                       logger('system: load ' . $load . ' too high. Poller deferred to next scheduled run.');
+                       logger('system: load ' . $load[0] . ' too high. poller deferred to next scheduled run.');
                        return;
                }
        }
 
-       $lockpath = get_config('system','lockpath');
-       if ($lockpath != '') {
-               $pidfile = new pidfile($lockpath, 'poller');
-               if($pidfile->is_already_running()) {
-                       logger("poller: Already running");
-                       if ($pidfile->running_time() > 9*60) {
-                                $pidfile->kill();
-                                logger("poller: killed stale process");
-                                // Calling a new instance
-                                proc_run('php','include/poller.php');
-                        }
-                       exit;
-               }
-       }
-
-
-
-       $a->set_baseurl(get_config('system','url'));
-
-       load_hooks();
-
-       logger('poller: start');
-
-       // run queue delivery process in the background
-
-       proc_run('php',"include/queue.php");
-
-       // run diaspora photo queue process in the background
-
-       proc_run('php',"include/dsprphotoq.php");
-
-       // expire any expired accounts
-
-       q("UPDATE user SET `account_expired` = 1 where `account_expired` = 0 
-               AND `account_expires_on` != '0000-00-00 00:00:00' 
-               AND `account_expires_on` < UTC_TIMESTAMP() ");
+       if(($argc <= 1) OR ($argv[1] != "no_cron")) {
+               // Run the cron job that calls all other jobs
+               proc_run("php","include/cron.php");
+
+               // Run the cronhooks job separately from cron for being able to use a different timing
+               proc_run("php","include/cronhooks.php");
+
+               // Cleaning dead processes
+               $r = q("SELECT DISTINCT(`pid`) FROM `workerqueue` WHERE `executed` != '0000-00-00 00:00:00'");
+               foreach($r AS $pid)
+                       if (!posix_kill($pid["pid"], 0))
+                               q("UPDATE `workerqueue` SET `executed` = '0000-00-00 00:00:00', `pid` = 0 WHERE `pid` = %d",
+                                       intval($pid["pid"]));
+                       else {
+                               // To-Do: Kill long running processes
+                               // But: Update processes (like the database update) mustn't be killed
+                       }
 
-       // delete user and contact records for recently removed accounts
+       } else {
+               // Checking the number of workers
+               if (poller_too_much_workers(1))
+                       return;
 
-       $r = q("SELECT * FROM `user` WHERE `account_removed` = 1 AND `account_expires_on` < UTC_TIMESTAMP() - INTERVAL 3 DAY");
-       if ($r) {
-               foreach($r as $user) {
-                       q("DELETE FROM `contact` WHERE `uid` = %d", intval($user['uid']));
-                       q("DELETE FROM `user` WHERE `uid` = %d", intval($user['uid']));
-               }
+               // Sleep four seconds before checking for running processes again to avoid having too many workers
+               sleep(4);
        }
 
-       $abandon_days = intval(get_config('system','account_abandon_days'));
-       if($abandon_days < 1)
-               $abandon_days = 0;
+       // Checking number of workers
+       if (poller_too_much_workers(2))
+               return;
 
-       // Check OStatus conversations
-       check_conversations();
+       $starttime = time();
 
-       // To-Do: Regenerate usage statistics
-       // q("ANALYZE TABLE `item`");
+       while ($r = q("SELECT * FROM `workerqueue` WHERE `executed` = '0000-00-00 00:00:00' ORDER BY `created` LIMIT 1")) {
 
-       // once daily run birthday_updates and then expire in background
+               q("UPDATE `workerqueue` SET `executed` = '%s', `pid` = %d WHERE `id` = %d AND `executed` = '0000-00-00 00:00:00'",
+                       dbesc(datetime_convert()),
+                       intval(getmypid()),
+                       intval($r[0]["id"]));
 
-       $d1 = get_config('system','last_expire_day');
-       $d2 = intval(datetime_convert('UTC','UTC','now','d'));
+               // Ensure that no task is executed twice
+               $id = q("SELECT `id` FROM `workerqueue` WHERE `id` = %d AND `pid` = %d",
+                       intval($r[0]["id"]),
+                       intval(getmypid()));
+               if (!$id) {
+                       logger("Queue item ".$r[0]["id"]." was executed multiple times - skip this execution", LOGGER_DEBUG);
+                       continue;
+               }
 
-       if($d2 != intval($d1)) {
+               $argv = json_decode($r[0]["parameter"]);
 
-               update_contact_birthdays();
+               $argc = count($argv);
 
-               update_suggestions();
+               // Check for existence and validity of the include file
+               $include = $argv[0];
 
-               set_config('system','last_expire_day',$d2);
-               proc_run('php','include/expire.php');
-       }
+               if (!validate_include($include)) {
+                       logger("Include file ".$argv[0]." is not valid!");
+                       q("DELETE FROM `workerqueue` WHERE `id` = %d", intval($r[0]["id"]));
+                       continue;
+               }
 
-       $last = get_config('system','cache_last_cleared');
+               require_once($include);
 
-       if($last) {
-               $next = $last + (3600); // Once per hour
-               $clear_cache = ($next <= time());
-        } else
-               $clear_cache = true;
+               $funcname=str_replace(".php", "", basename($argv[0]))."_run";
 
-       if ($clear_cache) {
-               // clear old cache
-               Cache::clear();
+               if (function_exists($funcname)) {
+                       logger("Process ".getmypid().": ".$funcname." ".$r[0]["parameter"]);
+                       $funcname($argv, $argc);
 
-               // clear old item cache files
-               clear_cache();
+                       logger("Process ".getmypid().": ".$funcname." - done");
 
-               // clear cache for photos
-               clear_cache($a->get_basepath(), $a->get_basepath()."/photo");
+                       q("DELETE FROM `workerqueue` WHERE `id` = %d", intval($r[0]["id"]));
+               } else
+                       logger("Function ".$funcname." does not exist");
 
-               // clear smarty cache
-               clear_cache($a->get_basepath()."/view/smarty3/compiled", $a->get_basepath()."/view/smarty3/compiled");
+               // Quit the poller after it has been running for more than one hour
+               if (time() > ($starttime + 3600))
+                       return;
 
-               set_config('system','cache_last_cleared', time());
+               // Count active workers and compare them with a maximum value that depends on the load
+               if (poller_too_much_workers(3))
+                       return;
        }
 
-       $manual_id  = 0;
-       $generation = 0;
-       $hub_update = false;
-       $force      = false;
-       $restart    = false;
-
-       if(($argc > 1) && ($argv[1] == 'force'))
-               $force = true;
+}
 
-       if(($argc > 1) && ($argv[1] == 'restart')) {
-               $restart = true;
-               $generation = intval($argv[2]);
-               if(! $generation)
-                       killme();
-       }
+function poller_too_much_workers($stage) {
 
-       if(($argc > 1) && intval($argv[1])) {
-               $manual_id = intval($argv[1]);
-               $force     = true;
-       }
+       $queues = get_config("system", "worker_queues");
 
-       $interval = intval(get_config('system','poll_interval'));
-       if(! $interval) 
-               $interval = ((get_config('system','delivery_interval') === false) ? 3 : intval(get_config('system','delivery_interval')));
+       if ($queues == 0)
+               $queues = 4;
 
-       $sql_extra = (($manual_id) ? " AND `id` = $manual_id " : "");
+       $active = poller_active_workers();
 
-       reload_plugins();
-
-       $d = datetime_convert();
+       // Decrease the number of workers at higher load
+       if(function_exists('sys_getloadavg')) {
+               $load = max(sys_getloadavg());
 
-       if(! $restart)
-               proc_run('php','include/cronhooks.php');
+               $maxsysload = intval(get_config('system','maxloadavg'));
+               if($maxsysload < 1)
+                       $maxsysload = 50;
 
-       // Only poll from those with suitable relationships,
-       // and which have a polling address and ignore Diaspora since 
-       // we are unable to match those posts with a Diaspora GUID and prevent duplicates.
+               $maxworkers = $queues;
 
-       $abandon_sql = (($abandon_days) 
-               ? sprintf(" AND `user`.`login_date` > UTC_TIMESTAMP() - INTERVAL %d DAY ", intval($abandon_days)) 
-               : '' 
-       );
+               // Reduce the allowed number of workers cubically as the load approaches the maximum load
+               $exponent = 3;
+               $slope = $maxworkers / pow($maxsysload, $exponent);
+               $queues = ceil($slope * pow(max(0, $maxsysload - $load), $exponent));
 
-       $contacts = q("SELECT `contact`.`id` FROM `contact` INNER JOIN `user` ON `user`.`uid` = `contact`.`uid` 
-               WHERE ( `rel` = %d OR `rel` = %d ) AND `poll` != ''
-               AND NOT `network` IN ( '%s', '%s', '%s' )
-               $sql_extra 
-               AND `self` = 0 AND `contact`.`blocked` = 0 AND `contact`.`readonly` = 0 
-               AND `contact`.`archive` = 0 
-               AND `user`.`account_expired` = 0 AND `user`.`account_removed` = 0 $abandon_sql ORDER BY RAND()",
-               intval(CONTACT_IS_SHARING),
-               intval(CONTACT_IS_FRIEND),
-               dbesc(NETWORK_DIASPORA),
-               dbesc(NETWORK_FACEBOOK),
-               dbesc(NETWORK_PUMPIO)
-       );
+               logger("Current load stage ".$stage.": ".$load." - maximum: ".$maxsysload." - current queues: ".$active." - maximum: ".$queues, LOGGER_DEBUG);
 
-       if(! count($contacts)) {
-               return;
        }
 
-       foreach($contacts as $c) {
-
-               $res = q("SELECT * FROM `contact` WHERE `id` = %d LIMIT 1",
-                       intval($c['id'])
-               );
-
-               if((! $res) || (! count($res)))
-                       continue;
-
-               foreach($res as $contact) {
-
-                       $xml = false;
-
-                       if($manual_id)
-                               $contact['last-update'] = '0000-00-00 00:00:00';
-
-                       if($contact['network'] === NETWORK_DFRN)
-                               $contact['priority'] = 2;
-
-                       if(!get_config('system','ostatus_use_priority') and ($contact['network'] === NETWORK_OSTATUS))
-                               $contact['priority'] = 2;
-
-                       if($contact['priority'] || $contact['subhub']) {
-
-                               $hub_update = true;
-                               $update     = false;
-
-                               $t = $contact['last-update'];
-
-                               // We should be getting everything via a hub. But just to be sure, let's check once a day.
-                               // (You can make this more or less frequent if desired by setting 'pushpoll_frequency' appropriately)
-                               // This also lets us update our subscription to the hub, and add or replace hubs in case it
-                               // changed. We will only update hubs once a day, regardless of 'pushpoll_frequency'. 
-
-
-                               if($contact['subhub']) {
-                                       $poll_interval = get_config('system','pushpoll_frequency');
-                                       $contact['priority'] = (($poll_interval !== false) ? intval($poll_interval) : 3);
-                                       $hub_update = false;
-       
-                                       if((datetime_convert('UTC','UTC', 'now') > datetime_convert('UTC','UTC', $t . " + 1 day")) || $force)
-                                                       $hub_update = true;
-                               }
-                               else
-                                       $hub_update = false;
-
-                               /**
-                                * Based on $contact['priority'], should we poll this site now? Or later?
-                                */                     
-
-                               switch ($contact['priority']) {
-                                       case 5:
-                                               if(datetime_convert('UTC','UTC', 'now') > datetime_convert('UTC','UTC', $t . " + 1 month"))
-                                                       $update = true;
-                                               break;                                  
-                                       case 4:
-                                               if(datetime_convert('UTC','UTC', 'now') > datetime_convert('UTC','UTC', $t . " + 1 week"))
-                                                       $update = true;
-                                               break;
-                                       case 3:
-                                               if(datetime_convert('UTC','UTC', 'now') > datetime_convert('UTC','UTC', $t . " + 1 day"))
-                                                       $update = true;
-                                               break;
-                                       case 2:
-                                               if(datetime_convert('UTC','UTC', 'now') > datetime_convert('UTC','UTC', $t . " + 12 hour"))
-                                                       $update = true;
-                                               break;
-                                       case 1:
-                                       default:
-                                               if(datetime_convert('UTC','UTC', 'now') > datetime_convert('UTC','UTC', $t . " + 1 hour"))
-                                                       $update = true;
-                                               break;
-                               }
-                               if((! $update) && (! $force))
-                                       continue;
-                       }
+       return($active >= $queues);
+}
 
-                       proc_run('php','include/onepoll.php',$contact['id']);
-                       if($interval)
-                               @time_sleep_until(microtime(true) + (float) $interval);
-               }
-       }
+function poller_active_workers() {
+       $workers = q("SELECT COUNT(*) AS `workers` FROM `workerqueue` WHERE `executed` != '0000-00-00 00:00:00'");
 
-       return;
+       return($workers[0]["workers"]);
 }
 
 if (array_search(__file__,get_included_files())===0){
-  poller_run($argv,$argc);
+  poller_run($_SERVER["argv"],$_SERVER["argc"]);
   killme();
 }
+?>