X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=include%2Fpoller.php;h=3a28b177c343e152c55f2f671d888fccfcb2003a;hb=3df235e5bd2666fdb4043e374d1df15414a355bd;hp=d971d4f00410a76cb77acac4364516c327052884;hpb=08f276f87b366e5d9c6f92e1dd5edc3da633707e;p=friendica.git diff --git a/include/poller.php b/include/poller.php index d971d4f004..3a28b177c3 100644 --- a/include/poller.php +++ b/include/poller.php @@ -12,7 +12,6 @@ if (!file_exists("boot.php") AND (sizeof($_SERVER["argv"]) != 0)) { require_once("boot.php"); - function poller_run(&$argv, &$argc){ global $a, $db; @@ -21,288 +20,251 @@ function poller_run(&$argv, &$argc){ } if(is_null($db)) { - @include(".htconfig.php"); - require_once("include/dba.php"); - $db = new dba($db_host, $db_user, $db_pass, $db_data); - unset($db_host, $db_user, $db_pass, $db_data); - }; - - - require_once('include/session.php'); - require_once('include/datetime.php'); - require_once('library/simplepie/simplepie.inc'); - require_once('include/items.php'); - require_once('include/Contact.php'); - require_once('include/email.php'); - require_once('include/socgraph.php'); - require_once('include/pidfile.php'); - - load_config('config'); - load_config('system'); - - $maxsysload = intval(get_config('system','maxloadavg')); - if($maxsysload < 1) - $maxsysload = 50; - if(function_exists('sys_getloadavg')) { - $load = sys_getloadavg(); - if(intval($load[0]) > $maxsysload) { - logger('system: load ' . $load[0] . ' too high. Poller deferred to next scheduled run.'); - return; - } - } - - $lockpath = get_lockpath(); - if ($lockpath != '') { - $pidfile = new pidfile($lockpath, 'poller'); - if($pidfile->is_already_running()) { - logger("poller: Already running"); - if ($pidfile->running_time() > 9*60) { - $pidfile->kill(); - logger("poller: killed stale process"); - // Calling a new instance - proc_run('php','include/poller.php'); - } - exit; - } - } - + @include(".htconfig.php"); + require_once("include/dba.php"); + $db = new dba($db_host, $db_user, $db_pass, $db_data); + unset($db_host, $db_user, $db_pass, $db_data); + }; + if (poller_max_connections_reached()) + return; - $a->set_baseurl(get_config('system','url')); + if (App::maxload_reached()) + return; - load_hooks(); + // Checking the number of workers + if (poller_too_much_workers(1)) { + poller_kill_stale_workers(); + return; + } - logger('poller: start'); + if(($argc <= 1) OR ($argv[1] != "no_cron")) { + // Run the cron job that calls all other jobs + proc_run("php","include/cron.php"); - // run queue delivery process in the background + // Run the cronhooks job separately from cron for being able to use a different timing + proc_run("php","include/cronhooks.php"); - proc_run('php',"include/queue.php"); + // Cleaning dead processes + poller_kill_stale_workers(); + } else + // Sleep four seconds before checking for running processes again to avoid having too many workers + sleep(4); - // run diaspora photo queue process in the background + // Checking number of workers + if (poller_too_much_workers(2)) + return; - proc_run('php',"include/dsprphotoq.php"); + $starttime = time(); - // expire any expired accounts + while ($r = q("SELECT * FROM `workerqueue` WHERE `executed` = '0000-00-00 00:00:00' ORDER BY `created` LIMIT 1")) { - q("UPDATE user SET `account_expired` = 1 where `account_expired` = 0 - AND `account_expires_on` != '0000-00-00 00:00:00' - AND `account_expires_on` < UTC_TIMESTAMP() "); + // Constantly check the number of available database connections to let the frontend be accessible at any time + if (poller_max_connections_reached()) + return; - // delete user and contact records for recently removed accounts + // Count active workers and compare them with a maximum value that depends on the load + if (poller_too_much_workers(3)) + return; - $r = q("SELECT * FROM `user` WHERE `account_removed` = 1 AND `account_expires_on` < UTC_TIMESTAMP() - INTERVAL 3 DAY"); - if ($r) { - foreach($r as $user) { - q("DELETE FROM `contact` WHERE `uid` = %d", intval($user['uid'])); - q("DELETE FROM `user` WHERE `uid` = %d", intval($user['uid'])); + q("UPDATE `workerqueue` SET `executed` = '%s', `pid` = %d WHERE `id` = %d AND `executed` = '0000-00-00 00:00:00'", + dbesc(datetime_convert()), + intval(getmypid()), + intval($r[0]["id"])); + + // Assure that there are no tasks executed twice + $id = q("SELECT `id` FROM `workerqueue` WHERE `id` = %d AND `pid` = %d", + intval($r[0]["id"]), + intval(getmypid())); + if (!$id) { + logger("Queue item ".$r[0]["id"]." was executed multiple times - skip this execution", LOGGER_DEBUG); + continue; } - } - $abandon_days = intval(get_config('system','account_abandon_days')); - if($abandon_days < 1) - $abandon_days = 0; + $argv = json_decode($r[0]["parameter"]); - // Check OStatus conversations - // Check only conversations with mentions (for a longer time) - check_conversations(true); + $argc = count($argv); - // Check every conversation - check_conversations(false); + // Check for existance and validity of the include file + $include = $argv[0]; - // To-Do: Regenerate usage statistics - // q("ANALYZE TABLE `item`"); + if (!validate_include($include)) { + logger("Include file ".$argv[0]." is not valid!"); + q("DELETE FROM `workerqueue` WHERE `id` = %d", intval($r[0]["id"])); + continue; + } - // once daily run birthday_updates and then expire in background + require_once($include); - $d1 = get_config('system','last_expire_day'); - $d2 = intval(datetime_convert('UTC','UTC','now','d')); + $funcname=str_replace(".php", "", basename($argv[0]))."_run"; - if($d2 != intval($d1)) { + if (function_exists($funcname)) { + logger("Process ".getmypid()." - ID ".$r[0]["id"].": ".$funcname." ".$r[0]["parameter"]); + $funcname($argv, $argc); - update_contact_birthdays(); + logger("Process ".getmypid()." - ID ".$r[0]["id"].": ".$funcname." - done"); - update_suggestions(); + q("DELETE FROM `workerqueue` WHERE `id` = %d", intval($r[0]["id"])); + } else + logger("Function ".$funcname." does not exist"); - set_config('system','last_expire_day',$d2); - proc_run('php','include/expire.php'); + // Quit the poller once every hour + if (time() > ($starttime + 3600)) + return; } - $last = get_config('system','cache_last_cleared'); - - if($last) { - $next = $last + (3600); // Once per hour - $clear_cache = ($next <= time()); - } else - $clear_cache = true; - - if ($clear_cache) { - // clear old cache - Cache::clear(); +} - // clear old item cache files - clear_cache(); +/** + * @brief Checks if the number of database connections has reached a critical limit. + * + * @return bool Are more than 3/4 of the maximum connections used? + */ +function poller_max_connections_reached() { + + // Fetch the max value from the config. This is needed when the system cannot detect the correct value by itself. + $max = get_config("system", "max_connections"); + + // Fetch the percentage level where the poller will get active + $maxlevel = get_config("system", "max_connections_level"); + if ($maxlevel == 0) + $maxlevel = 75; + + if ($max == 0) { + // the maximum number of possible user connections can be a system variable + $r = q("SHOW VARIABLES WHERE `variable_name` = 'max_user_connections'"); + if ($r) + $max = $r[0]["Value"]; + + // Or it can be granted. This overrides the system variable + $r = q("SHOW GRANTS"); + if ($r) + foreach ($r AS $grants) { + $grant = array_pop($grants); + if (stristr($grant, "GRANT USAGE ON")) + if (preg_match("/WITH MAX_USER_CONNECTIONS (\d*)/", $grant, $match)) + $max = $match[1]; + } + } - // clear cache for photos - clear_cache($a->get_basepath(), $a->get_basepath()."/photo"); + // If $max is set we will use the processlist to determine the current number of connections + // The processlist only shows entries of the current user + if ($max != 0) { + $r = q("SHOW PROCESSLIST"); + if (!$r) + return false; - // clear smarty cache - clear_cache($a->get_basepath()."/view/smarty3/compiled", $a->get_basepath()."/view/smarty3/compiled"); + $used = count($r); - // clear cache for image proxy - if (!get_config("system", "proxy_disabled")) { - clear_cache($a->get_basepath(), $a->get_basepath()."/proxy"); + logger("Connection usage (user values): ".$used."/".$max, LOGGER_DEBUG); - $cachetime = get_config('system','proxy_cache_time'); - if (!$cachetime) $cachetime = PROXY_DEFAULT_TIME; + $level = ($used / $max) * 100; - q('DELETE FROM `photo` WHERE `uid` = 0 AND `resource-id` LIKE "pic:%%" AND `created` < NOW() - INTERVAL %d SECOND', $cachetime); + if ($level >= $maxlevel) { + logger("Maximum level (".$maxlevel."%) of user connections reached: ".$used."/".$max); + return true; } - - set_config('system','cache_last_cleared', time()); } - $manual_id = 0; - $generation = 0; - $force = false; - $restart = false; + // We will now check for the system values. + // This limit could be reached although the user limits are fine. + $r = q("SHOW VARIABLES WHERE `variable_name` = 'max_connections'"); + if (!$r) + return false; - if(($argc > 1) && ($argv[1] == 'force')) - $force = true; + $max = intval($r[0]["Value"]); + if ($max == 0) + return false; - if(($argc > 1) && ($argv[1] == 'restart')) { - $restart = true; - $generation = intval($argv[2]); - if(! $generation) - killme(); - } + $r = q("SHOW STATUS WHERE `variable_name` = 'Threads_connected'"); + if (!$r) + return false; - if(($argc > 1) && intval($argv[1])) { - $manual_id = intval($argv[1]); - $force = true; - } + $used = intval($r[0]["Value"]); + if ($used == 0) + return false; - $interval = intval(get_config('system','poll_interval')); - if(! $interval) - $interval = ((get_config('system','delivery_interval') === false) ? 3 : intval(get_config('system','delivery_interval'))); + logger("Connection usage (system values): ".$used."/".$max, LOGGER_DEBUG); - $sql_extra = (($manual_id) ? " AND `id` = $manual_id " : ""); + $level = $used / $max * 100; - reload_plugins(); + if ($level < $maxlevel) + return false; - $d = datetime_convert(); - - if(! $restart) - proc_run('php','include/cronhooks.php'); - - // Only poll from those with suitable relationships, - // and which have a polling address and ignore Diaspora since - // we are unable to match those posts with a Diaspora GUID and prevent duplicates. - - $abandon_sql = (($abandon_days) - ? sprintf(" AND `user`.`login_date` > UTC_TIMESTAMP() - INTERVAL %d DAY ", intval($abandon_days)) - : '' - ); + logger("Maximum level (".$level."%) of system connections reached: ".$used."/".$max); + return true; +} - $contacts = q("SELECT `contact`.`id` FROM `contact` INNER JOIN `user` ON `user`.`uid` = `contact`.`uid` - WHERE `rel` IN (%d, %d) AND `poll` != '' AND `network` IN ('%s', '%s', '%s', '%s', '%s', '%s') - $sql_extra - AND NOT `self` AND NOT `contact`.`blocked` AND NOT `contact`.`readonly` AND NOT `contact`.`archive` - AND NOT `user`.`account_expired` AND NOT `user`.`account_removed` $abandon_sql ORDER BY RAND()", - intval(CONTACT_IS_SHARING), - intval(CONTACT_IS_FRIEND), - dbesc(NETWORK_DFRN), - dbesc(NETWORK_ZOT), - dbesc(NETWORK_OSTATUS), - dbesc(NETWORK_FEED), - dbesc(NETWORK_MAIL), - dbesc(NETWORK_MAIL2) - ); +/** + * @brief fix the queue entry if the worker process died + * + */ +function poller_kill_stale_workers() { + $r = q("SELECT `pid`, `executed` FROM `workerqueue` WHERE `executed` != '0000-00-00 00:00:00'"); - if(! count($contacts)) { + if (!is_array($r) || count($r) == 0) { + // No processing here needed return; } - foreach($contacts as $c) { - - $res = q("SELECT * FROM `contact` WHERE `id` = %d LIMIT 1", - intval($c['id']) - ); - - if((! $res) || (! count($res))) - continue; - - foreach($res as $contact) { + foreach($r AS $pid) + if (!posix_kill($pid["pid"], 0)) + q("UPDATE `workerqueue` SET `executed` = '0000-00-00 00:00:00', `pid` = 0 WHERE `pid` = %d", + intval($pid["pid"])); + else { + // Kill long running processes + $duration = (time() - strtotime($pid["executed"])) / 60; + if ($duration > 180) { + logger("Worker process ".$pid["pid"]." took more than 3 hours. It will be killed now."); + posix_kill($pid["pid"], SIGTERM); + + // Question: If a process is stale: Should we remove it or should we reschedule it? + // By now we rescheduling it. It's maybe not the wisest decision? + q("UPDATE `workerqueue` SET `executed` = '0000-00-00 00:00:00', `pid` = 0 WHERE `pid` = %d", + intval($pid["pid"])); + } else + logger("Worker process ".$pid["pid"]." now runs for ".round($duration)." minutes. That's okay.", LOGGER_DEBUG); + } +} - $xml = false; +function poller_too_much_workers($stage) { - if($manual_id) - $contact['last-update'] = '0000-00-00 00:00:00'; + $queues = get_config("system", "worker_queues"); - if(in_array($contact['network'], array(NETWORK_DFRN, NETWORK_ZOT, NETWORK_OSTATUS))) - $contact['priority'] = 2; + if ($queues == 0) + $queues = 4; - if($contact['subhub'] AND in_array($contact['network'], array(NETWORK_DFRN, NETWORK_ZOT, NETWORK_OSTATUS))) { - // We should be getting everything via a hub. But just to be sure, let's check once a day. - // (You can make this more or less frequent if desired by setting 'pushpoll_frequency' appropriately) - // This also lets us update our subscription to the hub, and add or replace hubs in case it - // changed. We will only update hubs once a day, regardless of 'pushpoll_frequency'. + $active = poller_active_workers(); - $poll_interval = get_config('system','pushpoll_frequency'); - $contact['priority'] = (($poll_interval !== false) ? intval($poll_interval) : 3); - } + // Decrease the number of workers at higher load + $load = current_load(); + if($load) { + $maxsysload = intval(get_config('system','maxloadavg')); + if($maxsysload < 1) + $maxsysload = 50; - if($contact['priority'] AND !$force) { - - $update = false; - - $t = $contact['last-update']; - - /** - * Based on $contact['priority'], should we poll this site now? Or later? - */ - - switch ($contact['priority']) { - case 5: - if(datetime_convert('UTC','UTC', 'now') > datetime_convert('UTC','UTC', $t . " + 1 month")) - $update = true; - break; - case 4: - if(datetime_convert('UTC','UTC', 'now') > datetime_convert('UTC','UTC', $t . " + 1 week")) - $update = true; - break; - case 3: - if(datetime_convert('UTC','UTC', 'now') > datetime_convert('UTC','UTC', $t . " + 1 day")) - $update = true; - break; - case 2: - if(datetime_convert('UTC','UTC', 'now') > datetime_convert('UTC','UTC', $t . " + 12 hour")) - $update = true; - break; - case 1: - default: - if(datetime_convert('UTC','UTC', 'now') > datetime_convert('UTC','UTC', $t . " + 1 hour")) - $update = true; - break; - } - if(!$update) - continue; - } + $maxworkers = $queues; - logger("Polling ".$contact["network"]." ".$contact["id"]." ".$contact["nick"]." ".$contact["name"]); + // Some magical mathemathics to reduce the workers + $exponent = 3; + $slope = $maxworkers / pow($maxsysload, $exponent); + $queues = ceil($slope * pow(max(0, $maxsysload - $load), $exponent)); - proc_run('php','include/onepoll.php',$contact['id']); + logger("Current load stage ".$stage.": ".$load." - maximum: ".$maxsysload." - current queues: ".$active." - maximum: ".$queues, LOGGER_DEBUG); - if($interval) - @time_sleep_until(microtime(true) + (float) $interval); - } } - logger('poller: end'); + return($active >= $queues); +} + +function poller_active_workers() { + $workers = q("SELECT COUNT(*) AS `workers` FROM `workerqueue` WHERE `executed` != '0000-00-00 00:00:00'"); - return; + return($workers[0]["workers"]); } if (array_search(__file__,get_included_files())===0){ poller_run($_SERVER["argv"],$_SERVER["argc"]); killme(); } +?>