X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;ds=sidebyside;f=blockbot%2Fblockbot.php;h=0ff372c502862a3b8f1a4093f5f99cf9acaf2f40;hb=cff3dd67b065983dcbe13858a2a31f2de9992659;hp=b602bf63bbc7ac86030b8fd27232f4a017b1b90d;hpb=05c98064b4d624fcdf5e7481f66704056da007ea;p=friendica-addons.git diff --git a/blockbot/blockbot.php b/blockbot/blockbot.php index b602bf63..0ff372c5 100644 --- a/blockbot/blockbot.php +++ b/blockbot/blockbot.php @@ -15,6 +15,7 @@ use Friendica\DI; use Jaybizzle\CrawlerDetect\CrawlerDetect; use Friendica\Core\Logger; use Friendica\Core\Renderer; +use Friendica\Network\HTTPException\ForbiddenException; require_once __DIR__ . DIRECTORY_SEPARATOR . 'vendor' . DIRECTORY_SEPARATOR . 'autoload.php'; @@ -49,7 +50,7 @@ function blockbot_init_1(App $a) { // List of "good" crawlers $good_agents = ['fediverse.space crawler', 'fediverse.network crawler', 'Active_Pods_CheckBot_3.0', 'Social-Relay/', 'Test Certificate Info', 'Uptimebot/', 'GNUSocialBot', 'UptimeRobot/', - 'PTST/']; + 'PTST/', 'Zabbix']; // List of known crawlers. $agents = ['SemrushBot', 's~feedly-nikon3', 'Qwantify/Bleriot/', 'ltx71', 'Sogou web spider/', @@ -57,7 +58,7 @@ function blockbot_init_1(App $a) { 'PaperLiBot/', 'Nuzzel', 'um-LN/', 'Google Favicon', 'Datanyze', 'BLEXBot/', '360Spider', 'adscanner/', 'HeadlessChrome', 'wpif', 'startmebot/', 'Googlebot/', 'Applebot/', 'facebookexternalhit/', 'GoogleImageProxy', 'bingbot/', 'heritrix/', 'ldspider', - 'AwarioRssBot/', 'Zabbix', 'TweetmemeBot/', 'dcrawl/', 'PhantomJS/', 'Googlebot-Image/', + 'AwarioRssBot/', 'TweetmemeBot/', 'dcrawl/', 'PhantomJS/', 'Googlebot-Image/', 'CrowdTanglebot/', 'Mediapartners-Google', 'Baiduspider/', 'datagnionbot', 'MegaIndex.ru/', 'SMUrlExpander', 'Hatena-Favicon/', 'Wappalyzer', 'FlipboardProxy/', 'NetcraftSurveyAgent/', 'Dataprovider.com', 'SMTBot/', 'Nimbostratus-Bot/', @@ -73,7 +74,8 @@ function blockbot_init_1(App $a) { 'SummalyBot/', 'DNSResearchBot/', 'PetalBot;', 'Nmap Scripting Engine;', 'Google-Apps-Script; beanserver;', 'woorankreview/', 'Seekport Crawler;', 'AHC/', 'SkypeUriPreview Preview/', 'Semanticbot/', 'Embed PHP library', 'XoviOnpageCrawler;', - 'GetHPinfo.com-Bot/', 'BoardReader Favicon Fetcher']; + 'GetHPinfo.com-Bot/', 'BoardReader Favicon Fetcher', 'Google-Adwords-Instant', 'newspaper/', + 'YurichevBot/', 'Crawling at Home Project']; if (!DI::config()->get('blockbot', 'good_crawlers')) { $agents = array_merge($agents, $good_agents); @@ -91,7 +93,7 @@ function blockbot_init_1(App $a) { foreach ($agents as $agent) { if (stristr($_SERVER['HTTP_USER_AGENT'], $agent)) { - System::httpExit(403, 'Bots are not allowed'); + throw new ForbiddenException('Bots are not allowed'); } } @@ -114,7 +116,8 @@ function blockbot_init_1(App $a) { 'WordPress/', 'http.rb/', 'Apache-HttpClient/', 'WordPress.com;', 'Pleroma', 'Dispatch/', 'Ruby', 'Java/', 'libwww-perl/', 'Mastodon/', 'FeedlyApp/', 'lua-resty-http/', 'Tiny Tiny RSS/', 'Wget/', 'PostmanRuntime/', - 'W3C_Validator/', 'NetNewsWire', 'FeedValidator/', 'theoldreader.com']; + 'W3C_Validator/', 'NetNewsWire', 'FeedValidator/', 'theoldreader.com', 'axios/', + 'Paw/', 'PeerTube/', 'fedi.inex.dev', 'FediDB/', 'index.community crawler']; if (DI::config()->get('blockbot', 'good_crawlers')) { $agents = array_merge($agents, $good_agents); @@ -128,5 +131,5 @@ function blockbot_init_1(App $a) { } logger::info('Blocked bot', $logdata); - System::httpExit(403, 'Bots are not allowed'); + throw new ForbiddenException('Bots are not allowed'); }