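+ // List of "good" crawlers. They are only let through when the blockbot "good_crawlers" setting is enabled; otherwise they are blocked like any other crawler.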
+ $good_agents = ['fediverse.space crawler', 'fediverse.network crawler', 'Active_Pods_CheckBot_3.0',
+ 'Social-Relay/', 'Test Certificate Info', 'Uptimebot/', 'GNUSocialBot', 'UptimeRobot/',
+ 'PTST/'];
+
+ // List of known crawlers.
+ $agents = ['SemrushBot', 's~feedly-nikon3', 'Qwantify/Bleriot/', 'ltx71', 'Sogou web spider/',
+ 'Diffbot/', 'Twitterbot/', 'YisouSpider', 'evc-batch/', 'LivelapBot/', 'TrendsmapResolver/',
+ 'PaperLiBot/', 'Nuzzel', 'um-LN/', 'Google Favicon', 'Datanyze', 'BLEXBot/', '360Spider',
+ 'adscanner/', 'HeadlessChrome', 'wpif', 'startmebot/', 'Googlebot/', 'Applebot/',
+ 'facebookexternalhit/', 'GoogleImageProxy', 'bingbot/', 'heritrix/', 'ldspider',
+ 'AwarioRssBot/', 'Zabbix', 'TweetmemeBot/', 'dcrawl/', 'PhantomJS/', 'Googlebot-Image/',
+ 'CrowdTanglebot/', 'Mediapartners-Google', 'Baiduspider/', 'datagnionbot',
+ 'MegaIndex.ru/', 'SMUrlExpander', 'Hatena-Favicon/', 'Wappalyzer', 'FlipboardProxy/',
+ 'NetcraftSurveyAgent/', 'Dataprovider.com', 'SMTBot/', 'Nimbostratus-Bot/',
+ 'DuckDuckGo-Favicons-Bot/', 'IndieWebCards/', 'proximic', 'netEstate NE Crawler',
+ 'AhrefsBot/', 'YandexBot/', 'Exabot/', 'Mediumbot-MetaTagFetcher/', 'WhatsApp/',
+ 'TelegramBot', 'SurdotlyBot/', 'BingPreview/', 'SabsimBot/', 'CCBot/', 'WbSrch/',
+ 'DuckDuckBot-Https/', 'HTTP Banner Detection', 'YandexImages/', 'archive.org_bot',
+ 'ArchiveTeam ArchiveBot/', 'yacybot', 'https://developers.google.com/+/web/snippet/',
+ 'Scrapy/', 'github-camo', 'MJ12bot/', 'DotBot/', 'Pinterestbot/', 'Jooblebot/',
+ 'Cliqzbot/', 'YaK/', 'Mediatoolkitbot', 'Snacktory', 'FunWebProducts', 'oBot/',
+ '7Siters/', 'KOCMOHABT', 'Google-SearchByImage', 'FemtosearchBot/',
+ 'HubSpot Crawler', 'DomainStatsBot/', 'Re-re Studio', 'AwarioSmartBot/',
+ 'SummalyBot/', 'DNSResearchBot/', 'PetalBot;', 'Nmap Scripting Engine;',
+ 'Google-Apps-Script; beanserver;', 'woorankreview/', 'Seekport Crawler;', 'AHC/',
+ 'SkypeUriPreview Preview/', 'Semanticbot/', 'Embed PHP library', 'XoviOnpageCrawler;',
+ 'GetHPinfo.com-Bot/', 'BoardReader Favicon Fetcher'];
+
+ if (!DI::config()->get('blockbot', 'good_crawlers')) {
+ $agents = array_merge($agents, $good_agents);
+ } else {
+ foreach ($good_agents as $good_agent) {
+ if (stristr($_SERVER['HTTP_USER_AGENT'], $good_agent)) {
+ return;
+ }
+ }
+ }
+
+ if (DI::config()->get('blockbot', 'block_gab')) {
+ $agents[] = 'GabSocial/';
+ }
+
+ foreach ($agents as $agent) {
+ if (stristr($_SERVER['HTTP_USER_AGENT'], $agent)) {
+ System::httpExit(403, 'Bots are not allowed');
+ }
+ }
+
+ // This switch is only meant for developers who want to add more bots to the list above; it is not safe for production.
+ if (!DI::config()->get('blockbot', 'training')) {
+ return;
+ }
+
+ $crawlerDetect = new CrawlerDetect();
+