- // List of strings of known "good" agents
- $agents = ['diaspora-connection-tester', 'DiasporaFederation', 'Friendica', '(compatible; zot)',
- 'Micro.blog', 'Mastodon', 'hackney', 'GangGo', 'python/federation', 'GNU social', 'winHttp',
- 'Go-http-client', 'Mr.4x3 Powered', 'Test Certificate Info', 'WordPress.com', 'zgrab',
- 'curl/', 'StatusNet', 'OpenGraphReader/', 'Uptimebot/', 'python-opengraph-jaywink'];
-
- if ($crawlerDetect->isCrawler()) {
- foreach ($agents as $agent) {
- if (stristr($_SERVER['HTTP_USER_AGENT'], $agent)) {
- // @ToDo: Report every false positive here: https://github.com/JayBizzle/Crawler-Detect/issues/326
- logger::notice('False positive', ['agent' => $_SERVER['HTTP_USER_AGENT']]);
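+ // List of "good" crawlers (user agent fragments); merged into the known crawlers below unless the "blockbot" config value "good_crawlers" is set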
+ $good_agents = ['fediverse.space crawler', 'fediverse.network crawler', 'Active_Pods_CheckBot_3.0',
+ 'Social-Relay/', 'Test Certificate Info', 'Uptimebot/', 'GNUSocialBot', 'UptimeRobot/'];
+
+ // List of known crawlers, each identified by a fragment of its user agent string.
+ $agents = ['SemrushBot', 's~feedly-nikon3', 'Qwantify/Bleriot/', 'ltx71', 'Sogou web spider/',
+ 'Diffbot/', 'Twitterbot/', 'YisouSpider', 'evc-batch/', 'LivelapBot/', 'TrendsmapResolver/',
+ 'PaperLiBot/', 'Nuzzel', 'um-LN/', 'Google Favicon', 'Datanyze', 'BLEXBot/', '360Spider',
+ 'adscanner/', 'HeadlessChrome', 'wpif', 'startmebot/', 'Googlebot/', 'Applebot/',
+ 'facebookexternalhit/', 'GoogleImageProxy', 'bingbot/', 'heritrix/', 'ldspider',
+ 'AwarioRssBot/', 'Zabbix', 'TweetmemeBot/', 'dcrawl/', 'PhantomJS/', 'Googlebot-Image/',
+ 'CrowdTanglebot/', 'Mediapartners-Google', 'Baiduspider/', 'datagnionbot',
+ 'MegaIndex.ru/', 'SMUrlExpander', 'Hatena-Favicon/', 'Wappalyzer', 'FlipboardProxy/',
+ 'NetcraftSurveyAgent/', 'Dataprovider.com', 'SMTBot/', 'Nimbostratus-Bot/',
+ 'DuckDuckGo-Favicons-Bot/', 'IndieWebCards/', 'proximic', 'netEstate NE Crawler',
+ 'AhrefsBot/', 'YandexBot/', 'Exabot/', 'Mediumbot-MetaTagFetcher/', 'WhatsApp/',
+ 'TelegramBot', 'SurdotlyBot/', 'BingPreview/', 'SabsimBot/', 'CCBot/', 'WbSrch/',
+ 'DuckDuckBot-Https/', 'HTTP Banner Detection', 'YandexImages/', 'archive.org_bot',
+ 'ArchiveTeam ArchiveBot/', 'yacybot', 'https://developers.google.com/+/web/snippet/',
+ 'Scrapy/', 'github-camo', 'MJ12bot/', 'DotBot/', 'Pinterestbot/', 'Jooblebot/',
+ 'Cliqzbot/', 'YaK/', 'Mediatoolkitbot', 'Snacktory', 'FunWebProducts', 'oBot/',
+ '7Siters/', 'KOCMOHABT', 'Google-SearchByImage', 'FemtosearchBot/',
+ 'HubSpot Crawler', 'DomainStatsBot/', 'Re-re Studio'];
+
+ if (!DI::config()->get('blockbot', 'good_crawlers')) {
+ $agents = array_merge($agents, $good_agents);
+ } else {
+ foreach ($good_agents as $good_agent) {
+ if (stristr($_SERVER['HTTP_USER_AGENT'], $good_agent)) {