--- /dev/null
+<?php
+/**
+ * Name: blockbot
+ * Description: Blocking bots based on detecting bots/crawlers/spiders via the user agent and http_from header.
+ * Version: 0.1
+ * Author: Philipp Holzer <admin@philipp.info>
+ *
+ */
+
+use Friendica\App;
+use Friendica\Core\Hook;
+use Friendica\Core\System;
+use Jaybizzle\CrawlerDetect\CrawlerDetect;
+
+/**
+ * Subscribes this addon's request handler to the 'init_1' hook.
+ */
+function blockbot_install()
+{
+	$hookName = 'init_1';
+	$addonFile = 'addon/blockbot/blockbot.php';
+	$handler = 'blockbot_init_1';
+
+	Hook::register($hookName, $addonFile, $handler);
+}
+
+
+/**
+ * Removes the 'init_1' hook subscription that blockbot_install() created.
+ */
+function blockbot_uninstall()
+{
+	$hookName = 'init_1';
+	$addonFile = 'addon/blockbot/blockbot.php';
+	$handler = 'blockbot_init_1';
+
+	Hook::unregister($hookName, $addonFile, $handler);
+}
+
+/**
+ * 'init_1' hook handler: answers the current request with HTTP 403 when
+ * CrawlerDetect classifies it as a bot/crawler/spider.
+ *
+ * @param App $a Application instance passed in by the hook (not used here).
+ */
+function blockbot_init_1(App $a) {
+	// CrawlerDetect is shipped in this addon's own Composer vendor directory,
+	// so its autoloader must be loaded before the class can be resolved.
+	// NOTE(review): assumes vendor/ sits next to this file — confirm layout.
+	require_once __DIR__ . DIRECTORY_SEPARATOR . 'vendor' . DIRECTORY_SEPARATOR . 'autoload.php';
+
+	$crawlerDetect = new CrawlerDetect();
+
+	// Called without arguments, isCrawler() checks the current visitor.
+	if ($crawlerDetect->isCrawler()) {
+		System::httpExit(403, 'Bots are not allowed');
+	}
+}
--- /dev/null
+{
+ "name": "friendica-addons/blockbot",
+ "description": "Blocking bots based on detecting bots/crawlers/spiders via the user agent and http_from header.",
+ "type": "friendica-addon",
+ "authors": [
+ {
+ "name": "Philipp Holzer",
+ "email": "admin@philipp.info",
+ "homepage": "https://friendica.philipp.info/profile/nupplaphil",
+ "role": "Developer"
+ }
+ ],
+ "require": {
+ "php": ">=5.6.0",
+ "jaybizzle/crawler-detect": "1.*"
+ },
+ "license": "BSD-3-Clause",
+ "minimum-stability": "stable",
+ "config": {
+ "optimize-autoloader": true,
+ "autoloader-suffix": "BlockBotAddon",
+ "preferred-install": "dist"
+ }
+}
--- /dev/null
+{
+ "_readme": [
+ "This file locks the dependencies of your project to a known state",
+ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
+ "This file is @generated automatically"
+ ],
+ "content-hash": "814fd867d00e99f84d12304e8e244aae",
+ "packages": [
+ {
+ "name": "jaybizzle/crawler-detect",
+ "version": "v1.2.80",
+ "source": {
+ "type": "git",
+ "url": "https://github.com/JayBizzle/Crawler-Detect.git",
+ "reference": "af6a36e6d69670df3f0a3ed8e21d4b8cc67a7847"
+ },
+ "dist": {
+ "type": "zip",
+ "url": "https://api.github.com/repos/JayBizzle/Crawler-Detect/zipball/af6a36e6d69670df3f0a3ed8e21d4b8cc67a7847",
+ "reference": "af6a36e6d69670df3f0a3ed8e21d4b8cc67a7847",
+ "shasum": ""
+ },
+ "require": {
+ "php": ">=5.3.0"
+ },
+ "require-dev": {
+ "phpunit/phpunit": "^4.8|^5.5|^6.5",
+ "satooshi/php-coveralls": "1.*"
+ },
+ "type": "library",
+ "autoload": {
+ "psr-4": {
+ "Jaybizzle\\CrawlerDetect\\": "src/"
+ }
+ },
+ "notification-url": "https://packagist.org/downloads/",
+ "license": [
+ "MIT"
+ ],
+ "authors": [
+ {
+ "name": "Mark Beech",
+ "email": "m@rkbee.ch",
+ "role": "Developer"
+ }
+ ],
+ "description": "CrawlerDetect is a PHP class for detecting bots/crawlers/spiders via the user agent",
+ "homepage": "https://github.com/JayBizzle/Crawler-Detect/",
+ "keywords": [
+ "crawler",
+ "crawler detect",
+ "crawler detector",
+ "crawlerdetect",
+ "php crawler detect"
+ ],
+ "time": "2019-04-05T19:52:02+00:00"
+ }
+ ],
+ "packages-dev": [],
+ "aliases": [],
+ "minimum-stability": "stable",
+ "stability-flags": [],
+ "prefer-stable": false,
+ "prefer-lowest": false,
+ "platform": {
+ "php": ">=5.6.0"
+ },
+ "platform-dev": []
+}
--- /dev/null
+<?php
+
+// autoload.php @generated by Composer
+
+require_once __DIR__ . '/composer/autoload_real.php';
+
+return ComposerAutoloaderInitAdvancedContentFilterAddon::getLoader();
--- /dev/null
+<?php
+
+/*
+ * This file is part of Composer.
+ *
+ * (c) Nils Adermann <naderman@naderman.de>
+ * Jordi Boggiano <j.boggiano@seld.be>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+namespace Composer\Autoload;
+
+/**
+ * ClassLoader implements a PSR-0, PSR-4 and classmap class loader.
+ *
+ * $loader = new \Composer\Autoload\ClassLoader();
+ *
+ * // register classes with namespaces
+ * $loader->add('Symfony\Component', __DIR__.'/component');
+ * $loader->add('Symfony', __DIR__.'/framework');
+ *
+ * // activate the autoloader
+ * $loader->register();
+ *
+ * // to enable searching the include path (eg. for PEAR packages)
+ * $loader->setUseIncludePath(true);
+ *
+ * In this example, if you try to use a class in the Symfony\Component
+ * namespace or one of its children (Symfony\Component\Console for instance),
+ * the autoloader will first look for the class under the component/
+ * directory, and it will then fallback to the framework/ directory if not
+ * found before giving up.
+ *
+ * This class is loosely based on the Symfony UniversalClassLoader.
+ *
+ * @author Fabien Potencier <fabien@symfony.com>
+ * @author Jordi Boggiano <j.boggiano@seld.be>
+ * @see http://www.php-fig.org/psr/psr-0/
+ * @see http://www.php-fig.org/psr/psr-4/
+ */
+class ClassLoader
+{
+    // PSR-4
+    // First character of a prefix => (prefix => length of the prefix).
+    private $prefixLengthsPsr4 = array();
+    // Prefix => list of base directories.
+    private $prefixDirsPsr4 = array();
+    // Directories searched when no PSR-4 prefix produced a file.
+    private $fallbackDirsPsr4 = array();
+
+    // PSR-0
+    // First character of a prefix => (prefix => list of root directories).
+    private $prefixesPsr0 = array();
+    // Directories searched when no PSR-0 prefix produced a file.
+    private $fallbackDirsPsr0 = array();
+
+    private $useIncludePath = false;
+    // Class name => file path.
+    private $classMap = array();
+    private $classMapAuthoritative = false;
+    // Class name => true for classes that findFile() could not locate.
+    private $missingClasses = array();
+    private $apcuPrefix;
+
+    /**
+     * Returns all registered PSR-0 prefixes as one flat map.
+     *
+     * @return array Prefix => list of directories
+     */
+    public function getPrefixes()
+    {
+        if (!empty($this->prefixesPsr0)) {
+            // array_values() drops the first-character string keys. On PHP 8+
+            // call_user_func_array() would otherwise forward them as named
+            // arguments, which array_merge() rejects.
+            return call_user_func_array('array_merge', array_values($this->prefixesPsr0));
+        }
+
+        return array();
+    }
+
+    /**
+     * @return array PSR-4 prefix => list of base directories
+     */
+    public function getPrefixesPsr4()
+    {
+        return $this->prefixDirsPsr4;
+    }
+
+    /**
+     * @return array PSR-0 fallback directories
+     */
+    public function getFallbackDirs()
+    {
+        return $this->fallbackDirsPsr0;
+    }
+
+    /**
+     * @return array PSR-4 fallback directories
+     */
+    public function getFallbackDirsPsr4()
+    {
+        return $this->fallbackDirsPsr4;
+    }
+
+    /**
+     * @return array Class name => file path
+     */
+    public function getClassMap()
+    {
+        return $this->classMap;
+    }
+
+    /**
+     * Adds entries to the class map; on duplicate class names the new entry wins.
+     *
+     * @param array $classMap Class to filename map
+     */
+    public function addClassMap(array $classMap)
+    {
+        if ($this->classMap) {
+            $this->classMap = array_merge($this->classMap, $classMap);
+        } else {
+            $this->classMap = $classMap;
+        }
+    }
+
+    /**
+     * Registers a set of PSR-0 directories for a given prefix, either
+     * appending or prepending to the ones previously set for this prefix.
+     *
+     * @param string       $prefix  The prefix; an empty value targets the fallback directories
+     * @param array|string $paths   The PSR-0 root directories
+     * @param bool         $prepend Whether to prepend the directories
+     */
+    public function add($prefix, $paths, $prepend = false)
+    {
+        if (!$prefix) {
+            if ($prepend) {
+                $this->fallbackDirsPsr0 = array_merge(
+                    (array) $paths,
+                    $this->fallbackDirsPsr0
+                );
+            } else {
+                $this->fallbackDirsPsr0 = array_merge(
+                    $this->fallbackDirsPsr0,
+                    (array) $paths
+                );
+            }
+
+            return;
+        }
+
+        // Prefixes are bucketed by their first character to narrow lookups.
+        $first = $prefix[0];
+        if (!isset($this->prefixesPsr0[$first][$prefix])) {
+            $this->prefixesPsr0[$first][$prefix] = (array) $paths;
+
+            return;
+        }
+        if ($prepend) {
+            $this->prefixesPsr0[$first][$prefix] = array_merge(
+                (array) $paths,
+                $this->prefixesPsr0[$first][$prefix]
+            );
+        } else {
+            $this->prefixesPsr0[$first][$prefix] = array_merge(
+                $this->prefixesPsr0[$first][$prefix],
+                (array) $paths
+            );
+        }
+    }
+
+    /**
+     * Registers a set of PSR-4 directories for a given namespace, either
+     * appending or prepending to the ones previously set for this namespace.
+     *
+     * @param string       $prefix  The prefix/namespace, with trailing '\\'
+     * @param array|string $paths   The PSR-4 base directories
+     * @param bool         $prepend Whether to prepend the directories
+     *
+     * @throws \InvalidArgumentException When a new non-empty prefix lacks the trailing separator
+     */
+    public function addPsr4($prefix, $paths, $prepend = false)
+    {
+        if (!$prefix) {
+            // Register directories for the root namespace.
+            if ($prepend) {
+                $this->fallbackDirsPsr4 = array_merge(
+                    (array) $paths,
+                    $this->fallbackDirsPsr4
+                );
+            } else {
+                $this->fallbackDirsPsr4 = array_merge(
+                    $this->fallbackDirsPsr4,
+                    (array) $paths
+                );
+            }
+        } elseif (!isset($this->prefixDirsPsr4[$prefix])) {
+            // Register directories for a new namespace.
+            $length = strlen($prefix);
+            if ('\\' !== $prefix[$length - 1]) {
+                throw new \InvalidArgumentException("A non-empty PSR-4 prefix must end with a namespace separator.");
+            }
+            $this->prefixLengthsPsr4[$prefix[0]][$prefix] = $length;
+            $this->prefixDirsPsr4[$prefix] = (array) $paths;
+        } elseif ($prepend) {
+            // Prepend directories for an already registered namespace.
+            $this->prefixDirsPsr4[$prefix] = array_merge(
+                (array) $paths,
+                $this->prefixDirsPsr4[$prefix]
+            );
+        } else {
+            // Append directories for an already registered namespace.
+            $this->prefixDirsPsr4[$prefix] = array_merge(
+                $this->prefixDirsPsr4[$prefix],
+                (array) $paths
+            );
+        }
+    }
+
+    /**
+     * Registers a set of PSR-0 directories for a given prefix,
+     * replacing any others previously set for this prefix.
+     *
+     * @param string       $prefix The prefix; an empty value replaces the fallback directories
+     * @param array|string $paths  The PSR-0 base directories
+     */
+    public function set($prefix, $paths)
+    {
+        if (!$prefix) {
+            $this->fallbackDirsPsr0 = (array) $paths;
+        } else {
+            $this->prefixesPsr0[$prefix[0]][$prefix] = (array) $paths;
+        }
+    }
+
+    /**
+     * Registers a set of PSR-4 directories for a given namespace,
+     * replacing any others previously set for this namespace.
+     *
+     * @param string       $prefix The prefix/namespace, with trailing '\\'
+     * @param array|string $paths  The PSR-4 base directories
+     *
+     * @throws \InvalidArgumentException When a non-empty prefix lacks the trailing separator
+     */
+    public function setPsr4($prefix, $paths)
+    {
+        if (!$prefix) {
+            $this->fallbackDirsPsr4 = (array) $paths;
+        } else {
+            $length = strlen($prefix);
+            if ('\\' !== $prefix[$length - 1]) {
+                throw new \InvalidArgumentException("A non-empty PSR-4 prefix must end with a namespace separator.");
+            }
+            $this->prefixLengthsPsr4[$prefix[0]][$prefix] = $length;
+            $this->prefixDirsPsr4[$prefix] = (array) $paths;
+        }
+    }
+
+    /**
+     * Turns on searching the include path for class files.
+     *
+     * @param bool $useIncludePath
+     */
+    public function setUseIncludePath($useIncludePath)
+    {
+        $this->useIncludePath = $useIncludePath;
+    }
+
+    /**
+     * Can be used to check if the autoloader uses the include path to check
+     * for classes.
+     *
+     * @return bool
+     */
+    public function getUseIncludePath()
+    {
+        return $this->useIncludePath;
+    }
+
+    /**
+     * Turns off searching the prefix and fallback directories for classes
+     * that have not been registered with the class map.
+     *
+     * @param bool $classMapAuthoritative
+     */
+    public function setClassMapAuthoritative($classMapAuthoritative)
+    {
+        $this->classMapAuthoritative = $classMapAuthoritative;
+    }
+
+    /**
+     * Should class lookup fail if not found in the current class map?
+     *
+     * @return bool
+     */
+    public function isClassMapAuthoritative()
+    {
+        return $this->classMapAuthoritative;
+    }
+
+    /**
+     * APCu prefix to use to cache found/not-found classes, if the extension is enabled.
+     *
+     * @param string|null $apcuPrefix
+     */
+    public function setApcuPrefix($apcuPrefix)
+    {
+        // ini_get() returns a string; parse it as a boolean so that values
+        // such as "Off" or "false" are not mistaken for "enabled".
+        $apcuEnabled = function_exists('apcu_fetch') && filter_var(ini_get('apc.enabled'), FILTER_VALIDATE_BOOLEAN);
+
+        $this->apcuPrefix = $apcuEnabled ? $apcuPrefix : null;
+    }
+
+    /**
+     * The APCu prefix in use, or null if APCu caching is not enabled.
+     *
+     * @return string|null
+     */
+    public function getApcuPrefix()
+    {
+        return $this->apcuPrefix;
+    }
+
+    /**
+     * Registers this instance as an autoloader.
+     *
+     * @param bool $prepend Whether to prepend the autoloader or not
+     */
+    public function register($prepend = false)
+    {
+        spl_autoload_register(array($this, 'loadClass'), true, $prepend);
+    }
+
+    /**
+     * Unregisters this instance as an autoloader.
+     */
+    public function unregister()
+    {
+        spl_autoload_unregister(array($this, 'loadClass'));
+    }
+
+    /**
+     * Loads the given class or interface.
+     *
+     * @param  string    $class The name of the class
+     * @return bool|null True if loaded, null otherwise
+     */
+    public function loadClass($class)
+    {
+        if ($file = $this->findFile($class)) {
+            includeFile($file);
+
+            return true;
+        }
+
+        return null;
+    }
+
+    /**
+     * Finds the path to the file where the class is defined.
+     *
+     * Lookup order: class map, remembered misses, APCu cache (when a prefix
+     * is set), then the PSR-4/PSR-0 search on disk.
+     *
+     * @param string $class The name of the class
+     *
+     * @return string|false The path if found, false otherwise
+     */
+    public function findFile($class)
+    {
+        // class map lookup
+        if (isset($this->classMap[$class])) {
+            return $this->classMap[$class];
+        }
+        if ($this->classMapAuthoritative || isset($this->missingClasses[$class])) {
+            return false;
+        }
+        if (null !== $this->apcuPrefix) {
+            $file = apcu_fetch($this->apcuPrefix.$class, $hit);
+            if ($hit) {
+                return $file;
+            }
+        }
+
+        $file = $this->findFileWithExtension($class, '.php');
+
+        // Search for Hack files if we are running on HHVM
+        if (false === $file && defined('HHVM_VERSION')) {
+            $file = $this->findFileWithExtension($class, '.hh');
+        }
+
+        // Both hits and misses (false) are stored in the APCu cache.
+        if (null !== $this->apcuPrefix) {
+            apcu_add($this->apcuPrefix.$class, $file);
+        }
+
+        if (false === $file) {
+            // Remember that this class does not exist.
+            $this->missingClasses[$class] = true;
+        }
+
+        return $file;
+    }
+
+    /**
+     * Searches the registered PSR-4 and PSR-0 locations for the class file.
+     *
+     * @param string $class The name of the class
+     * @param string $ext   File extension including the leading dot
+     *
+     * @return string|false The first existing candidate path, false if none exists
+     */
+    private function findFileWithExtension($class, $ext)
+    {
+        // PSR-4 lookup
+        $logicalPathPsr4 = strtr($class, '\\', DIRECTORY_SEPARATOR) . $ext;
+
+        $first = $class[0];
+        if (isset($this->prefixLengthsPsr4[$first])) {
+            // Strip one namespace segment per iteration, so the longest
+            // registered prefix is tried first.
+            $subPath = $class;
+            while (false !== $lastPos = strrpos($subPath, '\\')) {
+                $subPath = substr($subPath, 0, $lastPos);
+                $search = $subPath . '\\';
+                if (isset($this->prefixDirsPsr4[$search])) {
+                    $pathEnd = DIRECTORY_SEPARATOR . substr($logicalPathPsr4, $lastPos + 1);
+                    foreach ($this->prefixDirsPsr4[$search] as $dir) {
+                        if (file_exists($file = $dir . $pathEnd)) {
+                            return $file;
+                        }
+                    }
+                }
+            }
+        }
+
+        // PSR-4 fallback dirs
+        foreach ($this->fallbackDirsPsr4 as $dir) {
+            if (file_exists($file = $dir . DIRECTORY_SEPARATOR . $logicalPathPsr4)) {
+                return $file;
+            }
+        }
+
+        // PSR-0 lookup
+        if (false !== $pos = strrpos($class, '\\')) {
+            // namespaced class name: only underscores in the class-name part
+            // (after the last separator) become directory separators
+            $logicalPathPsr0 = substr($logicalPathPsr4, 0, $pos + 1)
+                . strtr(substr($logicalPathPsr4, $pos + 1), '_', DIRECTORY_SEPARATOR);
+        } else {
+            // PEAR-like class name
+            $logicalPathPsr0 = strtr($class, '_', DIRECTORY_SEPARATOR) . $ext;
+        }
+
+        if (isset($this->prefixesPsr0[$first])) {
+            foreach ($this->prefixesPsr0[$first] as $prefix => $dirs) {
+                if (0 === strpos($class, $prefix)) {
+                    foreach ($dirs as $dir) {
+                        if (file_exists($file = $dir . DIRECTORY_SEPARATOR . $logicalPathPsr0)) {
+                            return $file;
+                        }
+                    }
+                }
+            }
+        }
+
+        // PSR-0 fallback dirs
+        foreach ($this->fallbackDirsPsr0 as $dir) {
+            if (file_exists($file = $dir . DIRECTORY_SEPARATOR . $logicalPathPsr0)) {
+                return $file;
+            }
+        }
+
+        // PSR-0 include paths.
+        if ($this->useIncludePath && $file = stream_resolve_include_path($logicalPathPsr0)) {
+            return $file;
+        }
+
+        return false;
+    }
+}
+
+/**
+ * Scope isolated include.
+ *
+ * Prevents access to $this/self from included files: the include runs inside
+ * this free function rather than inside a ClassLoader method.
+ *
+ * @param string $file Path of the file to include, as resolved by ClassLoader::findFile()
+ */
+function includeFile($file)
+{
+ include $file;
+}
--- /dev/null
+
+Copyright (c) Nils Adermann, Jordi Boggiano
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is furnished
+to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
--- /dev/null
+<?php
+
+// autoload_classmap.php @generated by Composer
+
+$vendorDir = dirname(dirname(__FILE__));
+$baseDir = dirname($vendorDir);
+
+return array(
+ 'Jaybizzle\\CrawlerDetect\\CrawlerDetect' => $vendorDir . '/jaybizzle/crawler-detect/src/CrawlerDetect.php',
+ 'Jaybizzle\\CrawlerDetect\\Fixtures\\AbstractProvider' => $vendorDir . '/jaybizzle/crawler-detect/src/Fixtures/AbstractProvider.php',
+ 'Jaybizzle\\CrawlerDetect\\Fixtures\\Crawlers' => $vendorDir . '/jaybizzle/crawler-detect/src/Fixtures/Crawlers.php',
+ 'Jaybizzle\\CrawlerDetect\\Fixtures\\Exclusions' => $vendorDir . '/jaybizzle/crawler-detect/src/Fixtures/Exclusions.php',
+ 'Jaybizzle\\CrawlerDetect\\Fixtures\\Headers' => $vendorDir . '/jaybizzle/crawler-detect/src/Fixtures/Headers.php',
+);
--- /dev/null
+<?php
+
+// autoload_namespaces.php @generated by Composer
+
+$vendorDir = dirname(dirname(__FILE__));
+$baseDir = dirname($vendorDir);
+
+return array(
+);
--- /dev/null
+<?php
+
+// autoload_psr4.php @generated by Composer
+
+$vendorDir = dirname(dirname(__FILE__));
+$baseDir = dirname($vendorDir);
+
+return array(
+ 'Jaybizzle\\CrawlerDetect\\' => array($vendorDir . '/jaybizzle/crawler-detect/src'),
+);
--- /dev/null
+<?php
+
+// autoload_real.php @generated by Composer
+
+/**
+ * Generated bootstrap that creates, configures and registers the Composer
+ * ClassLoader for this vendor directory.
+ *
+ * NOTE(review): the class-name suffix mirrors "autoloader-suffix" in the addon's
+ * composer.json ("AdvancedContentFilterAddon"), which looks copied from another
+ * addon — confirm it cannot collide with an identically named class.
+ */
+class ComposerAutoloaderInitAdvancedContentFilterAddon
+{
+ // Loader created by the first getLoader() call and returned by later calls.
+ private static $loader;
+
+ /**
+ * Temporary autoload callback that only resolves Composer's own ClassLoader.
+ *
+ * @param string $class Class name requested from the autoloader
+ */
+ public static function loadClassLoader($class)
+ {
+ if ('Composer\Autoload\ClassLoader' === $class) {
+ require __DIR__ . '/ClassLoader.php';
+ }
+ }
+
+ /**
+ * Builds the loader on first use, fills it with the generated maps and
+ * registers it at the front of the autoload queue.
+ *
+ * @return \Composer\Autoload\ClassLoader
+ */
+ public static function getLoader()
+ {
+ if (null !== self::$loader) {
+ return self::$loader;
+ }
+
+ // loadClassLoader() stays registered only while ClassLoader is instantiated.
+ spl_autoload_register(array('ComposerAutoloaderInitAdvancedContentFilterAddon', 'loadClassLoader'), true, true);
+ self::$loader = $loader = new \Composer\Autoload\ClassLoader();
+ spl_autoload_unregister(array('ComposerAutoloaderInitAdvancedContentFilterAddon', 'loadClassLoader'));
+
+ // The static initializer is used on PHP >= 5.6 outside HHVM, unless this file is encoded by Zend Loader.
+ $useStaticLoader = PHP_VERSION_ID >= 50600 && !defined('HHVM_VERSION') && (!function_exists('zend_loader_file_encoded') || !zend_loader_file_encoded());
+ if ($useStaticLoader) {
+ require_once __DIR__ . '/autoload_static.php';
+
+ call_user_func(\Composer\Autoload\ComposerStaticInitAdvancedContentFilterAddon::getInitializer($loader));
+ } else {
+ // Otherwise feed the loader from the generated array files through its setters.
+ $map = require __DIR__ . '/autoload_namespaces.php';
+ foreach ($map as $namespace => $path) {
+ $loader->set($namespace, $path);
+ }
+
+ $map = require __DIR__ . '/autoload_psr4.php';
+ foreach ($map as $namespace => $path) {
+ $loader->setPsr4($namespace, $path);
+ }
+
+ $classMap = require __DIR__ . '/autoload_classmap.php';
+ if ($classMap) {
+ $loader->addClassMap($classMap);
+ }
+ }
+
+ // true = prepend, so this loader is consulted before already registered ones.
+ $loader->register(true);
+
+ return $loader;
+ }
+}
--- /dev/null
+<?php
+
+// autoload_static.php @generated by Composer
+
+namespace Composer\Autoload;
+
+/**
+ * Generated static autoload data (PSR-4 prefixes and class map) plus the
+ * initializer that copies it into a ClassLoader instance.
+ */
+class ComposerStaticInitAdvancedContentFilterAddon
+{
+ // First character of the prefix => (prefix => string length of the prefix).
+ public static $prefixLengthsPsr4 = array (
+ 'J' =>
+ array (
+ 'Jaybizzle\\CrawlerDetect\\' => 24,
+ ),
+ );
+
+ // PSR-4 prefix => list of base directories.
+ public static $prefixDirsPsr4 = array (
+ 'Jaybizzle\\CrawlerDetect\\' =>
+ array (
+ 0 => __DIR__ . '/..' . '/jaybizzle/crawler-detect/src',
+ ),
+ );
+
+ // Fully qualified class name => file that defines it.
+ public static $classMap = array (
+ 'Jaybizzle\\CrawlerDetect\\CrawlerDetect' => __DIR__ . '/..' . '/jaybizzle/crawler-detect/src/CrawlerDetect.php',
+ 'Jaybizzle\\CrawlerDetect\\Fixtures\\AbstractProvider' => __DIR__ . '/..' . '/jaybizzle/crawler-detect/src/Fixtures/AbstractProvider.php',
+ 'Jaybizzle\\CrawlerDetect\\Fixtures\\Crawlers' => __DIR__ . '/..' . '/jaybizzle/crawler-detect/src/Fixtures/Crawlers.php',
+ 'Jaybizzle\\CrawlerDetect\\Fixtures\\Exclusions' => __DIR__ . '/..' . '/jaybizzle/crawler-detect/src/Fixtures/Exclusions.php',
+ 'Jaybizzle\\CrawlerDetect\\Fixtures\\Headers' => __DIR__ . '/..' . '/jaybizzle/crawler-detect/src/Fixtures/Headers.php',
+ );
+
+ /**
+ * Returns a closure that assigns the static maps above to the given loader.
+ *
+ * @param ClassLoader $loader Loader to populate
+ * @return \Closure
+ */
+ public static function getInitializer(ClassLoader $loader)
+ {
+ // Binding the closure to ClassLoader's class scope lets it write the loader's private properties.
+ return \Closure::bind(function () use ($loader) {
+ $loader->prefixLengthsPsr4 = ComposerStaticInitAdvancedContentFilterAddon::$prefixLengthsPsr4;
+ $loader->prefixDirsPsr4 = ComposerStaticInitAdvancedContentFilterAddon::$prefixDirsPsr4;
+ $loader->classMap = ComposerStaticInitAdvancedContentFilterAddon::$classMap;
+
+ }, null, ClassLoader::class);
+ }
+}
--- /dev/null
+[
+ {
+ "name": "jaybizzle/crawler-detect",
+ "version": "v1.2.80",
+ "version_normalized": "1.2.80.0",
+ "source": {
+ "type": "git",
+ "url": "https://github.com/JayBizzle/Crawler-Detect.git",
+ "reference": "af6a36e6d69670df3f0a3ed8e21d4b8cc67a7847"
+ },
+ "dist": {
+ "type": "zip",
+ "url": "https://api.github.com/repos/JayBizzle/Crawler-Detect/zipball/af6a36e6d69670df3f0a3ed8e21d4b8cc67a7847",
+ "reference": "af6a36e6d69670df3f0a3ed8e21d4b8cc67a7847",
+ "shasum": ""
+ },
+ "require": {
+ "php": ">=5.3.0"
+ },
+ "require-dev": {
+ "phpunit/phpunit": "^4.8|^5.5|^6.5",
+ "satooshi/php-coveralls": "1.*"
+ },
+ "time": "2019-04-05T19:52:02+00:00",
+ "type": "library",
+ "installation-source": "dist",
+ "autoload": {
+ "psr-4": {
+ "Jaybizzle\\CrawlerDetect\\": "src/"
+ }
+ },
+ "notification-url": "https://packagist.org/downloads/",
+ "license": [
+ "MIT"
+ ],
+ "authors": [
+ {
+ "name": "Mark Beech",
+ "email": "m@rkbee.ch",
+ "role": "Developer"
+ }
+ ],
+ "description": "CrawlerDetect is a PHP class for detecting bots/crawlers/spiders via the user agent",
+ "homepage": "https://github.com/JayBizzle/Crawler-Detect/",
+ "keywords": [
+ "crawler",
+ "crawler detect",
+ "crawler detector",
+ "crawlerdetect",
+ "php crawler detect"
+ ]
+ }
+]
--- /dev/null
+The MIT License (MIT)
+
+Copyright (c) 2015-2018 Mark Beech
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
--- /dev/null
+<p align="center"><a href="http://crawlerdetect.io/" target="_blank"><img src="https://cloud.githubusercontent.com/assets/340752/23082173/1bd1a396-f550-11e6-8aba-4d3c75edea2f.png" width="321" height="219" /></a><br><br>
+<a href="http://crawlerdetect.io/" target="_blank">crawlerdetect.io</a>
+<br><br>
+</p>
+
+<p align="center">
+<a href="https://travis-ci.org/JayBizzle/Crawler-Detect"><img src="https://img.shields.io/travis/JayBizzle/Crawler-Detect/master.svg?style=flat-square" /></a>
+<a href="https://packagist.org/packages/jaybizzle/crawler-detect"><img src="https://img.shields.io/packagist/dm/JayBizzle/Crawler-Detect.svg?style=flat-square" /></a>
+<a href="https://scrutinizer-ci.com/g/JayBizzle/Crawler-Detect/?branch=master"><img src="https://img.shields.io/scrutinizer/g/JayBizzle/Crawler-Detect.svg?style=flat-square" /></a>
+<a href="https://github.com/JayBizzle/Crawler-Detect"><img src="https://img.shields.io/badge/license-MIT-ff69b4.svg?style=flat-square" /></a>
+<a href="https://packagist.org/packages/jaybizzle/crawler-detect"><img src="https://img.shields.io/packagist/v/jaybizzle/Crawler-Detect.svg?style=flat-square" /></a>
+<a href="https://styleci.io/repos/32755917"><img src="https://styleci.io/repos/32755917/shield" /></a>
+<a href="https://coveralls.io/github/JayBizzle/Crawler-Detect"><img src="https://img.shields.io/coveralls/JayBizzle/Crawler-Detect/master.svg?style=flat-square" /></a>
+</p>
+
+## About CrawlerDetect
+
+CrawlerDetect is a PHP class for detecting bots/crawlers/spiders via the user agent and http_from header. It can currently detect thousands of bots/spiders/crawlers.
+
+### Installation
+Run `composer require jaybizzle/crawler-detect 1.*` or add `"jaybizzle/crawler-detect": "1.*"` to your `composer.json`.
+
+### Usage
+```PHP
+use Jaybizzle\CrawlerDetect\CrawlerDetect;
+
+$CrawlerDetect = new CrawlerDetect;
+
+// Check the user agent of the current 'visitor'
+if($CrawlerDetect->isCrawler()) {
+ // true if crawler user agent detected
+}
+
+// Pass a user agent as a string
+if($CrawlerDetect->isCrawler('Mozilla/5.0 (compatible; Sosospider/2.0; +http://help.soso.com/webspider.htm)')) {
+ // true if crawler user agent detected
+}
+
+// Output the name of the bot that matched (if any)
+echo $CrawlerDetect->getMatches();
+```
+
+### Contributing
+If you find a bot/spider/crawler user agent that CrawlerDetect fails to detect, please submit a pull request with the regex pattern added to the `$data` array in `Fixtures/Crawlers.php` and add the failing user agent to `tests/crawlers.txt`.
+
+Failing that, just create an issue with the user agent you have found, and we'll take it from there :)
+
+### Laravel Package
+If you would like to use this with Laravel 4/5, please see [Laravel-Crawler-Detect](https://github.com/JayBizzle/Laravel-Crawler-Detect)
+
+### Symfony Bundle
+To use this library with Symfony 2/3/4, check out the [CrawlerDetectBundle](https://github.com/nicolasmure/CrawlerDetectBundle).
+
+### YII2 Extension
+To use this library with the YII2 framework, check out [yii2-crawler-detect](https://github.com/AlikDex/yii2-crawler-detect).
+
+### ES6 Library
+To use this library with NodeJS or any ES6-based application, check out [es6-crawler-detect](https://github.com/JefferyHus/es6-crawler-detect).
+
+### .NET Library
+To use this library in a .NET Standard (including .NET Core) based project, check out [NetCrawlerDetect](https://github.com/gplumb/NetCrawlerDetect).
+
+### Nette Extension
+To use this library with the Nette framework, check out [NetteCrawlerDetect](https://github.com/JanGalek/Crawler-Detect).
+
+### Ruby Gem
+
+To use this library with Ruby on Rails or any Ruby-based application, check out [crawler_detect](https://github.com/loadkpi/crawler_detect) gem.
+
+_Parts of this class are based on the brilliant [MobileDetect](https://github.com/serbanghita/Mobile-Detect)_
+
+[![Analytics](https://ga-beacon.appspot.com/UA-72430465-1/Crawler-Detect/readme?pixel)](https://github.com/JayBizzle/Crawler-Detect)
--- /dev/null
+{
+ "name": "jaybizzle/crawler-detect",
+ "type": "library",
+ "description": "CrawlerDetect is a PHP class for detecting bots/crawlers/spiders via the user agent",
+ "keywords": ["crawler", "crawler detect", "crawler detector", "crawlerdetect", "php crawler detect"],
+ "homepage": "https://github.com/JayBizzle/Crawler-Detect/",
+ "license": "MIT",
+ "authors": [
+ {
+ "name": "Mark Beech",
+ "email": "m@rkbee.ch",
+ "role": "Developer"
+ }
+ ],
+ "require": {
+ "php": ">=5.3.0"
+ },
+ "require-dev": {
+ "phpunit/phpunit": "^4.8|^5.5|^6.5",
+ "satooshi/php-coveralls": "1.*"
+ },
+ "autoload": {
+ "psr-4": {
+ "Jaybizzle\\CrawlerDetect\\": "src/"
+ }
+ },
+ "scripts": {
+ "test": "vendor/bin/phpunit"
+ }
+}
--- /dev/null
+<?php
+
+/*
+ * This file is part of Crawler Detect - the web crawler detection library.
+ *
+ * (c) Mark Beech <m@rkbee.ch>
+ *
+ * This source file is subject to the MIT license that is bundled
+ * with this source code in the file LICENSE.
+ */
+
+// Pull in the fixture classes; the paths are relative, not anchored to this file.
+require 'src/Fixtures/AbstractProvider.php';
+require 'src/Fixtures/Crawlers.php';
+require 'src/Fixtures/Exclusions.php';
+require 'src/Fixtures/Headers.php';
+
+// Export every fixture list twice: once as JSON, once as plain text.
+foreach (array('Crawlers', 'Exclusions', 'Headers') as $fixtureName) {
+    $fixtureClass = "Jaybizzle\\CrawlerDetect\\Fixtures\\$fixtureName";
+    $fixture = new $fixtureClass;
+
+    outputJson($fixture);
+    outputTxt($fixture);
+}
+
+/**
+ * Writes the object's getAll() result, JSON-encoded, to raw/<ShortClassName>.json.
+ *
+ * @param object $object Fixture instance exposing getAll()
+ */
+function outputJson($object)
+{
+    $reflection = new ReflectionClass($object);
+    $className = $reflection->getShortName();
+    $payload = json_encode($object->getAll());
+
+    file_put_contents("raw/$className.json", $payload);
+}
+
+/**
+ * Writes the object's getAll() entries, one per line, to raw/<ShortClassName>.txt.
+ *
+ * @param object $object Fixture instance exposing getAll()
+ */
+function outputTxt($object)
+{
+    $className = (new ReflectionClass($object))->getShortName();
+
+    // Separator first: the legacy implode(array, separator) argument order is
+    // deprecated since PHP 7.4 and throws a TypeError on PHP 8.
+    file_put_contents("raw/$className.txt", implode(PHP_EOL, $object->getAll()));
+}
--- /dev/null
+[".*Java.*outbrain"," YLT","^b0t$","^bluefish ","^Calypso v\\\/","^COMODO DCV","^DangDang","^DavClnt","^FDM ","^git\\\/","^Goose\\\/","^Grabber","^HTTPClient\\\/","^Java\\\/","^Jeode\\\/","^Jetty\\\/","^Mail\\\/","^Mget","^Microsoft URL Control","^NG\\\/[0-9\\.]","^NING\\\/","^PHP\\\/[0-9]","^RMA\\\/","^Ruby|Ruby\\\/[0-9]","^VSE\\\/[0-9]","^WordPress\\.com","^XRL\\\/[0-9]","^ZmEu","008\\\/","13TABS","192\\.comAgent","2ip\\.ru","404enemy","7Siters","80legs","a\\.pr-cy\\.ru","a3logics\\.in","A6-Indexer","Abonti","Aboundex","aboutthedomain","Accoona-AI-Agent","acoon","acrylicapps\\.com\\\/pulp","Acunetix","AdAuth\\\/","adbeat","AddThis","ADmantX","AdminLabs","adressendeutschland","adscanner","Adstxtaggregator","agentslug","AHC","aihit","aiohttp\\\/","Airmail","akka-http\\\/","akula\\\/","alertra","alexa site audit","Alibaba\\.Security\\.Heimdall","Alligator","allloadin","AllSubmitter","alyze\\.info","amagit","Anarchie","AndroidDownloadManager","Anemone","AngleSharp","annotate_google","Ant\\.com","Anturis Agent","AnyEvent-HTTP\\\/","Apache Droid","Apache OpenOffice","Apache-HttpAsyncClient","Apache-HttpClient","ApacheBench","Apexoo","APIs-Google","AportWorm\\\/","AppBeat\\\/","AppEngine-Google","AppStoreScraperZ","Aprc\\\/[0-9]","Arachmo","arachnode","Arachnophilia","aria2","Arukereso","asafaweb","AskQuickly","Ask Jeeves","ASPSeek","Asterias","Astute","asynchttp","Attach","autocite","Autonomy","axios\\\/","B-l-i-t-z-B-O-T","Backlink-Ceck","backlink-check","BacklinkHttpStatus","BackStreet","BackWeb","Bad-Neighborhood","Badass","baidu\\.com","Bandit","basicstate","BatchFTP","Battleztar Bazinga","baypup\\\/","BazQux","BBBike","BCKLINKS","BDFetch","BegunAdvertising","Bidtellect","BigBozz","Bigfoot","biglotron","BingLocalSearch","BingPreview","binlar","biNu image cacher","Bitacle","biz_Directory","Black Hole","Blackboard 
Safeassign","BlackWidow","BlockNote\\.Net","Bloglines","Bloglovin","BlogPulseLive","BlogSearch","Blogtrottr","BlowFish","boitho\\.com-dc","BPImageWalker","Braintree-Webhooks","Branch Metrics API","Branch-Passthrough","Brandprotect","BrandVerity","Brandwatch","Brodie\\\/","Browsershots","BUbiNG","Buck\\\/","Buddy","BuiltWith","Bullseye","BunnySlippers","Burf Search","Butterfly\\\/","BuzzSumo","CAAM\\\/[0-9]","CakePHP","Calculon","Canary%20Mail","CaretNail","catexplorador","CC Metadata Scaper","Cegbfeieh","censys","Cerberian Drtrs","CERT\\.at-Statistics-Survey","cg-eye","changedetection","ChangesMeter","Charlotte","CheckHost","checkprivacy","CherryPicker","ChinaClaw","Chirp\\\/","chkme\\.com","Chlooe","Chromaxa","CirrusExplorer","CISPA Vulnerability Notification","Citoid","CJNetworkQuality","Clarsentia","clips\\.ua\\.ac\\.be","Cloud mapping","CloudEndure","CloudFlare-AlwaysOnline","Cloudinary","cmcm\\.com","coccoc","cognitiveseo","colly -","CommaFeed","Commons-HttpClient","commonscan","contactbigdatafr","contentkingapp","convera","CookieReports","copyright sheriff","CopyRightCheck","Copyscape","Cosmos4j\\.feedback","Covario-IDS","Crescent","Crowsnest","Criteo","CSHttp","curb","Curious George","curl","cuwhois\\\/","cybo\\.com","DAP\\\/NetHTTP","DareBoost","DatabaseDriverMysqli","DataCha0s","Datafeedwatch","Datanyze","DataparkSearch","dataprovider","DataXu","Daum(oa)?[ \\\/][0-9]","Demon","DeuSu","developers\\.google\\.com\\\/\\+\\\/web\\\/snippet\\\/","Devil","Digg","Digincore","DigitalPebble","Dirbuster","Discourse Forum Onebox","Disqus\\\/","Dispatch\\\/","DittoSpyder","dlvr","DMBrowser","DNSPod-reporting","docoloc","Dolphin http client","DomainAppender","Donuts Content Explorer","dotMailer content retrieval","dotSemantic","downforeveryoneorjustme","Download Wonder","downnotifier","DowntimeDetector","Drip","drupact","Drupal \\(\\+http:\\\/\\\/drupal\\.org\\\/\\)","DTS 
Agent","dubaiindex","EARTHCOM","Easy-Thumb","EasyDL","Ebingbong","ec2linkfinder","eCairn-Grabber","eCatch","ECCP","eContext\\\/","Ecxi","EirGrabber","ElectricMonk","elefent","EMail Exractor","EMail Wolf","EmailWolf","Embarcadero","Embed PHP Library","Embedly","endo\\\/","europarchive\\.org","evc-batch","EventMachine HttpClient","Everwall Link Expander","Evidon","Evrinid","ExactSearch","ExaleadCloudview","Excel\\\/","exif","Exploratodo","Express WebPictures","Extreme Picture Finder","EyeNetIE","ezooms","facebookexternalhit","facebookplatform","fairshare","Faraday v","fasthttp","Faveeo","Favicon downloader","faviconkit","faviconarchive","FavOrg","Feed Wrangler","Feedable\\\/","Feedbin","FeedBooster","FeedBucket","FeedBunch\\\/","FeedBurner","feeder","Feedly","FeedshowOnline","Feedspot","Feedwind\\\/","FeedZcollector","feeltiptop","Fetch API","Fetch\\\/[0-9]","Fever\\\/[0-9]","FHscan","Fimap","findlink","findthatfile","FlashGet","FlipboardBrowserProxy","FlipboardProxy","FlipboardRSS","Flock\\\/","fluffy","Flunky","flynxapp","forensiq","FoundSeoTool","http:\\\/\\\/www.neomo.de\\\/","free thumbnails","Freeuploader","Funnelback","G-i-g-a-b-o-t","g00g1e\\.net","ganarvisitas","geek-tools","Genieo","GentleSource","GetCode","Getintent","GetLinkInfo","getprismatic","GetRight","getroot","GetURLInfo\\\/","GetWeb","Ghost Inspector","GigablastOpenSource","GIS-LABS","github-camo","github\\.com","Go [\\d\\.]* package http","Go http package","Go-Ahead-Got-It","Go-http-client","Go!Zilla","gobyus","gofetch","GomezAgent","gooblog","Goodzer\\\/","Google AppsViewer","Google Desktop","Google favicon","Google Keyword Suggestion","Google Keyword Tool","Google Page Speed Insights","Google PP Default","Google Search Console","Google Web 
Preview","Google-Adwords","Google-Apps-Script","Google-Calendar-Importer","Google-HotelAdsVerifier","Google-HTTP-Java-Client","Google-Publisher-Plugin","Google-SearchByImage","Google-Site-Verification","Google-Structured-Data-Testing-Tool","Google-Youtube-Links","google-xrawler","GoogleDocs","GoogleHC\\\/","GoogleProducer","GoogleSites","Google-Transparency-Report","Gookey","GoScraper","GoSpotCheck","gosquared-thumbnailer","Gotit","GoZilla","grabify","GrabNet","Grafula","Grammarly","GrapeFX","GreatNews","Gregarius","GRequests","grokkit","grouphigh","grub-client","gSOAP\\\/","GT::WWW","GTmetrix","GuzzleHttp","gvfs\\\/","HAA(A)?RTLAND http client","Haansoft","hackney\\\/","Hadi Agent","HappyApps-WebCheck","Hatena","Havij","HeadlessChrome","HEADMasterSEO","HeartRails_Capture","help@dataminr\\.com","heritrix","historious","hkedcity","hledejLevne\\.cz","Hloader","HMView","Holmes","HonesoSearchEngine","HootSuite Image proxy","Hootsuite-WebFeed","hosterstats","HostTracker","ht:\\\/\\\/check","htdig","HTMLparser","htmlyse","HTTP Banner Detection","HTTP_Compression_Test","http_request2","http_requester","http-get","HTTP-Header-Abfrage","http-kit","http-request\\\/","HTTP-Tiny","HTTP::Lite","http\\.rb\\\/","http_get","HttpComponents","httphr","HTTPMon","httpRequest","httpscheck","httpssites_power","httpunit","HttpUrlConnection","httrack","huaweisymantec","HubSpot ","Humanlinks","i2kconnect\\\/","Iblog","ichiro","Id-search","IdeelaborPlagiaat","IDG Twitter Links Resolver","IDwhois\\\/","Iframely","igdeSpyder","IlTrovatore","Image Fetch","Image Sucker","ImageEngine\\\/","ImageVisu\\\/","Imagga","imagineeasy","imgsizer","InAGist","inbound\\.li parser","InDesign%20CC","Indy Library","InetURL","infegy","infohelfer","InfoTekies","InfoWizards Reciprocal Link","inpwrd\\.com","instabid","Instapaper","Integrity","integromedb","Intelliseek","InterGET","internet_archive","Internet Ninja","InternetSeer","internetVista 
monitor","intraVnews","IODC","IOI","iplabel","ips-agent","IPS\\\/[0-9]","IPWorks HTTP\\\/S Component","iqdb\\\/","Iria","Irokez","isitup\\.org","iskanie","isUp\\.li","iThemes Sync\\\/","iZSearch","JAHHO","janforman","Jaunt\\\/","Jbrofuzz","Jersey\\\/","JetCar","Jigsaw","Jobboerse","JobFeed discovery","Jobg8 URL Monitor","jobo","Jobrapido","Jobsearch1\\.5","JoinVision Generic","JolokiaPwn","Joomla","Jorgee","JS-Kit","JustView","Kaspersky Lab CFR link resolver","Kelny\\\/","Kerrigan\\\/","KeyCDN","Keyword Density","Keywords Research","KickFire","KimonoLabs\\\/","Kml-Google","knows\\.is","KOCMOHABT","kouio","kube-probe","kulturarw3","KumKie","L\\.webis","Larbin","Lavf\\\/","LeechFTP","LeechGet","letsencrypt","Lftp","LibVLC","LibWeb","Libwhisker","libwww","Licorne","Liferea\\\/","Lightspeedsystems","Lighthouse","Likse","Link Valet","link_thumbnailer","LinkAlarm\\\/","linkCheck","linkdex","LinkExaminer","linkfluence","linkpeek","LinkPreviewGenerator","LinkScan","LinksManager","LinkTiger","LinkWalker","Lipperhey","Litemage_walker","livedoor ScreenShot","LoadImpactRload","localsearch-web","LongURL API","looksystems\\.net","ltx71","lua-resty-http","lwp-request","lwp-trivial","LWP::Simple","lycos","LYT\\.SR","mabontland","Mag-Net","MagpieRSS","Mail\\.Ru","MailChimp","Majestic12","makecontact\\\/","Mandrill","MapperCmd","marketinggrader","MarkMonitor","MarkWatch","Mass Downloader","masscan\\\/","Mata Hari","Mediapartners-Google","mediawords","MegaIndex\\.ru","MeltwaterNews","Melvil Rawi","MemGator","Metaspinner","MetaURI","MFC_Tear_Sample","Microsearch","Microsoft Office ","Microsoft Outlook","Microsoft Windows Network Diagnostics","Microsoft-WebDAV-MiniRedir","Microsoft Data Access","MIDown tool","MIIxpc","Mindjet","Miniature\\.io","Miniflux","Mister PiX","mixdata dot com","mixed-content-scan","Mixmax-LinkPreview","mixnode","Mnogosearch","mogimogi","Mojeek","Mojolicious \\(Perl\\)","Monit\\\/","monitis","Monitority\\\/","montastic","MonTools","Moreover","Morfeus Fucking 
Scanner","Morning Paper","MovableType","mowser","Mrcgiguy","MS Web Services Client Protocol","MSFrontPage","mShots","MuckRack\\\/","muhstik-scan","MVAClient","MxToolbox\\\/","nagios","Najdi\\.si","Name Intelligence","Nameprotect","Navroad","NearSite","Needle","Nessus","Net Vampire","NetAnts","NETCRAFT","NetLyzer","NetMechanic","NetNewsWire","Netpursual","netresearch","NetShelter ContentScan","Netsparker","NetTrack","Netvibes","NetZIP","Neustar WPM","NeutrinoAPI","NewRelicPinger","NewsBlur .*Finder","NewsGator","newsme","newspaper\\\/","Nexgate Ruby Client","NG-Search","Nibbler","NICErsPRO","Nikto","nineconnections","NLNZ_IAHarvester","Nmap Scripting Engine","node-superagent","node-urllib","node\\.io","Nodemeter","NodePing","nominet\\.org\\.uk","nominet\\.uk","Norton-Safeweb","Notifixious","notifyninja","nuhk","nutch","Nuzzel","nWormFeedFinder","nyawc\\\/","Nymesis","NYU","Ocelli\\\/","Octopus","oegp","Offline Explorer","Offline Navigator","og-scraper","okhttp","omgili","OMSC","Online Domain Tools","OpenCalaisSemanticProxy","Openfind","OpenLinkProfiler","Openstat\\\/","OpenVAS","Optimizer","Orbiter","OrgProbe\\\/","orion-semantics","Outlook-Express","Outlook-iOS","ow\\.ly","Owler","ownCloud News","OxfordCloudService","Page Valet","page_verifier","page scorer","page2rss","PageGrabber","PagePeeker","PageScorer","Pagespeed\\\/","Panopta","panscient","Papa Foto","parsijoo","Pavuk","PayPal IPN","pcBrowser","Pcore-HTTP","Pearltrees","PECL::HTTP","peerindex","Peew","PeoplePal","Perlu -","PhantomJS Screenshoter","PhantomJS\\\/","Photon\\\/","phpservermon","Pi-Monster","Picscout","Picsearch","PictureFinder","Pimonster","ping\\.blo\\.gs","Pingability","PingAdmin\\.Ru","Pingdom","Pingoscope","PingSpot","pinterest\\.com","Pixray","Pizilla","Plagger\\\/","Ploetz \\+ Zeller","Plukkie","plumanalytics","PocketImageCache","PocketParser","Pockey","POE-Component-Client-HTTP","Polymail\\\/","Pompos","Porkbun","Port 
Monitor","postano","PostmanRuntime","PostPost","postrank","PowerPoint\\\/","Priceonomics Analysis Engine","PrintFriendly","PritTorrent","Prlog","probethenet","Project 25499","prospectb2b","Protopage","ProWebWalker","proximic","PRTG Network Monitor","pshtt, https scanning","PTST ","PTST\\\/[0-9]+","Pulsepoint XT3 web scraper","Pump","Python-httplib2","python-requests","Python-urllib","Qirina Hurdler","QQDownload","QrafterPro","Qseero","Qualidator","QueryN Metasearch","queuedriver","Quora Link Preview","Qwantify","Radian6","RankActive","RankFlex","RankSonicSiteAuditor","Re-re Studio","ReactorNetty","Readability","RealDownload","RealPlayer%20Downloader","RebelMouse","Recorder","RecurPost\\\/","redback\\\/","ReederForMac","ReGet","RepoMonkey","request\\.js","reqwest\\\/","ResponseCodeTest","RestSharp","Riddler","Rival IQ","Robosourcer","Robozilla","ROI Hunter","RPT-HTTPClient","RSSOwl","safe-agent-scanner","SalesIntelligent","Saleslift","Sendsay\\.Ru","SauceNAO","SBIder","scalaj-http","scan\\.lol","ScanAlert","Scoop","scooter","ScoutJet","ScoutURLMonitor","ScrapeBox Page Scanner","SimpleScraper","Scrapy","Screaming","ScreenShotService","Scrubby","Scrutiny\\\/","search\\.thunderstone","Search37","searchenginepromotionhelp","Searchestate","SearchExpress","SearchSight","Seeker","semanticdiscovery","semanticjuice","Semiocast HTTP client","Semrush","sentry\\\/","SEO Browser","Seo Servis","seo-nastroj\\.cz","seo4ajax","Seobility","SEOCentro","SeoCheck","SEOkicks","Seomoz","SEOprofiler","SEOsearch","seoscanners","seositecheckup","SEOstats","servernfo","sexsearcher","Seznam","Shelob","Shodan","Shoppimon","ShopWiki","ShortLinkTranslate","shrinktheweb","Sideqik","SimplePie","SimplyFast","Siphon","SISTRIX","Site-Shot\\\/","Site Sucker","Site24x7","SiteBar","Sitebeam","Sitebulb\\\/","SiteCondor","SiteExplorer","SiteGuardian","Siteimprove","SiteIndexed","Sitemap(s)? 
Generator","SitemapGenerator","SiteMonitor","Siteshooter B0t","SiteSnagger","SiteSucker","SiteTruth","Sitevigil","sitexy\\.com","SkypeUriPreview","Slack\\\/","slider\\.com","slurp","SlySearch","SmartDownload","SMRF URL Expander","SMUrlExpander","Snake","Snappy","SnapSearch","Snarfer\\\/","SniffRSS","sniptracker","Snoopy","SnowHaze Search","sogou web","SortSite","Sottopop","sovereign\\.ai","SpaceBison","SpamExperts","Spammen","Spanner","spaziodati","SPDYCheck","Specificfeeds","speedy","SPEng","Spinn3r","spray-can","Sprinklr ","spyonweb","sqlmap","Sqlworm","Sqworm","SSL Labs","ssl-tools","StackRambler","Statastico\\\/","StatusCake","Steeler","Stratagems Kumo","Stroke\\.cz","StudioFACA","StumbleUpon","suchen","Sucuri","summify","SuperHTTP","Surphace Scout","Suzuran","SwiteScraper","Symfony BrowserKit","Symfony2 BrowserKit","SynHttpClient-Built","Sysomos","sysscan","Szukacz","T0PHackTeam","tAkeOut","Tarantula\\\/","Taringa UGC","TarmotGezgin","Teleport","Telesoft","Telesphoreo","Telesphorep","Tenon\\.io","teoma","terrainformatica","Test Certificate Info","testuri","Tetrahedron","The Drop Reaper","The Expert HTML Source Viewer","The Knowledge AI","The Intraformant","theinternetrules","TheNomad","Thinklab","Thumbshots","ThumbSniper","timewe\\.net","TinEye","Tiny Tiny RSS","TLSProbe\\\/","Toata","topster","touche\\.com","Traackr\\.com","tracemyfile","Trackuity","TrapitAgent","Trendiction","Trendsmap","trendspottr","truwoGPS","TryJsoup","TulipChain","Turingos","Turnitin","tweetedtimes","Tweetminster","Tweezler\\\/","twibble","Twice","Twikle","Twingly","Twisted PageGetter","Typhoeus","ubermetrics-technologies","uclassify","UdmSearch","unchaos","unirest-java","UniversalFeedParser","Unshorten\\.It","Untiny","UnwindFetchor","updated","updown\\.io daemon","Upflow","Uptimia","Urlcheckr","URL Verifier","URLitor","urlresolver","Urlstat","URLTester","UrlTrends Ranking Updater","URLy Warning","URLy\\.Warning","Vacuum","Vagabondo","VB Project","vBSEO","VCI","via ggpht\\.com 
GoogleImageProxy","VidibleScraper","Virusdie","visionutils","vkShare","VoidEYE","Voil","voltron","voyager\\\/","VSAgent\\\/","VSB-TUO\\\/","Vulnbusters Meter","VYU2","w3af\\.org","W3C_Unicorn","W3C-checklink","W3C-mobileOK","WAC-OFU","Wallpapers\\\/[0-9]+","WallpapersHD","wangling","Wappalyzer","WatchMouse","WbSrch\\\/","WDT\\.io","web-capture\\.net","Web-sniffer","Web Auto","Web Collage","Web Enhancer","Web Fetch","Web Fuck","Web Pix","Web Sauger","Web Sucker","Webalta","Webauskunft","WebAuto","WebCapture","WebClient\\\/","webcollage","WebCookies","WebCopier","WebCorp","WebDataStats","WebDoc","WebEnhancer","WebFetch","WebFuck","WebGazer","WebGo IS","WebImageCollector","WebImages","WebIndex","webkit2png","WebLeacher","webmastercoffee","webmon ","WebPix","WebReaper","WebSauger","webscreenie","Webshag","Webshot","Website Quester","websitepulse agent","WebsiteQuester","Websnapr","WebSniffer","Webster","WebStripper","WebSucker","Webthumb\\\/","WebThumbnail","WebWhacker","WebZIP","WeLikeLinks","WEPA","WeSEE","wf84","Wfuzz\\\/","wget","WhatsApp","WhatsMyIP","WhatWeb","WhereGoes\\?","Whibse","WhoRunsCoinHive","Whynder Magnet","Windows-RSS-Platform","WinPodder","wkhtmlto","wmtips","Woko","woorankreview","Word\\\/","WordPress\\\/","WordupinfoSearch","wotbox","WP Engine Install Performance API","wpif","wprecon\\.com survey","WPScan","wscheck","Wtrace","WWW-Collector-E","WWW-Mechanize","WWW::Document","WWW::Mechanize","www\\.monitor\\.us","WWWOFFLE","x09Mozilla","x22Mozilla","XaxisSemanticsClassifier","Xenu Link Sleuth","XING-contenttabreceiver","xpymep([0-9]?)\\.exe","Y!J-(ASR|BSC)","Y\\!J-BRW","Yaanb","yacy","Yahoo Link Preview","YahooCacheSystem","YahooYSMcm","YandeG","Yandex(?!Search)","yanga","yeti","Yo-yo","Yoleo Consumer","yoogliFetchAgent","YottaaMonitor","Your-Website-Sucks","yourls\\.org","YoYs\\.net","YP\\.PL","Zabbix","Zade","Zao","Zauba","Zemanta Aggregator","Zend_Http_Client","Zend\\\\Http\\\\Client","Zermelo","Zeus 
","zgrab","ZnajdzFoto","Zombie\\.js","Zoom\\.Mac","ZyBorg","[a-z0-9\\-_]*(bot|crawl|archiver|transcoder|spider|uptime|validator|fetcher|cron|checker|reader|extractor|monitoring|analyzer)"]
\ No newline at end of file
--- /dev/null
+.*Java.*outbrain
+ YLT
+^b0t$
+^bluefish
+^Calypso v\/
+^COMODO DCV
+^DangDang
+^DavClnt
+^FDM
+^git\/
+^Goose\/
+^Grabber
+^HTTPClient\/
+^Java\/
+^Jeode\/
+^Jetty\/
+^Mail\/
+^Mget
+^Microsoft URL Control
+^NG\/[0-9\.]
+^NING\/
+^PHP\/[0-9]
+^RMA\/
+^Ruby|Ruby\/[0-9]
+^VSE\/[0-9]
+^WordPress\.com
+^XRL\/[0-9]
+^ZmEu
+008\/
+13TABS
+192\.comAgent
+2ip\.ru
+404enemy
+7Siters
+80legs
+a\.pr-cy\.ru
+a3logics\.in
+A6-Indexer
+Abonti
+Aboundex
+aboutthedomain
+Accoona-AI-Agent
+acoon
+acrylicapps\.com\/pulp
+Acunetix
+AdAuth\/
+adbeat
+AddThis
+ADmantX
+AdminLabs
+adressendeutschland
+adscanner
+Adstxtaggregator
+agentslug
+AHC
+aihit
+aiohttp\/
+Airmail
+akka-http\/
+akula\/
+alertra
+alexa site audit
+Alibaba\.Security\.Heimdall
+Alligator
+allloadin
+AllSubmitter
+alyze\.info
+amagit
+Anarchie
+AndroidDownloadManager
+Anemone
+AngleSharp
+annotate_google
+Ant\.com
+Anturis Agent
+AnyEvent-HTTP\/
+Apache Droid
+Apache OpenOffice
+Apache-HttpAsyncClient
+Apache-HttpClient
+ApacheBench
+Apexoo
+APIs-Google
+AportWorm\/
+AppBeat\/
+AppEngine-Google
+AppStoreScraperZ
+Aprc\/[0-9]
+Arachmo
+arachnode
+Arachnophilia
+aria2
+Arukereso
+asafaweb
+AskQuickly
+Ask Jeeves
+ASPSeek
+Asterias
+Astute
+asynchttp
+Attach
+autocite
+Autonomy
+axios\/
+B-l-i-t-z-B-O-T
+Backlink-Ceck
+backlink-check
+BacklinkHttpStatus
+BackStreet
+BackWeb
+Bad-Neighborhood
+Badass
+baidu\.com
+Bandit
+basicstate
+BatchFTP
+Battleztar Bazinga
+baypup\/
+BazQux
+BBBike
+BCKLINKS
+BDFetch
+BegunAdvertising
+Bidtellect
+BigBozz
+Bigfoot
+biglotron
+BingLocalSearch
+BingPreview
+binlar
+biNu image cacher
+Bitacle
+biz_Directory
+Black Hole
+Blackboard Safeassign
+BlackWidow
+BlockNote\.Net
+Bloglines
+Bloglovin
+BlogPulseLive
+BlogSearch
+Blogtrottr
+BlowFish
+boitho\.com-dc
+BPImageWalker
+Braintree-Webhooks
+Branch Metrics API
+Branch-Passthrough
+Brandprotect
+BrandVerity
+Brandwatch
+Brodie\/
+Browsershots
+BUbiNG
+Buck\/
+Buddy
+BuiltWith
+Bullseye
+BunnySlippers
+Burf Search
+Butterfly\/
+BuzzSumo
+CAAM\/[0-9]
+CakePHP
+Calculon
+Canary%20Mail
+CaretNail
+catexplorador
+CC Metadata Scaper
+Cegbfeieh
+censys
+Cerberian Drtrs
+CERT\.at-Statistics-Survey
+cg-eye
+changedetection
+ChangesMeter
+Charlotte
+CheckHost
+checkprivacy
+CherryPicker
+ChinaClaw
+Chirp\/
+chkme\.com
+Chlooe
+Chromaxa
+CirrusExplorer
+CISPA Vulnerability Notification
+Citoid
+CJNetworkQuality
+Clarsentia
+clips\.ua\.ac\.be
+Cloud mapping
+CloudEndure
+CloudFlare-AlwaysOnline
+Cloudinary
+cmcm\.com
+coccoc
+cognitiveseo
+colly -
+CommaFeed
+Commons-HttpClient
+commonscan
+contactbigdatafr
+contentkingapp
+convera
+CookieReports
+copyright sheriff
+CopyRightCheck
+Copyscape
+Cosmos4j\.feedback
+Covario-IDS
+Crescent
+Crowsnest
+Criteo
+CSHttp
+curb
+Curious George
+curl
+cuwhois\/
+cybo\.com
+DAP\/NetHTTP
+DareBoost
+DatabaseDriverMysqli
+DataCha0s
+Datafeedwatch
+Datanyze
+DataparkSearch
+dataprovider
+DataXu
+Daum(oa)?[ \/][0-9]
+Demon
+DeuSu
+developers\.google\.com\/\+\/web\/snippet\/
+Devil
+Digg
+Digincore
+DigitalPebble
+Dirbuster
+Discourse Forum Onebox
+Disqus\/
+Dispatch\/
+DittoSpyder
+dlvr
+DMBrowser
+DNSPod-reporting
+docoloc
+Dolphin http client
+DomainAppender
+Donuts Content Explorer
+dotMailer content retrieval
+dotSemantic
+downforeveryoneorjustme
+Download Wonder
+downnotifier
+DowntimeDetector
+Drip
+drupact
+Drupal \(\+http:\/\/drupal\.org\/\)
+DTS Agent
+dubaiindex
+EARTHCOM
+Easy-Thumb
+EasyDL
+Ebingbong
+ec2linkfinder
+eCairn-Grabber
+eCatch
+ECCP
+eContext\/
+Ecxi
+EirGrabber
+ElectricMonk
+elefent
+EMail Exractor
+EMail Wolf
+EmailWolf
+Embarcadero
+Embed PHP Library
+Embedly
+endo\/
+europarchive\.org
+evc-batch
+EventMachine HttpClient
+Everwall Link Expander
+Evidon
+Evrinid
+ExactSearch
+ExaleadCloudview
+Excel\/
+exif
+Exploratodo
+Express WebPictures
+Extreme Picture Finder
+EyeNetIE
+ezooms
+facebookexternalhit
+facebookplatform
+fairshare
+Faraday v
+fasthttp
+Faveeo
+Favicon downloader
+faviconkit
+faviconarchive
+FavOrg
+Feed Wrangler
+Feedable\/
+Feedbin
+FeedBooster
+FeedBucket
+FeedBunch\/
+FeedBurner
+feeder
+Feedly
+FeedshowOnline
+Feedspot
+Feedwind\/
+FeedZcollector
+feeltiptop
+Fetch API
+Fetch\/[0-9]
+Fever\/[0-9]
+FHscan
+Fimap
+findlink
+findthatfile
+FlashGet
+FlipboardBrowserProxy
+FlipboardProxy
+FlipboardRSS
+Flock\/
+fluffy
+Flunky
+flynxapp
+forensiq
+FoundSeoTool
+http:\/\/www.neomo.de\/
+free thumbnails
+Freeuploader
+Funnelback
+G-i-g-a-b-o-t
+g00g1e\.net
+ganarvisitas
+geek-tools
+Genieo
+GentleSource
+GetCode
+Getintent
+GetLinkInfo
+getprismatic
+GetRight
+getroot
+GetURLInfo\/
+GetWeb
+Ghost Inspector
+GigablastOpenSource
+GIS-LABS
+github-camo
+github\.com
+Go [\d\.]* package http
+Go http package
+Go-Ahead-Got-It
+Go-http-client
+Go!Zilla
+gobyus
+gofetch
+GomezAgent
+gooblog
+Goodzer\/
+Google AppsViewer
+Google Desktop
+Google favicon
+Google Keyword Suggestion
+Google Keyword Tool
+Google Page Speed Insights
+Google PP Default
+Google Search Console
+Google Web Preview
+Google-Adwords
+Google-Apps-Script
+Google-Calendar-Importer
+Google-HotelAdsVerifier
+Google-HTTP-Java-Client
+Google-Publisher-Plugin
+Google-SearchByImage
+Google-Site-Verification
+Google-Structured-Data-Testing-Tool
+Google-Youtube-Links
+google-xrawler
+GoogleDocs
+GoogleHC\/
+GoogleProducer
+GoogleSites
+Google-Transparency-Report
+Gookey
+GoScraper
+GoSpotCheck
+gosquared-thumbnailer
+Gotit
+GoZilla
+grabify
+GrabNet
+Grafula
+Grammarly
+GrapeFX
+GreatNews
+Gregarius
+GRequests
+grokkit
+grouphigh
+grub-client
+gSOAP\/
+GT::WWW
+GTmetrix
+GuzzleHttp
+gvfs\/
+HAA(A)?RTLAND http client
+Haansoft
+hackney\/
+Hadi Agent
+HappyApps-WebCheck
+Hatena
+Havij
+HeadlessChrome
+HEADMasterSEO
+HeartRails_Capture
+help@dataminr\.com
+heritrix
+historious
+hkedcity
+hledejLevne\.cz
+Hloader
+HMView
+Holmes
+HonesoSearchEngine
+HootSuite Image proxy
+Hootsuite-WebFeed
+hosterstats
+HostTracker
+ht:\/\/check
+htdig
+HTMLparser
+htmlyse
+HTTP Banner Detection
+HTTP_Compression_Test
+http_request2
+http_requester
+http-get
+HTTP-Header-Abfrage
+http-kit
+http-request\/
+HTTP-Tiny
+HTTP::Lite
+http\.rb\/
+http_get
+HttpComponents
+httphr
+HTTPMon
+httpRequest
+httpscheck
+httpssites_power
+httpunit
+HttpUrlConnection
+httrack
+huaweisymantec
+HubSpot
+Humanlinks
+i2kconnect\/
+Iblog
+ichiro
+Id-search
+IdeelaborPlagiaat
+IDG Twitter Links Resolver
+IDwhois\/
+Iframely
+igdeSpyder
+IlTrovatore
+Image Fetch
+Image Sucker
+ImageEngine\/
+ImageVisu\/
+Imagga
+imagineeasy
+imgsizer
+InAGist
+inbound\.li parser
+InDesign%20CC
+Indy Library
+InetURL
+infegy
+infohelfer
+InfoTekies
+InfoWizards Reciprocal Link
+inpwrd\.com
+instabid
+Instapaper
+Integrity
+integromedb
+Intelliseek
+InterGET
+internet_archive
+Internet Ninja
+InternetSeer
+internetVista monitor
+intraVnews
+IODC
+IOI
+iplabel
+ips-agent
+IPS\/[0-9]
+IPWorks HTTP\/S Component
+iqdb\/
+Iria
+Irokez
+isitup\.org
+iskanie
+isUp\.li
+iThemes Sync\/
+iZSearch
+JAHHO
+janforman
+Jaunt\/
+Jbrofuzz
+Jersey\/
+JetCar
+Jigsaw
+Jobboerse
+JobFeed discovery
+Jobg8 URL Monitor
+jobo
+Jobrapido
+Jobsearch1\.5
+JoinVision Generic
+JolokiaPwn
+Joomla
+Jorgee
+JS-Kit
+JustView
+Kaspersky Lab CFR link resolver
+Kelny\/
+Kerrigan\/
+KeyCDN
+Keyword Density
+Keywords Research
+KickFire
+KimonoLabs\/
+Kml-Google
+knows\.is
+KOCMOHABT
+kouio
+kube-probe
+kulturarw3
+KumKie
+L\.webis
+Larbin
+Lavf\/
+LeechFTP
+LeechGet
+letsencrypt
+Lftp
+LibVLC
+LibWeb
+Libwhisker
+libwww
+Licorne
+Liferea\/
+Lightspeedsystems
+Lighthouse
+Likse
+Link Valet
+link_thumbnailer
+LinkAlarm\/
+linkCheck
+linkdex
+LinkExaminer
+linkfluence
+linkpeek
+LinkPreviewGenerator
+LinkScan
+LinksManager
+LinkTiger
+LinkWalker
+Lipperhey
+Litemage_walker
+livedoor ScreenShot
+LoadImpactRload
+localsearch-web
+LongURL API
+looksystems\.net
+ltx71
+lua-resty-http
+lwp-request
+lwp-trivial
+LWP::Simple
+lycos
+LYT\.SR
+mabontland
+Mag-Net
+MagpieRSS
+Mail\.Ru
+MailChimp
+Majestic12
+makecontact\/
+Mandrill
+MapperCmd
+marketinggrader
+MarkMonitor
+MarkWatch
+Mass Downloader
+masscan\/
+Mata Hari
+Mediapartners-Google
+mediawords
+MegaIndex\.ru
+MeltwaterNews
+Melvil Rawi
+MemGator
+Metaspinner
+MetaURI
+MFC_Tear_Sample
+Microsearch
+Microsoft Office
+Microsoft Outlook
+Microsoft Windows Network Diagnostics
+Microsoft-WebDAV-MiniRedir
+Microsoft Data Access
+MIDown tool
+MIIxpc
+Mindjet
+Miniature\.io
+Miniflux
+Mister PiX
+mixdata dot com
+mixed-content-scan
+Mixmax-LinkPreview
+mixnode
+Mnogosearch
+mogimogi
+Mojeek
+Mojolicious \(Perl\)
+Monit\/
+monitis
+Monitority\/
+montastic
+MonTools
+Moreover
+Morfeus Fucking Scanner
+Morning Paper
+MovableType
+mowser
+Mrcgiguy
+MS Web Services Client Protocol
+MSFrontPage
+mShots
+MuckRack\/
+muhstik-scan
+MVAClient
+MxToolbox\/
+nagios
+Najdi\.si
+Name Intelligence
+Nameprotect
+Navroad
+NearSite
+Needle
+Nessus
+Net Vampire
+NetAnts
+NETCRAFT
+NetLyzer
+NetMechanic
+NetNewsWire
+Netpursual
+netresearch
+NetShelter ContentScan
+Netsparker
+NetTrack
+Netvibes
+NetZIP
+Neustar WPM
+NeutrinoAPI
+NewRelicPinger
+NewsBlur .*Finder
+NewsGator
+newsme
+newspaper\/
+Nexgate Ruby Client
+NG-Search
+Nibbler
+NICErsPRO
+Nikto
+nineconnections
+NLNZ_IAHarvester
+Nmap Scripting Engine
+node-superagent
+node-urllib
+node\.io
+Nodemeter
+NodePing
+nominet\.org\.uk
+nominet\.uk
+Norton-Safeweb
+Notifixious
+notifyninja
+nuhk
+nutch
+Nuzzel
+nWormFeedFinder
+nyawc\/
+Nymesis
+NYU
+Ocelli\/
+Octopus
+oegp
+Offline Explorer
+Offline Navigator
+og-scraper
+okhttp
+omgili
+OMSC
+Online Domain Tools
+OpenCalaisSemanticProxy
+Openfind
+OpenLinkProfiler
+Openstat\/
+OpenVAS
+Optimizer
+Orbiter
+OrgProbe\/
+orion-semantics
+Outlook-Express
+Outlook-iOS
+ow\.ly
+Owler
+ownCloud News
+OxfordCloudService
+Page Valet
+page_verifier
+page scorer
+page2rss
+PageGrabber
+PagePeeker
+PageScorer
+Pagespeed\/
+Panopta
+panscient
+Papa Foto
+parsijoo
+Pavuk
+PayPal IPN
+pcBrowser
+Pcore-HTTP
+Pearltrees
+PECL::HTTP
+peerindex
+Peew
+PeoplePal
+Perlu -
+PhantomJS Screenshoter
+PhantomJS\/
+Photon\/
+phpservermon
+Pi-Monster
+Picscout
+Picsearch
+PictureFinder
+Pimonster
+ping\.blo\.gs
+Pingability
+PingAdmin\.Ru
+Pingdom
+Pingoscope
+PingSpot
+pinterest\.com
+Pixray
+Pizilla
+Plagger\/
+Ploetz \+ Zeller
+Plukkie
+plumanalytics
+PocketImageCache
+PocketParser
+Pockey
+POE-Component-Client-HTTP
+Polymail\/
+Pompos
+Porkbun
+Port Monitor
+postano
+PostmanRuntime
+PostPost
+postrank
+PowerPoint\/
+Priceonomics Analysis Engine
+PrintFriendly
+PritTorrent
+Prlog
+probethenet
+Project 25499
+prospectb2b
+Protopage
+ProWebWalker
+proximic
+PRTG Network Monitor
+pshtt, https scanning
+PTST
+PTST\/[0-9]+
+Pulsepoint XT3 web scraper
+Pump
+Python-httplib2
+python-requests
+Python-urllib
+Qirina Hurdler
+QQDownload
+QrafterPro
+Qseero
+Qualidator
+QueryN Metasearch
+queuedriver
+Quora Link Preview
+Qwantify
+Radian6
+RankActive
+RankFlex
+RankSonicSiteAuditor
+Re-re Studio
+ReactorNetty
+Readability
+RealDownload
+RealPlayer%20Downloader
+RebelMouse
+Recorder
+RecurPost\/
+redback\/
+ReederForMac
+ReGet
+RepoMonkey
+request\.js
+reqwest\/
+ResponseCodeTest
+RestSharp
+Riddler
+Rival IQ
+Robosourcer
+Robozilla
+ROI Hunter
+RPT-HTTPClient
+RSSOwl
+safe-agent-scanner
+SalesIntelligent
+Saleslift
+Sendsay\.Ru
+SauceNAO
+SBIder
+scalaj-http
+scan\.lol
+ScanAlert
+Scoop
+scooter
+ScoutJet
+ScoutURLMonitor
+ScrapeBox Page Scanner
+SimpleScraper
+Scrapy
+Screaming
+ScreenShotService
+Scrubby
+Scrutiny\/
+search\.thunderstone
+Search37
+searchenginepromotionhelp
+Searchestate
+SearchExpress
+SearchSight
+Seeker
+semanticdiscovery
+semanticjuice
+Semiocast HTTP client
+Semrush
+sentry\/
+SEO Browser
+Seo Servis
+seo-nastroj\.cz
+seo4ajax
+Seobility
+SEOCentro
+SeoCheck
+SEOkicks
+Seomoz
+SEOprofiler
+SEOsearch
+seoscanners
+seositecheckup
+SEOstats
+servernfo
+sexsearcher
+Seznam
+Shelob
+Shodan
+Shoppimon
+ShopWiki
+ShortLinkTranslate
+shrinktheweb
+Sideqik
+SimplePie
+SimplyFast
+Siphon
+SISTRIX
+Site-Shot\/
+Site Sucker
+Site24x7
+SiteBar
+Sitebeam
+Sitebulb\/
+SiteCondor
+SiteExplorer
+SiteGuardian
+Siteimprove
+SiteIndexed
+Sitemap(s)? Generator
+SitemapGenerator
+SiteMonitor
+Siteshooter B0t
+SiteSnagger
+SiteSucker
+SiteTruth
+Sitevigil
+sitexy\.com
+SkypeUriPreview
+Slack\/
+slider\.com
+slurp
+SlySearch
+SmartDownload
+SMRF URL Expander
+SMUrlExpander
+Snake
+Snappy
+SnapSearch
+Snarfer\/
+SniffRSS
+sniptracker
+Snoopy
+SnowHaze Search
+sogou web
+SortSite
+Sottopop
+sovereign\.ai
+SpaceBison
+SpamExperts
+Spammen
+Spanner
+spaziodati
+SPDYCheck
+Specificfeeds
+speedy
+SPEng
+Spinn3r
+spray-can
+Sprinklr
+spyonweb
+sqlmap
+Sqlworm
+Sqworm
+SSL Labs
+ssl-tools
+StackRambler
+Statastico\/
+StatusCake
+Steeler
+Stratagems Kumo
+Stroke\.cz
+StudioFACA
+StumbleUpon
+suchen
+Sucuri
+summify
+SuperHTTP
+Surphace Scout
+Suzuran
+SwiteScraper
+Symfony BrowserKit
+Symfony2 BrowserKit
+SynHttpClient-Built
+Sysomos
+sysscan
+Szukacz
+T0PHackTeam
+tAkeOut
+Tarantula\/
+Taringa UGC
+TarmotGezgin
+Teleport
+Telesoft
+Telesphoreo
+Telesphorep
+Tenon\.io
+teoma
+terrainformatica
+Test Certificate Info
+testuri
+Tetrahedron
+The Drop Reaper
+The Expert HTML Source Viewer
+The Knowledge AI
+The Intraformant
+theinternetrules
+TheNomad
+Thinklab
+Thumbshots
+ThumbSniper
+timewe\.net
+TinEye
+Tiny Tiny RSS
+TLSProbe\/
+Toata
+topster
+touche\.com
+Traackr\.com
+tracemyfile
+Trackuity
+TrapitAgent
+Trendiction
+Trendsmap
+trendspottr
+truwoGPS
+TryJsoup
+TulipChain
+Turingos
+Turnitin
+tweetedtimes
+Tweetminster
+Tweezler\/
+twibble
+Twice
+Twikle
+Twingly
+Twisted PageGetter
+Typhoeus
+ubermetrics-technologies
+uclassify
+UdmSearch
+unchaos
+unirest-java
+UniversalFeedParser
+Unshorten\.It
+Untiny
+UnwindFetchor
+updated
+updown\.io daemon
+Upflow
+Uptimia
+Urlcheckr
+URL Verifier
+URLitor
+urlresolver
+Urlstat
+URLTester
+UrlTrends Ranking Updater
+URLy Warning
+URLy\.Warning
+Vacuum
+Vagabondo
+VB Project
+vBSEO
+VCI
+via ggpht\.com GoogleImageProxy
+VidibleScraper
+Virusdie
+visionutils
+vkShare
+VoidEYE
+Voil
+voltron
+voyager\/
+VSAgent\/
+VSB-TUO\/
+Vulnbusters Meter
+VYU2
+w3af\.org
+W3C_Unicorn
+W3C-checklink
+W3C-mobileOK
+WAC-OFU
+Wallpapers\/[0-9]+
+WallpapersHD
+wangling
+Wappalyzer
+WatchMouse
+WbSrch\/
+WDT\.io
+web-capture\.net
+Web-sniffer
+Web Auto
+Web Collage
+Web Enhancer
+Web Fetch
+Web Fuck
+Web Pix
+Web Sauger
+Web Sucker
+Webalta
+Webauskunft
+WebAuto
+WebCapture
+WebClient\/
+webcollage
+WebCookies
+WebCopier
+WebCorp
+WebDataStats
+WebDoc
+WebEnhancer
+WebFetch
+WebFuck
+WebGazer
+WebGo IS
+WebImageCollector
+WebImages
+WebIndex
+webkit2png
+WebLeacher
+webmastercoffee
+webmon
+WebPix
+WebReaper
+WebSauger
+webscreenie
+Webshag
+Webshot
+Website Quester
+websitepulse agent
+WebsiteQuester
+Websnapr
+WebSniffer
+Webster
+WebStripper
+WebSucker
+Webthumb\/
+WebThumbnail
+WebWhacker
+WebZIP
+WeLikeLinks
+WEPA
+WeSEE
+wf84
+Wfuzz\/
+wget
+WhatsApp
+WhatsMyIP
+WhatWeb
+WhereGoes\?
+Whibse
+WhoRunsCoinHive
+Whynder Magnet
+Windows-RSS-Platform
+WinPodder
+wkhtmlto
+wmtips
+Woko
+woorankreview
+Word\/
+WordPress\/
+WordupinfoSearch
+wotbox
+WP Engine Install Performance API
+wpif
+wprecon\.com survey
+WPScan
+wscheck
+Wtrace
+WWW-Collector-E
+WWW-Mechanize
+WWW::Document
+WWW::Mechanize
+www\.monitor\.us
+WWWOFFLE
+x09Mozilla
+x22Mozilla
+XaxisSemanticsClassifier
+Xenu Link Sleuth
+XING-contenttabreceiver
+xpymep([0-9]?)\.exe
+Y!J-(ASR|BSC)
+Y\!J-BRW
+Yaanb
+yacy
+Yahoo Link Preview
+YahooCacheSystem
+YahooYSMcm
+YandeG
+Yandex(?!Search)
+yanga
+yeti
+Yo-yo
+Yoleo Consumer
+yoogliFetchAgent
+YottaaMonitor
+Your-Website-Sucks
+yourls\.org
+YoYs\.net
+YP\.PL
+Zabbix
+Zade
+Zao
+Zauba
+Zemanta Aggregator
+Zend_Http_Client
+Zend\\Http\\Client
+Zermelo
+Zeus
+zgrab
+ZnajdzFoto
+Zombie\.js
+Zoom\.Mac
+ZyBorg
+[a-z0-9\-_]*(bot|crawl|archiver|transcoder|spider|uptime|validator|fetcher|cron|checker|reader|extractor|monitoring|analyzer)
\ No newline at end of file
--- /dev/null
+["Safari.[\\d\\.]*","Firefox.[\\d\\.]*"," Chrome.[\\d\\.]*","Chromium.[\\d\\.]*","MSIE.[\\d\\.]","Opera\\\/[\\d\\.]*","Mozilla.[\\d\\.]*","AppleWebKit.[\\d\\.]*","Trident.[\\d\\.]*","Windows NT.[\\d\\.]*","Android [\\d\\.]*","Macintosh.","Ubuntu","Linux","[ ]Intel","Mac OS X [\\d_]*","(like )?Gecko(.[\\d\\.]*)?","KHTML,","CriOS.[\\d\\.]*","CPU iPhone OS ([0-9_])* like Mac OS X","CPU OS ([0-9_])* like Mac OS X","iPod","compatible","x86_..","i686","x64","X11","rv:[\\d\\.]*","Version.[\\d\\.]*","WOW64","Win64","Dalvik.[\\d\\.]*"," \\.NET CLR [\\d\\.]*","Presto.[\\d\\.]*","Media Center PC","BlackBerry","Build","Opera Mini\\\/\\d{1,2}\\.\\d{1,2}\\.[\\d\\.]*\\\/\\d{1,2}\\.","Opera"," \\.NET[\\d\\.]*","cubot","; M bot","; CRONO","; B bot","; IDbot","; ID bot","; POWER BOT",";"]
\ No newline at end of file
--- /dev/null
+Safari.[\d\.]*
+Firefox.[\d\.]*
+ Chrome.[\d\.]*
+Chromium.[\d\.]*
+MSIE.[\d\.]
+Opera\/[\d\.]*
+Mozilla.[\d\.]*
+AppleWebKit.[\d\.]*
+Trident.[\d\.]*
+Windows NT.[\d\.]*
+Android [\d\.]*
+Macintosh.
+Ubuntu
+Linux
+[ ]Intel
+Mac OS X [\d_]*
+(like )?Gecko(.[\d\.]*)?
+KHTML,
+CriOS.[\d\.]*
+CPU iPhone OS ([0-9_])* like Mac OS X
+CPU OS ([0-9_])* like Mac OS X
+iPod
+compatible
+x86_..
+i686
+x64
+X11
+rv:[\d\.]*
+Version.[\d\.]*
+WOW64
+Win64
+Dalvik.[\d\.]*
+ \.NET CLR [\d\.]*
+Presto.[\d\.]*
+Media Center PC
+BlackBerry
+Build
+Opera Mini\/\d{1,2}\.\d{1,2}\.[\d\.]*\/\d{1,2}\.
+Opera
+ \.NET[\d\.]*
+cubot
+; M bot
+; CRONO
+; B bot
+; IDbot
+; ID bot
+; POWER BOT
+;
\ No newline at end of file
--- /dev/null
+["HTTP_USER_AGENT","HTTP_X_OPERAMINI_PHONE_UA","HTTP_X_DEVICE_USER_AGENT","HTTP_X_ORIGINAL_USER_AGENT","HTTP_X_SKYFIRE_PHONE","HTTP_X_BOLT_PHONE_UA","HTTP_DEVICE_STOCK_UA","HTTP_X_UCBROWSER_DEVICE_UA","HTTP_FROM","HTTP_X_SCANNER"]
\ No newline at end of file
--- /dev/null
+HTTP_USER_AGENT
+HTTP_X_OPERAMINI_PHONE_UA
+HTTP_X_DEVICE_USER_AGENT
+HTTP_X_ORIGINAL_USER_AGENT
+HTTP_X_SKYFIRE_PHONE
+HTTP_X_BOLT_PHONE_UA
+HTTP_DEVICE_STOCK_UA
+HTTP_X_UCBROWSER_DEVICE_UA
+HTTP_FROM
+HTTP_X_SCANNER
\ No newline at end of file
--- /dev/null
+<?php
+
+/*
+ * This file is part of Crawler Detect - the web crawler detection library.
+ *
+ * (c) Mark Beech <m@rkbee.ch>
+ *
+ * This source file is subject to the MIT license that is bundled
+ * with this source code in the file LICENSE.
+ */
+
+namespace Jaybizzle\CrawlerDetect;
+
+use Jaybizzle\CrawlerDetect\Fixtures\Crawlers;
+use Jaybizzle\CrawlerDetect\Fixtures\Exclusions;
+use Jaybizzle\CrawlerDetect\Fixtures\Headers;
+
+class CrawlerDetect
+{
+    /**
+     * The user agent string checked by isCrawler() when no explicit
+     * argument is passed. Stays null when no user agent was supplied and
+     * none of the known user-agent headers are present.
+     *
+     * @var string|null
+     */
+    protected $userAgent = null;
+
+    /**
+     * Request headers, restricted to entries whose key starts with "HTTP_".
+     *
+     * @var array
+     */
+    protected $httpHeaders = array();
+
+    /**
+     * Regex matches captured by the most recent isCrawler() call.
+     *
+     * @var array
+     */
+    protected $matches = array();
+
+    /**
+     * Crawlers object (provides the crawler patterns).
+     *
+     * @var \Jaybizzle\CrawlerDetect\Fixtures\Crawlers
+     */
+    protected $crawlers;
+
+    /**
+     * Exclusions object (provides the patterns stripped before matching).
+     *
+     * @var \Jaybizzle\CrawlerDetect\Fixtures\Exclusions
+     */
+    protected $exclusions;
+
+    /**
+     * Headers object (provides the names of headers that may carry a user agent).
+     *
+     * @var \Jaybizzle\CrawlerDetect\Fixtures\Headers
+     */
+    protected $uaHttpHeaders;
+
+    /**
+     * The compiled crawler regex string (without delimiters).
+     *
+     * @var string
+     */
+    protected $compiledRegex;
+
+    /**
+     * The compiled exclusions regex string (without delimiters).
+     *
+     * @var string
+     */
+    protected $compiledExclusions;
+
+    /**
+     * Class constructor.
+     *
+     * Loads the fixture objects, compiles both pattern lists once, then
+     * records the headers and the user agent for later isCrawler() calls.
+     *
+     * @param array|null  $headers   Header map; the global $_SERVER is used when null or empty.
+     * @param string|null $userAgent Explicit user agent; derived from the headers when null.
+     */
+    public function __construct(array $headers = null, $userAgent = null)
+    {
+        $this->crawlers = new Crawlers();
+        $this->exclusions = new Exclusions();
+        $this->uaHttpHeaders = new Headers();
+
+        $this->compiledRegex = $this->compileRegex($this->crawlers->getAll());
+        $this->compiledExclusions = $this->compileRegex($this->exclusions->getAll());
+
+        // Headers must be stored first: setUserAgent() reads them when
+        // no explicit user agent is given.
+        $this->setHttpHeaders($headers);
+        $this->setUserAgent($userAgent);
+    }
+
+    /**
+     * Compile the regex patterns into one regex string.
+     *
+     * The patterns are joined verbatim into a single alternation group; no
+     * quoting is applied, so every pattern must already be valid inside a
+     * "/"-delimited expression.
+     *
+     * @param array $patterns
+     *
+     * @return string
+     */
+    public function compileRegex($patterns)
+    {
+        return '('.implode('|', $patterns).')';
+    }
+
+    /**
+     * Set HTTP headers.
+     *
+     * @param array|null $httpHeaders
+     */
+    public function setHttpHeaders($httpHeaders)
+    {
+        // Use global _SERVER if $httpHeaders aren't defined.
+        if (! is_array($httpHeaders) || ! count($httpHeaders)) {
+            $httpHeaders = $_SERVER;
+        }
+
+        // Clear existing headers.
+        $this->httpHeaders = array();
+
+        // Only save HTTP headers. In PHP land, that means
+        // only _SERVER vars that start with HTTP_.
+        foreach ($httpHeaders as $key => $value) {
+            if (strpos($key, 'HTTP_') === 0) {
+                $this->httpHeaders[$key] = $value;
+            }
+        }
+    }
+
+    /**
+     * Return user agent headers.
+     *
+     * @return array
+     */
+    public function getUaHttpHeaders()
+    {
+        return $this->uaHttpHeaders->getAll();
+    }
+
+    /**
+     * Set the user agent.
+     *
+     * When null is passed, the user agent is assembled by concatenating the
+     * value of every known user-agent header that is present, each followed
+     * by a single space. If none is present the user agent remains null.
+     *
+     * @param string|null $userAgent
+     *
+     * @return string|null The user agent that was stored.
+     */
+    public function setUserAgent($userAgent)
+    {
+        if (is_null($userAgent)) {
+            foreach ($this->getUaHttpHeaders() as $altHeader) {
+                if (isset($this->httpHeaders[$altHeader])) {
+                    $userAgent .= $this->httpHeaders[$altHeader].' ';
+                }
+            }
+        }
+
+        return $this->userAgent = $userAgent;
+    }
+
+    /**
+     * Check user agent string against the regex.
+     *
+     * The exclusion patterns are stripped from the user agent first; what
+     * is left is matched case-insensitively against the crawler patterns.
+     *
+     * @param string|null $userAgent User agent to test; the stored one is used when empty.
+     *
+     * @return bool
+     */
+    public function isCrawler($userAgent = null)
+    {
+        // Drop the result of any earlier call so getMatches() can never
+        // report a crawler name that belongs to a previous user agent.
+        $this->matches = array();
+
+        // Both the argument and the stored user agent may be null; cast to
+        // string so null is never handed to preg_replace()/trim().
+        $subject = (string) ($userAgent ?: $this->userAgent);
+
+        $agent = trim((string) preg_replace(
+            "/{$this->compiledExclusions}/i",
+            '',
+            $subject
+        ));
+
+        // Nothing left after removing the exclusions: not a crawler.
+        if ($agent === '') {
+            return false;
+        }
+
+        // preg_match() overwrites $this->matches itself, leaving it empty
+        // when the user agent does not match.
+        return (bool) preg_match("/{$this->compiledRegex}/i", $agent, $this->matches);
+    }
+
+    /**
+     * Return the matches.
+     *
+     * @return string|null The text matched by the last isCrawler() call, or null if it did not match.
+     */
+    public function getMatches()
+    {
+        return isset($this->matches[0]) ? $this->matches[0] : null;
+    }
+}
--- /dev/null
+<?php
+
+/*
+ * This file is part of Crawler Detect - the web crawler detection library.
+ *
+ * (c) Mark Beech <m@rkbee.ch>
+ *
+ * This source file is subject to the MIT license that is bundled
+ * with this source code in the file LICENSE.
+ */
+
+namespace Jaybizzle\CrawlerDetect\Fixtures;
+
+abstract class AbstractProvider
+{
+    /**
+     * The data set.
+     *
+     * Concrete providers override this with their own list. It defaults to an
+     * empty array so that getAll() always returns an array, as documented,
+     * even for a provider that does not define any data.
+     *
+     * @var array
+     */
+    protected $data = array();
+
+    /**
+     * Return the data set.
+     *
+     * @return array
+     */
+    public function getAll()
+    {
+        return $this->data;
+    }
+}
--- /dev/null
+<?php
+
+/*
+ * This file is part of Crawler Detect - the web crawler detection library.
+ *
+ * (c) Mark Beech <m@rkbee.ch>
+ *
+ * This source file is subject to the MIT license that is bundled
+ * with this source code in the file LICENSE.
+ */
+
+namespace Jaybizzle\CrawlerDetect\Fixtures;
+
+class Crawlers extends AbstractProvider
+{
+    /**
+     * Array of regular expressions to match against the user agent.
+     *
+     * Each entry is a regex fragment, not a plain string: regex
+     * metacharacters that are meant literally (such as "." and "/") must be
+     * escaped.
+     *
+     * @var array
+     */
+    protected $data = array(
+        '.*Java.*outbrain',
+        ' YLT',
+        '^b0t$',
+        '^bluefish ',
+        '^Calypso v\/',
+        '^COMODO DCV',
+        '^DangDang',
+        '^DavClnt',
+        '^FDM ',
+        '^git\/',
+        '^Goose\/',
+        '^Grabber',
+        '^HTTPClient\/',
+        '^Java\/',
+        '^Jeode\/',
+        '^Jetty\/',
+        '^Mail\/',
+        '^Mget',
+        '^Microsoft URL Control',
+        '^NG\/[0-9\.]',
+        '^NING\/',
+        '^PHP\/[0-9]',
+        '^RMA\/',
+        '^Ruby|Ruby\/[0-9]',
+        '^VSE\/[0-9]',
+        '^WordPress\.com',
+        '^XRL\/[0-9]',
+        '^ZmEu',
+        '008\/',
+        '13TABS',
+        '192\.comAgent',
+        '2ip\.ru',
+        '404enemy',
+        '7Siters',
+        '80legs',
+        'a\.pr-cy\.ru',
+        'a3logics\.in',
+        'A6-Indexer',
+        'Abonti',
+        'Aboundex',
+        'aboutthedomain',
+        'Accoona-AI-Agent',
+        'acoon',
+        'acrylicapps\.com\/pulp',
+        'Acunetix',
+        'AdAuth\/',
+        'adbeat',
+        'AddThis',
+        'ADmantX',
+        'AdminLabs',
+        'adressendeutschland',
+        'adscanner',
+        'Adstxtaggregator',
+        'agentslug',
+        'AHC',
+        'aihit',
+        'aiohttp\/',
+        'Airmail',
+        'akka-http\/',
+        'akula\/',
+        'alertra',
+        'alexa site audit',
+        'Alibaba\.Security\.Heimdall',
+        'Alligator',
+        'allloadin',
+        'AllSubmitter',
+        'alyze\.info',
+        'amagit',
+        'Anarchie',
+        'AndroidDownloadManager',
+        'Anemone',
+        'AngleSharp',
+        'annotate_google',
+        'Ant\.com',
+        'Anturis Agent',
+        'AnyEvent-HTTP\/',
+        'Apache Droid',
+        'Apache OpenOffice',
+        'Apache-HttpAsyncClient',
+        'Apache-HttpClient',
+        'ApacheBench',
+        'Apexoo',
+        'APIs-Google',
+        'AportWorm\/',
+        'AppBeat\/',
+        'AppEngine-Google',
+        'AppStoreScraperZ',
+        'Aprc\/[0-9]',
+        'Arachmo',
+        'arachnode',
+        'Arachnophilia',
+        'aria2',
+        'Arukereso',
+        'asafaweb',
+        'AskQuickly',
+        'Ask Jeeves',
+        'ASPSeek',
+        'Asterias',
+        'Astute',
+        'asynchttp',
+        'Attach',
+        'autocite',
+        'Autonomy',
+        'axios\/',
+        'B-l-i-t-z-B-O-T',
+        'Backlink-Ceck',
+        'backlink-check',
+        'BacklinkHttpStatus',
+        'BackStreet',
+        'BackWeb',
+        'Bad-Neighborhood',
+        'Badass',
+        'baidu\.com',
+        'Bandit',
+        'basicstate',
+        'BatchFTP',
+        'Battleztar Bazinga',
+        'baypup\/',
+        'BazQux',
+        'BBBike',
+        'BCKLINKS',
+        'BDFetch',
+        'BegunAdvertising',
+        'Bidtellect',
+        'BigBozz',
+        'Bigfoot',
+        'biglotron',
+        'BingLocalSearch',
+        'BingPreview',
+        'binlar',
+        'biNu image cacher',
+        'Bitacle',
+        'biz_Directory',
+        'Black Hole',
+        'Blackboard Safeassign',
+        'BlackWidow',
+        'BlockNote\.Net',
+        'Bloglines',
+        'Bloglovin',
+        'BlogPulseLive',
+        'BlogSearch',
+        'Blogtrottr',
+        'BlowFish',
+        'boitho\.com-dc',
+        'BPImageWalker',
+        'Braintree-Webhooks',
+        'Branch Metrics API',
+        'Branch-Passthrough',
+        'Brandprotect',
+        'BrandVerity',
+        'Brandwatch',
+        'Brodie\/',
+        'Browsershots',
+        'BUbiNG',
+        'Buck\/',
+        'Buddy',
+        'BuiltWith',
+        'Bullseye',
+        'BunnySlippers',
+        'Burf Search',
+        'Butterfly\/',
+        'BuzzSumo',
+        'CAAM\/[0-9]',
+        'CakePHP',
+        'Calculon',
+        'Canary%20Mail',
+        'CaretNail',
+        'catexplorador',
+        'CC Metadata Scaper',
+        'Cegbfeieh',
+        'censys',
+        'Cerberian Drtrs',
+        'CERT\.at-Statistics-Survey',
+        'cg-eye',
+        'changedetection',
+        'ChangesMeter',
+        'Charlotte',
+        'CheckHost',
+        'checkprivacy',
+        'CherryPicker',
+        'ChinaClaw',
+        'Chirp\/',
+        'chkme\.com',
+        'Chlooe',
+        'Chromaxa',
+        'CirrusExplorer',
+        'CISPA Vulnerability Notification',
+        'Citoid',
+        'CJNetworkQuality',
+        'Clarsentia',
+        'clips\.ua\.ac\.be',
+        'Cloud mapping',
+        'CloudEndure',
+        'CloudFlare-AlwaysOnline',
+        'Cloudinary',
+        'cmcm\.com',
+        'coccoc',
+        'cognitiveseo',
+        'colly -',
+        'CommaFeed',
+        'Commons-HttpClient',
+        'commonscan',
+        'contactbigdatafr',
+        'contentkingapp',
+        'convera',
+        'CookieReports',
+        'copyright sheriff',
+        'CopyRightCheck',
+        'Copyscape',
+        'Cosmos4j\.feedback',
+        'Covario-IDS',
+        'Crescent',
+        'Crowsnest',
+        'Criteo',
+        'CSHttp',
+        'curb',
+        'Curious George',
+        'curl',
+        'cuwhois\/',
+        'cybo\.com',
+        'DAP\/NetHTTP',
+        'DareBoost',
+        'DatabaseDriverMysqli',
+        'DataCha0s',
+        'Datafeedwatch',
+        'Datanyze',
+        'DataparkSearch',
+        'dataprovider',
+        'DataXu',
+        'Daum(oa)?[ \/][0-9]',
+        'Demon',
+        'DeuSu',
+        'developers\.google\.com\/\+\/web\/snippet\/',
+        'Devil',
+        'Digg',
+        'Digincore',
+        'DigitalPebble',
+        'Dirbuster',
+        'Discourse Forum Onebox',
+        'Disqus\/',
+        'Dispatch\/',
+        'DittoSpyder',
+        'dlvr',
+        'DMBrowser',
+        'DNSPod-reporting',
+        'docoloc',
+        'Dolphin http client',
+        'DomainAppender',
+        'Donuts Content Explorer',
+        'dotMailer content retrieval',
+        'dotSemantic',
+        'downforeveryoneorjustme',
+        'Download Wonder',
+        'downnotifier',
+        'DowntimeDetector',
+        'Drip',
+        'drupact',
+        'Drupal \(\+http:\/\/drupal\.org\/\)',
+        'DTS Agent',
+        'dubaiindex',
+        'EARTHCOM',
+        'Easy-Thumb',
+        'EasyDL',
+        'Ebingbong',
+        'ec2linkfinder',
+        'eCairn-Grabber',
+        'eCatch',
+        'ECCP',
+        'eContext\/',
+        'Ecxi',
+        'EirGrabber',
+        'ElectricMonk',
+        'elefent',
+        'EMail Exractor',
+        'EMail Wolf',
+        'EmailWolf',
+        'Embarcadero',
+        'Embed PHP Library',
+        'Embedly',
+        'endo\/',
+        'europarchive\.org',
+        'evc-batch',
+        'EventMachine HttpClient',
+        'Everwall Link Expander',
+        'Evidon',
+        'Evrinid',
+        'ExactSearch',
+        'ExaleadCloudview',
+        'Excel\/',
+        'exif',
+        'Exploratodo',
+        'Express WebPictures',
+        'Extreme Picture Finder',
+        'EyeNetIE',
+        'ezooms',
+        'facebookexternalhit',
+        'facebookplatform',
+        'fairshare',
+        'Faraday v',
+        'fasthttp',
+        'Faveeo',
+        'Favicon downloader',
+        'faviconkit',
+        'faviconarchive',
+        'FavOrg',
+        'Feed Wrangler',
+        'Feedable\/',
+        'Feedbin',
+        'FeedBooster',
+        'FeedBucket',
+        'FeedBunch\/',
+        'FeedBurner',
+        'feeder',
+        'Feedly',
+        'FeedshowOnline',
+        'Feedspot',
+        'Feedwind\/',
+        'FeedZcollector',
+        'feeltiptop',
+        'Fetch API',
+        'Fetch\/[0-9]',
+        'Fever\/[0-9]',
+        'FHscan',
+        'Fimap',
+        'findlink',
+        'findthatfile',
+        'FlashGet',
+        'FlipboardBrowserProxy',
+        'FlipboardProxy',
+        'FlipboardRSS',
+        'Flock\/',
+        'fluffy',
+        'Flunky',
+        'flynxapp',
+        'forensiq',
+        'FoundSeoTool',
+        // Dots are escaped so that only the literal host name matches.
+        'http:\/\/www\.neomo\.de\/', // 'Francis [Bot]'
+        'free thumbnails',
+        'Freeuploader',
+        'Funnelback',
+        'G-i-g-a-b-o-t',
+        'g00g1e\.net',
+        'ganarvisitas',
+        'geek-tools',
+        'Genieo',
+        'GentleSource',
+        'GetCode',
+        'Getintent',
+        'GetLinkInfo',
+        'getprismatic',
+        'GetRight',
+        'getroot',
+        'GetURLInfo\/',
+        'GetWeb',
+        'Ghost Inspector',
+        'GigablastOpenSource',
+        'GIS-LABS',
+        'github-camo',
+        'github\.com',
+        'Go [\d\.]* package http',
+        'Go http package',
+        'Go-Ahead-Got-It',
+        'Go-http-client',
+        'Go!Zilla',
+        'gobyus',
+        'gofetch',
+        'GomezAgent',
+        'gooblog',
+        'Goodzer\/',
+        'Google AppsViewer',
+        'Google Desktop',
+        'Google favicon',
+        'Google Keyword Suggestion',
+        'Google Keyword Tool',
+        'Google Page Speed Insights',
+        'Google PP Default',
+        'Google Search Console',
+        'Google Web Preview',
+        'Google-Adwords',
+        'Google-Apps-Script',
+        'Google-Calendar-Importer',
+        'Google-HotelAdsVerifier',
+        'Google-HTTP-Java-Client',
+        'Google-Publisher-Plugin',
+        'Google-SearchByImage',
+        'Google-Site-Verification',
+        'Google-Structured-Data-Testing-Tool',
+        'Google-Youtube-Links',
+        'google-xrawler',
+        'GoogleDocs',
+        'GoogleHC\/',
+        'GoogleProducer',
+        'GoogleSites',
+        'Google-Transparency-Report',
+        'Gookey',
+        'GoScraper',
+        'GoSpotCheck',
+        'gosquared-thumbnailer',
+        'Gotit',
+        'GoZilla',
+        'grabify',
+        'GrabNet',
+        'Grafula',
+        'Grammarly',
+        'GrapeFX',
+        'GreatNews',
+        'Gregarius',
+        'GRequests',
+        'grokkit',
+        'grouphigh',
+        'grub-client',
+        'gSOAP\/',
+        'GT::WWW',
+        'GTmetrix',
+        'GuzzleHttp',
+        'gvfs\/',
+        'HAA(A)?RTLAND http client',
+        'Haansoft',
+        'hackney\/',
+        'Hadi Agent',
+        'HappyApps-WebCheck',
+        'Hatena',
+        'Havij',
+        'HeadlessChrome',
+        'HEADMasterSEO',
+        'HeartRails_Capture',
+        'help@dataminr\.com',
+        'heritrix',
+        'historious',
+        'hkedcity',
+        'hledejLevne\.cz',
+        'Hloader',
+        'HMView',
+        'Holmes',
+        'HonesoSearchEngine',
+        'HootSuite Image proxy',
+        'Hootsuite-WebFeed',
+        'hosterstats',
+        'HostTracker',
+        'ht:\/\/check',
+        'htdig',
+        'HTMLparser',
+        'htmlyse',
+        'HTTP Banner Detection',
+        'HTTP_Compression_Test',
+        'http_request2',
+        'http_requester',
+        'http-get',
+        'HTTP-Header-Abfrage',
+        'http-kit',
+        'http-request\/',
+        'HTTP-Tiny',
+        'HTTP::Lite',
+        'http\.rb\/',
+        'http_get',
+        'HttpComponents',
+        'httphr',
+        'HTTPMon',
+        'httpRequest',
+        'httpscheck',
+        'httpssites_power',
+        'httpunit',
+        'HttpUrlConnection',
+        'httrack',
+        'huaweisymantec',
+        'HubSpot ',
+        'Humanlinks',
+        'i2kconnect\/',
+        'Iblog',
+        'ichiro',
+        'Id-search',
+        'IdeelaborPlagiaat',
+        'IDG Twitter Links Resolver',
+        'IDwhois\/',
+        'Iframely',
+        'igdeSpyder',
+        'IlTrovatore',
+        'Image Fetch',
+        'Image Sucker',
+        'ImageEngine\/',
+        'ImageVisu\/',
+        'Imagga',
+        'imagineeasy',
+        'imgsizer',
+        'InAGist',
+        'inbound\.li parser',
+        'InDesign%20CC',
+        'Indy Library',
+        'InetURL',
+        'infegy',
+        'infohelfer',
+        'InfoTekies',
+        'InfoWizards Reciprocal Link',
+        'inpwrd\.com',
+        'instabid',
+        'Instapaper',
+        'Integrity',
+        'integromedb',
+        'Intelliseek',
+        'InterGET',
+        'internet_archive',
+        'Internet Ninja',
+        'InternetSeer',
+        'internetVista monitor',
+        'intraVnews',
+        'IODC',
+        'IOI',
+        'iplabel',
+        'ips-agent',
+        'IPS\/[0-9]',
+        'IPWorks HTTP\/S Component',
+        'iqdb\/',
+        'Iria',
+        'Irokez',
+        'isitup\.org',
+        'iskanie',
+        'isUp\.li',
+        'iThemes Sync\/',
+        'iZSearch',
+        'JAHHO',
+        'janforman',
+        'Jaunt\/',
+        'Jbrofuzz',
+        'Jersey\/',
+        'JetCar',
+        'Jigsaw',
+        'Jobboerse',
+        'JobFeed discovery',
+        'Jobg8 URL Monitor',
+        'jobo',
+        'Jobrapido',
+        'Jobsearch1\.5',
+        'JoinVision Generic',
+        'JolokiaPwn',
+        'Joomla',
+        'Jorgee',
+        'JS-Kit',
+        'JustView',
+        'Kaspersky Lab CFR link resolver',
+        'Kelny\/',
+        'Kerrigan\/',
+        'KeyCDN',
+        'Keyword Density',
+        'Keywords Research',
+        'KickFire',
+        'KimonoLabs\/',
+        'Kml-Google',
+        'knows\.is',
+        'KOCMOHABT',
+        'kouio',
+        'kube-probe',
+        'kulturarw3',
+        'KumKie',
+        'L\.webis',
+        'Larbin',
+        'Lavf\/',
+        'LeechFTP',
+        'LeechGet',
+        'letsencrypt',
+        'Lftp',
+        'LibVLC',
+        'LibWeb',
+        'Libwhisker',
+        'libwww',
+        'Licorne',
+        'Liferea\/',
+        'Lightspeedsystems',
+        'Lighthouse',
+        'Likse',
+        'Link Valet',
+        'link_thumbnailer',
+        'LinkAlarm\/',
+        'linkCheck',
+        'linkdex',
+        'LinkExaminer',
+        'linkfluence',
+        'linkpeek',
+        'LinkPreviewGenerator',
+        'LinkScan',
+        'LinksManager',
+        'LinkTiger',
+        'LinkWalker',
+        'Lipperhey',
+        'Litemage_walker',
+        'livedoor ScreenShot',
+        'LoadImpactRload',
+        'localsearch-web',
+        'LongURL API',
+        'looksystems\.net',
+        'ltx71',
+        'lua-resty-http',
+        'lwp-request',
+        'lwp-trivial',
+        'LWP::Simple',
+        'lycos',
+        'LYT\.SR',
+        'mabontland',
+        'Mag-Net',
+        'MagpieRSS',
+        'Mail\.Ru',
+        'MailChimp',
+        'Majestic12',
+        'makecontact\/',
+        'Mandrill',
+        'MapperCmd',
+        'marketinggrader',
+        'MarkMonitor',
+        'MarkWatch',
+        'Mass Downloader',
+        'masscan\/',
+        'Mata Hari',
+        'Mediapartners-Google',
+        'mediawords',
+        'MegaIndex\.ru',
+        'MeltwaterNews',
+        'Melvil Rawi',
+        'MemGator',
+        'Metaspinner',
+        'MetaURI',
+        'MFC_Tear_Sample',
+        'Microsearch',
+        'Microsoft Office ',
+        'Microsoft Outlook',
+        'Microsoft Windows Network Diagnostics',
+        'Microsoft-WebDAV-MiniRedir',
+        'Microsoft Data Access',
+        'MIDown tool',
+        'MIIxpc',
+        'Mindjet',
+        'Miniature\.io',
+        'Miniflux',
+        'Mister PiX',
+        'mixdata dot com',
+        'mixed-content-scan',
+        'Mixmax-LinkPreview',
+        'mixnode',
+        'Mnogosearch',
+        'mogimogi',
+        'Mojeek',
+        'Mojolicious \(Perl\)',
+        'Monit\/',
+        'monitis',
+        'Monitority\/',
+        'montastic',
+        'MonTools',
+        'Moreover',
+        'Morfeus Fucking Scanner',
+        'Morning Paper',
+        'MovableType',
+        'mowser',
+        'Mrcgiguy',
+        'MS Web Services Client Protocol',
+        'MSFrontPage',
+        'mShots',
+        'MuckRack\/',
+        'muhstik-scan',
+        'MVAClient',
+        'MxToolbox\/',
+        'nagios',
+        'Najdi\.si',
+        'Name Intelligence',
+        'Nameprotect',
+        'Navroad',
+        'NearSite',
+        'Needle',
+        'Nessus',
+        'Net Vampire',
+        'NetAnts',
+        'NETCRAFT',
+        'NetLyzer',
+        'NetMechanic',
+        'NetNewsWire',
+        'Netpursual',
+        'netresearch',
+        'NetShelter ContentScan',
+        'Netsparker',
+        'NetTrack',
+        'Netvibes',
+        'NetZIP',
+        'Neustar WPM',
+        'NeutrinoAPI',
+        'NewRelicPinger',
+        'NewsBlur .*Finder',
+        'NewsGator',
+        'newsme',
+        'newspaper\/',
+        'Nexgate Ruby Client',
+        'NG-Search',
+        'Nibbler',
+        'NICErsPRO',
+        'Nikto',
+        'nineconnections',
+        'NLNZ_IAHarvester',
+        'Nmap Scripting Engine',
+        'node-superagent',
+        'node-urllib',
+        'node\.io',
+        'Nodemeter',
+        'NodePing',
+        'nominet\.org\.uk',
+        'nominet\.uk',
+        'Norton-Safeweb',
+        'Notifixious',
+        'notifyninja',
+        'nuhk',
+        'nutch',
+        'Nuzzel',
+        'nWormFeedFinder',
+        'nyawc\/',
+        'Nymesis',
+        'NYU',
+        'Ocelli\/',
+        'Octopus',
+        'oegp',
+        'Offline Explorer',
+        'Offline Navigator',
+        'og-scraper',
+        'okhttp',
+        'omgili',
+        'OMSC',
+        'Online Domain Tools',
+        'OpenCalaisSemanticProxy',
+        'Openfind',
+        'OpenLinkProfiler',
+        'Openstat\/',
+        'OpenVAS',
+        'Optimizer',
+        'Orbiter',
+        'OrgProbe\/',
+        'orion-semantics',
+        'Outlook-Express',
+        'Outlook-iOS',
+        'ow\.ly',
+        'Owler',
+        'ownCloud News',
+        'OxfordCloudService',
+        'Page Valet',
+        'page_verifier',
+        'page scorer',
+        'page2rss',
+        'PageGrabber',
+        'PagePeeker',
+        'PageScorer',
+        'Pagespeed\/',
+        'Panopta',
+        'panscient',
+        'Papa Foto',
+        'parsijoo',
+        'Pavuk',
+        'PayPal IPN',
+        'pcBrowser',
+        'Pcore-HTTP',
+        'Pearltrees',
+        'PECL::HTTP',
+        'peerindex',
+        'Peew',
+        'PeoplePal',
+        'Perlu -',
+        'PhantomJS Screenshoter',
+        'PhantomJS\/',
+        'Photon\/',
+        'phpservermon',
+        'Pi-Monster',
+        'Picscout',
+        'Picsearch',
+        'PictureFinder',
+        'Pimonster',
+        'ping\.blo\.gs',
+        'Pingability',
+        'PingAdmin\.Ru',
+        'Pingdom',
+        'Pingoscope',
+        'PingSpot',
+        'pinterest\.com',
+        'Pixray',
+        'Pizilla',
+        'Plagger\/',
+        'Ploetz \+ Zeller',
+        'Plukkie',
+        'plumanalytics',
+        'PocketImageCache',
+        'PocketParser',
+        'Pockey',
+        'POE-Component-Client-HTTP',
+        'Polymail\/',
+        'Pompos',
+        'Porkbun',
+        'Port Monitor',
+        'postano',
+        'PostmanRuntime',
+        'PostPost',
+        'postrank',
+        'PowerPoint\/',
+        'Priceonomics Analysis Engine',
+        'PrintFriendly',
+        'PritTorrent',
+        'Prlog',
+        'probethenet',
+        'Project 25499',
+        'prospectb2b',
+        'Protopage',
+        'ProWebWalker',
+        'proximic',
+        'PRTG Network Monitor',
+        'pshtt, https scanning',
+        'PTST ',
+        'PTST\/[0-9]+',
+        'Pulsepoint XT3 web scraper',
+        'Pump',
+        'Python-httplib2',
+        'python-requests',
+        'Python-urllib',
+        'Qirina Hurdler',
+        'QQDownload',
+        'QrafterPro',
+        'Qseero',
+        'Qualidator',
+        'QueryN Metasearch',
+        'queuedriver',
+        'Quora Link Preview',
+        'Qwantify',
+        'Radian6',
+        'RankActive',
+        'RankFlex',
+        'RankSonicSiteAuditor',
+        'Re-re Studio',
+        'ReactorNetty',
+        'Readability',
+        'RealDownload',
+        'RealPlayer%20Downloader',
+        'RebelMouse',
+        'Recorder',
+        'RecurPost\/',
+        'redback\/',
+        'ReederForMac',
+        'ReGet',
+        'RepoMonkey',
+        'request\.js',
+        'reqwest\/',
+        'ResponseCodeTest',
+        'RestSharp',
+        'Riddler',
+        'Rival IQ',
+        'Robosourcer',
+        'Robozilla',
+        'ROI Hunter',
+        'RPT-HTTPClient',
+        'RSSOwl',
+        'safe-agent-scanner',
+        'SalesIntelligent',
+        'Saleslift',
+        'Sendsay\.Ru',
+        'SauceNAO',
+        'SBIder',
+        'scalaj-http',
+        'scan\.lol',
+        'ScanAlert',
+        'Scoop',
+        'scooter',
+        'ScoutJet',
+        'ScoutURLMonitor',
+        'ScrapeBox Page Scanner',
+        'SimpleScraper',
+        'Scrapy',
+        'Screaming',
+        'ScreenShotService',
+        'Scrubby',
+        'Scrutiny\/',
+        'search\.thunderstone',
+        'Search37',
+        'searchenginepromotionhelp',
+        'Searchestate',
+        'SearchExpress',
+        'SearchSight',
+        'Seeker',
+        'semanticdiscovery',
+        'semanticjuice',
+        'Semiocast HTTP client',
+        'Semrush',
+        'sentry\/',
+        'SEO Browser',
+        'Seo Servis',
+        'seo-nastroj\.cz',
+        'seo4ajax',
+        'Seobility',
+        'SEOCentro',
+        'SeoCheck',
+        'SEOkicks',
+        'Seomoz',
+        'SEOprofiler',
+        'SEOsearch',
+        'seoscanners',
+        'seositecheckup',
+        'SEOstats',
+        'servernfo',
+        'sexsearcher',
+        'Seznam',
+        'Shelob',
+        'Shodan',
+        'Shoppimon',
+        'ShopWiki',
+        'ShortLinkTranslate',
+        'shrinktheweb',
+        'Sideqik',
+        'SimplePie',
+        'SimplyFast',
+        'Siphon',
+        'SISTRIX',
+        'Site-Shot\/',
+        'Site Sucker',
+        'Site24x7',
+        'SiteBar',
+        'Sitebeam',
+        'Sitebulb\/',
+        'SiteCondor',
+        'SiteExplorer',
+        'SiteGuardian',
+        'Siteimprove',
+        'SiteIndexed',
+        'Sitemap(s)? Generator',
+        'SitemapGenerator',
+        'SiteMonitor',
+        'Siteshooter B0t',
+        'SiteSnagger',
+        'SiteSucker',
+        'SiteTruth',
+        'Sitevigil',
+        'sitexy\.com',
+        'SkypeUriPreview',
+        'Slack\/',
+        'slider\.com',
+        'slurp',
+        'SlySearch',
+        'SmartDownload',
+        'SMRF URL Expander',
+        'SMUrlExpander',
+        'Snake',
+        'Snappy',
+        'SnapSearch',
+        'Snarfer\/',
+        'SniffRSS',
+        'sniptracker',
+        'Snoopy',
+        'SnowHaze Search',
+        'sogou web',
+        'SortSite',
+        'Sottopop',
+        'sovereign\.ai',
+        'SpaceBison',
+        'SpamExperts',
+        'Spammen',
+        'Spanner',
+        'spaziodati',
+        'SPDYCheck',
+        'Specificfeeds',
+        'speedy',
+        'SPEng',
+        'Spinn3r',
+        'spray-can',
+        'Sprinklr ',
+        'spyonweb',
+        'sqlmap',
+        'Sqlworm',
+        'Sqworm',
+        'SSL Labs',
+        'ssl-tools',
+        'StackRambler',
+        'Statastico\/',
+        'StatusCake',
+        'Steeler',
+        'Stratagems Kumo',
+        'Stroke\.cz',
+        'StudioFACA',
+        'StumbleUpon',
+        'suchen',
+        'Sucuri',
+        'summify',
+        'SuperHTTP',
+        'Surphace Scout',
+        'Suzuran',
+        'SwiteScraper',
+        'Symfony BrowserKit',
+        'Symfony2 BrowserKit',
+        'SynHttpClient-Built',
+        'Sysomos',
+        'sysscan',
+        'Szukacz',
+        'T0PHackTeam',
+        'tAkeOut',
+        'Tarantula\/',
+        'Taringa UGC',
+        'TarmotGezgin',
+        'Teleport',
+        'Telesoft',
+        'Telesphoreo',
+        'Telesphorep',
+        'Tenon\.io',
+        'teoma',
+        'terrainformatica',
+        'Test Certificate Info',
+        'testuri',
+        'Tetrahedron',
+        'The Drop Reaper',
+        'The Expert HTML Source Viewer',
+        'The Knowledge AI',
+        'The Intraformant',
+        'theinternetrules',
+        'TheNomad',
+        'Thinklab',
+        'Thumbshots',
+        'ThumbSniper',
+        'timewe\.net',
+        'TinEye',
+        'Tiny Tiny RSS',
+        'TLSProbe\/',
+        'Toata',
+        'topster',
+        'touche\.com',
+        'Traackr\.com',
+        'tracemyfile',
+        'Trackuity',
+        'TrapitAgent',
+        'Trendiction',
+        'Trendsmap',
+        'trendspottr',
+        'truwoGPS',
+        'TryJsoup',
+        'TulipChain',
+        'Turingos',
+        'Turnitin',
+        'tweetedtimes',
+        'Tweetminster',
+        'Tweezler\/',
+        'twibble',
+        'Twice',
+        'Twikle',
+        'Twingly',
+        'Twisted PageGetter',
+        'Typhoeus',
+        'ubermetrics-technologies',
+        'uclassify',
+        'UdmSearch',
+        'unchaos',
+        'unirest-java',
+        'UniversalFeedParser',
+        'Unshorten\.It',
+        'Untiny',
+        'UnwindFetchor',
+        'updated',
+        'updown\.io daemon',
+        'Upflow',
+        'Uptimia',
+        'Urlcheckr',
+        'URL Verifier',
+        'URLitor',
+        'urlresolver',
+        'Urlstat',
+        'URLTester',
+        'UrlTrends Ranking Updater',
+        'URLy Warning',
+        'URLy\.Warning',
+        'Vacuum',
+        'Vagabondo',
+        'VB Project',
+        'vBSEO',
+        'VCI',
+        'via ggpht\.com GoogleImageProxy',
+        'VidibleScraper',
+        'Virusdie',
+        'visionutils',
+        'vkShare',
+        'VoidEYE',
+        'Voil',
+        'voltron',
+        'voyager\/',
+        'VSAgent\/',
+        'VSB-TUO\/',
+        'Vulnbusters Meter',
+        'VYU2',
+        'w3af\.org',
+        'W3C_Unicorn',
+        'W3C-checklink',
+        'W3C-mobileOK',
+        'WAC-OFU',
+        'Wallpapers\/[0-9]+',
+        'WallpapersHD',
+        'wangling',
+        'Wappalyzer',
+        'WatchMouse',
+        'WbSrch\/',
+        'WDT\.io',
+        'web-capture\.net',
+        'Web-sniffer',
+        'Web Auto',
+        'Web Collage',
+        'Web Enhancer',
+        'Web Fetch',
+        'Web Fuck',
+        'Web Pix',
+        'Web Sauger',
+        'Web Sucker',
+        'Webalta',
+        'Webauskunft',
+        'WebAuto',
+        'WebCapture',
+        'WebClient\/',
+        'webcollage',
+        'WebCookies',
+        'WebCopier',
+        'WebCorp',
+        'WebDataStats',
+        'WebDoc',
+        'WebEnhancer',
+        'WebFetch',
+        'WebFuck',
+        'WebGazer',
+        'WebGo IS',
+        'WebImageCollector',
+        'WebImages',
+        'WebIndex',
+        'webkit2png',
+        'WebLeacher',
+        'webmastercoffee',
+        'webmon ',
+        'WebPix',
+        'WebReaper',
+        'WebSauger',
+        'webscreenie',
+        'Webshag',
+        'Webshot',
+        'Website Quester',
+        'websitepulse agent',
+        'WebsiteQuester',
+        'Websnapr',
+        'WebSniffer',
+        'Webster',
+        'WebStripper',
+        'WebSucker',
+        'Webthumb\/',
+        'WebThumbnail',
+        'WebWhacker',
+        'WebZIP',
+        'WeLikeLinks',
+        'WEPA',
+        'WeSEE',
+        'wf84',
+        'Wfuzz\/',
+        'wget',
+        'WhatsApp',
+        'WhatsMyIP',
+        'WhatWeb',
+        'WhereGoes\?',
+        'Whibse',
+        'WhoRunsCoinHive',
+        'Whynder Magnet',
+        'Windows-RSS-Platform',
+        'WinPodder',
+        'wkhtmlto',
+        'wmtips',
+        'Woko',
+        'woorankreview',
+        'Word\/',
+        'WordPress\/',
+        'WordupinfoSearch',
+        'wotbox',
+        'WP Engine Install Performance API',
+        'wpif',
+        'wprecon\.com survey',
+        'WPScan',
+        'wscheck',
+        'Wtrace',
+        'WWW-Collector-E',
+        'WWW-Mechanize',
+        'WWW::Document',
+        'WWW::Mechanize',
+        'www\.monitor\.us',
+        'WWWOFFLE',
+        'x09Mozilla',
+        'x22Mozilla',
+        'XaxisSemanticsClassifier',
+        'Xenu Link Sleuth',
+        'XING-contenttabreceiver',
+        'xpymep([0-9]?)\.exe',
+        'Y!J-(ASR|BSC)',
+        'Y\!J-BRW',
+        'Yaanb',
+        'yacy',
+        'Yahoo Link Preview',
+        'YahooCacheSystem',
+        'YahooYSMcm',
+        'YandeG',
+        'Yandex(?!Search)',
+        'yanga',
+        'yeti',
+        'Yo-yo',
+        'Yoleo Consumer',
+        'yoogliFetchAgent',
+        'YottaaMonitor',
+        'Your-Website-Sucks',
+        'yourls\.org',
+        'YoYs\.net',
+        'YP\.PL',
+        'Zabbix',
+        'Zade',
+        'Zao',
+        'Zauba',
+        'Zemanta Aggregator',
+        'Zend_Http_Client',
+        'Zend\\\\Http\\\\Client',
+        'Zermelo',
+        'Zeus ',
+        'zgrab',
+        'ZnajdzFoto',
+        'Zombie\.js',
+        'Zoom\.Mac',
+        'ZyBorg',
+        // Generic catch-all for agents that name themselves after what they do.
+        '[a-z0-9\-_]*(bot|crawl|archiver|transcoder|spider|uptime|validator|fetcher|cron|checker|reader|extractor|monitoring|analyzer)',
+    );
+}
--- /dev/null
+<?php
+
+/*
+ * This file is part of Crawler Detect - the web crawler detection library.
+ *
+ * (c) Mark Beech <m@rkbee.ch>
+ *
+ * This source file is subject to the MIT license that is bundled
+ * with this source code in the file LICENSE.
+ */
+
+namespace Jaybizzle\CrawlerDetect\Fixtures;
+
+class Exclusions extends AbstractProvider
+{
+    /**
+     * Regex fragments that are stripped from the user agent before the
+     * crawler regex runs.
+     *
+     * Removing common browser and platform tokens up front leaves less text
+     * for the crawler regex to scan; upstream reports roughly a 55% speed
+     * increase over a large list of user agents.
+     *
+     * @var array
+     */
+    protected $data = [
+        'Safari.[\d\.]*',
+        'Firefox.[\d\.]*',
+        ' Chrome.[\d\.]*',
+        'Chromium.[\d\.]*',
+        'MSIE.[\d\.]',
+        'Opera\/[\d\.]*',
+        'Mozilla.[\d\.]*',
+        'AppleWebKit.[\d\.]*',
+        'Trident.[\d\.]*',
+        'Windows NT.[\d\.]*',
+        'Android [\d\.]*',
+        'Macintosh.',
+        'Ubuntu',
+        'Linux',
+        '[ ]Intel',
+        'Mac OS X [\d_]*',
+        '(like )?Gecko(.[\d\.]*)?',
+        'KHTML,',
+        'CriOS.[\d\.]*',
+        'CPU iPhone OS ([0-9_])* like Mac OS X',
+        'CPU OS ([0-9_])* like Mac OS X',
+        'iPod',
+        'compatible',
+        'x86_..',
+        'i686',
+        'x64',
+        'X11',
+        'rv:[\d\.]*',
+        'Version.[\d\.]*',
+        'WOW64',
+        'Win64',
+        'Dalvik.[\d\.]*',
+        ' \.NET CLR [\d\.]*',
+        'Presto.[\d\.]*',
+        'Media Center PC',
+        'BlackBerry',
+        'Build',
+        'Opera Mini\/\d{1,2}\.\d{1,2}\.[\d\.]*\/\d{1,2}\.',
+        'Opera',
+        ' \.NET[\d\.]*',
+        'cubot',
+        '; M bot',
+        '; CRONO',
+        '; B bot',
+        '; IDbot',
+        '; ID bot',
+        '; POWER BOT',
+        ';', // Finally, drop any semicolons that are still left.
+    ];
+}
--- /dev/null
+<?php
+
+/*
+ * This file is part of Crawler Detect - the web crawler detection library.
+ *
+ * (c) Mark Beech <m@rkbee.ch>
+ *
+ * This source file is subject to the MIT license that is bundled
+ * with this source code in the file LICENSE.
+ */
+
+namespace Jaybizzle\CrawlerDetect\Fixtures;
+
+class Headers extends AbstractProvider
+{
+    /**
+     * Names of the server variables (HTTP headers) that may carry a user
+     * agent string.
+     *
+     * @var array
+     */
+    protected $data = [
+        // Standard User-Agent header.
+        'HTTP_USER_AGENT',
+        // May be sent by devices running Opera Mini.
+        'HTTP_X_OPERAMINI_PHONE_UA',
+        // Vodafone-specific header, see http://www.seoprinciple.com/mobile-web-community-still-angry-at-vodafone/24/
+        'HTTP_X_DEVICE_USER_AGENT',
+        'HTTP_X_ORIGINAL_USER_AGENT',
+        'HTTP_X_SKYFIRE_PHONE',
+        'HTTP_X_BOLT_PHONE_UA',
+        'HTTP_DEVICE_STOCK_UA',
+        'HTTP_X_UCBROWSER_DEVICE_UA',
+        // Some bots (Google in particular) send a genuine browser user agent
+        // but put their e-mail address into this header.
+        'HTTP_FROM',
+        // Seen in use by Netsparker.
+        'HTTP_X_SCANNER',
+    ];
+}
+++ /dev/null
-<?php
-/**
- * Name: botdetection
- * Description: Blocking bots based on detecting bots/crawlers/spiders via the user agent and http_from header.
- * Version: 0.1
- * Author: Philipp Holzer <admin@philipp.info>
- *
- */
-
-use Friendica\App;
-use Friendica\Core\Hook;
-use Friendica\Core\System;
-use Jaybizzle\CrawlerDetect\CrawlerDetect;
-
-function botdetection_install() {
- Hook::register('init_1', 'addon/botdetection/botdetection.php', 'botdetection_init_1');
-}
-
-
-function botdetection_uninstall() {
- Hook::unregister('init_1', 'addon/botdetection/botdetection.php', 'botdetection_init_1');
-}
-
-function botdetection_init_1(App $a) {
- $crawlerDetect = new CrawlerDetect();
-
- if ($crawlerDetect->isCrawler()) {
- System::httpExit(404, 'Bots are not allowed');
- }
-}