X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=src%2FUtil%2FNetwork.php;h=255b3c8c4d3e1b8d1f41d1b8358b27251102a5d2;hb=50da89d861dce3b648c8f9e5c1e4c480ee320a43;hp=1f9184ae968f178dc22ff8029ac56d1776de0d4f;hpb=2e420a15f8f3e469f9ea3fd298b1283d9806698e;p=friendica.git diff --git a/src/Util/Network.php b/src/Util/Network.php index 1f9184ae96..255b3c8c4d 100644 --- a/src/Util/Network.php +++ b/src/Util/Network.php @@ -4,28 +4,24 @@ */ namespace Friendica\Util; -use Friendica\App; use Friendica\Core\Addon; -use Friendica\Core\L10n; +use Friendica\Core\Logger; use Friendica\Core\System; use Friendica\Core\Config; -use Friendica\Network\Probe; -use Friendica\Object\Image; -use Friendica\Util\XML; +use Friendica\Network\CurlResult; use DOMDocument; use DomXPath; -require_once 'library/slinky.php'; - class Network { /** - * @brief Curl wrapper + * Curl wrapper * * If binary flag is true, return binary results. * Set the cookiejar argument to a string (e.g. "/tmp/friendica-cookies.txt") * to preserve cookies from one request to the next. * + * @brief Curl wrapper * @param string $url URL to fetch * @param boolean $binary default false * TRUE if asked to return binary results (file download) @@ -36,9 +32,33 @@ class Network * * @return string The fetched content */ - public static function fetchUrl($url, $binary = false, &$redirects = 0, $timeout = 0, $accept_content = null, $cookiejar = 0) + public static function fetchUrl($url, $binary = false, &$redirects = 0, $timeout = 0, $accept_content = null, $cookiejar = '') { - $ret = self::curl( + $ret = self::fetchUrlFull($url, $binary, $redirects, $timeout, $accept_content, $cookiejar); + + return $ret->getBody(); + } + + /** + * Curl wrapper with array of return values. + * + * Inner workings and parameters are the same as @ref fetchUrl but returns an array with + * all the information collected during the fetch. + * + * @brief Curl wrapper with array of return values. + * @param string $url URL to fetch + * @param boolean $binary default false + * TRUE if asked to return binary results (file download) + * @param integer $redirects The recursion counter for internal use - default 0 + * @param integer $timeout Timeout in seconds, default system config value or 60 seconds + * @param string $accept_content supply Accept: header with 'accept_content' as the value + * @param string $cookiejar Path to cookie jar file + * + * @return CurlResult With all relevant information, 'body' contains the actual fetched content. + */ + public static function fetchUrlFull($url, $binary = false, &$redirects = 0, $timeout = 0, $accept_content = null, $cookiejar = '') + { + return self::curl( $url, $binary, $redirects, @@ -47,8 +67,6 @@ class Network 'cookiejar'=>$cookiejar ] ); - - return($ret['body']); } /** @@ -66,12 +84,7 @@ class Network * 'nobody' => only return the header * 'cookiejar' => path to cookie jar file * - * @return array an assoziative array with: - * int 'return_code' => HTTP return code or 0 if timeout or failure - * boolean 'success' => boolean true (if HTTP 2xx result) or false - * string 'redirect_url' => in case of redirect, content was finally retrieved from this URL - * string 'header' => HTTP headers - * string 'body' => fetched content + * @return CurlResult */ public static function curl($url, $binary = false, &$redirects = 0, $opts = []) { @@ -81,15 +94,27 @@ class Network $a = get_app(); + $parts = parse_url($url); + $path_parts = explode('/', defaults($parts, 'path', '')); + foreach ($path_parts as $part) { + if (strlen($part) <> mb_strlen($part)) { + $parts2[] = rawurlencode($part); + } else { + $parts2[] = $part; + } + } + $parts['path'] = implode('/', $parts2); + $url = self::unparseURL($parts); + if (self::isUrlBlocked($url)) { - logger('z_fetch_url: domain of ' . $url . ' is blocked', LOGGER_DATA); - return $ret; + Logger::log('domain of ' . $url . ' is blocked', Logger::DATA); + return CurlResult::createErrorCurl($url); } $ch = @curl_init($url); if (($redirects > 8) || (!$ch)) { - return $ret; + return CurlResult::createErrorCurl($url); } @curl_setopt($ch, CURLOPT_HEADER, true); @@ -112,7 +137,7 @@ class Network } @curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); - @curl_setopt($ch, CURLOPT_USERAGENT, $a->get_useragent()); + @curl_setopt($ch, CURLOPT_USERAGENT, $a->getUserAgent()); $range = intval(Config::get('system', 'curl_range_bytes', 0)); @@ -170,8 +195,6 @@ class Network @curl_setopt($ch, CURLOPT_BINARYTRANSFER, 1); } - $a->set_curl_code(0); - // don't let curl abort the entire application // if it throws any errors. @@ -186,86 +209,20 @@ class Network $curl_info = @curl_getinfo($ch); } - if (curl_errno($ch) !== CURLE_OK) { - logger('fetch_url error fetching ' . $url . ': ' . curl_error($ch), LOGGER_NORMAL); - } - - $ret['errno'] = curl_errno($ch); - - $base = $s; - $ret['info'] = $curl_info; - - $http_code = $curl_info['http_code']; - - logger('fetch_url ' . $url . ': ' . $http_code . " " . $s, LOGGER_DATA); - $header = ''; + $curlResponse = new CurlResult($url, $s, $curl_info, curl_errno($ch), curl_error($ch)); - // Pull out multiple headers, e.g. proxy and continuation headers - // allow for HTTP/2.x without fixing code - - while (preg_match('/^HTTP\/[1-2].+? [1-5][0-9][0-9]/', $base)) { - $chunk = substr($base, 0, strpos($base, "\r\n\r\n") + 4); - $header .= $chunk; - $base = substr($base, strlen($chunk)); - } - - $a->set_curl_code($http_code); - $a->set_curl_content_type($curl_info['content_type']); - $a->set_curl_headers($header); - - if ($http_code == 301 || $http_code == 302 || $http_code == 303 || $http_code == 307) { - $new_location_info = @parse_url($curl_info['redirect_url']); - $old_location_info = @parse_url($curl_info['url']); - - $newurl = $curl_info['redirect_url']; - - if (($new_location_info['path'] == '') && ( $new_location_info['host'] != '')) { - $newurl = $new_location_info['scheme'] . '://' . $new_location_info['host'] . $old_location_info['path']; - } - - $matches = []; - - if (preg_match('/(Location:|URI:)(.*?)\n/i', $header, $matches)) { - $newurl = trim(array_pop($matches)); - } - if (strpos($newurl, '/') === 0) { - $newurl = $old_location_info["scheme"]."://".$old_location_info["host"].$newurl; - } - - if (filter_var($newurl, FILTER_VALIDATE_URL)) { - $redirects++; - @curl_close($ch); - return self::curl($newurl, $binary, $redirects, $opts); - } - } - - $a->set_curl_code($http_code); - $a->set_curl_content_type($curl_info['content_type']); - - $rc = intval($http_code); - $ret['return_code'] = $rc; - $ret['success'] = (($rc >= 200 && $rc <= 299) ? true : false); - $ret['redirect_url'] = $url; - - if (!$ret['success']) { - $ret['error'] = curl_error($ch); - $ret['debug'] = $curl_info; - logger('z_fetch_url: error: '.$url.': '.$ret['return_code'].' - '.$ret['error'], LOGGER_DEBUG); - logger('z_fetch_url: debug: '.print_r($curl_info, true), LOGGER_DATA); - } - - $ret['body'] = substr($s, strlen($header)); - $ret['header'] = $header; - - if (x($opts, 'debug')) { - $ret['debug'] = $curl_info; + if ($curlResponse->isRedirectUrl()) { + $redirects++; + Logger::log('curl: redirect ' . $url . ' to ' . $curlResponse->getRedirectUrl()); + @curl_close($ch); + return self::curl($curlResponse->getRedirectUrl(), $binary, $redirects, $opts); } @curl_close($ch); - $a->save_timestamp($stamp1, 'network'); + $a->saveTimestamp($stamp1, 'network'); - return($ret); + return $curlResponse; } /** @@ -277,31 +234,31 @@ class Network * @param integer $redirects Recursion counter for internal use - default = 0 * @param integer $timeout The timeout in seconds, default system config value or 60 seconds * - * @return string The content + * @return CurlResult The content */ public static function post($url, $params, $headers = null, &$redirects = 0, $timeout = 0) { $stamp1 = microtime(true); if (self::isUrlBlocked($url)) { - logger('post_url: domain of ' . $url . ' is blocked', LOGGER_DATA); - return false; + Logger::log('post_url: domain of ' . $url . ' is blocked', Logger::DATA); + return CurlResult::createErrorCurl($url); } $a = get_app(); $ch = curl_init($url); if (($redirects > 8) || (!$ch)) { - return false; + return CurlResult::createErrorCurl($url); } - logger('post_url: start ' . $url, LOGGER_DATA); + Logger::log('post_url: start ' . $url, Logger::DATA); curl_setopt($ch, CURLOPT_HEADER, true); curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); curl_setopt($ch, CURLOPT_POST, 1); curl_setopt($ch, CURLOPT_POSTFIELDS, $params); - curl_setopt($ch, CURLOPT_USERAGENT, $a->get_useragent()); + curl_setopt($ch, CURLOPT_USERAGENT, $a->getUserAgent()); if (Config::get('system', 'ipv4_resolve', false)) { curl_setopt($ch, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4); @@ -346,8 +303,6 @@ class Network } } - $a->set_curl_code(0); - // don't let curl abort the entire application // if it throws any errors. @@ -355,50 +310,23 @@ class Network $base = $s; $curl_info = curl_getinfo($ch); - $http_code = $curl_info['http_code']; - - logger('post_url: result ' . $http_code . ' - ' . $url, LOGGER_DATA); - $header = ''; + $curlResponse = new CurlResult($url, $s, $curl_info, curl_errno($ch), curl_error($ch)); - // Pull out multiple headers, e.g. proxy and continuation headers - // allow for HTTP/2.x without fixing code - - while (preg_match('/^HTTP\/[1-2].+? [1-5][0-9][0-9]/', $base)) { - $chunk = substr($base, 0, strpos($base, "\r\n\r\n") + 4); - $header .= $chunk; - $base = substr($base, strlen($chunk)); + if ($curlResponse->isRedirectUrl()) { + $redirects++; + Logger::log('post_url: redirect ' . $url . ' to ' . $curlResponse->getRedirectUrl()); + curl_close($ch); + return self::post($curlResponse->getRedirectUrl(), $params, $headers, $redirects, $timeout); } - if ($http_code == 301 || $http_code == 302 || $http_code == 303 || $http_code == 307) { - $matches = []; - preg_match('/(Location:|URI:)(.*?)\n/', $header, $matches); - $newurl = trim(array_pop($matches)); - - if (strpos($newurl, '/') === 0) { - $newurl = $old_location_info["scheme"] . "://" . $old_location_info["host"] . $newurl; - } - - if (filter_var($newurl, FILTER_VALIDATE_URL)) { - $redirects++; - logger('post_url: redirect ' . $url . ' to ' . $newurl); - return self::post($newurl, $params, $headers, $redirects, $timeout); - } - } - - $a->set_curl_code($http_code); - - $body = substr($s, strlen($header)); - - $a->set_curl_headers($header); - curl_close($ch); - $a->save_timestamp($stamp1, 'network'); + $a->saveTimestamp($stamp1, 'network'); - logger('post_url: end ' . $url, LOGGER_DATA); + Logger::log('post_url: end ' . $url, Logger::DATA); - return $body; + return $curlResponse; } /** @@ -428,7 +356,7 @@ class Network /// @TODO Really suppress function outcomes? Why not find them + debug them? $h = @parse_url($url); - if ((is_array($h)) && (@dns_get_record($h['host'], DNS_A + DNS_CNAME + DNS_PTR) || filter_var($h['host'], FILTER_VALIDATE_IP) )) { + if ((is_array($h)) && (@dns_get_record($h['host'], DNS_A + DNS_CNAME) || filter_var($h['host'], FILTER_VALIDATE_IP) )) { return $url; } @@ -453,7 +381,11 @@ class Network $h = substr($addr, strpos($addr, '@') + 1); - if (($h) && (dns_get_record($h, DNS_A + DNS_CNAME + DNS_PTR + DNS_MX) || filter_var($h, FILTER_VALIDATE_IP) )) { + // Concerning the @ see here: https://stackoverflow.com/questions/36280957/dns-get-record-a-temporary-server-error-occurred + if ($h && (@dns_get_record($h, DNS_A + DNS_MX) || filter_var($h, FILTER_VALIDATE_IP) )) { + return true; + } + if ($h && @dns_get_record($h, DNS_CNAME + DNS_MX)) { return true; } return false; @@ -515,21 +447,18 @@ class Network */ public static function isUrlBlocked($url) { - $h = @parse_url($url); - - if (! $h) { - return true; + $host = @parse_url($url, PHP_URL_HOST); + if (!$host) { + return false; } $domain_blocklist = Config::get('system', 'blocklist', []); - if (! $domain_blocklist) { + if (!$domain_blocklist) { return false; } - $host = strtolower($h['host']); - foreach ($domain_blocklist as $domain_block) { - if (strtolower($domain_block['domain']) == $host) { + if (strcasecmp($domain_block['domain'], $host) === 0) { return true; } } @@ -588,7 +517,7 @@ class Network public static function lookupAvatarByEmail($email) { - $avatar['size'] = 175; + $avatar['size'] = 300; $avatar['email'] = $email; $avatar['url'] = ''; $avatar['success'] = false; @@ -596,10 +525,10 @@ class Network Addon::callHooks('avatar_lookup', $avatar); if (! $avatar['success']) { - $avatar['url'] = System::baseUrl() . '/images/person-175.jpg'; + $avatar['url'] = System::baseUrl() . '/images/person-300.jpg'; } - logger('Avatar: ' . $avatar['email'] . ' ' . $avatar['url'], LOGGER_DEBUG); + Logger::log('Avatar: ' . $avatar['email'] . ' ' . $avatar['url'], Logger::DEBUG); return $avatar['url']; } @@ -612,7 +541,7 @@ class Network public static function stripTrackingQueryParams($url) { $urldata = parse_url($url); - if (is_string($urldata["query"])) { + if (!empty($urldata["query"])) { $query = $urldata["query"]; parse_str($query, $querydata); @@ -674,7 +603,7 @@ class Network $url = self::stripTrackingQueryParams($url); if ($depth > 10) { - return($url); + return $url; } $url = trim($url, "'"); @@ -687,26 +616,24 @@ class Network curl_setopt($ch, CURLOPT_NOBODY, 1); curl_setopt($ch, CURLOPT_TIMEOUT, 10); curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); - curl_setopt($ch, CURLOPT_USERAGENT, $a->get_useragent()); + curl_setopt($ch, CURLOPT_USERAGENT, $a->getUserAgent()); curl_exec($ch); $curl_info = @curl_getinfo($ch); $http_code = $curl_info['http_code']; curl_close($ch); - $a->save_timestamp($stamp1, "network"); + $a->saveTimestamp($stamp1, "network"); if ($http_code == 0) { - return($url); + return $url; } - if ((($curl_info['http_code'] == "301") || ($curl_info['http_code'] == "302")) - && (($curl_info['redirect_url'] != "") || ($curl_info['location'] != "")) - ) { - if ($curl_info['redirect_url'] != "") { - return(self::finalUrl($curl_info['redirect_url'], ++$depth, $fetchbody)); - } else { - return(self::finalUrl($curl_info['location'], ++$depth, $fetchbody)); + if (in_array($http_code, ['301', '302'])) { + if (!empty($curl_info['redirect_url'])) { + return self::finalUrl($curl_info['redirect_url'], ++$depth, $fetchbody); + } elseif (!empty($curl_info['location'])) { + return self::finalUrl($curl_info['location'], ++$depth, $fetchbody); } } @@ -717,12 +644,12 @@ class Network // if the file is too large then exit if ($curl_info["download_content_length"] > 1000000) { - return($url); + return $url; } // if it isn't a HTML file then exit - if (($curl_info["content_type"] != "") && !strstr(strtolower($curl_info["content_type"]), "html")) { - return($url); + if (!empty($curl_info["content_type"]) && !strstr(strtolower($curl_info["content_type"]), "html")) { + return $url; } $stamp1 = microtime(true); @@ -733,15 +660,15 @@ class Network curl_setopt($ch, CURLOPT_NOBODY, 0); curl_setopt($ch, CURLOPT_TIMEOUT, 10); curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); - curl_setopt($ch, CURLOPT_USERAGENT, $a->get_useragent()); + curl_setopt($ch, CURLOPT_USERAGENT, $a->getUserAgent()); $body = curl_exec($ch); curl_close($ch); - $a->save_timestamp($stamp1, "network"); + $a->saveTimestamp($stamp1, "network"); if (trim($body) == "") { - return($url); + return $url; } // Check for redirect in meta elements @@ -764,7 +691,7 @@ class Network $pathinfo = explode(";", $path); foreach ($pathinfo as $value) { if (substr(strtolower($value), 0, 4) == "url=") { - return(self::finalUrl(substr($value, 4), ++$depth)); + return self::finalUrl(substr($value, 4), ++$depth); } } } @@ -773,29 +700,6 @@ class Network return $url; } - public static function shortenUrl($url) - { - $slinky = new Slinky($url); - $yourls_url = Config::get('yourls', 'url1'); - if ($yourls_url) { - $yourls_username = Config::get('yourls', 'username1'); - $yourls_password = Config::get('yourls', 'password1'); - $yourls_ssl = Config::get('yourls', 'ssl1'); - $yourls = new Slinky_YourLS(); - $yourls->set('username', $yourls_username); - $yourls->set('password', $yourls_password); - $yourls->set('ssl', $yourls_ssl); - $yourls->set('yourls-url', $yourls_url); - $slinky->set_cascade([$yourls, new Slinky_Ur1ca(), new Slinky_TinyURL()]); - } else { - // setup a cascade of shortening services - // try to get a short link from these services - // in the order ur1.ca, tinyurl - $slinky->set_cascade([new Slinky_Ur1ca(), new Slinky_TinyURL()]); - } - return $slinky->short(); - } - /** * @brief Find the matching part between two url * @@ -819,14 +723,35 @@ class Network return ""; } + if (empty($parts1["scheme"])) { + $parts1["scheme"] = ''; + } + if (empty($parts2["scheme"])) { + $parts2["scheme"] = ''; + } + if ($parts1["scheme"] != $parts2["scheme"]) { return ""; } + if (empty($parts1["host"])) { + $parts1["host"] = ''; + } + if (empty($parts2["host"])) { + $parts2["host"] = ''; + } + if ($parts1["host"] != $parts2["host"]) { return ""; } + if (empty($parts1["port"])) { + $parts1["port"] = ''; + } + if (empty($parts2["port"])) { + $parts2["port"] = ''; + } + if ($parts1["port"] != $parts2["port"]) { return ""; } @@ -837,14 +762,21 @@ class Network $match .= ":".$parts1["port"]; } + if (empty($parts1["path"])) { + $parts1["path"] = ''; + } + if (empty($parts2["path"])) { + $parts2["path"] = ''; + } + $pathparts1 = explode("/", $parts1["path"]); $pathparts2 = explode("/", $parts2["path"]); $i = 0; $path = ""; do { - $path1 = $pathparts1[$i]; - $path2 = $pathparts2[$i]; + $path1 = defaults($pathparts1, $i, ''); + $path2 = defaults($pathparts2, $i, ''); if ($path1 == $path2) { $path .= $path1."/";