"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
"This file is @generated automatically"
],
- "content-hash": "7d6dee6e449da931e8fe209e61b2e78e",
+ "content-hash": "c9e0a9eacc23d884012042eeab01cc8b",
"packages": [
{
"name": "asika/simple-console",
],
"time": "2017-07-19T15:11:19+00:00"
},
+ {
+ "name": "mattwright/urlresolver",
+ "version": "2.0",
+ "source": {
+ "type": "git",
+ "url": "https://github.com/mattwright/URLResolver.php.git",
+ "reference": "416039192cb6d9158bdacd68349bceff8739b857"
+ },
+ "dist": {
+ "type": "zip",
+ "url": "https://api.github.com/repos/mattwright/URLResolver.php/zipball/416039192cb6d9158bdacd68349bceff8739b857",
+ "reference": "416039192cb6d9158bdacd68349bceff8739b857",
+ "shasum": ""
+ },
+ "require": {
+ "ext-curl": "*",
+ "ext-mbstring": "*",
+ "php": ">=5.3"
+ },
+ "type": "library",
+ "autoload": {
+ "psr-4": {
+ "mattwright\\": "."
+ }
+ },
+ "notification-url": "https://packagist.org/downloads/",
+ "license": [
+ "MIT"
+ ],
+ "authors": [
+ {
+ "name": "Matt Wright",
+ "email": "mw@mattwright.com"
+ }
+ ],
+ "description": "PHP class that attempts to resolve URLs to a final, canonical link.",
+ "homepage": "https://github.com/mattwright/URLResolver.php",
+ "keywords": [
+ "canonical",
+ "link",
+ "redirect",
+ "resolve",
+ "url"
+ ],
+ "time": "2019-01-18T00:59:34+00:00"
+ },
{
"name": "michelf/php-markdown",
"version": "1.9.0",
namespace Friendica\Network;
-use DOMDocument;
-use DomXPath;
-use Friendica\Core\Config\IConfig;
use Friendica\Core\System;
use Friendica\Util\Network;
use Friendica\Util\Profiler;
use GuzzleHttp\Exception\RequestException;
use GuzzleHttp\Exception\TransferException;
use GuzzleHttp\RequestOptions;
+use mattwright\URLResolver;
use Psr\Http\Message\ResponseInterface;
use Psr\Log\LoggerInterface;
private $logger;
/** @var Profiler */
private $profiler;
- /** @var IConfig */
- private $config;
- /** @var string */
- private $userAgent;
/** @var Client */
private $client;
+ /** @var URLResolver */
+ private $resolver;
- public function __construct(LoggerInterface $logger, Profiler $profiler, IConfig $config, string $userAgent, Client $client)
+ public function __construct(LoggerInterface $logger, Profiler $profiler, Client $client, URLResolver $resolver)
{
- $this->logger = $logger;
- $this->profiler = $profiler;
- $this->config = $config;
- $this->userAgent = $userAgent;
- $this->client = $client;
+ $this->logger = $logger;
+ $this->profiler = $profiler;
+ $this->client = $client;
+ $this->resolver = $resolver;
}
/**
return CurlResult::createErrorCurl($url);
}
+ if (Network::isRedirectBlocked($url)) {
+ $this->logger->info('Domain should not be redirected.', ['url' => $url]);
+ return CurlResult::createErrorCurl($url);
+ }
+
$conf = [];
if (!empty($opts['cookiejar'])) {
/**
* {@inheritDoc}
*/
- public function finalUrl(string $url, int $depth = 1, bool $fetchbody = false)
+ public function finalUrl(string $url)
{
+ $this->profiler->startRecording('network');
+
if (Network::isLocalLink($url)) {
- $this->logger->info('Local link', ['url' => $url, 'callstack' => System::callstack(20)]);
+ $this->logger->debug('Local link', ['url' => $url, 'callstack' => System::callstack(20)]);
}
if (Network::isUrlBlocked($url)) {
$url = Network::stripTrackingQueryParams($url);
- if ($depth > 10) {
- return $url;
- }
-
$url = trim($url, "'");
- $this->profiler->startRecording('network');
-
- $ch = curl_init();
- curl_setopt($ch, CURLOPT_URL, $url);
- curl_setopt($ch, CURLOPT_HEADER, 1);
- curl_setopt($ch, CURLOPT_NOBODY, 1);
- curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
- curl_setopt($ch, CURLOPT_TIMEOUT, 10);
- curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
- curl_setopt($ch, CURLOPT_USERAGENT, $this->userAgent);
-
- curl_exec($ch);
- $curl_info = @curl_getinfo($ch);
- $http_code = $curl_info['http_code'];
- curl_close($ch);
-
- $this->profiler->stopRecording();
-
- if ($http_code == 0) {
- return $url;
- }
+ // Designate a temporary file that will store cookies during the session.
+ // Some websites test the browser for cookie support, so this enhances results.
+ $this->resolver->setCookieJar(tempnam(get_temppath() , 'url_resolver-'));
- if (in_array($http_code, ['301', '302'])) {
- if (!empty($curl_info['redirect_url'])) {
- return $this->finalUrl($curl_info['redirect_url'], ++$depth, $fetchbody);
- } elseif (!empty($curl_info['location'])) {
- return $this->finalUrl($curl_info['location'], ++$depth, $fetchbody);
- }
- }
+ $urlResult = $this->resolver->resolveURL($url);
- // Check for redirects in the meta elements of the body if there are no redirects in the header.
- if (!$fetchbody) {
- return $this->finalUrl($url, ++$depth, true);
- }
-
- // if the file is too large then exit
- if ($curl_info["download_content_length"] > 1000000) {
- return $url;
- }
-
- // if it isn't a HTML file then exit
- if (!empty($curl_info["content_type"]) && !strstr(strtolower($curl_info["content_type"]), "html")) {
- return $url;
- }
-
- $this->profiler->startRecording('network');
-
- $ch = curl_init();
- curl_setopt($ch, CURLOPT_URL, $url);
- curl_setopt($ch, CURLOPT_HEADER, 0);
- curl_setopt($ch, CURLOPT_NOBODY, 0);
- curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
- curl_setopt($ch, CURLOPT_TIMEOUT, 10);
- curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
- curl_setopt($ch, CURLOPT_USERAGENT, $this->userAgent);
-
- $body = curl_exec($ch);
- curl_close($ch);
-
- $this->profiler->stopRecording();
-
- if (trim($body) == "") {
- return $url;
- }
-
- // Check for redirect in meta elements
- $doc = new DOMDocument();
- @$doc->loadHTML($body);
-
- $xpath = new DomXPath($doc);
-
- $list = $xpath->query("//meta[@content]");
- foreach ($list as $node) {
- $attr = [];
- if ($node->attributes->length) {
- foreach ($node->attributes as $attribute) {
- $attr[$attribute->name] = $attribute->value;
- }
- }
-
- if (@$attr["http-equiv"] == 'refresh') {
- $path = $attr["content"];
- $pathinfo = explode(";", $path);
- foreach ($pathinfo as $value) {
- if (substr(strtolower($value), 0, 4) == "url=") {
- return $this->finalUrl(substr($value, 4), ++$depth);
- }
- }
- }
+ if ($urlResult->didErrorOccur()) {
+ throw new TransferException($urlResult->getErrorMessageString());
}
- return $url;
+ return $urlResult->getURL();
}
/**