<?php
/**
- * @copyright Copyright (C) 2010-2021, the Friendica project
+ * @copyright Copyright (C) 2010-2022, the Friendica project
*
* @license GNU AGPL version 3 or any later version
*
use Friendica\Database\Database;
use Friendica\Database\DBA;
use Friendica\DI;
+use Friendica\Network\HTTPClient\Client\HttpClientAccept;
use Friendica\Network\HTTPException;
use Friendica\Network\HTTPClient\Client\HttpClientOptions;
/**
* Fetch the content type of the given url
- * @param string $url URL of the page
+ * @param string $url URL of the page
+ * @param string $accept content-type to accept
+ * @param int $timeout Request timeout; when empty (0, the default) no timeout option is passed to the HTTP client
* @return array content type
*/
- public static function getContentType(string $url)
+ public static function getContentType(string $url, string $accept = HttpClientAccept::DEFAULT, int $timeout = 0)
{
- $curlResult = DI::httpClient()->head($url);
+ if (!empty($timeout)) {
+ $options = [HttpClientOptions::TIMEOUT => $timeout];
+ } else {
+ $options = [];
+ }
+
+ $curlResult = DI::httpClient()->head($url, array_merge([HttpClientOptions::ACCEPT_CONTENT => $accept], $options));
+
+ // Workaround for systems that can't handle a HEAD request: retry with GET when the return code is >= 400, but don't retry on timeouts (408, 504).
+ if (!$curlResult->isSuccess() && ($curlResult->getReturnCode() >= 400) && !in_array($curlResult->getReturnCode(), [408, 504])) {
+ $curlResult = DI::httpClient()->get($url, $accept, array_merge([HttpClientOptions::CONTENT_LENGTH => 1000000], $options));
+ }
+
if (!$curlResult->isSuccess()) {
+ Logger::debug('Got HTTP Error', ['http error' => $curlResult->getReturnCode(), 'url' => $url]);
return [];
}
$contenttype = $curlResult->getHeader('Content-Type')[0] ?? '';
if (empty($contenttype)) {
- return [];
+ return ['application', 'octet-stream'];
}
return explode('/', current(explode(';', $contenttype)));
return $siteinfo;
}
- $curlResult = DI::httpClient()->get($url, [HttpClientOptions::CONTENT_LENGTH => 1000000]);
+ $curlResult = DI::httpClient()->get($url, HttpClientAccept::HTML, [HttpClientOptions::CONTENT_LENGTH => 1000000]);
if (!$curlResult->isSuccess() || empty($curlResult->getBody())) {
+ Logger::info('Empty body or error when fetching', ['url' => $url, 'success' => $curlResult->isSuccess(), 'code' => $curlResult->getReturnCode()]);
return $siteinfo;
}
$siteinfo['language'] = trim($meta_tag['content']);
break;
case 'og:type':
- $siteinfo['type'] = trim($meta_tag['content']);
+ $siteinfo['pagetype'] = trim($meta_tag['content']);
break;
case 'twitter:description':
$siteinfo['text'] = trim($meta_tag['content']);