X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=src%2FUtil%2FParseUrl.php;h=24be55cab931ced253b360a51070f93852dcb0d0;hb=093dd70e790def206d6f7d48cfac095c29eb10a5;hp=03cfd7d7611cf338f71f375763be4dcc28b3fb20;hpb=d62f21cfa146d6ecb704e54ae765780830edcf0d;p=friendica.git diff --git a/src/Util/ParseUrl.php b/src/Util/ParseUrl.php index 03cfd7d761..24be55cab9 100644 --- a/src/Util/ParseUrl.php +++ b/src/Util/ParseUrl.php @@ -1,6 +1,6 @@ head($url); + + // Workaround for systems that can't handle a HEAD request + if (!$curlResult->isSuccess() && ($curlResult->getReturnCode() == 405)) { + $curlResult = DI::httpClient()->get($url, [HttpClientOptions::CONTENT_LENGTH => 1000000]); + } + if (!$curlResult->isSuccess()) { return []; } $contenttype = $curlResult->getHeader('Content-Type')[0] ?? ''; if (empty($contenttype)) { - return []; + return ['application', 'octet-stream']; } return explode('/', current(explode(';', $contenttype))); @@ -216,6 +222,7 @@ class ParseUrl $curlResult = DI::httpClient()->get($url, [HttpClientOptions::CONTENT_LENGTH => 1000000]); if (!$curlResult->isSuccess() || empty($curlResult->getBody())) { + Logger::info('Empty body or error when fetching', ['url' => $url, 'success' => $curlResult->isSuccess(), 'code' => $curlResult->getReturnCode()]); return $siteinfo; } @@ -451,6 +458,12 @@ class ParseUrl case 'og:site_name': $siteinfo['publisher_name'] = trim($meta_tag['content']); break; + case 'og:locale': + $siteinfo['language'] = trim($meta_tag['content']); + break; + case 'og:type': + $siteinfo['pagetype'] = trim($meta_tag['content']); + break; case 'twitter:description': $siteinfo['text'] = trim($meta_tag['content']); break; @@ -521,7 +534,7 @@ class ParseUrl * * @param string $page_url * @param array $siteinfo - * @return void + * @return array */ private static function checkMedia(string $page_url, array $siteinfo) : array { @@ -967,6 +980,16 @@ class ParseUrl } } + $content = JsonLD::fetchElement($jsonld, 'datePublished'); + if (!empty($content) && is_string($content)) { + $jsonldinfo['published'] = DateTimeFormat::utc($content); + } + + $content = JsonLD::fetchElement($jsonld, 'dateModified'); + if (!empty($content) && is_string($content)) { + $jsonldinfo['modified'] = DateTimeFormat::utc($content); + } + $jsonldinfo = self::parseJsonLdAuthor($jsonldinfo, $jsonld); Logger::info('Fetched article information', ['url' => $siteinfo['url'], 'fetched' => $jsonldinfo]);