$html_text = mb_convert_encoding($text, 'HTML-ENTITIES', mb_detect_encoding($text));
// If it doesn't parse at all, just return the text.
- $dom = @DOMDocument::loadHTML($html_text);
- if (!$dom) {
+ $dom = new DOMDocument();
+ if (!@$dom->loadHTML($html_text)) {
return $text;
}
$xpath = new DOMXPath($dom);
public $type = '';
public $title = '';
+ public $description = '';
public $author_name = '';
public $author_url = '';
public $provider_name = '';
if (in_array(parse_url($url, PHP_URL_SCHEME), ['https', 'http'])) {
$options = [HttpClientOptions::VERIFY => true, HttpClientOptions::TIMEOUT => $xrd_timeout];
- $curlResult = DI::httpClient()->head($url, $options);
+ try {
+ $curlResult = DI::httpClient()->head($url, $options);
+ } catch (\Exception $e) {
+ return false;
+ }
// Workaround for systems that can't handle a HEAD request. Don't retry on timeouts.
if (!$curlResult->isSuccess() && ($curlResult->getReturnCode() >= 400) && !in_array($curlResult->getReturnCode(), [408, 504])) {
- $curlResult = DI::httpClient()->get($url, HttpClientAccept::DEFAULT, $options);
+ try {
+ $curlResult = DI::httpClient()->get($url, HttpClientAccept::DEFAULT, $options);
+ } catch (\Exception $e) {
+ return false;
+ }
}
if (!$curlResult->isSuccess()) {
$options = [];
}
- $curlResult = DI::httpClient()->head($url, array_merge([HttpClientOptions::ACCEPT_CONTENT => $accept], $options));
+ try {
+ $curlResult = DI::httpClient()->head($url, array_merge([HttpClientOptions::ACCEPT_CONTENT => $accept], $options));
+ } catch (\Exception $e) {
+ DI::logger()->debug('Got exception', ['url' => $url, 'message' => $e->getMessage()]);
+ return [];
+ }
// Workaround for systems that can't handle a HEAD request. Don't retry on timeouts.
if (!$curlResult->isSuccess() && ($curlResult->getReturnCode() >= 400) && !in_array($curlResult->getReturnCode(), [408, 504])) {