// Cap the whole transfer at 3 seconds.
curl_setopt($ch, CURLOPT_TIMEOUT, 3);
// Have curl_exec() hand the response back as a string instead of printing it.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
//curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
- //curl_setopt($ch,CURLOPT_USERAGENT,' Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:24.0) Gecko/20100101 Firefox/24.0');
- curl_setopt($ch,CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; Friendica)");
// The replacement user agent is built from the platform name, its version and the DB update version constants.
+ curl_setopt($ch,CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; ".FRIENDICA_PLATFORM." ".FRIENDICA_VERSION."-".DB_UPDATE_VERSION.")");
// NOTE(review): the result is named $header, but whether it holds headers, body or both
// depends on curl options set outside this view - confirm against the full function.
$header = curl_exec($ch);
// Collect transfer metadata; the "@" silences any warning raised by curl_getinfo().
$curl_info = @curl_getinfo($ch);
// Fetch the first mentioned charset. Can be in body or header
$charset = "";
// Non-greedy capture of whatever follows "charset=" up to the first quote or whitespace character.
if (preg_match('/charset=(.*?)['."'".'"\s\n]/', $header, $matches))
- $charset = trim(array_pop($matches));
// array_pop() yields the last entry of $matches, i.e. the captured group. The added line
// additionally strips leading/trailing ';' and ',' from it, then trims whitespace again.
+ $charset = trim(trim(trim(array_pop($matches)), ';,'));
if ($charset == "")
$charset = "utf-8";
else
$body = $header;
- $body = mb_convert_encoding($body, "UTF-8", $charset);
+ // Transcode to UTF-8 only when a charset other than UTF-8 was detected.
+ if (($charset != '') && (strtoupper($charset) != "UTF-8")) {
+ logger("parseurl_getsiteinfo: detected charset ".$charset, LOGGER_DEBUG);
+ // The charset name is taken from the fetched document, so iconv() may not
+ // recognise it (or may hit an illegal byte sequence) and return false.
+ // Keep the unconverted body in that case instead of replacing it with false,
+ // which would discard the whole document. "@" suppresses the notice/warning
+ // iconv() raises for such remote-controlled input.
+ $converted = @iconv($charset, "UTF-8//TRANSLIT", $body);
+ if ($converted !== false) {
+ $body = $converted;
+ } else {
+ logger("parseurl_getsiteinfo: iconv conversion from ".$charset." failed, keeping original body", LOGGER_DEBUG);
+ }
+ }
+
$body = mb_convert_encoding($body, 'HTML-ENTITIES', "UTF-8");
$doc = new DOMDocument();