$ch = @curl_init($match[1]);
@curl_setopt($ch, CURLOPT_NOBODY, true);
@curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
- @curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; Friendica)");
+ @curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; ".FRIENDICA_PLATFORM." ".FRIENDICA_VERSION."-".DB_UPDATE_VERSION.")");
@curl_exec($ch);
$curl_info = @curl_getinfo($ch);
$ch = @curl_init($match[1]);
@curl_setopt($ch, CURLOPT_NOBODY, true);
@curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
- @curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; Friendica)");
+ @curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; ".FRIENDICA_PLATFORM." ".FRIENDICA_VERSION."-".DB_UPDATE_VERSION.")");
@curl_exec($ch);
$curl_info = @curl_getinfo($ch);
if($saved_image)
$Text = bb_replace_images($Text, $saved_image);
- // Clean up the HTML by loading and saving the HTML with the DOM
- // Only do it when it has to be done - for performance reasons
- // Update: Now it is done every time - since bad structured html can break a whole page
- //if (!$tryoembed) {
- // $doc = new DOMDocument();
- // $doc->preserveWhiteSpace = false;
+ // Clean up the HTML by loading and saving it with the DOM.
+ // Badly structured HTML can break a whole page.
+ // For performance reasons, do this only when the item cache is activated or on export.
+ if (!$tryoembed OR (get_itemcachepath() != "")) {
+ $doc = new DOMDocument();
+ $doc->preserveWhiteSpace = false;
- // $Text = mb_convert_encoding($Text, 'HTML-ENTITIES', "UTF-8");
+ //$Text = mb_convert_encoding($Text, 'HTML-ENTITIES', "UTF-8");
- // $doctype = '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">';
- // @$doc->loadHTML($doctype."<html><body>".$Text."</body></html>");
+ $doctype = '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">';
+ $encoding = '<?xml encoding="UTF-8">';
+ @$doc->loadHTML($encoding.$doctype."<html><body>".$Text."</body></html>");
+ $doc->encoding = 'UTF-8';
+ $Text = $doc->saveHTML();
+ $Text = str_replace(array("<html><body>", "</body></html>", $doctype, $encoding), array("", "", "", ""), $Text);
- // $Text = $doc->saveHTML();
- // $Text = str_replace(array("<html><body>", "</body></html>", $doctype), array("", "", ""), $Text);
+ $Text = str_replace('<br></li>','</li>', $Text);
- // $Text = str_replace('<br></li>','</li>', $Text);
-
- // $Text = mb_convert_encoding($Text, "UTF-8", 'HTML-ENTITIES');
- //}
+ //$Text = mb_convert_encoding($Text, "UTF-8", 'HTML-ENTITIES');
+ }
// Clean up some useless linebreaks in lists
//$Text = str_replace('<br /><ul','<ul ', $Text);
}
@curl_setopt($ch, CURLOPT_RETURNTRANSFER,true);
- @curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; Friendica)");
+ @curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; ".FRIENDICA_PLATFORM." ".FRIENDICA_VERSION."-".DB_UPDATE_VERSION.")");
if(intval($timeout)) {
$base = $s;
$curl_info = @curl_getinfo($ch);
$http_code = $curl_info['http_code'];
-// logger('fetch_url:' . $http_code . ' data: ' . $s);
+ logger('fetch_url '.$url.': '.$http_code." *".$s."*", LOGGER_DEBUG);
+ //logger('fetch_url:' . $http_code . ' data: ' . $s);
$header = '';
// Pull out multiple headers, e.g. proxy and continuation headers
curl_setopt($ch, CURLOPT_RETURNTRANSFER,true);
curl_setopt($ch, CURLOPT_POST,1);
curl_setopt($ch, CURLOPT_POSTFIELDS,$params);
- curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; Friendica)");
+ curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; ".FRIENDICA_PLATFORM." ".FRIENDICA_VERSION."-".DB_UPDATE_VERSION.")");
if(intval($timeout)) {
curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
curl_setopt($ch, CURLOPT_TIMEOUT, 10);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
- //curl_setopt($ch,CURLOPT_USERAGENT,'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:24.0) Gecko/20100101 Firefox/24.0');
- curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; Friendica)");
+ curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; ".FRIENDICA_PLATFORM." ".FRIENDICA_VERSION."-".DB_UPDATE_VERSION.")");
$header = curl_exec($ch);
$curl_info = @curl_getinfo($ch);
curl_setopt($ch, CURLOPT_TIMEOUT, 3);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
//curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
- //curl_setopt($ch,CURLOPT_USERAGENT,' Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:24.0) Gecko/20100101 Firefox/24.0');
- curl_setopt($ch,CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; Friendica)");
+ curl_setopt($ch,CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; ".FRIENDICA_PLATFORM." ".FRIENDICA_VERSION."-".DB_UPDATE_VERSION.")");
$header = curl_exec($ch);
$curl_info = @curl_getinfo($ch);