X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=mod%2Fparse_url.php;h=1393a15d36b95bd94a9ab69ebd6fa6a38cddacb7;hb=c35feb27f2d5d9a1dc9cf885517953663dae9748;hp=97e1658c8985fd47b6360f9685a91d0549c3ca20;hpb=09034ce0ee78e2906033b79f7844cd63b9bab18c;p=friendica.git diff --git a/mod/parse_url.php b/mod/parse_url.php index 97e1658c89..1393a15d36 100644 --- a/mod/parse_url.php +++ b/mod/parse_url.php @@ -1,5 +1,17 @@ + + + + +

Shiny Trinket

+ +

Shiny trinkets are shiny.

+ +*/ if(!function_exists('deletenode')) { function deletenode(&$doc, $node) @@ -11,6 +23,33 @@ if(!function_exists('deletenode')) { } } +function completeurl($url, $scheme) { + $urlarr = parse_url($url); + + if (isset($urlarr["scheme"])) + return($url); + + $schemearr = parse_url($scheme); + + $complete = $schemearr["scheme"]."://".$schemearr["host"]; + + if ($schemearr["port"] != "") + $complete .= ":".$schemearr["port"]; + + if(strpos($urlarr['path'],'/') !== 0) + $complete .= '/'; + + $complete .= $urlarr["path"]; + + if ($urlarr["query"] != "") + $complete .= "?".$urlarr["query"]; + + if ($urlarr["fragment"] != "") + $complete .= "#".$urlarr["fragment"]; + + return($complete); +} + function parseurl_getsiteinfo($url) { $siteinfo = array(); @@ -25,7 +64,8 @@ function parseurl_getsiteinfo($url) { $header = curl_exec($ch); curl_close($ch); - if (preg_match('/charset=(.*?)\n/', $header, $matches)) + // Fetch the first mentioned charset. Can be in body or header + if (preg_match('/charset=(.*?)['."'".'"\s\n]/', $header, $matches)) $charset = trim(array_pop($matches)); else $charset = "utf-8"; @@ -57,11 +97,35 @@ function parseurl_getsiteinfo($url) { $xpath = new DomXPath($doc); - $list = $xpath->query("head/title"); + $list = $xpath->query("//meta[@content]"); + foreach ($list as $node) { + $attr = array(); + if ($node->attributes->length) + foreach ($node->attributes as $attribute) + $attr[$attribute->name] = $attribute->value; + + if (@$attr["http-equiv"] == 'refresh') { + $path = $attr["content"]; + $pathinfo = explode(";", $path); + $content = ""; + foreach ($pathinfo AS $value) { + if (substr(strtolower($value), 0, 4) == "url=") + $content = substr($value, 4); + } + if ($content != "") { + $siteinfo = parseurl_getsiteinfo($content); + return($siteinfo); + } + } + } + + //$list = $xpath->query("head/title"); + $list = $xpath->query("//title"); foreach ($list as $node) $siteinfo["title"] = html_entity_decode($node->nodeValue, ENT_QUOTES, "UTF-8"); - $list = $xpath->query("head/meta[@name]"); + //$list = $xpath->query("head/meta[@name]"); + $list = $xpath->query("//meta[@name]"); foreach ($list as $node) { $attr = array(); if ($node->attributes->length) @@ -86,7 +150,8 @@ function parseurl_getsiteinfo($url) { } } - $list = $xpath->query("head/meta[@property]"); + //$list = $xpath->query("head/meta[@property]"); + $list = $xpath->query("//meta[@property]"); foreach ($list as $node) { $attr = array(); if ($node->attributes->length) @@ -109,45 +174,42 @@ function parseurl_getsiteinfo($url) { } if ($siteinfo["image"] == "") { - $list = $xpath->query("//img[@src]"); - foreach ($list as $node) { - $attr = array(); - if ($node->attributes->length) - foreach ($node->attributes as $attribute) - $attr[$attribute->name] = $attribute->value; - - // guess mimetype from headers or filename - $type = guess_image_type($attr["src"],true); - - $i = fetch_url($attr["src"]); - $ph = new Photo($i, $type); - - if($ph->is_valid() and ($ph->getWidth() > 200) and ($ph->getHeight() > 200)) { - if ($siteinfo["image"] == "") - $siteinfo["image"] = $attr["src"]; - - if($ph->getWidth() > 300 || $ph->getHeight() > 300) { - $ph->scaleImage(300); - $siteinfo["images"][] = array("src"=>$attr["src"], - "width"=>$ph->getWidth(), - "height"=>$ph->getHeight()); - } else - $siteinfo["images"][] = array("src"=>$attr["src"], - "width"=>$ph->getWidth(), - "height"=>$ph->getHeight()); + $list = $xpath->query("//img[@src]"); + foreach ($list as $node) { + $attr = array(); + if ($node->attributes->length) + foreach ($node->attributes as $attribute) + $attr[$attribute->name] = $attribute->value; + + $src = completeurl($attr["src"], $url); + $photodata = @getimagesize($src); + + if (($photodata) && ($photodata[0] > 150) and ($photodata[1] > 150)) { + if ($photodata[0] > 300) { + $photodata[1] = round($photodata[1] * (300 / $photodata[0])); + $photodata[0] = 300; + } + if ($photodata[1] > 300) { + $photodata[0] = round($photodata[0] * (300 / $photodata[1])); + $photodata[1] = 300; + } + $siteinfo["images"][] = array("src"=>$src, + "width"=>$photodata[0], + "height"=>$photodata[1]); } - } - } else { - // guess mimetype from headers or filename - $type = guess_image_type($siteinfo["image"],true); - $i = fetch_url($siteinfo["image"]); - $ph = new Photo($i, $type); + } + } else { + $src = completeurl($siteinfo["image"], $url); + + unset($siteinfo["image"]); - if($ph->is_valid()) - $siteinfo["images"][] = array("src"=>$siteinfo["image"], - "width"=>$ph->getWidth(), - "height"=>$ph->getHeight()); + $photodata = @getimagesize($src); + + if (($photodata) && ($photodata[0] > 10) and ($photodata[1] > 10)) + $siteinfo["images"][] = array("src"=>$src, + "width"=>$photodata[0], + "height"=>$photodata[1]); } if ($siteinfo["text"] == "") { @@ -155,19 +217,22 @@ function parseurl_getsiteinfo($url) { $list = $xpath->query("//div[@class='article']"); foreach ($list as $node) - $text .= " ".trim($node->nodeValue); + if (strlen($node->nodeValue) > 40) + $text .= " ".trim($node->nodeValue); if ($text == "") { $list = $xpath->query("//div[@class='content']"); foreach ($list as $node) - $text .= " ".trim($node->nodeValue); + if (strlen($node->nodeValue) > 40) + $text .= " ".trim($node->nodeValue); } // If none text was found then take the paragraph content if ($text == "") { $list = $xpath->query("//p"); foreach ($list as $node) - $text .= " ".trim($node->nodeValue); + if (strlen($node->nodeValue) > 40) + $text .= " ".trim($node->nodeValue); } if ($text != "") { @@ -193,7 +258,8 @@ function parse_url_content(&$a) { $str_tags = ''; $textmode = false; - if(local_user() && intval(get_pconfig(local_user(),'system','plaintext'))) + + if(local_user() && (! feature_enabled(local_user(),'richtext'))) $textmode = true; //if($textmode) @@ -238,9 +304,9 @@ function parse_url_content(&$a) { if($url && $title && $text) { if($textmode) - $text = $br . $br . '[quote]' . trim($text) . '[/quote]' . $br; + $text = $br . '[quote]' . trim($text) . '[/quote]' . $br; else - $text = '

' . trim($text) . '

'; + $text = '
' . trim($text) . '

'; $title = str_replace(array("\r","\n"),array('',''),$title); @@ -264,38 +330,26 @@ function parse_url_content(&$a) { $image = ""; - foreach ($siteinfo["images"] as $imagedata) - if($textmode) - $image .= '[img='.$imagedata["width"].'x'.$imagedata["height"].']'.$imagedata["src"].'[/img]'; + if(sizeof($siteinfo["images"]) > 0){ + /* Execute below code only if image is present in siteinfo */ + + $total_images = 0; + $max_images = get_config('system','max_bookmark_images'); + if($max_images === false) + $max_images = 2; else - $image .= 'photo'; - -/* if ($image != "") { - $i = fetch_url($image); - if($i) { - require_once('include/Photo.php'); - // guess mimetype from headers or filename - $type = guess_image_type($image,true); - - $ph = new Photo($i, $type); - if($ph->is_valid()) { - if($ph->getWidth() > 300 || $ph->getHeight() > 300) { - $ph->scaleImage(300); - $new_width = $ph->getWidth(); - $new_height = $ph->getHeight(); - if($textmode) - $image = $br . $br . '[img=' . $new_width . 'x' . $new_height . ']' . $image . '[/img]'; - else - $image = '

photo'; - } else { - if($textmode) - $image = $br.$br.'[img]'.$image.'[/img]'; - else - $image = '

photo'; - } - } - } - }*/ + $max_images = intval($max_images); + + foreach ($siteinfo["images"] as $imagedata) { + if($textmode) + $image .= '[img='.$imagedata["width"].'x'.$imagedata["height"].']'.$imagedata["src"].'[/img]' . "\n"; + else + $image .= 'photo
'; + $total_images ++; + if($max_images && $max_images >= $total_images) + break; + } + } if(strlen($text)) { if($textmode) @@ -305,7 +359,7 @@ function parse_url_content(&$a) { } if($image) { - $text = $br.$br.$image.$br.$text; + $text = $br.$br.$image.$text; } $title = str_replace(array("\r","\n"),array('',''),$title); @@ -313,6 +367,6 @@ function parse_url_content(&$a) { logger('parse_url: returns: ' . $result); - echo $result; + echo trim($result); killme(); }