X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=include%2Fnetwork.php;h=4c6af8e71b45e0e599b4dc7c5c67394f33f67f79;hb=7673579afc55813950d9e86237acd2a2f74e4ae2;hp=400c1386be69a2779dd59894962037460bab79ee;hpb=93143702ed5fe88c0fce77d778d86ec651d4331e;p=friendica.git diff --git a/include/network.php b/include/network.php index 400c1386be..4c6af8e71b 100644 --- a/include/network.php +++ b/include/network.php @@ -4,8 +4,10 @@ // curl wrapper. If binary flag is true, return binary // results. +// Set the cookiejar argument to a string (e.g. "/tmp/friendica-cookies.txt") +// to preserve cookies from one request to the next. if(! function_exists('fetch_url')) { -function fetch_url($url,$binary = false, &$redirects = 0, $timeout = 0, $accept_content=Null) { +function fetch_url($url,$binary = false, &$redirects = 0, $timeout = 0, $accept_content=Null, $cookiejar = 0) { $stamp1 = microtime(true); @@ -17,8 +19,14 @@ function fetch_url($url,$binary = false, &$redirects = 0, $timeout = 0, $accept_ @curl_setopt($ch, CURLOPT_HEADER, true); - @curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); - @curl_setopt($ch, CURLOPT_MAXREDIRS, 5); + if($cookiejar) { + curl_setopt($ch, CURLOPT_COOKIEJAR, $cookiejar); + curl_setopt($ch, CURLOPT_COOKIEFILE, $cookiejar); + } + +// These settings aren't needed. We're following the location already. +// @curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); +// @curl_setopt($ch, CURLOPT_MAXREDIRS, 5); if (!is_null($accept_content)){ curl_setopt($ch,CURLOPT_HTTPHEADER, array ( @@ -27,8 +35,7 @@ function fetch_url($url,$binary = false, &$redirects = 0, $timeout = 0, $accept_ } @curl_setopt($ch, CURLOPT_RETURNTRANSFER,true); - //@curl_setopt($ch, CURLOPT_USERAGENT, "Friendica"); - @curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; Friendica)"); + @curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; ".FRIENDICA_PLATFORM." ".FRIENDICA_VERSION."-".DB_UPDATE_VERSION.")"); if(intval($timeout)) { @@ -65,7 +72,7 @@ function fetch_url($url,$binary = false, &$redirects = 0, $timeout = 0, $accept_ $base = $s; $curl_info = @curl_getinfo($ch); $http_code = $curl_info['http_code']; -// logger('fetch_url:' . $http_code . ' data: ' . $s); + logger('fetch_url '.$url.': '.$http_code." ".$s, LOGGER_DATA); $header = ''; // Pull out multiple headers, e.g. proxy and continuation headers @@ -86,19 +93,20 @@ function fetch_url($url,$binary = false, &$redirects = 0, $timeout = 0, $accept_ if (($new_location_info["path"] == "") AND ($new_location_info["host"] != "")) $newurl = $new_location_info["scheme"]."://".$new_location_info["host"].$old_location_info["path"]; - //$matches = array(); - //preg_match('/(Location:|URI:)(.*?)\n/', $header, $matches); - //$newurl = trim(array_pop($matches)); + $matches = array(); + if (preg_match('/(Location:|URI:)(.*?)\n/', $header, $matches)) { + $newurl = trim(array_pop($matches)); + } if(strpos($newurl,'/') === 0) - $newurl = $url . $newurl; - $url_parsed = @parse_url($newurl); - if (isset($url_parsed)) { + $newurl = $old_location_info["scheme"]."://".$old_location_info["host"].$newurl; + if (filter_var($newurl, FILTER_VALIDATE_URL)) { $redirects++; - return fetch_url($newurl,$binary,$redirects,$timeout); + return fetch_url($newurl,$binary,$redirects,$timeout,$accept_content,$cookiejar); } } $a->set_curl_code($http_code); + $a->set_curl_content_type($curl_info['content_type']); $body = substr($s,strlen($header)); $a->set_curl_headers($header); @@ -113,7 +121,6 @@ function fetch_url($url,$binary = false, &$redirects = 0, $timeout = 0, $accept_ if(! function_exists('post_url')) { function post_url($url,$params, $headers = null, &$redirects = 0, $timeout = 0) { - $stamp1 = microtime(true); $a = get_app(); @@ -121,11 +128,13 @@ function post_url($url,$params, $headers = null, &$redirects = 0, $timeout = 0) if(($redirects > 8) || (! $ch)) return false; + logger("post_url: start ".$url, LOGGER_DATA); + curl_setopt($ch, CURLOPT_HEADER, true); curl_setopt($ch, CURLOPT_RETURNTRANSFER,true); curl_setopt($ch, CURLOPT_POST,1); curl_setopt($ch, CURLOPT_POSTFIELDS,$params); - curl_setopt($ch, CURLOPT_USERAGENT, "Friendica"); + curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; ".FRIENDICA_PLATFORM." ".FRIENDICA_VERSION."-".DB_UPDATE_VERSION.")"); if(intval($timeout)) { curl_setopt($ch, CURLOPT_TIMEOUT, $timeout); @@ -169,6 +178,8 @@ function post_url($url,$params, $headers = null, &$redirects = 0, $timeout = 0) $curl_info = curl_getinfo($ch); $http_code = $curl_info['http_code']; + logger("post_url: result ".$http_code." - ".$url, LOGGER_DATA); + $header = ''; // Pull out multiple headers, e.g. proxy and continuation headers @@ -180,18 +191,19 @@ function post_url($url,$params, $headers = null, &$redirects = 0, $timeout = 0) $base = substr($base,strlen($chunk)); } - if($http_code == 301 || $http_code == 302 || $http_code == 303) { - $matches = array(); - preg_match('/(Location:|URI:)(.*?)\n/', $header, $matches); - $newurl = trim(array_pop($matches)); + if($http_code == 301 || $http_code == 302 || $http_code == 303 || $http_code == 307) { + $matches = array(); + preg_match('/(Location:|URI:)(.*?)\n/', $header, $matches); + $newurl = trim(array_pop($matches)); if(strpos($newurl,'/') === 0) - $newurl = $url . $newurl; - $url_parsed = @parse_url($newurl); - if (isset($url_parsed)) { - $redirects++; - return fetch_url($newurl,false,$redirects,$timeout); - } - } + $newurl = $old_location_info["scheme"] . "://" . $old_location_info["host"] . $newurl; + if (filter_var($newurl, FILTER_VALIDATE_URL)) { + $redirects++; + logger("post_url: redirect ".$url." to ".$newurl); + return post_url($newurl,$params, $headers, $redirects, $timeout); + //return fetch_url($newurl,false,$redirects,$timeout); + } + } $a->set_curl_code($http_code); $body = substr($s,strlen($header)); @@ -201,6 +213,8 @@ function post_url($url,$params, $headers = null, &$redirects = 0, $timeout = 0) $a->save_timestamp($stamp1, "network"); + logger("post_url: end ".$url, LOGGER_DATA); + return($body); }} @@ -541,7 +555,7 @@ function fetch_lrdd_template($host) { } if(count($links)) { foreach($links as $link) - if($link['@attributes']['rel'] && $link['@attributes']['rel'] === 'lrdd') + if($link['@attributes']['rel'] && $link['@attributes']['rel'] === 'lrdd' && (!$link['@attributes']['type'] || $link['@attributes']['type'] === 'application/xrd+xml')) $tpl = $link['@attributes']['template']; } if(! strpos($tpl,'{uri}')) @@ -558,7 +572,7 @@ function fetch_xrd_links($url) { $xrd_timeout = intval(get_config('system','xrd_timeout')); $redirects = 0; - $xml = fetch_url($url,false,$redirects,(($xrd_timeout) ? $xrd_timeout : 20)); + $xml = fetch_url($url,false,$redirects,(($xrd_timeout) ? $xrd_timeout : 20), "application/xrd+xml"); logger('fetch_xrd_links: ' . $xml, LOGGER_DATA); @@ -609,7 +623,6 @@ function fetch_xrd_links($url) { if(! function_exists('validate_url')) { function validate_url(&$url) { - // no naked subdomains (allow localhost for tests) if(strpos($url,'.') === false && strpos($url,'/localhost/') === false) return false; @@ -775,7 +788,7 @@ function add_fcontact($arr,$update = false) { `alias` = '%s', `pubkey` = '%s', `updated` = '%s' - WHERE `url` = '%s' AND `network` = '%s' LIMIT 1", + WHERE `url` = '%s' AND `network` = '%s'", dbesc($arr['name']), dbesc($arr['photo']), dbesc($arr['request']), @@ -817,12 +830,16 @@ function add_fcontact($arr,$update = false) { } -function scale_external_images($s, $include_link = true, $scale_replace = false) { +function scale_external_images($srctext, $include_link = true, $scale_replace = false) { + + // Suppress "view full size" + if (intval(get_config('system','no_view_full_size'))) + $include_link = false; $a = get_app(); // Picture addresses can contain special characters - $s = htmlspecialchars_decode($s); + $s = htmlspecialchars_decode($srctext); $matches = null; $c = preg_match_all('/\[img.*?\](.*?)\[\/img\]/ism',$s,$matches,PREG_SET_ORDER); @@ -844,7 +861,9 @@ function scale_external_images($s, $include_link = true, $scale_replace = false) $scaled = str_replace($scale_replace[0], $scale_replace[1], $mtch[1]); else $scaled = $mtch[1]; - $i = fetch_url($scaled); + $i = @fetch_url($scaled); + if(! $i) + return $srctext; $cachefile = get_cachefile(hash("md5", $scaled)); if ($cachefile != '') { @@ -1092,3 +1111,128 @@ function xml2array($contents, $namespaces = true, $get_attributes=1, $priority = return($xml_array); } + +function original_url($url, $depth=1, $fetchbody = false) { + + // Remove Analytics Data from Google and other tracking platforms + $urldata = parse_url($url); + if (is_string($urldata["query"])) { + $query = $urldata["query"]; + parse_str($query, $querydata); + + if (is_array($querydata)) + foreach ($querydata AS $param=>$value) + if (in_array($param, array("utm_source", "utm_medium", "utm_term", "utm_content", "utm_campaign", + "wt_mc", "pk_campaign", "pk_kwd", "mc_cid", "mc_eid", + "fb_action_ids", "fb_action_types", "fb_ref", + "awesm", + "woo_campaign", "woo_source", "woo_medium", "woo_content", "woo_term"))) { + + $pair = $param."=".urlencode($value); + $url = str_replace($pair, "", $url); + + // Second try: if the url isn't encoded completely + $pair = $param."=".str_replace(" ", "+", $value); + $url = str_replace($pair, "", $url); + + // Third try: Maybey the url isn't encoded at all + $pair = $param."=".$value; + $url = str_replace($pair, "", $url); + + $url = str_replace(array("?&", "&&"), array("?", ""), $url); + } + + if (substr($url, -1, 1) == "?") + $url = substr($url, 0, -1); + } + + if ($depth > 10) + return($url); + + $url = trim($url, "'"); + + $siteinfo = array(); + $ch = curl_init(); + curl_setopt($ch, CURLOPT_URL, $url); + curl_setopt($ch, CURLOPT_HEADER, 1); + + if ($fetchbody) + curl_setopt($ch, CURLOPT_NOBODY, 0); + else + curl_setopt($ch, CURLOPT_NOBODY, 1); + + curl_setopt($ch, CURLOPT_TIMEOUT, 10); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; ".FRIENDICA_PLATFORM." ".FRIENDICA_VERSION."-".DB_UPDATE_VERSION.")"); + + $header = curl_exec($ch); + $curl_info = @curl_getinfo($ch); + $http_code = $curl_info['http_code']; + curl_close($ch); + + if ((($curl_info['http_code'] == "301") OR ($curl_info['http_code'] == "302")) + AND (($curl_info['redirect_url'] != "") OR ($curl_info['location'] != ""))) { + if ($curl_info['redirect_url'] != "") + return(original_url($curl_info['redirect_url'], ++$depth, $fetchbody)); + else + return(original_url($curl_info['location'], ++$depth, $fetchbody)); + } + + $pos = strpos($header, "\r\n\r\n"); + + if ($pos) + $body = trim(substr($header, $pos)); + else + $body = $header; + + if (trim($body) == "") + return(original_url($url, ++$depth, true)); + + $doc = new DOMDocument(); + @$doc->loadHTML($body); + + $xpath = new DomXPath($doc); + + $list = $xpath->query("//meta[@content]"); + foreach ($list as $node) { + $attr = array(); + if ($node->attributes->length) + foreach ($node->attributes as $attribute) + $attr[$attribute->name] = $attribute->value; + + if (@$attr["http-equiv"] == 'refresh') { + $path = $attr["content"]; + $pathinfo = explode(";", $path); + $content = ""; + foreach ($pathinfo AS $value) + if (substr(strtolower($value), 0, 4) == "url=") + return(original_url(substr($value, 4), ++$depth)); + } + } + + return($url); +} + +if (!function_exists('short_link')) { +function short_link($url) { + require_once('library/slinky.php'); + $slinky = new Slinky($url); + $yourls_url = get_config('yourls','url1'); + if ($yourls_url) { + $yourls_username = get_config('yourls','username1'); + $yourls_password = get_config('yourls', 'password1'); + $yourls_ssl = get_config('yourls', 'ssl1'); + $yourls = new Slinky_YourLS(); + $yourls->set('username', $yourls_username); + $yourls->set('password', $yourls_password); + $yourls->set('ssl', $yourls_ssl); + $yourls->set('yourls-url', $yourls_url); + $slinky->set_cascade( array($yourls, new Slinky_UR1ca(), new Slinky_Trim(), new Slinky_IsGd(), new Slinky_TinyURL())); + } else { + // setup a cascade of shortening services + // try to get a short link from these services + // in the order ur1.ca, trim, id.gd, tinyurl + $slinky->set_cascade(array(new Slinky_UR1ca(), new Slinky_Trim(), new Slinky_IsGd(), new Slinky_TinyURL())); + } + return $slinky->short(); +}};