X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=include%2Fnetwork.php;h=11a0fffbe671a827c3e524e10ed0bc7b76def26c;hb=26d7cf1d6a03840bd6645ceb514ee92620a65485;hp=b238dbff657cd23d083d92dd5c4f80f765afeb5c;hpb=432f96b6461745f2ac7915750c322359d435bc4b;p=friendica.git diff --git a/include/network.php b/include/network.php index b238dbff65..11a0fffbe6 100644 --- a/include/network.php +++ b/include/network.php @@ -4,16 +4,20 @@ * @file include/network.php */ -require_once("include/xml.php"); +use Friendica\App; +use Friendica\Core\System; +use Friendica\Core\Config; +use Friendica\Network\Probe; +require_once("include/xml.php"); /** * @brief Curl wrapper - * + * * If binary flag is true, return binary results. * Set the cookiejar argument to a string (e.g. "/tmp/friendica-cookies.txt") * to preserve cookies from one request to the next. - * + * * @param string $url URL to fetch * @param boolean $binary default false * TRUE if asked to return binary results (file download) @@ -21,7 +25,7 @@ require_once("include/xml.php"); * @param integer $timeout Timeout in seconds, default system config value or 60 seconds * @param string $accept_content supply Accept: header with 'accept_content' as the value * @param string $cookiejar Path to cookie jar file - * + * * @return string The fetched content */ function fetch_url($url,$binary = false, &$redirects = 0, $timeout = 0, $accept_content=Null, $cookiejar = 0) { @@ -60,22 +64,27 @@ function fetch_url($url,$binary = false, &$redirects = 0, $timeout = 0, $accept_ * string 'header' => HTTP headers * string 'body' => fetched content */ -function z_fetch_url($url,$binary = false, &$redirects = 0, $opts=array()) { - - $ret = array('return_code' => 0, 'success' => false, 'header' => "", 'body' => ""); - +function z_fetch_url($url, $binary = false, &$redirects = 0, $opts = array()) { + $ret = array('return_code' => 0, 'success' => false, 'header' => '', 'body' => ''); $stamp1 = microtime(true); $a = get_app(); + if (blocked_url($url)) { + logger('z_fetch_url: domain of ' . $url . ' is blocked', LOGGER_DATA); + return $ret; + } + $ch = @curl_init($url); - if(($redirects > 8) || (! $ch)) - return false; + + if (($redirects > 8) || (!$ch)) { + return $ret; + } @curl_setopt($ch, CURLOPT_HEADER, true); - if(x($opts,"cookiejar")) { + if (x($opts, "cookiejar")) { curl_setopt($ch, CURLOPT_COOKIEJAR, $opts["cookiejar"]); curl_setopt($ch, CURLOPT_COOKIEFILE, $opts["cookiejar"]); } @@ -84,47 +93,70 @@ function z_fetch_url($url,$binary = false, &$redirects = 0, $opts=array()) { // @curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); // @curl_setopt($ch, CURLOPT_MAXREDIRS, 5); - if (x($opts,'accept_content')){ - curl_setopt($ch,CURLOPT_HTTPHEADER, array ( - "Accept: " . $opts['accept_content'] + if (x($opts, 'accept_content')) { + curl_setopt($ch, CURLOPT_HTTPHEADER, array( + 'Accept: ' . $opts['accept_content'] )); } - @curl_setopt($ch, CURLOPT_RETURNTRANSFER,true); + @curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); @curl_setopt($ch, CURLOPT_USERAGENT, $a->get_useragent()); + $range = intval(Config::get('system', 'curl_range_bytes', 0)); + + if ($range > 0) { + @curl_setopt($ch, CURLOPT_RANGE, '0-' . $range); + } + // Without this setting it seems as if some webservers send compressed content + // This seems to confuse curl so that it shows this uncompressed. + /// @todo We could possibly set this value to "gzip" or something similar + curl_setopt($ch, CURLOPT_ENCODING, ''); - if(x($opts,'headers')){ + if (x($opts, 'headers')) { @curl_setopt($ch, CURLOPT_HTTPHEADER, $opts['headers']); } - if(x($opts,'nobody')){ + + if (x($opts, 'nobody')) { @curl_setopt($ch, CURLOPT_NOBODY, $opts['nobody']); } - if(x($opts,'timeout')){ + + if (x($opts, 'timeout')) { @curl_setopt($ch, CURLOPT_TIMEOUT, $opts['timeout']); } else { - $curl_time = intval(get_config('system','curl_timeout')); + $curl_time = intval(get_config('system', 'curl_timeout')); @curl_setopt($ch, CURLOPT_TIMEOUT, (($curl_time !== false) ? $curl_time : 60)); } // by default we will allow self-signed certs // but you can override this - $check_cert = get_config('system','verifyssl'); + $check_cert = get_config('system', 'verifyssl'); @curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, (($check_cert) ? true : false)); - @curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, (($check_cert) ? 2 : false)); - $prx = get_config('system','proxy'); - if(strlen($prx)) { + if ($check_cert) { + @curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2); + } + + $proxy = get_config('system', 'proxy'); + + if (strlen($proxy)) { @curl_setopt($ch, CURLOPT_HTTPPROXYTUNNEL, 1); - @curl_setopt($ch, CURLOPT_PROXY, $prx); - $prxusr = @get_config('system','proxyuser'); - if(strlen($prxusr)) - @curl_setopt($ch, CURLOPT_PROXYUSERPWD, $prxusr); + @curl_setopt($ch, CURLOPT_PROXY, $proxy); + $proxyuser = @get_config('system', 'proxyuser'); + + if (strlen($proxyuser)) { + @curl_setopt($ch, CURLOPT_PROXYUSERPWD, $proxyuser); + } + } + + if (Config::get('system', 'ipv4_resolve', false)) { + curl_setopt($ch, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4); + } + + if ($binary) { + @curl_setopt($ch, CURLOPT_BINARYTRANSFER, 1); } - if($binary) - @curl_setopt($ch, CURLOPT_BINARYTRANSFER,1); $a->set_curl_code(0); @@ -132,142 +164,165 @@ function z_fetch_url($url,$binary = false, &$redirects = 0, $opts=array()) { // if it throws any errors. $s = @curl_exec($ch); + if (curl_errno($ch) !== CURLE_OK) { - logger('fetch_url error fetching '.$url.': '.curl_error($ch), LOGGER_NORMAL); + logger('fetch_url error fetching ' . $url . ': ' . curl_error($ch), LOGGER_NORMAL); } + $ret['errno'] = curl_errno($ch); + $base = $s; $curl_info = @curl_getinfo($ch); $http_code = $curl_info['http_code']; - logger('fetch_url '.$url.': '.$http_code." ".$s, LOGGER_DATA); + logger('fetch_url ' . $url . ': ' . $http_code . " " . $s, LOGGER_DATA); $header = ''; // Pull out multiple headers, e.g. proxy and continuation headers // allow for HTTP/2.x without fixing code - while(preg_match('/^HTTP\/[1-2].+? [1-5][0-9][0-9]/',$base)) { - $chunk = substr($base,0,strpos($base,"\r\n\r\n")+4); + while (preg_match('/^HTTP\/[1-2].+? [1-5][0-9][0-9]/', $base)) { + $chunk = substr($base, 0, strpos($base,"\r\n\r\n") + 4); $header .= $chunk; - $base = substr($base,strlen($chunk)); + $base = substr($base, strlen($chunk)); } $a->set_curl_code($http_code); $a->set_curl_content_type($curl_info['content_type']); $a->set_curl_headers($header); - if($http_code == 301 || $http_code == 302 || $http_code == 303 || $http_code == 307) { - $new_location_info = @parse_url($curl_info["redirect_url"]); - $old_location_info = @parse_url($curl_info["url"]); + if ($http_code == 301 || $http_code == 302 || $http_code == 303 || $http_code == 307) { + $new_location_info = @parse_url($curl_info['redirect_url']); + $old_location_info = @parse_url($curl_info['url']); - $newurl = $curl_info["redirect_url"]; + $newurl = $curl_info['redirect_url']; - if (($new_location_info["path"] == "") AND ($new_location_info["host"] != "")) - $newurl = $new_location_info["scheme"]."://".$new_location_info["host"].$old_location_info["path"]; + if (($new_location_info['path'] == '') && ( $new_location_info['host'] != '')) { + $newurl = $new_location_info['scheme'] . '://' . $new_location_info['host'] . $old_location_info['path']; + } $matches = array(); + if (preg_match('/(Location:|URI:)(.*?)\n/i', $header, $matches)) { $newurl = trim(array_pop($matches)); } - if(strpos($newurl,'/') === 0) + if (strpos($newurl,'/') === 0) { $newurl = $old_location_info["scheme"]."://".$old_location_info["host"].$newurl; + } + if (filter_var($newurl, FILTER_VALIDATE_URL)) { $redirects++; @curl_close($ch); - return z_fetch_url($newurl,$binary, $redirects, $opts); + return z_fetch_url($newurl, $binary, $redirects, $opts); } } - $a->set_curl_code($http_code); $a->set_curl_content_type($curl_info['content_type']); - $body = substr($s,strlen($header)); - - + $body = substr($s, strlen($header)); $rc = intval($http_code); $ret['return_code'] = $rc; $ret['success'] = (($rc >= 200 && $rc <= 299) ? true : false); $ret['redirect_url'] = $url; - if(! $ret['success']) { + + if (!$ret['success']) { $ret['error'] = curl_error($ch); $ret['debug'] = $curl_info; - logger('z_fetch_url: error: ' . $url . ': ' . $ret['error'], LOGGER_DEBUG); - logger('z_fetch_url: debug: ' . print_r($curl_info,true), LOGGER_DATA); + logger('z_fetch_url: error: '.$url.': '.$ret['return_code'].' - '.$ret['error'], LOGGER_DEBUG); + logger('z_fetch_url: debug: '.print_r($curl_info, true), LOGGER_DATA); } - $ret['body'] = substr($s,strlen($header)); + + $ret['body'] = substr($s, strlen($header)); $ret['header'] = $header; - if(x($opts,'debug')) { + + if (x($opts, 'debug')) { $ret['debug'] = $curl_info; } + @curl_close($ch); - $a->save_timestamp($stamp1, "network"); + $a->save_timestamp($stamp1, 'network'); return($ret); - } -// post request to $url. $params is an array of post variables. - /** - * @brief Post request to $url - * + * @brief Send POST request to $url + * * @param string $url URL to post - * @param mixed $params + * @param mixed $params array of POST variables * @param string $headers HTTP headers * @param integer $redirects Recursion counter for internal use - default = 0 * @param integer $timeout The timeout in seconds, default system config value or 60 seconds - * + * * @return string The content */ -function post_url($url,$params, $headers = null, &$redirects = 0, $timeout = 0) { +function post_url($url, $params, $headers = null, &$redirects = 0, $timeout = 0) { $stamp1 = microtime(true); + if (blocked_url($url)) { + logger('post_url: domain of ' . $url . ' is blocked', LOGGER_DATA); + return false; + } + $a = get_app(); $ch = curl_init($url); - if(($redirects > 8) || (! $ch)) + + if (($redirects > 8) || (!$ch)) { return false; + } - logger("post_url: start ".$url, LOGGER_DATA); + logger('post_url: start ' . $url, LOGGER_DATA); curl_setopt($ch, CURLOPT_HEADER, true); - curl_setopt($ch, CURLOPT_RETURNTRANSFER,true); - curl_setopt($ch, CURLOPT_POST,1); - curl_setopt($ch, CURLOPT_POSTFIELDS,$params); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + curl_setopt($ch, CURLOPT_POST, 1); + curl_setopt($ch, CURLOPT_POSTFIELDS, $params); curl_setopt($ch, CURLOPT_USERAGENT, $a->get_useragent()); - if(intval($timeout)) { - curl_setopt($ch, CURLOPT_TIMEOUT, $timeout); + if (Config::get('system', 'ipv4_resolve', false)) { + curl_setopt($ch, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4); } - else { - $curl_time = intval(get_config('system','curl_timeout')); + + if (intval($timeout)) { + curl_setopt($ch, CURLOPT_TIMEOUT, $timeout); + } else { + $curl_time = intval(get_config('system', 'curl_timeout')); curl_setopt($ch, CURLOPT_TIMEOUT, (($curl_time !== false) ? $curl_time : 60)); } - if(defined('LIGHTTPD')) { - if(!is_array($headers)) { + if (defined('LIGHTTPD')) { + if (!is_array($headers)) { $headers = array('Expect:'); } else { - if(!in_array('Expect:', $headers)) { + if (!in_array('Expect:', $headers)) { array_push($headers, 'Expect:'); } } } - if($headers) + + if ($headers) { curl_setopt($ch, CURLOPT_HTTPHEADER, $headers); + } - $check_cert = get_config('system','verifyssl'); + $check_cert = get_config('system', 'verifyssl'); curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, (($check_cert) ? true : false)); - curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, (($check_cert) ? 2 : false)); - $prx = get_config('system','proxy'); - if(strlen($prx)) { + + if ($check_cert) { + @curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2); + } + + $proxy = get_config('system', 'proxy'); + + if (strlen($proxy)) { curl_setopt($ch, CURLOPT_HTTPPROXYTUNNEL, 1); - curl_setopt($ch, CURLOPT_PROXY, $prx); - $prxusr = get_config('system','proxyuser'); - if(strlen($prxusr)) - curl_setopt($ch, CURLOPT_PROXYUSERPWD, $prxusr); + curl_setopt($ch, CURLOPT_PROXY, $proxy); + $proxyuser = get_config('system', 'proxyuser'); + if (strlen($proxyuser)) { + curl_setopt($ch, CURLOPT_PROXYUSERPWD, $proxyuser); + } } $a->set_curl_code(0); @@ -281,44 +336,48 @@ function post_url($url,$params, $headers = null, &$redirects = 0, $timeout = 0) $curl_info = curl_getinfo($ch); $http_code = $curl_info['http_code']; - logger("post_url: result ".$http_code." - ".$url, LOGGER_DATA); + logger('post_url: result ' . $http_code . ' - ' . $url, LOGGER_DATA); $header = ''; // Pull out multiple headers, e.g. proxy and continuation headers // allow for HTTP/2.x without fixing code - while(preg_match('/^HTTP\/[1-2].+? [1-5][0-9][0-9]/',$base)) { - $chunk = substr($base,0,strpos($base,"\r\n\r\n")+4); + while (preg_match('/^HTTP\/[1-2].+? [1-5][0-9][0-9]/', $base)) { + $chunk = substr($base, 0, strpos($base, "\r\n\r\n") + 4); $header .= $chunk; - $base = substr($base,strlen($chunk)); + $base = substr($base, strlen($chunk)); } - if($http_code == 301 || $http_code == 302 || $http_code == 303 || $http_code == 307) { + if ($http_code == 301 || $http_code == 302 || $http_code == 303 || $http_code == 307) { $matches = array(); preg_match('/(Location:|URI:)(.*?)\n/', $header, $matches); $newurl = trim(array_pop($matches)); - if(strpos($newurl,'/') === 0) + + if (strpos($newurl, '/') === 0) { $newurl = $old_location_info["scheme"] . "://" . $old_location_info["host"] . $newurl; + } + if (filter_var($newurl, FILTER_VALIDATE_URL)) { $redirects++; - logger("post_url: redirect ".$url." to ".$newurl); - return post_url($newurl,$params, $headers, $redirects, $timeout); - //return fetch_url($newurl,false,$redirects,$timeout); + logger('post_url: redirect ' . $url . ' to ' . $newurl); + return post_url($newurl, $params, $headers, $redirects, $timeout); } } + $a->set_curl_code($http_code); - $body = substr($s,strlen($header)); + + $body = substr($s, strlen($header)); $a->set_curl_headers($header); curl_close($ch); - $a->save_timestamp($stamp1, "network"); + $a->save_timestamp($stamp1, 'network'); - logger("post_url: end ".$url, LOGGER_DATA); + logger('post_url: end ' . $url, LOGGER_DATA); - return($body); + return $body; } // Generic XML return @@ -327,18 +386,25 @@ function post_url($url,$params, $headers = null, &$redirects = 0, $timeout = 0) function xml_status($st, $message = '') { - $xml_message = ((strlen($message)) ? "\t" . xmlify($message) . "\r\n" : ''); + $result = array('status' => $st); + + if ($message != '') { + $result['message'] = $message; + } - if($st) + if ($st) { logger('xml_status returning non_zero: ' . $st . " message=" . $message); + } + + header("Content-type: text/xml"); + + $xmldata = array("result" => $result); + + echo xml::from_array($xmldata, $xml); - header( "Content-type: text/xml" ); - echo ''."\r\n"; - echo "\r\n\t$st\r\n$xml_message\r\n"; killme(); } - /** * @brief Send HTTP status header and exit. * @@ -348,14 +414,22 @@ function xml_status($st, $message = '') { * 'description' => optional message */ +/** + * @brief Send HTTP status header and exit. + * + * @param integer $val HTTP status result value + * @param array $description optional message + * 'title' => header title + * 'description' => optional message + */ function http_status_exit($val, $description = array()) { $err = ''; - if($val >= 400) { + if ($val >= 400) { $err = 'Error'; if (!isset($description["title"])) $description["title"] = $err." ".$val; } - if($val >= 200 && $val < 300) + if ($val >= 200 && $val < 300) $err = 'OK'; logger('http_status_exit ' . $val); @@ -371,380 +445,52 @@ function http_status_exit($val, $description = array()) { } -// Given an email style address, perform webfinger lookup and -// return the resulting DFRN profile URL, or if no DFRN profile URL -// is located, returns an OStatus subscription template (prefixed -// with the string 'stat:' to identify it as on OStatus template). -// If this isn't an email style address just return $webbie. -// Return an empty string if email-style addresses but webfinger fails, -// or if the resultant personal XRD doesn't contain a supported -// subscription/friend-request attribute. - -// amended 7/9/2011 to return an hcard which could save potentially loading -// a lengthy content page to scrape dfrn attributes - -function webfinger_dfrn($webbie,&$hcard) { - if(! strstr($webbie,'@')) { - return $webbie; - } - $profile_link = ''; - - $links = webfinger($webbie); - logger('webfinger_dfrn: ' . $webbie . ':' . print_r($links,true), LOGGER_DATA); - if(count($links)) { - foreach($links as $link) { - if(empty($profile_link) && $link['@attributes']['rel'] === NAMESPACE_DFRN) { - $profile_link = $link['@attributes']['href']; - } elseif(empty($profile_link) && $link['@attributes']['rel'] === NAMESPACE_OSTATUSSUB) { - $profile_link = 'stat:' . $link['@attributes']['template']; - } elseif(empty($hcard) && $link['@attributes']['rel'] === 'http://microformats.org/profile/hcard') { - $hcard = $link['@attributes']['href']; - } - } - } - return $profile_link; -} - -/** - * @brief Perform webfinger lookup on an email style address - * - * @param string $webbi An email style address - * @param boolean $debug - * - * @return array of link attributes from the personal XRD file - * empty array on error/failure - */ -function webfinger($webbie, $debug = false) { - $host = ''; - if(strstr($webbie,'@')) { - $host = substr($webbie,strpos($webbie,'@') + 1); - } - if(strlen($host)) { - $tpl = fetch_lrdd_template($host); - logger('webfinger: lrdd template: ' . $tpl); - if(strlen($tpl)) { - $pxrd = str_replace('{uri}', urlencode('acct:' . $webbie), $tpl); - logger('webfinger: pxrd: ' . $pxrd); - $links = fetch_xrd_links($pxrd); - if(! count($links)) { - // try with double slashes - $pxrd = str_replace('{uri}', urlencode('acct://' . $webbie), $tpl); - logger('webfinger: pxrd: ' . $pxrd); - $links = fetch_xrd_links($pxrd); - } - return $links; - } - } - return array(); -} - -function lrdd($uri, $debug = false) { - - $a = get_app(); - - // default priority is host priority, host-meta first - - $priority = 'host'; - - // All we have is an email address. Resource-priority is irrelevant - // because our URI isn't directly resolvable. - - if(strstr($uri,'@')) { - return(webfinger($uri)); - } - - // get the host meta file - - $host = @parse_url($uri); - - if($host) { - $url = ((x($host,'scheme')) ? $host['scheme'] : 'http') . '://'; - $url .= $host['host'] . '/.well-known/host-meta' ; - } - else - return array(); - - logger('lrdd: constructed url: ' . $url); - - $xml = fetch_url($url); - - $headers = $a->get_curl_headers(); - - if (! $xml) - return array(); - - logger('lrdd: host_meta: ' . $xml, LOGGER_DATA); - - if(! stristr($xml,'].*)>.*rel\=[\'\"]lrdd[\'\"]/',$line,$matches)) { - return(fetch_xrd_links($matches[1])); - break; - } - } - } - } - - - // priority 'resource' - - - $html = fetch_url($uri); - $headers = $a->get_curl_headers(); - logger('lrdd: headers=' . $headers, LOGGER_DEBUG); - - // don't try and parse raw xml as html - if(! strstr($html,'getElementsByTagName('link'); - foreach($items as $item) { - $x = $item->getAttribute('rel'); - if($x == "lrdd") { - $pagelink = $item->getAttribute('href'); - break; - } - } - } - } - - if(isset($pagelink)) - return(fetch_xrd_links($pagelink)); - - // next look in HTTP headers - - $lines = explode("\n",$headers); - if(count($lines)) { - foreach($lines as $line) { - /// @TODO Alter the following regex to support multiple relations (space separated) - if((stristr($line,'link:')) && preg_match('/<([^>].*)>.*rel\=[\'\"]lrdd[\'\"]/',$line,$matches)) { - $pagelink = $matches[1]; - break; - } - // don't try and run feeds through the html5 parser - if(stristr($line,'content-type:') && ((stristr($line,'application/atom+xml')) || (stristr($line,'application/rss+xml')))) - return array(); - if(stristr($html,''),array('href="','"/>'),$xml); - - $h = parse_xml_string($xml); - if(! $h) - return array(); - - $arr = xml::element_to_array($h); - - $links = array(); - - if(isset($arr['xrd']['link'])) { - $link = $arr['xrd']['link']; - if(! isset($link[0])) - $links = array($link); - else - $links = $link; - } - if(isset($arr['xrd']['alias'])) { - $alias = $arr['xrd']['alias']; - if(! isset($alias[0])) - $aliases = array($alias); - else - $aliases = $alias; - if(is_array($aliases) && count($aliases)) { - foreach($aliases as $alias) { - $links[]['@attributes'] = array('rel' => 'alias' , 'href' => $alias); - } - } - } - - logger('fetch_xrd_links: ' . print_r($links,true), LOGGER_DATA); - - return $links; - -} - /** * @brief Check URL to se if ts's real - * + * * Take a URL from the wild, prepend http:// if necessary * and check DNS to see if it's real (or check if is a valid IP address) - * + * * @param string $url The URL to be validated * @return boolean True if it's a valid URL, fals if something wrong with it */ function validate_url(&$url) { - logger(sprintf('[%s:%d]: url=%s - CALLED!', __FUNCTION__, __LINE__, $url), LOGGER_TRACE); - - if(get_config('system','disable_url_validation')) - logger(sprintf('[%s:%d]: URL validation disabled, returning TRUE - EXIT!', __FUNCTION__, __LINE__), LOGGER_TRACE); + if (get_config('system','disable_url_validation')) return true; // no naked subdomains (allow localhost for tests) - if(strpos($url,'.') === false && strpos($url,'/localhost/') === false) - logger(sprintf('[%s:%d]: URL is not complete, returning FALSE - EXIT!', __FUNCTION__, __LINE__), LOGGER_TRACE); + if (strpos($url,'.') === false && strpos($url,'/localhost/') === false) return false; - if(substr($url,0,4) != 'http' && substr($url,0,5) != 'https') + if (substr($url,0,4) != 'http') $url = 'http://' . $url; - logger(sprintf('[%s:%d]: url=%s - before parse_url() ...', __FUNCTION__, __LINE__, $url), LOGGER_DEBUG); - + /// @TODO Really supress function outcomes? Why not find them + debug them? $h = @parse_url($url); - logger(sprintf('[%s:%d]: h[]=%s', __FUNCTION__, __LINE__, gettype($h)), LOGGER_DEBUG); - - if((is_array($h)) && (dns_get_record($h['host'], DNS_A + DNS_CNAME + DNS_PTR) || filter_var($h['host'], FILTER_VALIDATE_IP) )) { - logger(sprintf('[%s:%d]: URL %s validated. - EXIT!', __FUNCTION__, __LINE__, $url), LOGGER_TRACE); + if ((is_array($h)) && (dns_get_record($h['host'], DNS_A + DNS_CNAME + DNS_PTR) || filter_var($h['host'], FILTER_VALIDATE_IP) )) { return true; } - logger(sprintf('[%s:%d]: URL %s maybe not valid - EXIT!', __FUNCTION__, __LINE__, $url), LOGGER_TRACE); return false; } /** * @brief Checks that email is an actual resolvable internet address - * + * * @param string $addr The email address * @return boolean True if it's a valid email address, false if it's not */ function validate_email($addr) { - if(get_config('system','disable_email_validation')) + if (get_config('system','disable_email_validation')) return true; - if(! strpos($addr,'@')) + if (! strpos($addr,'@')) return false; $h = substr($addr,strpos($addr,'@') + 1); - if(($h) && (dns_get_record($h, DNS_A + DNS_CNAME + DNS_PTR + DNS_MX) || filter_var($h, FILTER_VALIDATE_IP) )) { + if (($h) && (dns_get_record($h, DNS_A + DNS_CNAME + DNS_PTR + DNS_MX) || filter_var($h, FILTER_VALIDATE_IP) )) { return true; } return false; @@ -752,10 +498,10 @@ function validate_email($addr) { /** * @brief Check if URL is allowed - * + * * Check $url against our list of allowed sites, * wildcards allowed. If allowed_sites is unset return true; - * + * * @param string $url URL which get tested * @return boolean True if url is allowed otherwise return false */ @@ -763,30 +509,31 @@ function allowed_url($url) { $h = @parse_url($url); - if(! $h) { + if (! $h) { return false; } - $str_allowed = get_config('system','allowed_sites'); - if(! $str_allowed) + $str_allowed = Config::get('system', 'allowed_sites'); + if (! $str_allowed) { return true; + } $found = false; $host = strtolower($h['host']); // always allow our own site - - if($host == strtolower($_SERVER['SERVER_NAME'])) + if ($host == strtolower($_SERVER['SERVER_NAME'])) { return true; + } $fnmatch = function_exists('fnmatch'); - $allowed = explode(',',$str_allowed); + $allowed = explode(',', $str_allowed); - if(count($allowed)) { - foreach($allowed as $a) { + if (count($allowed)) { + foreach ($allowed as $a) { $pat = strtolower(trim($a)); - if(($fnmatch && fnmatch($pat,$host)) || ($pat == $host)) { + if (($fnmatch && fnmatch($pat, $host)) || ($pat == $host)) { $found = true; break; } @@ -795,35 +542,66 @@ function allowed_url($url) { return $found; } +/** + * Checks if the provided url domain is on the domain blocklist. + * Returns true if it is or malformed URL, false if not. + * + * @param string $url The url to check the domain from + * @return boolean + */ +function blocked_url($url) { + $h = @parse_url($url); + + if (! $h) { + return true; + } + + $domain_blocklist = Config::get('system', 'blocklist', array()); + if (! $domain_blocklist) { + return false; + } + + $host = strtolower($h['host']); + + foreach ($domain_blocklist as $domain_block) { + if (strtolower($domain_block['domain']) == $host) { + return true; + } + } + + return false; +} + /** * @brief Check if email address is allowed to register here. - * + * * Compare against our list (wildcards allowed). - * + * * @param type $email * @return boolean False if not allowed, true if allowed * or if allowed list is not configured */ function allowed_email($email) { - $domain = strtolower(substr($email,strpos($email,'@') + 1)); - if(! $domain) + if (! $domain) { return false; + } $str_allowed = get_config('system','allowed_email'); - if(! $str_allowed) + if (! $str_allowed) { return true; + } $found = false; $fnmatch = function_exists('fnmatch'); $allowed = explode(',',$str_allowed); - if(count($allowed)) { - foreach($allowed as $a) { + if (count($allowed)) { + foreach ($allowed as $a) { $pat = strtolower(trim($a)); - if(($fnmatch && fnmatch($pat,$domain)) || ($pat == $domain)) { + if (($fnmatch && fnmatch($pat,$domain)) || ($pat == $domain)) { $found = true; break; } @@ -834,8 +612,6 @@ function allowed_email($email) { function avatar_img($email) { - $a = get_app(); - $avatar['size'] = 175; $avatar['email'] = $email; $avatar['url'] = ''; @@ -843,30 +619,27 @@ function avatar_img($email) { call_hooks('avatar_lookup', $avatar); - if(! $avatar['success']) - $avatar['url'] = $a->get_baseurl() . '/images/person-175.jpg'; + if (! $avatar['success']) { + $avatar['url'] = System::baseUrl() . '/images/person-175.jpg'; + } logger('Avatar: ' . $avatar['email'] . ' ' . $avatar['url'], LOGGER_DEBUG); return $avatar['url']; } -function parse_xml_string($s,$strict = true) { +function parse_xml_string($s, $strict = true) { + // the "strict" parameter is deactivated + /// @todo Move this function to the xml class - if($strict) { - if(! strstr($s,'code." at ".$err->line.":".$err->column." : ".$err->message, LOGGER_DATA); + } libxml_clear_errors(); } return $x; @@ -875,8 +648,9 @@ function parse_xml_string($s,$strict = true) { function scale_external_images($srctext, $include_link = true, $scale_replace = false) { // Suppress "view full size" - if (intval(get_config('system','no_view_full_size'))) + if (intval(get_config('system','no_view_full_size'))) { $include_link = false; + } $a = get_app(); @@ -885,38 +659,41 @@ function scale_external_images($srctext, $include_link = true, $scale_replace = $matches = null; $c = preg_match_all('/\[img.*?\](.*?)\[\/img\]/ism',$s,$matches,PREG_SET_ORDER); - if($c) { + if ($c) { require_once('include/Photo.php'); - foreach($matches as $mtch) { + foreach ($matches as $mtch) { logger('scale_external_image: ' . $mtch[1]); - $hostname = str_replace('www.','',substr($a->get_baseurl(),strpos($a->get_baseurl(),'://')+3)); - if(stristr($mtch[1],$hostname)) + $hostname = str_replace('www.','',substr(System::baseUrl(),strpos(System::baseUrl(),'://')+3)); + if (stristr($mtch[1],$hostname)) { continue; + } // $scale_replace, if passed, is an array of two elements. The // first is the name of the full-size image. The second is the // name of a remote, scaled-down version of the full size image. // This allows Friendica to display the smaller remote image if // one exists, while still linking to the full-size image - if($scale_replace) + if ($scale_replace) { $scaled = str_replace($scale_replace[0], $scale_replace[1], $mtch[1]); - else + } else { $scaled = $mtch[1]; - $i = @fetch_url($scaled); - if(! $i) + } + $i = fetch_url($scaled); + if (! $i) { return $srctext; + } // guess mimetype from headers or filename $type = guess_image_type($mtch[1],true); - if($i) { + if ($i) { $ph = new Photo($i, $type); - if($ph->is_valid()) { + if ($ph->is_valid()) { $orig_width = $ph->getWidth(); $orig_height = $ph->getHeight(); - if($orig_width > 640 || $orig_height > 640) { + if ($orig_width > 640 || $orig_height > 640) { $ph->scaleImage(640); $new_width = $ph->getWidth(); @@ -942,7 +719,7 @@ function scale_external_images($srctext, $include_link = true, $scale_replace = function fix_contact_ssl_policy(&$contact,$new_policy) { $ssl_changed = false; - if((intval($new_policy) == SSL_POLICY_SELFSIGN || $new_policy === 'self') && strstr($contact['url'],'https:')) { + if ((intval($new_policy) == SSL_POLICY_SELFSIGN || $new_policy === 'self') && strstr($contact['url'],'https:')) { $ssl_changed = true; $contact['url'] = str_replace('https:','http:',$contact['url']); $contact['request'] = str_replace('https:','http:',$contact['request']); @@ -952,7 +729,7 @@ function fix_contact_ssl_policy(&$contact,$new_policy) { $contact['poco'] = str_replace('https:','http:',$contact['poco']); } - if((intval($new_policy) == SSL_POLICY_FULL || $new_policy === 'full') && strstr($contact['url'],'http:')) { + if ((intval($new_policy) == SSL_POLICY_FULL || $new_policy === 'full') && strstr($contact['url'],'http:')) { $ssl_changed = true; $contact['url'] = str_replace('http:','https:',$contact['url']); $contact['request'] = str_replace('http:','https:',$contact['request']); @@ -962,62 +739,79 @@ function fix_contact_ssl_policy(&$contact,$new_policy) { $contact['poco'] = str_replace('http:','https:',$contact['poco']); } - if($ssl_changed) { - q("update contact set - url = '%s', - request = '%s', - notify = '%s', - poll = '%s', - confirm = '%s', - poco = '%s' - where id = %d limit 1", - dbesc($contact['url']), - dbesc($contact['request']), - dbesc($contact['notify']), - dbesc($contact['poll']), - dbesc($contact['confirm']), - dbesc($contact['poco']), - intval($contact['id']) - ); + if ($ssl_changed) { + $fields = array('url' => $contact['url'], 'request' => $contact['request'], + 'notify' => $contact['notify'], 'poll' => $contact['poll'], + 'confirm' => $contact['confirm'], 'poco' => $contact['poco']); + dba::update('contact', $fields, array('id' => $contact['id'])); } } -function original_url($url, $depth=1, $fetchbody = false) { - - $a = get_app(); - - // Remove Analytics Data from Google and other tracking platforms +/** + * @brief Remove Google Analytics and other tracking platforms params from URL + * + * @param string $url Any user-submitted URL that may contain tracking params + * @return string The same URL stripped of tracking parameters + */ +function strip_tracking_query_params($url) +{ $urldata = parse_url($url); if (is_string($urldata["query"])) { $query = $urldata["query"]; parse_str($query, $querydata); - if (is_array($querydata)) - foreach ($querydata AS $param=>$value) + if (is_array($querydata)) { + foreach ($querydata AS $param => $value) { if (in_array($param, array("utm_source", "utm_medium", "utm_term", "utm_content", "utm_campaign", "wt_mc", "pk_campaign", "pk_kwd", "mc_cid", "mc_eid", "fb_action_ids", "fb_action_types", "fb_ref", "awesm", "wtrid", "woo_campaign", "woo_source", "woo_medium", "woo_content", "woo_term"))) { - $pair = $param."=".urlencode($value); + $pair = $param . "=" . urlencode($value); $url = str_replace($pair, "", $url); // Second try: if the url isn't encoded completely - $pair = $param."=".str_replace(" ", "+", $value); + $pair = $param . "=" . str_replace(" ", "+", $value); $url = str_replace($pair, "", $url); // Third try: Maybey the url isn't encoded at all - $pair = $param."=".$value; + $pair = $param . "=" . $value; $url = str_replace($pair, "", $url); $url = str_replace(array("?&", "&&"), array("?", ""), $url); } + } + } - if (substr($url, -1, 1) == "?") + if (substr($url, -1, 1) == "?") { $url = substr($url, 0, -1); + } } + return $url; +} + +/** + * @brief Returns the original URL of the provided URL + * + * This function strips tracking query params and follows redirections, either + * through HTTP code or meta refresh tags. Stops after 10 redirections. + * + * @todo Remove the $fetchbody parameter that generates an extraneous HEAD request + * + * @see ParseUrl::getSiteinfo + * + * @param string $url A user-submitted URL + * @param int $depth The current redirection recursion level (internal) + * @param bool $fetchbody Wether to fetch the body or not after the HEAD requests + * @return string A canonical URL + */ +function original_url($url, $depth = 1, $fetchbody = false) { + $a = get_app(); + + $url = strip_tracking_query_params($url); + if ($depth > 10) return($url); @@ -1044,8 +838,8 @@ function original_url($url, $depth=1, $fetchbody = false) { if ($http_code == 0) return($url); - if ((($curl_info['http_code'] == "301") OR ($curl_info['http_code'] == "302")) - AND (($curl_info['redirect_url'] != "") OR ($curl_info['location'] != ""))) { + if ((($curl_info['http_code'] == "301") || ($curl_info['http_code'] == "302")) + && (($curl_info['redirect_url'] != "") || ($curl_info['location'] != ""))) { if ($curl_info['redirect_url'] != "") return(original_url($curl_info['redirect_url'], ++$depth, $fetchbody)); else @@ -1061,7 +855,7 @@ function original_url($url, $depth=1, $fetchbody = false) { return($url); // if it isn't a HTML file then exit - if (($curl_info["content_type"] != "") AND !strstr(strtolower($curl_info["content_type"]),"html")) + if (($curl_info["content_type"] != "") && !strstr(strtolower($curl_info["content_type"]),"html")) return($url); $stamp1 = microtime(true); @@ -1121,23 +915,23 @@ function short_link($url) { $yourls->set('password', $yourls_password); $yourls->set('ssl', $yourls_ssl); $yourls->set('yourls-url', $yourls_url); - $slinky->set_cascade( array($yourls, new Slinky_UR1ca(), new Slinky_Trim(), new Slinky_IsGd(), new Slinky_TinyURL())); + $slinky->set_cascade(array($yourls, new Slinky_Ur1ca(), new Slinky_TinyURL())); } else { // setup a cascade of shortening services // try to get a short link from these services - // in the order ur1.ca, trim, id.gd, tinyurl - $slinky->set_cascade(array(new Slinky_UR1ca(), new Slinky_Trim(), new Slinky_IsGd(), new Slinky_TinyURL())); + // in the order ur1.ca, tinyurl + $slinky->set_cascade(array(new Slinky_Ur1ca(), new Slinky_TinyURL())); } return $slinky->short(); } /** * @brief Encodes content to json - * + * * This function encodes an array to json format * and adds an application/json HTTP header to the output. * After finishing the process is getting killed. - * + * * @param array $x The input content */ function json_return_and_die($x) { @@ -1145,3 +939,57 @@ function json_return_and_die($x) { echo json_encode($x); killme(); } + +/** + * @brief Find the matching part between two url + * + * @param string $url1 + * @param string $url2 + * @return string The matching part + */ +function matching_url($url1, $url2) { + + if (($url1 == "") || ($url2 == "")) + return ""; + + $url1 = normalise_link($url1); + $url2 = normalise_link($url2); + + $parts1 = parse_url($url1); + $parts2 = parse_url($url2); + + if (!isset($parts1["host"]) || !isset($parts2["host"])) + return ""; + + if ($parts1["scheme"] != $parts2["scheme"]) + return ""; + + if ($parts1["host"] != $parts2["host"]) + return ""; + + if ($parts1["port"] != $parts2["port"]) + return ""; + + $match = $parts1["scheme"]."://".$parts1["host"]; + + if ($parts1["port"]) + $match .= ":".$parts1["port"]; + + $pathparts1 = explode("/", $parts1["path"]); + $pathparts2 = explode("/", $parts2["path"]); + + $i = 0; + $path = ""; + do { + $path1 = $pathparts1[$i]; + $path2 = $pathparts2[$i]; + + if ($path1 == $path2) + $path .= $path1."/"; + + } while (($path1 == $path2) && ($i++ <= count($pathparts1))); + + $match .= $path; + + return normalise_link($match); +}