X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=include%2Fnetwork.php;h=351951846c6f38ef1f006b662f4335c5514ccd54;hb=636a0333646b0ca941a3eadaa246002001915303;hp=a4538e88ed79a92d54fc0182b150c6cfaad264b0;hpb=a19289a8bf5c240e36eeb2a17767b5682971fcc2;p=friendica.git diff --git a/include/network.php b/include/network.php index a4538e88ed..351951846c 100644 --- a/include/network.php +++ b/include/network.php @@ -1,14 +1,70 @@ $timeout, + 'accept_content'=>$accept_content, + 'cookiejar'=>$cookiejar + )); + + return($ret['body']); +} + +/** + * @brief fetches an URL. + * + * @param string $url URL to fetch + * @param boolean $binary default false + * TRUE if asked to return binary results (file download) + * @param int $redirects The recursion counter for internal use - default 0 + * @param array $opts (optional parameters) assoziative array with: + * 'accept_content' => supply Accept: header with 'accept_content' as the value + * 'timeout' => int Timeout in seconds, default system config value or 60 seconds + * 'http_auth' => username:password + * 'novalidate' => do not validate SSL certs, default is to validate using our CA list + * 'nobody' => only return the header + * 'cookiejar' => path to cookie jar file + * + * @return array an assoziative array with: + * int 'return_code' => HTTP return code or 0 if timeout or failure + * boolean 'success' => boolean true (if HTTP 2xx result) or false + * string 'redirect_url' => in case of redirect, content was finally retrieved from this URL + * string 'header' => HTTP headers + * string 'body' => fetched content + */ +function z_fetch_url($url,$binary = false, &$redirects = 0, $opts=array()) { + + $ret = array('return_code' => 0, 'success' => false, 'header' => "", 'body' => ""); + + $stamp1 = microtime(true); $a = get_app(); @@ -19,38 +75,45 @@ function fetch_url($url,$binary = false, &$redirects = 0, $timeout = 0, $accept_ @curl_setopt($ch, CURLOPT_HEADER, true); - if($cookiejar) { - curl_setopt($ch, CURLOPT_COOKIEJAR, $cookiejar); - curl_setopt($ch, CURLOPT_COOKIEFILE, $cookiejar); + if(x($opts,"cookiejar")) { + curl_setopt($ch, CURLOPT_COOKIEJAR, $opts["cookiejar"]); + curl_setopt($ch, CURLOPT_COOKIEFILE, $opts["cookiejar"]); } -// These settings aren't needed. We're following the location already. +// These settings aren't needed. We're following the location already. // @curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); // @curl_setopt($ch, CURLOPT_MAXREDIRS, 5); - if (!is_null($accept_content)){ + if (x($opts,'accept_content')){ curl_setopt($ch,CURLOPT_HTTPHEADER, array ( - "Accept: " . $accept_content + "Accept: " . $opts['accept_content'] )); } @curl_setopt($ch, CURLOPT_RETURNTRANSFER,true); - //@curl_setopt($ch, CURLOPT_USERAGENT, "Friendica"); - @curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; Friendica)"); + @curl_setopt($ch, CURLOPT_USERAGENT, $a->get_useragent()); - if(intval($timeout)) { - @curl_setopt($ch, CURLOPT_TIMEOUT, $timeout); + + if(x($opts,'headers')){ + @curl_setopt($ch, CURLOPT_HTTPHEADER, $opts['headers']); } - else { + if(x($opts,'nobody')){ + @curl_setopt($ch, CURLOPT_NOBODY, $opts['nobody']); + } + if(x($opts,'timeout')){ + @curl_setopt($ch, CURLOPT_TIMEOUT, $opts['timeout']); + } else { $curl_time = intval(get_config('system','curl_timeout')); @curl_setopt($ch, CURLOPT_TIMEOUT, (($curl_time !== false) ? $curl_time : 60)); } + // by default we will allow self-signed certs // but you can override this $check_cert = get_config('system','verifyssl'); @curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, (($check_cert) ? true : false)); + @curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, (($check_cert) ? 2 : false)); $prx = get_config('system','proxy'); if(strlen($prx)) { @@ -69,11 +132,15 @@ function fetch_url($url,$binary = false, &$redirects = 0, $timeout = 0, $accept_ // if it throws any errors. $s = @curl_exec($ch); + if (curl_errno($ch) !== CURLE_OK) { + logger('fetch_url error fetching '.$url.': '.curl_error($ch), LOGGER_NORMAL); + } $base = $s; $curl_info = @curl_getinfo($ch); + $http_code = $curl_info['http_code']; -// logger('fetch_url:' . $http_code . ' data: ' . $s); + logger('fetch_url '.$url.': '.$http_code." ".$s, LOGGER_DATA); $header = ''; // Pull out multiple headers, e.g. proxy and continuation headers @@ -85,6 +152,10 @@ function fetch_url($url,$binary = false, &$redirects = 0, $timeout = 0, $accept_ $base = substr($base,strlen($chunk)); } + $a->set_curl_code($http_code); + $a->set_curl_content_type($curl_info['content_type']); + $a->set_curl_headers($header); + if($http_code == 301 || $http_code == 302 || $http_code == 303 || $http_code == 307) { $new_location_info = @parse_url($curl_info["redirect_url"]); $old_location_info = @parse_url($curl_info["url"]); @@ -95,34 +166,63 @@ function fetch_url($url,$binary = false, &$redirects = 0, $timeout = 0, $accept_ $newurl = $new_location_info["scheme"]."://".$new_location_info["host"].$old_location_info["path"]; $matches = array(); - if (preg_match('/(Location:|URI:)(.*?)\n/', $header, $matches)) { + if (preg_match('/(Location:|URI:)(.*?)\n/i', $header, $matches)) { $newurl = trim(array_pop($matches)); } if(strpos($newurl,'/') === 0) $newurl = $old_location_info["scheme"]."://".$old_location_info["host"].$newurl; if (filter_var($newurl, FILTER_VALIDATE_URL)) { $redirects++; - return fetch_url($newurl,$binary,$redirects,$timeout,$accept_content,$cookiejar); + @curl_close($ch); + return z_fetch_url($newurl,$binary, $redirects, $opts); } } + $a->set_curl_code($http_code); $a->set_curl_content_type($curl_info['content_type']); $body = substr($s,strlen($header)); - $a->set_curl_headers($header); + + + + $rc = intval($http_code); + $ret['return_code'] = $rc; + $ret['success'] = (($rc >= 200 && $rc <= 299) ? true : false); + $ret['redirect_url'] = $url; + if(! $ret['success']) { + $ret['error'] = curl_error($ch); + $ret['debug'] = $curl_info; + logger('z_fetch_url: error: ' . $url . ': ' . $ret['error'], LOGGER_DEBUG); + logger('z_fetch_url: debug: ' . print_r($curl_info,true), LOGGER_DATA); + } + $ret['body'] = substr($s,strlen($header)); + $ret['header'] = $header; + if(x($opts,'debug')) { + $ret['debug'] = $curl_info; + } @curl_close($ch); $a->save_timestamp($stamp1, "network"); - return($body); -}} + return($ret); + +} // post request to $url. $params is an array of post variables. -if(! function_exists('post_url')) { +/** + * @brief Post request to $url + * + * @param string $url URL to post + * @param mixed $params + * @param string $headers HTTP headers + * @param integer $redirects Recursion counter for internal use - default = 0 + * @param integer $timeout The timeout in seconds, default system config value or 60 seconds + * + * @return string The content + */ function post_url($url,$params, $headers = null, &$redirects = 0, $timeout = 0) { - $stamp1 = microtime(true); $a = get_app(); @@ -130,11 +230,13 @@ function post_url($url,$params, $headers = null, &$redirects = 0, $timeout = 0) if(($redirects > 8) || (! $ch)) return false; + logger("post_url: start ".$url, LOGGER_DATA); + curl_setopt($ch, CURLOPT_HEADER, true); curl_setopt($ch, CURLOPT_RETURNTRANSFER,true); curl_setopt($ch, CURLOPT_POST,1); curl_setopt($ch, CURLOPT_POSTFIELDS,$params); - curl_setopt($ch, CURLOPT_USERAGENT, "Friendica"); + curl_setopt($ch, CURLOPT_USERAGENT, $a->get_useragent()); if(intval($timeout)) { curl_setopt($ch, CURLOPT_TIMEOUT, $timeout); @@ -158,6 +260,7 @@ function post_url($url,$params, $headers = null, &$redirects = 0, $timeout = 0) $check_cert = get_config('system','verifyssl'); curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, (($check_cert) ? true : false)); + curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, (($check_cert) ? 2 : false)); $prx = get_config('system','proxy'); if(strlen($prx)) { curl_setopt($ch, CURLOPT_HTTPPROXYTUNNEL, 1); @@ -178,6 +281,8 @@ function post_url($url,$params, $headers = null, &$redirects = 0, $timeout = 0) $curl_info = curl_getinfo($ch); $http_code = $curl_info['http_code']; + logger("post_url: result ".$http_code." - ".$url, LOGGER_DATA); + $header = ''; // Pull out multiple headers, e.g. proxy and continuation headers @@ -189,17 +294,19 @@ function post_url($url,$params, $headers = null, &$redirects = 0, $timeout = 0) $base = substr($base,strlen($chunk)); } - if($http_code == 301 || $http_code == 302 || $http_code == 303) { - $matches = array(); - preg_match('/(Location:|URI:)(.*?)\n/', $header, $matches); - $newurl = trim(array_pop($matches)); + if($http_code == 301 || $http_code == 302 || $http_code == 303 || $http_code == 307) { + $matches = array(); + preg_match('/(Location:|URI:)(.*?)\n/', $header, $matches); + $newurl = trim(array_pop($matches)); if(strpos($newurl,'/') === 0) $newurl = $old_location_info["scheme"] . "://" . $old_location_info["host"] . $newurl; - if (filter_var($newurl, FILTER_VALIDATE_URL)) { - $redirects++; - return fetch_url($newurl,false,$redirects,$timeout); - } - } + if (filter_var($newurl, FILTER_VALIDATE_URL)) { + $redirects++; + logger("post_url: redirect ".$url." to ".$newurl); + return post_url($newurl,$params, $headers, $redirects, $timeout); + //return fetch_url($newurl,false,$redirects,$timeout); + } + } $a->set_curl_code($http_code); $body = substr($s,strlen($header)); @@ -209,14 +316,15 @@ function post_url($url,$params, $headers = null, &$redirects = 0, $timeout = 0) $a->save_timestamp($stamp1, "network"); + logger("post_url: end ".$url, LOGGER_DATA); + return($body); -}} +} // Generic XML return -// Outputs a basic dfrn XML status structure to STDOUT, with a variable -// of $st and an optional text of $message and terminates the current process. +// Outputs a basic dfrn XML status structure to STDOUT, with a variable +// of $st and an optional text of $message and terminates the current process. -if(! function_exists('xml_status')) { function xml_status($st, $message = '') { $xml_message = ((strlen($message)) ? "\t" . xmlify($message) . "\r\n" : ''); @@ -228,396 +336,54 @@ function xml_status($st, $message = '') { echo ''."\r\n"; echo "\r\n\t$st\r\n$xml_message\r\n"; killme(); -}} +} -if(! function_exists('http_status_exit')) { -function http_status_exit($val) { +/** + * @brief Send HTTP status header and exit. + * + * @param integer $val HTTP status result value + * @param array $description optional message + * 'title' => header title + * 'description' => optional message + */ - $err = ''; - if($val >= 400) +function http_status_exit($val, $description = array()) { + $err = ''; + if($val >= 400) { $err = 'Error'; + if (!isset($description["title"])) + $description["title"] = $err." ".$val; + } if($val >= 200 && $val < 300) $err = 'OK'; - logger('http_status_exit ' . $val); + logger('http_status_exit ' . $val); header($_SERVER["SERVER_PROTOCOL"] . ' ' . $val . ' ' . $err); - killme(); - -}} - - -// convert an XML document to a normalised, case-corrected array -// used by webfinger - -if(! function_exists('convert_xml_element_to_array')) { -function convert_xml_element_to_array($xml_element, &$recursion_depth=0) { - - // If we're getting too deep, bail out - if ($recursion_depth > 512) { - return(null); - } - - if (!is_string($xml_element) && - !is_array($xml_element) && - (get_class($xml_element) == 'SimpleXMLElement')) { - $xml_element_copy = $xml_element; - $xml_element = get_object_vars($xml_element); - } - - if (is_array($xml_element)) { - $result_array = array(); - if (count($xml_element) <= 0) { - return (trim(strval($xml_element_copy))); - } - - foreach($xml_element as $key=>$value) { - - $recursion_depth++; - $result_array[strtolower($key)] = - convert_xml_element_to_array($value, $recursion_depth); - $recursion_depth--; - } - if ($recursion_depth == 0) { - $temp_array = $result_array; - $result_array = array( - strtolower($xml_element_copy->getName()) => $temp_array, - ); - } - - return ($result_array); - - } else { - return (trim(strval($xml_element))); - } -}} - -// Given an email style address, perform webfinger lookup and -// return the resulting DFRN profile URL, or if no DFRN profile URL -// is located, returns an OStatus subscription template (prefixed -// with the string 'stat:' to identify it as on OStatus template). -// If this isn't an email style address just return $s. -// Return an empty string if email-style addresses but webfinger fails, -// or if the resultant personal XRD doesn't contain a supported -// subscription/friend-request attribute. - -// amended 7/9/2011 to return an hcard which could save potentially loading -// a lengthy content page to scrape dfrn attributes - -if(! function_exists('webfinger_dfrn')) { -function webfinger_dfrn($s,&$hcard) { - if(! strstr($s,'@')) { - return $s; - } - $profile_link = ''; - - $links = webfinger($s); - logger('webfinger_dfrn: ' . $s . ':' . print_r($links,true), LOGGER_DATA); - if(count($links)) { - foreach($links as $link) { - if($link['@attributes']['rel'] === NAMESPACE_DFRN) - $profile_link = $link['@attributes']['href']; - if($link['@attributes']['rel'] === NAMESPACE_OSTATUSSUB) - $profile_link = 'stat:' . $link['@attributes']['template']; - if($link['@attributes']['rel'] === 'http://microformats.org/profile/hcard') - $hcard = $link['@attributes']['href']; - } - } - return $profile_link; -}} - -// Given an email style address, perform webfinger lookup and -// return the array of link attributes from the personal XRD file. -// On error/failure return an empty array. - - -if(! function_exists('webfinger')) { -function webfinger($s, $debug = false) { - $host = ''; - if(strstr($s,'@')) { - $host = substr($s,strpos($s,'@') + 1); - } - if(strlen($host)) { - $tpl = fetch_lrdd_template($host); - logger('webfinger: lrdd template: ' . $tpl); - if(strlen($tpl)) { - $pxrd = str_replace('{uri}', urlencode('acct:' . $s), $tpl); - logger('webfinger: pxrd: ' . $pxrd); - $links = fetch_xrd_links($pxrd); - if(! count($links)) { - // try with double slashes - $pxrd = str_replace('{uri}', urlencode('acct://' . $s), $tpl); - logger('webfinger: pxrd: ' . $pxrd); - $links = fetch_xrd_links($pxrd); - } - return $links; - } - } - return array(); -}} - -if(! function_exists('lrdd')) { -function lrdd($uri, $debug = false) { - - $a = get_app(); - - // default priority is host priority, host-meta first - - $priority = 'host'; - - // All we have is an email address. Resource-priority is irrelevant - // because our URI isn't directly resolvable. - - if(strstr($uri,'@')) { - return(webfinger($uri)); - } - - // get the host meta file - - $host = @parse_url($uri); - - if($host) { - $url = ((x($host,'scheme')) ? $host['scheme'] : 'http') . '://'; - $url .= $host['host'] . '/.well-known/host-meta' ; - } - else - return array(); - - logger('lrdd: constructed url: ' . $url); - - $xml = fetch_url($url); - - $headers = $a->get_curl_headers(); - - if (! $xml) - return array(); - - logger('lrdd: host_meta: ' . $xml, LOGGER_DATA); - - if(! stristr($xml,'].*)>.*rel\=[\'\"]lrdd[\'\"]/',$line,$matches)) { - return(fetch_xrd_links($matches[1])); - break; - } - } - } - } - - - // priority 'resource' - - $html = fetch_url($uri); - $headers = $a->get_curl_headers(); - logger('lrdd: headers=' . $headers, LOGGER_DEBUG); - - // don't try and parse raw xml as html - if(! strstr($html,'getElementsByTagName('link'); - foreach($items as $item) { - $x = $item->getAttribute('rel'); - if($x == "lrdd") { - $pagelink = $item->getAttribute('href'); - break; - } - } - } - } - - if(isset($pagelink)) - return(fetch_xrd_links($pagelink)); - - // next look in HTTP headers - - $lines = explode("\n",$headers); - if(count($lines)) { - foreach($lines as $line) { - // TODO alter the following regex to support multiple relations (space separated) - if((stristr($line,'link:')) && preg_match('/<([^>].*)>.*rel\=[\'\"]lrdd[\'\"]/',$line,$matches)) { - $pagelink = $matches[1]; - break; - } - // don't try and run feeds through the html5 parser - if(stristr($line,'content-type:') && ((stristr($line,'application/atom+xml')) || (stristr($line,'application/rss+xml')))) - return array(); - if(stristr($html,' $description["title"], + '$description' => $description["description"])); } - if(isset($pagelink)) - return(fetch_xrd_links($pagelink)); - - // If we haven't found any links, return the host xrd links (which we have already fetched) - - if(isset($links)) - return $links; - - return array(); - -}} - - - -// Given a host name, locate the LRDD template from that -// host. Returns the LRDD template or an empty string on -// error/failure. - -if(! function_exists('fetch_lrdd_template')) { -function fetch_lrdd_template($host) { - $tpl = ''; - - $url1 = 'https://' . $host . '/.well-known/host-meta' ; - $url2 = 'http://' . $host . '/.well-known/host-meta' ; - $links = fetch_xrd_links($url1); - logger('fetch_lrdd_template from: ' . $url1); - logger('template (https): ' . print_r($links,true)); - if(! count($links)) { - logger('fetch_lrdd_template from: ' . $url2); - $links = fetch_xrd_links($url2); - logger('template (http): ' . print_r($links,true)); - } - if(count($links)) { - foreach($links as $link) - if($link['@attributes']['rel'] && $link['@attributes']['rel'] === 'lrdd' && (!$link['@attributes']['type'] || $link['@attributes']['type'] === 'application/xrd+xml')) - $tpl = $link['@attributes']['template']; - } - if(! strpos($tpl,'{uri}')) - $tpl = ''; - return $tpl; -}} - -// Given a URL, retrieve the page as an XRD document. -// Return an array of links. -// on error/failure return empty array. - -if(! function_exists('fetch_xrd_links')) { -function fetch_xrd_links($url) { - - $xrd_timeout = intval(get_config('system','xrd_timeout')); - $redirects = 0; - $xml = fetch_url($url,false,$redirects,(($xrd_timeout) ? $xrd_timeout : 20), "application/xrd+xml"); - - logger('fetch_xrd_links: ' . $xml, LOGGER_DATA); - - if ((! $xml) || (! stristr($xml,''),array('href="','"/>'),$xml); - - $h = parse_xml_string($xml); - if(! $h) - return array(); - - $arr = convert_xml_element_to_array($h); - - $links = array(); - - if(isset($arr['xrd']['link'])) { - $link = $arr['xrd']['link']; - if(! isset($link[0])) - $links = array($link); - else - $links = $link; - } - if(isset($arr['xrd']['alias'])) { - $alias = $arr['xrd']['alias']; - if(! isset($alias[0])) - $aliases = array($alias); - else - $aliases = $alias; - if(is_array($aliases) && count($aliases)) { - foreach($aliases as $alias) { - $links[]['@attributes'] = array('rel' => 'alias' , 'href' => $alias); - } - } - } - - logger('fetch_xrd_links: ' . print_r($links,true), LOGGER_DATA); - - return $links; - -}} - + killme(); -// Take a URL from the wild, prepend http:// if necessary -// and check DNS to see if it's real (or check if is a valid IP address) -// return true if it's OK, false if something is wrong with it +} -if(! function_exists('validate_url')) { +/** + * @brief Check URL to se if ts's real + * + * Take a URL from the wild, prepend http:// if necessary + * and check DNS to see if it's real (or check if is a valid IP address) + * + * @param string $url The URL to be validated + * @return boolean True if it's a valid URL, fals if something wrong with it + */ function validate_url(&$url) { + if(get_config('system','disable_url_validation')) + return true; // no naked subdomains (allow localhost for tests) if(strpos($url,'.') === false && strpos($url,'/localhost/') === false) return false; @@ -629,11 +395,14 @@ function validate_url(&$url) { return true; } return false; -}} - -// checks that email is an actual resolvable internet address +} -if(! function_exists('validate_email')) { +/** + * @brief Checks that email is an actual resolvable internet address + * + * @param string $addr The email address + * @return boolean True if it's a valid email address, false if it's not + */ function validate_email($addr) { if(get_config('system','disable_email_validation')) @@ -643,18 +412,21 @@ function validate_email($addr) { return false; $h = substr($addr,strpos($addr,'@') + 1); - if(($h) && (dns_get_record($h, DNS_A + DNS_CNAME + DNS_PTR + DNS_MX) || filter_var($h['host'], FILTER_VALIDATE_IP) )) { + if(($h) && (dns_get_record($h, DNS_A + DNS_CNAME + DNS_PTR + DNS_MX) || filter_var($h, FILTER_VALIDATE_IP) )) { return true; } return false; -}} - -// Check $url against our list of allowed sites, -// wildcards allowed. If allowed_sites is unset return true; -// If url is allowed, return true. -// otherwise, return false +} -if(! function_exists('allowed_url')) { +/** + * @brief Check if URL is allowed + * + * Check $url against our list of allowed sites, + * wildcards allowed. If allowed_sites is unset return true; + * + * @param string $url URL which get tested + * @return boolean True if url is allowed otherwise return false + */ function allowed_url($url) { $h = @parse_url($url); @@ -683,20 +455,23 @@ function allowed_url($url) { foreach($allowed as $a) { $pat = strtolower(trim($a)); if(($fnmatch && fnmatch($pat,$host)) || ($pat == $host)) { - $found = true; + $found = true; break; } } } return $found; -}} - -// check if email address is allowed to register here. -// Compare against our list (wildcards allowed). -// Returns false if not allowed, true if allowed or if -// allowed list is not configured. +} -if(! function_exists('allowed_email')) { +/** + * @brief Check if email address is allowed to register here. + * + * Compare against our list (wildcards allowed). + * + * @param type $email + * @return boolean False if not allowed, true if allowed + * or if allowed list is not configured + */ function allowed_email($email) { @@ -717,16 +492,14 @@ function allowed_email($email) { foreach($allowed as $a) { $pat = strtolower(trim($a)); if(($fnmatch && fnmatch($pat,$domain)) || ($pat == $domain)) { - $found = true; + $found = true; break; } } } return $found; -}} - +} -if(! function_exists('avatar_img')) { function avatar_img($email) { $a = get_app(); @@ -743,11 +516,11 @@ function avatar_img($email) { logger('Avatar: ' . $avatar['email'] . ' ' . $avatar['url'], LOGGER_DEBUG); return $avatar['url']; -}} +} -if(! function_exists('parse_xml_string')) { function parse_xml_string($s,$strict = true) { + /// @todo Move this function to the xml class if($strict) { if(! strstr($s,'save_timestamp($stamp1, "file"); - } - // guess mimetype from headers or filename $type = guess_image_type($mtch[1],true); @@ -879,7 +591,7 @@ function scale_external_images($srctext, $include_link = true, $scale_replace = $new_height = $ph->getHeight(); logger('scale_external_images: ' . $orig_width . '->' . $new_width . 'w ' . $orig_height . '->' . $new_height . 'h' . ' match: ' . $mtch[0], LOGGER_DEBUG); $s = str_replace($mtch[0],'[img=' . $new_width . 'x' . $new_height. ']' . $scaled . '[/img]' - . "\n" . (($include_link) + . "\n" . (($include_link) ? '[url=' . $mtch[1] . ']' . t('view full size') . '[/url]' . "\n" : ''),$s); logger('scale_external_images: new string: ' . $s, LOGGER_DEBUG); @@ -919,8 +631,8 @@ function fix_contact_ssl_policy(&$contact,$new_policy) { } if($ssl_changed) { - q("update contact set - url = '%s', + q("update contact set + url = '%s', request = '%s', notify = '%s', poll = '%s', @@ -938,167 +650,220 @@ function fix_contact_ssl_policy(&$contact,$new_policy) { } } +function original_url($url, $depth=1, $fetchbody = false) { + $a = get_app(); -/** - * xml2array() will convert the given XML text to an array in the XML structure. - * Link: http://www.bin-co.com/php/scripts/xml2array/ - * Portions significantly re-written by mike@macgirvin.com for Friendica (namespaces, lowercase tags, get_attribute default changed, more...) - * Arguments : $contents - The XML text - * $namespaces - true or false include namespace information in the returned array as array elements. - * $get_attributes - 1 or 0. If this is 1 the function will get the attributes as well as the tag values - this results in a different array structure in the return value. - * $priority - Can be 'tag' or 'attribute'. This will change the way the resulting array sturcture. For 'tag', the tags are given more importance. - * Return: The parsed XML in an array form. Use print_r() to see the resulting array structure. - * Examples: $array = xml2array(file_get_contents('feed.xml')); - * $array = xml2array(file_get_contents('feed.xml', true, 1, 'attribute')); - */ + // Remove Analytics Data from Google and other tracking platforms + $urldata = parse_url($url); + if (is_string($urldata["query"])) { + $query = $urldata["query"]; + parse_str($query, $querydata); -function xml2array($contents, $namespaces = true, $get_attributes=1, $priority = 'attribute') { - if(!$contents) return array(); + if (is_array($querydata)) + foreach ($querydata AS $param=>$value) + if (in_array($param, array("utm_source", "utm_medium", "utm_term", "utm_content", "utm_campaign", + "wt_mc", "pk_campaign", "pk_kwd", "mc_cid", "mc_eid", + "fb_action_ids", "fb_action_types", "fb_ref", + "awesm", "wtrid", + "woo_campaign", "woo_source", "woo_medium", "woo_content", "woo_term"))) { - if(!function_exists('xml_parser_create')) { - logger('xml2array: parser function missing'); - return array(); - } + $pair = $param."=".urlencode($value); + $url = str_replace($pair, "", $url); + // Second try: if the url isn't encoded completely + $pair = $param."=".str_replace(" ", "+", $value); + $url = str_replace($pair, "", $url); - libxml_use_internal_errors(true); - libxml_clear_errors(); + // Third try: Maybey the url isn't encoded at all + $pair = $param."=".$value; + $url = str_replace($pair, "", $url); - if($namespaces) - $parser = @xml_parser_create_ns("UTF-8",':'); - else - $parser = @xml_parser_create(); + $url = str_replace(array("?&", "&&"), array("?", ""), $url); + } - if(! $parser) { - logger('xml2array: xml_parser_create: no resource'); - return array(); + if (substr($url, -1, 1) == "?") + $url = substr($url, 0, -1); } - xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, "UTF-8"); - // http://minutillo.com/steve/weblog/2004/6/17/php-xml-and-character-encodings-a-tale-of-sadness-rage-and-data-loss - xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0); - xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, 1); - @xml_parse_into_struct($parser, trim($contents), $xml_values); - @xml_parser_free($parser); + if ($depth > 10) + return($url); - if(! $xml_values) { - logger('xml2array: libxml: parse error: ' . $contents, LOGGER_DATA); - foreach(libxml_get_errors() as $err) - logger('libxml: parse: ' . $err->code . " at " . $err->line . ":" . $err->column . " : " . $err->message, LOGGER_DATA); - libxml_clear_errors(); - return; + $url = trim($url, "'"); + + $stamp1 = microtime(true); + + $siteinfo = array(); + $ch = curl_init(); + curl_setopt($ch, CURLOPT_URL, $url); + curl_setopt($ch, CURLOPT_HEADER, 1); + curl_setopt($ch, CURLOPT_NOBODY, 1); + curl_setopt($ch, CURLOPT_TIMEOUT, 10); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + curl_setopt($ch, CURLOPT_USERAGENT, $a->get_useragent()); + + $header = curl_exec($ch); + $curl_info = @curl_getinfo($ch); + $http_code = $curl_info['http_code']; + curl_close($ch); + + $a->save_timestamp($stamp1, "network"); + + if ($http_code == 0) + return($url); + + if ((($curl_info['http_code'] == "301") OR ($curl_info['http_code'] == "302")) + AND (($curl_info['redirect_url'] != "") OR ($curl_info['location'] != ""))) { + if ($curl_info['redirect_url'] != "") + return(original_url($curl_info['redirect_url'], ++$depth, $fetchbody)); + else + return(original_url($curl_info['location'], ++$depth, $fetchbody)); } - //Initializations - $xml_array = array(); - $parents = array(); - $opened_tags = array(); - $arr = array(); - - $current = &$xml_array; // Reference - - // Go through the tags. - $repeated_tag_index = array(); // Multiple tags with same name will be turned into an array - foreach($xml_values as $data) { - unset($attributes,$value); // Remove existing values, or there will be trouble - - // This command will extract these variables into the foreach scope - // tag(string), type(string), level(int), attributes(array). - extract($data); // We could use the array by itself, but this cooler. - - $result = array(); - $attributes_data = array(); - - if(isset($value)) { - if($priority == 'tag') $result = $value; - else $result['value'] = $value; // Put the value in a assoc array if we are in the 'Attribute' mode - } - - //Set the attributes too. - if(isset($attributes) and $get_attributes) { - foreach($attributes as $attr => $val) { - if($priority == 'tag') $attributes_data[$attr] = $val; - else $result['@attributes'][$attr] = $val; // Set all the attributes in a array called 'attr' - } - } - - // See tag status and do the needed. - if($namespaces && strpos($tag,':')) { - $namespc = substr($tag,0,strrpos($tag,':')); - $tag = strtolower(substr($tag,strlen($namespc)+1)); - $result['@namespace'] = $namespc; + // Check for redirects in the meta elements of the body if there are no redirects in the header. + if (!$fetchbody) + return(original_url($url, ++$depth, true)); + + // if the file is too large then exit + if ($curl_info["download_content_length"] > 1000000) + return($url); + + // if it isn't a HTML file then exit + if (($curl_info["content_type"] != "") AND !strstr(strtolower($curl_info["content_type"]),"html")) + return($url); + + $stamp1 = microtime(true); + + $ch = curl_init(); + curl_setopt($ch, CURLOPT_URL, $url); + curl_setopt($ch, CURLOPT_HEADER, 0); + curl_setopt($ch, CURLOPT_NOBODY, 0); + curl_setopt($ch, CURLOPT_TIMEOUT, 10); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + curl_setopt($ch, CURLOPT_USERAGENT, $a->get_useragent()); + + $body = curl_exec($ch); + curl_close($ch); + + $a->save_timestamp($stamp1, "network"); + + if (trim($body) == "") + return($url); + + // Check for redirect in meta elements + $doc = new DOMDocument(); + @$doc->loadHTML($body); + + $xpath = new DomXPath($doc); + + $list = $xpath->query("//meta[@content]"); + foreach ($list as $node) { + $attr = array(); + if ($node->attributes->length) + foreach ($node->attributes as $attribute) + $attr[$attribute->name] = $attribute->value; + + if (@$attr["http-equiv"] == 'refresh') { + $path = $attr["content"]; + $pathinfo = explode(";", $path); + $content = ""; + foreach ($pathinfo AS $value) + if (substr(strtolower($value), 0, 4) == "url=") + return(original_url(substr($value, 4), ++$depth)); } - $tag = strtolower($tag); - - if($type == "open") { // The starting of the tag '' - $parent[$level-1] = &$current; - if(!is_array($current) or (!in_array($tag, array_keys($current)))) { // Insert New tag - $current[$tag] = $result; - if($attributes_data) $current[$tag. '_attr'] = $attributes_data; - $repeated_tag_index[$tag.'_'.$level] = 1; - - $current = &$current[$tag]; - - } else { // There was another element with the same tag name - - if(isset($current[$tag][0])) { // If there is a 0th element it is already an array - $current[$tag][$repeated_tag_index[$tag.'_'.$level]] = $result; - $repeated_tag_index[$tag.'_'.$level]++; - } else { // This section will make the value an array if multiple tags with the same name appear together - $current[$tag] = array($current[$tag],$result); // This will combine the existing item and the new item together to make an array - $repeated_tag_index[$tag.'_'.$level] = 2; - - if(isset($current[$tag.'_attr'])) { // The attribute of the last(0th) tag must be moved as well - $current[$tag]['0_attr'] = $current[$tag.'_attr']; - unset($current[$tag.'_attr']); - } - - } - $last_item_index = $repeated_tag_index[$tag.'_'.$level]-1; - $current = &$current[$tag][$last_item_index]; - } - - } elseif($type == "complete") { // Tags that ends in 1 line '' - //See if the key is already taken. - if(!isset($current[$tag])) { //New Key - $current[$tag] = $result; - $repeated_tag_index[$tag.'_'.$level] = 1; - if($priority == 'tag' and $attributes_data) $current[$tag. '_attr'] = $attributes_data; - - } else { // If taken, put all things inside a list(array) - if(isset($current[$tag][0]) and is_array($current[$tag])) { // If it is already an array... - - // ...push the new element into that array. - $current[$tag][$repeated_tag_index[$tag.'_'.$level]] = $result; - - if($priority == 'tag' and $get_attributes and $attributes_data) { - $current[$tag][$repeated_tag_index[$tag.'_'.$level] . '_attr'] = $attributes_data; - } - $repeated_tag_index[$tag.'_'.$level]++; - - } else { // If it is not an array... - $current[$tag] = array($current[$tag],$result); //...Make it an array using using the existing value and the new value - $repeated_tag_index[$tag.'_'.$level] = 1; - if($priority == 'tag' and $get_attributes) { - if(isset($current[$tag.'_attr'])) { // The attribute of the last(0th) tag must be moved as well - - $current[$tag]['0_attr'] = $current[$tag.'_attr']; - unset($current[$tag.'_attr']); - } - - if($attributes_data) { - $current[$tag][$repeated_tag_index[$tag.'_'.$level] . '_attr'] = $attributes_data; - } - } - $repeated_tag_index[$tag.'_'.$level]++; // 0 and 1 indexes are already taken - } - } - - } elseif($type == 'close') { // End of tag '' - $current = &$parent[$level-1]; - } - } - - return($xml_array); + } + + return($url); +} + +function short_link($url) { + require_once('library/slinky.php'); + $slinky = new Slinky($url); + $yourls_url = get_config('yourls','url1'); + if ($yourls_url) { + $yourls_username = get_config('yourls','username1'); + $yourls_password = get_config('yourls', 'password1'); + $yourls_ssl = get_config('yourls', 'ssl1'); + $yourls = new Slinky_YourLS(); + $yourls->set('username', $yourls_username); + $yourls->set('password', $yourls_password); + $yourls->set('ssl', $yourls_ssl); + $yourls->set('yourls-url', $yourls_url); + $slinky->set_cascade(array($yourls, new Slinky_Ur1ca(), new Slinky_TinyURL())); + } else { + // setup a cascade of shortening services + // try to get a short link from these services + // in the order ur1.ca, tinyurl + $slinky->set_cascade(array(new Slinky_Ur1ca(), new Slinky_TinyURL())); + } + return $slinky->short(); +} + +/** + * @brief Encodes content to json + * + * This function encodes an array to json format + * and adds an application/json HTTP header to the output. + * After finishing the process is getting killed. + * + * @param array $x The input content + */ +function json_return_and_die($x) { + header("content-type: application/json"); + echo json_encode($x); + killme(); +} + +/** + * @brief Find the matching part between two url + * + * @param string $url1 + * @param string $url2 + * @return string The matching part + */ +function matching_url($url1, $url2) { + + if (($url1 == "") OR ($url2 == "")) + return ""; + + $url1 = normalise_link($url1); + $url2 = normalise_link($url2); + + $parts1 = parse_url($url1); + $parts2 = parse_url($url2); + + if (!isset($parts1["host"]) OR !isset($parts2["host"])) + return ""; + + if ($parts1["scheme"] != $parts2["scheme"]) + return ""; + + if ($parts1["host"] != $parts2["host"]) + return ""; + + if ($parts1["port"] != $parts2["port"]) + return ""; + + $match = $parts1["scheme"]."://".$parts1["host"]; + + if ($parts1["port"]) + $match .= ":".$parts1["port"]; + + $pathparts1 = explode("/", $parts1["path"]); + $pathparts2 = explode("/", $parts2["path"]); + + $i = 0; + $path = ""; + do { + $path1 = $pathparts1[$i]; + $path2 = $pathparts2[$i]; + + if ($path1 == $path2) + $path .= $path1."/"; + + } while (($path1 == $path2) AND ($i++ <= count($pathparts1))); + + $match .= $path; + + return normalise_link($match); }