use DOMDocument;
use DomXPath;
-require_once 'library/slinky.php';
-
class Network
{
/**
- * @brief Curl wrapper
+ * Curl wrapper
*
* If binary flag is true, return binary results.
* Set the cookiejar argument to a string (e.g. "/tmp/friendica-cookies.txt")
* to preserve cookies from one request to the next.
*
+ * @brief Curl wrapper
* @param string $url URL to fetch
* @param boolean $binary default false
* TRUE if asked to return binary results (file download)
*/
public static function fetchUrl($url, $binary = false, &$redirects = 0, $timeout = 0, $accept_content = null, $cookiejar = 0)
{
- $ret = self::curl(
+ $ret = self::fetchUrlFull($url, $binary, $redirects, $timeout, $accept_content, $cookiejar);
+
+ return $ret['body'];
+ }
+
+ /**
+ * Curl wrapper with array of return values.
+ *
+ * Works like @ref fetchUrl and takes the same parameters, but returns the whole result
+ * array collected during the fetch instead of only the fetched body.
+ *
+ * @brief Curl wrapper with array of return values.
+ * @param string $url URL to fetch
+ * @param boolean $binary default false
+ * TRUE if asked to return binary results (file download)
+ * @param integer $redirects The recursion counter for internal use - default 0
+ * @param integer $timeout Timeout in seconds, default system config value or 60 seconds
+ * @param string $accept_content supply Accept: header with 'accept_content' as the value
+ * @param string $cookiejar Path to cookie jar file
+ *
+ * @return array With all relevant information, 'body' contains the actual fetched content.
+ */
+ public static function fetchUrlFull($url, $binary = false, &$redirects = 0, $timeout = 0, $accept_content = null, $cookiejar = 0)
+ {
+ return self::curl(
$url,
$binary,
$redirects,
'cookiejar'=>$cookiejar
]
);
-
- return($ret['body']);
}
/**
$a = get_app();
+ $parts = parse_url($url);
+ $path_parts = explode('/', defaults($parts, 'path', ''));
+ foreach ($path_parts as $part) {
+ if (strlen($part) <> mb_strlen($part)) {
+ $parts2[] = rawurlencode($part);
+ } else {
+ $parts2[] = $part;
+ }
+ }
+ $parts['path'] = implode('/', $parts2);
+ $url = self::unparseURL($parts);
+
if (self::isUrlBlocked($url)) {
logger('domain of ' . $url . ' is blocked', LOGGER_DATA);
return $ret;
}
if (curl_errno($ch) !== CURLE_OK) {
- logger('error fetching ' . $url . ': ' . curl_error($ch), LOGGER_NORMAL);
+ logger('error fetching ' . $url . ': ' . curl_error($ch), LOGGER_INFO);
}
$ret['errno'] = curl_errno($ch);
$newurl = $curl_info['redirect_url'];
- if (($new_location_info['path'] == '') && ( $new_location_info['host'] != '')) {
+ if (empty($new_location_info['path']) && !empty($new_location_info['host'])) {
$newurl = $new_location_info['scheme'] . '://' . $new_location_info['host'] . $old_location_info['path'];
}
if (strpos($newurl, '/') === 0) {
$newurl = $old_location_info["scheme"]."://".$old_location_info["host"].$newurl;
}
+ $old_location_query = @parse_url($url, PHP_URL_QUERY);
+
+ if ($old_location_query != '') {
+ $newurl .= '?' . $old_location_query;
+ }
if (filter_var($newurl, FILTER_VALIDATE_URL)) {
$redirects++;
if ($http_code == 301 || $http_code == 302 || $http_code == 303 || $http_code == 307) {
$matches = [];
+ $new_location_info = @parse_url($curl_info['redirect_url']);
+ $old_location_info = @parse_url($curl_info['url']);
+
preg_match('/(Location:|URI:)(.*?)\n/', $header, $matches);
$newurl = trim(array_pop($matches));
/// @TODO Really suppress function outcomes? Why not find them + debug them?
$h = @parse_url($url);
- if ((is_array($h)) && (@dns_get_record($h['host'], DNS_A + DNS_CNAME + DNS_PTR) || filter_var($h['host'], FILTER_VALIDATE_IP) )) {
+ if ((is_array($h)) && (@dns_get_record($h['host'], DNS_A + DNS_CNAME) || filter_var($h['host'], FILTER_VALIDATE_IP) )) {
return $url;
}
$h = substr($addr, strpos($addr, '@') + 1);
- if (($h) && (dns_get_record($h, DNS_A + DNS_CNAME + DNS_PTR + DNS_MX) || filter_var($h, FILTER_VALIDATE_IP) )) {
+ // The @ silences the warning dns_get_record() emits on temporary DNS server errors, see https://stackoverflow.com/questions/36280957/dns-get-record-a-temporary-server-error-occurred
+ if ($h && (@dns_get_record($h, DNS_A + DNS_MX) || filter_var($h, FILTER_VALIDATE_IP) )) {
+ return true;
+ }
+ if ($h && @dns_get_record($h, DNS_CNAME + DNS_MX)) {
return true;
}
return false;
*/
public static function isUrlBlocked($url)
{
- $h = @parse_url($url);
-
- if (! $h) {
- return true;
+ $host = @parse_url($url, PHP_URL_HOST);
+ if (!$host) {
+ return false;
}
$domain_blocklist = Config::get('system', 'blocklist', []);
- if (! $domain_blocklist) {
+ if (!$domain_blocklist) {
return false;
}
- $host = strtolower($h['host']);
-
foreach ($domain_blocklist as $domain_block) {
- if (strtolower($domain_block['domain']) == $host) {
+ if (strcasecmp($domain_block['domain'], $host) === 0) {
return true;
}
}
public static function stripTrackingQueryParams($url)
{
$urldata = parse_url($url);
- if (is_string($urldata["query"])) {
+ if (!empty($urldata["query"])) {
$query = $urldata["query"];
parse_str($query, $querydata);
$url = self::stripTrackingQueryParams($url);
if ($depth > 10) {
- return($url);
+ return $url;
}
$url = trim($url, "'");
$a->save_timestamp($stamp1, "network");
if ($http_code == 0) {
- return($url);
+ return $url;
}
- if ((($curl_info['http_code'] == "301") || ($curl_info['http_code'] == "302"))
- && (($curl_info['redirect_url'] != "") || ($curl_info['location'] != ""))
- ) {
- if ($curl_info['redirect_url'] != "") {
- return(self::finalUrl($curl_info['redirect_url'], ++$depth, $fetchbody));
- } else {
- return(self::finalUrl($curl_info['location'], ++$depth, $fetchbody));
+ if (in_array($http_code, ['301', '302'])) {
+ if (!empty($curl_info['redirect_url'])) {
+ return self::finalUrl($curl_info['redirect_url'], ++$depth, $fetchbody);
+ } elseif (!empty($curl_info['location'])) {
+ return self::finalUrl($curl_info['location'], ++$depth, $fetchbody);
}
}
// if the file is too large then exit
if ($curl_info["download_content_length"] > 1000000) {
- return($url);
+ return $url;
}
// if it isn't a HTML file then exit
- if (($curl_info["content_type"] != "") && !strstr(strtolower($curl_info["content_type"]), "html")) {
- return($url);
+ if (!empty($curl_info["content_type"]) && !strstr(strtolower($curl_info["content_type"]), "html")) {
+ return $url;
}
$stamp1 = microtime(true);
$a->save_timestamp($stamp1, "network");
if (trim($body) == "") {
- return($url);
+ return $url;
}
// Check for redirect in meta elements
$pathinfo = explode(";", $path);
foreach ($pathinfo as $value) {
if (substr(strtolower($value), 0, 4) == "url=") {
- return(self::finalUrl(substr($value, 4), ++$depth));
+ return self::finalUrl(substr($value, 4), ++$depth);
}
}
}
return $url;
}
- public static function shortenUrl($url)
- {
- $slinky = new Slinky($url);
- $yourls_url = Config::get('yourls', 'url1');
- if ($yourls_url) {
- $yourls_username = Config::get('yourls', 'username1');
- $yourls_password = Config::get('yourls', 'password1');
- $yourls_ssl = Config::get('yourls', 'ssl1');
- $yourls = new Slinky_YourLS();
- $yourls->set('username', $yourls_username);
- $yourls->set('password', $yourls_password);
- $yourls->set('ssl', $yourls_ssl);
- $yourls->set('yourls-url', $yourls_url);
- $slinky->set_cascade([$yourls, new Slinky_Ur1ca(), new Slinky_TinyURL()]);
- } else {
- // setup a cascade of shortening services
- // try to get a short link from these services
- // in the order ur1.ca, tinyurl
- $slinky->set_cascade([new Slinky_Ur1ca(), new Slinky_TinyURL()]);
- }
- return $slinky->short();
- }
-
/**
* @brief Find the matching part between two url
*
return "";
}
+ if (empty($parts1["scheme"])) {
+ $parts1["scheme"] = '';
+ }
+ if (empty($parts2["scheme"])) {
+ $parts2["scheme"] = '';
+ }
+
if ($parts1["scheme"] != $parts2["scheme"]) {
return "";
}
+ if (empty($parts1["host"])) {
+ $parts1["host"] = '';
+ }
+ if (empty($parts2["host"])) {
+ $parts2["host"] = '';
+ }
+
if ($parts1["host"] != $parts2["host"]) {
return "";
}
+ if (empty($parts1["port"])) {
+ $parts1["port"] = '';
+ }
+ if (empty($parts2["port"])) {
+ $parts2["port"] = '';
+ }
+
if ($parts1["port"] != $parts2["port"]) {
return "";
}
$match .= ":".$parts1["port"];
}
+ if (empty($parts1["path"])) {
+ $parts1["path"] = '';
+ }
+ if (empty($parts2["path"])) {
+ $parts2["path"] = '';
+ }
+
$pathparts1 = explode("/", $parts1["path"]);
$pathparts2 = explode("/", $parts2["path"]);
$i = 0;
$path = "";
do {
- $path1 = $pathparts1[$i];
- $path2 = $pathparts2[$i];
+ $path1 = defaults($pathparts1, $i, '');
+ $path2 = defaults($pathparts2, $i, '');
if ($path1 == $path2) {
$path .= $path1."/";