3 * @copyright Copyright (C) 2020, Friendica
5 * @license GNU AGPL version 3 or any later version
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU Affero General Public License as
9 * published by the Free Software Foundation, either version 3 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Affero General Public License for more details.
17 * You should have received a copy of the GNU Affero General Public License
18 * along with this program. If not, see <https://www.gnu.org/licenses/>.
22 namespace Friendica\Util;
26 use Friendica\Core\Hook;
27 use Friendica\Core\Logger;
28 use Friendica\Core\System;
/**
 * Return raw post data from a post request
 *
 * Reads the request body from the `php://input` stream.
 *
 * @return string|false post data, or false when the stream cannot be read
 */
public static function postdata()
{
    return file_get_contents('php://input');
}
/**
 * Check URL to see if it's real
 *
 * Take a URL from the wild, prepend http:// if necessary
 * and check DNS to see if it's real (or check if is a valid IP address)
 *
 * @param string $url The URL to be validated
 * @return string|boolean The actual working URL, false else
 * @throws \Friendica\Network\HTTPException\InternalServerErrorException
 */
public static function isUrlValid(string $url)
{
    if (DI::config()->get('system', 'disable_url_validation')) {
        return $url;
    }

    // no naked subdomains (allow localhost for tests)
    if (strpos($url, '.') === false && strpos($url, '/localhost/') === false) {
        return false;
    }

    // Only a real "http://" / "https://" prefix counts as a scheme. Testing
    // just the first four characters would leave host names that merely
    // start with "http" (e.g. "httpbin.org") without a scheme.
    if (!preg_match('#^https?://#i', $url)) {
        $url = 'http://' . $url;
    }

    /// @TODO Really suppress function outcomes? Why not find them + debug them?
    $host = @parse_url($url, PHP_URL_HOST);
    if (empty($host)) {
        return false;
    }

    // An IP literal needs no DNS round trip, so it is checked first.
    if (filter_var($host, FILTER_VALIDATE_IP) || @dns_get_record($host, DNS_A + DNS_CNAME)) {
        return $url;
    }

    return false;
}
/**
 * Checks that email is an actual resolvable internet address
 *
 * Only the domain part (everything after the first "@") is checked: it has to
 * resolve through DNS or be an IP address. The check is skipped entirely when
 * the `system.disable_email_validation` config value is set.
 *
 * @param string $addr The email address
 * @return boolean True if it's a valid email address, false if it's not
 */
public static function isEmailDomainValid(string $addr)
{
    if (DI::config()->get('system', 'disable_email_validation')) {
        return true;
    }

    // Rejects both a missing "@" (false) and an "@" at position 0 (empty local part).
    $at = strpos($addr, '@');
    if (!$at) {
        return false;
    }

    $domain = substr($addr, $at + 1);
    if (!$domain) {
        return false;
    }

    // Concerning the @ see here: https://stackoverflow.com/questions/36280957/dns-get-record-a-temporary-server-error-occurred
    if (@dns_get_record($domain, DNS_A + DNS_MX) || filter_var($domain, FILTER_VALIDATE_IP)) {
        return true;
    }

    // Second attempt with a different record set.
    return (bool) @dns_get_record($domain, DNS_CNAME + DNS_MX);
}
/**
 * Check if URL is allowed
 *
 * Check $url against our list of allowed sites,
 * wildcards allowed. If allowed_sites is unset return true;
 *
 * @param string $url URL which get tested
 * @return boolean True if url is allowed otherwise return false
 */
public static function isUrlAllowed(string $url)
{
    $h = @parse_url($url);
    if (!$h) {
        // NOTE(review): an unparseable URL is assumed to be "not allowed" — confirm.
        return false;
    }

    $str_allowed = DI::config()->get('system', 'allowed_sites');
    if (!$str_allowed) {
        return true;
    }

    // With an allow list in place, a URL without a host cannot match any entry.
    if (empty($h['host'])) {
        return false;
    }

    $host = strtolower($h['host']);

    // always allow our own site (SERVER_NAME may be unset, e.g. outside a web request)
    if ($host == strtolower($_SERVER['SERVER_NAME'] ?? '')) {
        return true;
    }

    $fnmatch = function_exists('fnmatch');

    foreach (explode(',', $str_allowed) as $a) {
        $pat = strtolower(trim($a));
        if (($fnmatch && fnmatch($pat, $host)) || ($pat == $host)) {
            return true;
        }
    }

    return false;
}
/**
 * Checks if the provided url domain is on the domain blocklist.
 *
 * Every entry of the `system.blocklist` config value is read as an array with
 * a `domain` key holding a wildcard pattern, which is matched
 * case-insensitively against the host of the URL.
 *
 * @param string $url The url to check the domain from
 *
 * @return boolean True if the host matches a blocklist pattern, false if not
 */
public static function isUrlBlocked(string $url)
{
    $host = @parse_url($url, PHP_URL_HOST);
    if (!$host) {
        // NOTE(review): the former docblock said a malformed URL counts as
        // blocked, but the return value of this guard could not be seen in the
        // reviewed excerpt. "Not blocked" is assumed here — confirm.
        return false;
    }

    $domain_blocklist = DI::config()->get('system', 'blocklist', []);
    if (!$domain_blocklist) {
        return false;
    }

    $host = strtolower($host);

    foreach ($domain_blocklist as $domain_block) {
        // Skip entries without a usable pattern instead of raising a notice.
        if (empty($domain_block['domain'])) {
            continue;
        }

        if (fnmatch(strtolower($domain_block['domain']), $host)) {
            return true;
        }
    }

    return false;
}
/**
 * Check if email address is allowed to register here.
 *
 * Compare against our list (wildcards allowed).
 *
 * @param string $email email address
 * @return boolean False if not allowed, true if allowed
 *                 or if allowed list is not configured
 * @throws \Friendica\Network\HTTPException\InternalServerErrorException
 */
public static function isEmailDomainAllowed(string $email)
{
    // Without this check a missing "@" turned into offset 1 (false + 1) and
    // the first character of the input was silently cut off.
    $at = strpos($email, '@');
    if ($at === false) {
        return false;
    }

    $domain = strtolower(substr($email, $at + 1));
    if (!$domain) {
        return false;
    }

    $str_allowed = DI::config()->get('system', 'allowed_email', '');
    if (empty($str_allowed)) {
        return true;
    }

    $allowed = explode(',', $str_allowed);

    return self::isDomainAllowed($domain, $allowed);
}
/**
 * Checks for the existence of a domain in a domain list
 *
 * List entries are trimmed and lower-cased and may contain fnmatch()
 * wildcards. The domain is lower-cased as well, so the comparison is
 * case-insensitive on both sides.
 *
 * @param string $domain
 * @param array  $domain_list
 * @return boolean True if any entry matches the domain
 */
public static function isDomainAllowed(string $domain, array $domain_list)
{
    $domain = strtolower($domain);

    foreach ($domain_list as $item) {
        $pat = strtolower(trim($item));
        if (fnmatch($pat, $domain) || ($pat == $domain)) {
            return true;
        }
    }

    return false;
}
/**
 * Look up an avatar URL for an email address
 *
 * Passes a request array (`size` 300, `email`, `url`, `success`) to the
 * `avatar_lookup` hook. When no hook reports success, or no URL was provided,
 * the default image `/images/person-300.jpg` below the base URL is returned.
 *
 * @param string $email The email address to look up
 * @return string The avatar URL
 */
public static function lookupAvatarByEmail(string $email)
{
    $avatar = [
        'size'    => 300,
        'email'   => $email,
        'url'     => '',
        'success' => false,
    ];

    // presumably hooks fill in 'url' and set 'success' — verify against the addons
    Hook::callAll('avatar_lookup', $avatar);

    if (empty($avatar['success']) || empty($avatar['url'])) {
        $avatar['url'] = DI::baseUrl() . '/images/person-300.jpg';
    }

    Logger::log('Avatar: ' . $avatar['email'] . ' ' . $avatar['url'], Logger::DEBUG);
    return $avatar['url'];
}
/**
 * Remove Google Analytics and other tracking platforms params from URL
 *
 * The query string is filtered pair by pair on its raw (still encoded) form,
 * so the remaining parameters keep their exact spelling and order. A URL that
 * carries no tracking parameter is returned unchanged.
 *
 * @param string $url Any user-submitted URL that may contain tracking params
 * @return string The same URL stripped of tracking parameters
 */
public static function stripTrackingQueryParams(string $url)
{
    // NOTE(review): "awesm" and "wtrid" sit on a line that was elided from the
    // reviewed excerpt and are restored from the surrounding list — confirm.
    $trackingParams = [
        'utm_source', 'utm_medium', 'utm_term', 'utm_content', 'utm_campaign',
        'wt_mc', 'pk_campaign', 'pk_kwd', 'mc_cid', 'mc_eid',
        'fb_action_ids', 'fb_action_types', 'fb_ref',
        'awesm', 'wtrid',
        'woo_campaign', 'woo_source', 'woo_medium', 'woo_content', 'woo_term',
    ];

    if (empty(parse_url($url, PHP_URL_QUERY))) {
        return $url;
    }

    $queryStart = strpos($url, '?');
    if ($queryStart === false) {
        return $url;
    }

    // Split into "head ? query # fragment" without decoding anything.
    $fragmentStart = strpos($url, '#', $queryStart);
    $head = substr($url, 0, $queryStart);
    if ($fragmentStart === false) {
        $rawQuery = substr($url, $queryStart + 1);
        $tail = '';
    } else {
        $rawQuery = substr($url, $queryStart + 1, $fragmentStart - $queryStart - 1);
        $tail = substr($url, $fragmentStart);
    }

    $kept = [];
    $stripped = false;
    foreach (explode('&', $rawQuery) as $pair) {
        $name = urldecode(explode('=', $pair, 2)[0]);

        // "utm_source[]=x" and "utm_source[a]=x" belong to "utm_source"
        $bracket = strpos($name, '[');
        if ($bracket !== false) {
            $name = substr($name, 0, $bracket);
        }

        if (in_array($name, $trackingParams, true)) {
            $stripped = true;
            continue;
        }

        $kept[] = $pair;
    }

    if (!$stripped) {
        return $url;
    }

    // Re-joining the surviving pairs keeps neighbours separated by exactly one
    // "&" and leaves neither a dangling "&" nor a dangling "?".
    $newQuery = implode('&', $kept);

    return $head . ($newQuery !== '' ? '?' . $newQuery : '') . $tail;
}
/**
 * Add a missing base path (scheme and host) to a given url
 *
 * The URL is returned unchanged when it is empty, cannot be parsed, already
 * has a scheme, or when the base path itself has no scheme. A protocol-relative
 * URL ("//host/path") keeps its own host and only receives the base scheme.
 * Otherwise the scheme, host and port of the base path are put in front of it.
 *
 * @param string $url
 * @param string $basepath
 * @return string
 */
public static function addBasePath(string $url, string $basepath)
{
    $parsed = empty($url) ? false : parse_url($url);
    $base   = parse_url($basepath);

    if (empty($parsed) || !empty($parsed['scheme']) || empty($base['scheme'])) {
        return $url;
    }

    // Protocol-relative URL: the host is already there, only the scheme is missing.
    if (!empty($parsed['host'])) {
        $parsed['scheme'] = $base['scheme'];
        return self::unparseURL($parsed);
    }

    $prefix = [
        'scheme' => $base['scheme'],
        'host'   => $base['host'] ?? null,
    ];
    // Keep a non-default port of the base, otherwise the result points to the wrong server.
    if (!empty($base['port'])) {
        $prefix['port'] = $base['port'];
    }

    $relative = parse_url('/' . ltrim($url, '/'));
    if (!is_array($relative)) {
        return $url;
    }

    return self::unparseURL(array_merge($prefix, $relative));
}
/**
 * Returns the original URL of the provided URL
 *
 * This function strips tracking query params and follows redirections, either
 * through HTTP code or meta refresh tags. Stops after 10 redirections.
 *
 * @todo Remove the $fetchbody parameter that generates an extraneous HEAD request
 *
 * @see ParseUrl::getSiteinfo
 *
 * @param string $url       A user-submitted URL
 * @param int    $depth     The current redirection recursion level (internal)
 * @param bool   $fetchbody Whether to fetch the body or not after the HEAD requests
 * @return string A canonical URL
 * @throws \Friendica\Network\HTTPException\InternalServerErrorException
 */
public static function finalUrl(string $url, int $depth = 1, bool $fetchbody = false)
{
    $url = self::stripTrackingQueryParams($url);

    if ($depth > 10) {
        return $url;
    }

    $url = trim($url, "'");

    // First pass: headers only
    $stamp1 = microtime(true);

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HEADER, 1);
    curl_setopt($ch, CURLOPT_NOBODY, 1);
    curl_setopt($ch, CURLOPT_TIMEOUT, 10);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_USERAGENT, DI::httpRequest()->getUserAgent());

    curl_exec($ch);
    $curl_info = @curl_getinfo($ch);
    $http_code = $curl_info['http_code'];
    curl_close($ch);

    DI::profiler()->saveTimestamp($stamp1, "network", System::callstack());

    if ($http_code == 0) {
        return $url;
    }

    // 303, 307 and 308 are redirects as well, not only 301 and 302
    if (in_array((int)$http_code, [301, 302, 303, 307, 308], true)) {
        if (!empty($curl_info['redirect_url'])) {
            return self::finalUrl($curl_info['redirect_url'], ++$depth, $fetchbody);
        } elseif (!empty($curl_info['location'])) {
            return self::finalUrl($curl_info['location'], ++$depth, $fetchbody);
        }
    }

    // Check for redirects in the meta elements of the body if there are no redirects in the header.
    if (!$fetchbody) {
        return self::finalUrl($url, ++$depth, true);
    }

    // if the file is too large then exit
    if ($curl_info["download_content_length"] > 1000000) {
        return $url;
    }

    // if it isn't a HTML file then exit
    if (!empty($curl_info["content_type"]) && !strstr(strtolower($curl_info["content_type"]), "html")) {
        return $url;
    }

    // Second pass: fetch the body
    $stamp1 = microtime(true);

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_NOBODY, 0);
    curl_setopt($ch, CURLOPT_TIMEOUT, 10);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_USERAGENT, DI::httpRequest()->getUserAgent());

    $body = curl_exec($ch);
    curl_close($ch);

    DI::profiler()->saveTimestamp($stamp1, "network", System::callstack());

    if (!is_string($body) || trim($body) == "") {
        return $url;
    }

    // Check for redirect in meta elements
    $target = self::getMetaRefreshUrl($body);
    if ($target !== null) {
        return self::finalUrl($target, ++$depth);
    }

    return $url;
}

/**
 * Extracts the target of the first `<meta http-equiv="refresh">` element
 *
 * @param string $body HTML document
 * @return string|null The redirect target, null if the document has none
 */
private static function getMetaRefreshUrl(string $body)
{
    // Fully qualified on purpose: this file lives in a namespace.
    $doc = new \DOMDocument();
    @$doc->loadHTML($body);

    $xpath = new \DOMXPath($doc);

    foreach ($xpath->query("//meta[@content]") as $node) {
        // Collected per node, so attributes never leak from one element to the next
        $attr = [];
        foreach ($node->attributes as $attribute) {
            $attr[strtolower($attribute->name)] = $attribute->value;
        }

        if (strtolower(trim($attr['http-equiv'] ?? '')) !== 'refresh') {
            continue;
        }

        // Usual form is "<seconds>; url=<target>" with optional blanks around
        // ";" and "=". Everything after "url=" is the target, so a ";" inside
        // the target does not cut it short.
        if (preg_match('/(?:^|;)\s*url\s*=\s*(.*)$/is', $attr['content'] ?? '', $matches)) {
            $target = trim($matches[1], " \t\n\r\0\x0B'\"");
            if ($target !== '') {
                return $target;
            }
        }
    }

    return null;
}
/**
 * Find the matching part between two url
 *
 * Both URLs are normalised first. Scheme, host and port have to be the same,
 * otherwise nothing matches. The result is then extended by the leading path
 * segments that both URLs have in common.
 *
 * @param string $url1
 * @param string $url2
 * @return string The matching part, an empty string if there is none
 */
public static function getUrlMatch(string $url1, string $url2)
{
    if (($url1 == "") || ($url2 == "")) {
        return "";
    }

    $url1 = Strings::normaliseLink($url1);
    $url2 = Strings::normaliseLink($url2);

    $parts1 = parse_url($url1);
    $parts2 = parse_url($url2);

    if (!isset($parts1["host"]) || !isset($parts2["host"])) {
        return "";
    }

    // A missing or empty component counts as an empty string on both sides.
    foreach (["scheme", "host", "port"] as $component) {
        $parts1[$component] = empty($parts1[$component]) ? '' : $parts1[$component];
        $parts2[$component] = empty($parts2[$component]) ? '' : $parts2[$component];

        if ($parts1[$component] != $parts2[$component]) {
            return "";
        }
    }

    $match = $parts1["scheme"] . "://" . $parts1["host"];

    if ($parts1["port"]) {
        $match .= ":" . $parts1["port"];
    }

    $pathparts1 = explode("/", empty($parts1["path"]) ? '' : $parts1["path"]);
    $pathparts2 = explode("/", empty($parts2["path"]) ? '' : $parts2["path"]);

    // Collect the common leading segments. The walk stops at the end of the
    // shorter path, and segments are compared strictly so that numeric-looking
    // ones such as "10" and "1e1" are not taken for equal.
    $path = "";
    $limit = min(count($pathparts1), count($pathparts2));
    for ($i = 0; ($i < $limit) && ($pathparts1[$i] === $pathparts2[$i]); $i++) {
        $path .= $pathparts1[$i] . "/";
    }

    $match .= $path;

    // NOTE(review): the trailing "/" left by the loop is presumably removed by
    // normaliseLink() — confirm.
    return Strings::normaliseLink($match);
}
/**
 * Glue url parts together
 *
 * Accepts the component array produced by parse_url() (scheme, user, pass,
 * host, port, path, query, fragment); missing components are left out.
 *
 * @param array $parsed URL parts
 *
 * @return string The glued URL
 */
public static function unparseURL(array $parsed)
{
    $get = function ($key) use ($parsed) {
        return isset($parsed[$key]) ? $parsed[$key] : null;
    };

    $pass     = $get('pass');
    $user     = $get('user');
    $userinfo = $pass !== null ? "$user:$pass" : $user;
    $port     = $get('port');

    // Cast to string: strlen(null) is deprecated as of PHP 8.1
    $scheme   = (string)$get('scheme');
    $query    = (string)$get('query');
    $fragment = (string)$get('fragment');

    $authority = ($userinfo !== null ? $userinfo . "@" : '') .
        $get('host') .
        ($port ? ":$port" : '');

    return (strlen($scheme) ? $scheme . ":" : '') .
        (strlen($authority) ? "//" . $authority : '') .
        $get('path') .
        (strlen($query) ? "?" . $query : '') .
        (strlen($fragment) ? "#" . $fragment : '');
}
/**
 * Switch the scheme of an url between http and https
 *
 * Only the scheme at the very start of the URL is changed; "http://" or
 * "https://" occurring later on (e.g. inside a query parameter) is left
 * alone. URLs with any other scheme, or without one, are returned unchanged.
 *
 * @param string $url URL
 *
 * @return string switched URL
 */
public static function switchScheme(string $url)
{
    $scheme = parse_url($url, PHP_URL_SCHEME);
    if (empty($scheme)) {
        return $url;
    }

    switch (strtolower($scheme)) {
        case 'http':
            $switched = 'https';
            break;
        case 'https':
            $switched = 'http';
            break;
        default:
            return $url;
    }

    // Only touch URLs that really begin with "<scheme>://"
    $prefix = $scheme . '://';
    if (strncmp($url, $prefix, strlen($prefix)) !== 0) {
        return $url;
    }

    return $switched . substr($url, strlen($scheme));
}
/**
 * Adds query string parameters to the provided URI. Replace the value of existing keys.
 *
 * @param string $path
 * @param array  $additionalParams Associative array of parameters
 * @return string
 */
public static function appendQueryParam(string $path, array $additionalParams)
{
    $parsed = parse_url($path);
    if (!is_array($parsed)) {
        // parse_url() gave up: keep the input as the path instead of writing into `false`
        $parsed = ['path' => $path];
    }

    $params = [];
    if (!empty($parsed['query'])) {
        parse_str($parsed['query'], $params);
    }

    // array_replace() also replaces numeric keys, which array_merge() would
    // renumber and append instead.
    $params = array_replace($params, $additionalParams);

    $parsed['query'] = http_build_query($params);

    return self::unparseURL($parsed);
}
/**
 * Generates ETag and Last-Modified response headers and checks them against
 * If-None-Match and If-Modified-Since request headers if present.
 *
 * Blocking function, sends 304 headers and exits if check passes.
 *
 * @param string $etag          The page etag
 * @param string $last_modified The page last modification UTC date
 */
public static function checkEtagModified(string $etag, string $last_modified)
{
    $last_modified = DateTimeFormat::utc($last_modified, 'D, d M Y H:i:s') . ' GMT';

    /**
     * @see http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.26
     */
    $if_none_match     = filter_input(INPUT_SERVER, 'HTTP_IF_NONE_MATCH');
    $if_modified_since = filter_input(INPUT_SERVER, 'HTTP_IF_MODIFIED_SINCE');
    $flag_not_modified = null;
    if ($if_none_match) {
        // Strip the weak marker and the quotes. The pattern accepts W/"abc",
        // "abc" and abc; if it ever fails, the etag is used as it is, so the
        // needle below is never undefined.
        $etagTrimmed = $etag;
        if (preg_match('/^(?:W\/)?"?([^"]+)"?$/i', $etag, $result)) {
            $etagTrimmed = $result[1];
        }

        // Lazy exact ETag match, could check weak/strong ETags
        $flag_not_modified = $if_none_match == '*'
            || ($etagTrimmed !== '' && strpos($if_none_match, $etagTrimmed) !== false);
    }

    if ($if_modified_since && (!$if_none_match || $flag_not_modified)) {
        // Lazy exact Last-Modified match, could check If-Modified-Since validity
        $flag_not_modified = $if_modified_since == $last_modified;
    }

    header('Etag: ' . $etag);
    header('Last-Modified: ' . $last_modified);

    if ($flag_not_modified) {
        header("HTTP/1.1 304 Not Modified");
        // NOTE(review): the statement after the 304 header was not visible in
        // the reviewed excerpt; `exit` follows the "sends 304 headers and
        // exits" contract of the docblock — confirm.
        exit;
    }
}