<?php
/**
 * @copyright Copyright (C) 2010-2022, the Friendica project
 *
 * @license GNU AGPL version 3 or any later version
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 *
 */
namespace Friendica\Util;

use Friendica\Core\Hook;
use Friendica\Core\Logger;
use Friendica\DI;
use Friendica\Model\Contact;
use Friendica\Network\HTTPClient\Client\HttpClientAccept;
use Friendica\Network\HTTPClient\Client\HttpClientOptions;
use Friendica\Network\HTTPException\NotModifiedException;
use GuzzleHttp\Psr7\Uri;
/**
 * Return raw post data from a post request
 *
 * Reads the whole `php://input` stream in a single call.
 *
 * @return string|false post data, or false if the stream could not be read
 */
public static function postdata()
{
	return file_get_contents('php://input');
}
/**
 * Check URL to see if it's real
 *
 * Take a URL from the wild, prepend http:// if necessary
 * and check DNS to see if it's real (or check if is a valid IP address).
 * http(s) URLs are additionally probed with a HEAD request (retried as GET
 * for servers rejecting HEAD); a reported redirect target replaces the URL.
 * For https URLs a TLS connection is opened and the validity period of the
 * peer certificate is compared against the current time.
 *
 * All checks are skipped when the "system.disable_url_validation" config
 * value is set.
 *
 * @param string $url The URL to be validated
 * @return string|boolean The actual working URL, false else
 * @throws \Friendica\Network\HTTPException\InternalServerErrorException
 */
public static function isUrlValid(string $url)
{
	if (DI::config()->get('system', 'disable_url_validation')) {
		return $url;
	}

	// no naked subdomains (allow localhost for tests)
	if (strpos($url, '.') === false && strpos($url, '/localhost/') === false) {
		return false;
	}

	if (substr($url, 0, 4) != 'http') {
		$url = 'http://' . $url;
	}

	$xrd_timeout = DI::config()->get('system', 'xrd_timeout');
	$host = parse_url($url, PHP_URL_HOST);

	// The host must either resolve via DNS or be a literal IP address.
	// The "@" silences resolver warnings; a failed lookup simply counts as "not valid".
	if (empty($host) || !(@dns_get_record($host . '.', DNS_A + DNS_AAAA + DNS_CNAME) || filter_var($host, FILTER_VALIDATE_IP))) {
		return false;
	}

	if (in_array(parse_url($url, PHP_URL_SCHEME), ['https', 'http'])) {
		$curlResult = DI::httpClient()->head($url, [HttpClientOptions::TIMEOUT => $xrd_timeout]);

		// Workaround for systems that can't handle a HEAD request. Don't retry on timeouts.
		if (!$curlResult->isSuccess() && ($curlResult->getReturnCode() >= 400) && !in_array($curlResult->getReturnCode(), [408, 504])) {
			$curlResult = DI::httpClient()->get($url, HttpClientAccept::DEFAULT, [HttpClientOptions::TIMEOUT => $xrd_timeout]);
		}

		if (!$curlResult->isSuccess()) {
			Logger::notice('Url not reachable', ['host' => $host, 'url' => $url]);
			return false;
		} elseif ($curlResult->isRedirectUrl()) {
			$url = $curlResult->getRedirectUrl();

			// The certificate check below must target the host of the URL that
			// is actually returned, not the host we were redirected away from.
			$host = parse_url($url, PHP_URL_HOST) ?: $host;
		}
	}

	// Check if the certificate is valid for this hostname
	if (parse_url($url, PHP_URL_SCHEME) == 'https') {
		$port = parse_url($url, PHP_URL_PORT) ?? 443;

		$context = stream_context_create(["ssl" => ['capture_peer_cert' => true]]);

		$resource = @stream_socket_client('ssl://' . $host . ':' . $port, $errno, $errstr, $xrd_timeout, STREAM_CLIENT_CONNECT, $context);
		if (empty($resource)) {
			Logger::notice('Invalid certificate', ['host' => $host]);
			return false;
		}

		$cert = stream_context_get_params($resource);
		if (empty($cert['options']['ssl']['peer_certificate'])) {
			fclose($resource);
			Logger::notice('Invalid certificate params', ['host' => $host]);
			return false;
		}

		$certinfo = openssl_x509_parse($cert['options']['ssl']['peer_certificate']);

		// The socket was only needed to capture the certificate
		fclose($resource);

		if (empty($certinfo)) {
			Logger::notice('Invalid certificate information', ['host' => $host]);
			return false;
		}

		$valid_from = date(DATE_RFC2822, $certinfo['validFrom_time_t']);
		$valid_to   = date(DATE_RFC2822, $certinfo['validTo_time_t']);

		if ($certinfo['validFrom_time_t'] > time()) {
			Logger::notice('Certificate validity starts after current date', ['host' => $host, 'from' => $valid_from, 'to' => $valid_to]);
			return false;
		}

		if ($certinfo['validTo_time_t'] < time()) {
			Logger::notice('Certificate validity ends before current date', ['host' => $host, 'from' => $valid_from, 'to' => $valid_to]);
			return false;
		}
	}

	return $url;
}
/**
 * Checks that email is an actual resolvable internet address
 *
 * The part after the first "@" must either have DNS records
 * (A, AAAA, MX or CNAME) or be a literal IP address. The check is skipped
 * (always true) when the "system.disable_email_validation" config value is set.
 *
 * @param string $addr The email address
 * @return boolean True if it's a valid email address, false if it's not
 */
public static function isEmailDomainValid(string $addr)
{
	if (DI::config()->get('system', 'disable_email_validation')) {
		return true;
	}

	// Deliberately loose: rejects both a missing "@" (false) and a leading "@" (position 0)
	if (! strpos($addr, '@')) {
		return false;
	}

	$h = substr($addr, strpos($addr, '@') + 1);
	if (!$h) {
		return false;
	}

	// Concerning the @ see here: https://stackoverflow.com/questions/36280957/dns-get-record-a-temporary-server-error-occurred
	if (@dns_get_record($h, DNS_A + DNS_AAAA + DNS_MX) || filter_var($h, FILTER_VALIDATE_IP)) {
		return true;
	}

	if (@dns_get_record($h, DNS_CNAME + DNS_MX)) {
		return true;
	}

	return false;
}
/**
 * Check if URL is allowed
 *
 * Check $url against our list of allowed sites (comma separated config
 * value "system.allowed_sites"), wildcards allowed.
 * If allowed_sites is unset return true.
 *
 * @param string $url URL which get tested
 * @return boolean True if url is allowed otherwise return false
 */
public static function isUrlAllowed(string $url)
{
	$h = @parse_url($url);

	if (! $h) {
		return false;
	}

	$str_allowed = DI::config()->get('system', 'allowed_sites');
	if (! $str_allowed) {
		return true;
	}

	// URLs without an authority part (e.g. "/path") have no "host" key
	$host = strtolower($h['host'] ?? '');

	// always allow our own site (SERVER_NAME may be unset, e.g. on the command line)
	if ($host == strtolower($_SERVER['SERVER_NAME'] ?? '')) {
		return true;
	}

	$fnmatch = function_exists('fnmatch');

	foreach (explode(',', $str_allowed) as $a) {
		$pat = strtolower(trim($a));
		if (($fnmatch && fnmatch($pat, $host)) || ($pat == $host)) {
			return true;
		}
	}

	return false;
}
/**
 * Checks if the provided url domain is on the domain blocklist.
 *
 * The host of the URL is matched (case-insensitively, wildcards allowed)
 * against the "domain" entry of every item in the "system.blocklist"
 * config value.
 *
 * @param string $url The url to check the domain from
 *
 * @return boolean True if the host matches a blocklist entry, false if not
 */
public static function isUrlBlocked(string $url)
{
	$host = @parse_url($url, PHP_URL_HOST);
	if (!$host) {
		// NOTE(review): the return statement of this guard was not visible in
		// the reviewed excerpt, and the previous docblock claimed malformed
		// URLs count as blocked. Restored as "not blocked" - confirm against
		// the canonical file.
		return false;
	}

	$domain_blocklist = DI::config()->get('system', 'blocklist', []);
	if (!$domain_blocklist) {
		return false;
	}

	foreach ($domain_blocklist as $domain_block) {
		if (fnmatch(strtolower($domain_block['domain']), strtolower($host))) {
			return true;
		}
	}

	return false;
}
/**
 * Checks if the provided url is on the list of domains where redirects are blocked.
 *
 * The host of the URL is matched (case-insensitively, wildcards allowed)
 * against every pattern of the "system.no_redirect_list" config value.
 *
 * @param string $url The url to check the domain from
 *
 * @return boolean True if the host matches a list entry, false if not
 */
public static function isRedirectBlocked(string $url)
{
	$host = @parse_url($url, PHP_URL_HOST);
	if (!$host) {
		// NOTE(review): the return statement of this guard was not visible in
		// the reviewed excerpt, and the previous docblock claimed malformed
		// URLs count as blocked. Restored as "not blocked" - confirm against
		// the canonical file.
		return false;
	}

	$no_redirect_list = DI::config()->get('system', 'no_redirect_list', []);
	if (!$no_redirect_list) {
		return false;
	}

	foreach ($no_redirect_list as $no_redirect) {
		if (fnmatch(strtolower($no_redirect), strtolower($host))) {
			return true;
		}
	}

	return false;
}
/**
 * Check if email address is allowed to register here.
 *
 * Compare against our list (comma separated config value
 * "system.allowed_email", wildcards allowed).
 *
 * @param string $email email address
 * @return boolean False if not allowed, true if allowed
 *                 or if allowed list is not configured
 * @throws \Friendica\Network\HTTPException\InternalServerErrorException
 */
public static function isEmailDomainAllowed(string $email)
{
	// Without this check "false + 1" would silently cut off the first
	// character of an address that contains no "@" at all.
	$at = strpos($email, '@');
	if ($at === false) {
		return false;
	}

	$domain = strtolower(substr($email, $at + 1));
	if (!$domain) {
		return false;
	}

	$str_allowed = DI::config()->get('system', 'allowed_email', '');
	if (empty($str_allowed)) {
		return true;
	}

	$allowed = explode(',', $str_allowed);

	return self::isDomainAllowed($domain, $allowed);
}
/**
 * Checks for the existence of a domain in a domain list
 *
 * Every list entry is trimmed and lower-cased, then used as a shell
 * wildcard pattern (fnmatch) and as a literal value. The domain itself is
 * not lower-cased here.
 *
 * @param string $domain      Domain to look for
 * @param array  $domain_list List of domains / wildcard patterns
 * @return boolean True if at least one entry matches the domain
 */
public static function isDomainAllowed(string $domain, array $domain_list)
{
	foreach ($domain_list as $item) {
		if (!is_string($item)) {
			continue;
		}

		$pat = strtolower(trim($item));

		// Strict comparison: a loose "==" treats numeric-looking strings
		// such as "1e3" and "1000" as equal.
		if (fnmatch($pat, $domain) || ($pat === $domain)) {
			return true;
		}
	}

	return false;
}
/**
 * Look up an avatar URL for an email address
 *
 * Runs the "avatar_lookup" hook with a request array (size, email, url,
 * success). If no hook handler reports success, the default contact avatar
 * below the base URL is used.
 *
 * @param string $email The email address
 * @return string The avatar URL
 */
public static function lookupAvatarByEmail(string $email)
{
	$avatar = [
		'size'    => 300,
		'email'   => $email,
		'url'     => '',
		'success' => false,
	];

	Hook::callAll('avatar_lookup', $avatar);

	if (! $avatar['success']) {
		$avatar['url'] = DI::baseUrl() . Contact::DEFAULT_AVATAR_PHOTO;
	}

	Logger::info('Avatar: ' . $avatar['email'] . ' ' . $avatar['url']);
	return $avatar['url'];
}
/**
 * Remove Google Analytics and other tracking platforms params from URL
 *
 * The tracking "name=value" pairs are cut out of the URL string itself, so
 * the encoding of all remaining parameters stays untouched.
 *
 * @param string $url Any user-submitted URL that may contain tracking params
 * @return string The same URL stripped of tracking parameters
 */
public static function stripTrackingQueryParams(string $url)
{
	// NOTE(review): the "awesm"/"wtrid" line was not visible in the reviewed
	// excerpt and is restored from the upstream list - confirm.
	$tracking_params = [
		"utm_source", "utm_medium", "utm_term", "utm_content", "utm_campaign",
		"wt_mc", "pk_campaign", "pk_kwd", "mc_cid", "mc_eid",
		"fb_action_ids", "fb_action_types", "fb_ref",
		"awesm", "wtrid",
		"woo_campaign", "woo_source", "woo_medium", "woo_content", "woo_term",
	];

	$query = parse_url($url, PHP_URL_QUERY);
	if (empty($query)) {
		return $url;
	}

	parse_str($query, $querydata);

	$stripped = false;

	foreach ($querydata as $param => $value) {
		// Array values ("utm_source[]=x") cannot be expressed as "name=value";
		// the strict search keeps numeric keys from matching by type juggling.
		if (!is_string($value) || !in_array((string)$param, $tracking_params, true)) {
			continue;
		}

		$pair = $param . "=" . urlencode($value);
		$url = str_replace($pair, "", $url);

		// Second try: if the url isn't encoded completely
		$pair = $param . "=" . str_replace(" ", "+", $value);
		$url = str_replace($pair, "", $url);

		// Third try: Maybe the url isn't encoded at all
		$pair = $param . "=" . $value;
		$url = str_replace($pair, "", $url);

		// Repair the separators around the removed pair. "&&" has to collapse
		// to a single "&", otherwise the neighbouring parameters get glued together.
		$url = str_replace(["?&", "&&"], ["?", "&"], $url);

		$stripped = true;
	}

	// A removed trailing parameter leaves its separator behind
	if ($stripped && substr($url, -1, 1) == "&") {
		$url = substr($url, 0, -1);
	}

	if (substr($url, -1, 1) == "?") {
		$url = substr($url, 0, -1);
	}

	return $url;
}
/**
 * Add a missing base path (scheme and host) to a given url
 *
 * The URL is returned unchanged when it is empty or unparsable, when it
 * already has a scheme, or when the base path has no scheme.
 *
 * @param string $url      Possibly relative URL
 * @param string $basepath URL providing the scheme and host
 * @return string url
 */
public static function addBasePath(string $url, string $basepath)
{
	$base_scheme = parse_url($basepath, PHP_URL_SCHEME);

	if (!empty(parse_url($url, PHP_URL_SCHEME)) || empty($base_scheme) || empty($url) || empty(parse_url($url))) {
		return $url;
	}

	// Force exactly one leading slash so the URL is parsed as a path
	$relative = parse_url('/' . ltrim($url, '/'));
	if (!is_array($relative)) {
		return $url;
	}

	$base = [
		'scheme' => $base_scheme,
		'host'   => parse_url($basepath, PHP_URL_HOST),
	];

	return self::unparseURL(array_merge($base, $relative));
}
/**
 * Find the matching part between two url
 *
 * Both URLs are normalised first. Scheme, host and port have to be equal,
 * otherwise there is no match at all; after that the leading path segments
 * shared by both URLs are appended.
 *
 * @param string $url1
 * @param string $url2
 * @return string The matching part, empty string if there is none
 */
public static function getUrlMatch(string $url1, string $url2)
{
	if (($url1 == "") || ($url2 == "")) {
		return "";
	}

	$parts1 = parse_url(Strings::normaliseLink($url1));
	$parts2 = parse_url(Strings::normaliseLink($url2));

	if (!isset($parts1["host"]) || !isset($parts2["host"])) {
		return "";
	}

	// Missing or empty components are treated as empty strings on both sides
	foreach (["scheme", "host", "port", "path"] as $key) {
		if (empty($parts1[$key])) {
			$parts1[$key] = '';
		}
		if (empty($parts2[$key])) {
			$parts2[$key] = '';
		}
	}

	foreach (["scheme", "host", "port"] as $key) {
		if ($parts1[$key] != $parts2[$key]) {
			return "";
		}
	}

	$match = $parts1["scheme"]."://".$parts1["host"];

	if ($parts1["port"]) {
		$match .= ":".$parts1["port"];
	}

	$pathparts1 = explode("/", $parts1["path"]);
	$pathparts2 = explode("/", $parts2["path"]);

	// Collect equal leading segments. For identical paths the loop runs past
	// the last segment and appends empty segments (extra slashes).
	// NOTE(review): presumably the final normaliseLink() call trims those
	// again - the loop bounds are kept exactly as they were.
	$i = 0;
	$path = "";
	do {
		$path1 = $pathparts1[$i] ?? '';
		$path2 = $pathparts2[$i] ?? '';

		if ($path1 == $path2) {
			$path .= $path1."/";
		}
	} while (($path1 == $path2) && ($i++ <= count($pathparts1)));

	$match .= $path;

	return Strings::normaliseLink($match);
}
/**
 * Glue url parts together
 *
 * Counterpart of parse_url(): accepts the keys scheme, user, pass, host,
 * port, path, query and fragment. Missing keys are left out of the result.
 *
 * @param array $parsed URL parts
 *
 * @return string The glued URL.
 * @deprecated since version 2021.12, use GuzzleHttp\Psr7\Uri::fromParts($parts) instead
 */
public static function unparseURL(array $parsed)
{
	// Absent components become empty strings, so that no null reaches the
	// string functions (deprecated since PHP 8.1)
	$get = function ($key) use ($parsed) {
		return isset($parsed[$key]) ? (string)$parsed[$key] : '';
	};

	$user = $parsed['user'] ?? null;
	$pass = $parsed['pass'] ?? null;

	// A password alone still yields ":pass@", user and password are both optional
	$userinfo = $pass !== null ? "$user:$pass" : $user;

	$port     = $parsed['port'] ?? null;
	$scheme   = $get('scheme');
	$query    = $get('query');
	$fragment = $get('fragment');

	$authority = ($userinfo !== null ? $userinfo."@" : '') .
		$get('host') .
		($port ? ":$port" : '');

	return ($scheme !== '' ? $scheme.":" : '') .
		($authority !== '' ? "//".$authority : '') .
		$get('path') .
		($query !== '' ? "?".$query : '') .
		($fragment !== '' ? "#".$fragment : '');
}
/**
 * Convert an URI to an IDN compatible URI
 *
 * For URLs with scheme and host only the host is converted; for
 * "local@domain" addresses only the part after the "@"; everything else is
 * converted as a whole. Whenever the conversion fails the affected part is
 * kept unchanged.
 *
 * @param string $uri
 * @return string
 */
public static function convertToIdn(string $uri): string
{
	// idn_to_ascii() returns false on failure, which must not replace the input
	$to_ascii = function (string $value): string {
		$ascii = idn_to_ascii($value);
		return is_string($ascii) ? $ascii : $value;
	};

	$parts = parse_url($uri);
	if (!empty($parts['scheme']) && !empty($parts['host'])) {
		$parts['host'] = $to_ascii($parts['host']);
		return (string)Uri::fromParts($parts);
	}

	$parts = explode('@', $uri);
	if (count($parts) == 2) {
		return $parts[0] . '@' . $to_ascii($parts[1]);
	}

	return $to_ascii($uri);
}
/**
 * Switch the scheme of an url between http and https
 *
 * Only the leading scheme is exchanged; URLs embedded in the path or the
 * query string stay as they are. URLs with another scheme, or without one,
 * are returned unchanged.
 *
 * @param string $url URL
 *
 * @return string switched URL
 */
public static function switchScheme(string $url)
{
	$scheme = parse_url($url, PHP_URL_SCHEME);
	if (empty($scheme)) {
		return $url;
	}

	if ($scheme === 'http' && strncmp($url, 'http://', 7) === 0) {
		return 'https://' . substr($url, 7);
	}

	if ($scheme === 'https' && strncmp($url, 'https://', 8) === 0) {
		return 'http://' . substr($url, 8);
	}

	return $url;
}
/**
 * Adds query string parameters to the provided URI. Replace the value of existing keys.
 *
 * The merged parameters are re-encoded with http_build_query().
 *
 * @param string $path             URI or path, with or without a query string
 * @param array  $additionalParams Associative array of parameters
 *
 * @return string
 */
public static function appendQueryParam(string $path, array $additionalParams)
{
	// parse_url() returns false for seriously malformed input; start from an
	// empty part list then instead of turning "false" into an array on write
	$parsed = parse_url($path) ?: [];

	$params = [];
	if (!empty($parsed['query'])) {
		parse_str($parsed['query'], $params);
	}

	$params = array_merge($params, $additionalParams);

	$parsed['query'] = http_build_query($params);

	return self::unparseURL($parsed);
}
/**
 * Generates ETag and Last-Modified response headers and checks them against
 * If-None-Match and If-Modified-Since request headers if present.
 *
 * The response headers are always sent. If the request headers show that
 * the client copy is still current, a NotModifiedException is thrown.
 *
 * @param string $etag          The page etag
 * @param string $last_modified The page last modification UTC date
 * @throws NotModifiedException If the client copy is still current
 */
public static function checkEtagModified(string $etag, string $last_modified)
{
	$last_modified = DateTimeFormat::utc($last_modified, 'D, d M Y H:i:s') . ' GMT';

	/**
	 * @see http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.26
	 */
	$if_none_match     = filter_input(INPUT_SERVER, 'HTTP_IF_NONE_MATCH');
	$if_modified_since = filter_input(INPUT_SERVER, 'HTTP_IF_MODIFIED_SINCE');
	$flag_not_modified = null;
	if ($if_none_match) {
		// Strip the weak marker and the surrounding quotes: W/"abc", "abc" and abc all give abc.
		// Fall back to the raw value if the ETag has an unexpected form.
		$result = [];
		preg_match('/^(?:W\/)?"?([^"]+)"?$/i', $etag, $result);
		$etagTrimmed = $result[1] ?? $etag;

		// Lazy exact ETag match, could check weak/strong ETags
		$flag_not_modified = $if_none_match == '*'
			|| ($etagTrimmed !== '' && strpos($if_none_match, $etagTrimmed) !== false);
	}

	if ($if_modified_since && (!$if_none_match || $flag_not_modified)) {
		// Lazy exact Last-Modified match, could check If-Modified-Since validity
		$flag_not_modified = $if_modified_since == $last_modified;
	}

	header('Etag: ' . $etag);
	header('Last-Modified: ' . $last_modified);

	if ($flag_not_modified) {
		throw new NotModifiedException();
	}
}
/**
 * Check if the given URL is a local link
 *
 * A URL counts as local when the normalised base URL of this node occurs
 * in the normalised URL.
 *
 * NOTE(review): this is a substring test, so the base URL is also found
 * when it merely appears inside the path or query of a foreign URL -
 * confirm that no caller relies on this for security decisions.
 *
 * @param string $url
 * @return bool
 */
public static function isLocalLink(string $url)
{
	return (strpos(Strings::normaliseLink($url), Strings::normaliseLink(DI::baseUrl())) !== false);
}
/**
 * Check if the given URL is a valid HTTP/HTTPS URL
 *
 * Valid means: the scheme is exactly "http" or "https" and the URL has a
 * non-empty host part.
 *
 * @param string $url
 * @return bool
 */
public static function isValidHttpUrl(string $url)
{
	$scheme = parse_url($url, PHP_URL_SCHEME);
	if (empty($scheme) || !in_array($scheme, ['http', 'https'], true)) {
		return false;
	}

	return (bool)parse_url($url, PHP_URL_HOST);
}