3 * @copyright Copyright (C) 2010-2023, the Friendica project
5 * @license GNU AGPL version 3 or any later version
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU Affero General Public License as
9 * published by the Free Software Foundation, either version 3 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Affero General Public License for more details.
17 * You should have received a copy of the GNU Affero General Public License
18 * along with this program. If not, see <https://www.gnu.org/licenses/>.
22 namespace Friendica\Util;
24 use Friendica\Core\Hook;
25 use Friendica\Core\Logger;
27 use Friendica\Model\Contact;
28 use Friendica\Network\HTTPClient\Client\HttpClientAccept;
29 use Friendica\Network\HTTPClient\Client\HttpClientOptions;
30 use Friendica\Network\HTTPException\NotModifiedException;
31 use GuzzleHttp\Psr7\Uri;
32 use Psr\Http\Message\UriInterface;
/**
 * Read the raw body of the current request
 *
 * @return string|false The content of the php://input stream, false if it could not be read
 */
public static function postdata()
{
    $rawBody = file_get_contents('php://input');

    return $rawBody;
}
/**
 * Validate a URL taken from the wild
 *
 * Prepends http:// when the URL does not start with "http", requires the
 * host to be an IP address or to have A/AAAA DNS records and, for HTTP(S)
 * URLs, requires a successful HEAD (or fallback GET) request. When the
 * response reports a redirect, the redirect target is returned instead.
 *
 * The whole check is skipped when system.disable_url_validation is set.
 *
 * @param string $url The URL to be validated
 *
 * @return string|boolean The actual working URL, false else
 * @throws \Friendica\Network\HTTPException\InternalServerErrorException
 */
public static function isUrlValid(string $url)
{
    if (DI::config()->get('system', 'disable_url_validation')) {
        return $url;
    }

    // no naked subdomains (allow localhost for tests)
    $hasDot      = strpos($url, '.') !== false;
    $isLocalhost = strpos($url, '/localhost/') !== false;
    if (!$hasDot && !$isLocalhost) {
        return false;
    }

    if (substr($url, 0, 4) != 'http') {
        $url = 'http://' . $url;
    }

    $timeout = DI::config()->get('system', 'xrd_timeout');
    $host    = parse_url($url, PHP_URL_HOST);
    if (empty($host)) {
        return false;
    }

    // The host must either be a literal IP address or resolve via DNS
    $resolvable = filter_var($host, FILTER_VALIDATE_IP) || @dns_get_record($host . '.', DNS_A + DNS_AAAA);
    if (!$resolvable) {
        return false;
    }

    // Only HTTP(S) URLs are probed with a request
    if (!in_array(parse_url($url, PHP_URL_SCHEME), ['https', 'http'])) {
        return $url;
    }

    $options  = [HttpClientOptions::VERIFY => true, HttpClientOptions::TIMEOUT => $timeout];
    $response = DI::httpClient()->head($url, $options);

    // Workaround for systems that can't handle a HEAD request. Don't retry on timeouts.
    if (!$response->isSuccess() && ($response->getReturnCode() >= 400) && !in_array($response->getReturnCode(), [408, 504])) {
        $response = DI::httpClient()->get($url, HttpClientAccept::DEFAULT, $options);
    }

    if (!$response->isSuccess()) {
        Logger::notice('Url not reachable', ['host' => $host, 'url' => $url]);
        return false;
    }

    if ($response->isRedirectUrl()) {
        return $response->getRedirectUrl();
    }

    return $url;
}
/**
 * Checks that the domain of an email address is resolvable
 *
 * Always true when system.disable_email_validation is set.
 *
 * @param string $addr The email address
 * @return boolean True if it's a valid email address, false if it's not
 */
public static function isEmailDomainValid(string $addr): bool
{
    if (DI::config()->get('system', 'disable_email_validation')) {
        return true;
    }

    // Both a missing "@" and an "@" at the very first position are rejected
    $atPosition = strpos($addr, '@');
    if (!$atPosition) {
        return false;
    }

    $domain = substr($addr, $atPosition + 1);
    if (!$domain) {
        return false;
    }

    // Concerning the @ see here: https://stackoverflow.com/questions/36280957/dns-get-record-a-temporary-server-error-occurred
    if (@dns_get_record($domain, DNS_A + DNS_AAAA + DNS_MX) || filter_var($domain, FILTER_VALIDATE_IP)) {
        return true;
    }

    // Second attempt with a different record set
    return (bool)@dns_get_record($domain, DNS_CNAME + DNS_MX);
}
/**
 * Check if URL is allowed
 *
 * Check the host of $url against the comma separated system.allowed_sites
 * list, wildcards allowed. If allowed_sites is unset return true.
 * The own site ($_SERVER['SERVER_NAME']) is always allowed.
 *
 * @param string $url URL which get tested
 * @return boolean True if url is allowed otherwise return false
 */
public static function isUrlAllowed(string $url): bool
{
    $parts = @parse_url($url);
    if (!$parts) {
        return false;
    }

    $str_allowed = DI::config()->get('system', 'allowed_sites');
    if (!$str_allowed) {
        return true;
    }

    // parse_url() omits the "host" key for relative or otherwise host-less URLs
    $host = strtolower($parts['host'] ?? '');

    // always allow our own site (SERVER_NAME is not guaranteed to be set, e.g. outside of a web request)
    if ($host == strtolower($_SERVER['SERVER_NAME'] ?? '')) {
        return true;
    }

    $fnmatch = function_exists('fnmatch');

    foreach (explode(',', $str_allowed) as $entry) {
        $pattern = strtolower(trim($entry));
        if (($fnmatch && fnmatch($pattern, $host)) || ($pattern == $host)) {
            return true;
        }
    }

    return false;
}
/**
 * Checks if the provided url domain is on the domain blocklist.
 *
 * A URL that cannot be turned into a Uri object is logged as a warning
 * and reported as not blocked.
 *
 * @param string $url The url to check the domain from
 *
 * @return boolean
 *
 * @deprecated since 2023.03 Use isUriBlocked instead
 */
public static function isUrlBlocked(string $url): bool
{
    $blocked = false;

    try {
        $uri     = new Uri($url);
        $blocked = self::isUriBlocked($uri);
    } catch (\Throwable $e) {
        Logger::warning('Invalid URL', ['url' => $url]);
    }

    return $blocked;
}
/**
 * Checks if the provided URI domain is on the domain blocklist.
 *
 * Each system.blocklist entry provides a "domain" wildcard pattern that is
 * matched case-insensitively against the URI host.
 *
 * @param UriInterface $uri
 *
 * @return boolean True if the host matches a blocklist entry, false otherwise (including host-less URIs)
 */
public static function isUriBlocked(UriInterface $uri): bool
{
    $host = $uri->getHost();
    if (!$host) {
        return false;
    }

    $blocklist = DI::config()->get('system', 'blocklist', []);
    if (!$blocklist) {
        return false;
    }

    $host = strtolower($host);
    foreach ($blocklist as $entry) {
        if (fnmatch(strtolower($entry['domain']), $host)) {
            return true;
        }
    }

    return false;
}
/**
 * Checks if the provided url is on the list of domains where redirects are blocked.
 *
 * The host is matched case-insensitively against the wildcard patterns in
 * system.no_redirect_list. URLs without a detectable host are not blocked.
 *
 * @param string $url The url to check the domain from
 *
 * @return boolean True if the host matches an entry of the list, false otherwise
 */
public static function isRedirectBlocked(string $url): bool
{
    $host = @parse_url($url, PHP_URL_HOST);
    if (!$host) {
        return false;
    }

    $patterns = DI::config()->get('system', 'no_redirect_list', []);
    if (!$patterns) {
        return false;
    }

    $host = strtolower($host);
    foreach ($patterns as $pattern) {
        if (fnmatch(strtolower($pattern), $host)) {
            return true;
        }
    }

    return false;
}
/**
 * Check if email address is allowed to register here.
 *
 * Compare the domain part against the comma separated system.allowed_email
 * list (wildcards allowed).
 *
 * @param string $email email address
 * @return boolean False if not allowed (or if the address has no domain part),
 *                 true if allowed or if allowed list is not configured
 * @throws \Friendica\Network\HTTPException\InternalServerErrorException
 */
public static function isEmailDomainAllowed(string $email): bool
{
    // Without an "@" strpos() yields false and "false + 1" would silently
    // cut off the first character instead of extracting a domain
    $atPosition = strpos($email, '@');
    if ($atPosition === false) {
        return false;
    }

    $domain = strtolower(substr($email, $atPosition + 1));
    if (!$domain) {
        return false;
    }

    $str_allowed = DI::config()->get('system', 'allowed_email', '');
    if (empty($str_allowed)) {
        return true;
    }

    $allowed = explode(',', $str_allowed);

    return self::isDomainAllowed($domain, $allowed);
}
/**
 * Checks for the existence of a domain in a domain list
 *
 * List entries are trimmed and lower-cased, then compared with the domain
 * both as wildcard pattern and literally. The domain itself is used as given.
 *
 * @param string $domain
 * @param array  $domain_list
 *
 * @return boolean True on the first matching entry, false if none matches
 */
public static function isDomainAllowed(string $domain, array $domain_list): bool
{
    foreach ($domain_list as $entry) {
        $pattern = strtolower(trim($entry));

        if (fnmatch($pattern, $domain)) {
            return true;
        }

        if ($pattern == $domain) {
            return true;
        }
    }

    return false;
}
/**
 * Look up an avatar URL for an email address
 *
 * Offers the lookup to the "avatar_lookup" hook and falls back to the
 * default contact avatar of this node when the "success" entry is still
 * falsy afterwards.
 *
 * @param string $email The email address
 *
 * @return string The avatar URL
 */
public static function lookupAvatarByEmail(string $email): string
{
    // Hook handlers are presumably expected to fill "url" and set "success" - verify against Hook::callAll
    $avatar = [
        'size'    => 300,
        'email'   => $email,
        'url'     => '',
        'success' => false,
    ];

    Hook::callAll('avatar_lookup', $avatar);

    if (!$avatar['success']) {
        $avatar['url'] = DI::baseUrl() . Contact::DEFAULT_AVATAR_PHOTO;
    }

    Logger::info('Avatar: ' . $avatar['email'] . ' ' . $avatar['url']);

    return $avatar['url'];
}
/**
 * Remove Google Analytics and other tracking platforms params from URL
 *
 * The query string is split at "&" and every pair whose (URL-decoded) name
 * is a known tracking parameter is dropped. All other pairs are kept
 * byte-for-byte, so the remaining URL is not re-encoded. A URL without any
 * tracking parameter is returned unchanged.
 *
 * @param string $url Any user-submitted URL that may contain tracking params
 *
 * @return string The same URL stripped of tracking parameters
 */
public static function stripTrackingQueryParams(string $url): string
{
    $urldata = parse_url($url);
    if (empty($urldata['query'])) {
        return $url;
    }

    $trackingParams = [
        'utm_source', 'utm_medium', 'utm_term', 'utm_content', 'utm_campaign',
        // As seen from Purism
        'mtm_source', 'mtm_medium', 'mtm_term', 'mtm_content', 'mtm_campaign',
        'wt_mc', 'pk_campaign', 'pk_kwd', 'mc_cid', 'mc_eid',
        'fb_action_ids', 'fb_action_types', 'fb_ref',
        'awesm', 'wtrid',
        'woo_campaign', 'woo_source', 'woo_medium', 'woo_content', 'woo_term',
    ];

    // parse_url() reported a query, so the first "?" starts it and an optional "#" ends it
    $queryStart = strpos($url, '?');
    if ($queryStart === false) {
        return $url;
    }
    $fragmentStart = strpos($url, '#', $queryStart);
    $queryEnd      = ($fragmentStart === false) ? strlen($url) : $fragmentStart;

    $kept    = [];
    $removed = false;
    foreach (explode('&', substr($url, $queryStart + 1, $queryEnd - $queryStart - 1)) as $pair) {
        // Compare whole parameter names only, a plain substring replacement
        // would also hit parameters that merely end with a tracking name
        $name = urldecode(explode('=', $pair, 2)[0]);
        if (in_array($name, $trackingParams, true)) {
            $removed = true;
        } elseif ($pair !== '') {
            $kept[] = $pair;
        }
    }

    if (!$removed) {
        return $url;
    }

    // Rejoining the remaining pairs keeps exactly one "&" between them; the
    // "?" is dropped when nothing is left, a fragment is kept as it is
    $query = implode('&', $kept);

    return substr($url, 0, $queryStart) . ($query !== '' ? '?' . $query : '') . substr($url, $queryEnd);
}
/**
 * Add a missing base path (scheme and host) to a given url
 *
 * The url is returned untouched when it already has a scheme, when it is
 * empty or unparsable, or when the base path has no scheme. Only scheme
 * and host are taken from the base path.
 *
 * @param string $url
 * @param string $basepath
 *
 * @return string url
 */
public static function addBasePath(string $url, string $basepath): string
{
    $urlScheme  = parse_url($url, PHP_URL_SCHEME);
    $baseScheme = parse_url($basepath, PHP_URL_SCHEME);

    if (!empty($urlScheme) || empty($baseScheme) || empty($url) || empty(parse_url($url))) {
        return $url;
    }

    // Parse the url as absolute path so that its components can be merged with the base
    $relative = parse_url('/' . ltrim($url, '/'));

    $parts = array_merge(
        [
            'scheme' => $baseScheme,
            'host'   => parse_url($basepath, PHP_URL_HOST),
        ],
        $relative
    );

    return self::unparseURL($parts);
}
/**
 * Find the matching part between two url
 *
 * Both urls are normalised first. Scheme, host and port have to be equal,
 * then the leading path segments both urls have in common are appended.
 *
 * @param string $url1
 * @param string $url2
 *
 * @return string The matching part or empty string on error
 */
public static function getUrlMatch(string $url1, string $url2): string
{
    if (($url1 == '') || ($url2 == '')) {
        return '';
    }

    $first  = parse_url(Strings::normaliseLink($url1));
    $second = parse_url(Strings::normaliseLink($url2));

    if (!isset($first['host']) || !isset($second['host'])) {
        return '';
    }

    // Missing or empty components count as empty string
    $component = function (array $parts, string $key) {
        return empty($parts[$key]) ? '' : $parts[$key];
    };

    foreach (['scheme', 'host', 'port'] as $key) {
        if ($component($first, $key) != $component($second, $key)) {
            return '';
        }
    }

    $match = $component($first, 'scheme') . '://' . $component($first, 'host');

    $port = $component($first, 'port');
    if ($port) {
        $match .= ':' . $port;
    }

    $segments1 = explode('/', $component($first, 'path'));
    $segments2 = explode('/', $component($second, 'path'));

    // The loop bound lets the index run past the end of the first path, where
    // both segments read as '' and only add further slashes - presumably
    // removed again by the final normalisation (verify Strings::normaliseLink)
    $index  = 0;
    $common = '';
    do {
        $segment1 = $segments1[$index] ?? '';
        $segment2 = $segments2[$index] ?? '';

        $same = ($segment1 == $segment2);
        if ($same) {
            $common .= $segment1 . '/';
        }
    } while ($same && ($index++ <= count($segments1)));

    return Strings::normaliseLink($match . $common);
}
/**
 * Glue url parts together
 *
 * Counterpart of parse_url(): missing parts are skipped. A part only counts
 * as missing when it is absent or an empty string, so values like a "0"
 * query, fragment or host are kept.
 *
 * @param array $parsed URL parts
 *
 * @return string The glued URL, an empty string if no part is given
 * @deprecated since version 2021.12, use GuzzleHttp\Psr7\Uri::fromParts($parts) instead
 */
public static function unparseURL(array $parsed): string
{
    $get = function (string $key) use ($parsed) {
        return $parsed[$key] ?? null;
    };

    // empty() would also discard the valid value "0"
    $present = function ($value): bool {
        return is_scalar($value) && ((string)$value !== '');
    };

    $pass      = $get('pass');
    $user      = $get('user');
    $userinfo  = $pass !== null ? "$user:$pass" : $user;
    $port      = $get('port');
    $scheme    = $get('scheme');
    $query     = $get('query');
    $fragment  = $get('fragment');
    $authority = ($userinfo !== null ? $userinfo . '@' : '') .
        $get('host') .
        ($port ? ":$port" : '');

    return ($present($scheme) ? $scheme . ':' : '') .
        ($authority !== '' ? '//' . $authority : '') .
        $get('path') .
        ($present($query) ? '?' . $query : '') .
        ($present($fragment) ? '#' . $fragment : '');
}
/**
 * Convert an URI to an IDN compatible URI
 *
 * Handles complete URIs (scheme and host), addresses in the form
 * "user@domain" and bare domains. When the conversion of a domain fails,
 * that domain is kept unchanged.
 *
 * @param string $uri
 *
 * @return string
 */
public static function convertToIdn(string $uri): string
{
    // idn_to_ascii() returns false on failure, which must not replace the domain
    $toAscii = function (string $domain): string {
        if ($domain === '') {
            return $domain;
        }

        $ascii = idn_to_ascii($domain);

        return ($ascii === false) ? $domain : $ascii;
    };

    $parts = parse_url($uri);
    if (!empty($parts['scheme']) && !empty($parts['host'])) {
        $parts['host'] = $toAscii($parts['host']);

        return (string)Uri::fromParts($parts);
    }

    $pieces = explode('@', $uri);
    if (count($pieces) == 2) {
        return $pieces[0] . '@' . $toAscii($pieces[1]);
    }

    return $toAscii($uri);
}
/**
 * Switch the scheme of an url between http and https
 *
 * Only the leading scheme is exchanged, further "http://" or "https://"
 * occurrences inside the url (e.g. in the query string) stay untouched.
 * Urls with another or without a scheme are returned unchanged.
 *
 * @param string $url
 *
 * @return string Switched URL
 */
public static function switchScheme(string $url): string
{
    $scheme = parse_url($url, PHP_URL_SCHEME);
    if (empty($scheme)) {
        return $url;
    }

    $counterparts = ['http' => 'https', 'https' => 'http'];
    if (!isset($counterparts[$scheme])) {
        return $url;
    }

    // Only urls in the "scheme://" form are switched
    $prefix = $scheme . '://';
    if (strncmp($url, $prefix, strlen($prefix)) !== 0) {
        return $url;
    }

    return $counterparts[$scheme] . substr($url, strlen($scheme));
}
/**
 * Adds query string parameters to the provided URI. Replace the value of existing keys.
 *
 * @param string $path
 * @param array  $additionalParams Associative array of parameters
 *
 * @return string The URI with the merged query string
 */
public static function appendQueryParam(string $path, array $additionalParams): string
{
    // parse_url() returns false for seriously malformed input; writing the
    // query into "false" relies on an implicit conversion deprecated in PHP 8.1
    $parsed = parse_url($path) ?: [];

    $params = [];
    if (!empty($parsed['query'])) {
        parse_str($parsed['query'], $params);
    }

    $parsed['query'] = http_build_query(array_merge($params, $additionalParams));

    return self::unparseURL($parsed);
}
/**
 * Generates ETag and Last-Modified response headers and checks them against
 * If-None-Match and If-Modified-Since request headers if present.
 *
 * Always sends the Etag and Last-Modified headers. If the check passes, a
 * NotModifiedException is thrown instead of returning.
 *
 * @param string $etag          The page etag
 * @param string $last_modified The page last modification UTC date
 *
 * @return void
 * @throws NotModifiedException When the request headers match the current state
 */
public static function checkEtagModified(string $etag, string $last_modified)
{
    $last_modified = DateTimeFormat::utc($last_modified, 'D, d M Y H:i:s') . ' GMT';

    /**
     * @see http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.26
     */
    $if_none_match     = filter_input(INPUT_SERVER, 'HTTP_IF_NONE_MATCH');
    $if_modified_since = filter_input(INPUT_SERVER, 'HTTP_IF_MODIFIED_SINCE');
    $flag_not_modified = null;
    if ($if_none_match) {
        // Strip the optional weak marker (W/) and the optional surrounding quotes
        $result      = [];
        $etagTrimmed = preg_match('/^(?:W\/)?"?([^"]+)"?$/i', $etag, $result) ? $result[1] : '';

        // Lazy exact ETag match, could check weak/strong ETags.
        // An ETag that could not be extracted never matches: an empty
        // strpos() needle would be found in every header value on PHP 8
        $flag_not_modified = $if_none_match == '*'
            || ($etagTrimmed !== '' && strpos($if_none_match, $etagTrimmed) !== false);
    }

    if ($if_modified_since && (!$if_none_match || $flag_not_modified)) {
        // Lazy exact Last-Modified match, could check If-Modified-Since validity
        $flag_not_modified = $if_modified_since == $last_modified;
    }

    header('Etag: ' . $etag);
    header('Last-Modified: ' . $last_modified);

    if ($flag_not_modified) {
        throw new NotModifiedException();
    }
}
/**
 * Check if the given URL is a local link
 *
 * NOTE(review): this is a substring test, the normalised base URL may occur
 * anywhere inside the normalised URL (e.g. in a query string) - confirm
 * that callers do not rely on it for security decisions.
 *
 * @param string $url
 *
 * @return bool True if the normalised URL contains the normalised base URL
 */
public static function isLocalLink(string $url): bool
{
    $normalisedUrl  = Strings::normaliseLink($url);
    $normalisedBase = Strings::normaliseLink(DI::baseUrl());

    return strpos($normalisedUrl, $normalisedBase) !== false;
}
/**
 * Check if the given URL is a valid HTTP/HTTPS URL
 *
 * @param string $url
 *
 * @return bool True if the scheme is http or https and a host is present
 */
public static function isValidHttpUrl(string $url): bool
{
    $scheme = parse_url($url, PHP_URL_SCHEME);
    if (empty($scheme) || !in_array($scheme, ['http', 'https'])) {
        return false;
    }

    return (bool)parse_url($url, PHP_URL_HOST);
}