git.mxchange.org Git - friendica.git/commitdiff
Fix Diaspora link attachment probe
author Hypolite Petovan <ben.lort@gmail.com>
Sat, 18 Feb 2017 03:35:46 +0000 (22:35 -0500)
committer Hypolite Petovan <ben.lort@gmail.com>
Sat, 18 Feb 2017 03:35:46 +0000 (22:35 -0500)
- Move analytics param stripping out of original_url
- Remove HEAD curl request in ParseUrl::getSiteInfo
- Replace original_url with strip_tracking_query_params in
ParseUrl::getSiteInfo to prevent massive curl fest in border cases

include/ParseUrl.php
include/network.php

index b85175a25b57445baf8384128b1dfcb522692ad3..3a2fe9d53c6b818b39613d7d4f769bd34691ab6e 100644 (file)
@@ -130,7 +130,7 @@ class ParseUrl {
                $url = trim($url, "'");
                $url = trim($url, '"');
 
-               $url = original_url($url);
+               $url = strip_tracking_query_params($url);
 
                $siteinfo["url"] = $url;
                $siteinfo["type"] = "link";
@@ -142,8 +142,7 @@ class ParseUrl {
                $ch = curl_init();
                curl_setopt($ch, CURLOPT_URL, $url);
                curl_setopt($ch, CURLOPT_HEADER, 1);
-               curl_setopt($ch, CURLOPT_NOBODY, 1);
-               curl_setopt($ch, CURLOPT_TIMEOUT, 3);
+               curl_setopt($ch, CURLOPT_TIMEOUT, 10);
                curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
                curl_setopt($ch, CURLOPT_USERAGENT, $a->get_useragent());
                curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, (($check_cert) ? true : false));
@@ -151,7 +150,6 @@ class ParseUrl {
 
                $header = curl_exec($ch);
                $curl_info = @curl_getinfo($ch);
-               $http_code = $curl_info["http_code"];
                curl_close($ch);
 
                $a->save_timestamp($stamp1, "network");
@@ -197,26 +195,6 @@ class ParseUrl {
                        }
                }
 
-               $stamp1 = microtime(true);
-
-               // Now fetch the body as well
-               $ch = curl_init();
-               curl_setopt($ch, CURLOPT_URL, $url);
-               curl_setopt($ch, CURLOPT_HEADER, 1);
-               curl_setopt($ch, CURLOPT_NOBODY, 0);
-               curl_setopt($ch, CURLOPT_TIMEOUT, 10);
-               curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
-               curl_setopt($ch, CURLOPT_USERAGENT, $a->get_useragent());
-               curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, (($check_cert) ? true : false));
-               curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, (($check_cert) ? 2 : false));
-
-               $header = curl_exec($ch);
-               $curl_info = @curl_getinfo($ch);
-               $http_code = $curl_info["http_code"];
-               curl_close($ch);
-
-               $a->save_timestamp($stamp1, "network");
-
                // Fetch the first mentioned charset. Can be in body or header
                $charset = "";
                if (preg_match('/charset=(.*?)['."'".'"\s\n]/', $header, $matches)) {
index 7385c94a031882244861574a397500836c354554..ecbe0e5c6d04407ed3ce97710cf137056ef01a3d 100644 (file)
@@ -670,42 +670,69 @@ function fix_contact_ssl_policy(&$contact,$new_policy) {
        }
 }
 
-function original_url($url, $depth=1, $fetchbody = false) {
-
-       $a = get_app();
-
-       // Remove Analytics Data from Google and other tracking platforms
+/**
+ * @brief Remove Google Analytics and other tracking platforms params from URL
+ *
+ * @param string $url
+ * @return string
+ */
+function strip_tracking_query_params($url)
+{
        $urldata = parse_url($url);
        if (is_string($urldata["query"])) {
                $query = $urldata["query"];
                parse_str($query, $querydata);
 
-               if (is_array($querydata))
-                       foreach ($querydata AS $param=>$value)
+               if (is_array($querydata)) {
+                       foreach ($querydata AS $param => $value) {
                                if (in_array($param, array("utm_source", "utm_medium", "utm_term", "utm_content", "utm_campaign",
                                                        "wt_mc", "pk_campaign", "pk_kwd", "mc_cid", "mc_eid",
                                                        "fb_action_ids", "fb_action_types", "fb_ref",
                                                        "awesm", "wtrid",
                                                        "woo_campaign", "woo_source", "woo_medium", "woo_content", "woo_term"))) {
 
-                                       $pair = $param."=".urlencode($value);
+                                       $pair = $param . "=" . urlencode($value);
                                        $url = str_replace($pair, "", $url);
 
                                        // Second try: if the url isn't encoded completely
-                                       $pair = $param."=".str_replace(" ", "+", $value);
+                                       $pair = $param . "=" . str_replace(" ", "+", $value);
                                        $url = str_replace($pair, "", $url);
 
                                        // Third try: Maybey the url isn't encoded at all
-                                       $pair = $param."=".$value;
+                                       $pair = $param . "=" . $value;
                                        $url = str_replace($pair, "", $url);
 
                                        $url = str_replace(array("?&", "&&"), array("?", ""), $url);
                                }
+                       }
+               }
 
-               if (substr($url, -1, 1) == "?")
+               if (substr($url, -1, 1) == "?") {
                        $url = substr($url, 0, -1);
+               }
        }
 
+       return $url;
+}
+
+/**
+ * @brief Returns the original URL of the provided URL
+ *
+ * This function strips tracking query params and follows redirections, either
+ * through HTTP code or meta refresh tags. Stops after 10 redirections.
+ *
+ * @see ParseUrl::getSiteinfo
+ *
+ * @param string $url
+ * @param int $depth
+ * @param bool $fetchbody
+ * @return string
+ */
+function original_url($url, $depth = 1, $fetchbody = false) {
+       $a = get_app();
+
+       $url = strip_tracking_query_params($url);
+
        if ($depth > 10)
                return($url);