$url = trim($url, "'");
$url = trim($url, '"');
- $url = original_url($url);
+ $url = strip_tracking_query_params($url);
$siteinfo["url"] = $url;
$siteinfo["type"] = "link";
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, 1);
- curl_setopt($ch, CURLOPT_NOBODY, 1);
- curl_setopt($ch, CURLOPT_TIMEOUT, 3);
+ curl_setopt($ch, CURLOPT_TIMEOUT, 10);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_USERAGENT, $a->get_useragent());
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, (($check_cert) ? true : false));
$header = curl_exec($ch);
$curl_info = @curl_getinfo($ch);
- $http_code = $curl_info["http_code"];
curl_close($ch);
$a->save_timestamp($stamp1, "network");
}
}
- $stamp1 = microtime(true);
-
- // Now fetch the body as well
- $ch = curl_init();
- curl_setopt($ch, CURLOPT_URL, $url);
- curl_setopt($ch, CURLOPT_HEADER, 1);
- curl_setopt($ch, CURLOPT_NOBODY, 0);
- curl_setopt($ch, CURLOPT_TIMEOUT, 10);
- curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
- curl_setopt($ch, CURLOPT_USERAGENT, $a->get_useragent());
- curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, (($check_cert) ? true : false));
- curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, (($check_cert) ? 2 : false));
-
- $header = curl_exec($ch);
- $curl_info = @curl_getinfo($ch);
- $http_code = $curl_info["http_code"];
- curl_close($ch);
-
- $a->save_timestamp($stamp1, "network");
-
// Fetch the first mentioned charset. Can be in body or header
$charset = "";
if (preg_match('/charset=(.*?)['."'".'"\s\n]/', $header, $matches)) {
}
}
-function original_url($url, $depth=1, $fetchbody = false) {
-
- $a = get_app();
-
- // Remove Analytics Data from Google and other tracking platforms
+/**
+ * @brief Remove Google Analytics and other tracking-platform parameters from a URL
+ *
+ * The URL is split with parse_url() and its query string decoded with
+ * parse_str(). For every query parameter whose name appears in the fixed list
+ * passed to in_array() below, the "name=value" pair is removed from the URL by
+ * plain string replacement. Three spellings of the value are tried in turn:
+ * urlencode()d, with spaces written as "+", and raw (not encoded at all).
+ * After each removal "?&" is collapsed to "?" and "&&" is replaced by an empty
+ * string. Finally a single trailing "?" is dropped.
+ *
+ * When parse_url() yields no string "query" component, the URL is returned
+ * unchanged.
+ *
+ * NOTE(review): "&&" is replaced by "" rather than "&", so removing a parameter
+ * that sits between two others (e.g. "a=1&utm_source=x&b=2") joins its
+ * neighbours into "a=1b=2", and removing the last parameter leaves a trailing
+ * "&" behind. This looks unintended - confirm before relying on the result.
+ *
+ * NOTE(review): the removal is substring based, so the same "name=value" text
+ * occurring elsewhere in the URL is removed as well.
+ *
+ * @param string $url URL that may carry tracking query parameters
+ * @return string The URL with the recognised tracking pairs removed
+ */
+function strip_tracking_query_params($url)
+{
$urldata = parse_url($url);
if (is_string($urldata["query"])) {
$query = $urldata["query"];
parse_str($query, $querydata);
- if (is_array($querydata))
- foreach ($querydata AS $param=>$value)
+ if (is_array($querydata)) {
+ foreach ($querydata AS $param => $value) {
if (in_array($param, array("utm_source", "utm_medium", "utm_term", "utm_content", "utm_campaign",
"wt_mc", "pk_campaign", "pk_kwd", "mc_cid", "mc_eid",
"fb_action_ids", "fb_action_types", "fb_ref",
"awesm", "wtrid",
"woo_campaign", "woo_source", "woo_medium", "woo_content", "woo_term"))) {
- $pair = $param."=".urlencode($value);
+ $pair = $param . "=" . urlencode($value);
$url = str_replace($pair, "", $url);
// Second try: if the url isn't encoded completely
- $pair = $param."=".str_replace(" ", "+", $value);
+ $pair = $param . "=" . str_replace(" ", "+", $value);
$url = str_replace($pair, "", $url);
// Third try: Maybey the url isn't encoded at all
- $pair = $param."=".$value;
+ $pair = $param . "=" . $value;
$url = str_replace($pair, "", $url);
$url = str_replace(array("?&", "&&"), array("?", ""), $url);
}
+ }
+ }
- if (substr($url, -1, 1) == "?")
+ if (substr($url, -1, 1) == "?") {
$url = substr($url, 0, -1);
+ }
}
+ return $url;
+}
+
+/**
+ * @brief Returns the original URL of the provided URL
+ *
+ * This function strips tracking query params and follows redirections, either
+ * through HTTP code or meta refresh tags. Stops after 10 redirections.
+ *
+ * @see ParseUrl::getSiteinfo
+ *
+ * @param string $url
+ * @param int $depth
+ * @param bool $fetchbody
+ * @return string
+ */
+function original_url($url, $depth = 1, $fetchbody = false) {
+ $a = get_app();
+
+ $url = strip_tracking_query_params($url);
+
if ($depth > 10)
return($url);