- if ($depth > 10)
- return($url);
-
- $url = trim($url, "'");
-
- $siteinfo = array();
- $ch = curl_init();
- curl_setopt($ch, CURLOPT_URL, $url);
- curl_setopt($ch, CURLOPT_HEADER, 1);
-
- if ($fetchbody)
- curl_setopt($ch, CURLOPT_NOBODY, 0);
- else
- curl_setopt($ch, CURLOPT_NOBODY, 1);
-
- curl_setopt($ch, CURLOPT_TIMEOUT, 10);
- curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
- curl_setopt($ch,CURLOPT_USERAGENT,'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:24.0) Gecko/20100101 Firefox/24.0');
-
- $header = curl_exec($ch);
- $curl_info = @curl_getinfo($ch);
- $http_code = $curl_info['http_code'];
- curl_close($ch);
-
- if ((($curl_info['http_code'] == "301") OR ($curl_info['http_code'] == "302"))
- AND (($curl_info['redirect_url'] != "") OR ($curl_info['location'] != ""))) {
- if ($curl_info['redirect_url'] != "")
- return(original_url($curl_info['redirect_url'], ++$depth, $fetchbody));
- else
- return(original_url($curl_info['location'], ++$depth, $fetchbody));
- }
-
- $pos = strpos($header, "\r\n\r\n");
-
- if ($pos)
- $body = trim(substr($header, $pos));
- else
- $body = $header;
-
- if (trim($body) == "")
- return(original_url($url, ++$depth, true));
-
- $doc = new DOMDocument();
- @$doc->loadHTML($body);
-
- $xpath = new DomXPath($doc);
-
- $list = $xpath->query("//meta[@content]");
- foreach ($list as $node) {
- $attr = array();
- if ($node->attributes->length)
- foreach ($node->attributes as $attribute)
- $attr[$attribute->name] = $attribute->value;
-
- if (@$attr["http-equiv"] == 'refresh') {
- $path = $attr["content"];
- $pathinfo = explode(";", $path);
- $content = "";
- foreach ($pathinfo AS $value)
- if (substr(strtolower($value), 0, 4) == "url=")
- return(original_url(substr($value, 4), ++$depth));
- }
- }
-
- return($url);
+ if ($depth > 10)
+ return($url);
+
+ $url = trim($url, "'");
+
+ $stamp1 = microtime(true);
+
+ $siteinfo = array();
+ $ch = curl_init();
+ curl_setopt($ch, CURLOPT_URL, $url);
+ curl_setopt($ch, CURLOPT_HEADER, 1);
+ curl_setopt($ch, CURLOPT_NOBODY, 1);
+ curl_setopt($ch, CURLOPT_TIMEOUT, 10);
+ curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
+ curl_setopt($ch, CURLOPT_USERAGENT, $a->get_useragent());
+
+ $header = curl_exec($ch);
+ $curl_info = @curl_getinfo($ch);
+ $http_code = $curl_info['http_code'];
+ curl_close($ch);
+
+ $a->save_timestamp($stamp1, "network");
+
+ if ((($curl_info['http_code'] == "301") OR ($curl_info['http_code'] == "302"))
+ AND (($curl_info['redirect_url'] != "") OR ($curl_info['location'] != ""))) {
+ if ($curl_info['redirect_url'] != "")
+ return(original_url($curl_info['redirect_url'], ++$depth, $fetchbody));
+ else
+ return(original_url($curl_info['location'], ++$depth, $fetchbody));
+ }
+
+ // Check for redirects in the meta elements of the body if there are no redirects in the header.
+ if (!$fetchbody)
+ return(original_url($url, ++$depth, true));
+
+ // if the file is too large then exit
+ if ($curl_info["download_content_length"] > 1000000)
+ return($url);
+
+ // if it isn't a HTML file then exit
+ if (($curl_info["content_type"] != "") AND !strstr(strtolower($curl_info["content_type"]),"html"))
+ return($url);
+
+ $stamp1 = microtime(true);
+
+ $ch = curl_init();
+ curl_setopt($ch, CURLOPT_URL, $url);
+ curl_setopt($ch, CURLOPT_HEADER, 0);
+ curl_setopt($ch, CURLOPT_NOBODY, 0);
+ curl_setopt($ch, CURLOPT_TIMEOUT, 10);
+ curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
+ curl_setopt($ch, CURLOPT_USERAGENT, $a->get_useragent());
+
+ $body = curl_exec($ch);
+ curl_close($ch);
+
+ $a->save_timestamp($stamp1, "network");
+
+ if (trim($body) == "")
+ return($url);
+
+ // Check for redirect in meta elements
+ $doc = new DOMDocument();
+ @$doc->loadHTML($body);
+
+ $xpath = new DomXPath($doc);
+
+ $list = $xpath->query("//meta[@content]");
+ foreach ($list as $node) {
+ $attr = array();
+ if ($node->attributes->length)
+ foreach ($node->attributes as $attribute)
+ $attr[$attribute->name] = $attribute->value;
+
+ if (@$attr["http-equiv"] == 'refresh') {
+ $path = $attr["content"];
+ $pathinfo = explode(";", $path);
+ $content = "";
+ foreach ($pathinfo AS $value)
+ if (substr(strtolower($value), 0, 4) == "url=")
+ return(original_url(substr($value, 4), ++$depth));
+ }
+ }
+
+ return($url);