$request->setConfig(array(
'connect_timeout' => 10, // # seconds to wait
'max_redirs' => $redirs, // # max number of http redirections to follow
- 'follow_redirects' => true, // Follow redirects
+ 'follow_redirects' => false, // We follow redirects ourselves in lib/httpclient.php
'store_body' => false, // We won't need body content here.
));
return $request;
// no content it'll be cheap. :)
$request = self::_commonHttp($short_url, $redirs);
$response = $request->send();
+ } elseif (400 == $response->getStatus()) {
+ throw new Exception('Got error 400 on HEAD request, will not go further.');
}
} catch (Exception $e) {
// Invalid URL or failure to reach server
common_log(LOG_ERR, "Error while following redirects for $short_url: " . $e->getMessage());
return $short_url;
}
-
- if ($response->getRedirectCount() && File::isProtected($response->getUrl())) {
- // Bump back up the redirect chain until we find a non-protected URL
- return self::lookupWhere($short_url, $response->getRedirectCount() - 1, true);
+
+ // if last url after all redirections is protected,
+ // use the url before it in the redirection chain
+ if ($response->getRedirectCount() && File::isProtected($response->getEffectiveUrl())) {
+ $return_url = $response->redirUrls[$response->getRedirectCount()-1];
+ } else {
+ $return_url = $response->getEffectiveUrl();
}
$ret = array('code' => $response->getStatus()
, 'redirects' => $response->getRedirectCount()
- , 'url' => $response->getUrl());
+ , 'url' => $return_url);
$type = $response->getHeader('Content-Type');
if ($type) $ret['type'] = $type;
try {
$r = File_redirection::getByUrl($in_url);
- if($r instanceof File_redirection) {
- return $r;
+
+ try {
+ $f = File::getByID($r->file_id);
+ $r->file = $f;
+ $r->redir_url = $f->url;
+ } catch (NoResultException $e) {
+ // Invalid entry, delete and run again
+ common_log(LOG_ERR, "Could not find File with id=".$r->file_id." referenced in File_redirection, deleting File redirection entry and and trying again...");
+ $r->delete();
+ return self::where($in_url);
}
+
+ // File_redirection and File record found, return both
+ return $r;
+
} catch (NoResultException $e) {
+ // File_redirection record not found, but this might be a direct link to a file
try {
$f = File::getByUrl($in_url);
$redir->file_id = $f->id;
$redir->file = $f;
return $redir;
- } catch (NoResultException $e) {
- // Oh well, let's keep going
+ } catch (NoResultException $e) {
+ // nope, this was not a direct link to a file either, let's keep going
}
}
- if ($discover) {
+ if ($discover) {
+ // try to follow redirects and get the final url
$redir_info = File_redirection::lookupWhere($in_url);
if(is_string($redir_info)) {
$redir_info = array('url' => $redir_info);
}
-
- // Double check that we don't already have the resolved URL
- $r = self::where($redir_info['url'], false);
- if (!empty($r->file_id)) {
- return $r;
- }
-
- $redir->httpcode = $redir_info['code'];
- $redir->redirections = intval($redir_info['redirects']);
- $redir->file = new File();
- $redir->file->url = $redir_info['url'];
- $redir->file->mimetype = $redir_info['type'];
- $redir->file->size = isset($redir_info['size']) ? $redir_info['size'] : null;
- $redir->file->date = isset($redir_info['time']) ? $redir_info['time'] : null;
- if (isset($redir_info['protected']) && !empty($redir_info['protected'])) {
- $redir->file->protected = true;
+
+ // the last url in the redirection chain can actually be a redirect!
+ // this is the case with local /attachment/{file_id} links
+ // in that case we have the file id already
+ try {
+ $r = File_redirection::getByUrl($redir_info['url']);
+
+ $f = File::getKV('id',$r->file_id);
+
+ if($f instanceof File) {
+ $redir->file = $f;
+ $redir->redir_url = $f->url;
+ } else {
+ // Invalid entry in File_redirection, delete and run again
+ common_log(LOG_ERR, "Could not find File with id=".$r->file_id." referenced in File_redirection, deleting File_redirection entry and trying again...");
+ $r->delete();
+ return self::where($in_url);
+ }
+ } catch (NoResultException $e) {
+ // save the file now when we know that we don't have it in File_redirection
+ try {
+ $redir->file = File::saveNew($redir_info,$redir_info['url']);
+ } catch (ServerException $e) {
+ common_log(LOG_ERR, $e);
+ }
}
+
+ // If this is a redirection and we have a file to redirect to, save it
+ // (if it doesn't exist in File_redirection already)
+ if($redir->file instanceof File && $redir_info['url'] != $in_url) {
+ try {
+ $file_redir = File_redirection::getByUrl($in_url);
+ } catch (NoResultException $e) {
+ $file_redir = new File_redirection();
+ $file_redir->urlhash = File::hashurl($in_url);
+ $file_redir->url = $in_url;
+ $file_redir->file_id = $redir->file->getID();
+ $file_redir->insert();
+ $file_redir->redir_url = $redir->file->url;
+ }
+
+ $file_redir->file = $redir->file;
+ return $file_redir;
+ }
}
return $redir;
$file = File::getByUrl($long_url);
} catch (NoResultException $e) {
// Check if the target URL is itself a redirect...
+ // This should already have happened in processNew in common_shorten_url()
$redir = File_redirection::where($long_url);
- $file = $redir->getFile();
- if (empty($file->id)) {
- $file->saveFile();
- }
+ $file = $redir->file;
}
// Now we definitely have a File object in $file
try {
$out_url = str_replace('.-()', '', $out_url);
break;
+ // non-HTTP schemes, so no redirects
+ case 'bitcoin':
case 'mailto':
- case 'magnet':
case 'aim':
case 'jabber':
case 'xmpp':
// don't touch anything
break;
+ // URLs without domain name, so no redirects
+ case 'magnet':
+ // don't touch anything
+ break;
+
+ // URLs with coordinates, not browsable domain names
+ case 'geo':
+ // don't touch anything
+ break;
+
default:
$out_url = $default_scheme . ltrim($out_url, '/');
$p = parse_url($out_url);