X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=classes%2FFile_redirection.php;h=742a6143cc43a93278c579accf90faf891914861;hb=0df853183485add783c59e9fac3a58258918817b;hp=08302c51d70a8306e368c6a51d7df41c4d17f06a;hpb=38c84a92c71913fff570b64d21b7d2e07dfa3dd3;p=quix0rs-gnu-social.git diff --git a/classes/File_redirection.php b/classes/File_redirection.php index 08302c51d7..742a6143cc 100644 --- a/classes/File_redirection.php +++ b/classes/File_redirection.php @@ -39,6 +39,8 @@ class File_redirection extends Managed_DataObject /* the code above is auto generated do not remove the tag below */ ###END_AUTOCODE + protected $file; /* Cache the associated file sometimes */ + public static function schemaDef() { return array( @@ -67,7 +69,7 @@ class File_redirection extends Managed_DataObject $request->setConfig(array( 'connect_timeout' => 10, // # seconds to wait 'max_redirs' => $redirs, // # max number of http redirections to follow - 'follow_redirects' => true, // Follow redirects + 'follow_redirects' => false, // We follow redirects ourselves in lib/httpclient.php 'store_body' => false, // We won't need body content here. )); return $request; @@ -118,21 +120,26 @@ class File_redirection extends Managed_DataObject // no content it'll be cheap. :) $request = self::_commonHttp($short_url, $redirs); $response = $request->send(); + } elseif (400 == $response->getStatus()) { + throw new Exception('Got error 400 on HEAD request, will not go further.'); } } catch (Exception $e) { // Invalid URL or failure to reach server common_log(LOG_ERR, "Error while following redirects for $short_url: " . $e->getMessage()); return $short_url; } - - if ($response->getRedirectCount() && File::isProtected($response->getUrl())) { - // Bump back up the redirect chain until we find a non-protected URL - return self::lookupWhere($short_url, $response->getRedirectCount() - 1, true); + + // if last url after all redirections is protected, + // use the url before it in the redirection chain + if ($response->getRedirectCount() && File::isProtected($response->getEffectiveUrl())) { + $return_url = $response->redirUrls[$response->getRedirectCount()-1]; + } else { + $return_url = $response->getEffectiveUrl(); } $ret = array('code' => $response->getStatus() , 'redirects' => $response->getRedirectCount() - , 'url' => $response->getUrl()); + , 'url' => $return_url); $type = $response->getHeader('Content-Type'); if ($type) $ret['type'] = $type; @@ -155,41 +162,96 @@ class File_redirection extends Managed_DataObject * * @param string $in_url * @param boolean $discover true to attempt dereferencing the redirect if we don't know it already - * @return mixed one of: - * string - target URL, if this is a direct link or a known redirect - * array - redirect info if this is an *unknown* redirect: - * associative array with the following elements: - * code: HTTP status code - * redirects: count of redirects followed - * url: URL string of final target - * type (optional): MIME type from Content-Type header - * size (optional): byte size from Content-Length header - * time (optional): timestamp from Last-Modified header + * @return File_redirection */ static function where($in_url, $discover=true) { - // let's see if we know this... + $redir = new File_redirection(); + $redir->url = $in_url; + $redir->urlhash = File::hashurl($redir->url); + $redir->redirections = 0; + try { - $a = File::getByUrl($in_url); - // this is a direct link to $a->url - return $a->url; - } catch (NoResultException $e) { + $r = File_redirection::getByUrl($in_url); + try { - $b = File_redirection::getByUrl($in_url); - // this is a redirect to $b->file_id - $a = File::getByID($b->file_id); - return $a->url; + $f = File::getByID($r->file_id); + $r->file = $f; + $r->redir_url = $f->url; } catch (NoResultException $e) { - // Oh well, let's keep going + // Invalid entry, delete and run again + common_log(LOG_ERR, "Could not find File with id=".$r->file_id." referenced in File_redirection, deleting File redirection entry and and trying again..."); + $r->delete(); + return self::where($in_url); + } + + // File_redirecion and File record found, return both + return $r; + + } catch (NoResultException $e) { + // File_redirecion record not found, but this might be a direct link to a file + try { + $f = File::getByUrl($in_url); + $redir->file_id = $f->id; + $redir->file = $f; + return $redir; + } catch (NoResultException $e) { + // nope, this was not a direct link to a file either, let's keep going } } - if ($discover) { - $ret = File_redirection::lookupWhere($in_url); - return $ret; + if ($discover) { + // try to follow redirects and get the final url + $redir_info = File_redirection::lookupWhere($in_url); + if(is_string($redir_info)) { + $redir_info = array('url' => $redir_info); + } + + // the last url in the redirection chain can actually be a redirect! + // this is the case with local /attachment/{file_id} links + // in that case we have the file id already + try { + $r = File_redirection::getByUrl($redir_info['url']); + + $f = File::getKV('id',$r->file_id); + + if($f instanceof File) { + $redir->file = $f; + $redir->redir_url = $f->url; + } else { + // Invalid entry in File_redirection, delete and run again + common_log(LOG_ERR, "Could not find File with id=".$r->file_id." referenced in File_redirection, deleting File_redirection entry and trying again..."); + $r->delete(); + return self::where($in_url); + } + } catch (NoResultException $e) { + // save the file now when we know that we don't have it in File_redirection + try { + $redir->file = File::saveNew($redir_info,$redir_info['url']); + } catch (ServerException $e) { + common_log(LOG_ERR, $e); + } + } + + // If this is a redirection and we have a file to redirect to, save it + // (if it doesn't exist in File_redirection already) + if($redir->file instanceof File && $redir_info['url'] != $in_url) { + try { + $file_redir = File_redirection::getByUrl($in_url); + } catch (NoResultException $e) { + $file_redir = new File_redirection(); + $file_redir->urlhash = File::hashurl($in_url); + $file_redir->url = $in_url; + $file_redir->file_id = $redir->file->getID(); + $file_redir->insert(); + $file_redir->redir_url = $redir->file->url; + } + + $file_redir->file = $redir->file; + return $file_redir; + } } - // No manual dereferencing; leave the unknown URL as is. - return $in_url; + return $redir; } /** @@ -246,37 +308,22 @@ class File_redirection extends Managed_DataObject if (!empty($short_url) && $short_url != $long_url) { $short_url = (string)$short_url; // store it - $file = File::getKV('url', $long_url); - if ($file instanceof File) { - $file_id = $file->getID(); - } else { + try { + $file = File::getByUrl($long_url); + } catch (NoResultException $e) { // Check if the target URL is itself a redirect... - $redir_data = File_redirection::where($long_url); - if (is_array($redir_data)) { - // We haven't seen the target URL before. - // Save file and embedding data about it! - $file = File::saveNew($redir_data, $long_url); - $file_id = $file->getID(); - } else if (is_string($redir_data)) { - // The file is a known redirect target. - $file = File::getKV('url', $redir_data); - if (empty($file)) { - // @fixme should we save a new one? - // this case was triggering sometimes for redirects - // with unresolvable targets; found while fixing - // "can't linkify" bugs with shortened links to - // SSL sites with cert issues. - return null; - } - $file_id = $file->getID(); - } + // This should already have happened in processNew in common_shorten_url() + $redir = File_redirection::where($long_url); + $file = $redir->file; } - $file_redir = File_redirection::getKV('url', $short_url); - if (!$file_redir instanceof File_redirection) { - $file_redir = new File_redirection; + // Now we definitely have a File object in $file + try { + $file_redir = File_redirection::getByUrl($short_url); + } catch (NoResultException $e) { + $file_redir = new File_redirection(); $file_redir->urlhash = File::hashurl($short_url); $file_redir->url = $short_url; - $file_redir->file_id = $file_id; + $file_redir->file_id = $file->getID(); $file_redir->insert(); } return $short_url; @@ -308,14 +355,25 @@ class File_redirection extends Managed_DataObject $out_url = str_replace('.-()', '', $out_url); break; + // non-HTTP schemes, so no redirects + case 'bitcoin': case 'mailto': - case 'magnet': case 'aim': case 'jabber': case 'xmpp': // don't touch anything break; + // URLs without domain name, so no redirects + case 'magnet': + // don't touch anything + break; + + // URLs with coordinates, not browsable domain names + case 'geo': + // don't touch anything + break; + default: $out_url = $default_scheme . ltrim($out_url, '/'); $p = parse_url($out_url); @@ -385,4 +443,12 @@ class File_redirection extends Managed_DataObject echo "DONE.\n"; echo "Resuming core schema upgrade..."; } + + public function getFile() { + if (!$this->file instanceof File) { + $this->file = File::getByID($this->file_id); + } + + return $this->file; + } }