@@ -536,6 +588,15 @@ class File extends Managed_DataObject
$thumbs->delete();
}
}
+
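+ // Also remove any file-to-post mappings that reference this file.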
+ $f2p = new File_to_post();
+ $f2p->file_id = $this->id;
+ if ($f2p->find()) {
+ while ($f2p->fetch()) {
+ $f2p->delete();
+ }
+ }
}
// And finally remove the entry from the database
@@ -548,4 +608,99 @@ class File extends Managed_DataObject
return $title ?: null;
}
+
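+ /**
+ * Calculate the hash of a URL, using the algorithm in self::URLHASH_ALG.
+ */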
+ static public function hashurl($url)
+ {
+ if (empty($url)) {
+ throw new Exception('No URL provided to hash algorithm.');
+ }
+ return hash(self::URLHASH_ALG, $url);
+ }
+
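+ /**
+ * Called before checkschema runs, to migrate the old table layout:
+ * the url column (previously capped at 255 chars) becomes a text
+ * column, and an indexable urlhash column is added and populated.
+ */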
+ static public function beforeSchemaUpdate()
+ {
+ $table = strtolower(get_called_class());
+ $schema = Schema::get();
+ $schemadef = $schema->getTableDef($table);
+
+ // 2015-02-19 We have to upgrade our table definitions to have the urlhash field populated
+ if (isset($schemadef['fields']['urlhash']) && isset($schemadef['unique keys']['file_urlhash_key'])) {
+ // We already have the urlhash field, so no need to migrate it.
+ return;
+ }
+ echo "\nFound old $table table, upgrading it to contain 'urlhash' field...";
+
+ $file = new File();
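+ // 191 chars is the longest index prefix MySQL InnoDB allows on a
+ // utf8mb4 column (767 bytes at 4 bytes per character), hence the cutoff.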
+ $file->query(sprintf('SELECT id, LEFT(url, 191) AS shortenedurl, COUNT(*) AS c FROM %1$s WHERE LENGTH(url)>191 GROUP BY shortenedurl HAVING c > 1', $schema->quoteIdentifier($table)));
+ print "\nFound {$file->N} URLs with too long entries in file table\n";
+ while ($file->fetch()) {
+ // We've got a URL that is too long for our future file table
+ // so we'll cut it. We could save the original URL, but there is
+ // no guarantee it is complete anyway since the previous max was 255 chars.
+ $dupfile = new File();
+ // First we find file entries that would be duplicates of this when shortened
+ // ... and we'll just throw the dupes out the window for now! It's already so broken.
+ $dupfile->query(sprintf('SELECT * FROM %1$s WHERE LEFT(url, 191) = "%2$s"', $schema->quoteIdentifier($table), $dupfile->escape($file->shortenedurl)));
+ // Leave one of the URLs in the database by using ->find(true) (fetches first entry)
+ if ($dupfile->find(true)) {
+ print "\nShortening url entry for $table id: {$file->id} [";
+ $orig = clone($dupfile);
+ $dupfile->url = $file->shortenedurl; // make sure it's only 191 chars from now on
+ $dupfile->update($orig);
+ print "\nDeleting duplicate entries of too long URL on $table id: {$file->id} [";
+ // only start deleting with this fetch.
+ while ($dupfile->fetch()) {
+ print ".";
+ $dupfile->delete();
+ }
+ print "]\n";
+ } else {
+ print "\nWarning! URL suddenly disappeared from database: {$file->url}\n";
+ }
+ }
+ echo "...and now all the non-duplicates which are longer than 191 characters...\n";
+ $file->query(sprintf('UPDATE %1$s SET url=LEFT(url, 191) WHERE LENGTH(url)>191', $schema->quoteIdentifier($table)));
+
+ echo "\n...now running hacky pre-schemaupdate change for $table:";
+ // We have to create a urlhash that is _not_ the primary key,
+ // transfer data and THEN run checkSchema
+ $schemadef['fields']['urlhash'] = array (
+ 'type' => 'varchar',
+ 'length' => 64,
+ 'not null' => false, // this is because when adding column, all entries will _be_ NULL!
+ 'description' => 'sha256 of destination URL (url field)',
+ );
+ $schemadef['fields']['url'] = array (
+ 'type' => 'text',
+ 'description' => 'destination URL after following possible redirections',
+ );
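+ // Drop the existing unique keys: url is now text and cannot carry a key.
+ // checkschema will later add file_urlhash_key on urlhash (checked above).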
+ unset($schemadef['unique keys']);
+ $schema->ensureTable($table, $schemadef);
+ echo "DONE.\n";
+
+ $classname = ucfirst($table);
+ $tablefix = new $classname();
+ // urlhash is hash('sha256', $url) in the File table
+ echo "Updating urlhash fields in $table table...";
+ // Maybe very MySQL specific :(
+ $tablefix->query(sprintf('UPDATE %1$s SET %2$s=%3$s;',
+ $schema->quoteIdentifier($table),
+ 'urlhash',
+ // The line below is "result of sha256 on column `url`"