git.mxchange.org Git - friendica.git/commitdiff
We now store the tags in two separate tables
author Michael <heluecht@pirati.ca>
Fri, 17 Apr 2020 06:35:20 +0000 (06:35 +0000)
committer Michael <heluecht@pirati.ca>
Fri, 17 Apr 2020 06:35:20 +0000 (06:35 +0000)
include/items.php
src/Model/Tag.php
src/Protocol/ActivityPub/Processor.php
src/Protocol/DFRN.php
src/Protocol/Diaspora.php
src/Protocol/Feed.php
src/Protocol/OStatus.php
static/dbstructure.config.php

index 6068be4b9469c71041ef7a498c1443ee5ead7823..2d7330858238179754f6947cc7ba1c6b79d15a61 100644 (file)
@@ -141,31 +141,44 @@ function query_page_info($url, $photo = "", $keywords = false, $keyword_blacklis
        return $data;
 }
 
-function add_page_keywords($url, $photo = "", $keywords = false, $keyword_blacklist = "", $return_array = false)
+/**
+ * Build a BBCode string of hashtag links from the keywords of a page
+ *
+ * All parameters are handed unchanged to query_page_info(); their exact
+ * meaning is defined there.
+ *
+ * @param string $url               Passed to query_page_info()
+ * @param string $photo             Passed to query_page_info()
+ * @param bool   $keywords          Passed to query_page_info()
+ * @param string $keyword_blacklist Passed to query_page_info()
+ *
+ * @return string Links of the form "#[url=<baseurl>/search?tag=<tag>]<tag>[/url]"
+ *                joined by ", "; an empty string when the page info contains
+ *                no non-empty "keywords" array
+ */
+function add_page_keywords($url, $photo = "", $keywords = false, $keyword_blacklist = "")
 {
        $data = query_page_info($url, $photo, $keywords, $keyword_blacklist);
+       // Nothing to link when no keyword list was returned
+       if (empty($data['keywords']) || !is_array($data['keywords'])) {
+               return '';
+       }
 
        $tags = "";
-       $taglist = [];
-       if (isset($data["keywords"]) && count($data["keywords"])) {
-               foreach ($data["keywords"] as $keyword) {
-                       $hashtag = str_replace([" ", "+", "/", ".", "#", "'"],
-                               ["", "", "", "", "", ""], $keyword);
+       foreach ($data["keywords"] as $keyword) {
+               // Remove blanks, "+", "/", ".", "#" and "'" to get the bare tag
+               $hashtag = str_replace([" ", "+", "/", ".", "#", "'"],
+                       ["", "", "", "", "", ""], $keyword);
 
-                       if ($tags != "") {
-                               $tags .= ", ";
-                       }
-
-                       $tags .= "#[url=" . DI::baseUrl() . "/search?tag=" . $hashtag . "]" . $hashtag . "[/url]";
-                       $taglist[] = $hashtag;
+               // Separate the links with ", " (no leading separator)
+               if ($tags != "") {
+                       $tags .= ", ";
                }
+
+               $tags .= "#[url=" . DI::baseUrl() . "/search?tag=" . $hashtag . "]" . $hashtag . "[/url]";
        }
 
-       if ($return_array) {
-               return $taglist;
-       } else {
-               return $tags;
+       return $tags;
+}
+
+/**
+ * Fetch the keywords of a page as a list of bare hashtag names
+ *
+ * All parameters are handed unchanged to query_page_info(); their exact
+ * meaning is defined there.
+ *
+ * @param string $url               Passed to query_page_info()
+ * @param string $photo             Passed to query_page_info()
+ * @param bool   $keywords          Passed to query_page_info()
+ * @param string $keyword_blacklist Passed to query_page_info()
+ *
+ * @return array Keywords with blanks, "+", "/", ".", "#" and "'" removed;
+ *               an empty array when the page info contains no non-empty
+ *               "keywords" array
+ */
+function get_page_keywords($url, $photo = "", $keywords = false, $keyword_blacklist = "")
+{
+       $pageInfo = query_page_info($url, $photo, $keywords, $keyword_blacklist);
+
+       $hasKeywords = !empty($pageInfo['keywords']) && is_array($pageInfo['keywords']);
+       if (!$hasKeywords) {
+               return [];
        }
+
+       // Characters that must not be part of a hashtag
+       $stripped = [" ", "+", "/", ".", "#", "'"];
+
+       $hashtags = [];
+       foreach ($pageInfo['keywords'] as $keyword) {
+               // A single replacement string is applied to every search entry
+               $hashtags[] = str_replace($stripped, "", $keyword);
+       }
+
+       return $hashtags;
 }
 
 function add_page_info($url, $no_photos = false, $photo = "", $keywords = false, $keyword_blacklist = "")
index 7578372a9b29f0d5d6c71da4a533c330d17bffd7..43885e493ab81234b071f48f3ee5bec47860ea5d 100644 (file)
@@ -54,6 +54,54 @@ class Tag
                self::EXCLUSIVE_MENTION => '!',
        ];
 
+       /**
+        * Store a tag or mention and link it to a post
+        *
+        * The tag is looked up in (or inserted into) the "tag" table by its
+        * type, name and url; the relation to the post is then written to the
+        * "post-tag" table.
+        *
+        * @param integer $uriid uri-id that is written to "post-tag"
+        * @param integer $type  Tag type, e.g. self::HASHTAG or self::MENTION
+        * @param string  $name  Tag name; control characters, blanks and the
+        *                       characters "#", "!" and "@" are trimmed from both
+        *                       ends, the result is cut to 64 bytes
+        * @param string  $url   Optional link for the tag; ignored when empty or
+        *                       identical to the name, stored lowercased otherwise
+        */
+       public static function store(int $uriid, int $type, string $name, string $url = '')
+       {
+               $name = trim($name, "\x00..\x20\xFF#!@");
+               // Compare with '' explicitly: empty() would also drop the valid name "0"
+               if ($name === '') {
+                       return;
+               }
+
+               // "url" is always part of the condition. Without it the lookup below
+               // would match any row with the same type and name, regardless of its
+               // url, and the post would be linked to the wrong tag.
+               $fields = ['name' => substr($name, 0, 64), 'type' => $type, 'url' => ''];
+
+               if (!empty($url) && ($url != $name)) {
+                       $fields['url'] = strtolower($url);
+               }
+
+               $tag = DBA::selectFirst('tag', ['id'], $fields);
+               if (!DBA::isResult($tag)) {
+                       DBA::insert('tag', $fields, true);
+                       $tagid = DBA::lastInsertId();
+               } else {
+                       $tagid = $tag['id'];
+               }
+
+               if (empty($tagid)) {
+                       Logger::error('No tag id created', $fields);
+                       return;
+               }
+
+               DBA::insert('post-tag', ['uri-id' => $uriid, 'tid' => $tagid], true);
+
+               Logger::info('Stored tag/mention', ['uri-id' => $uriid, 'tag-id' => $tagid, 'tag' => $fields]);
+       }
+
+       /**
+        * Store a tag or mention whose type is given by its tag character
+        *
+        * The character is compared with the entries of self::TAG_CHARACTER for
+        * mentions, exclusive mentions, implicit mentions and hashtags (in that
+        * order). Nothing is stored when the character matches none of them.
+        *
+        * @param integer $uriid uri-id handed to self::store()
+        * @param string  $hash  Tag character that selects the tag type
+        * @param string  $name  Tag name handed to self::store()
+        * @param string  $url   Optional url handed to self::store()
+        */
+       public static function storeByHash(int $uriid, string $hash, string $name, string $url = '')
+       {
+               // Checked in this order; the first matching type wins
+               $candidates = [self::MENTION, self::EXCLUSIVE_MENTION, self::IMPLICIT_MENTION, self::HASHTAG];
+
+               foreach ($candidates as $type) {
+                       if ($hash == self::TAG_CHARACTER[$type]) {
+                               self::store($uriid, $type, $name, $url);
+                               return;
+                       }
+               }
+       }
+
        /**
         * Store tags from the body
         *
@@ -73,9 +121,7 @@ class Tag
                                continue;
                        }
 
-                       $fields = ['uri-id' => $uriid, 'name' => substr($tag, 1, 64), 'type' => self::HASHTAG];
-                       DBA::insert('tag', $fields, true);
-                       Logger::info('Stored tag', ['uriid' => $uriid, 'tag' => $tag, 'fields' => $fields]);
+                       self::storeByHash($uriid, '#', $tag);
                }
        }
 }
index 023100dc7223f9e6b73713e92205c8db5ad7ff83..1b605c30abb135e785dbcbf51e6e20b256d27edb 100644 (file)
@@ -34,6 +34,7 @@ use Friendica\Model\Event;
 use Friendica\Model\Item;
 use Friendica\Model\ItemURI;
 use Friendica\Model\Mail;
+use Friendica\Model\Tag;
 use Friendica\Model\Term;
 use Friendica\Model\User;
 use Friendica\Protocol\Activity;
@@ -585,53 +586,43 @@ class Processor
        private static function storeTags(int $uriid, array $tags = null)
        {
                // Make sure to delete all existing tags (can happen when called via the update functionality)
-               DBA::delete('tag', ['uri-id' => $uriid]);
+               DBA::delete('post-tag', ['uri-id' => $uriid]);
 
                foreach ($tags as $tag) {
                        if (empty($tag['name']) || empty($tag['type']) || !in_array($tag['type'], ['Mention', 'Hashtag'])) {
                                continue;
                        }
 
-                       $fields = ['uri-id' => $uriid, 'name' => $tag['name']];
+                       $hash = substr($tag['name'], 0, 1);
 
                        if ($tag['type'] == 'Mention') {
-                               $fields['type'] = Term::MENTION;
-
-                               if (substr($fields['name'], 0, 1) == Term::TAG_CHARACTER[Term::MENTION]) {
-                                       $fields['name'] = substr($fields['name'], 1);
-                               } elseif (substr($fields['name'], 0, 1) == Term::TAG_CHARACTER[Term::EXCLUSIVE_MENTION]) {
-                                       $fields['type'] = Term::EXCLUSIVE_MENTION;
-                                       $fields['name'] = substr($fields['name'], 1);
-                               } elseif (substr($fields['name'], 0, 1) == Term::TAG_CHARACTER[Term::IMPLICIT_MENTION]) {
-                                       $fields['type'] = Term::IMPLICIT_MENTION;
-                                       $fields['name'] = substr($fields['name'], 1);
+                               if (in_array($hash, [Tag::TAG_CHARACTER[Tag::MENTION],
+                                       Tag::TAG_CHARACTER[Tag::EXCLUSIVE_MENTION],
+                                       Tag::TAG_CHARACTER[Tag::IMPLICIT_MENTION]])) {
+                                       $tag['name'] = substr($tag['name'], 1);
+                               } else {
+                                       $hash = '#';
                                }
+
                                if (!empty($tag['href'])) {
                                        $apcontact = APContact::getByURL($tag['href']);
                                        if (!empty($apcontact['name']) || !empty($apcontact['nick'])) {
-                                               $fields['name'] = $apcontact['name'] ?: $apcontact['nick'];
+                                               $tag['name'] = $apcontact['name'] ?: $apcontact['nick'];
                                        }
                                }
                        } elseif ($tag['type'] == 'Hashtag') {
-                               $fields['type'] = Term::HASHTAG;
-                               if (substr($fields['name'], 0, 1) == Term::TAG_CHARACTER[Term::HASHTAG]) {
-                                       $fields['name'] = substr($fields['name'], 1);
+                               if (substr($tag['name'], 0, 1) == Term::TAG_CHARACTER[Term::HASHTAG]) {
+                                       $tag['name'] = substr($tag['name'], 1);
+                               } else {
+                                       $hash = '@';
                                }
                        }
 
-                       if (empty($fields['name'])) {
+                       if (empty($tag['name'])) {
                                continue;
-                       } else {
-                               $fields['name'] = substr($fields['name'], 0, 64);
                        }
                        
-                       if (!empty($tag['href'] && ($tag['href'] != $tag['name']))) {
-                               $fields['url'] = $tag['href'];
-                       }
-
-                       DBA::insert('tag', $fields, true);
-
-                       Logger::info('Stored tag/mention', ['uriid' => $uriid, 'tag' => $tag, 'fields' => $fields]);
+                       Tag::storeByHash($uriid, $hash, $tag['name'], $tag['href']);
                }
        }
 
index 9ab8bc70c9117637855ae4464974f4ee1aca81d0..32067613f117527dc0e0349df3ed9e41d2c28115 100644 (file)
@@ -2464,26 +2464,7 @@ class DFRN
 
                                                $item["tag"] .= $termhash . "[url=" . $termurl . "]" . $term . "[/url]";
 
-                                               // Store the hashtag/mention
-                                               $fields = ['uri-id' => $item['uri-id'], 'name' => substr($term, 0, 64)];
-
-                                               if ($termhash == Term::TAG_CHARACTER[Term::MENTION]) {
-                                                       $fields['type'] = Term::MENTION;
-                                               } elseif ($termhash == Term::TAG_CHARACTER[Term::EXCLUSIVE_MENTION]) {
-                                                       $fields['type'] = Term::EXCLUSIVE_MENTION;
-                                               } elseif ($termhash == Term::TAG_CHARACTER[Term::IMPLICIT_MENTION]) {
-                                                       $fields['type'] = Term::IMPLICIT_MENTION;
-                                               } elseif ($termhash == Term::TAG_CHARACTER[Term::HASHTAG]) {
-                                                       $fields['type'] = Term::HASHTAG;
-                                               }
-
-                                               if (!empty($termurl)) {
-                                                       $fields['url'] = $termurl;
-                                               }
-
-                                               DBA::insert('tag', $fields, true);
-
-                                               Logger::info('Stored tag/mention', ['uri-id' => $item['uri-id'], 'tag' => $term, 'url' => $termurl, 'hash' => $termhash, 'fields' => $fields]);
+                                               Tag::storeByHash($item['uri-id'], $termhash, $term, $termurl);
                                        }
                                }
                        }
index 5580234c6428f673f4be70e562ce2b0b42a8115b..20a2be4cc123230e38f0e57fa8895c60d3101a97 100644 (file)
@@ -1840,20 +1840,7 @@ class Diaspora
                                continue;
                        }
 
-                       $fields = ['uri-id' => $uriid, 'name' => substr($person['name'] ?: $person['nick'], 0, 64), 'url' => $person['url']];
-
-                       if ($match[1] == Term::TAG_CHARACTER[Term::MENTION]) {
-                               $fields['type'] = Term::MENTION;
-                       } elseif ($match[1] == Term::TAG_CHARACTER[Term::EXCLUSIVE_MENTION]) {
-                               $fields['type'] = Term::EXCLUSIVE_MENTION;
-                       } elseif ($match[1] == Term::TAG_CHARACTER[Term::IMPLICIT_MENTION]) {
-                               $fields['type'] = Term::IMPLICIT_MENTION;
-                       } else {
-                               continue;
-                       }
-
-                       DBA::insert('tag', $fields, true);
-                       Logger::info('Stored mention', ['uriid' => $uriid, 'match' => $match, 'fields' => $fields]);
+                       Tag::storeByHash($uriid, $match[1], $person['name'] ?: $person['nick'], $person['url']);
                }
        }
 
index 817134840901b1354bb5a2fc161741d747fd5997..c03f9598650c7c3894200169a580bf811689b6fd 100644 (file)
@@ -29,7 +29,7 @@ use Friendica\Core\Protocol;
 use Friendica\Database\DBA;
 use Friendica\DI;
 use Friendica\Model\Item;
-use Friendica\Model\Term;
+use Friendica\Model\Tag;
 use Friendica\Util\Network;
 use Friendica\Util\ParseUrl;
 use Friendica\Util\XML;
@@ -478,7 +478,7 @@ class Feed {
                                $item["title"] = "";
                                $item["body"] = $item["body"] . add_page_info($item["plink"], false, $preview, ($contact["fetch_further_information"] == 2), $contact["ffi_keyword_blacklist"]);
                                $item["tag"] = add_page_keywords($item["plink"], $preview, ($contact["fetch_further_information"] == 2), $contact["ffi_keyword_blacklist"]);
-                               $taglist = add_page_keywords($item["plink"], $preview, ($contact["fetch_further_information"] == 2), $contact["ffi_keyword_blacklist"], true);
+                               $taglist = get_page_keywords($item["plink"], $preview, ($contact["fetch_further_information"] == 2), $contact["ffi_keyword_blacklist"]);
                                $item["object-type"] = Activity\ObjectType::BOOKMARK;
                                unset($item["attach"]);
                        } else {
@@ -492,7 +492,7 @@ class Feed {
                                        } else {
                                                // @todo $preview is never set in this case, is it intended? - @MrPetovan 2018-02-13
                                                $item["tag"] = add_page_keywords($item["plink"], $preview, true, $contact["ffi_keyword_blacklist"]);
-                                               $taglist = add_page_keywords($item["plink"], $preview, true, $contact["ffi_keyword_blacklist"], true);
+                                               $taglist = get_page_keywords($item["plink"], $preview, true, $contact["ffi_keyword_blacklist"]);
                                        }
                                        $item["body"] .= "\n" . $item['tag'];
                                } else {
@@ -531,10 +531,7 @@ class Feed {
                                if (!empty($id) && !empty($taglist)) {
                                        $feeditem = Item::selectFirst(['uri-id'], ['id' => $id]);
                                        foreach ($taglist as $tag) {
-                                               $fields = ['uri-id' => $feeditem['uri-id'], 'name' => substr($tag, 0, 64), 'type' => Term::HASHTAG];
-                                               DBA::insert('tag', $fields, true);
-               
-                                               Logger::info('Stored tag', ['uri-id' => $feeditem['uri-id'], 'tag' => $tag, 'fields' => $fields]);
+                                               Tag::storeByHash($feeditem['uri-id'], '#', $tag);
                                        }                                       
                                }
                        }
index e155708aece7d6231d276ea6fc1d93078df98947..7dad68550be080df5be48fd7121f91a70cfd8cd9 100644 (file)
@@ -36,7 +36,7 @@ use Friendica\Model\Conversation;
 use Friendica\Model\GContact;
 use Friendica\Model\Item;
 use Friendica\Model\ItemURI;
-use Friendica\Model\Term;
+use Friendica\Model\Tag;
 use Friendica\Model\User;
 use Friendica\Network\Probe;
 use Friendica\Util\DateTimeFormat;
@@ -665,10 +665,7 @@ class OStatus
                                                $item['tag'] .= '#[url=' . DI::baseUrl() . '/search?tag=' . $term . ']' . $term . '[/url]';
 
                                                // Store the hashtag
-                                               $fields = ['uri-id' => $item['uri-id'], 'name' => substr($term, 0, 64), 'type' => Term::HASHTAG];
-                                               DBA::insert('tag', $fields, true);
-
-                                               Logger::info('Stored tag', ['uri-id' => $item['uri-id'], 'tag' => $term, 'fields' => $fields]);
+                                               Tag::storeByHash($item['uri-id'], '#', $term);
                                        }
                                }
                        }
index 110842cbc0bd4c58deb4cfc646634f36adcd4c5e..204fca9bfe00b10f6fa229a456c2fee58bc32bb9 100755 (executable)
@@ -1293,16 +1293,27 @@ return [
                ]
        ],
        "tag" => [
-               "comment" => "item tags and mentions",
+               "comment" => "tags and mentions",
                "fields" => [
+                       "id" => ["type" => "int unsigned", "not null" => "1", "extra" => "auto_increment", "primary" => "1", "comment" => ""],
+                       "type" => ["type" => "tinyint unsigned", "not null" => "1", "default" => "0", "comment" => ""],
+                       "name" => ["type" => "varchar(64)", "not null" => "1", "default" => "", "comment" => ""],
+                       "url" => ["type" => "varbinary(255)", "not null" => "1", "default" => "", "comment" => ""]
+               ],
+               "indexes" => [
+                       "PRIMARY" => ["id"],                    
+                       "type_name_url" => ["UNIQUE", "type", "name", "url"]
+               ]
+       ],
+       "post-tag" => [
+               "comment" => "post relation to tags",
+               "fields" => [
+                       "tid" => ["type" => "int unsigned", "not null" => "1", "relation" => ["tag" => "id"], "primary" => "1", "comment" => ""],
                        "uri-id" => ["type" => "int unsigned", "not null" => "1", "primary" => "1", "relation" => ["item-uri" => "id"], "comment" => "Id of the item-uri table entry that contains the item uri"],
-                       "type" => ["type" => "tinyint unsigned", "not null" => "1", "default" => "0", "primary" => "1", "comment" => ""],
-                       "name" => ["type" => "varchar(64)", "not null" => "1", "default" => "", "primary" => "1", "comment" => ""],
-                       "url" => ["type" => "varchar(255)", "not null" => "1", "default" => "", "comment" => ""]
                ],
                "indexes" => [
-                       "PRIMARY" => ["uri-id", "type", "name"],
-                       "type_name" => ["type", "name"]
+                       "PRIMARY" => ["tid", "uri-id"],
+                       "uri-id" => ["uri-id"]
                ]
        ],
        "thread" => [