Discourse addon created

author Michael <heluecht@pirati.ca>

Sat, 23 Nov 2019 23:43:59 +0000 (23:43 +0000)

committer Michael <heluecht@pirati.ca>

Sat, 23 Nov 2019 23:43:59 +0000 (23:43 +0000)
author Michael <heluecht@pirati.ca>
Sat, 23 Nov 2019 23:43:59 +0000 (23:43 +0000)
committer Michael <heluecht@pirati.ca>
Sat, 23 Nov 2019 23:43:59 +0000 (23:43 +0000)
diff --git a/discourse/README b/discourse/README

new file mode 100644 (file)

index 0000000..e4a8b9d
--- /dev/null
+++ b/discourse/README
@@ -0,0 +1,97 @@
+NSFW
+
+"Not safe for work"
+
+Scans the message content for the string 'nsfw' 
+(case insensitive) and if found replaces the content
+with a "click to open/close" link, default is closed.
+
+If you click on the 'Not safe for work' addon under
+/settings/addon a text field appears, where you can
+extend the list of search terms. The terms must be
+separated by commas.
+
+It is also possible to enter profile URLs as values.
+This is quite useful for the case, that you perhaps
+don't want to see postings by person_A, but person_B
+is one of your contacts and person_B used to reshare
+postings by person_A.
+
+You can also make use of regular expressions.
+They also have to be separated by commas and the
+regex itself has to be enclosed in slashes:
+
+       ... nsfw, /<REGEX>/, politics,...
+
+---------------
+A few examples:
+---------------
+
+1)
+Let's say you don't want to see postings which contain
+the term 'fake news'
+
+The term could appear in several ways:
+
+fakenews, fake news, fake_news, fake-news, f@ke news,
+f4ke news, f4k3 n3ws, and so on and so on and so on.
+
+You could write every possible version of it as single
+item into your NSFW-filter list, but this can also be
+done with a single regex, which matches all of them:
+
+       /f[@4a]k[3e][-_ ]n[3e]w[sz]/
+
+
+2)
+Another use case could be, that you are simply not
+interested in postings about christmas.
+
+       /christmas(?:[-_ ]?(?:tree|time|eve|pudding))?/
+
+
+ATTENTION:
+
+It is absolutely important, that you use grouping
+parentheses instead of capturing parentheses!!
+
+Grouping parentheses are:
+
+       (?: )
+
+If you use capturing parentheses, which are
+
+       ( )
+
+it will produce errors and the regex won't work and
+at least your targets will not get collapsed.
+
+
+
+3)
+Another possibility is the usage of a so-called
+'lookbehind' construct. I'll give an example followed
+by a description:
+
+       /(?<!the )\badvent\b/
+
+The \b is a word boundary, which matches the beginning
+and the end of a word. The simple pattern 'advent'
+would match advent itself, but also adventure.
+This can be prevented by
+
+       /\badvent\b/
+
+The first part of the regex above
+
+       (?<!the )
+
+is a negative lookbehind. It makes \badvent\b only
+match, if there is no 'the ' before \badvent\b or in
+words:
+
+It looks for 'advent', but doesn't match 'the advent'.
+
+
+For more information, take a look at the documentation
+of the PCRE regex dialect.
diff --git a/discourse/discourse.php b/discourse/discourse.php

new file mode 100644 (file)

index 0000000..1df248a
--- /dev/null
+++ b/discourse/discourse.php
@@ -0,0 +1,224 @@
+<?php
+
+/**
+ * Name: Discourse Mail Connector
+ * Description: Improves mails from Discourse in mailing list mode
+ * Version: 0.1
+ * Author: Michael Vogel <http://pirati.ca/profile/heluecht>
+ *
+ */
+//use DOMDocument;
+//use DOMXPath;
+use Friendica\App;
+use Friendica\Core\Hook;
+use Friendica\Core\L10n;
+use Friendica\Core\Logger;
+use Friendica\Core\PConfig;
+use Friendica\Util\XML;
+use Friendica\Content\Text\Markdown;
+use Friendica\Util\Network;
+Use Friendica\Util\DateTimeFormat;
+
+/**
+ * Registers the four hook handlers of this addon.
+ *
+ * Each entry maps a hook name to the handler function defined in this file.
+ */
+function discourse_install()
+{
+       $handlers = [
+               'email_getmessage'     => 'discourse_email_getmessage',
+               'email_getmessage_end' => 'discourse_email_getmessage_end',
+               'addon_settings'       => 'discourse_addon_settings',
+               'addon_settings_post'  => 'discourse_addon_settings_post',
+       ];
+
+       foreach ($handlers as $hook => $function) {
+               Hook::register($hook, __FILE__, $function);
+       }
+}
+
+/**
+ * Unregisters the four hook handlers of this addon.
+ *
+ * Each entry maps a hook name to the handler function defined in this file.
+ */
+function discourse_uninstall()
+{
+       $handlers = [
+               'email_getmessage'     => 'discourse_email_getmessage',
+               'email_getmessage_end' => 'discourse_email_getmessage_end',
+               'addon_settings'       => 'discourse_addon_settings',
+               'addon_settings_post'  => 'discourse_addon_settings_post',
+       ];
+
+       foreach ($handlers as $hook => $function) {
+               Hook::unregister($hook, __FILE__, $function);
+       }
+}
+
+/**
+ * Handler registered for the 'addon_settings' hook; currently an empty placeholder.
+ *
+ * @param App   $a
+ * @param mixed $s Passed by reference and not touched here.
+ *                 NOTE(review): presumably the settings output to append to - confirm against the hook caller.
+ */
+function discourse_addon_settings(App $a, &$s)
+{
+}
+
+/**
+ * Handler registered for the 'addon_settings_post' hook; currently an empty placeholder.
+ *
+ * @param App $a
+ */
+function discourse_addon_settings_post(App $a)
+{
+}
+
+/**
+ * Handler for the 'email_getmessage' hook: tries to replace the mail content
+ * with the data of the corresponding Discourse post.
+ *
+ * The strategies are tried in this order, the first successful API fetch wins:
+ *  1. Post id and host parsed from the item URI -> discourse_fetch_post_from_api()
+ *  2. Permalink found in the text part          -> discourse_fetch_topic_from_api()
+ *  3. Extraction of the post body from the HTML part of the mail
+ *
+ * @param App   $a
+ * @param array $message Modified in place. Keys used here: 'item' (with 'uri',
+ *                       'parent-uri', 'title', 'plink'), 'text' and 'html'.
+ */
+function discourse_email_getmessage(App $a, &$message)
+{
+//     Logger::info('Got raw message', $message);
+       // Remove the title on comments, they don't serve any purpose there
+       if ($message['item']['parent-uri'] != $message['item']['uri']) {
+               unset($message['item']['title']);
+       }
+
+       // The URI is expected to look like "topic/<topic>/<post>@<host>"
+       if (preg_match('=topic/(.*)/(.*)@(.*)=', $message['item']['uri'], $matches)) {
+               Logger::info('Got post data', ['topic' => $matches[1], 'post' => $matches[2], 'host' => $matches[3]]);
+               if (discourse_fetch_post_from_api($message, $matches[2], $matches[3])) {
+                       return;
+               }
+       }
+
+       // Search in the text part for the link to the discourse entry and the text body
+       // The text body is used as alternative, if the fetched HTML isn't working
+       if (!empty($message['text'])) {
+               discourse_get_text($message);
+       }
+
+       if (!empty($message['item']['plink'])) {
+               // Capture groups: 1 = base URL, 2 = topic, 3 = post number
+               if (preg_match('=(http.*)/t/.*/(.*\d)/(.*\d)=', $message['item']['plink'], $matches)) {
+                       // Each capture group has to be handed over to its own parameter;
+                       // passing the base URL for all three can never match a post.
+                       if (discourse_fetch_topic_from_api($message, $matches[1], $matches[2], $matches[3])) {
+                               return;
+                       }
+               }
+       }
+
+       // Search in the HTML part for the discourse entry and the author profile
+       if (!empty($message['html'])) {
+               discourse_get_html($message);
+       }
+}
+
+/**
+ * Fetches the posts of a topic and applies the post with the given number to the message.
+ *
+ * @param array  $message Message array, handed to discourse_process_post() by reference
+ * @param string $host    Base URL of the Discourse instance including the scheme
+ * @param string $topic   Topic identifier used in the request path
+ * @param string $pid     Post number that is compared with the 'post_number' of each returned post
+ * @return bool true if a matching post was found and processed, false otherwise
+ */
+function discourse_fetch_topic_from_api(&$message, $host, $topic, $pid)
+{
+       // NOTE(review): the query parameter is spelled "posts_ids[]" while the loop below
+       // matches on 'post_number' - confirm the intended parameter against the Discourse API.
+       $url = $host . '/t/' . $topic . '/posts.json?posts_ids[]=' . $pid;
+       $curlResult = Network::curl($url);
+       if (!$curlResult->isSuccess()) {
+               return false;
+       }
+
+       $data = json_decode($curlResult->getBody(), true);
+
+       // Invalid JSON or an unexpected structure must not end in iterating over null
+       if (empty($data['post_stream']['posts']) || !is_array($data['post_stream']['posts'])) {
+               return false;
+       }
+
+       foreach ($data['post_stream']['posts'] as $post) {
+               if (!is_array($post) || (($post['post_number'] ?? null) != $pid)) {
+                       continue;
+               }
+               Logger::info('Got post data from topic', $post);
+               discourse_process_post($message, $post);
+               return true;
+       }
+       return false;
+}
+
+/**
+ * Fetches a single post from "https://<host>/posts/<post>.json" and applies it to the message.
+ *
+ * @param array  $message Message array, handed to discourse_process_post() by reference
+ * @param string $post    Post identifier used in the request path
+ * @param string $host    Host name of the Discourse instance (without scheme)
+ * @return bool false if the request failed or the decoded body is empty, true otherwise
+ */
+function discourse_fetch_post_from_api(&$message, $post, $host)
+{
+       $response = Network::curl("https://" . $host . '/posts/' . $post . '.json');
+       if (!$response->isSuccess()) {
+               return false;
+       }
+
+       $decoded = json_decode($response->getBody(), true);
+       if (empty($decoded)) {
+               return false;
+       }
+
+       discourse_process_post($message, $decoded);
+       Logger::info('Got API data', $message);
+
+       return true;
+}
+
+/**
+ * Copies the content of a decoded Discourse post into the message.
+ *
+ * Sets $message['html'] from 'cooked', $message['text'] from 'raw' and
+ * $message['item']['created'] from 'created_at'.
+ *
+ * @param array $message Message array, modified in place
+ * @param array $post    Decoded post data as returned by the API
+ */
+function discourse_process_post(&$message, $post)
+{
+       // TODO: Thread information (first post of a topic, 'post_number' == 1) and
+       // user information ('username', 'name') are not evaluated yet.
+
+       // Missing fields end up as null like before, but without an "undefined index" notice
+       $message['html'] = $post['cooked'] ?? null;
+       $message['text'] = $post['raw'] ?? null;
+
+       // Only overwrite the creation date when the API actually delivered one
+       if (!empty($post['created_at'])) {
+               $message['item']['created'] = DateTimeFormat::utc($post['created_at']);
+       }
+}
+
+/**
+ * Extracts the post body from the HTML part of the mail and looks for the author profile.
+ *
+ * On success $message['html'] is replaced by the serialized 'div' element that
+ * precedes the 'hr' element. When no such element exists, the HTML is left untouched.
+ *
+ * @param array $message Message array, 'html' is read and possibly replaced
+ */
+function discourse_get_html(&$message)
+{
+       $doc = new DOMDocument();
+       $doc->preserveWhiteSpace = false;
+
+       $html = mb_convert_encoding($message['html'], 'HTML-ENTITIES', "UTF-8");
+       // Mail HTML is frequently malformed, parser warnings are suppressed on purpose
+       @$doc->loadHTML($html, LIBXML_HTML_NODEFDTD);
+
+       $xpath = new DomXPath($doc);
+
+       // Fetch the first 'div' before the 'hr' -hopefully this fits for all systems
+       $result = $xpath->query("//hr//preceding::div[1]");
+       $node = ($result !== false) ? $result->item(0) : null;
+
+       if ($node !== null) {
+               $doc2 = new DOMDocument();
+               $div = $doc2->importNode($node, true);
+               $doc2->appendChild($div);
+               $message['html'] = $doc2->saveHTML();
+               Logger::info('Found html body', ['html' => $message['html']]);
+       } else {
+               // Without this guard importNode() would be called with null and fail fatally
+               Logger::info('No html body found');
+       }
+
+       $profile = discourse_get_profile($xpath);
+       if (!empty($profile)) {
+               Logger::info('Found profile', $profile);
+/*
+               $message['item']['author-avatar'] = $contact['avatar'];
+               $message['item']['author-link'] = $profile['link'];
+               $message['item']['author-name'] = $profile['name'];
+*/
+       }
+}
+
+/**
+ * Splits the text part of the mail at the "\n---\n" separator line.
+ *
+ * The part before the separator is trimmed, converted from Markdown to BBCode
+ * and stored in $message['text']. The first "(http...)" link found from the
+ * separator onwards is stored in $message['item']['plink'].
+ * Without a separator (or with one at the very beginning) nothing is changed.
+ *
+ * @param array $message Message array, modified in place
+ */
+function discourse_get_text(&$message)
+{
+       $body = str_replace("\r", '', $message['text']);
+       $separator = strpos($body, "\n---\n");
+
+       if (($separator === false) || ($separator === 0)) {
+               Logger::info('No separator found', ['text' => $body]);
+               return;
+       }
+
+       $message['text'] = trim(substr($body, 0, $separator));
+       Logger::info('Found text body', ['text' => $message['text']]);
+
+       $message['text'] = Markdown::toBBCode($message['text']);
+
+       $footer = substr($body, $separator);
+       if (!preg_match('=\((http.*?)\)=', $footer, $link)) {
+               return;
+       }
+
+       $message['item']['plink'] = $link[1];
+       Logger::info('Found plink', ['plink' => $message['item']['plink']]);
+}
+
+/**
+ * Searches the parsed mail HTML for the avatar, the name and the profile link of the author.
+ *
+ * The avatar is the first 'img' with non-empty 'src', 'title', 'width' and 'height'
+ * whose width equals its height. The profile link is the first 'a' with text
+ * content whose 'href' contains "/<name>".
+ *
+ * @param DOMXPath $xpath XPath object of the parsed mail HTML
+ * @return array Empty when no avatar was found, otherwise 'avatar' and 'name'
+ *               plus 'link' when a matching link exists
+ */
+function discourse_get_profile($xpath)
+{
+       $profile = [];
+
+       foreach ($xpath->query("//td//following::img") as $node) {
+               $src    = $node->getAttribute('src');
+               $title  = $node->getAttribute('title');
+               $width  = $node->getAttribute('width');
+               $height = $node->getAttribute('height');
+
+               if (!empty($src) && !empty($title) && !empty($width) && !empty($height) && ($width == $height)) {
+                       $profile = ['avatar' => $src, 'name' => $title];
+                       break;
+               }
+       }
+
+       // Without a name there is nothing to search for. Continuing would match
+       // any link containing a slash and return an unrelated 'link'.
+       if (empty($profile['name'])) {
+               return $profile;
+       }
+
+       $needle = '/' . $profile['name'];
+
+       foreach ($xpath->query("//td//following::a") as $node) {
+               if (empty(trim($node->textContent))) {
+                       continue;
+               }
+
+               $href = $node->getAttribute('href');
+               // strpos() may return 0 for a match at the start, so compare against false
+               if (!empty($href) && (strpos($href, $needle) !== false)) {
+                       $profile['link'] = $href;
+                       break;
+               }
+       }
+
+       return $profile;
+}
+
+/**
+ * Handler registered for the 'email_getmessage_end' hook; currently does nothing.
+ *
+ * The disabled log statement below can be re-enabled to inspect the message.
+ *
+ * @param App   $a
+ * @param array $message Passed by reference and not touched here
+ */
+function discourse_email_getmessage_end(App $a, &$message)
+{
+//     Logger::info('Got converted message', $message);
+}
author	Michael <heluecht@pirati.ca>
	Sat, 23 Nov 2019 23:43:59 +0000 (23:43 +0000)
committer	Michael <heluecht@pirati.ca>
	Sat, 23 Nov 2019 23:43:59 +0000 (23:43 +0000)
discourse/README	[new file with mode: 0644]	patch \| blob
discourse/discourse.php	[new file with mode: 0644]	patch \| blob