3 * Name: Leistungsschutzrecht
4 * Description: Only useful in germany: Remove data from snippets from members of the VG Media
6 * Author: Michael Vogel <https://pirati.ca/profile/heluecht>
10 use Friendica\Core\Hook;
11 use Friendica\Core\Logger;
/**
 * Registers this addon's hook handlers.
 *
 * All three registrations point at this addon's own file. The 'cron' hook is
 * bound to leistungsschutzrecht_cron(); the 'getsiteinfo' and 'page_info_data'
 * hooks are both bound to leistungsschutzrecht_getsiteinfo().
 */
14 function leistungsschutzrecht_install() {
15 Hook::register('cron', 'addon/leistungsschutzrecht/leistungsschutzrecht.php', 'leistungsschutzrecht_cron');
16 Hook::register('getsiteinfo', 'addon/leistungsschutzrecht/leistungsschutzrecht.php', 'leistungsschutzrecht_getsiteinfo');
// 'page_info_data' is routed to the same handler as 'getsiteinfo'.
17 Hook::register('page_info_data', 'addon/leistungsschutzrecht/leistungsschutzrecht.php', 'leistungsschutzrecht_getsiteinfo');
/**
 * Hook handler that strips data from a site-info array in place.
 *
 * Visible steps, in order:
 *  - checks that $siteinfo['url'] is set and $siteinfo['type'] is non-empty;
 *  - when the type is neither 'photo' nor 'video' and the
 *    'leistungsschutzrecht' / 'suppress_photos' config value (default false)
 *    is truthy, removes the 'image' and 'images' entries;
 *  - checks the URL with leistungsschutzrecht_is_member_site();
 *  - shortens a non-empty 'text' entry via leistungsschutzrecht_cuttext();
 *  - removes the 'keywords' entry.
 *
 * NOTE(review): the bodies of the two guard conditions are not visible in this
 * excerpt. Presumably each returns early, which would make the text and
 * keyword handling apply to member sites only - confirm against the full file.
 *
 * @param App   $a        Not used in the visible lines.
 * @param array $siteinfo Site information, passed by reference and modified.
 */
20 function leistungsschutzrecht_getsiteinfo(App $a,array &$siteinfo) {
// Guard: both a URL and a non-empty type are required.
21 if (!isset($siteinfo['url']) || empty($siteinfo['type'])) {
25 // Avoid any third party pictures, to avoid copyright issues
// This check happens before the member-site check below.
26 if (!in_array($siteinfo['type'], ['photo', 'video']) && DI::config()->get('leistungsschutzrecht', 'suppress_photos', false)) {
27 unset($siteinfo['image']);
28 unset($siteinfo['images']);
// Guard: is the URL's host on the stored member-site list?
31 if (!leistungsschutzrecht_is_member_site($siteinfo['url'])) {
// Shorten the snippet text only when there is one.
35 if (!empty($siteinfo['text'])) {
36 $siteinfo['text'] = leistungsschutzrecht_cuttext($siteinfo['text']);
// Keywords are removed rather than shortened.
39 unset($siteinfo['keywords']);
/**
 * Normalises whitespace in a text and compares its word count to a limit.
 *
 * Visible steps: carriage returns and line feeds are replaced by spaces, a
 * str_replace() is repeated until the text stops changing, the text is split
 * on single spaces, and the words are iterated with a counter that is
 * compared against $limit.
 *
 * NOTE(review): the initialisation of $oldtext, $count and $limit, the loop
 * body, and the return statement are not visible in this excerpt. Presumably
 * the function returns the first $limit words with a truncation marker when
 * words were dropped - confirm against the full file.
 *
 * @param string $text Text to process.
 * @return string Processed text (construction not visible in this excerpt).
 */
42 function leistungsschutzrecht_cuttext(string $text): string
// Turn line breaks into spaces so the text is a single line.
44 $text = str_replace(["\r", "\n"], [' ', ' '], $text);
// NOTE(review): as shown here, search and replacement are the same single
// space, so this call changes nothing. Presumably the search string is two
// spaces (collapsing runs of spaces) and whitespace was lost in this excerpt -
// confirm against the full file.
48 $text = str_replace(' ', ' ', $text);
// Repeat until a pass leaves the text unchanged.
49 } while ($oldtext != $text);
51 $words = explode(' ', $text);
57 foreach ($words as $word) {
// Pre-increment: the check fires once $count reaches $limit.
64 if (++$count >= $limit) {
// True only when the input has more words than the limit.
65 if (sizeof($words) > $limit) {
/**
 * Fetches a list of sites and stores it in the addon configuration.
 *
 * The response body is fetched from a fixed URL and split on commas. Every
 * entry not contained in the local whitelist is added to $sites, keyed by
 * itself. A non-empty $sites is written to the 'leistungsschutzrecht' /
 * 'sites' config key; an empty result leaves the stored value untouched.
 *
 * NOTE(review): no check of the fetch result and no trimming of the entries
 * is visible in this excerpt; entries with surrounding whitespace or line
 * breaks would not match the whitelist - confirm against the full file.
 */
75 function leistungsschutzrecht_fetchsites()
77 // This list works - but question is how current it is
78 $url = 'https://leistungsschutzrecht-stoppen.d-64.org/blacklist.txt';
79 $sitelist = DI::httpClient()->fetch($url);
// The list is treated as comma-separated.
80 $siteurls = explode(',', $sitelist);
// Sites that are never added to the stored list, even if the fetched list names them.
82 $whitelist = ['tagesschau.de', 'heute.de', 'wdr.de'];
85 foreach ($siteurls as $site) {
86 if (!in_array($site, $whitelist)) {
// Keyed by site name so membership can later be tested with isset().
87 $sites[$site] = $site;
91 // I would prefer parsing the list from the original site, but I haven't found a list.
92 // The following stays here to possibly reenable it in the future without having to reinvent the wheel completely.
// NOTE(review): given the two comments above, the scraping code that follows
// is presumably disabled; its block-comment delimiters are not visible in
// this excerpt - confirm before editing it.
96 $url = "http://www.vg-media.de/lizenzen/digitale-verlegerische-angebote/wahrnehmungsberechtigte-digitale-verlegerische-angebote.html";
98 $site = Network::fetchUrl($url);
100 $doc = new DOMDocument();
101 @$doc->loadHTML($site);
103 $xpath = new DomXPath($doc);
// Select every link that sits inside a table cell.
104 $list = $xpath->query("//td/a");
105 foreach ($list as $node) {
// Copy the node's attributes into a name => value array.
107 if ($node->attributes->length)
108 foreach ($node->attributes as $attribute)
109 $attr[$attribute->name] = $attribute->value;
111 if (isset($attr["href"])) {
112 $urldata = parse_url($attr["href"]);
// Only links that have a host and no path are considered.
114 if (isset($urldata["host"]) && !isset($urldata["path"])) {
115 $cleanedurlpart = explode("%", $urldata["host"]);
117 $hostname = explode(".", $cleanedurlpart[0]);
// Keep the last two host labels, e.g. "example.de".
118 $site = $hostname[sizeof($hostname) - 2].".".$hostname[sizeof($hostname) - 1];
119 $sites[$site] = $site;
// Only overwrite the stored list when at least one site was collected.
125 if (sizeof($sites)) {
126 DI::config()->set('leistungsschutzrecht','sites',$sites);
/**
 * Checks whether the host of a URL is contained in the stored site list.
 *
 * The host is taken from parse_url(), cut at the first '%', and split into
 * dot-separated labels. The last two labels are joined (e.g. "example.de")
 * and looked up as a key in the 'leistungsschutzrecht' / 'sites' config value.
 *
 * NOTE(review): the bodies of the guard conditions are not visible in this
 * excerpt; presumably each returns false - confirm against the full file.
 *
 * @param string $url URL to check.
 * @return bool On the visible return path, whether the two-label site name
 *              is a key of the stored list.
 */
130 function leistungsschutzrecht_is_member_site(string $url)
132 $sites = DI::config()->get('leistungsschutzrecht','sites');
// Guard: an empty stored list.
138 if (sizeof($sites) == 0) {
142 $urldata = parse_url($url);
// Guard: URLs without a host component.
144 if (!isset($urldata['host'])) {
// Drop everything from the first '%' on.
148 $cleanedurlpart = explode('%', $urldata['host']);
150 $hostname = explode('.', $cleanedurlpart[0]);
// explode() returns at least one element here, so this condition cannot be true.
151 if (empty($hostname)) {
// NOTE(review): if this guard returns false, hosts with exactly two labels
// (e.g. "example.de" without a subdomain) can never match - confirm that this
// is intended.
155 if (count($hostname) <= 2) {
// Join the last two labels of the host.
159 $site = $hostname[sizeof($hostname) - 2] . '.' . $hostname[sizeof($hostname) - 1];
// The stored list is keyed by site name, so a key lookup is sufficient.
161 return (isset($sites[$site]));
/**
 * Cron hook handler that refreshes the stored site list.
 *
 * Reads the 'leistungsschutzrecht' / 'last_poll' timestamp and logs a notice
 * while fewer than 86400 seconds (24 hours) have passed since it. Afterwards
 * leistungsschutzrecht_fetchsites() is called and 'last_poll' is set to the
 * current time.
 *
 * NOTE(review): the lines between the config read and the interval check, and
 * the line after the log call, are not visible in this excerpt. Presumably
 * the function returns after logging, so the list is fetched at most once
 * per interval - confirm against the full file.
 *
 * @param App   $a Not used in the visible lines.
 * @param mixed $b Not used in the visible lines.
 */
164 function leistungsschutzrecht_cron(App $a,$b)
166 $last = DI::config()->get('leistungsschutzrecht', 'last_poll');
// Earliest time for the next poll: 86400 seconds after the last one.
169 $next = $last + 86400;
170 if ($next > time()) {
171 Logger::notice('poll intervall not reached');
175 leistungsschutzrecht_fetchsites();
// Record the time of this poll for the next interval check.
176 DI::config()->set('leistungsschutzrecht', 'last_poll', time());