# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import csv
import inspect
import json
import logging
import time

import argparse
import atoma
import bs4
import markdown
import reqto
import validators

from fba import csrf
from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import locking
from fba.helpers import tidyup

from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)
def check_instance(args: argparse.Namespace) -> int:
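    """
    Checks whether args.domain is a syntactically valid, not blacklisted and
    not yet registered domain. Returns 0 when the domain is not yet known,
    otherwise a non-zero status code (100-102).
    """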
    logger.debug("args.domain='%s' - CALLED!", args.domain)
    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status='%d' - EXIT!", status)
    return status

def fetch_pixelfed_api(args: argparse.Namespace) -> int:
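    """
    Fetches the server list from the pixelfed.org API and fetches instance
    data for every wanted, not yet registered domain. Returns 0 on success,
    otherwise a non-zero status code (100-103).
    """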
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF token is sent by default, so network.api_headers does not need to be added here
    headers = dict()

    try:
        logger.debug("Checking CSRF from pixelfed.org")
        headers = csrf.determine("pixelfed.org", dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_pixelfed_api,%s) - EXIT!", type(exception), __name__)
        return 100

    try:
        logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            "pixelfed.org",
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 101
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 102

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif not utils.is_domain_wanted(row["domain"]):
                logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
                continue
            elif instances.is_registered(row["domain"]):
                logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
                continue

            logger.debug("Fetching instances from row[domain]='%s' ...", row["domain"])
            federation.fetch_instances(row["domain"], None, None, inspect.currentframe().f_code.co_name)

    except network.exceptions as exception:
        logger.warning("Cannot fetch JSON from pixelfed.org API, exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 103

    logger.debug("Success! - EXIT!")
    return 0

def fetch_bkali(args: argparse.Namespace) -> int:
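    """
    Queries the GraphQL API at gql.api.bka.li for known domains and fetches
    instance data for every new, wanted domain. Returns 0 on success,
    otherwise a non-zero status code (100-102).
    """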
    logger.debug("args[]='%s' - CALLED!", type(args))
    domains = list()
    try:
        fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        }))

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for 'gql.api.bka.li' returned error message='%s'", fetched["error_message"])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows(data)()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif not utils.is_domain_wanted(entry["domain"]):
                logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("domain='%s' is already registered - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.warning("Cannot fetch GraphQL data, exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, "tak.teleyal.blog", None, inspect.currentframe().f_code.co_name)

                logger.debug("Invoking cookies.clear(%s) ...", domain)
                cookies.clear(domain)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0

def fetch_blocks(args: argparse.Namespace) -> int:
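    """
    Fetches block lists, either from a single domain or software given via
    args or from all registered instances of supported software, and records
    new or updated blocks in the database. Returns 0 on success, otherwise a
    non-zero status code (100-102).
    """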
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for single args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ?", [args.software]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey', 'peertube') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
        blocker = tidyup.domain(blocker)
        logger.debug("blocker='%s' - AFTER!", blocker)

        if blocker == "":
            logger.warning("blocker is now empty!")
            continue
        elif nodeinfo_url is None or nodeinfo_url == "":
            logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
            continue
        elif not utils.is_domain_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("blocker='%s'", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        blocking = list()
        blockdict = list()
        if software == "pleroma":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
        elif software == "mastodon":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
        elif software == "lemmy":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
        elif software == "friendica":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = friendica.fetch_blocks(blocker)
        elif software == "misskey":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = misskey.fetch_blocks(blocker)
        else:
            logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"]  = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                continue
            elif block["blocked"].count("*") > 0:
                logger.debug("blocker='%s' uses obfuscated domains, marking ...", blocker)
                instances.set_has_obfuscation(blocker, True)

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    continue

                block["blocked"] = row[0]
                origin           = row[1]
                nodeinfo_url     = row[2]
            elif block["blocked"].count("?") > 0:
                logger.debug("blocker='%s' uses obfuscated domains, marking ...", blocker)
                instances.set_has_obfuscation(blocker, True)

                # Some servers obscure domains with question marks instead; it is unclear whether that depends on the software version
                row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    continue

                block["blocked"] = row[0]
                origin           = row[1]
                nodeinfo_url     = row[2]

            logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
            if not utils.is_domain_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            if block["block_level"] == "silence":
                logger.debug("Block level 'silence' has been changed to 'silenced'")
                block["block_level"] = "silenced"
            elif block["block_level"] == "suspend":
                logger.debug("Block level 'suspend' has been changed to 'suspended'")
                block["block_level"] = "suspended"

            if not blocks.is_instance_blocked(blocker, block["blocked"], block["block_level"]):
                logger.debug("Invoking blocks.add_instance(%s, %s, %s, %s)", blocker, block["blocked"], block["reason"], block["block_level"])
                blocks.add_instance(blocker, block["blocked"], block["reason"], block["block_level"])

                logger.debug("block_level='%s',config[bot_enabled]=%s", block["block_level"], config.get("bot_enabled"))
                if block["block_level"] == "reject" and config.get("bot_enabled"):
                    logger.debug("blocker='%s' has blocked '%s' with reason='%s' - Adding to bot notification ...", blocker, block["blocked"], block["reason"])
                    blockdict.append({
                        "blocked": block["blocked"],
                        "reason" : block["reason"],
                    })
            else:
                logger.debug("Updating block last seen and reason for blocker='%s',blocked='%s' ...", blocker, block["blocked"])
                blocks.update_last_seen(blocker, block["blocked"], block["block_level"])
                blocks.update_reason(block["reason"], blocker, block["blocked"], block["block_level"])

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        if instances.has_pending(blocker):
            logger.debug("Invoking instances.update_data(%s) ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config[bot_enabled]='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_observer(args: argparse.Namespace) -> int:
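    """
    Fetches the per-software server tables from fediverse.observer and
    fetches instance data for every wanted, not yet registered domain.
    Returns 0 on success.
    """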
    logger.debug("args[]='%s' - CALLED!", type(args))
    types = [
        "akkoma",
        "birdsitelive",
        "bookwyrm",
        "calckey",
        "diaspora",
        "foundkey",
        "friendica",
        "funkwhale",
        "gancio",
        "gnusocial",
        "gotosocial",
        "hometown",
        "hubzilla",
        "kbin",
        "ktistec",
        "lemmy",
        "mastodon",
        "microblogpub",
        "misskey",
        "mitra",
        "mobilizon",
        "owncast",
        "peertube",
        "pixelfed",
        "pleroma",
        "plume",
        "snac",
        "takahe",
        "wildebeest",
        "writefreely"
    ]

    locking.acquire()

    logger.info("Fetching table data for %d software types ...", len(types))
    for software in types:
        doc = None

        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://fediverse.observer/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from fediverse.observer: '%s'", software, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()

            logger.debug("domain='%s'", domain)
            if not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue

            logger.info("Fetching instances for domain='%s',software='%s'", domain, software)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

            logger.debug("Invoking cookies.clear(%s) ...", domain)
            cookies.clear(domain)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_todon_wiki(args: argparse.Namespace) -> int:
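    """
    Fetches the domain block list from wiki.todon.eu and records the silenced
    and rejected domains as blocks by todon.eu. Returns 0 on success.
    """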
    logger.debug("args[]='%s' - CALLED!", type(args))

    locking.acquire()
    blocklist = {
        "silenced": list(),
        "reject": list(),
    }

    raw = utils.fetch_url("https://wiki.todon.eu/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, "todon.eu", None, inspect.currentframe().f_code.co_name)

                    logger.debug("Invoking cookies.clear(%s) ...", blocked)
                    cookies.clear(blocked)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if blocks.is_instance_blocked("todon.eu", blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            blocks.add_instance("todon.eu", blocked, None, block_level)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

    logger.debug("Success! - EXIT!")
    return 0

def fetch_cs(args: argparse.Namespace) -> int:
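    """
    Fetches chaos.social's federation.md from GitHub and records the silenced
    and blocked domains as blocks by chaos.social. Returns 0 on success.
    """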
    logger.debug("args[]='%s' - CALLED!", type(args))
    extensions = [
        "extra",
        "abbr",
        "attr_list",
        "def_list",
        "fenced_code",
        "footnotes",
        "md_in_html",
        "admonition",
        "codehilite",
        "legacy_attrs",
        "legacy_em",
        "meta",
        "nl2br",
        "sane_lists",
        "smarty",
        "toc",
        "wikilinks"
    ]

    domains = {
        "silenced": list(),
        "reject"  : list(),
    }

    raw = utils.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc[%s]()=%d", type(doc), len(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    domains["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    domains["reject"] = federation.find_domains(blocked)

    logger.debug("domains[silenced]()=%d,domains[reject]()=%d", len(domains["silenced"]), len(domains["reject"]))
    if len(domains["silenced"]) > 0 or len(domains["reject"]) > 0:
        locking.acquire()

        for block_level in domains:
            logger.info("block_level='%s' has %d row(s)", block_level, len(domains[block_level]))

            for row in domains[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], "chaos.social", None, inspect.currentframe().f_code.co_name)

                        logger.debug("Invoking cookies.clear(%s) ...", row["domain"])
                        cookies.clear(row["domain"])
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if not blocks.is_instance_blocked("chaos.social", row["domain"], block_level):
                    logger.debug("domain='%s',block_level='%s' blocked by chaos.social, adding ...", row["domain"], block_level)
                    blocks.add_instance("chaos.social", row["domain"], row["reason"], block_level)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fba_rss(args: argparse.Namespace) -> int:
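    """
    Fetches an FBA-specific RSS feed given via args.feed and fetches instance
    data for every new, wanted domain found in the item links. Returns 0 on
    success.
    """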
    logger.debug("args[]='%s' - CALLED!", type(args))
    domains = list()

    logger.info("Fetching FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item='%s'", item)
            domain = item.link.split("=")[1]

            if blacklist.is_blacklisted(domain):
                logger.debug("domain='%s' is blacklisted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

                logger.debug("Invoking cookies.clear(%s) ...", domain)
                cookies.clear(domain)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fbabot_atom(args: argparse.Namespace) -> int:
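    """
    Fetches the Atom feed of the FBA bot account on ryona.agency and fetches
    instance data for every new, wanted domain linked from its entries.
    Returns 0 on success.
    """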
    logger.debug("args[]='%s' - CALLED!", type(args))
    feed = "https://ryona.agency/users/fba/feed.atom"

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s'", type(href), href)
                    domain = tidyup.domain(href)

                    logger.debug("domain='%s'", domain)
                    if not utils.is_domain_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains(%d)='%s'", len(domains), domains)
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

                logger.debug("Invoking cookies.clear(%s) ...", domain)
                cookies.clear(domain)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_instances(args: argparse.Namespace) -> int:
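    """
    Fetches instances from args.domain first; unless args.single is set, it
    then re-crawls all registered instances of supported software whose last
    instance fetch is older than the configured recheck interval. Returns 0
    on success, 100 when the initial fetch fails.
    """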
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Initial fetch
    try:
        logger.info("Fetching instances from args.domain='%s' ...", args.domain)
        federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)

        logger.debug("Invoking cookies.clear(%s) ...", args.domain)
        cookies.clear(args.domain)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        return 100

    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("domain='%s'", row[0])
        if blacklist.is_blacklisted(row[0]):
            logger.warning("Domain is blacklisted: row[0]='%s'", row[0])
            continue

        try:
            logger.info("Fetching instances for domain='%s',software='%s',origin='%s',nodeinfo_url='%s'", row[0], row[2], row[1], row[3])
            federation.fetch_instances(row[0], row[1], row[2], inspect.currentframe().f_code.co_name, row[3])

            logger.debug("Invoking cookies.clear(%s) ...", row[0])
            cookies.clear(row[0])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[0]='%s'", type(exception), row[0])
            instances.set_last_error(row[0], exception)

    logger.debug("Success - EXIT!")
    return 0

def fetch_oliphant(args: argparse.Namespace) -> int:
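    """
    Downloads CSV block lists from oliphant's blocklists repository on
    Codeberg and processes every wanted domain found in them, optionally
    restricted to a single blocker via args.domain. Returns 0 on success.
    """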
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Base URL
    base_url = "https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists"

    # URLs to fetch
    blocklists = (
        {
            "blocker": "artisan.chat",
            "csv_url": "mastodon/artisan.chat.csv",
        },{
            "blocker": "mastodon.art",
            "csv_url": "mastodon/mastodon.art.csv",
        },{
            "blocker": "pleroma.envs.net",
            "csv_url": "mastodon/pleroma.envs.net.csv",
        },{
            "blocker": "oliphant.social",
            "csv_url": "mastodon/_unified_tier3_blocklist.csv",
        },{
            "blocker": "mastodon.online",
            "csv_url": "mastodon/mastodon.online.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "mastodon/mastodon.social.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "other/missing-tier0-mastodon.social.csv",
        },{
            "blocker": "rage.love",
            "csv_url": "mastodon/rage.love.csv",
        },{
            "blocker": "sunny.garden",
            "csv_url": "mastodon/sunny.garden.csv",
        },{
            "blocker": "solarpunk.moe",
            "csv_url": "mastodon/solarpunk.moe.csv",
        },{
            "blocker": "toot.wales",
            "csv_url": "mastodon/toot.wales.csv",
        },{
            "blocker": "union.place",
            "csv_url": "mastodon/union.place.csv",
        }
    )

    domains = list()

    logger.debug("Downloading %d files ...", len(blocklists))
    for block in blocklists:
        # Is a domain given that does not equal this blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue
        elif args.domain in domains:
            logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
            continue

        # Fetch this URL
        logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
        response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and len(response.content) > 0:
            logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
            reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")

            logger.debug("reader[]='%s'", type(reader))
            for row in reader:
                logger.debug("row[%s]='%s'", type(row), row)
                domain = None
                if "#domain" in row:
                    domain = row["#domain"]
                elif "domain" in row:
                    domain = row["domain"]
                else:
                    logger.debug("row='%s' does not contain domain column", row)
                    continue

                logger.debug("domain='%s'", domain)
                if not utils.is_domain_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue

                logger.debug("Marking domain='%s' as handled", domain)
                domains.append(domain)

                logger.debug("Processing domain='%s' ...", domain)
                processed = utils.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_txt(args: argparse.Namespace) -> int:
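    """
    Fetches plain-text block lists from a static list of URLs (currently only
    seirdy.one) and processes every non-empty, wanted domain. Returns 0 on
    success.
    """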
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Static URLs
    urls = ({
        "blocker": "seirdy.one",
        "url"    : "https://seirdy.one/pb/bsl.txt",
    },)

    logger.info("Checking %d text file(s) ...", len(urls))
    for row in urls:
        logger.debug("Fetching row[url]='%s' ...", row["url"])
        response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code < 300 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s'", domain)
                if domain == "":
                    logger.debug("domain is empty - SKIPPED!")
                    continue
                elif not utils.is_domain_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue

                logger.debug("domain='%s',row[blocker]='%s'", domain, row["blocker"])
                processed = utils.process_domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)
                if not processed:
                    logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
                    continue

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fedipact(args: argparse.Namespace) -> int:
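    """
    Fetches the instance list from fedipact.online and fetches instance data
    for every wanted, not yet registered domain. Returns 0 on success.
    """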
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    response = utils.fetch_url("https://fedipact.online", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and response.text != "":
        logger.debug("Parsing %d Bytes ...", len(response.text))

        doc = bs4.BeautifulSoup(response.text, "html.parser")
        logger.debug("doc[]='%s'", type(doc))

        rows = doc.findAll("li")
        logger.info("Checking %d row(s) ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            domain = tidyup.domain(row.contents[0])

            logger.debug("domain='%s'", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue

            logger.info("Fetching domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0