# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

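"""Command handlers for the Fedi API Block (fba) tool.

Each handler receives a parsed argparse.Namespace and returns a numeric
exit status: 0 on success, 100 and above on errors.
"""
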
import csv
import inspect
import json
import logging
import time

import argparse
import atoma
import bs4
import markdown
import reqto
import validators

from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import locking
from fba.helpers import tidyup

from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)

def check_instance(args: argparse.Namespace) -> int:
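    """Checks whether args.domain could be added: returns 0 when the domain is
    not yet known, 100 when invalid, 101 when blacklisted and 102 when already
    registered."""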
    logger.debug("args.domain='%s' - CALLED!", args.domain)
    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status='%d' - EXIT!", status)
    return status

def fetch_bkali(args: argparse.Namespace) -> int:
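    """Fetches the domain list from the gql.api.bka.li GraphQL API and runs
    federation.fetch_instances() on every new, wanted domain."""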
    logger.debug("args[]='%s' - CALLED!", type(args))
    domains = list()
    try:
        fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        }))

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for 'gql.api.bka.li' returned error message='%s'", fetched['error_message'])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s'", fetched['json']['error']['message'])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif not utils.is_domain_wanted(entry["domain"]):
                logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry['domain'])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("domain='%s' is already registered - SKIPPED!", entry['domain'])
                continue

            logger.debug("Adding domain='%s' ...", entry['domain'])
            domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql, exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

                logger.debug("Invoking cookies.clear(%s) ...", domain)
                cookies.clear(domain)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0

def fetch_blocks(args: argparse.Namespace) -> int:
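    """Fetches and stores block lists from registered instances. When
    args.domain or args.software is set, only that domain or software is
    re-checked; otherwise all supported instances whose last check is older
    than the 'recheck_block' interval are processed."""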
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for single args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ?", [args.software]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey', 'peertube') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
        blocker = tidyup.domain(blocker)
        logger.debug("blocker='%s' - AFTER!", blocker)

        if blocker == "":
            logger.warning("blocker is now empty!")
            continue
        elif nodeinfo_url is None or nodeinfo_url == "":
            logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
            continue
        elif not utils.is_domain_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("blocker='%s'", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        blocking = list()
        blockdict = list()
        if software == "pleroma":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
        elif software == "mastodon":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
        elif software == "lemmy":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
        elif software == "friendica":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = friendica.fetch_blocks(blocker)
        elif software == "misskey":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = misskey.fetch_blocks(blocker)
        else:
            logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block['blocked'], block['block_level'], block['reason'])

            if block['block_level'] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block['blocker'], block['blocked'])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block['blocked'], block['reason'])
            block['blocked'] = tidyup.domain(block['blocked'])
            block['reason']  = tidyup.reason(block['reason']) if block['reason'] is not None and block['reason'] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block['blocked'], block['reason'])

            if block['blocked'] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                continue
            elif block['blocked'].count("*") > 0:
                logger.debug("blocker='%s' uses obfuscated domains, marking ...", blocker)
                instances.set_has_obfuscation(blocker, True)

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block['blocked'])

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block['blocked'], blocker, software)
                    continue

                block['blocked'] = row[0]
                origin           = row[1]
                nodeinfo_url     = row[2]
            elif block['blocked'].count("?") > 0:
                logger.debug("blocker='%s' uses obfuscated domains, marking ...", blocker)
                instances.set_has_obfuscation(blocker, True)

                # Some obscure them with question marks, not sure if that's dependent on version or not
                row = instances.deobfuscate("?", block['blocked'])

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block['blocked'], blocker, software)
                    continue

                block['blocked'] = row[0]
                origin           = row[1]
                nodeinfo_url     = row[2]

            logger.debug("Looking up instance by domain, blocked='%s'", block['blocked'])
            if not utils.is_domain_wanted(block['blocked']):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block['blocked'])
                continue
            elif block['block_level'] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block['blocked'])
                continue
            elif not instances.is_registered(block['blocked']):
                logger.debug("blocked='%s' is not registered, adding from blocker='%s' ...", block['blocked'], blocker)
                try:
                    instances.add(block['blocked'], blocker, inspect.currentframe().f_code.co_name, nodeinfo_url)
                except network.exceptions as exception:
                    logger.warning("Exception during adding blocked='%s',blocker='%s': '%s'", block['blocked'], blocker, type(exception))
                    continue

            if block['block_level'] == "silence":
                logger.debug("Block level 'silence' has been changed to 'silenced'")
                block['block_level'] = "silenced"

            if not blocks.is_instance_blocked(blocker, block['blocked'], block['block_level']):
                logger.debug("Invoking blocks.add_instance(%s, %s, %s, %s)", blocker, block['blocked'], block['reason'], block['block_level'])
                blocks.add_instance(blocker, block['blocked'], block['reason'], block['block_level'])

                logger.debug("block_level='%s',config[bot_enabled]=%s", block['block_level'], config.get("bot_enabled"))
                if block['block_level'] == "reject" and config.get("bot_enabled"):
                    logger.debug("blocker='%s' has blocked '%s' with reason='%s' - Adding to bot notification ...", blocker, block['blocked'], block['reason'])
                    blockdict.append({
                        "blocked": block['blocked'],
                        "reason" : block['reason'],
                    })
            else:
                logger.debug("Updating block last seen and reason for blocker='%s',blocked='%s' ...", blocker, block['blocked'])
                blocks.update_last_seen(blocker, block['blocked'], block['block_level'])
                blocks.update_reason(block['reason'], blocker, block['blocked'], block['block_level'])

            logger.debug("Invoking cookies.clear(%s) ...", block['blocked'])
            cookies.clear(block['blocked'])

        if instances.has_pending(blocker):
            logger.debug("Invoking instances.update_data(%s) ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config[bot_enabled]='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_observer(args: argparse.Namespace) -> int:
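    """Fetches, per software type, the instance table data from
    fediverse.observer and registers any wanted, not yet known domains."""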
    logger.debug("args[]='%s' - CALLED!", type(args))
    types = [
        "akoma",
        "birdsitelive",
        "bookwyrm",
        "calckey",
        "diaspora",
        "foundkey",
        "friendica",
        "funkwhale",
        "gancio",
        "gnusocial",
        "gotosocial",
        "hometown",
        "hubzilla",
        "kbin",
        "ktistec",
        "lemmy",
        "mastodon",
        "microblogpub",
        "misskey",
        "mitra",
        "mobilizon",
        "owncast",
        "peertube",
        "pixelfed",
        "pleroma",
        "plume",
        "snac",
        "takahe",
        "wildebeest",
        "writefreely"
    ]

    locking.acquire()

    logger.info("Fetching table data for %d software types ...", len(types))
    for software in types:
        doc = None

        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(f"https://fediverse.observer/app/views/tabledata.php?software={software}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features='html.parser')
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from fediverse.observer: '%s'", software, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()

            logger.debug("domain='%s'", domain)
            if not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue

            logger.info("Fetching instances for domain='%s',software='%s'", domain, software)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

            logger.debug("Invoking cookies.clear(%s) ...", domain)
            cookies.clear(domain)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_todon_wiki(args: argparse.Namespace) -> int:
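    """Fetches the silenced/limited and suspended server lists from
    wiki.todon.eu and records them as blocks by todon.eu."""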
    logger.debug("args[]='%s' - CALLED!", type(args))

    locking.acquire()
    blocklist = {
        "silenced": list(),
        "reject": list(),
    }

    raw = utils.fetch_url("https://wiki.todon.eu/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, 'todon.eu', None, inspect.currentframe().f_code.co_name)

                    logger.debug("Invoking cookies.clear(%s) ...", blocked)
                    cookies.clear(blocked)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if blocks.is_instance_blocked("todon.eu", blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            blocks.add_instance("todon.eu", blocked, None, block_level)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

    logger.debug("Success! - EXIT!")
    return 0

def fetch_cs(args: argparse.Namespace) -> int:
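    """Fetches chaos.social's federation.md, parses its silenced and blocked
    instance tables and records them as blocks by chaos.social."""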
    logger.debug("args[]='%s' - CALLED!", type(args))
    extensions = [
        "extra",
        "abbr",
        "attr_list",
        "def_list",
        "fenced_code",
        "footnotes",
        "md_in_html",
        "admonition",
        "codehilite",
        "legacy_attrs",
        "legacy_em",
        "meta",
        "nl2br",
        "sane_lists",
        "smarty",
        "toc",
        "wikilinks"
    ]

    domains = {
        "silenced": list(),
        "reject"  : list(),
    }

    raw = utils.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features='html.parser')
    logger.debug("doc()=%d[]='%s'", len(doc), type(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    domains["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    domains["reject"] = federation.find_domains(blocked)

    logger.debug("domains[silenced]()=%d,domains[reject]()=%d", len(domains["silenced"]), len(domains["reject"]))
    if len(domains["silenced"]) > 0 or len(domains["reject"]) > 0:
        locking.acquire()

        for block_level in domains:
            logger.info("block_level='%s' has %d row(s)", block_level, len(domains[block_level]))

            for row in domains[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)

                        logger.debug("Invoking cookies.clear(%s) ...", row["domain"])
                        cookies.clear(row["domain"])
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if not blocks.is_instance_blocked('chaos.social', row["domain"], block_level):
                    logger.debug("domain='%s',block_level='%s' not yet blocked by chaos.social, adding ...", row["domain"], block_level)
                    blocks.add_instance('chaos.social', row["domain"], row["reason"], block_level)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fba_rss(args: argparse.Namespace) -> int:
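    """Fetches the FBA-specific RSS feed given as args.feed and registers any
    new, non-blacklisted domains found in the item links."""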
    logger.debug("args[]='%s' - CALLED!", type(args))
    domains = list()

    logger.info("Fetching FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item='%s'", item)
            domain = item.link.split("=")[1]

            if blacklist.is_blacklisted(domain):
                logger.debug("domain='%s' is blacklisted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

                logger.debug("Invoking cookies.clear(%s) ...", domain)
                cookies.clear(domain)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fbabot_atom(args: argparse.Namespace) -> int:
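    """Fetches the FBA bot account's ATOM feed from ryona.agency and registers
    any new, wanted domains linked in its entries."""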
    logger.debug("args[]='%s' - CALLED!", type(args))
    feed = "https://ryona.agency/users/fba/feed.atom"

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s'", type(href), href)
                    domain = tidyup.domain(href)

                    logger.debug("domain='%s'", domain)
                    if not utils.is_domain_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains(%d)='%s'", len(domains), domains)
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

                logger.debug("Invoking cookies.clear(%s) ...", domain)
                cookies.clear(domain)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_instances(args: argparse.Namespace) -> int:
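    """Fetches instances starting from args.domain. Unless args.single is set,
    afterwards re-fetches all supported instances whose last fetch is older
    than the 'recheck_instance' interval."""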
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Initial fetch
    try:
        logger.info("Fetching instances from args.domain='%s' ...", args.domain)
        federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)

        logger.debug("Invoking cookies.clear(%s) ...", args.domain)
        cookies.clear(args.domain)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        return 100

    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("domain='%s'", row[0])
        if blacklist.is_blacklisted(row[0]):
            logger.warning("Domain is blacklisted: row[0]='%s'", row[0])
            continue

        try:
            logger.info("Fetching instances for domain='%s',software='%s',origin='%s',nodeinfo_url='%s'", row[0], row[2], row[1], row[3])
            federation.fetch_instances(row[0], row[1], row[2], inspect.currentframe().f_code.co_name, row[3])

            logger.debug("Invoking cookies.clear(%s) ...", row[0])
            cookies.clear(row[0])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[0]='%s'", type(exception), row[0])
            instances.set_last_error(row[0], exception)

    logger.debug("Success - EXIT!")
    return 0

def fetch_oliphant(args: argparse.Namespace) -> int:
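    """Downloads the CSV blocklists from oliphant's Codeberg repository and
    runs utils.process_domain() on every wanted domain; with args.domain set,
    only the matching blocker's list is fetched."""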
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Base URL
    base_url = "https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists"

    # URLs to fetch
    blocklists = (
        {
            "blocker": "artisan.chat",
            "csv_url": "mastodon/artisan.chat.csv",
        },{
            "blocker": "mastodon.art",
            "csv_url": "mastodon/mastodon.art.csv",
        },{
            "blocker": "pleroma.envs.net",
            "csv_url": "mastodon/pleroma.envs.net.csv",
        },{
            "blocker": "oliphant.social",
            "csv_url": "mastodon/_unified_tier3_blocklist.csv",
        },{
            "blocker": "mastodon.online",
            "csv_url": "mastodon/mastodon.online.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "mastodon/mastodon.social.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "other/missing-tier0-mastodon.social.csv",
        },{
            "blocker": "rage.love",
            "csv_url": "mastodon/rage.love.csv",
        },{
            "blocker": "sunny.garden",
            "csv_url": "mastodon/sunny.garden.csv",
        },{
            "blocker": "solarpunk.moe",
            "csv_url": "mastodon/solarpunk.moe.csv",
        },{
            "blocker": "toot.wales",
            "csv_url": "mastodon/toot.wales.csv",
        },{
            "blocker": "union.place",
            "csv_url": "mastodon/union.place.csv",
        }
    )

    domains = list()

    logger.debug("Downloading %d files ...", len(blocklists))
    for block in blocklists:
        # Is a domain given and not equal to the blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block['blocker'], args.domain)
            continue
        elif args.domain in domains:
            logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
            continue

        # Fetch this URL
        logger.info("Fetching csv_url='%s' for blocker='%s' ...", block['csv_url'], block['blocker'])
        response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and len(response.content) > 0:
            logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
            reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")

            logger.debug("reader[]='%s'", type(reader))
            for row in reader:
                logger.debug("row[%s]='%s'", type(row), row)
                domain = None
                if "#domain" in row:
                    domain = row["#domain"]
                elif "domain" in row:
                    domain = row["domain"]
                else:
                    logger.debug("row='%s' does not contain domain column", row)
                    continue

                logger.debug("domain='%s'", domain)
                if not utils.is_domain_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue

                logger.debug("Marking domain='%s' as handled", domain)
                domains.append(domain)

                logger.debug("Processing domain='%s' ...", domain)
                processed = utils.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_txt(args: argparse.Namespace) -> int:
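    """Fetches static plain-text blocklists (currently seirdy.one's bsl.txt)
    and runs utils.process_domain() on every wanted domain."""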
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Static URLs
    urls = (
        "https://seirdy.one/pb/bsl.txt",
    )

    logger.info("Checking %d text file(s) ...", len(urls))
    for url in urls:
        logger.debug("Fetching url='%s' ...", url)
        response = utils.fetch_url(url, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code < 300 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s'", domain)
                if domain == "":
                    logger.debug("domain is empty - SKIPPED!")
                    continue
                elif not utils.is_domain_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue

                logger.debug("domain='%s'", domain)
                processed = utils.process_domain(domain, 'seirdy.one', inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)
                if not processed:
                    logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
                    continue

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fedipact(args: argparse.Namespace) -> int:
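    """Fetches https://fedipact.online, extracts domains from its list items
    and registers any new, wanted ones."""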
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    response = utils.fetch_url("https://fedipact.online", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and response.text != "":
        logger.debug("Parsing %d Bytes ...", len(response.text))

        doc = bs4.BeautifulSoup(response.text, "html.parser")
        logger.debug("doc[]='%s'", type(doc))

        rows = doc.findAll("li")
        logger.info("Checking %d row(s) ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            domain = tidyup.domain(row.contents[0])

            logger.debug("domain='%s'", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue

            logger.info("Fetching domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0