# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import csv
import inspect
import json
import logging
import time

import argparse
import atoma
import bs4
import markdown
import reqto
import validators

from fba import csrf
from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import locking
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)

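# Validates a single domain given via args.domain. Returns 0 when the domain is
# not yet known, or a non-zero status when it is invalid (100), blacklisted
# (101) or already registered (102).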
def check_instance(args: argparse.Namespace) -> int:
    logger.debug("args.domain='%s' - CALLED!", args.domain)
    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status

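# Fetches the public server directory from pixelfed.org's API and fetches
# instance data for every new, wanted domain found there.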
def fetch_pixelfed_api(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF token is sent by default; network.api_headers does not have to
    # be added here manually.
    headers = tuple()

    try:
        logger.debug("Checking CSRF from pixelfed.org")
        headers = csrf.determine("pixelfed.org", dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_pixelfed_api,%s) - EXIT!", type(exception), __name__)
        return 100

    try:
        logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            "pixelfed.org",
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 101
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 102

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif not utils.is_domain_wanted(row['domain']):
                logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row['domain'])
                continue
            elif instances.is_registered(row['domain']):
                logger.debug("row[domain]='%s' is already registered - SKIPPED!", row['domain'])
                continue

            logger.debug("Fetching instances from row[domain]='%s' ...", row['domain'])
            federation.fetch_instances(row['domain'], None, None, inspect.currentframe().f_code.co_name)

    except network.exceptions as exception:
        logger.warning("Cannot fetch Pixelfed API, exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 103

    logger.debug("Success! - EXIT!")
    return 0

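# Queries the GraphQL endpoint at gql.api.bka.li for its list of known domains
# and fetches instance data for each new, wanted one.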
def fetch_bkali(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))
    domains = list()
    try:
        fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        }))

139         if "error_message" in fetched:
140             logger.warning("post_json_api() for 'gql.api.bka.li' returned error message='%s", fetched['error_message'])
141             return 100
142         elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
143             logger.warning("post_json_api() returned error: '%s", fetched['error']['message'])
144             return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows['data']()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif not utils.is_domain_wanted(entry["domain"]):
                logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("domain='%s' is already registered - SKIPPED!", entry['domain'])
                continue

            logger.debug("Adding domain='%s' ...", entry['domain'])
            domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0

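# Fetches block lists from all registered instances running supported software,
# or from a single instance/software when args.domain or args.software is set.
# Obfuscated entries (domains containing '*' or '?') are resolved against
# already-known instances where possible. New blocks are stored and, when the
# bot is enabled, 'reject' blocks are collected for a bot notification.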
def fetch_blocks(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for single args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey', 'peertube') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
        blocker = tidyup.domain(blocker)
        logger.debug("blocker='%s' - AFTER!", blocker)

        if blocker == "":
            logger.warning("blocker is now empty!")
            continue
        elif nodeinfo_url is None or nodeinfo_url == "":
            logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
            continue
        elif not utils.is_domain_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("blocker='%s'", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        blocking = list()
        blockdict = list()
        if software == "pleroma":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
        elif software == "mastodon":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
        elif software == "lemmy":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
        elif software == "friendica":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = friendica.fetch_blocks(blocker)
        elif software == "misskey":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = misskey.fetch_blocks(blocker)
        else:
            logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block['block_level'], block["reason"])

            if block['block_level'] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"]  = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                continue
            elif block["blocked"].count("*") > 0:
                logger.debug("blocker='%s' uses obfuscated domains, marking ...", blocker)
                instances.set_has_obfuscation(blocker, True)

                # Some Friendica servers also obscure domains without a hash
                row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    continue

                block["blocked"] = row[0]
                origin           = row[1]
                nodeinfo_url     = row[2]
            elif block["blocked"].count("?") > 0:
                logger.debug("blocker='%s' uses obfuscated domains, marking ...", blocker)
                instances.set_has_obfuscation(blocker, True)

                # Some servers obscure domains with question marks instead; it is unclear whether this depends on the software version
                row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    continue

                block["blocked"] = row[0]
                origin           = row[1]
                nodeinfo_url     = row[2]

            logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
            if not utils.is_domain_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block['block_level'] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            if block['block_level'] == "silence":
                logger.debug("Block level 'silence' has been changed to 'silenced'")
                block['block_level'] = "silenced"
            elif block['block_level'] == "suspend":
                logger.debug("Block level 'suspend' has been changed to 'suspended'")
                block['block_level'] = "suspended"

            if not blocks.is_instance_blocked(blocker, block["blocked"], block['block_level']):
                logger.debug("Invoking blocks.add_instance(%s, %s, %s, %s)", blocker, block["blocked"], block["reason"], block['block_level'])
                blocks.add_instance(blocker, block["blocked"], block["reason"], block['block_level'])

                logger.debug("block_level='%s',config[bot_enabled]='%s'", block['block_level'], config.get("bot_enabled"))
                if block['block_level'] == "reject" and config.get("bot_enabled"):
                    logger.debug("blocker='%s' has blocked '%s' with reason='%s' - Adding to bot notification ...", blocker, block["blocked"], block["reason"])
                    blockdict.append({
                        "blocked": block["blocked"],
                        "reason" : block["reason"],
                    })
            else:
                logger.debug("Updating block last seen and reason for blocker='%s',blocked='%s' ...", blocker, block["blocked"])
                blocks.update_last_seen(blocker, block["blocked"], block['block_level'])
                blocks.update_reason(block["reason"], blocker, block["blocked"], block['block_level'])

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config[bot_enabled]='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0

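# Crawls the per-software table data on fediverse.observer and registers every
# new, wanted domain found there, optionally restricted via args.software.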
def fetch_observer(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))
    types = [
        "akkoma",
        "birdsitelive",
        "bookwyrm",
        "calckey",
        "diaspora",
        "foundkey",
        "friendica",
        "funkwhale",
        "gancio",
        "gnusocial",
        "gotosocial",
        "hometown",
        "hubzilla",
        "kbin",
        "ktistec",
        "lemmy",
        "mastodon",
        "microblogpub",
        "misskey",
        "mitra",
        "mobilizon",
        "owncast",
        "peertube",
        "pixelfed",
        "pleroma",
        "plume",
        "snac",
        "takahe",
        "wildebeest",
        "writefreely"
    ]

    locking.acquire()

    logger.info("Fetching table data for %d software types ...", len(types))
    for software in types:
        logger.debug("software='%s' - BEFORE!", software)
        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            continue

        doc = None
        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://fediverse.observer/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features='html.parser')
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from fediverse.observer: '%s'", software, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()

            logger.debug("domain='%s'", domain)
            if not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' is recently being handled - SKIPPED!", domain)
                continue

            software = software_helper.alias(software)
            logger.info("Fetching instances for domain='%s'", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

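# Imports the silenced/limited and suspended server lists published at
# wiki.todon.eu and records them as blocks by todon.eu.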
def fetch_todon_wiki(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    locking.acquire()
    blocklist = {
        "silenced": list(),
        "reject": list(),
    }

    raw = utils.fetch_url("https://wiki.todon.eu/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    for block_level in blocklist:
        blocked_domains = blocklist[block_level]

        logger.debug("block_level='%s',blocked_domains()=%d", block_level, len(blocked_domains))
        for blocked in blocked_domains:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, 'todon.eu', None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if blocks.is_instance_blocked("todon.eu", blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            blocks.add_instance("todon.eu", blocked, None, block_level)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

    logger.debug("Success! - EXIT!")
    return 0

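# Imports chaos.social's federation policy, published as a Markdown document,
# and records the 'silenced' and 'blocked' tables as blocks by chaos.social.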
def fetch_cs(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))
    extensions = [
        "extra",
        "abbr",
        "attr_list",
        "def_list",
        "fenced_code",
        "footnotes",
        "md_in_html",
        "admonition",
        "codehilite",
        "legacy_attrs",
        "legacy_em",
        "meta",
        "nl2br",
        "sane_lists",
        "smarty",
        "toc",
        "wikilinks"
    ]

    domains = {
        "silenced": list(),
        "reject"  : list(),
    }

    raw = utils.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features='html.parser')
    logger.debug("doc[%s]()=%d", type(doc), len(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    domains["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    domains["reject"] = federation.find_domains(blocked)

    logger.debug("domains[silenced]()=%d,domains[reject]()=%d", len(domains["silenced"]), len(domains["reject"]))
    if len(domains["silenced"]) > 0 or len(domains["reject"]) > 0:
        locking.acquire()

        for block_level in domains:
            logger.info("block_level='%s' has %d row(s)", block_level, len(domains[block_level]))

            for row in domains[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if not blocks.is_instance_blocked('chaos.social', row["domain"], block_level):
                    logger.debug("domain='%s',block_level='%s' blocked by chaos.social, adding ...", row["domain"], block_level)
                    blocks.add_instance('chaos.social', row["domain"], row["reason"], block_level)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

    logger.debug("Success! - EXIT!")
    return 0

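# Parses an FBA-specific RSS feed given via args.feed and registers every new
# domain linked from its items.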
def fetch_fba_rss(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))
    domains = list()

    logger.info("Fetching FBA-specific RSS from args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item='%s'", item)
            domain = item.link.split("=")[1]

            if blacklist.is_blacklisted(domain):
                logger.debug("domain='%s' is blacklisted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0

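# Parses the ATOM feed of the FBA bot account on ryona.agency and registers
# every new, wanted domain found in links within its entries.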
def fetch_fbabot_atom(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))
    feed = "https://ryona.agency/users/fba/feed.atom"

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s'", type(href), href)
                    domain = tidyup.domain(href)

                    logger.debug("domain='%s'", domain)
                    if not utils.is_domain_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains(%d)='%s'", len(domains), domains)
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0

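# Fetches instance data for args.domain and, unless args.single is set,
# re-crawls all known instances whose last fetch is older than the configured
# 'recheck_instance' interval.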
def fetch_instances(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Initial fetch
    try:
        logger.info("Fetching instances from args.domain='%s' ...", args.domain)
        federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        return 100

    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("domain='%s'", row[0])
        if not utils.is_domain_wanted(row[0]):
            logger.debug("Domain is not wanted: row[0]='%s'", row[0])
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", row[0], row[1], row[2], row[3])
            federation.fetch_instances(row[0], row[1], row[2], inspect.currentframe().f_code.co_name, row[3])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[0]='%s'", type(exception), row[0])
            instances.set_last_error(row[0], exception)

    logger.debug("Success - EXIT!")
    return 0

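# Downloads the CSV block lists maintained in oliphant's blocklists repository
# on Codeberg and processes every wanted domain found in them, optionally
# restricted to a single blocker via args.domain.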
def fetch_oliphant(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Base URL
    base_url = "https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists"

    # URLs to fetch
    blocklists = (
        {
            "blocker": "artisan.chat",
            "csv_url": "mastodon/artisan.chat.csv",
        },{
            "blocker": "mastodon.art",
            "csv_url": "mastodon/mastodon.art.csv",
        },{
            "blocker": "pleroma.envs.net",
            "csv_url": "mastodon/pleroma.envs.net.csv",
        },{
            "blocker": "oliphant.social",
            "csv_url": "mastodon/_unified_tier3_blocklist.csv",
        },{
            "blocker": "mastodon.online",
            "csv_url": "mastodon/mastodon.online.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "mastodon/mastodon.social.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "other/missing-tier0-mastodon.social.csv",
        },{
            "blocker": "rage.love",
            "csv_url": "mastodon/rage.love.csv",
        },{
            "blocker": "sunny.garden",
            "csv_url": "mastodon/sunny.garden.csv",
        },{
            "blocker": "solarpunk.moe",
            "csv_url": "mastodon/solarpunk.moe.csv",
        },{
            "blocker": "toot.wales",
            "csv_url": "mastodon/toot.wales.csv",
        },{
            "blocker": "union.place",
            "csv_url": "mastodon/union.place.csv",
        }
    )

    domains = list()

    logger.debug("Downloading %d files ...", len(blocklists))
    for block in blocklists:
        # Is a domain given and not equal to this blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue
        elif args.domain in domains:
            logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
            continue

        # Fetch this URL
        logger.info("Fetching csv_url='%s' for blocker='%s' ...", block['csv_url'], block["blocker"])
        response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.content != b"":
            logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
            reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")

            logger.debug("reader[]='%s'", type(reader))
            for row in reader:
                logger.debug("row[%s]='%s'", type(row), row)
                domain = None
                if "#domain" in row:
                    domain = row["#domain"]
                elif "domain" in row:
                    domain = row["domain"]
                else:
                    logger.debug("row='%s' does not contain domain column", row)
                    continue

                logger.debug("domain='%s'", domain)
                if not utils.is_domain_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue

                logger.debug("Marking domain='%s' as handled", domain)
                domains.append(domain)

                logger.debug("Processing domain='%s' ...", domain)
                processed = utils.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)

    logger.debug("Success! - EXIT!")
    return 0

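# Fetches plain-text block lists from a static list of URLs (currently only
# seirdy.one's bsl.txt) and processes every non-empty, wanted domain in them.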
def fetch_txt(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Static URLs
    urls = ({
        "blocker": "seirdy.one",
        "url"    : "https://seirdy.one/pb/bsl.txt",
    },)

    logger.info("Checking %d text file(s) ...", len(urls))
    for row in urls:
        logger.debug("Fetching row[url]='%s' ...", row["url"])
        response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code < 300 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s'", domain)
                if domain == "":
                    logger.debug("domain is empty - SKIPPED!")
                    continue
                elif not utils.is_domain_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue

                logger.debug("domain='%s',row[blocker]='%s'", domain, row["blocker"])
                processed = utils.process_domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)
                if not processed:
                    logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
                    continue

    logger.debug("Success! - EXIT!")
    return 0

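# Scrapes the instance list from fedipact.online (presumably instances that
# signed the pact) and registers every new, wanted domain.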
def fetch_fedipact(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    response = utils.fetch_url("https://fedipact.online", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and response.text != "":
        logger.debug("Parsing %d Bytes ...", len(response.text))

        doc = bs4.BeautifulSoup(response.text, "html.parser")
        logger.debug("doc[]='%s'", type(doc))

        rows = doc.findAll("li")
        logger.info("Checking %d row(s) ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            domain = tidyup.domain(row.contents[0])

            logger.debug("domain='%s'", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue

            logger.info("Fetching domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0