# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import csv
import inspect
import json
import logging
import time

import argparse
import atoma
import bs4
import markdown
import reqto
import validators

from fba import csrf
from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import locking
from fba.helpers import tidyup

from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)

def check_instance(args: argparse.Namespace) -> int:
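    """
    Checks whether args.domain is syntactically valid, not blacklisted and
    not yet registered. Returns 0 when the domain is unknown and can be
    added, 100 (invalid), 101 (blacklisted) or 102 (already registered)
    otherwise.
    """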
    logger.debug("args.domain='%s' - CALLED!", args.domain)
    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status

def fetch_pixelfed_api(args: argparse.Namespace) -> int:
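    """
    Fetches the server list from pixelfed.org's API and fetches instance
    data for every wanted, not yet registered domain found in it.
    """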
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    headers = tuple()

    try:
        logger.debug("Checking CSRF from pixelfed.org")
        headers = csrf.determine("pixelfed.org", dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_pixelfed_api,%s) - EXIT!", type(exception), __name__)
        return 100

    try:
        logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            "pixelfed.org",
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 101
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 102

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif not utils.is_domain_wanted(row["domain"]):
                logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
                continue
            elif instances.is_registered(row["domain"]):
                logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
                continue

            logger.debug("Fetching instances from row[domain]='%s' ...", row["domain"])
            federation.fetch_instances(row["domain"], None, None, inspect.currentframe().f_code.co_name)

    except network.exceptions as exception:
        logger.warning("Cannot fetch JSON from pixelfed.org API, exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 103

    logger.debug("Success! - EXIT!")
    return 0

def fetch_bkali(args: argparse.Namespace) -> int:
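    """
    Queries the GraphQL endpoint at gql.api.bka.li for a list of known
    domains and fetches instance data for every new, wanted one.
    """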
    logger.debug("args[]='%s' - CALLED!", type(args))
    domains = list()
    try:
        fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        }))

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for 'gql.api.bka.li' returned error message='%s'", fetched["error_message"])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows(data)()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif not utils.is_domain_wanted(entry["domain"]):
                logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("domain='%s' is already registered - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql, exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, "tak.teleyal.blog", None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0

def fetch_blocks(args: argparse.Namespace) -> int:
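    """
    Fetches and records block lists. With --domain or --software given, only
    that instance or software is re-checked; otherwise all instances of
    supported software whose last check is older than the 'recheck_block'
    interval are processed. Obfuscated entries (containing '*' or '?') are
    resolved via instances.deobfuscate() where possible.
    """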
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for single args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ?", [args.software]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey', 'peertube') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
        blocker = tidyup.domain(blocker)
        logger.debug("blocker='%s' - AFTER!", blocker)

        if blocker == "":
            logger.warning("blocker is now empty!")
            continue
        elif nodeinfo_url is None or nodeinfo_url == "":
            logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
            continue
        elif not utils.is_domain_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("blocker='%s'", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        blocking = list()
        blockdict = list()
        if software == "pleroma":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
        elif software == "mastodon":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
        elif software == "lemmy":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
        elif software == "friendica":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = friendica.fetch_blocks(blocker)
        elif software == "misskey":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = misskey.fetch_blocks(blocker)
        else:
            logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"]  = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                continue
            elif block["blocked"].count("*") > 0:
                logger.debug("blocker='%s' uses obfuscated domains, marking ...", blocker)
                instances.set_has_obfuscation(blocker, True)

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    continue

                block["blocked"] = row[0]
                origin           = row[1]
                nodeinfo_url     = row[2]
            elif block["blocked"].count("?") > 0:
                logger.debug("blocker='%s' uses obfuscated domains, marking ...", blocker)
                instances.set_has_obfuscation(blocker, True)

                # Some obscure them with question marks, not sure if that's dependent on version or not
                row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    continue

                block["blocked"] = row[0]
                origin           = row[1]
                nodeinfo_url     = row[2]

            logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
            if not utils.is_domain_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            if block["block_level"] == "silence":
                logger.debug("Block level 'silence' has been changed to 'silenced'")
                block["block_level"] = "silenced"
            elif block["block_level"] == "suspend":
                logger.debug("Block level 'suspend' has been changed to 'suspended'")
                block["block_level"] = "suspended"

            if not blocks.is_instance_blocked(blocker, block["blocked"], block["block_level"]):
                logger.debug("Invoking blocks.add_instance(%s, %s, %s, %s)", blocker, block["blocked"], block["reason"], block["block_level"])
                blocks.add_instance(blocker, block["blocked"], block["reason"], block["block_level"])

                logger.debug("block_level='%s',config[bot_enabled]='%s'", block["block_level"], config.get("bot_enabled"))
                if block["block_level"] == "reject" and config.get("bot_enabled"):
                    logger.debug("blocker='%s' has blocked '%s' with reason='%s' - Adding to bot notification ...", blocker, block["blocked"], block["reason"])
                    blockdict.append({
                        "blocked": block["blocked"],
                        "reason" : block["reason"],
                    })
            else:
                logger.debug("Updating block last seen and reason for blocker='%s',blocked='%s' ...", blocker, block["blocked"])
                blocks.update_last_seen(blocker, block["blocked"], block["block_level"])
                blocks.update_reason(block["reason"], blocker, block["blocked"], block["block_level"])

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config[bot_enabled]='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_observer(args: argparse.Namespace) -> int:
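    """
    Fetches per-software instance tables from fediverse.observer and adds
    every wanted, not yet registered domain.
    """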
    logger.debug("args[]='%s' - CALLED!", type(args))
    types = [
        "akkoma",
        "birdsitelive",
        "bookwyrm",
        "calckey",
        "diaspora",
        "foundkey",
        "friendica",
        "funkwhale",
        "gancio",
        "gnusocial",
        "gotosocial",
        "hometown",
        "hubzilla",
        "kbin",
        "ktistec",
        "lemmy",
        "mastodon",
        "microblogpub",
        "misskey",
        "mitra",
        "mobilizon",
        "owncast",
        "peertube",
        "pixelfed",
        "pleroma",
        "plume",
        "snac",
        "takahe",
        "wildebeest",
        "writefreely"
    ]

    locking.acquire()

    logger.info("Fetching %d different table data ...", len(types))
    for software in types:
        doc = None

        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://fediverse.observer/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from fediverse.observer: '%s'", software, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()

            logger.debug("domain='%s'", domain)
            if not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has recently been handled - SKIPPED!", domain)
                continue

            logger.info("Fetching instances for domain='%s',software='%s'", domain, software)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_todon_wiki(args: argparse.Namespace) -> int:
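    """
    Parses the silenced/limited and suspended server lists on wiki.todon.eu
    and records them as blocks by todon.eu.
    """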
    logger.debug("args[]='%s' - CALLED!", type(args))

    locking.acquire()
    blocklist = {
        "silenced": list(),
        "reject": list(),
    }

    raw = utils.fetch_url("https://wiki.todon.eu/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, "todon.eu", None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if blocks.is_instance_blocked("todon.eu", blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            blocks.add_instance("todon.eu", blocked, None, block_level)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

    logger.debug("Success! - EXIT!")
    return 0

def fetch_cs(args: argparse.Namespace) -> int:
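    """
    Parses chaos.social's federation.md (rendered from Markdown) for
    silenced and blocked instances and records them as blocks by
    chaos.social.
    """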
    logger.debug("args[]='%s' - CALLED!", type(args))
    extensions = [
        "extra",
        "abbr",
        "attr_list",
        "def_list",
        "fenced_code",
        "footnotes",
        "md_in_html",
        "admonition",
        "codehilite",
        "legacy_attrs",
        "legacy_em",
        "meta",
        "nl2br",
        "sane_lists",
        "smarty",
        "toc",
        "wikilinks"
    ]

    domains = {
        "silenced": list(),
        "reject"  : list(),
    }

    raw = utils.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc[%s]()=%d", type(doc), len(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    domains["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    domains["reject"] = federation.find_domains(blocked)

    logger.debug("domains[silenced]()=%d,domains[reject]()=%d", len(domains["silenced"]), len(domains["reject"]))
    if len(domains["silenced"]) > 0 or len(domains["reject"]) > 0:
        locking.acquire()

        for block_level in domains:
            logger.info("block_level='%s' has %d row(s)", block_level, len(domains[block_level]))

            for row in domains[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], "chaos.social", None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if not blocks.is_instance_blocked("chaos.social", row["domain"], block_level):
                    logger.debug("Adding block for chaos.social: domain='%s',block_level='%s' ...", row["domain"], block_level)
                    blocks.add_instance("chaos.social", row["domain"], row["reason"], block_level)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fba_rss(args: argparse.Namespace) -> int:
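    """
    Parses the FBA-specific RSS feed given via args.feed and fetches
    instance data for every new domain found in it.
    """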
    logger.debug("args[]='%s' - CALLED!", type(args))
    domains = list()

    logger.info("Fetching FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item='%s'", item)
            domain = item.link.split("=")[1]

            if blacklist.is_blacklisted(domain):
                logger.debug("domain='%s' is blacklisted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fbabot_atom(args: argparse.Namespace) -> int:
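    """
    Parses the ATOM feed of the FBA bot account on ryona.agency and fetches
    instance data for every new domain linked from it.
    """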
    logger.debug("args[]='%s' - CALLED!", type(args))
    feed = "https://ryona.agency/users/fba/feed.atom"

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s'", type(href), href)
                    domain = tidyup.domain(href)

                    logger.debug("domain='%s'", domain)
                    if not utils.is_domain_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains(%d)='%s'", len(domains), domains)
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_instances(args: argparse.Namespace) -> int:
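    """
    Fetches instance data for args.domain. Unless --single is given, also
    re-crawls all known instances of supported software whose last instance
    fetch is older than the 'recheck_instance' interval.
    """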
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Initial fetch
    try:
        logger.info("Fetching instances from args.domain='%s' ...", args.domain)
        federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        return 100

    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("domain='%s'", row[0])
        if not utils.is_domain_wanted(row[0]):
            logger.debug("Domain is not wanted: row[0]='%s'", row[0])
            continue

        try:
            logger.info("Fetching instances for domain='%s',software='%s',origin='%s',nodeinfo_url='%s'", row[0], row[2], row[1], row[3])
            federation.fetch_instances(row[0], row[1], row[2], inspect.currentframe().f_code.co_name, row[3])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[0]='%s'", type(exception), row[0])
            instances.set_last_error(row[0], exception)

    logger.debug("Success - EXIT!")
    return 0

def fetch_oliphant(args: argparse.Namespace) -> int:
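    """
    Downloads CSV block lists from oliphant's blocklists repository on
    Codeberg and processes every wanted domain found in them. With
    args.domain given, only the matching blocker's list is fetched.
    """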
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Base URL
    base_url = "https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists"

    # URLs to fetch
    blocklists = (
        {
            "blocker": "artisan.chat",
            "csv_url": "mastodon/artisan.chat.csv",
        },{
            "blocker": "mastodon.art",
            "csv_url": "mastodon/mastodon.art.csv",
        },{
            "blocker": "pleroma.envs.net",
            "csv_url": "mastodon/pleroma.envs.net.csv",
        },{
            "blocker": "oliphant.social",
            "csv_url": "mastodon/_unified_tier3_blocklist.csv",
        },{
            "blocker": "mastodon.online",
            "csv_url": "mastodon/mastodon.online.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "mastodon/mastodon.social.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "other/missing-tier0-mastodon.social.csv",
        },{
            "blocker": "rage.love",
            "csv_url": "mastodon/rage.love.csv",
        },{
            "blocker": "sunny.garden",
            "csv_url": "mastodon/sunny.garden.csv",
        },{
            "blocker": "solarpunk.moe",
            "csv_url": "mastodon/solarpunk.moe.csv",
        },{
            "blocker": "toot.wales",
            "csv_url": "mastodon/toot.wales.csv",
        },{
            "blocker": "union.place",
            "csv_url": "mastodon/union.place.csv",
        }
    )

    domains = list()

    logger.debug("Downloading %d files ...", len(blocklists))
    for block in blocklists:
        # Is domain given and not equal blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue
        elif args.domain in domains:
            logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
            continue

        # Fetch this URL
        logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
        response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.content != b"":
            logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
            reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")

            logger.debug("reader[]='%s'", type(reader))
            for row in reader:
                logger.debug("row[%s]='%s'", type(row), row)
                domain = None
                if "#domain" in row:
                    domain = row["#domain"]
                elif "domain" in row:
                    domain = row["domain"]
                else:
                    logger.debug("row='%s' does not contain domain column", row)
                    continue

                logger.debug("domain='%s'", domain)
                if not utils.is_domain_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue

                logger.debug("Marking domain='%s' as handled", domain)
                domains.append(domain)

                logger.debug("Processing domain='%s' ...", domain)
                processed = utils.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_txt(args: argparse.Namespace) -> int:
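    """
    Downloads static plain-text block lists (currently seirdy.one's) and
    processes every non-empty, wanted domain found in them.
    """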
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Static URLs
    urls = ({
        "blocker": "seirdy.one",
        "url"    : "https://seirdy.one/pb/bsl.txt",
    },)

    logger.info("Checking %d text file(s) ...", len(urls))
    for row in urls:
        logger.debug("Fetching row[url]='%s' ...", row["url"])
        response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code < 300 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s'", domain)
                if domain == "":
                    logger.debug("domain is empty - SKIPPED!")
                    continue
                elif not utils.is_domain_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue

                logger.debug("domain='%s',row[blocker]='%s'", domain, row["blocker"])
                processed = utils.process_domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)
                if not processed:
                    logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
                    continue

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fedipact(args: argparse.Namespace) -> int:
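    """
    Scrapes the participant list from fedipact.online and fetches instance
    data for every wanted, not yet registered domain.
    """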
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    response = utils.fetch_url("https://fedipact.online", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and response.text != "":
        logger.debug("Parsing %d Bytes ...", len(response.text))

        doc = bs4.BeautifulSoup(response.text, "html.parser")
        logger.debug("doc[]='%s'", type(doc))

        rows = doc.findAll("li")
        logger.info("Checking %d row(s) ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            domain = tidyup.domain(row.contents[0])

            logger.debug("domain='%s'", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue

            logger.info("Fetching domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0