]> git.mxchange.org Git - fba.git/blob - fba/commands.py
Continued:
[fba.git] / fba / commands.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import csv
18 import inspect
19 import json
20 import time
21
22 import argparse
23 import atoma
24 import bs4
25 import markdown
26 import reqto
27 import validators
28
29 from fba import blacklist
30 from fba import config
31 from fba import federation
32 from fba import fba
33 from fba import network
34
35 from fba.helpers import locking
36 from fba.helpers import tidyup
37
38 from fba.models import blocks
39 from fba.models import instances
40
41 from fba.networks import friendica
42 from fba.networks import mastodon
43 from fba.networks import misskey
44 from fba.networks import pleroma
45
def check_instance(args: argparse.Namespace) -> int:
    """Tell whether args.domain could be added as a new instance.

    Prints one line describing the outcome and returns a status code:
    0 when the domain is valid, not blacklisted and not registered yet,
    100 when it is not a valid domain name, 101 when it is blacklisted
    and 102 when it is already registered.
    """
    # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - CALLED!")

    # Each guard returns right away; the order of the checks matters
    # because only the first matching condition is reported.
    if not validators.domain(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is not valid")
        return 100

    if blacklist.is_blacklisted(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is blacklisted")
        return 101

    if instances.is_registered(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is already registered")
        return 102

    print(f"INFO: args.domain='{args.domain}' is not known")
    return 0
63
def fetch_bkali(args: argparse.Namespace) -> int:
    """Fetch the domain list from gql.api.bka.li and crawl all unknown domains.

    A GraphQL query is POSTed to gql.api.bka.li. Every returned domain that
    is valid, not blacklisted, not registered and not recently fetched is
    collected and then handed to federation.fetch_instances().

    Returns 0 on success, 100 when post_json_api() reported an error message,
    101 when the JSON response carries an error object and 102 when a network
    exception occurred while querying the API.

    Raises Exception when the response is empty or lacks the keys 'data' or
    'nodeinfo'.
    """
    # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
    domains = list()
    try:
        fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        }))

        # DEBUG: print(f"DEBUG: fetched[]='{type(fetched)}'")
        if "error_message" in fetched:
            print(f"WARNING: post_json_api() for 'gql.api.bka.li' returned error message: {fetched['error_message']}")
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            # The error object sits below the "json" key (exactly what the
            # condition above checked), not at the top level of the result.
            print(f"WARNING: post_json_api() returned error: {fetched['json']['error']['message']}")
            return 101

        rows = fetched["json"]

        # DEBUG: print(f"DEBUG: rows({len(rows)})[]='{type(rows)}'")
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            # DEBUG: print(f"DEBUG: entry['{type(entry)}']='{entry}'")
            if "domain" not in entry:
                print(f"WARNING: entry()={len(entry)} does not contain 'domain' - SKIPPED!")
                continue
            elif not validators.domain(entry["domain"]):
                print(f"WARNING: domain='{entry['domain']}' is not a valid domain - SKIPPED!")
                continue
            elif blacklist.is_blacklisted(entry["domain"]):
                # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is blacklisted - SKIPPED!")
                continue
            elif instances.is_registered(entry["domain"]):
                # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is already registered - SKIPPED!")
                continue
            elif instances.is_recent(entry["domain"]):
                # DEBUG: print(f"DEBUG: domain='{entry['domain']}' has been recently fetched - SKIPPED!")
                continue

            # DEBUG: print(f"DEBUG: Adding domain='{entry['domain']}' ...")
            domains.append(entry["domain"])

    except network.exceptions as exception:
        print(f"ERROR: Cannot fetch graphql,exception[{type(exception)}]:'{str(exception)}' - EXIT!")
        return 102

    # DEBUG: print(f"DEBUG: domains()={len(domains)}")
    if len(domains) > 0:
        # The lock is only taken when there is something to crawl
        locking.acquire()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            try:
                print(f"INFO: Fetching instances from domain='{domain}' ...")
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                # A failing domain is recorded and must not abort the remaining ones
                print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_bkali) from domain='{domain}'")
                instances.set_last_error(domain, exception)

    # DEBUG: print("DEBUG: Success - EXIT!")
    return 0
130
def fetch_blocks(args: argparse.Namespace):
    """Fetch block lists from registered instances and store them.

    When args.domain is set (not None and not empty) only that domain is
    re-checked; it must be a valid domain, not blacklisted and already
    registered, otherwise a warning is printed and the function returns.
    Without args.domain all instances running one of the softwares listed
    in the SQL query below are selected whose last_blocked is NULL or older
    than the current time minus config value "recheck_block".

    For 'pleroma' and 'mastodon' the work is delegated to the respective
    network module, 'lemmy' is currently only logged (the call is commented
    out) and 'friendica'/'misskey' block lists are processed inline.

    After each blocker pending instance data is flushed and, if
    "bot_enabled" is configured and new 'reject' blocks were recorded, a bot
    post is sent.

    Returns None in all cases.
    """
    # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
    if args.domain is not None and args.domain != "":
        # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - checking ...")
        if not validators.domain(args.domain):
            print(f"WARNING: domain='{args.domain}' is not valid.")
            return
        elif blacklist.is_blacklisted(args.domain):
            print(f"WARNING: domain='{args.domain}' is blacklisted, won't check it!")
            return
        elif not instances.is_registered(args.domain):
            print(f"WARNING: domain='{args.domain}' is not registered, please run ./fba.py fetch_instances {args.domain} first.")
            return

    # The lock is acquired only after the given domain passed validation
    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        # DEBUG: print(f"DEBUG: Querying database for single args.domain='{args.domain}' ...")
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey', 'peertube') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = fba.cursor.fetchall()
    print(f"INFO: Checking {len(rows)} entries ...")
    for blocker, software, origin, nodeinfo_url in rows:
        # DEBUG: print("DEBUG: BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url)
        # Collects newly found 'reject' blocks of this blocker for the bot post below
        blockdict = list()
        blocker = tidyup.domain(blocker)
        # DEBUG: print("DEBUG: AFTER blocker,software:", blocker, software)

        if blocker == "":
            print("WARNING: blocker is now empty!")
            continue
        elif nodeinfo_url is None or nodeinfo_url == "":
            print(f"DEBUG: blocker='{blocker}',software='{software}' has empty nodeinfo_url")
            continue
        elif blacklist.is_blacklisted(blocker):
            print(f"WARNING: blocker='{blocker}' is blacklisted now!")
            continue

        # DEBUG: print(f"DEBUG: blocker='{blocker}'")
        # NOTE(review): last_blocked is set before the block list is actually
        # fetched, so a failing fetch presumably still counts as checked - confirm.
        instances.set_last_blocked(blocker)

        if software == "pleroma":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            pleroma.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "mastodon":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            mastodon.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "lemmy":
            # Only logged for now, the actual fetch is commented out
            print(f"INFO: blocker='{blocker}',software='{software}'")
            #lemmy.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "friendica" or software == "misskey":
            print(f"INFO: blocker='{blocker}',software='{software}'")

            # NOTE(review): the default is a list, but .items() is called on it
            # below; this works only because one of the two branches always
            # reassigns it. Presumably both fetch_blocks() return a dict of
            # block_level -> list of blocks - verify.
            blocking = list()
            if software == "friendica":
                blocking = friendica.fetch_blocks(blocker)
            elif software == "misskey":
                blocking = misskey.fetch_blocks(blocker)

            print(f"INFO: Checking {len(blocking.items())} entries from blocker='{blocker}',software='{software}' ...")
            for block_level, blocklist in blocking.items():
                # DEBUG: print("DEBUG: blocker,block_level,blocklist():", blocker, block_level, len(blocklist))
                block_level = tidyup.domain(block_level)
                # DEBUG: print("DEBUG: AFTER-block_level:", block_level)
                if block_level == "":
                    print("WARNING: block_level is empty, blocker:", blocker)
                    continue

                # DEBUG: print(f"DEBUG: Checking {len(blocklist)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...")
                for block in blocklist:
                    # Relies on each block having exactly two values, in the
                    # order blocked, reason
                    blocked, reason = block.values()
                    # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - BEFORE!")
                    blocked = tidyup.domain(blocked)
                    reason  = tidyup.reason(reason) if reason is not None and reason != "" else None
                    # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!")

                    if blocked == "":
                        print("WARNING: blocked is empty:", blocker)
                        continue
                    elif blacklist.is_blacklisted(blocked):
                        # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
                        continue
                    elif blocked.count("*") > 0:
                        # Some friendica servers also obscure domains without hash
                        row = instances.deobscure("*", blocked)

                        # DEBUG: print(f"DEBUG: row[]='{type(row)}'")
                        if row is None:
                            print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocker='{blocker}',software='{software}' - SKIPPED!")
                            continue

                        # NOTE(review): origin and nodeinfo_url are the outer
                        # loop's variables; the values assigned here stay in
                        # effect for all following blocks of this blocker and
                        # are passed to instances.add() below - confirm this
                        # is intended.
                        blocked      = row[0]
                        origin       = row[1]
                        nodeinfo_url = row[2]
                    elif blocked.count("?") > 0:
                        # Some obscure them with question marks, not sure if that's dependent on version or not
                        row = instances.deobscure("?", blocked)

                        # DEBUG: print(f"DEBUG: row[]='{type(row)}'")
                        if row is None:
                            print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocker='{blocker}',software='{software}' - SKIPPED!")
                            continue

                        # NOTE(review): same overwrite of the outer loop's
                        # variables as in the "*" branch above
                        blocked      = row[0]
                        origin       = row[1]
                        nodeinfo_url = row[2]

                    # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
                    if not validators.domain(blocked):
                        print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - SKIPPED!")
                        continue
                    elif blocked.endswith(".arpa"):
                        # DEBUG: print(f"DEBUG: blocked='{blocked}' is ending with '.arpa' - SKIPPED!")
                        continue
                    elif not instances.is_registered(blocked):
                        # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker)
                        try:
                            # NOTE(review): unless a deobscured row replaced it,
                            # nodeinfo_url here is still the blocker's value
                            # from the database row - verify against instances.add()
                            instances.add(blocked, blocker, inspect.currentframe().f_code.co_name, nodeinfo_url)
                        except network.exceptions as exception:
                            print(f"Exception during adding blocked='{blocked}',blocker='{blocker}': '{type(exception)}'")
                            continue

                    if not blocks.is_instance_blocked(blocker, blocked, block_level):
                        blocks.add_instance(blocker, blocked, reason, block_level)

                        # Only new 'reject' blocks are reported by the bot
                        if block_level == "reject":
                            blockdict.append({
                                "blocked": blocked,
                                "reason" : reason
                            })
                    else:
                        # DEBUG: print(f"DEBUG: Updating block last seen and reason for blocker='{blocker}',blocked='{blocked}' ...")
                        blocks.update_last_seen(blocker, blocked, block_level)
                        blocks.update_reason(reason, blocker, blocked, block_level)

            # DEBUG: print("DEBUG: Committing changes ...")
            fba.connection.commit()
        else:
            # 'peertube' is selected by the query above but has no branch
            # here, so it ends up in this warning as well
            print("WARNING: Unknown software:", blocker, software)

        if instances.has_pending(blocker):
            # DEBUG: print(f"DEBUG: Invoking instances.update_data({blocker}) ...")
            instances.update_data(blocker)

        if config.get("bot_enabled") and len(blockdict) > 0:
            network.send_bot_post(blocker, blockdict)

    # DEBUG: print("DEBUG: EXIT!")
287
def fetch_observer(args: argparse.Namespace):
    """Crawl fediverse.observer's per-software tables for unknown domains.

    For every software type listed below the table data page is fetched
    and parsed; each linked domain that is valid, not blacklisted and not
    registered yet is handed to federation.fetch_instances().

    A network exception while fetching a table skips that software type; a
    network exception while fetching instances from a domain is recorded
    with instances.set_last_error() and the crawl continues with the next
    domain.

    Returns None.
    """
    # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
    types = [
        "akoma",
        "birdsitelive",
        "bookwyrm",
        "calckey",
        "diaspora",
        "foundkey",
        "friendica",
        "funkwhale",
        "gancio",
        "gnusocial",
        "gotosocial",
        "hometown",
        "hubzilla",
        "kbin",
        "ktistec",
        "lemmy",
        "mastodon",
        "microblogpub",
        "misskey",
        "mitra",
        "mobilizon",
        "owncast",
        "peertube",
        "pixelfed",
        "pleroma",
        "plume",
        "snac",
        "takahe",
        "wildebeest",
        "writefreely"
    ]

    locking.acquire()

    print(f"INFO: Fetching {len(types)} different table data ...")
    for software in types:
        doc = None

        try:
            # DEBUG: print(f"DEBUG: Fetching table data for software='{software}' ...")
            raw = fba.fetch_url(f"https://fediverse.observer/app/views/tabledata.php?software={software}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
            # DEBUG: print(f"DEBUG: raw[{type(raw)}]()={len(raw)}")

            doc = bs4.BeautifulSoup(raw, features='html.parser')
            # DEBUG: print(f"DEBUG: doc[]='{type(doc)}'")
        except network.exceptions as exception:
            print(f"WARNING: Cannot fetch software='{software}' from fediverse.observer: '{type(exception)}'")
            continue

        items = doc.findAll("a", {"class": "url"})
        print(f"INFO: Checking {len(items)} items,software='{software}' ...")
        for item in items:
            # DEBUG: print(f"DEBUG: item[]='{type(item)}'")
            domain = item.decode_contents()

            # DEBUG: print(f"DEBUG: domain='{domain}'")
            if not validators.domain(domain):
                print(f"WARNING: domain='{domain}' is not a valid domain - SKIPPED!")
                continue
            elif blacklist.is_blacklisted(domain):
                # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
                continue
            elif instances.is_registered(domain):
                # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
                continue

            try:
                print(f"INFO: Fetching instances for domain='{domain}',software='{software}'")
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                # Same handling as in the other fetch_* commands: a single
                # unreachable domain must not abort the whole crawl
                print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_observer) from domain='{domain}'")
                instances.set_last_error(domain, exception)

    # DEBUG: print("DEBUG: EXIT!")
361
def fetch_cs(args: argparse.Namespace):
    """Import chaos.social's published federation list.

    Fetches federation.md from the chaossocial/meta repository, renders the
    markdown to HTML and reads the tables following the headings with the
    ids "silenced-instances" and "blocked-instances". Rows of the first
    table are stored with block level 'silenced', rows of the second one
    with 'reject', both with 'chaos.social' as the blocker. Domains that
    are not registered yet are additionally crawled through
    federation.fetch_instances().

    Returns None.
    """
    # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
    extensions = [
        "extra",
        "abbr",
        "attr_list",
        "def_list",
        "fenced_code",
        "footnotes",
        "md_in_html",
        "admonition",
        "codehilite",
        "legacy_attrs",
        "legacy_em",
        "meta",
        "nl2br",
        "sane_lists",
        "smarty",
        "toc",
        "wikilinks"
    ]

    # Block level => list of rows found in the respective table
    domains = {
        "silenced": list(),
        "reject"  : list(),
    }

    raw = fba.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    # DEBUG: print(f"DEBUG: raw()={len(raw)}[]='{type(raw)}'")

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features='html.parser')

    # DEBUG: print(f"DEBUG: doc()={len(doc)}[]='{type(doc)}'")
    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    # DEBUG: print(f"DEBUG: silenced[]='{type(silenced)}'")
    domains["silenced"] = domains["silenced"] + federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    # DEBUG: print(f"DEBUG: blocked[]='{type(blocked)}'")
    domains["reject"] = domains["reject"] + federation.find_domains(blocked)

    # Count the rows, not the dict keys: len(domains) is always 2 and would
    # neither work as a guard nor report a meaningful number
    total = sum(len(rows) for rows in domains.values())

    # DEBUG: print(f"DEBUG: total={total}")
    if total > 0:
        locking.acquire()

        print(f"INFO: Adding {total} new instances ...")
        for block_level in domains:
            # DEBUG: print(f"DEBUG: block_level='{block_level}'")

            for row in domains[block_level]:
                # DEBUG: print(f"DEBUG: row='{row}'")
                if not blocks.is_instance_blocked('chaos.social', row["domain"], block_level):
                    # DEBUG: print(f"DEBUG: domain='{row['domain']}',block_level='{block_level}' blocked by chaos.social, adding ...")
                    blocks.add_instance('chaos.social', row["domain"], row["reason"], block_level)

                if not instances.is_registered(row["domain"]):
                    try:
                        print(f"INFO: Fetching instances from domain='{row['domain']}' ...")
                        federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        # Record the failure and continue with the next row
                        print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_cs) from domain='{row['domain']}'")
                        instances.set_last_error(row["domain"], exception)

        # DEBUG: print("DEBUG: Committing changes ...")
        fba.connection.commit()

    # DEBUG: print("DEBUG: EXIT!")
429
def fetch_fba_rss(args: argparse.Namespace):
    """Crawl the domains announced in an FBA-specific RSS feed.

    The feed at args.feed is fetched and parsed. The domain of each item is
    taken from the part of the item's link following the first "=". Domains
    that are not blacklisted, not seen before in this feed and not
    registered yet are handed to federation.fetch_instances().

    Items without a link, or whose link contains no "=", are skipped with a
    warning instead of aborting the whole run.

    Returns None.
    """
    # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
    domains = list()

    print(f"INFO: Fetch FBA-specific RSS args.feed='{args.feed}' ...")
    response = fba.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        # DEBUG: print(f"DEBUG: Parsing RSS feed ({len(response.text)} Bytes) ...")
        rss = atoma.parse_rss_bytes(response.content)

        # DEBUG: print(f"DEBUG: rss[]='{type(rss)}'")
        for item in rss.items:
            # DEBUG: print(f"DEBUG: item={item}")
            link = item.link

            # The link of an RSS item is optional and external data, so it
            # must not be assumed to have the expected "...=<domain>" shape
            if link is None or "=" not in link:
                print(f"WARNING: item.link='{link}' does not contain a domain - SKIPPED!")
                continue

            domain = link.split("=")[1]

            if blacklist.is_blacklisted(domain):
                # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
                continue
            elif domain in domains:
                # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
                continue
            elif instances.is_registered(domain):
                # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
                continue

            # DEBUG: print(f"DEBUG: Adding domain='{domain}'")
            domains.append(domain)

    # DEBUG: print(f"DEBUG: domains()={len(domains)}")
    if len(domains) > 0:
        # The lock is only taken when there is something to crawl
        locking.acquire()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            try:
                print(f"INFO: Fetching instances from domain='{domain}' ...")
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                # Record the failure and continue with the next domain
                print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_fba_rss) from domain='{domain}'")
                instances.set_last_error(domain, exception)

    # DEBUG: print("DEBUG: EXIT!")
474
def fetch_fbabot_atom(args: argparse.Namespace):
    """Crawl the domains linked in the FBA bot account's ATOM feed.

    Every entry of the feed is parsed as HTML; the href attribute of each
    link is split at "," and each part is tidied into a domain. Domains
    that are not blacklisted, not collected already and not registered yet
    are afterwards handed to federation.fetch_instances().

    Returns None.
    """
    # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
    feed = "https://ryona.agency/users/fba/feed.atom"
    timeouts = (config.get("connection_timeout"), config.get("read_timeout"))

    domains = list()

    print(f"INFO: Fetching ATOM feed='{feed}' from FBA bot account ...")
    response = fba.fetch_url(feed, network.web_headers, timeouts)

    # Only a successful, non-empty response gets parsed
    usable = response.ok and response.status_code < 300 and len(response.text) > 0
    if usable:
        atom = atoma.parse_atom_bytes(response.content)

        for entry in atom.entries:
            soup = bs4.BeautifulSoup(entry.content.value, "html.parser")

            for anchor in soup.findAll("a"):
                for href in anchor["href"].split(","):
                    domain = tidyup.domain(href)

                    # Checked in this order: blacklisted, already collected,
                    # already registered - the first hit skips the domain
                    skipped = (
                        blacklist.is_blacklisted(domain)
                        or domain in domains
                        or instances.is_registered(domain)
                    )
                    if skipped:
                        continue

                    domains.append(domain)

    if not domains:
        # Nothing new found, the lock is not needed then
        return

    locking.acquire()

    print(f"INFO: Adding {len(domains)} new instances ...")
    for domain in domains:
        try:
            print(f"INFO: Fetching instances from domain='{domain}' ...")
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
        except network.exceptions as exception:
            # Record the failure and continue with the next domain
            print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_fbabot_atom) from domain='{domain}'")
            instances.set_last_error(domain, exception)
527
def fetch_instances(args: argparse.Namespace) -> int:
    """Fetch instances from args.domain and, unless args.single, from others.

    First args.domain itself is crawled. If that raises a network
    exception, the error is recorded and 100 is returned. With args.single
    set the function stops there and returns 0. Otherwise all instances
    selected by the query below (matching software, last_instance_fetch
    NULL or older than now minus config value "recheck_instance") are
    crawled as well, skipping blacklisted ones, and 0 is returned.
    """
    # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
    locking.acquire()

    # Name of this command, passed on to federation.fetch_instances()
    command = inspect.currentframe().f_code.co_name

    # Initial fetch
    try:
        print(f"INFO: Fetching instances from args.domain='{args.domain}' ...")
        federation.fetch_instances(args.domain, None, None, command)
    except network.exceptions as exception:
        print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_instances) from args.domain='{args.domain}'")
        instances.set_last_error(args.domain, exception)

        return 100

    if args.single:
        # Only the given domain was requested
        return 0

    # Loop through some instances
    query = "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC"
    threshold = time.time() - config.get("recheck_instance")
    fba.cursor.execute(query, [threshold])

    rows = fba.cursor.fetchall()
    print(f"INFO: Checking {len(rows)} entries ...")
    for row in rows:
        # Columns: 0 = domain, 1 = origin, 2 = software, 3 = nodeinfo_url
        if blacklist.is_blacklisted(row[0]):
            print("WARNING: domain is blacklisted:", row[0])
            continue

        try:
            print(f"INFO: Fetching instances for instance '{row[0]}' ('{row[2]}') of origin='{row[1]}',nodeinfo_url='{row[3]}'")
            federation.fetch_instances(row[0], row[1], row[2], command, row[3])
        except network.exceptions as exception:
            # Record the failure and continue with the next row
            print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_instances) from domain='{row[0]}'")
            instances.set_last_error(row[0], exception)

    return 0
568
def fetch_oliphant(args: argparse.Namespace):
    """Import the CSV block lists published in oliphant's repository.

    Each configured CSV file is fetched from codeberg.org and every row's
    domain (column "#domain" or "domain") is passed to
    fba.process_domain() together with the blocker the list belongs to.

    When args.domain is a string, only block lists whose blocker equals it
    are processed.

    Returns None.
    """
    # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
    locking.acquire()

    # Base URL
    base_url = "https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists"

    # URLs to fetch
    blocklists = (
        {
            "blocker": "artisan.chat",
            "csv_url": "mastodon/artisan.chat.csv",
        },{
            "blocker": "mastodon.art",
            "csv_url": "mastodon/mastodon.art.csv",
        },{
            "blocker": "pleroma.envs.net",
            "csv_url": "mastodon/pleroma.envs.net.csv",
        },{
            "blocker": "oliphant.social",
            "csv_url": "mastodon/_unified_tier3_blocklist.csv",
        },{
            "blocker": "mastodon.online",
            "csv_url": "mastodon/mastodon.online.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "mastodon/mastodon.social.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "other/missing-tier0-mastodon.social.csv",
        },{
            "blocker": "rage.love",
            "csv_url": "mastodon/rage.love.csv",
        },{
            "blocker": "sunny.garden",
            "csv_url": "mastodon/sunny.garden.csv",
        },{
            "blocker": "solarpunk.moe",
            "csv_url": "mastodon/solarpunk.moe.csv",
        },{
            "blocker": "toot.wales",
            "csv_url": "mastodon/toot.wales.csv",
        },{
            "blocker": "union.place",
            "csv_url": "mastodon/union.place.csv",
        }
    )

    # Domains found in the CSV files processed so far
    domains = list()
    for block in blocklists:
        # Is domain given and not equal blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            # DEBUG: print(f"DEBUG: Skipping blocker='{block['blocker']}', not matching args.domain='{args.domain}'")
            continue
        elif args.domain in domains:
            # NOTE(review): this compares args.domain against the blocked
            # domains collected below, not against handled blockers -
            # confirm that this is the intended check.
            # DEBUG: print(f"DEBUG: args.domain='{args.domain}' already handled - SKIPPED!")
            continue

        # Fetch this URL
        print(f"INFO: Fetching csv_url='{block['csv_url']}' for blocker='{block['blocker']}' ...")
        response = fba.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        # DEBUG: print(f"DEBUG: response[]='{type(response)}'")
        # response.content is bytes; comparing it with the str "" is always
        # unequal, so the length is checked instead
        if response.ok and len(response.content) > 0:
            # DEBUG: print(f"DEBUG: Fetched {len(response.content)} Bytes, parsing CSV ...")
            reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")

            # DEBUG: print(f"DEBUG: reader[]='{type(reader)}'")
            for row in reader:
                # The lists use either "#domain" or "domain" as column name
                if "#domain" in row:
                    domain = row["#domain"]
                elif "domain" in row:
                    domain = row["domain"]
                else:
                    # DEBUG: print(f"DEBUG: row='{row}' does not contain domain column")
                    continue

                # DEBUG: print(f"DEBUG: Marking domain='{domain}' as handled")
                domains.append(domain)

                # DEBUG: print(f"DEBUG: Processing domain='{domain}' ...")
                fba.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)

    # DEBUG: print("DEBUG: EXIT!")
656
def fetch_txt(args: argparse.Namespace):
    """Process the domains listed in static plain-text block lists.

    Each configured URL is fetched; the response text is split at newlines
    and every non-empty line is passed as a domain to fba.process_domain()
    with 'seirdy.one' as the blocker.

    Returns None.
    """
    # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
    locking.acquire()

    # Static URLs
    urls = (
        "https://seirdy.one/pb/bsl.txt",
    )

    print(f"INFO: Checking {len(urls)} text file(s) ...")
    for url in urls:
        timeouts = (config.get("connection_timeout"), config.get("read_timeout"))
        response = fba.fetch_url(url, network.web_headers, timeouts)

        # Nothing to do for failed or empty responses
        if not response.ok or response.text == "":
            continue

        domains = response.text.split("\n")

        print(f"INFO: Processing {len(domains)} domains ...")
        for domain in domains:
            if domain == "":
                continue

            # The result is not needed here; nothing follows in the loop
            fba.process_domain(domain, 'seirdy.one', inspect.currentframe().f_code.co_name)