1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
29 from fba import blacklist
30 from fba import blocks
31 from fba import config
32 from fba import federation
34 from fba import instances
35 from fba import locking
36 from fba import network
38 from fba.helpers import tidyup
40 from fba.networks import friendica
41 from fba.networks import mastodon
42 from fba.networks import misskey
43 from fba.networks import pleroma
# Report whether args.domain could be added as a new instance.
#
# Checks, in this order, whether the domain fails validators.domain(), is
# blacklisted, or is already registered, and prints a WARNING for the first
# check that matches. The final INFO message reports a domain that is not yet
# known.
#
# NOTE(review): the signature promises an int and the trailing debug line
# mentions `status`, but no assignment to `status`, no `else:` and no `return`
# are visible in this listing - lines appear to be missing; confirm the actual
# status codes against the full file.
45 def check_instance(args: argparse.Namespace) -> int:
46 # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - CALLED!")
# Syntactic validation first, so that the later lookups only see plausible domains.
48 if not validators.domain(args.domain):
49 print(f"WARNING: args.domain='{args.domain}' is not valid")
51 elif blacklist.is_blacklisted(args.domain):
52 print(f"WARNING: args.domain='{args.domain}' is blacklisted")
54 elif instances.is_registered(args.domain):
55 print(f"WARNING: args.domain='{args.domain}' is already registered")
# Presumably the fall-through (`else`) branch: none of the checks above matched.
58 print(f"INFO: args.domain='{args.domain}' is not known")
60 # DEBUG: print(f"DEBUG: status={status} - EXIT!")
# Collect domains from the gql.api.bka.li GraphQL API and fetch instances for
# every domain that is new to this installation.
#
# The GraphQL query asks for all `nodeinfo` records ordered by domain. Entries
# are skipped when they have no 'domain' key, the domain is not valid, it is
# blacklisted, or it is already registered. The remaining domains are gathered
# in `domains` and then handed one by one to federation.fetch_instances().
#
# Raises Exception when the response lacks the 'data' key or 'data' lacks
# 'nodeinfo' (and for the "no records" case, whose condition is not visible).
#
# NOTE(review): the `try:` statements matching the two `except` clauses, the
# initialisation of `domains` and the return statements implied by `-> int`
# are not visible in this listing - confirm against the full file.
63 def fetch_bkali(args: argparse.Namespace) -> int:
64 # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
# The query is sent as a JSON-encoded string to the /v1/graphql endpoint.
67 fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
68 "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
71 # DEBUG: print(f"DEBUG: fetched[]='{type(fetched)}'")
72 if "error_message" in fetched:
73 print(f"WARNING: post_json_api() for 'gql.api.bka.li' returned error message: {fetched['error_message']}")
# NOTE(review): the condition below inspects fetched["json"]["error"]["message"],
# but the message on the following line reads fetched['error']['message'].
# Unless `fetched` also has a top-level 'error' key this looks like a KeyError -
# confirm and probably read from fetched['json'] instead.
75 elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
76 print(f"WARNING: post_json_api() returned error: {fetched['error']['message']}")
79 rows = fetched["json"]
81 # DEBUG: print(f"DEBUG: rows({len(rows)})[]='{type(rows)}'")
# NOTE(review): the condition guarding this first raise is not visible here;
# presumably it tests for an empty `rows`.
83 raise Exception("WARNING: Returned no records")
84 elif "data" not in rows:
85 raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
86 elif "nodeinfo" not in rows["data"]:
87 raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")
# Filter the returned entries down to domains that still need to be fetched.
89 for entry in rows["data"]["nodeinfo"]:
90 # DEBUG: print(f"DEBUG: entry['{type(entry)}']='{entry}'")
91 if not "domain" in entry:
92 print(f"WARNING: entry()={len(entry)} does not contain 'domain' - SKIPPED!")
94 elif not validators.domain(entry["domain"]):
95 print(f"WARNING: domain='{entry['domain']}' is not a valid domain - SKIPPED!")
97 elif blacklist.is_blacklisted(entry["domain"]):
98 # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is blacklisted - SKIPPED!")
100 elif instances.is_registered(entry["domain"]):
101 # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is already registered - SKIPPED!")
104 # DEBUG: print(f"DEBUG: Adding domain='{entry['domain']}' ...")
105 domains.append(entry["domain"])
# Network-level failures while querying the API are reported here.
107 except network.exceptions as exception:
108 print(f"ERROR: Cannot fetch graphql,exception[{type(exception)}]:'{str(exception)}' - EXIT!")
111 # DEBUG: print(f"DEBUG: domains()={len(domains)}")
115 print(f"INFO: Adding {len(domains)} new instances ...")
116 for domain in domains:
118 print(f"INFO: Fetching instances from domain='{domain}' ...")
# inspect.currentframe().f_code.co_name evaluates to this function's own name,
# which is passed along to fetch_instances().
119 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
# A failure for one domain is recorded on that instance instead of aborting the loop.
120 except network.exceptions as exception:
121 print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_bkali) from domain='{domain}'")
122 instances.update_last_error(domain, exception)
124 # DEBUG: print("DEBUG: EXIT!")
# Fetch block lists from registered instances and store them.
#
# When args.domain is given, it must be a valid, non-blacklisted and already
# registered domain, and only that instance is queried. Otherwise all instances
# running one of the listed software types are selected whose `last_blocked`
# is NULL or older than "now minus config 'recheck_block'".
#
# Per blocker: pleroma and mastodon are delegated to their network modules;
# friendica and misskey return a mapping of block level to block entries that
# is processed here (tidy up, deobscure wildcard domains, register unknown
# instances, add or refresh the block record), followed by a commit.
#
# NOTE(review): several statements are not visible in this listing (the
# `fba.cursor.execute(` openers of both queries, `else:`/`continue` lines, the
# `try:` before instances.add(), and the definition of `blockdict`) - confirm
# against the full file before relying on the control flow sketched here.
127 def fetch_blocks(args: argparse.Namespace):
128 # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
# Sanity checks for an explicitly requested domain.
129 if args.domain is not None and args.domain != "":
130 # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - checking ...")
131 if not validators.domain(args.domain):
132 print(f"WARNING: domain='{args.domain}' is not valid.")
134 elif blacklist.is_blacklisted(args.domain):
135 print(f"WARNING: domain='{args.domain}' is blacklisted, won't check it!")
137 elif not instances.is_registered(args.domain):
138 print(f"WARNING: domain='{args.domain}' is not registered, please run ./fba.py fetch_instances {args.domain} first.")
# Pick the rows to process: one instance, or everything due for a re-check.
143 if args.domain is not None and args.domain != "":
144 # Re-check single domain
145 # DEBUG: print(f"DEBUG: Querying database for single args.domain='{args.domain}' ...")
147 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
150 # Re-check after "timeout" (aka. minimum interval)
152 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
155 rows = fba.cursor.fetchall()
156 print(f"INFO: Checking {len(rows)} entries ...")
# Tuple order follows the SELECT column order above.
157 for blocker, software, origin, nodeinfo_url in rows:
158 # DEBUG: print("DEBUG: BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url)
160 blocker = tidyup.domain(blocker)
161 # DEBUG: print("DEBUG: AFTER blocker,software:", blocker, software)
164 print("WARNING: blocker is now empty!")
166 elif blacklist.is_blacklisted(blocker):
167 print(f"WARNING: blocker='{blocker}' is blacklisted now!")
170 # DEBUG: print(f"DEBUG: blocker='{blocker}'")
# The timestamp is updated before fetching, i.e. regardless of whether the fetch succeeds.
171 instances.update_last_blocked(blocker)
173 if software == "pleroma":
174 print(f"INFO: blocker='{blocker}',software='{software}'")
175 pleroma.fetch_blocks(blocker, origin, nodeinfo_url)
176 elif software == "mastodon":
177 print(f"INFO: blocker='{blocker}',software='{software}'")
178 mastodon.fetch_blocks(blocker, origin, nodeinfo_url)
179 elif software == "friendica" or software == "misskey":
180 print(f"INFO: blocker='{blocker}',software='{software}'")
# Both modules are called with the blocker only and return `blocking`, which is
# iterated below as block_level -> list of block entries.
183 if software == "friendica":
184 blocking = friendica.fetch_blocks(blocker)
185 elif software == "misskey":
186 blocking = misskey.fetch_blocks(blocker)
188 print(f"INFO: Checking {len(blocking.items())} entries from blocker='{blocker}',software='{software}' ...")
189 for block_level, blocklist in blocking.items():
190 # DEBUG: print("DEBUG: blocker,block_level,blocklist():", blocker, block_level, len(blocklist))
# NOTE(review): the block level is normalised with the *domain* tidy-up helper;
# confirm that this is intended.
191 block_level = tidyup.domain(block_level)
192 # DEBUG: print("DEBUG: AFTER-block_level:", block_level)
193 if block_level == "":
194 print("WARNING: block_level is empty, blocker:", blocker)
197 # DEBUG: print(f"DEBUG: Checking {len(blocklist)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...")
198 for block in blocklist:
# NOTE(review): unpacking .values() assumes every entry holds exactly two values
# in the order (blocked, reason); any other shape raises ValueError or swaps them.
199 blocked, reason = block.values()
200 # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - BEFORE!")
201 blocked = tidyup.domain(blocked)
# Empty or missing reasons are normalised to None.
202 reason = tidyup.reason(reason) if reason is not None and reason != "" else None
203 # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!")
206 print("WARNING: blocked is empty:", blocker)
208 elif blacklist.is_blacklisted(blocked):
209 # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
211 elif blocked.count("*") > 0:
212 # Some friendica servers also obscure domains without hash
213 row = instances.deobscure("*", blocked)
215 # DEBUG: print(f"DEBUG: row[]='{type(row)}'")
217 print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocker='{blocker}',software='{software}' - SKIPPED!")
# NOTE(review): this rebinds `nodeinfo_url`, the variable unpacked by the outer
# `for ... in rows` loop, so later entries of the same blocker see the value
# taken from the deobscured row - confirm this is intended.
222 nodeinfo_url = row[2]
223 elif blocked.count("?") > 0:
224 # Some obscure them with question marks, not sure if that's dependent on version or not
225 row = instances.deobscure("?", blocked)
227 # DEBUG: print(f"DEBUG: row[]='{type(row)}'")
229 print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocker='{blocker}',software='{software}' - SKIPPED!")
# NOTE(review): same rebinding of the outer loop variable as in the "*" branch.
234 nodeinfo_url = row[2]
236 # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
237 if not validators.domain(blocked):
238 print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - SKIPPED!")
240 elif not instances.is_registered(blocked):
241 # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker)
# The blocked domain is registered with the blocker and this function's own name.
243 instances.add(blocked, blocker, inspect.currentframe().f_code.co_name, nodeinfo_url)
244 except network.exceptions as exception:
245 print(f"Exception during adding blocked='{blocked}',blocker='{blocker}': '{type(exception)}'")
# New block: insert it. The update calls further down presumably belong to the
# branch for an already known block (the `else:` is not visible here).
248 if not blocks.is_instance_blocked(blocker, blocked, block_level):
249 blocks.add_instance(blocker, blocked, reason, block_level)
# NOTE(review): the body of this condition is not visible in this listing;
# presumably it fills `blockdict`, which is used for the bot post below - confirm.
251 if block_level == "reject":
257 # DEBUG: print(f"DEBUG: Updating block last seen and reason for blocker='{blocker}',blocked='{blocked}' ...")
258 blocks.update_last_seen(blocker, blocked, block_level)
259 blocks.update_reason(reason, blocker, blocked, block_level)
261 # DEBUG: print("DEBUG: Committing changes ...")
262 fba.connection.commit()
# Presumably the `else` branch for software types without a handler above
# (the query also selects 'bookwyrm' and 'takahe').
264 print("WARNING: Unknown software:", blocker, software)
# NOTE(review): `blockdict` is not defined anywhere in the visible lines.
266 if config.get("bot_enabled") and len(blockdict) > 0:
267 network.send_bot_post(blocker, blockdict)
269 # DEBUG: print("DEBUG: EXIT!")
# Import the block list that chaos.social publishes as federation.md.
#
# The markdown file is downloaded from GitHub, rendered to HTML and parsed
# with BeautifulSoup. Domains from the table following the
# "silenced-instances" heading are stored under block level "silenced",
# those from the "blocked-instances" table under "reject". Each row is then
# recorded as a block by 'chaos.social' (unless already present) and unknown
# domains are handed to federation.fetch_instances(). Changes are committed
# at the end.
#
# NOTE(review): the definitions of `extensions` and `domains`, and the `try:`
# matching the `except` below, are not visible in this listing - confirm
# against the full file.
271 def fetch_cs(args: argparse.Namespace):
272 # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
# Timeout is passed as a (connection_timeout, read_timeout) pair from the config.
298 raw = fba.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
299 # DEBUG: print(f"DEBUG: raw()={len(raw)}[]='{type(raw)}'")
301 doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features='html.parser')
303 # DEBUG: print(f"DEBUG: doc()={len(doc)}[]='{type(doc)}'")
# NOTE(review): doc.find() returns None when the heading id is absent, in which
# case the chained .findNext() raises AttributeError - confirm whether the
# upstream document layout can change.
304 silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
305 # DEBUG: print(f"DEBUG: silenced[]='{type(silenced)}'")
306 domains["silenced"] = domains["silenced"] + federation.find_domains(silenced)
308 blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
309 # DEBUG: print(f"DEBUG: blocked[]='{type(blocked)}'")
310 domains["reject"] = domains["reject"] + federation.find_domains(blocked)
312 # DEBUG: print(f"DEBUG: domains()={len(domains)}")
# NOTE(review): `domains` is keyed by block level, so len(domains) presumably
# counts block levels rather than the number of instances named in the message.
316 print(f"INFO: Adding {len(domains)} new instances ...")
317 for block_level in domains:
318 # DEBUG: print(f"DEBUG: block_level='{block_level}'")
# Each row is accessed by its "domain" and "reason" keys.
320 for row in domains[block_level]:
321 # DEBUG: print(f"DEBUG: row='{row}'")
322 if not blocks.is_instance_blocked('chaos.social', row["domain"], block_level):
323 # DEBUG: print(f"DEBUG: domain='{row['domain']}',block_level='{block_level}' blocked by chaos.social, adding ...")
324 blocks.add_instance('chaos.social', row["domain"], row["reason"], block_level)
326 if not instances.is_registered(row["domain"]):
328 print(f"INFO: Fetching instances from domain='{row['domain']}' ...")
# 'chaos.social' is passed as second argument; the last one is this function's own name.
329 federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)
330 except network.exceptions as exception:
331 print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_cs) from domain='{row['domain']}'")
332 instances.update_last_error(row["domain"], exception)
334 # DEBUG: print("DEBUG: Committing changes ...")
335 fba.connection.commit()
337 # DEBUG: print("DEBUG: EXIT!")
# Read an FBA-specific RSS feed (args.feed) and fetch instances for every
# domain in it that is neither blacklisted, already collected, nor registered.
#
# The feed is only parsed when the response is OK, has a status code below
# 300 and a non-empty body.
#
# NOTE(review): the initialisation of `domains` and the `try:` matching the
# `except` below are not visible in this listing - confirm against the full file.
339 def fetch_fba_rss(args: argparse.Namespace):
340 # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
343 print(f"INFO: Fetch FBA-specific RSS args.feed='{args.feed}' ...")
344 response = fba.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
346 # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
347 if response.ok and response.status_code < 300 and len(response.text) > 0:
348 # DEBUG: print(f"DEBUG: Parsing RSS feed ({len(response.text)} Bytes) ...")
# The raw bytes are parsed, not the decoded text.
349 rss = atoma.parse_rss_bytes(response.content)
351 # DEBUG: print(f"DEBUG: rss[]='{type(rss)}'")
352 for item in rss.items:
353 # DEBUG: print(f"DEBUG: item={item}")
# The domain is whatever follows the first "=" in the item link (up to the next
# "=", if any).
# NOTE(review): a link without "=" raises IndexError here, and the extracted
# value is not validated or tidied up - confirm the feed format guarantees this.
354 domain = item.link.split("=")[1]
356 if blacklist.is_blacklisted(domain):
357 # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
359 elif domain in domains:
360 # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
362 elif instances.is_registered(domain):
363 # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
366 # DEBUG: print(f"DEBUG: Adding domain='{domain}'")
367 domains.append(domain)
369 # DEBUG: print(f"DEBUG: domains()={len(domains)}")
373 print(f"INFO: Adding {len(domains)} new instances ...")
374 for domain in domains:
376 print(f"INFO: Fetching instances from domain='{domain}' ...")
# The last argument evaluates to this function's own name.
377 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
# A failure for one domain is recorded on that instance.
378 except network.exceptions as exception:
379 print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_fba_rss) from domain='{domain}'")
380 instances.update_last_error(domain, exception)
382 # DEBUG: print("DEBUG: EXIT!")
# Read the ATOM feed of the FBA bot account and fetch instances for every
# linked domain that is neither blacklisted, already collected, nor registered.
#
# The feed URL is hard-coded. Every entry's content is parsed as HTML; the
# href of each <a> element is split on "," and every part is run through
# tidyup.domain() to obtain a domain.
#
# NOTE(review): the initialisation of `domains` and the `try:` matching the
# `except` below are not visible in this listing - confirm against the full file.
384 def fetch_fbabot_atom(args: argparse.Namespace):
385 # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
386 feed = "https://ryona.agency/users/fba/feed.atom"
390 print(f"INFO: Fetching ATOM feed='{feed}' from FBA bot account ...")
391 response = fba.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
393 # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
# Only a successful, non-empty response is parsed.
394 if response.ok and response.status_code < 300 and len(response.text) > 0:
395 # DEBUG: print(f"DEBUG: Parsing ATOM feed ({len(response.text)} Bytes) ...")
396 atom = atoma.parse_atom_bytes(response.content)
398 # DEBUG: print(f"DEBUG: atom[]='{type(atom)}'")
399 for entry in atom.entries:
400 # DEBUG: print(f"DEBUG: entry[]='{type(entry)}'")
401 doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
402 # DEBUG: print(f"DEBUG: doc[]='{type(doc)}'")
403 for element in doc.findAll("a"):
# NOTE(review): element["href"] raises KeyError for an anchor without href;
# confirm the feed never contains such anchors.
404 for href in element["href"].split(","):
405 # DEBUG: print(f"DEBUG: href[{type(href)}]={href}")
406 domain = tidyup.domain(href)
408 # DEBUG: print(f"DEBUG: domain='{domain}'")
409 if blacklist.is_blacklisted(domain):
410 # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
412 elif domain in domains:
413 # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
415 elif instances.is_registered(domain):
416 # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
419 # DEBUG: print(f"DEBUG: Adding domain='{domain}',domains()={len(domains)}")
420 domains.append(domain)
422 # DEBUG: print(f"DEBUG: domains({len(domains)})={domains}")
426 print(f"INFO: Adding {len(domains)} new instances ...")
427 for domain in domains:
429 print(f"INFO: Fetching instances from domain='{domain}' ...")
# The last argument evaluates to this function's own name.
430 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
# A failure for one domain is recorded on that instance.
431 except network.exceptions as exception:
432 print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_fbabot_atom) from domain='{domain}'")
433 instances.update_last_error(domain, exception)
435 # DEBUG: print("DEBUG: EXIT!")
# Fetch instances starting at args.domain, then re-crawl known instances that
# are due for a re-check.
#
# First federation.fetch_instances() is run for args.domain. Afterwards the
# database is queried for instances of the listed software types whose
# `last_instance_fetch` is NULL or older than "now minus config
# 'recheck_instance'"; each non-blacklisted row is fetched again. Network
# errors are stored on the affected instance via instances.update_last_error().
#
# NOTE(review): the `try:` statements, the `fba.cursor.execute(` opener, the
# `for row in rows:` header, the condition for the early exit mentioned in the
# debug comment and the returns implied by `-> int` are not visible in this
# listing - confirm against the full file.
437 def fetch_instances(args: argparse.Namespace) -> int:
438 # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
443 print(f"INFO: Fetching instances from args.domain='{args.domain}' ...")
# The last argument evaluates to this function's own name.
444 federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
445 except network.exceptions as exception:
446 print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_instances) from args.domain='{args.domain}'")
447 instances.update_last_error(args.domain, exception)
452 # DEBUG: print("DEBUG: Not fetching more instances - EXIT!")
455 # Loop through some instances
# Column order matters below: row[0]=domain, row[1]=origin, row[2]=software,
# row[3]=nodeinfo_url.
457 "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe', 'lemmy') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
460 rows = fba.cursor.fetchall()
461 print(f"INFO: Checking {len(rows)} entries ...")
463 # DEBUG: print(f"DEBUG: domain='{row[0]}'")
464 if blacklist.is_blacklisted(row[0]):
465 print("WARNING: domain is blacklisted:", row[0])
469 print(f"INFO: Fetching instances for instance '{row[0]}' ('{row[2]}') of origin='{row[1]}',nodeinfo_url='{row[3]}'")
# Unlike the first call above, origin, software and nodeinfo_url are passed along.
470 federation.fetch_instances(row[0], row[1], row[2], inspect.currentframe().f_code.co_name, row[3])
471 except network.exceptions as exception:
472 print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_instances) from domain='{row[0]}'")
473 instances.update_last_error(row[0], exception)
475 # DEBUG: print("DEBUG: EXIT!")
# Import the CSV block lists published in the oliphant/blocklists repository
# on Codeberg.
#
# Each entry of `blocklists` names a blocker and the CSV path (relative to
# base_url) holding its list. When args.domain is a string, only the entry
# whose blocker equals it is processed. Every CSV row's domain (column
# "#domain" or "domain") is remembered in `domains` and passed to
# fba.process_domain() together with the blocker.
#
# NOTE(review): the opening of the `blocklists` list and its dict delimiters,
# the initialisation of `domains`, the `for row in reader:` header and several
# `continue`/`else:` lines are not visible in this listing - confirm against
# the full file.
478 def fetch_oliphant(args: argparse.Namespace):
479 # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
483 base_url = "https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists"
# Blocker / CSV path pairs. Note that "mastodon.social" is listed twice with
# different CSV files.
488 "blocker": "artisan.chat",
489 "csv_url": "mastodon/artisan.chat.csv",
491 "blocker": "mastodon.art",
492 "csv_url": "mastodon/mastodon.art.csv",
494 "blocker": "pleroma.envs.net",
495 "csv_url": "mastodon/pleroma.envs.net.csv",
497 "blocker": "oliphant.social",
498 "csv_url": "mastodon/_unified_tier3_blocklist.csv",
500 "blocker": "mastodon.online",
501 "csv_url": "mastodon/mastodon.online.csv",
503 "blocker": "mastodon.social",
504 "csv_url": "mastodon/mastodon.social.csv",
506 "blocker": "mastodon.social",
507 "csv_url": "other/missing-tier0-mastodon.social.csv",
509 "blocker": "rage.love",
510 "csv_url": "mastodon/rage.love.csv",
512 "blocker": "sunny.garden",
513 "csv_url": "mastodon/sunny.garden.csv",
515 "blocker": "solarpunk.moe",
516 "csv_url": "mastodon/solarpunk.moe.csv",
518 "blocker": "toot.wales",
519 "csv_url": "mastodon/toot.wales.csv",
521 "blocker": "union.place",
522 "csv_url": "mastodon/union.place.csv",
527 for block in blocklists:
528 # Is domain given and not equal blocker?
529 if isinstance(args.domain, str) and args.domain != block["blocker"]:
530 # DEBUG: print(f"DEBUG: Skipping blocker='{block['blocker']}', not matching args.domain='{args.domain}'")
# NOTE(review): in the visible lines `domain` is only assigned further down,
# while reading CSV rows. On the first pass this name looks unbound (NameError /
# UnboundLocalError); on later passes it holds the last row of the previous
# list. Possibly args.domain or block["blocker"] was intended - confirm.
532 elif domain in domains:
533 # DEBUG: print(f"DEBUG: domain='{domain}' already handled - SKIPPED!")
537 print(f"INFO: Fetching csv_url='{block['csv_url']}' for blocker='{block['blocker']}' ...")
538 response = fba.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
540 # DEBUG: print(f"DEBUG: response[]='{type(response)}'")
# NOTE(review): response.content is decoded as bytes below, so comparing it with
# the str "" is always unequal - an empty body is not filtered out by this test.
541 if response.ok and response.content != "":
542 # DEBUG: print(f"DEBUG: Fetched {len(response.content)} Bytes, parsing CSV ...")
# The first CSV line supplies the column names used as row keys.
543 reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")
545 # DEBUG: print(f"DEBUG: reader[]='{type(reader)}'")
# The domain column is named either "#domain" or "domain", depending on the file.
549 domain = row["#domain"]
550 elif "domain" in row:
551 domain = row["domain"]
553 # DEBUG: print(f"DEBUG: row='{row}' does not contain domain column")
556 # DEBUG: print(f"DEBUG: Marking domain='{domain}' as handled")
557 domains.append(domain)
559 # DEBUG: print(f"DEBUG: Processing domain='{domain}' ...")
# The last argument evaluates to this function's own name; `processed` is only
# referenced by the debug line below.
560 processed = fba.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
562 # DEBUG: print(f"DEBUG: processed='{processed}'")
564 # DEBUG: print("DEBUG: EXIT!")
# Import plain-text block lists (one domain per line).
#
# Every URL in `urls` is downloaded; a successful, non-empty response is split
# on newlines and each resulting entry is passed to fba.process_domain() with
# the blocker 'seirdy.one'.
#
# NOTE(review): the opening of the `urls` list, the `for url in urls:` header
# and the statements between the inner loop header and the process_domain()
# call are not visible in this listing - confirm against the full file.
566 def fetch_txt(args: argparse.Namespace):
567 # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
# Only one list is visible here; the blocker passed below is hard-coded to match it.
572 "https://seirdy.one/pb/bsl.txt",
575 print(f"INFO: Checking {len(urls)} text file(s) ...")
577 # DEBUG: print(f"DEBUG: Fetching url='{url}' ...")
578 response = fba.fetch_url(url, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
580 # DEBUG: print(f"DEBUG: response[]='{type(response)}'")
581 if response.ok and response.text != "":
582 # DEBUG: print(f"DEBUG: Returned {len(response.text.strip())} Bytes for processing")
# NOTE(review): splitting on "\n" yields an empty entry for a trailing newline
# and keeps "\r" on CRLF files; presumably the lines not visible below tidy up
# or skip such entries - confirm.
583 domains = response.text.split("\n")
585 print(f"INFO: Processing {len(domains)} domains ...")
586 for domain in domains:
590 # DEBUG: print(f"DEBUG: domain='{domain}'")
# The last argument evaluates to this function's own name.
591 processed = fba.process_domain(domain, 'seirdy.one', inspect.currentframe().f_code.co_name)
593 # DEBUG: print(f"DEBUG: processed='{processed}'")
595 # DEBUG: print(f"DEBUG: domain='{domain}' was not generically processed - SKIPPED!")
598 # DEBUG: print("DEBUG: EXIT!")