1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
29 from fba import blacklist
30 from fba import blocks
31 from fba import config
32 from fba import federation
34 from fba import instances
35 from fba import locking
36 from fba import network
38 from fba.helpers import tidyup
40 from fba.networks import friendica
41 from fba.networks import mastodon
42 from fba.networks import misskey
43 from fba.networks import pleroma
45 def check_instance(args: argparse.Namespace) -> int:
# Checks whether args.domain could be added as a new instance and reports the
# outcome on stdout: a WARNING is printed when the domain fails
# validators.domain(), is blacklisted, or is already registered; otherwise an
# INFO line states that the domain is not known.
#
# Parameters: args - parsed command line; only args.domain is read here.
# Returns: declared as int.
# NOTE(review): neither the return statement nor any assignment to the `status`
# named in the closing DEBUG line is visible in this excerpt (the embedded
# numbering skips 47, 50, 53, 56-57 and 61) - confirm the per-branch status
# values against the complete file.
46 # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - CALLED!")
# Checks run in this order: syntactic validity, blacklist, registration.
48 if not validators.domain(args.domain):
49 print(f"WARNING: args.domain='{args.domain}' is not valid")
51 elif blacklist.is_blacklisted(args.domain):
52 print(f"WARNING: args.domain='{args.domain}' is blacklisted")
54 elif instances.is_registered(args.domain):
55 print(f"WARNING: args.domain='{args.domain}' is already registered")
# Presumably the fall-through branch: none of the conditions above matched.
58 print(f"INFO: args.domain='{args.domain}' is not known")
60 # DEBUG: print(f"DEBUG: status={status} - EXIT!")
63 def fetch_bkali(args: argparse.Namespace) -> int:
# Queries the GraphQL endpoint at gql.api.bka.li for its domain list, keeps
# every domain that is valid, not blacklisted and not yet registered, and then
# calls federation.fetch_instances() for each collected domain.
#
# Parameters: args - parsed command line; not read in the visible lines.
# Returns: declared as int.
# NOTE(review): the return statements, the initialisation of `domains` and the
# `try:` headers belonging to the two `except` clauses are not visible in this
# excerpt (gaps in the embedded numbering) - confirm against the complete file.
64 # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
# POST a "domainlist" query that asks for all nodeinfo domains in ascending order.
67 fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
68 "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
71 # DEBUG: print(f"DEBUG: fetched[]='{type(fetched)}'")
# Two error shapes are handled: an "error_message" key on the result itself and
# an "error" object with a "message" inside the decoded JSON body.
72 if "error_message" in fetched:
73 print(f"WARNING: post_json_api() for 'gql.api.bka.li' returned error message: {fetched['error_message']}")
75 elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
# NOTE(review): the condition above tests fetched["json"]["error"]["message"],
# but the message below reads fetched['error']['message'] - this looks like it
# would raise KeyError instead of printing the warning; confirm and align.
76 print(f"WARNING: post_json_api() returned error: {fetched['error']['message']}")
79 rows = fetched["json"]
81 # DEBUG: print(f"DEBUG: rows({len(rows)})[]='{type(rows)}'")
# Malformed payloads are rejected by raising a plain Exception. The condition
# guarding the first raise is on a line not shown in this excerpt.
83 raise Exception("WARNING: Returned no records")
84 elif "data" not in rows:
85 raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
86 elif "nodeinfo" not in rows["data"]:
87 raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")
# Filter the returned entries; only unknown, valid, non-blacklisted domains are
# appended to `domains`.
89 for entry in rows["data"]["nodeinfo"]:
90 # DEBUG: print(f"DEBUG: entry['{type(entry)}']='{entry}'")
91 if not "domain" in entry:
92 print(f"WARNING: entry()={len(entry)} does not contain 'domain' - SKIPPED!")
94 elif not validators.domain(entry["domain"]):
95 print(f"WARNING: domain='{entry['domain']}' is not a valid domain - SKIPPED!")
97 elif blacklist.is_blacklisted(entry["domain"]):
98 # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is blacklisted - SKIPPED!")
100 elif instances.is_registered(entry["domain"]):
101 # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is already registered - SKIPPED!")
104 # DEBUG: print(f"DEBUG: Adding domain='{entry['domain']}' ...")
105 domains.append(entry["domain"])
# Network-level failures of the GraphQL request are reported as an ERROR.
107 except network.exceptions as exception:
108 print(f"ERROR: Cannot fetch graphql,exception[{type(exception)}]:'{str(exception)}' - EXIT!")
111 # DEBUG: print(f"DEBUG: domains()={len(domains)}")
# Second phase: fetch instances from each collected domain. A failure for one
# domain is printed and recorded via instances.update_last_error().
115 print(f"INFO: Adding {len(domains)} new instances ...")
116 for domain in domains:
118 print(f"INFO: Fetching instances from domain='{domain}' ...")
# The current function's name is passed along as the fourth argument.
119 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
120 except network.exceptions as exception:
121 print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_bkali) from domain='{domain}'")
122 instances.update_last_error(domain, exception)
124 # DEBUG: print("DEBUG: EXIT!")
127 def fetch_blocks(args: argparse.Namespace):
# Re-checks block lists of registered instances. Either the single instance
# given as args.domain is processed, or every instance of a supported software
# whose last_blocked timestamp is NULL or older than the configured
# "recheck_block" interval. Pleroma and Mastodon are delegated to their network
# modules; Friendica and Misskey share the generic processing loop below.
#
# Parameters: args - parsed command line; args.domain is optional.
# NOTE(review): this excerpt has gaps in the embedded numbering, so statements
# such as `try:`, `else:`, `continue`, early returns, the cursor.execute() call
# heads and the initialisation of `blockdict` are not visible here - confirm
# against the complete file before changing any logic.
128 # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
# Validate an explicitly given domain before touching the database.
129 if args.domain is not None and args.domain != "":
130 # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - checking ...")
131 if not validators.domain(args.domain):
132 print(f"WARNING: domain='{args.domain}' is not valid.")
134 elif blacklist.is_blacklisted(args.domain):
135 print(f"WARNING: domain='{args.domain}' is blacklisted, won't check it!")
137 elif not instances.is_registered(args.domain):
138 print(f"WARNING: domain='{args.domain}' is not registered, please run ./fba.py fetch_instances {args.domain} first.")
# Select the rows to process. Both statements use "?" placeholders with a
# separate parameter list.
143 if args.domain is not None and args.domain != "":
144 # Re-check single domain
145 # DEBUG: print(f"DEBUG: Querying database for single args.domain='{args.domain}' ...")
147 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
150 # Re-check after "timeout" (aka. minimum interval)
152 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
155 rows = fba.cursor.fetchall()
156 print(f"INFO: Checking {len(rows)} entries ...")
157 for blocker, software, origin, nodeinfo_url in rows:
158 # DEBUG: print("DEBUG: BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url)
160 blocker = tidyup.domain(blocker)
161 # DEBUG: print("DEBUG: AFTER blocker,software:", blocker, software)
# Rows whose blocker became empty after tidying, or is blacklisted, are reported.
164 print("WARNING: blocker is now empty!")
166 elif blacklist.is_blacklisted(blocker):
167 print(f"WARNING: blocker='{blocker}' is blacklisted now!")
170 # DEBUG: print(f"DEBUG: blocker='{blocker}'")
# last_blocked is updated before the block list is fetched, not after.
171 instances.update_last_blocked(blocker)
173 if software == "pleroma":
174 print(f"INFO: blocker='{blocker}',software='{software}'")
175 pleroma.fetch_blocks(blocker, origin, nodeinfo_url)
176 elif software == "mastodon":
177 print(f"INFO: blocker='{blocker}',software='{software}'")
178 mastodon.fetch_blocks(blocker, origin, nodeinfo_url)
179 elif software == "friendica" or software == "misskey":
180 print(f"INFO: blocker='{blocker}',software='{software}'")
# Both fetchers are called with the blocker only; their result is iterated below
# as a mapping of block level to a list of entries.
183 if software == "friendica":
184 blocking = friendica.fetch_blocks(blocker)
185 elif software == "misskey":
186 blocking = misskey.fetch_blocks(blocker)
188 print(f"INFO: Checking {len(blocking.items())} entries from blocker='{blocker}',software='{software}' ...")
189 for block_level, blocklist in blocking.items():
190 # DEBUG: print("DEBUG: blocker,block_level,blocklist():", blocker, block_level, len(blocklist))
# NOTE(review): a domain tidy-up helper is applied to the block level - confirm
# that this is intended.
191 block_level = tidyup.domain(block_level)
192 # DEBUG: print("DEBUG: AFTER-block_level:", block_level)
193 if block_level == "":
194 print("WARNING: block_level is empty, blocker:", blocker)
197 # DEBUG: print(f"DEBUG: Checking {len(blocklist)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...")
198 for block in blocklist:
# Unpacking requires every entry to hold exactly two values, in the order
# (blocked domain, reason).
199 blocked, reason = block.values()
200 # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - BEFORE!")
201 blocked = tidyup.domain(blocked)
# An empty or missing reason is normalised to None.
202 reason = tidyup.reason(reason) if reason is not None and reason != "" else None
203 # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!")
206 print("WARNING: blocked is empty:", blocker)
208 elif blacklist.is_blacklisted(blocked):
209 # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
# Obscured domains ("*" or "?" wildcards) are resolved through
# instances.deobscure(); a warning is printed when that is not possible.
211 elif blocked.count("*") > 0:
212 # Some friendica servers also obscure domains without hash
213 row = instances.deobscure("*", blocked)
215 # DEBUG: print(f"DEBUG: row[]='{type(row)}'")
217 print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
# NOTE(review): this overwrites the `nodeinfo_url` loop variable of the outer
# `for blocker, software, origin, nodeinfo_url in rows` loop. For entries that
# are not obscured, the blocker's nodeinfo_url appears to be what reaches
# instances.add() further down - confirm whether that is intended.
222 nodeinfo_url = row[2]
223 elif blocked.count("?") > 0:
224 # Some obscure them with question marks, not sure if that's dependent on version or not
225 row = instances.deobscure("?", blocked)
227 # DEBUG: print(f"DEBUG: row[]='{type(row)}'")
229 print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
234 nodeinfo_url = row[2]
236 # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
# Final checks on the (possibly de-obscured) domain: it must be valid and must
# not end in ".arpa"; unknown domains are added as new instances.
237 if not validators.domain(blocked):
238 print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
240 elif blocked.split(".")[-1] == "arpa":
241 print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
243 elif not instances.is_registered(blocked):
244 # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker)
246 instances.add(blocked, blocker, inspect.currentframe().f_code.co_name, nodeinfo_url)
247 except network.exceptions as exception:
248 print(f"Exception during adding blocked='{blocked}',blocker='{blocker}': '{type(exception)}'")
# New blocks are inserted; the calls further down refresh the last-seen
# timestamp and the reason.
251 if not blocks.is_instance_blocked(blocker, blocked, block_level):
252 blocks.add_instance(blocker, blocked, reason, block_level)
# The body of this "reject" branch is not visible in this excerpt.
254 if block_level == "reject":
260 # DEBUG: print(f"DEBUG: Updating block last seen and reason for blocker='{blocker}',blocked='{blocked}' ...")
261 blocks.update_last_seen(blocker, blocked, block_level)
262 blocks.update_reason(reason, blocker, blocked, block_level)
264 # DEBUG: print("DEBUG: Committing changes ...")
265 fba.connection.commit()
267 print("WARNING: Unknown software:", blocker, software)
# NOTE(review): `blockdict` is neither defined nor filled in the visible lines.
269 if config.get("bot_enabled") and len(blockdict) > 0:
270 network.send_bot_post(blocker, blockdict)
272 # DEBUG: print("DEBUG: EXIT!")
274 def fetch_cs(args: argparse.Namespace):
# Imports the published federation policy of chaos.social: downloads
# federation.md from the chaossocial/meta repository, renders the Markdown to
# HTML and reads the tables following the "silenced-instances" and
# "blocked-instances" headings. Every listed domain is recorded as a block by
# 'chaos.social' (levels "silenced" and "reject"), and instances are fetched
# from domains that are not yet registered.
#
# Parameters: args - parsed command line; not read in the visible lines.
# NOTE(review): the definitions of `extensions` and `domains`, the `try:`
# header and any locking call are on lines missing from this excerpt (embedded
# numbering jumps from 275 to 301) - confirm against the complete file.
275 # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
# Timeouts are passed as a (connection_timeout, read_timeout) tuple from the config.
301 raw = fba.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
302 # DEBUG: print(f"DEBUG: raw()={len(raw)}[]='{type(raw)}'")
304 doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features='html.parser')
306 # DEBUG: print(f"DEBUG: doc()={len(doc)}[]='{type(doc)}'")
# NOTE(review): the find()/findNext() chains are not guarded; if the heading or
# table is missing from the document this presumably fails with AttributeError
# on None - confirm.
307 silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
308 # DEBUG: print(f"DEBUG: silenced[]='{type(silenced)}'")
309 domains["silenced"] = domains["silenced"] + federation.find_domains(silenced)
311 blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
312 # DEBUG: print(f"DEBUG: blocked[]='{type(blocked)}'")
313 domains["reject"] = domains["reject"] + federation.find_domains(blocked)
315 # DEBUG: print(f"DEBUG: domains()={len(domains)}")
# NOTE(review): `domains` is indexed by block level ("silenced", "reject"), so
# len(domains) here looks like the number of block levels rather than the
# number of instances - confirm the intended figure.
319 print(f"INFO: Adding {len(domains)} new instances ...")
320 for block_level in domains:
321 # DEBUG: print(f"DEBUG: block_level='{block_level}'")
# Each row is read through its "domain" and "reason" keys.
323 for row in domains[block_level]:
324 # DEBUG: print(f"DEBUG: row='{row}'")
325 if not blocks.is_instance_blocked('chaos.social', row["domain"], block_level):
326 # DEBUG: print(f"DEBUG: domain='{row['domain']}',block_level='{block_level}' blocked by chaos.social, adding ...")
327 blocks.add_instance('chaos.social', row["domain"], row["reason"], block_level)
# Unknown domains are additionally crawled, with 'chaos.social' as their origin.
329 if not instances.is_registered(row["domain"]):
331 print(f"INFO: Fetching instances from domain='{row['domain']}' ...")
332 federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)
333 except network.exceptions as exception:
334 print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_cs) from domain='{row['domain']}'")
335 instances.update_last_error(row["domain"], exception)
337 # DEBUG: print("DEBUG: Committing changes ...")
338 fba.connection.commit()
340 # DEBUG: print("DEBUG: EXIT!")
342 def fetch_fba_rss(args: argparse.Namespace):
# Reads the FBA-specific RSS feed given as args.feed, extracts one domain from
# each item link, and fetches instances from every domain that is not
# blacklisted, not already collected and not yet registered.
#
# Parameters: args - parsed command line; args.feed is the feed URL.
# NOTE(review): the initialisation of `domains`, the `continue`/`try:` lines
# and any locking call are not visible in this excerpt (gaps in the embedded
# numbering) - confirm against the complete file.
343 # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
346 print(f"INFO: Fetch FBA-specific RSS args.feed='{args.feed}' ...")
347 response = fba.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
349 # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
# Only a successful (status below 300), non-empty response is parsed.
350 if response.ok and response.status_code < 300 and len(response.text) > 0:
351 # DEBUG: print(f"DEBUG: Parsing RSS feed ({len(response.text)} Bytes) ...")
352 rss = atoma.parse_rss_bytes(response.content)
354 # DEBUG: print(f"DEBUG: rss[]='{type(rss)}'")
355 for item in rss.items:
356 # DEBUG: print(f"DEBUG: item={item}")
# The domain is taken as the text between the first and second "=" of the link.
# NOTE(review): a link without "=" raises IndexError here, and the extracted
# value is not passed through tidyup.domain() or validators.domain() in the
# visible lines - confirm whether the feed format guarantees this shape.
357 domain = item.link.split("=")[1]
359 if blacklist.is_blacklisted(domain):
360 # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
362 elif domain in domains:
363 # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
365 elif instances.is_registered(domain):
366 # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
369 # DEBUG: print(f"DEBUG: Adding domain='{domain}'")
370 domains.append(domain)
372 # DEBUG: print(f"DEBUG: domains()={len(domains)}")
# Second phase: fetch instances from each collected domain; a failure for one
# domain is printed and recorded via instances.update_last_error().
376 print(f"INFO: Adding {len(domains)} new instances ...")
377 for domain in domains:
379 print(f"INFO: Fetching instances from domain='{domain}' ...")
380 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
381 except network.exceptions as exception:
382 print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_fba_rss) from domain='{domain}'")
383 instances.update_last_error(domain, exception)
385 # DEBUG: print("DEBUG: EXIT!")
387 def fetch_fbabot_atom(args: argparse.Namespace):
# Reads the ATOM feed of the FBA bot account, collects the domains referenced
# by the links inside every entry, and fetches instances from each domain that
# is not blacklisted, not already collected and not yet registered.
#
# Parameters: args - parsed command line; not read in the visible lines.
# NOTE(review): the initialisation of `domains`, the `continue`/`try:` lines
# and any locking call are not visible in this excerpt (gaps in the embedded
# numbering) - confirm against the complete file.
388 # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
# The feed URL is hard-coded.
389 feed = "https://ryona.agency/users/fba/feed.atom"
393 print(f"INFO: Fetching ATOM feed='{feed}' from FBA bot account ...")
394 response = fba.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
396 # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
# Only a successful (status below 300), non-empty response is parsed.
397 if response.ok and response.status_code < 300 and len(response.text) > 0:
398 # DEBUG: print(f"DEBUG: Parsing ATOM feed ({len(response.text)} Bytes) ...")
399 atom = atoma.parse_atom_bytes(response.content)
401 # DEBUG: print(f"DEBUG: atom[]='{type(atom)}'")
402 for entry in atom.entries:
403 # DEBUG: print(f"DEBUG: entry[]='{type(entry)}'")
# Each entry's content is parsed as HTML and all anchor elements are inspected.
404 doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
405 # DEBUG: print(f"DEBUG: doc[]='{type(doc)}'")
406 for element in doc.findAll("a"):
# A single href may carry several comma-separated values.
# NOTE(review): element["href"] assumes every anchor has an href attribute -
# confirm that the feed guarantees this.
407 for href in element["href"].split(","):
408 # DEBUG: print(f"DEBUG: href[{type(href)}]={href}")
409 domain = tidyup.domain(href)
411 # DEBUG: print(f"DEBUG: domain='{domain}'")
412 if blacklist.is_blacklisted(domain):
413 # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
415 elif domain in domains:
416 # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
418 elif instances.is_registered(domain):
419 # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
422 # DEBUG: print(f"DEBUG: Adding domain='{domain}',domains()={len(domains)}")
423 domains.append(domain)
425 # DEBUG: print(f"DEBUG: domains({len(domains)})={domains}")
# Second phase: fetch instances from each collected domain; a failure for one
# domain is printed and recorded via instances.update_last_error().
429 print(f"INFO: Adding {len(domains)} new instances ...")
430 for domain in domains:
432 print(f"INFO: Fetching instances from domain='{domain}' ...")
433 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
434 except network.exceptions as exception:
435 print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_fbabot_atom) from domain='{domain}'")
436 instances.update_last_error(domain, exception)
438 # DEBUG: print("DEBUG: EXIT!")
440 def fetch_instances(args: argparse.Namespace) -> int:
# Fetches the peer instances of args.domain and afterwards walks through the
# registered instances of supported software whose last_instance_fetch is NULL
# or older than the configured "recheck_instance" interval, fetching instances
# from each of them as well.
#
# Parameters: args - parsed command line; args.domain is the starting domain.
# Returns: declared as int.
# NOTE(review): the return statements, the `try:` headers, the condition that
# leads to the "Not fetching more instances" exit, the cursor.execute() call
# head and the `for row in rows` loop header are not visible in this excerpt
# (gaps in the embedded numbering) - confirm against the complete file.
441 # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
# Initial fetch for the domain given on the command line.
446 print(f"INFO: Fetching instances from args.domain='{args.domain}' ...")
447 federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
448 except network.exceptions as exception:
449 print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_instances) from args.domain='{args.domain}'")
450 instances.update_last_error(args.domain, exception)
455 # DEBUG: print("DEBUG: Not fetching more instances - EXIT!")
458 # Loop through some instances
# The cut-off timestamp is passed as a "?" parameter; rows come back newest
# rowid first, in the column order domain, origin, software, nodeinfo_url.
460 "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe', 'lemmy') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
463 rows = fba.cursor.fetchall()
464 print(f"INFO: Checking {len(rows)} entries ...")
466 # DEBUG: print(f"DEBUG: domain='{row[0]}'")
# row[0]=domain, row[1]=origin, row[2]=software, row[3]=nodeinfo_url (per the
# SELECT column order above).
467 if blacklist.is_blacklisted(row[0]):
468 print("WARNING: domain is blacklisted:", row[0])
472 print(f"INFO: Fetching instances for instance '{row[0]}' ('{row[2]}') of origin='{row[1]}',nodeinfo_url='{row[3]}'")
473 federation.fetch_instances(row[0], row[1], row[2], inspect.currentframe().f_code.co_name, row[3])
474 except network.exceptions as exception:
475 print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_instances) from domain='{row[0]}'")
476 instances.update_last_error(row[0], exception)
478 # DEBUG: print("DEBUG: EXIT!")
481 def fetch_oliphant(args: argparse.Namespace):
# Downloads the CSV block lists published in the oliphant/blocklists repository
# on Codeberg and fetches instances from every listed domain that is valid, not
# blacklisted and not recently checked. When args.domain is a string, only the
# block list whose "blocker" equals it is processed.
#
# Parameters: args - parsed command line; args.domain optionally selects one blocker.
# NOTE(review): the list/dict delimiters of the blocklist table, the loop over
# `reader`, the `continue`/`try:` lines and any locking call are not visible in
# this excerpt (gaps in the embedded numbering) - confirm against the complete file.
482 # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
# All csv_url values below are relative to this base URL.
486 base_url = "https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists"
# Table of (blocker, csv_url) pairs; "mastodon.social" is listed twice with two
# different CSV files.
491 "blocker": "artisan.chat",
492 "csv_url": "mastodon/artisan.chat.csv",
494 "blocker": "mastodon.art",
495 "csv_url": "mastodon/mastodon.art.csv",
497 "blocker": "pleroma.envs.net",
498 "csv_url": "mastodon/pleroma.envs.net.csv",
500 "blocker": "oliphant.social",
501 "csv_url": "mastodon/_unified_tier3_blocklist.csv",
503 "blocker": "mastodon.online",
504 "csv_url": "mastodon/mastodon.online.csv",
506 "blocker": "mastodon.social",
507 "csv_url": "mastodon/mastodon.social.csv",
509 "blocker": "mastodon.social",
510 "csv_url": "other/missing-tier0-mastodon.social.csv",
512 "blocker": "rage.love",
513 "csv_url": "mastodon/rage.love.csv",
515 "blocker": "sunny.garden",
516 "csv_url": "mastodon/sunny.garden.csv",
518 "blocker": "solarpunk.moe",
519 "csv_url": "mastodon/solarpunk.moe.csv",
521 "blocker": "toot.wales",
522 "csv_url": "mastodon/toot.wales.csv",
524 "blocker": "union.place",
525 "csv_url": "mastodon/union.place.csv",
529 for block in blocklists:
530 # Is domain given and not equal blocker?
531 if isinstance(args.domain, str) and args.domain != block["blocker"]:
532 # DEBUG: print(f"DEBUG: Skipping blocker='{block['blocker']}', not matching args.domain='{args.domain}'")
536 print(f"INFO: Fetching csv_url='{block['csv_url']}' for blocker='{block['blocker']}' ...")
537 response = fba.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
539 # DEBUG: print(f"DEBUG: response[]='{type(response)}'")
# NOTE(review): response.content is decoded with .decode('utf-8') two lines
# below, so it is bytes; comparing bytes with the str "" is always unequal,
# which makes the emptiness check ineffective - confirm and compare against
# b"" or test the length instead.
540 if response.ok and response.content != "":
541 # DEBUG: print(f"DEBUG: Fetched {len(response.content)} Bytes, parsing CSV ...")
542 reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")
544 # DEBUG: print(f"DEBUG: reader[]='{type(reader)}'")
# The domain column is named either "#domain" or "domain", depending on the file.
548 domain = row["#domain"]
549 elif "domain" in row:
550 domain = row["domain"]
552 print(f"DEBUG: row='{row}' does not contain domain column")
# NOTE(review): str.find() returns 0 for a leading "*", so `> 0` skips domains
# that start with the wildcard; the other block-handling code in this file uses
# count("*") > 0 - confirm which behaviour is intended.
555 if domain.find("*") > 0:
556 # Try to de-obscure it
# NOTE(review): this rebinds `row`, replacing the CSV row with the lookup result.
557 row = instances.deobscure("*", domain)
559 # DEBUG: print(f"DEBUG: row[{type(row)}]='{row}'")
561 print(f"WARNING: Cannot de-obfucate domain='{domain}' - skipped!")
564 # DEBUG: print(f"DEBUG: domain='{domain}' de-obscured to '{row[0]}'")
# Final filters before crawling: validity, blacklist, and a recency check.
567 if not validators.domain(domain):
568 print(f"WARNING: domain='{domain}' is not a valid domain - skipped!")
570 elif blacklist.is_blacklisted(domain):
571 # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - skipped!")
573 elif instances.is_recent(domain):
574 # DEBUG: print(f"DEBUG: domain='{domain}' has been recently checked - skipped!")
578 print(f"INFO: Fetching instances for instane='{domain}' ...")
# The blocker of the current block list is passed as the origin of the fetched domain.
579 federation.fetch_instances(domain, block["blocker"], None, inspect.currentframe().f_code.co_name)
580 except network.exceptions as exception:
581 print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_oliphant) from domain='{domain}'")
582 instances.update_last_error(domain, exception)
584 # DEBUG: print("DEBUG: EXIT!")