1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
29 from fba import blacklist
30 from fba import config
31 from fba import federation
33 from fba import network
35 from fba.helpers import locking
36 from fba.helpers import tidyup
38 from fba.models import blocks
39 from fba.models import instances
41 from fba.networks import friendica
42 from fba.networks import mastodon
43 from fba.networks import misskey
44 from fba.networks import pleroma
# check_instance: sanity-check a single domain given on the command line.
# From the visible code: warns when args.domain is syntactically invalid
# (validators.domain), blacklisted, or already registered; otherwise reports
# it as not yet known. Presumably sets and returns a status int — confirm
# against the full source.
# NOTE(review): this listing carries embedded original line numbers ("46 ",
# "47 ", ...) and the numbering has gaps (47->49, 56->59, 59->61); the
# status assignments, the final "else:" and the return statement are missing
# from this excerpt — do not edit this block without the complete file.
46 def check_instance(args: argparse.Namespace) -> int:
47 # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - CALLED!")
49 if not validators.domain(args.domain):
50 print(f"WARNING: args.domain='{args.domain}' is not valid")
52 elif blacklist.is_blacklisted(args.domain):
53 print(f"WARNING: args.domain='{args.domain}' is blacklisted")
55 elif instances.is_registered(args.domain):
56 print(f"WARNING: args.domain='{args.domain}' is already registered")
59 print(f"INFO: args.domain='{args.domain}' is not known")
61 # DEBUG: print(f"DEBUG: status={status} - EXIT!")
# fetch_bkali: pull a list of fediverse domains from the gql.api.bka.li
# GraphQL endpoint and fetch instance data for any that are new.
# Visible flow: POST a "domainlist" query via network.post_json_api(),
# validate the response envelope (error_message / json.error.message),
# sanity-check rows["data"]["nodeinfo"], then collect each entry's domain
# into `domains` unless it lacks a "domain" key, is invalid, blacklisted,
# already registered, or recently fetched. Finally each collected domain is
# passed to federation.fetch_instances().
# NOTE(review): embedded original line numbers with gaps (65->68, 69->72,
# 77->80, 82->84, 109->111, ...) — the `domains = []` initialisation, the
# surrounding `try:`, several `continue`/`else:` lines and the return value
# handling are missing from this excerpt; treat as non-runnable reference.
64 def fetch_bkali(args: argparse.Namespace) -> int:
65 # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
68 fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
69 "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
72 # DEBUG: print(f"DEBUG: fetched[]='{type(fetched)}'")
73 if "error_message" in fetched:
74 print(f"WARNING: post_json_api() for 'gql.api.bka.li' returned error message: {fetched['error_message']}")
76 elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
77 print(f"WARNING: post_json_api() returned error: {fetched['error']['message']}")
80 rows = fetched["json"]
82 # DEBUG: print(f"DEBUG: rows({len(rows)})[]='{type(rows)}'")
84 raise Exception("WARNING: Returned no records")
85 elif "data" not in rows:
86 raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
87 elif "nodeinfo" not in rows["data"]:
88 raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")
90 for entry in rows["data"]["nodeinfo"]:
91 # DEBUG: print(f"DEBUG: entry['{type(entry)}']='{entry}'")
92 if not "domain" in entry:
93 print(f"WARNING: entry()={len(entry)} does not contain 'domain' - SKIPPED!")
95 elif not validators.domain(entry["domain"]):
96 print(f"WARNING: domain='{entry['domain']}' is not a valid domain - SKIPPED!")
98 elif blacklist.is_blacklisted(entry["domain"]):
99 # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is blacklisted - SKIPPED!")
101 elif instances.is_registered(entry["domain"]):
102 # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is already registered - SKIPPED!")
104 elif instances.is_recent(entry["domain"]):
105 # DEBUG: print(f"DEBUG: domain='{entry['domain']}' has been recently fetched - SKIPPED!")
108 # DEBUG: print(f"DEBUG: Adding domain='{entry['domain']}' ...")
109 domains.append(entry["domain"])
111 except network.exceptions as exception:
112 print(f"ERROR: Cannot fetch graphql,exception[{type(exception)}]:'{str(exception)}' - EXIT!")
115 # DEBUG: print(f"DEBUG: domains()={len(domains)}")
119 print(f"INFO: Adding {len(domains)} new instances ...")
120 for domain in domains:
122 print(f"INFO: Fetching instances from domain='{domain}' ...")
123 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
124 except network.exceptions as exception:
125 print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_bkali) from domain='{domain}'")
126 instances.set_last_error(domain, exception)
128 # DEBUG: print("DEBUG: Success - EXIT!")
# fetch_blocks: refresh block/defederation lists from registered instances.
# Visible flow: optionally validate a single args.domain; select blocker rows
# from the `instances` table (single domain, or all supported software whose
# last_blocked timestamp is older than config "recheck_block"); per blocker,
# tidy the domain, record last_blocked, then dispatch on software:
#   - pleroma / mastodon: their modules fetch AND store blocks themselves,
#   - friendica / misskey: fetch a {block_level: [ {blocked, reason}, ...]}
#     mapping here and store each entry via blocks.add_instance()/update_*().
# Obscured entries ("*" or "?" in the domain) are resolved with
# instances.deobscure() before being looked up/added. Changes are committed
# per blocker; pending instance data is flushed via instances.update_data();
# if the bot is enabled, a summary is posted (blockdict is built on lines not
# visible here).
# NOTE(review): this excerpt has embedded original line numbers with many
# gaps (144-146, 185-186, 199-200, 208-209, 218-225, 229-237, 257-263,
# 270, 276, 279 ...) — `fba.cursor.execute(` call headers, `try:` lines,
# `continue` statements, the blockdict initialisation and the deobscure
# success branches are missing; treat as non-runnable reference only.
131 def fetch_blocks(args: argparse.Namespace):
132 # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
133 if args.domain is not None and args.domain != "":
134 # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - checking ...")
135 if not validators.domain(args.domain):
136 print(f"WARNING: domain='{args.domain}' is not valid.")
138 elif blacklist.is_blacklisted(args.domain):
139 print(f"WARNING: domain='{args.domain}' is blacklisted, won't check it!")
141 elif not instances.is_registered(args.domain):
142 print(f"WARNING: domain='{args.domain}' is not registered, please run ./fba.py fetch_instances {args.domain} first.")
147 if args.domain is not None and args.domain != "":
148 # Re-check single domain
149 # DEBUG: print(f"DEBUG: Querying database for single args.domain='{args.domain}' ...")
151 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
154 # Re-check after "timeout" (aka. minimum interval)
156 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
159 rows = fba.cursor.fetchall()
160 print(f"INFO: Checking {len(rows)} entries ...")
161 for blocker, software, origin, nodeinfo_url in rows:
162 # DEBUG: print("DEBUG: BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url)
164 blocker = tidyup.domain(blocker)
165 # DEBUG: print("DEBUG: AFTER blocker,software:", blocker, software)
168 print("WARNING: blocker is now empty!")
170 elif blacklist.is_blacklisted(blocker):
171 print(f"WARNING: blocker='{blocker}' is blacklisted now!")
174 # DEBUG: print(f"DEBUG: blocker='{blocker}'")
175 instances.set_last_blocked(blocker)
177 if software == "pleroma":
178 print(f"INFO: blocker='{blocker}',software='{software}'")
179 pleroma.fetch_blocks(blocker, origin, nodeinfo_url)
180 elif software == "mastodon":
181 print(f"INFO: blocker='{blocker}',software='{software}'")
182 mastodon.fetch_blocks(blocker, origin, nodeinfo_url)
183 elif software == "friendica" or software == "misskey":
184 print(f"INFO: blocker='{blocker}',software='{software}'")
187 if software == "friendica":
188 blocking = friendica.fetch_blocks(blocker)
189 elif software == "misskey":
190 blocking = misskey.fetch_blocks(blocker)
192 print(f"INFO: Checking {len(blocking.items())} entries from blocker='{blocker}',software='{software}' ...")
193 for block_level, blocklist in blocking.items():
194 # DEBUG: print("DEBUG: blocker,block_level,blocklist():", blocker, block_level, len(blocklist))
195 block_level = tidyup.domain(block_level)
196 # DEBUG: print("DEBUG: AFTER-block_level:", block_level)
197 if block_level == "":
198 print("WARNING: block_level is empty, blocker:", blocker)
201 # DEBUG: print(f"DEBUG: Checking {len(blocklist)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...")
202 for block in blocklist:
203 blocked, reason = block.values()
204 # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - BEFORE!")
205 blocked = tidyup.domain(blocked)
206 reason = tidyup.reason(reason) if reason is not None and reason != "" else None
207 # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!")
210 print("WARNING: blocked is empty:", blocker)
212 elif blacklist.is_blacklisted(blocked):
213 # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
215 elif blocked.count("*") > 0:
216 # Some friendica servers also obscure domains without hash
217 row = instances.deobscure("*", blocked)
219 # DEBUG: print(f"DEBUG: row[]='{type(row)}'")
221 print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocker='{blocker}',software='{software}' - SKIPPED!")
226 nodeinfo_url = row[2]
227 elif blocked.count("?") > 0:
228 # Some obscure them with question marks, not sure if that's dependent on version or not
229 row = instances.deobscure("?", blocked)
231 # DEBUG: print(f"DEBUG: row[]='{type(row)}'")
233 print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocker='{blocker}',software='{software}' - SKIPPED!")
238 nodeinfo_url = row[2]
240 # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
241 if not validators.domain(blocked):
242 print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - SKIPPED!")
244 elif blocked.endswith(".arpa"):
245 # DEBUG: print(f"DEBUG: blocked='{blocked}' is ending with '.arpa' - SKIPPED!")
247 elif not instances.is_registered(blocked):
248 # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker)
250 instances.add(blocked, blocker, inspect.currentframe().f_code.co_name, nodeinfo_url)
251 except network.exceptions as exception:
252 print(f"Exception during adding blocked='{blocked}',blocker='{blocker}': '{type(exception)}'")
255 if not blocks.is_instance_blocked(blocker, blocked, block_level):
256 blocks.add_instance(blocker, blocked, reason, block_level)
258 if block_level == "reject":
264 # DEBUG: print(f"DEBUG: Updating block last seen and reason for blocker='{blocker}',blocked='{blocked}' ...")
265 blocks.update_last_seen(blocker, blocked, block_level)
266 blocks.update_reason(reason, blocker, blocked, block_level)
268 # DEBUG: print("DEBUG: Committing changes ...")
269 fba.connection.commit()
271 print("WARNING: Unknown software:", blocker, software)
273 if instances.has_pending(blocker):
274 # DEBUG: print(f"DEBUG: Invoking instances.update_data({blocker}) ...")
275 instances.update_data(blocker)
277 if config.get("bot_enabled") and len(blockdict) > 0:
278 network.send_bot_post(blocker, blockdict)
280 # DEBUG: print("DEBUG: EXIT!")
# fetch_cs: import chaos.social's published block list.
# Visible flow: download federation.md from the chaossocial/meta repository,
# render it to HTML via markdown + BeautifulSoup, locate the tables under the
# "silenced-instances" and "blocked-instances" headings, and extend
# domains["silenced"] / domains["reject"] with federation.find_domains()
# results. Each entry is then recorded as a block by 'chaos.social' (unless
# already present) and unregistered domains get a full instance fetch.
# Changes are committed at the end.
# NOTE(review): embedded original line numbers with large gaps (283->309,
# 335->337, ...) — the `domains = {...}` initialisation, the markdown
# `extensions` list, and any `try:` lines are missing from this excerpt;
# treat as non-runnable reference only.
282 def fetch_cs(args: argparse.Namespace):
283 # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
309 raw = fba.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
310 # DEBUG: print(f"DEBUG: raw()={len(raw)}[]='{type(raw)}'")
312 doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features='html.parser')
314 # DEBUG: print(f"DEBUG: doc()={len(doc)}[]='{type(doc)}'")
315 silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
316 # DEBUG: print(f"DEBUG: silenced[]='{type(silenced)}'")
317 domains["silenced"] = domains["silenced"] + federation.find_domains(silenced)
319 blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
320 # DEBUG: print(f"DEBUG: blocked[]='{type(blocked)}'")
321 domains["reject"] = domains["reject"] + federation.find_domains(blocked)
323 # DEBUG: print(f"DEBUG: domains()={len(domains)}")
327 print(f"INFO: Adding {len(domains)} new instances ...")
328 for block_level in domains:
329 # DEBUG: print(f"DEBUG: block_level='{block_level}'")
331 for row in domains[block_level]:
332 # DEBUG: print(f"DEBUG: row='{row}'")
333 if not blocks.is_instance_blocked('chaos.social', row["domain"], block_level):
334 # DEBUG: print(f"DEBUG: domain='{row['domain']}',block_level='{block_level}' blocked by chaos.social, adding ...")
335 blocks.add_instance('chaos.social', row["domain"], row["reason"], block_level)
337 if not instances.is_registered(row["domain"]):
339 print(f"INFO: Fetching instances from domain='{row['domain']}' ...")
340 federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)
341 except network.exceptions as exception:
342 print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_cs) from domain='{row['domain']}'")
343 instances.set_last_error(row["domain"], exception)
345 # DEBUG: print("DEBUG: Committing changes ...")
346 fba.connection.commit()
348 # DEBUG: print("DEBUG: EXIT!")
# fetch_fba_rss: harvest domains from an FBA-specific RSS feed (URL given
# via args.feed) and fetch instance data for new ones.
# Visible flow: GET the feed, parse it with atoma.parse_rss_bytes(), take
# each item's domain from the second "="-separated part of item.link
# (i.e. a query-string value — presumably "?domain=<host>"; confirm feed
# format), skip blacklisted / duplicate / already-registered domains,
# then run federation.fetch_instances() per collected domain.
# NOTE(review): embedded original line numbers with gaps (351->354,
# 368->370, 380->384, ...) — the `domains = []` initialisation, `continue`
# statements and `try:` lines are missing from this excerpt; treat as
# non-runnable reference only.
350 def fetch_fba_rss(args: argparse.Namespace):
351 # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
354 print(f"INFO: Fetch FBA-specific RSS args.feed='{args.feed}' ...")
355 response = fba.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
357 # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
358 if response.ok and response.status_code < 300 and len(response.text) > 0:
359 # DEBUG: print(f"DEBUG: Parsing RSS feed ({len(response.text)} Bytes) ...")
360 rss = atoma.parse_rss_bytes(response.content)
362 # DEBUG: print(f"DEBUG: rss[]='{type(rss)}'")
363 for item in rss.items:
364 # DEBUG: print(f"DEBUG: item={item}")
365 domain = item.link.split("=")[1]
367 if blacklist.is_blacklisted(domain):
368 # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
370 elif domain in domains:
371 # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
373 elif instances.is_registered(domain):
374 # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
377 # DEBUG: print(f"DEBUG: Adding domain='{domain}'")
378 domains.append(domain)
380 # DEBUG: print(f"DEBUG: domains()={len(domains)}")
384 print(f"INFO: Adding {len(domains)} new instances ...")
385 for domain in domains:
387 print(f"INFO: Fetching instances from domain='{domain}' ...")
388 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
389 except network.exceptions as exception:
390 print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_fba_rss) from domain='{domain}'")
391 instances.set_last_error(domain, exception)
393 # DEBUG: print("DEBUG: EXIT!")
# fetch_fbabot_atom: harvest domains from the FBA bot account's ATOM feed
# (hard-coded to ryona.agency) and fetch instance data for new ones.
# Visible flow: GET the feed, parse with atoma.parse_atom_bytes(); for each
# entry, parse its HTML content with BeautifulSoup, walk every <a> element,
# split its href on "," (entries apparently pack several domains into one
# link — confirm against live feed), tidy each candidate into a bare domain,
# skip blacklisted / duplicate / already-registered ones, then run
# federation.fetch_instances() per collected domain.
# NOTE(review): embedded original line numbers with gaps (397->401,
# 421->423, 433->437, ...) — the `domains = []` initialisation, `continue`
# statements and `try:` lines are missing from this excerpt; treat as
# non-runnable reference only.
395 def fetch_fbabot_atom(args: argparse.Namespace):
396 # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
397 feed = "https://ryona.agency/users/fba/feed.atom"
401 print(f"INFO: Fetching ATOM feed='{feed}' from FBA bot account ...")
402 response = fba.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
404 # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
405 if response.ok and response.status_code < 300 and len(response.text) > 0:
406 # DEBUG: print(f"DEBUG: Parsing ATOM feed ({len(response.text)} Bytes) ...")
407 atom = atoma.parse_atom_bytes(response.content)
409 # DEBUG: print(f"DEBUG: atom[]='{type(atom)}'")
410 for entry in atom.entries:
411 # DEBUG: print(f"DEBUG: entry[]='{type(entry)}'")
412 doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
413 # DEBUG: print(f"DEBUG: doc[]='{type(doc)}'")
414 for element in doc.findAll("a"):
415 for href in element["href"].split(","):
416 # DEBUG: print(f"DEBUG: href[{type(href)}]={href}")
417 domain = tidyup.domain(href)
419 # DEBUG: print(f"DEBUG: domain='{domain}'")
420 if blacklist.is_blacklisted(domain):
421 # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
423 elif domain in domains:
424 # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
426 elif instances.is_registered(domain):
427 # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
430 # DEBUG: print(f"DEBUG: Adding domain='{domain}',domains()={len(domains)}")
431 domains.append(domain)
433 # DEBUG: print(f"DEBUG: domains({len(domains)})={domains}")
437 print(f"INFO: Adding {len(domains)} new instances ...")
438 for domain in domains:
440 print(f"INFO: Fetching instances from domain='{domain}' ...")
441 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
442 except network.exceptions as exception:
443 print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_fbabot_atom) from domain='{domain}'")
444 instances.set_last_error(domain, exception)
446 # DEBUG: print("DEBUG: EXIT!")
# fetch_instances: fetch instance data for args.domain, then (on lines not
# fully visible here) loop over stale registered instances.
# Visible flow: federation.fetch_instances() for args.domain first; then
# select rows of supported software whose last_instance_fetch is older than
# config "recheck_instance", skip blacklisted domains, and re-fetch each
# using its stored origin/software/nodeinfo_url. Presumably a flag such as
# --single short-circuits before the loop (the conditional return around
# original lines 459-463 is missing) — confirm against the full source.
# NOTE(review): embedded original line numbers with gaps (449->454,
# 458->463, 472->474, 476->480, ...) — `try:` lines, the row-loop header
# (`for row in rows:`) and `continue` statements are missing from this
# excerpt; treat as non-runnable reference only.
448 def fetch_instances(args: argparse.Namespace) -> int:
449 # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
454 print(f"INFO: Fetching instances from args.domain='{args.domain}' ...")
455 federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
456 except network.exceptions as exception:
457 print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_instances) from args.domain='{args.domain}'")
458 instances.set_last_error(args.domain, exception)
463 # DEBUG: print("DEBUG: Not fetching more instances - EXIT!")
466 # Loop through some instances
468 "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe', 'lemmy') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
471 rows = fba.cursor.fetchall()
472 print(f"INFO: Checking {len(rows)} entries ...")
474 # DEBUG: print(f"DEBUG: domain='{row[0]}'")
475 if blacklist.is_blacklisted(row[0]):
476 print("WARNING: domain is blacklisted:", row[0])
480 print(f"INFO: Fetching instances for instance '{row[0]}' ('{row[2]}') of origin='{row[1]}',nodeinfo_url='{row[3]}'")
481 federation.fetch_instances(row[0], row[1], row[2], inspect.currentframe().f_code.co_name, row[3])
482 except network.exceptions as exception:
483 print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_instances) from domain='{row[0]}'")
484 instances.set_last_error(row[0], exception)
486 # DEBUG: print("DEBUG: Success - EXIT!")
# fetch_oliphant: import the community-maintained "oliphant" CSV blocklists
# hosted on Codeberg.
# Visible flow: a hard-coded table maps a blocker domain to its CSV path
# under base_url; per blocklist, skip when args.domain is given and does not
# match the blocker (or was already handled); download the CSV, parse with
# csv.DictReader, read the domain from either the "#domain" or "domain"
# column, mark it handled in `domains`, and hand it to fba.process_domain()
# with the blocker as origin.
# NOTE(review): embedded original line numbers with gaps (490->494,
# 494->499, 533->538, 556->560, ...) — the blocklists list/dict braces, the
# `domains = []` initialisation, `continue` statements, `try:` lines and the
# per-row loop header are missing from this excerpt; treat as non-runnable
# reference only.
489 def fetch_oliphant(args: argparse.Namespace):
490 # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
494 base_url = "https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists"
499 "blocker": "artisan.chat",
500 "csv_url": "mastodon/artisan.chat.csv",
502 "blocker": "mastodon.art",
503 "csv_url": "mastodon/mastodon.art.csv",
505 "blocker": "pleroma.envs.net",
506 "csv_url": "mastodon/pleroma.envs.net.csv",
508 "blocker": "oliphant.social",
509 "csv_url": "mastodon/_unified_tier3_blocklist.csv",
511 "blocker": "mastodon.online",
512 "csv_url": "mastodon/mastodon.online.csv",
514 "blocker": "mastodon.social",
515 "csv_url": "mastodon/mastodon.social.csv",
517 "blocker": "mastodon.social",
518 "csv_url": "other/missing-tier0-mastodon.social.csv",
520 "blocker": "rage.love",
521 "csv_url": "mastodon/rage.love.csv",
523 "blocker": "sunny.garden",
524 "csv_url": "mastodon/sunny.garden.csv",
526 "blocker": "solarpunk.moe",
527 "csv_url": "mastodon/solarpunk.moe.csv",
529 "blocker": "toot.wales",
530 "csv_url": "mastodon/toot.wales.csv",
532 "blocker": "union.place",
533 "csv_url": "mastodon/union.place.csv",
538 for block in blocklists:
539 # Is domain given and not equal blocker?
540 if isinstance(args.domain, str) and args.domain != block["blocker"]:
541 # DEBUG: print(f"DEBUG: Skipping blocker='{block['blocker']}', not matching args.domain='{args.domain}'")
543 elif args.domain in domains:
544 # DEBUG: print(f"DEBUG: args.domain='{args.domain}' already handled - SKIPPED!")
548 print(f"INFO: Fetching csv_url='{block['csv_url']}' for blocker='{block['blocker']}' ...")
549 response = fba.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
551 # DEBUG: print(f"DEBUG: response[]='{type(response)}'")
552 if response.ok and response.content != "":
553 # DEBUG: print(f"DEBUG: Fetched {len(response.content)} Bytes, parsing CSV ...")
554 reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")
556 # DEBUG: print(f"DEBUG: reader[]='{type(reader)}'")
560 domain = row["#domain"]
561 elif "domain" in row:
562 domain = row["domain"]
564 # DEBUG: print(f"DEBUG: row='{row}' does not contain domain column")
567 # DEBUG: print(f"DEBUG: Marking domain='{domain}' as handled")
568 domains.append(domain)
570 # DEBUG: print(f"DEBUG: Processing domain='{domain}' ...")
571 processed = fba.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
573 # DEBUG: print(f"DEBUG: processed='{processed}'")
575 # DEBUG: print("DEBUG: EXIT!")
# fetch_txt: import plain-text blocklists (one domain per line).
# Visible flow: for each URL in `urls` (only seirdy.one's bsl.txt is visible
# here), download the file, split the body on newlines, and feed every
# domain to fba.process_domain() with 'seirdy.one' as origin; the
# `processed` flag apparently gates a skipped-domain debug branch (the
# `if processed:` / `else:` lines are not visible).
# NOTE(review): embedded original line numbers with gaps (578->583,
# 586->588, 597->601, ...) — the `urls = [...]` list opener, the URL loop
# header and several conditionals are missing from this excerpt; treat as
# non-runnable reference only.
577 def fetch_txt(args: argparse.Namespace):
578 # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
583 "https://seirdy.one/pb/bsl.txt",
586 print(f"INFO: Checking {len(urls)} text file(s) ...")
588 # DEBUG: print(f"DEBUG: Fetching url='{url}' ...")
589 response = fba.fetch_url(url, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
591 # DEBUG: print(f"DEBUG: response[]='{type(response)}'")
592 if response.ok and response.text != "":
593 # DEBUG: print(f"DEBUG: Returned {len(response.text.strip())} Bytes for processing")
594 domains = response.text.split("\n")
596 print(f"INFO: Processing {len(domains)} domains ...")
597 for domain in domains:
601 # DEBUG: print(f"DEBUG: domain='{domain}'")
602 processed = fba.process_domain(domain, 'seirdy.one', inspect.currentframe().f_code.co_name)
604 # DEBUG: print(f"DEBUG: processed='{processed}'")
606 # DEBUG: print(f"DEBUG: domain='{domain}' was not generically processed - SKIPPED!")
609 # DEBUG: print("DEBUG: EXIT!")