1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
29 from fba import blacklist
30 from fba import blocks
31 from fba import config
32 from fba import federation
34 from fba import instances
35 from fba import locking
36 from fba import network
38 from fba.helpers import tidyup
40 from fba.networks import friendica
41 from fba.networks import mastodon
42 from fba.networks import misskey
43 from fba.networks import pleroma
def check_instance(args: argparse.Namespace) -> int:
    """Report whether ``args.domain`` is a new, acceptable instance domain.

    Prints exactly one line describing the outcome.

    Args:
        args: Parsed command-line arguments; only ``args.domain`` is read.

    Returns:
        0 when the domain is valid, not blacklisted and not yet registered,
        otherwise a non-zero status (100 invalid, 101 blacklisted,
        102 already registered).
    """
    # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - CALLED!")
    # NOTE(review): the non-zero status values were reconstructed, the
    # statements assigning them were missing from the reviewed text - confirm.
    status = 0
    if not validators.domain(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is not valid")
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is blacklisted")
        status = 101
    elif instances.is_registered(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is already registered")
        status = 102
    else:
        print(f"INFO: args.domain='{args.domain}' is not known")

    # DEBUG: print(f"DEBUG: status={status} - EXIT!")
    return status
def fetch_bkali(args: argparse.Namespace) -> int:
    """Fetch the domain list from gql.api.bka.li and crawl all new domains.

    Domains that are missing, invalid, blacklisted or already registered
    are skipped; every remaining one is handed to
    ``federation.fetch_instances()``.

    Args:
        args: Parsed command-line arguments (not read by this command).

    Returns:
        0 on success, a non-zero status when the API reported an error or
        could not be reached.

    Raises:
        Exception: When the JSON response is empty or lacks the
            ``data``/``nodeinfo`` keys.
    """
    # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
    # NOTE(review): the non-zero return values were reconstructed, the
    # return statements were missing from the reviewed text - confirm.
    domains = []
    try:
        fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        }))

        # DEBUG: print(f"DEBUG: fetched[]='{type(fetched)}'")
        if "error_message" in fetched:
            print(f"WARNING: post_json_api() for 'gql.api.bka.li' returned error message: {fetched['error_message']}")
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            # The error object lives below 'json' (that is what the condition
            # above tests); reading fetched['error'] raised KeyError here.
            print(f"WARNING: post_json_api() returned error: {fetched['json']['error']['message']}")
            return 101

        rows = fetched["json"]

        # DEBUG: print(f"DEBUG: rows({len(rows)})[]='{type(rows)}'")
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            # DEBUG: print(f"DEBUG: entry['{type(entry)}']='{entry}'")
            if "domain" not in entry:
                print(f"WARNING: entry()={len(entry)} does not contain 'domain' - SKIPPED!")
                continue
            elif not validators.domain(entry["domain"]):
                print(f"WARNING: domain='{entry['domain']}' is not a valid domain - SKIPPED!")
                continue
            elif blacklist.is_blacklisted(entry["domain"]):
                # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is blacklisted - SKIPPED!")
                continue
            elif instances.is_registered(entry["domain"]):
                # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is already registered - SKIPPED!")
                continue

            # DEBUG: print(f"DEBUG: Adding domain='{entry['domain']}' ...")
            domains.append(entry["domain"])

    except network.exceptions as exception:
        print(f"ERROR: Cannot fetch graphql,exception[{type(exception)}]:'{str(exception)}' - EXIT!")
        return 102

    # DEBUG: print(f"DEBUG: domains()={len(domains)}")
    if len(domains) > 0:
        locking.acquire()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            try:
                print(f"INFO: Fetching instances from domain='{domain}' ...")
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                # One unreachable instance must not abort the whole run
                print(f"WARNING: Exception '{type(exception)}' during fetching instances from domain='{domain}'")
                instances.update_last_error(domain, exception)

    # DEBUG: print("DEBUG: EXIT!")
    return 0
def fetch_blocks(args: argparse.Namespace):
    """Re-check block lists of one instance or of all instances that are due.

    With ``args.domain`` set, only that (valid, non-blacklisted, registered)
    domain is checked. Otherwise all instances with supported software whose
    ``last_blocked`` is unset or older than the configured ``recheck_block``
    interval are checked.

    Args:
        args: Parsed command-line arguments; only ``args.domain`` is read.
    """
    # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
    single_domain = args.domain is not None and args.domain != ""
    if single_domain:
        # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - checking ...")
        if not validators.domain(args.domain):
            print(f"WARNING: domain='{args.domain}' is not valid.")
            return
        elif blacklist.is_blacklisted(args.domain):
            print(f"WARNING: domain='{args.domain}' is blacklisted, won't check it!")
            return
        elif not instances.is_registered(args.domain):
            print(f"WARNING: domain='{args.domain}' is not registered, please run ./fba.py fetch_instances {args.domain} first.")
            return

    locking.acquire()

    if single_domain:
        # Re-check single domain
        # DEBUG: print(f"DEBUG: Querying database for single args.domain='{args.domain}' ...")
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    # Resolved here so that helpers still record this command's name
    command = inspect.currentframe().f_code.co_name

    rows = fba.cursor.fetchall()
    print(f"INFO: Checking {len(rows)} entries ...")
    for blocker, software, origin, nodeinfo_url in rows:
        # DEBUG: print("DEBUG: BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url)
        blockdict = []
        blocker = tidyup.domain(blocker)
        # DEBUG: print("DEBUG: AFTER blocker,software:", blocker, software)

        if blocker == "":
            print("WARNING: blocker is now empty!")
            continue
        elif blacklist.is_blacklisted(blocker):
            print(f"WARNING: blocker='{blocker}' is blacklisted now!")
            continue

        # DEBUG: print(f"DEBUG: blocker='{blocker}'")
        instances.update_last_blocked(blocker)

        if software == "pleroma":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            pleroma.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "mastodon":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            mastodon.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software in ("friendica", "misskey"):
            print(f"INFO: blocker='{blocker}',software='{software}'")

            if software == "friendica":
                blocking = friendica.fetch_blocks(blocker)
            else:
                blocking = misskey.fetch_blocks(blocker)

            print(f"INFO: Checking {len(blocking.items())} entries from blocker='{blocker}',software='{software}' ...")
            blockdict = _record_blocks(blocker, software, blocking, nodeinfo_url, command)

            # DEBUG: print("DEBUG: Committing changes ...")
            fba.connection.commit()
        else:
            print("WARNING: Unknown software:", blocker, software)

        if config.get("bot_enabled") and len(blockdict) > 0:
            network.send_bot_post(blocker, blockdict)

    # DEBUG: print("DEBUG: EXIT!")


def _deobfuscate_blocked(blocked: str):
    """Return the first registered domain matching an obscured name, or None.

    Every "*" and "?" in ``blocked`` is turned into the single-character
    SQL LIKE wildcard "_" before the lookup.
    """
    fba.cursor.execute(
        "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_").replace("?", "_")]
    )

    searchres = fba.cursor.fetchone()

    # DEBUG: print(f"DEBUG: searchres[]='{type(searchres)}'")
    return None if searchres is None else searchres[0]


def _record_blocks(blocker: str, software: str, blocking: dict, nodeinfo_url, command: str) -> list:
    """Store all blocks reported by ``blocker`` and collect new "reject" ones.

    Args:
        blocker: Domain of the blocking instance.
        software: Software name of the blocker (used in messages only).
        blocking: Mapping of block level to a list of block entries.
        nodeinfo_url: Value forwarded to ``instances.add()`` for new domains.
        command: Command name forwarded to ``instances.add()``.

    Returns:
        List of ``{"blocked": ..., "reason": ...}`` dicts, one per newly
        added block of level "reject".
    """
    blockdict = []
    for block_level, blocklist in blocking.items():
        # DEBUG: print("DEBUG: blocker,block_level,blocklist():", blocker, block_level, len(blocklist))
        block_level = tidyup.domain(block_level)
        # DEBUG: print("DEBUG: AFTER-block_level:", block_level)
        if block_level == "":
            print("WARNING: block_level is empty, blocker:", blocker)
            continue

        # DEBUG: print(f"DEBUG: Checking {len(blocklist)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...")
        for block in blocklist:
            # Relies on each entry holding exactly two values: domain, reason
            blocked, reason = block.values()
            # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - BEFORE!")
            blocked = tidyup.domain(blocked)
            reason = tidyup.reason(reason) if reason is not None and reason != "" else None
            # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!")

            if blocked == "":
                print("WARNING: blocked is empty:", blocker)
                continue
            elif blacklist.is_blacklisted(blocked):
                # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
                continue
            elif "*" in blocked or "?" in blocked:
                # Some servers obscure domains with "*" or "?" instead of
                # providing a hash; try to find the real name in the database.
                resolved = _deobfuscate_blocked(blocked)
                if resolved is None:
                    print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
                    continue

                blocked = resolved

            # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
            if not validators.domain(blocked):
                print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
                continue
            elif blocked.split(".")[-1] == "arpa":
                print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
                continue
            elif not instances.is_registered(blocked):
                # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker)
                try:
                    # NOTE(review): nodeinfo_url is the value of the blocker's
                    # row, as before; whether instances.add() should receive
                    # it for a different domain needs confirmation.
                    instances.add(blocked, blocker, command, nodeinfo_url)
                except network.exceptions as exception:
                    print(f"Exception during adding blocked='{blocked}',blocker='{blocker}': '{type(exception)}'")
                    continue

            if not blocks.is_instance_blocked(blocker, blocked, block_level):
                blocks.add_instance(blocker, blocked, reason, block_level)

                if block_level == "reject":
                    blockdict.append({
                        "blocked": blocked,
                        "reason" : reason
                    })
            else:
                # DEBUG: print(f"DEBUG: Updating block last seen and reason for blocker='{blocker}',blocked='{blocked}' ...")
                blocks.update_last_seen(blocker, blocked, block_level)
                blocks.update_reason(reason, blocker, blocked, block_level)

    return blockdict
def fetch_cs(args: argparse.Namespace):
    """Import the published block list of chaos.social.

    Downloads ``federation.md`` from the chaossocial/meta repository,
    renders it to HTML and reads the tables following the
    "silenced-instances" and "blocked-instances" headings. Each listed
    domain is stored as blocked by chaos.social (levels "silenced" and
    "reject") and crawled when it is not registered yet.

    Args:
        args: Parsed command-line arguments (not read by this command).
    """
    # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
    # NOTE(review): this extension list was reconstructed, it was missing
    # from the reviewed text - confirm. The lookups below depend on heading
    # ids ("toc") and on table rendering (part of "extra").
    extensions = [
        'extra',
        'abbr',
        'attr_list',
        'def_list',
        'fenced_code',
        'footnotes',
        'md_in_html',
        'admonition',
        'codehilite',
        'legacy_attrs',
        'legacy_em',
        'meta',
        'nl2br',
        'sane_lists',
        'smarty',
        'toc',
        'wikilinks'
    ]

    domains = {
        "silenced": [],
        "reject"  : [],
    }

    raw = fba.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    # DEBUG: print(f"DEBUG: raw()={len(raw)}[]='{type(raw)}'")

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features='html.parser')

    # DEBUG: print(f"DEBUG: doc()={len(doc)}[]='{type(doc)}'")
    for block_level, heading_id in (("silenced", "silenced-instances"), ("reject", "blocked-instances")):
        tbody = _find_table_body(doc, heading_id)
        # DEBUG: print(f"DEBUG: tbody[]='{type(tbody)}'")
        if tbody is None:
            # A changed document layout must not crash with AttributeError
            print(f"WARNING: Cannot find table for heading id='{heading_id}' - SKIPPED!")
            continue

        domains[block_level] = domains[block_level] + federation.find_domains(tbody)

    # The dict always has two keys, so count the rows it actually holds
    total = sum(len(rows) for rows in domains.values())

    # DEBUG: print(f"DEBUG: total={total}")
    if total > 0:
        locking.acquire()

        print(f"INFO: Adding {total} new instances ...")
        for block_level in domains:
            # DEBUG: print(f"DEBUG: block_level='{block_level}'")

            for row in domains[block_level]:
                # DEBUG: print(f"DEBUG: row='{row}'")
                if not blocks.is_instance_blocked('chaos.social', row["domain"], block_level):
                    # DEBUG: print(f"DEBUG: domain='{row['domain']}',block_level='{block_level}' blocked by chaos.social, adding ...")
                    blocks.add_instance('chaos.social', row["domain"], row["reason"], block_level)

                if not instances.is_registered(row["domain"]):
                    try:
                        print(f"INFO: Fetching instances from domain='{row['domain']}' ...")
                        federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        print(f"WARNING: Exception '{type(exception)}' during fetching instances from domain='{row['domain']}'")
                        instances.update_last_error(row["domain"], exception)

        # DEBUG: print("DEBUG: Committing changes ...")
        fba.connection.commit()

    # DEBUG: print("DEBUG: EXIT!")


def _find_table_body(doc, heading_id: str):
    """Return the <tbody> of the first table after the given <h2>, or None."""
    heading = doc.find("h2", {"id": heading_id})
    if heading is None:
        return None

    table = heading.find_next("table")
    if table is None:
        return None

    return table.find("tbody")
def fetch_fba_rss(args: argparse.Namespace):
    """Crawl all new domains announced in an FBA-specific RSS feed.

    The domain is taken from each item's link (the text following the
    first "="). Invalid, blacklisted, duplicate and already registered
    domains are skipped.

    Args:
        args: Parsed command-line arguments; ``args.feed`` is the feed URL.
    """
    # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
    domains = []

    print(f"INFO: Fetch FBA-specific RSS args.feed='{args.feed}' ...")
    response = fba.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        # DEBUG: print(f"DEBUG: Parsing RSS feed ({len(response.text)} Bytes) ...")
        rss = atoma.parse_rss_bytes(response.content)

        # DEBUG: print(f"DEBUG: rss[]='{type(rss)}'")
        for item in rss.items:
            # DEBUG: print(f"DEBUG: item={item}")
            link = item.link
            if not link or "=" not in link:
                # Without this guard split("=")[1] raises on such items
                print(f"WARNING: link='{link}' does not contain a domain - SKIPPED!")
                continue

            domain = link.split("=")[1]

            if not validators.domain(domain):
                # Feed content is external input, validate like other commands
                print(f"WARNING: domain='{domain}' is not a valid domain - SKIPPED!")
                continue
            elif blacklist.is_blacklisted(domain):
                # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
                continue
            elif domain in domains:
                # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
                continue
            elif instances.is_registered(domain):
                # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
                continue

            # DEBUG: print(f"DEBUG: Adding domain='{domain}'")
            domains.append(domain)

    # DEBUG: print(f"DEBUG: domains()={len(domains)}")
    if len(domains) > 0:
        locking.acquire()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            try:
                print(f"INFO: Fetching instances from domain='{domain}' ...")
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                print(f"WARNING: Exception '{type(exception)}' during fetching instances from domain='{domain}'")
                instances.update_last_error(domain, exception)

    # DEBUG: print("DEBUG: EXIT!")
def fetch_fbabot_atom(args: argparse.Namespace):
    """Crawl all new domains linked in the FBA bot account's ATOM feed.

    Every ``href`` of every link in each entry's HTML content is split on
    "," and tidied into a domain. Empty, blacklisted, duplicate and already
    registered domains are skipped.

    Args:
        args: Parsed command-line arguments (not read by this command).
    """
    # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
    feed = "https://ryona.agency/users/fba/feed.atom"

    domains = []

    print(f"INFO: Fetching ATOM feed='{feed}' from FBA bot account ...")
    response = fba.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        # DEBUG: print(f"DEBUG: Parsing ATOM feed ({len(response.text)} Bytes) ...")
        atom = atoma.parse_atom_bytes(response.content)

        # DEBUG: print(f"DEBUG: atom[]='{type(atom)}'")
        for entry in atom.entries:
            # DEBUG: print(f"DEBUG: entry[]='{type(entry)}'")
            if entry.content is None:
                # Entry without content has nothing to parse
                continue

            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            # DEBUG: print(f"DEBUG: doc[]='{type(doc)}'")
            # href=True skips anchors without href, element["href"] raised on them
            for element in doc.find_all("a", href=True):
                for href in element["href"].split(","):
                    # DEBUG: print(f"DEBUG: href[{type(href)}]={href}")
                    domain = tidyup.domain(href)

                    # DEBUG: print(f"DEBUG: domain='{domain}'")
                    if domain == "":
                        # DEBUG: print("DEBUG: domain is empty after tidyup - SKIPPED!")
                        continue
                    elif blacklist.is_blacklisted(domain):
                        # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
                        continue
                    elif domain in domains:
                        # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
                        continue
                    elif instances.is_registered(domain):
                        # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
                        continue

                    # DEBUG: print(f"DEBUG: Adding domain='{domain}',domains()={len(domains)}")
                    domains.append(domain)

    # DEBUG: print(f"DEBUG: domains({len(domains)})={domains}")
    if len(domains) > 0:
        locking.acquire()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            try:
                print(f"INFO: Fetching instances from domain='{domain}' ...")
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                print(f"WARNING: Exception '{type(exception)}' during fetching instances from domain='{domain}'")
                instances.update_last_error(domain, exception)

    # DEBUG: print("DEBUG: EXIT!")
def fetch_instances(args: argparse.Namespace) -> int:
    """Crawl ``args.domain`` and then all known instances that are due.

    After the initial fetch, every non-blacklisted instance with supported
    software whose ``last_instance_fetch`` is unset or older than the
    configured ``recheck_instance`` interval is fetched as well.

    Args:
        args: Parsed command-line arguments; ``args.domain`` is read.

    Returns:
        0 on success, non-zero when the initial fetch failed.
    """
    # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
    locking.acquire()

    # Initial fetch
    try:
        print(f"INFO: Fetching instances from args.domain='{args.domain}' ...")
        federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        print(f"WARNING: Exception '{type(exception)}' during fetching instances from args.domain='{args.domain}'")
        instances.update_last_error(args.domain, exception)
        # NOTE(review): status value reconstructed, the return statement was
        # missing from the reviewed text - confirm.
        return 100

    # NOTE(review): the attribute name "single" was reconstructed, the
    # condition was missing from the reviewed text - confirm.
    if args.single:
        # DEBUG: print("DEBUG: Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    fba.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe', 'lemmy') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    )

    rows = fba.cursor.fetchall()
    print(f"INFO: Checking {len(rows)} entries ...")
    for domain, origin, software, nodeinfo_url in rows:
        # DEBUG: print(f"DEBUG: domain='{domain}'")
        if blacklist.is_blacklisted(domain):
            print("WARNING: domain is blacklisted:", domain)
            continue

        try:
            print(f"INFO: Fetching instances for instance '{domain}' ('{software}') of origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
            federation.fetch_instances(domain, origin, software, inspect.currentframe().f_code.co_name, nodeinfo_url)
        except network.exceptions as exception:
            # Keep going, one failing instance must not stop the loop
            print(f"WARNING: Exception '{type(exception)}' during fetching instances from domain='{domain}'")
            instances.update_last_error(domain, exception)

    # DEBUG: print("DEBUG: EXIT!")
    return 0
def fetch_oliphant(args: argparse.Namespace):
    """Crawl all domains listed in the oliphant CSV block lists.

    Each CSV is downloaded from codeberg.org, and every valid,
    non-blacklisted domain in it is handed to
    ``federation.fetch_instances()`` with the list's blocker as origin.

    Args:
        args: Parsed command-line arguments; when ``args.domain`` is a
            non-empty string only the block list(s) of that blocker are
            processed.
    """
    # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
    locking.acquire()

    # Base URL
    base_url = "https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists"

    # URLs to fetch
    blocklists = (
        {
            "blocker": "artisan.chat",
            "csv_url": "mastodon/artisan.chat.csv",
        },{
            "blocker": "mastodon.art",
            "csv_url": "mastodon/mastodon.art.csv",
        },{
            "blocker": "pleroma.envs.net",
            "csv_url": "mastodon/pleroma.envs.net.csv",
        },{
            "blocker": "oliphant.social",
            "csv_url": "mastodon/_unified_tier3_blocklist.csv",
        },{
            "blocker": "mastodon.online",
            "csv_url": "mastodon/mastodon.online.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "mastodon/mastodon.social.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "other/missing-tier0-mastodon.social.csv",
        },{
            "blocker": "rage.love",
            "csv_url": "mastodon/rage.love.csv",
        },{
            "blocker": "sunny.garden",
            "csv_url": "mastodon/sunny.garden.csv",
        },{
            "blocker": "solarpunk.moe",
            "csv_url": "mastodon/solarpunk.moe.csv",
        },{
            "blocker": "toot.wales",
            "csv_url": "mastodon/toot.wales.csv",
        },{
            "blocker": "union.place",
            "csv_url": "mastodon/union.place.csv",
        }
    )

    # An empty string counts as "no domain given"
    only_blocker = args.domain if isinstance(args.domain, str) and args.domain != "" else None

    for block in blocklists:
        # Is domain given and not equal blocker?
        if only_blocker is not None and only_blocker != block["blocker"]:
            # DEBUG: print(f"DEBUG: Skipping blocker='{block['blocker']}', not matching args.domain='{args.domain}'")
            continue

        # Fetch this URL
        print(f"INFO: Fetching csv_url='{block['csv_url']}' for blocker='{block['blocker']}' ...")
        response = fba.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        # DEBUG: print(f"DEBUG: response[]='{type(response)}'")
        # content is bytes, comparing it with "" was always true
        if response.ok and len(response.content) > 0:
            # DEBUG: print(f"DEBUG: Fetched {len(response.content)} Bytes, parsing CSV ...")
            reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")

            # DEBUG: print(f"DEBUG: reader[]='{type(reader)}'")
            for row in reader:
                # Accept the header with and without the leading "#"
                domain = row.get("#domain") or row.get("domain")
                if not domain:
                    print(f"WARNING: row()={len(row)} does not contain a domain - skipped!")
                    continue
                elif not validators.domain(domain):
                    print(f"WARNING: domain='{domain}' is not a valid domain - skipped!")
                    continue
                elif blacklist.is_blacklisted(domain):
                    print(f"WARNING: domain='{domain}' is blacklisted - skipped!")
                    continue

                try:
                    print(f"INFO: Fetching instances for instane='{domain}' ...")
                    federation.fetch_instances(domain, block["blocker"], None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    print(f"WARNING: Exception '{type(exception)}' during fetching instances from domain='{domain}'")
                    instances.update_last_error(domain, exception)

    # DEBUG: print("DEBUG: EXIT!")