# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import argparse
import csv
import inspect
import json
import time

import atoma
import bs4
import markdown
import validators

from fba import blacklist
from fba import blocks
from fba import config
from fba import federation
from fba import fba
from fba import instances
from fba import locking
from fba import network

from fba.helpers import tidyup

from fba.networks import friendica
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma
def check_instance(args: argparse.Namespace) -> int:
    """Report whether ``args.domain`` is a candidate for a new instance.

    Prints exactly one line describing the outcome.

    Args:
        args: Parsed command-line arguments; only ``args.domain`` is read.

    Returns:
        0 when the domain is syntactically valid, not blacklisted and not
        yet registered; a non-zero status otherwise.
    """
    # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - CALLED!")
    # NOTE(review): the concrete non-zero values (100/101/102) are not
    # established by the visible code - confirm against whatever consumes
    # this status.
    status = 0
    if not validators.domain(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is not valid")
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is blacklisted")
        status = 101
    elif instances.is_registered(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is already registered")
        status = 102
    else:
        print(f"INFO: args.domain='{args.domain}' is not known")

    # DEBUG: print(f"DEBUG: status={status} - EXIT!")
    return status
def fetch_bkali(args: argparse.Namespace) -> int:
    """Fetch the domain list from gql.api.bka.li and crawl unknown domains.

    The GraphQL endpoint is queried for all ``nodeinfo`` domains. Every
    entry that is a valid domain, not blacklisted and not yet registered is
    collected and then handed to ``federation.fetch_instances()``.

    Args:
        args: Parsed command-line arguments (not read by this command).

    Returns:
        0 on success, a non-zero status when the API reported an error or
        the request itself failed.

    Raises:
        Exception: When the response is empty or lacks the ``data`` /
            ``nodeinfo`` keys.
    """
    # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
    # NOTE(review): the non-zero return values (100/101/102) are not
    # established by the visible code - confirm against the caller.
    domains = []
    try:
        fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        }))

        # DEBUG: print(f"DEBUG: fetched[]='{type(fetched)}'")
        if "error_message" in fetched:
            print(f"WARNING: post_json_api() for 'gql.api.bka.li' returned error message: {fetched['error_message']}")
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            # The error object lives below the "json" key (that is what the
            # condition above checked), so it must be read from there.
            print(f"WARNING: post_json_api() returned error: {fetched['json']['error']['message']}")
            return 101

        rows = fetched["json"]

        # DEBUG: print(f"DEBUG: rows({len(rows)})[]='{type(rows)}'")
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            # DEBUG: print(f"DEBUG: entry['{type(entry)}']='{entry}'")
            if "domain" not in entry:
                print(f"WARNING: entry()={len(entry)} does not contain 'domain' - SKIPPED!")
                continue
            elif not validators.domain(entry["domain"]):
                print(f"WARNING: domain='{entry['domain']}' is not a valid domain - SKIPPED!")
                continue
            elif blacklist.is_blacklisted(entry["domain"]):
                # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is blacklisted - SKIPPED!")
                continue
            elif instances.is_registered(entry["domain"]):
                # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is already registered - SKIPPED!")
                continue

            # DEBUG: print(f"DEBUG: Adding domain='{entry['domain']}' ...")
            domains.append(entry["domain"])

    except network.exceptions as exception:
        print(f"ERROR: Cannot fetch graphql,exception[{type(exception)}]:'{str(exception)}' - EXIT!")
        return 102

    # DEBUG: print(f"DEBUG: domains()={len(domains)}")
    if domains:
        # Only take the lock when there is actually something to write.
        locking.acquire()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            try:
                print(f"INFO: Fetching instances from domain='{domain}' ...")
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                # A failing domain is recorded and must not abort the others.
                print(f"WARNING: Exception '{type(exception)}' during fetching instances from domain='{domain}'")
                instances.update_last_error(domain, exception)

    # DEBUG: print("DEBUG: EXIT!")
    return 0
def fetch_blocks(args: argparse.Namespace):
    """Re-fetch block lists from registered instances and store them.

    With ``args.domain`` set, only that (valid, non-blacklisted, registered)
    instance is checked. Otherwise every instance of a supported software
    whose ``last_blocked`` is unset or older than the configured
    ``recheck_block`` interval is checked.

    Pleroma and Mastodon are delegated to their network modules. Friendica
    and Misskey return a mapping of block level to block entries, which is
    processed here: blocked domains are tidied, obscured names are looked up
    in the instances table, unknown domains are registered, and the block is
    added or refreshed. Newly added "reject" blocks are collected and posted
    via the bot when ``bot_enabled`` is set.

    Args:
        args: Parsed command-line arguments; only ``args.domain`` is read.
    """
    # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
    if args.domain is not None and args.domain != "":
        # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - checking ...")
        if not validators.domain(args.domain):
            print(f"WARNING: domain='{args.domain}' is not valid.")
            return
        elif blacklist.is_blacklisted(args.domain):
            print(f"WARNING: domain='{args.domain}' is blacklisted, won't check it!")
            return
        elif not instances.is_registered(args.domain):
            print(f"WARNING: domain='{args.domain}' is not registered, please run ./fba.py fetch_instances {args.domain} first.")
            return

    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe') AND domain = ?", [args.domain]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = fba.cursor.fetchall()
    print(f"INFO: Checking {len(rows)} entries ...")
    for blocker, software, origin, nodeinfo_url in rows:
        # DEBUG: print("DEBUG: BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url)
        # Newly found "reject" blocks of this blocker, for the bot post.
        blockdict = []
        blocker = tidyup.domain(blocker)
        # DEBUG: print("DEBUG: AFTER blocker,software:", blocker, software)

        if blocker == "":
            print("WARNING: blocker is now empty!")
            continue
        elif blacklist.is_blacklisted(blocker):
            print(f"WARNING: blocker='{blocker}' is blacklisted now!")
            continue

        # DEBUG: print(f"DEBUG: blocker='{blocker}'")
        instances.update_last_blocked(blocker)

        if software == "pleroma":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            pleroma.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "mastodon":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            mastodon.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "friendica" or software == "misskey":
            print(f"INFO: blocker='{blocker}',software='{software}'")

            if software == "friendica":
                blocking = friendica.fetch_blocks(blocker)
            else:
                blocking = misskey.fetch_blocks(blocker)

            print(f"INFO: Checking {len(blocking.items())} entries from blocker='{blocker}',software='{software}' ...")
            for block_level, blocklist in blocking.items():
                # DEBUG: print("DEBUG: blocker,block_level,blocklist():", blocker, block_level, len(blocklist))
                block_level = tidyup.domain(block_level)
                # DEBUG: print("DEBUG: AFTER-block_level:", block_level)
                if block_level == "":
                    print("WARNING: block_level is empty, blocker:", blocker)
                    continue

                # DEBUG: print(f"DEBUG: Checking {len(blocklist)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...")
                for block in blocklist:
                    # Relies on each entry holding exactly two values, in
                    # the order (blocked domain, reason).
                    blocked, reason = block.values()
                    # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - BEFORE!")
                    blocked = tidyup.domain(blocked)
                    reason = tidyup.reason(reason) if reason is not None and reason != "" else None
                    # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!")

                    if blocked == "":
                        print("WARNING: blocked is empty:", blocker)
                        continue
                    elif blacklist.is_blacklisted(blocked):
                        # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
                        continue
                    elif "*" in blocked or "?" in blocked:
                        # Some servers obscure domains with "*" or "?" and
                        # publish no hash. Both stand for one unknown
                        # character, which is "_" in a SQL LIKE pattern.
                        fba.cursor.execute(
                            "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_").replace("?", "_")]
                        )

                        searchres = fba.cursor.fetchone()

                        # DEBUG: print(f"DEBUG: searchres[]='{type(searchres)}'")
                        if searchres is None:
                            print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
                            continue

                        blocked = searchres[0]
                        origin = searchres[1]
                        nodeinfo_url = searchres[2]
                    elif not validators.domain(blocked):
                        print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
                        continue

                    # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
                    if blocked.split(".")[-1] == "arpa":
                        print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
                        continue
                    elif not instances.is_registered(blocked):
                        # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker)
                        try:
                            instances.add(blocked, blocker, inspect.currentframe().f_code.co_name, nodeinfo_url)
                        except network.exceptions as exception:
                            print(f"Exception during adding blocked='{blocked}',blocker='{blocker}': '{type(exception)}'")
                            continue

                    if not blocks.is_instance_blocked(blocker, blocked, block_level):
                        blocks.add_instance(blocker, blocked, reason, block_level)

                        if block_level == "reject":
                            blockdict.append({
                                "blocked": blocked,
                                "reason": reason,
                            })
                    else:
                        # DEBUG: print(f"DEBUG: Updating block last seen and reason for blocker='{blocker}',blocked='{blocked}' ...")
                        blocks.update_last_seen(blocker, blocked, block_level)
                        blocks.update_reason(reason, blocker, blocked, block_level)

            # DEBUG: print("DEBUG: Committing changes ...")
            fba.connection.commit()
        else:
            # The query also selects software (e.g. bookwyrm, takahe) for
            # which no fetcher is wired up here.
            print("WARNING: Unknown software:", blocker, software)

        if config.get("bot_enabled") and len(blockdict) > 0:
            network.send_bot_post(blocker, blockdict)

    # DEBUG: print("DEBUG: EXIT!")
def fetch_cs(args: argparse.Namespace):
    """Import chaos.social's published federation list as block records.

    Downloads ``federation.md`` from the chaossocial/meta repository,
    renders it to HTML and reads the tables that follow the
    "silenced-instances" and "blocked-instances" headings. Each row is
    stored as a block by ``chaos.social`` (levels "silenced" and "reject"),
    and domains that are not yet registered are crawled.

    Args:
        args: Parsed command-line arguments (not read by this command).
    """
    # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
    # Python-Markdown extensions used for rendering. The lookups below need
    # heading ids and tables ("toc" and "extra" in Python-Markdown).
    # NOTE(review): the exact extension list is not established by the
    # visible code - confirm it matches the project's intended set.
    extensions = [
        "extra",
        "abbr",
        "attr_list",
        "def_list",
        "fenced_code",
        "footnotes",
        "md_in_html",
        "admonition",
        "codehilite",
        "legacy_attrs",
        "legacy_em",
        "meta",
        "nl2br",
        "sane_lists",
        "smarty",
        "toc",
        "wikilinks",
    ]

    # Block level -> rows returned by federation.find_domains()
    domains = {
        "silenced": [],
        "reject": [],
    }

    raw = fba.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    # DEBUG: print(f"DEBUG: raw()={len(raw)}[]='{type(raw)}'")

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features='html.parser')

    # DEBUG: print(f"DEBUG: doc()={len(doc)}[]='{type(doc)}'")
    for block_level, heading_id in (("silenced", "silenced-instances"), ("reject", "blocked-instances")):
        # Each lookup step may come back empty when the page layout changes;
        # report that instead of failing on a None attribute access.
        heading = doc.find("h2", {"id": heading_id})
        table = heading.findNext("table") if heading is not None else None
        tbody = table.find("tbody") if table is not None else None
        if tbody is None:
            print(f"WARNING: Cannot find table for heading_id='{heading_id}' - SKIPPED!")
            continue

        # DEBUG: print(f"DEBUG: tbody[]='{type(tbody)}'")
        domains[block_level] = domains[block_level] + federation.find_domains(tbody)

    # Count the rows, not the two dict keys.
    total = sum(len(found) for found in domains.values())

    # DEBUG: print(f"DEBUG: total={total}")
    if total > 0:
        locking.acquire()

        print(f"INFO: Adding {total} new instances ...")
        for block_level in domains:
            # DEBUG: print(f"DEBUG: block_level='{block_level}'")

            for row in domains[block_level]:
                # DEBUG: print(f"DEBUG: row='{row}'")
                if not blocks.is_instance_blocked('chaos.social', row["domain"], block_level):
                    # DEBUG: print(f"DEBUG: domain='{row['domain']}',block_level='{block_level}' blocked by chaos.social, adding ...")
                    blocks.add_instance('chaos.social', row["domain"], row["reason"], block_level)

                if not instances.is_registered(row["domain"]):
                    try:
                        print(f"INFO: Fetching instances from domain='{row['domain']}' ...")
                        federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        # A failing domain is recorded and must not abort the others.
                        print(f"WARNING: Exception '{type(exception)}' during fetching instances from domain='{row['domain']}'")
                        instances.update_last_error(row["domain"], exception)

        # DEBUG: print("DEBUG: Committing changes ...")
        fba.connection.commit()

    # DEBUG: print("DEBUG: EXIT!")
def fetch_fba_rss(args: argparse.Namespace):
    """Crawl domains announced in an FBA-specific RSS feed.

    Fetches ``args.feed``, takes the domain from each item's link (the text
    after the first "="), and crawls every domain that is not blacklisted,
    not already collected and not yet registered.

    Args:
        args: Parsed command-line arguments; only ``args.feed`` is read.
    """
    # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
    domains = []

    print(f"INFO: Fetch FBA-specific RSS args.feed='{args.feed}' ...")
    response = fba.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        # DEBUG: print(f"DEBUG: Parsing RSS feed ({len(response.text)} Bytes) ...")
        rss = atoma.parse_rss_bytes(response.content)

        # DEBUG: print(f"DEBUG: rss[]='{type(rss)}'")
        for item in rss.items:
            # DEBUG: print(f"DEBUG: item={item}")
            # Without a link, or without "=" in it, there is no second part
            # to take the domain from.
            if item.link is None or "=" not in item.link:
                print(f"WARNING: item.link='{item.link}' does not contain a domain - SKIPPED!")
                continue

            domain = item.link.split("=")[1]

            if blacklist.is_blacklisted(domain):
                # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
                continue
            elif domain in domains:
                # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
                continue
            elif instances.is_registered(domain):
                # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
                continue

            # DEBUG: print(f"DEBUG: Adding domain='{domain}'")
            domains.append(domain)

    # DEBUG: print(f"DEBUG: domains()={len(domains)}")
    if domains:
        # Only take the lock when there is actually something to write.
        locking.acquire()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            try:
                print(f"INFO: Fetching instances from domain='{domain}' ...")
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                # A failing domain is recorded and must not abort the others.
                print(f"WARNING: Exception '{type(exception)}' during fetching instances from domain='{domain}'")
                instances.update_last_error(domain, exception)

    # DEBUG: print("DEBUG: EXIT!")
def fetch_fbabot_atom(args: argparse.Namespace):
    """Crawl domains mentioned in the FBA bot account's ATOM feed.

    Fetches the feed, parses each entry's HTML content, and treats every
    comma-separated part of each anchor's ``href`` as a candidate domain
    (after ``tidyup.domain()``). Candidates that are not blacklisted, not
    already collected and not yet registered are crawled.

    Args:
        args: Parsed command-line arguments (not read by this command).
    """
    # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
    feed = "https://ryona.agency/users/fba/feed.atom"

    domains = []

    print(f"INFO: Fetching ATOM feed='{feed}' from FBA bot account ...")
    response = fba.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        # DEBUG: print(f"DEBUG: Parsing ATOM feed ({len(response.text)} Bytes) ...")
        atom = atoma.parse_atom_bytes(response.content)

        # DEBUG: print(f"DEBUG: atom[]='{type(atom)}'")
        for entry in atom.entries:
            # DEBUG: print(f"DEBUG: entry[]='{type(entry)}'")
            if entry.content is None:
                # Nothing to parse for an entry without a content element.
                continue

            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            # DEBUG: print(f"DEBUG: doc[]='{type(doc)}'")
            for element in doc.findAll("a"):
                href_value = element.get("href")
                if href_value is None:
                    # Anchors without href carry no domain.
                    continue

                for href in href_value.split(","):
                    # DEBUG: print(f"DEBUG: href[{type(href)}]={href}")
                    domain = tidyup.domain(href)

                    # DEBUG: print(f"DEBUG: domain='{domain}'")
                    if domain == "":
                        # Tidying may leave nothing behind; skip such parts.
                        continue
                    elif blacklist.is_blacklisted(domain):
                        # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
                        continue
                    elif domain in domains:
                        # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
                        continue
                    elif instances.is_registered(domain):
                        # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
                        continue

                    # DEBUG: print(f"DEBUG: Adding domain='{domain}',domains()={len(domains)}")
                    domains.append(domain)

    # DEBUG: print(f"DEBUG: domains({len(domains)})={domains}")
    if domains:
        # Only take the lock when there is actually something to write.
        locking.acquire()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            try:
                print(f"INFO: Fetching instances from domain='{domain}' ...")
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                # A failing domain is recorded and must not abort the others.
                print(f"WARNING: Exception '{type(exception)}' during fetching instances from domain='{domain}'")
                instances.update_last_error(domain, exception)

    # DEBUG: print("DEBUG: EXIT!")
def fetch_instances(args: argparse.Namespace) -> int:
    """Crawl ``args.domain`` and then re-crawl instances that are due.

    The given domain is fetched first. Afterwards every instance of a
    supported software whose ``last_instance_fetch`` is unset or older than
    the configured ``recheck_instance`` interval is fetched as well, unless
    only the single domain was requested.

    Args:
        args: Parsed command-line arguments; ``args.domain`` is the domain
            to start with.

    Returns:
        0 on success, a non-zero status when the initial fetch failed.
    """
    # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
    locking.acquire()

    # Initial fetch
    try:
        print(f"INFO: Fetching instances from args.domain='{args.domain}' ...")
        federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        print(f"WARNING: Exception '{type(exception)}' during fetching instances from args.domain='{args.domain}'")
        instances.update_last_error(args.domain, exception)
        # NOTE(review): the concrete status value is not established by the
        # visible code - confirm against the caller.
        return 100

    # NOTE(review): the attribute guarding this early exit is presumably
    # `args.single`; it is not visible in the reviewed code - confirm
    # against the argument parser.
    if args.single:
        # DEBUG: print("DEBUG: Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    fba.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe', 'lemmy') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    )

    rows = fba.cursor.fetchall()
    print(f"INFO: Checking {len(rows)} entries ...")
    for row in rows:
        # Column order follows the SELECT: domain, origin, software, nodeinfo_url
        # DEBUG: print(f"DEBUG: domain='{row[0]}'")
        if blacklist.is_blacklisted(row[0]):
            print("WARNING: domain is blacklisted:", row[0])
            continue

        try:
            print(f"INFO: Fetching instances for instance '{row[0]}' ('{row[2]}') of origin='{row[1]}',nodeinfo_url='{row[3]}'")
            federation.fetch_instances(row[0], row[1], row[2], inspect.currentframe().f_code.co_name, row[3])
        except network.exceptions as exception:
            # A failing domain is recorded and must not abort the others.
            print(f"WARNING: Exception '{type(exception)}' during fetching instances from domain='{row[0]}'")
            instances.update_last_error(row[0], exception)

    # DEBUG: print("DEBUG: EXIT!")
    return 0
def fetch_federater(args: argparse.Namespace):
    """Crawl domains listed in the "federater" recommended-blocks CSV.

    Downloads ``federater.csv`` from the federater/blocks_recommended
    repository and crawls every ``#domain`` value that is a valid domain,
    not blacklisted and not yet registered.

    Args:
        args: Parsed command-line arguments (not read by this command).
    """
    # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
    locking.acquire()

    # Fetch this URL
    response = fba.fetch_url("https://github.com/federater/blocks_recommended/raw/main/federater.csv", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
    # DEBUG: print(f"DEBUG: response[]='{type(response)}'")
    # The body is decoded from bytes below, so test its length; comparing
    # bytes against "" would always be unequal.
    if response.ok and len(response.content) > 0:
        # DEBUG: print(f"DEBUG: Fetched {len(response.content)} Bytes, parsing CSV ...")
        ## DEBUG: print(f"DEBUG: response.content={response.content}")
        reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect='unix')
        #, fieldnames='domain,severity,reject_media,reject_reports,public_comment,obfuscate'
        # DEBUG: print(f"DEBUG: reader[]='{type(reader)}'")
        for row in reader:
            domain = row.get("#domain")
            if domain is None:
                # Header changed or short row: there is no domain column to read.
                print(f"WARNING: row()={len(row)} does not contain '#domain' - skipped!")
                continue
            elif not validators.domain(domain):
                print(f"WARNING: domain='{domain}' is not a valid domain - skipped!")
                continue
            elif blacklist.is_blacklisted(domain):
                print(f"WARNING: domain='{domain}' is blacklisted - skipped!")
                continue
            elif instances.is_registered(domain):
                # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - skipped!")
                continue

            try:
                print(f"INFO: Fetching instances for instane='{domain}' ...")
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                # A failing domain is recorded and must not abort the others.
                print(f"WARNING: Exception '{type(exception)}' during fetching instances from domain='{domain}'")
                instances.update_last_error(domain, exception)

    # DEBUG: print("DEBUG: EXIT!")