1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
31 from fba import blacklist
32 from fba import blocks
33 from fba import config
34 from fba import federation
36 from fba import instances
37 from fba import locking
38 from fba import network
40 from fba.helpers import tidyup
42 from fba.networks import friendica
43 from fba.networks import mastodon
44 from fba.networks import misskey
45 from fba.networks import pleroma
# Classify args.domain and print exactly one diagnostic line about it.
#
# The visible checks form one if/elif chain, so the first match wins:
#   1. validators.domain(args.domain) is falsy    -> "is not valid"
#   2. blacklist.is_blacklisted(args.domain)      -> "is blacklisted"
#   3. instances.is_registered(args.domain)       -> "is already registered"
# The "is not known" message is presumably the fall-through branch for a
# domain that passed all three checks -- TODO confirm.
#
# Parameters:
#   args: parsed command-line namespace; only args.domain is read.
#
# Returns:
#   Annotated as int. NOTE(review): the trailing debug line refers to a
#   `status` value, presumably the code handed back to the caller; how each
#   branch sets it cannot be seen from the statements below -- confirm.
47 def check_instance(args: argparse.Namespace) -> int:
48 # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - CALLED!")
# Syntactic validity is tested first, before the blacklist and registry lookups.
50 if not validators.domain(args.domain):
51 print(f"WARNING: args.domain='{args.domain}' is not valid")
53 elif blacklist.is_blacklisted(args.domain):
54 print(f"WARNING: args.domain='{args.domain}' is blacklisted")
56 elif instances.is_registered(args.domain):
57 print(f"WARNING: args.domain='{args.domain}' is already registered")
60 print(f"INFO: args.domain='{args.domain}' is not known")
62 # DEBUG: print(f"DEBUG: status={status} - EXIT!")
# Discover new instances from the GraphQL API at gql.api.bka.li.
#
# Steps visible below:
#   1. POST a "domainlist" GraphQL query (nodeinfo ordered by domain,
#      ascending) to /v1/graphql via network.post_json_api().
#   2. Raise Exception when the reply lacks the 'data' key or 'data' lacks
#      'nodeinfo'. A third raise ("Returned no records") exists as well; its
#      guarding condition is not visible here -- presumably an empty reply.
#   3. Collect entry["domain"] for every entry that has the key, is a valid
#      domain, is not blacklisted and is not yet registered.
#   4. Call federation.fetch_instances() for each collected domain.
#
# Parameters:
#   args: parsed command-line namespace; no visible statement reads it.
#
# Error handling:
#   requests Timeout / ConnectionError around the API call are printed as an
#   ERROR line; around the per-domain fetch they are printed as a WARNING and
#   recorded with instances.update_last_error().
#   NOTE(review): the plain Exception raises are not matched by either except
#   clause shown, so they presumably propagate to the caller -- confirm.
65 def fetch_bkali(args: argparse.Namespace):
66 # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
# The request body is a JSON-encoded GraphQL document asking only for the
# `domain` field of every nodeinfo record.
69 fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
70 "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
73 # DEBUG: print(f"DEBUG: fetched({len(fetched)})[]='{type(fetched)}'")
75 raise Exception("WARNING: Returned no records")
76 elif "data" not in fetched:
77 raise Exception(f"WARNING: fetched()={len(fetched)} does not contain key 'data'")
78 elif "nodeinfo" not in fetched["data"]:
79 raise Exception(f"WARNING: fetched()={len(fetched['data'])} does not contain key 'nodeinfo'")
# Filter the returned records; only domains that survive every check below
# are appended to `domains`.
81 for entry in fetched["data"]["nodeinfo"]:
82 # DEBUG: print(f"DEBUG: entry['{type(entry)}']='{entry}'")
83 if not "domain" in entry:
84 print(f"WARNING: entry()={len(entry)} does not contain 'domain' - SKIPPED!")
86 elif not validators.domain(entry["domain"]):
87 print(f"WARNING: domain='{entry['domain']}' is not a valid domain - SKIPPED!")
89 elif blacklist.is_blacklisted(entry["domain"]):
90 # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is blacklisted - SKIPPED!")
92 elif instances.is_registered(entry["domain"]):
93 # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is already registered - SKIPPED!")
96 # DEBUG: print(f"DEBUG: Adding domain='{entry['domain']}' ...")
97 domains.append(entry["domain"])
99 except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as exception:
100 print(f"ERROR: Cannot fetch graphql,exception[{type(exception)}]:'{str(exception)}'")
103 # DEBUG: print(f"DEBUG: domains()={len(domains)}")
107 print(f"INFO: Adding {len(domains)} new instances ...")
108 for domain in domains:
110 print(f"INFO: Fetching instances from domain='{domain}' ...")
# inspect.currentframe().f_code.co_name evaluates to this function's own
# name, which is passed as the fourth argument of fetch_instances().
111 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
112 except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as ex:
113 print(f"WARNING: Timeout during fetching instances from domain='{domain}'")
# Record the failure on the instance so it is kept alongside its data.
114 instances.update_last_error(domain, ex)
116 # DEBUG: print("DEBUG: EXIT!")
# Refresh the stored block lists of known instances.
#
# Scope of a run, as expressed by the two SQL strings below:
#   * args.domain given and non-empty: only that domain is selected. Before
#     that it is checked for validity, blacklisting and registration, with a
#     WARNING printed for each failed check.
#   * otherwise: every instance whose last_blocked is NULL or older than
#     time.time() - config.get("recheck_block"), ordered by rowid descending.
# Both queries restrict `software` to pleroma, mastodon, friendica, misskey,
# bookwyrm and takahe.
#
# Per selected row (blocker, software, origin, nodeinfo_url):
#   * the blocker name is normalised with tidyup.domain();
#   * instances.update_last_blocked(blocker) is called;
#   * pleroma and mastodon are delegated to their modules' fetch_blocks();
#   * friendica and misskey return a mapping of block level -> list of block
#     entries, which is processed here: names are tidied, obscured names are
#     resolved through a LIKE lookup, unknown blocked instances are added,
#     and the block is either added or has its last-seen/reason updated;
#   * changes are committed with fba.connection.commit();
#   * when config "bot_enabled" is truthy and `blockdict` is non-empty,
#     network.send_bot_post(blocker, blockdict) is called.
#
# Parameters:
#   args: parsed command-line namespace; args.domain is read (may be None
#         or "").
#
# NOTE(review): 'bookwyrm' and 'takahe' are selected by the queries, but no
# visible branch handles them; presumably they end at the "Unknown software"
# warning -- confirm.
118 def fetch_blocks(args: argparse.Namespace):
119 # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
# Pre-flight checks, only when a single domain was requested.
120 if args.domain is not None and args.domain != "":
121 # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - checking ...")
122 if not validators.domain(args.domain):
123 print(f"WARNING: domain='{args.domain}' is not valid.")
125 elif blacklist.is_blacklisted(args.domain):
126 print(f"WARNING: domain='{args.domain}' is blacklisted, won't check it!")
128 elif not instances.is_registered(args.domain):
129 print(f"WARNING: domain='{args.domain}' is not registered, please run ./fba.py fetch_instances {args.domain} first.")
# Choose the query: one explicit domain, or everything that is due.
134 if args.domain is not None and args.domain != "":
135 # Re-check single domain
137 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe') AND domain = ?", [args.domain]
140 # Re-check after "timeout" (aka. minimum interval)
142 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
145 rows = fba.cursor.fetchall()
146 print(f"INFO: Checking {len(rows)} entries ...")
# Tuple unpacking follows the SELECT column order above.
147 for blocker, software, origin, nodeinfo_url in rows:
148 # DEBUG: print("DEBUG: BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url)
150 blocker = tidyup.domain(blocker)
151 # DEBUG: print("DEBUG: AFTER blocker,software:", blocker, software)
154 print("WARNING: blocker is now empty!")
156 elif blacklist.is_blacklisted(blocker):
157 print(f"WARNING: blocker='{blocker}' is blacklisted now!")
160 # DEBUG: print(f"DEBUG: blocker='{blocker}'")
# The timestamp is updated before any fetching is attempted for this blocker.
161 instances.update_last_blocked(blocker)
# Dispatch on the software name stored for this instance.
163 if software == "pleroma":
164 print(f"INFO: blocker='{blocker}',software='{software}'")
165 pleroma.fetch_blocks(blocker, origin, nodeinfo_url)
166 elif software == "mastodon":
167 print(f"INFO: blocker='{blocker}',software='{software}'")
168 mastodon.fetch_blocks(blocker, origin, nodeinfo_url)
169 elif software == "friendica" or software == "misskey":
170 print(f"INFO: blocker='{blocker}',software='{software}'")
# Unlike the pleroma/mastodon calls above, these fetchers take only the
# blocker and their result is processed in this function.
173 if software == "friendica":
174 blocking = friendica.fetch_blocks(blocker)
175 elif software == "misskey":
176 blocking = misskey.fetch_blocks(blocker)
178 print(f"INFO: Checking {len(blocking.items())} entries from blocker='{blocker}',software='{software}' ...")
179 for block_level, blocklist in blocking.items():
180 # DEBUG: print("DEBUG: blocker,block_level,blocklist():", blocker, block_level, len(blocklist))
# The block level is normalised with the same helper used for domain names.
181 block_level = tidyup.domain(block_level)
182 # DEBUG: print("DEBUG: AFTER-block_level:", block_level)
183 if block_level == "":
184 print("WARNING: block_level is empty, blocker:", blocker)
187 # DEBUG: print(f"DEBUG: Checking {len(blocklist)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...")
188 for block in blocklist:
# Relies on every entry yielding exactly two values, in (blocked, reason)
# order; any other count raises ValueError on unpacking.
189 blocked, reason = block.values()
190 # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - BEFORE!")
191 blocked = tidyup.domain(blocked)
# None and "" are both stored as None; every other reason is tidied.
192 reason = tidyup.reason(reason) if reason is not None and reason != "" else None
193 # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!")
196 print("WARNING: blocked is empty:", blocker)
198 elif blacklist.is_blacklisted(blocked):
199 # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
201 elif blocked.count("*") > 0:
202 # Some friendica servers also obscure domains without hash
# Each "*" becomes "_", the SQL LIKE wildcard for exactly one character, and
# the first matching instance by rowid is taken as the real domain.
204 "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")]
207 searchres = fba.cursor.fetchone()
209 if searchres is None:
210 print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
# NOTE(review): `origin` and `nodeinfo_url` are also the variables bound by
# the outer `for ... in rows` loop; rebinding them here changes their value
# for the remaining entries of the same blocker -- confirm this is intended.
213 blocked = searchres[0]
214 origin = searchres[1]
215 nodeinfo_url = searchres[2]
216 elif blocked.count("?") > 0:
217 # Some obscure them with question marks, not sure if that's dependent on version or not
# Same lookup as the "*" branch, with "?" as the masking character.
219 "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("?", "_")]
222 searchres = fba.cursor.fetchone()
224 if searchres is None:
225 print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
228 blocked = searchres[0]
229 origin = searchres[1]
230 nodeinfo_url = searchres[2]
231 elif not validators.domain(blocked):
232 print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
235 # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
# Same validity test and message as the elif directly above; here it also
# covers names that were replaced by a lookup result.
# NOTE(review): may be partly redundant depending on the control flow
# between the two checks -- confirm.
236 if not validators.domain(blocked):
237 print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
239 elif not instances.is_registered(blocked):
240 # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker)
# Register the blocked instance with the blocker as second argument and this
# function's own name (via the current frame) as third.
241 instances.add(blocked, blocker, inspect.currentframe().f_code.co_name, nodeinfo_url)
# New block: insert it. Known block: refresh last-seen and reason (below).
243 if not blocks.is_instance_blocked(blocker, blocked, block_level):
244 blocks.add_instance(blocker, blocked, reason, block_level)
# NOTE(review): the statements guarded by this test are not visible here;
# presumably "reject" blocks are collected into `blockdict` for the bot post
# at the end of the function -- confirm.
246 if block_level == "reject":
252 # DEBUG: print(f"DEBUG: Updating block last seen and reason for blocker='{blocker}',blocked='{blocked}' ...")
253 blocks.update_last_seen(blocker, blocked, block_level)
254 blocks.update_reason(reason, blocker, blocked, block_level)
256 # DEBUG: print("DEBUG: Committing changes ...")
257 fba.connection.commit()
259 print("WARNING: Unknown software:", blocker, software)
# The bot post is sent only when enabled in the config and there is
# something to report.
261 if config.get("bot_enabled") and len(blockdict) > 0:
262 network.send_bot_post(blocker, blockdict)
264 # DEBUG: print("DEBUG: EXIT!")
# Import the published block list of chaos.social.
#
# Steps visible below:
#   1. Download federation.md from the chaossocial/meta GitHub repository
#      using network.web_headers and the configured (connection, read)
#      timeouts.
#   2. Render the markdown to HTML (with `extensions`, which is defined
#      outside the visible statements) and parse it with BeautifulSoup.
#   3. Take the <tbody> of the first table after the <h2> with id
#      "silenced-instances" and the one after "blocked-instances", and append
#      what federation.find_domains() returns for each to
#      domains["silenced"] and domains["reject"] respectively.
#   4. For each row of each level: add the block for blocker 'chaos.social'
#      if it is not stored yet, and fetch instances from the row's domain if
#      that domain is not registered.
#   5. Commit with fba.connection.commit().
#
# Parameters:
#   args: parsed command-line namespace; no visible statement reads it.
#
# Error handling:
#   requests Timeout / ConnectionError during the per-domain fetch are
#   printed as a WARNING and recorded with instances.update_last_error().
266 def fetch_cs(args: argparse.Namespace):
267 # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
293 raw = fba.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
294 # DEBUG: print(f"DEBUG: raw()={len(raw)}[]={type(raw)}")
296 doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features='html.parser')
298 # DEBUG: print(f"DEBUG: doc()={len(doc)}[]={type(doc)}")
# NOTE(review): find() returns None when the heading is absent, in which case
# the chained .findNext() raises AttributeError; the lookup depends on the
# upstream document keeping these heading ids.
299 silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
300 # DEBUG: print(f"DEBUG: silenced[]={type(silenced)}")
301 domains["silenced"] = domains["silenced"] + federation.find_domains(silenced)
303 blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
304 # DEBUG: print(f"DEBUG: blocked[]={type(blocked)}")
# Entries of the "blocked instances" table are stored under level "reject".
305 domains["reject"] = domains["reject"] + federation.find_domains(blocked)
307 # DEBUG: print(f"DEBUG: domains()={len(domains)}")
# NOTE(review): `domains` is indexed by block level here, so len(domains) is
# the number of levels, not the number of instances the message suggests.
311 print(f"INFO: Adding {len(domains)} new instances ...")
312 for block_level in domains:
313 # DEBUG: print(f"DEBUG: block_level='{block_level}'")
# Each row is read through its "domain" and "reason" keys.
315 for row in domains[block_level]:
316 # DEBUG: print(f"DEBUG: row='{row}'")
317 if not blocks.is_instance_blocked('chaos.social', row["domain"], block_level):
318 # DEBUG: print(f"DEBUG: domain='{row['domain']}',block_level='{block_level}' blocked by chaos.social, adding ...")
319 blocks.add_instance('chaos.social', row["domain"], row["reason"], block_level)
321 if not instances.is_registered(row["domain"]):
323 print(f"INFO: Fetching instances from domain='{row['domain']}' ...")
# 'chaos.social' is passed as the second argument; the fourth is this
# function's own name, taken from the current frame.
324 federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)
325 except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as ex:
326 print(f"WARNING: Timeout during fetching instances from domain='{row['domain']}'")
327 instances.update_last_error(row["domain"], ex)
329 # DEBUG: print("DEBUG: Committing changes ...")
330 fba.connection.commit()
332 # DEBUG: print("DEBUG: EXIT!")
# Discover new instances from an FBA-specific RSS feed.
#
# Steps visible below:
#   1. Download args.feed using network.web_headers and the configured
#      (connection, read) timeouts.
#   2. When the response is ok, has a status code below 300 and a non-empty
#      text, parse its bytes with atoma.parse_rss_bytes().
#   3. For every feed item take the part of item.link that follows the first
#      "=" as the domain, and collect it unless it is blacklisted, already
#      collected or already registered.
#   4. Call federation.fetch_instances() for each collected domain.
#
# Parameters:
#   args: parsed command-line namespace; args.feed (the feed URL) is read.
#
# Error handling:
#   requests Timeout / ConnectionError during the per-domain fetch are
#   printed as a WARNING and recorded with instances.update_last_error().
334 def fetch_fba_rss(args: argparse.Namespace):
335 # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
338 print(f"INFO: Fetch FBA-specific RSS args.feed='{args.feed}' ...")
339 response = fba.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
341 # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
342 if response.ok and response.status_code < 300 and len(response.text) > 0:
343 # DEBUG: print(f"DEBUG: Parsing RSS feed ({len(response.text)} Bytes) ...")
344 rss = atoma.parse_rss_bytes(response.content)
346 # DEBUG: print(f"DEBUG: rss[]={type(rss)}")
347 for item in rss.items:
348 # DEBUG: print(f"DEBUG: item={item}")
# Element [1] of the split is the text between the first and second "=".
# NOTE(review): a link without any "=" raises IndexError here.
349 domain = item.link.split("=")[1]
351 if blacklist.is_blacklisted(domain):
352 # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
# Guard against the same domain appearing in several feed items.
354 elif domain in domains:
355 # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
357 elif instances.is_registered(domain):
358 # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
361 # DEBUG: print(f"DEBUG: Adding domain='{domain}'")
362 domains.append(domain)
364 # DEBUG: print(f"DEBUG: domains()={len(domains)}")
368 print(f"INFO: Adding {len(domains)} new instances ...")
369 for domain in domains:
371 print(f"INFO: Fetching instances from domain='{domain}' ...")
# The fourth argument is this function's own name, taken from the current frame.
372 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
373 except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as ex:
374 print(f"WARNING: Timeout during fetching instances from domain='{domain}'")
375 instances.update_last_error(domain, ex)
377 # DEBUG: print("DEBUG: EXIT!")
# Discover new instances from the ATOM feed of the FBA bot account.
#
# Steps visible below:
#   1. Download the hard-coded feed URL (https://ryona.agency/users/fba/feed.atom)
#      using network.web_headers and the configured (connection, read)
#      timeouts.
#   2. When the response is ok, has a status code below 300 and a non-empty
#      text, parse its bytes with atoma.parse_atom_bytes().
#   3. Parse each entry's content value as HTML; for every <a> element split
#      its href on "," and normalise each part with tidyup.domain().
#   4. Collect each resulting domain unless it is blacklisted, already
#      collected or already registered.
#   5. Call federation.fetch_instances() for each collected domain.
#
# Parameters:
#   args: parsed command-line namespace; no visible statement reads it.
#
# Error handling:
#   requests Timeout / ConnectionError during the per-domain fetch are
#   printed as a WARNING and recorded with instances.update_last_error().
379 def fetch_fbabot_atom(args: argparse.Namespace):
380 # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
381 feed = "https://ryona.agency/users/fba/feed.atom"
385 print(f"INFO: Fetching ATOM feed='{feed}' from FBA bot account ...")
386 response = fba.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
388 # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
389 if response.ok and response.status_code < 300 and len(response.text) > 0:
390 # DEBUG: print(f"DEBUG: Parsing ATOM feed ({len(response.text)} Bytes) ...")
391 atom = atoma.parse_atom_bytes(response.content)
393 # DEBUG: print(f"DEBUG: atom[]={type(atom)}")
394 for entry in atom.entries:
395 # DEBUG: print(f"DEBUG: entry[]={type(entry)}")
396 doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
397 # DEBUG: print(f"DEBUG: doc[]={type(doc)}")
# NOTE(review): element["href"] raises KeyError for an <a> without an href
# attribute; presumably the bot's posts always carry one -- confirm.
398 for element in doc.findAll("a"):
# One href may carry several comma-separated values; each is handled on its own.
399 for href in element["href"].split(","):
400 # DEBUG: print(f"DEBUG: href[{type(href)}]={href}")
401 domain = tidyup.domain(href)
403 # DEBUG: print(f"DEBUG: domain='{domain}'")
404 if blacklist.is_blacklisted(domain):
405 # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
# Guard against the same domain being linked more than once in the feed.
407 elif domain in domains:
408 # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
410 elif instances.is_registered(domain):
411 # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
414 # DEBUG: print(f"DEBUG: Adding domain='{domain}',domains()={len(domains)}")
415 domains.append(domain)
417 # DEBUG: print(f"DEBUG: domains({len(domains)})={domains}")
421 print(f"INFO: Adding {len(domains)} new instances ...")
422 for domain in domains:
424 print(f"INFO: Fetching instances from domain='{domain}' ...")
# The fourth argument is this function's own name, taken from the current frame.
425 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
426 except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as ex:
427 print(f"WARNING: Timeout during fetching instances from domain='{domain}'")
428 instances.update_last_error(domain, ex)
430 # DEBUG: print("DEBUG: EXIT!")
# Fetch instances starting from args.domain, then re-crawl stored instances
# that are due.
#
# Steps visible below:
#   1. Call federation.fetch_instances() for args.domain; a requests Timeout
#      or ConnectionError is printed as a WARNING and recorded with
#      instances.update_last_error().
#   2. NOTE(review): the "Not fetching more instances" debug line suggests an
#      early exit before step 3; its condition is not visible here -- confirm.
#   3. Select every instance whose software is one of pleroma, mastodon,
#      friendica, misskey, bookwyrm, takahe or lemmy and whose
#      last_instance_fetch is NULL or older than
#      time.time() - config.get("recheck_instance"), rowid descending.
#   4. Fetch instances for each selected row; a blacklisted domain gets a
#      WARNING line instead (presumably it is then skipped -- confirm).
#
# Parameters:
#   args: parsed command-line namespace; args.domain is read.
432 def fetch_instances(args: argparse.Namespace):
433 # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
438 print(f"INFO: Fetching instances from args.domain='{args.domain}' ...")
# The fourth argument is this function's own name, taken from the current frame.
439 federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
440 except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as ex:
441 print(f"WARNING: Timeout during fetching instances from args.domain='{args.domain}'")
442 instances.update_last_error(args.domain, ex)
446 # DEBUG: print("DEBUG: Not fetching more instances - EXIT!")
449 # Loop through some instances
# Column order matters: rows are accessed by index below
# (0=domain, 1=origin, 2=software, 3=nodeinfo_url).
451 "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe', 'lemmy') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
454 rows = fba.cursor.fetchall()
455 print(f"INFO: Checking {len(rows)} entries ...")
457 # DEBUG: print(f"DEBUG: domain='{row[0]}'")
458 if blacklist.is_blacklisted(row[0]):
459 print("WARNING: domain is blacklisted:", row[0])
463 print(f"INFO: Fetching instances for instance '{row[0]}' ('{row[2]}') of origin='{row[1]}',nodeinfo_url='{row[3]}'")
# Unlike the call for args.domain above, the stored origin, software and
# nodeinfo_url of the row are passed along.
464 federation.fetch_instances(row[0], row[1], row[2], inspect.currentframe().f_code.co_name, row[3])
465 except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as ex:
466 print(f"WARNING: Timeout during fetching instances from domain='{row[0]}'")
467 instances.update_last_error(row[0], ex)
469 # DEBUG: print("DEBUG: EXIT!")
# Discover new instances from the "federater" recommended block list.
#
# Steps visible below:
#   1. Download federater.csv from the federater/blocks_recommended GitHub
#      repository using network.web_headers and the configured
#      (connection, read) timeouts.
#   2. When the response is ok, decode the body as UTF-8, split it into lines
#      and parse it with csv.DictReader using the 'unix' dialect; the first
#      line supplies the field names.
#   3. For each row read the "#domain" column: print a WARNING for an invalid
#      or blacklisted domain, stay silent for a registered one, and otherwise
#      call federation.fetch_instances() for it.
#
# Parameters:
#   args: parsed command-line namespace; no visible statement reads it.
#
# Error handling:
#   requests Timeout / ConnectionError during the per-domain fetch are
#   printed as a WARNING and recorded with instances.update_last_error().
471 def fetch_federater(args: argparse.Namespace):
472 # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
476 response = fba.fetch_url("https://github.com/federater/blocks_recommended/raw/main/federater.csv", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
477 # DEBUG: print(f"DEBUG: response[]='{type(response)}'")
# NOTE(review): response.content is .decode()d below, so it is bytes-like;
# comparing it with the str "" is then always unequal and this half of the
# condition filters nothing. Presumably an empty-body check was intended.
478 if response.ok and response.content != "":
479 # DEBUG: print(f"DEBUG: Fetched {len(response.content)} Bytes, parsing CSV ...")
480 ## DEBUG: print(f"DEBUG: response.content={response.content}")
481 reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect='unix')
482 #, fieldnames='domain,severity,reject_media,reject_reports,public_comment,obfuscate'
483 # DEBUG: print(f"DEBUG: reader[]={type(reader)}")
# The key "#domain" is the literal header text of the CSV's domain column,
# including the leading "#".
485 if not validators.domain(row["#domain"]):
486 print(f"WARNING: domain='{row['#domain']}' is not a valid domain - skipped!")
488 elif blacklist.is_blacklisted(row["#domain"]):
489 print(f"WARNING: domain='{row['#domain']}' is blacklisted - skipped!")
491 elif instances.is_registered(row["#domain"]):
492 # DEBUG: print(f"DEBUG: domain='{row['#domain']}' is already registered - skipped!")
496 print(f"INFO: Fetching instances for instane='{row['#domain']}' ...")
# The fourth argument is this function's own name, taken from the current frame.
497 federation.fetch_instances(row["#domain"], None, None, inspect.currentframe().f_code.co_name)
498 except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as ex:
499 print(f"WARNING: Timeout during fetching instances from domain='{row['#domain']}'")
500 instances.update_last_error(row["#domain"], ex)
502 # DEBUG: print("DEBUG: EXIT!")