1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
import argparse
import csv
import inspect
import json
import time

import atoma
import bs4
import markdown
import validators

from fba import blacklist
from fba import blocks
from fba import config
from fba import fba
from fba import instances
from fba import network
from fba.federation import *
def check_instance(args: argparse.Namespace) -> int:
    """Checks whether args.domain is valid, not blacklisted and not yet
    registered, printing a diagnostic for each case.

    Returns 0 when the domain is unknown (safe to fetch), or a distinct
    non-zero status code per rejection reason so shell callers can branch.
    """
    # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - CALLED!")
    status = 0
    if not validators.domain(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is not valid")
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is blacklisted")
        status = 101
    elif instances.is_registered(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is already registered")
        status = 102
    else:
        print(f"INFO: args.domain='{args.domain}' is not known")

    # DEBUG: print(f"DEBUG: status={status} - EXIT!")
    return status
def fetch_bkali(args: argparse.Namespace):
    """Fetches the sorted domain list from the gql.api.bka.li GraphQL API and
    fetches instance data for every domain that is valid, not blacklisted and
    not yet registered locally.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    domains = list()
    try:
        fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        }))

        # DEBUG: print(f"DEBUG: fetched({len(fetched)})[]='{type(fetched)}'")
        if len(fetched) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in fetched:
            raise Exception(f"WARNING: fetched()={len(fetched)} does not contain key 'data'")
        elif "nodeinfo" not in fetched["data"]:
            raise Exception(f"WARNING: fetched()={len(fetched['data'])} does not contain key 'nodeinfo'")

        for entry in fetched["data"]["nodeinfo"]:
            # DEBUG: print(f"DEBUG: entry['{type(entry)}']='{entry}'")
            if "domain" not in entry:
                print(f"WARNING: entry()={len(entry)} does not contain 'domain' - SKIPPED!")
                continue
            elif not validators.domain(entry["domain"]):
                print(f"WARNING: domain='{entry['domain']}' is not a valid domain - SKIPPED!")
                continue
            elif blacklist.is_blacklisted(entry["domain"]):
                # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is blacklisted - SKIPPED!")
                continue
            elif instances.is_registered(entry["domain"]):
                # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is already registered - SKIPPED!")
                continue

            # DEBUG: print(f"DEBUG: Adding domain='{entry['domain']}' ...")
            domains.append(entry["domain"])

    except BaseException as exception:
        print(f"ERROR: Cannot fetch graphql,exception[{type(exception)}]:'{str(exception)}'")

    # DEBUG: print(f"DEBUG: domains()={len(domains)}")
    if len(domains) > 0:
        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            print(f"INFO: Fetching instances from domain='{domain}' ...")
            fba.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    # DEBUG: print("DEBUG: EXIT!")
def fetch_blocks(args: argparse.Namespace):
    """Re-fetches block lists from registered instances.

    When args.domain is given, only that single (already registered) domain
    is re-checked; otherwise every supported instance whose last_blocked
    timestamp is older than config "recheck_block" is processed. Pleroma and
    Mastodon delegate entirely to their federation modules; Friendica and
    Misskey return a dict of {block_level: [blocks]} that is normalized,
    de-obfuscated and written into the local blocks table here.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    if args.domain is not None and args.domain != "":
        # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - checking ...")
        if not validators.domain(args.domain):
            print(f"WARNING: domain='{args.domain}' is not valid.")
            return
        elif blacklist.is_blacklisted(args.domain):
            print(f"WARNING: domain='{args.domain}' is blacklisted, won't check it!")
            return
        elif not instances.is_registered(args.domain):
            print(f"WARNING: domain='{args.domain}' is not registered, please run ./fba.py fetch_instances {args.domain} first.")
            return

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe') AND domain = ?", [args.domain]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = fba.cursor.fetchall()
    print(f"INFO: Checking {len(rows)} entries ...")
    for blocker, software, origin, nodeinfo_url in rows:
        # DEBUG: print("DEBUG: BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url)
        # Collects "reject"-level blocks for an optional bot announcement below.
        blockdict = list()
        blocker = fba.tidyup_domain(blocker)
        # DEBUG: print("DEBUG: AFTER blocker,software:", blocker, software)
        if blocker == "":
            print("WARNING: blocker is now empty!")
            continue
        elif blacklist.is_blacklisted(blocker):
            print(f"WARNING: blocker='{blocker}' is blacklisted now!")
            continue

        # DEBUG: print(f"DEBUG: blocker='{blocker}'")
        instances.update_last_blocked(blocker)

        if software == "pleroma":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            pleroma.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "mastodon":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            mastodon.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "friendica" or software == "misskey":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            try:
                if software == "friendica":
                    rows = friendica.fetch_blocks(blocker)
                elif software == "misskey":
                    rows = misskey.fetch_blocks(blocker)

                print(f"INFO: Checking {len(rows.items())} entries from blocker='{blocker}',software='{software}' ...")
                for block_level, blocklist in rows.items():
                    # DEBUG: print("DEBUG: blocker,block_level,blocklist():", blocker, block_level, len(blocklist))
                    block_level = fba.tidyup_domain(block_level)
                    # DEBUG: print("DEBUG: AFTER-block_level:", block_level)
                    if block_level == "":
                        print("WARNING: block_level is empty, blocker:", blocker)
                        continue

                    # DEBUG: print(f"DEBUG: Checking {len(blocklist)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...")
                    for block in blocklist:
                        blocked, reason = block.values()
                        # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - BEFORE!")
                        blocked = fba.tidyup_domain(blocked)
                        reason = fba.tidyup_reason(reason) if reason is not None and reason != "" else None
                        # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!")
                        if blocked == "":
                            print("WARNING: blocked is empty:", blocker)
                            continue
                        elif blacklist.is_blacklisted(blocked):
                            # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
                            continue
                        elif blocked.count("*") > 0:
                            # Some friendica servers also obscure domains without hash
                            fba.cursor.execute(
                                "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")]
                            )

                            searchres = fba.cursor.fetchone()

                            if searchres is None:
                                print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
                                continue

                            blocked = searchres[0]
                            origin = searchres[1]
                            nodeinfo_url = searchres[2]
                        elif blocked.count("?") > 0:
                            # Some obscure them with question marks, not sure if that's dependent on version or not
                            fba.cursor.execute(
                                "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("?", "_")]
                            )

                            searchres = fba.cursor.fetchone()

                            if searchres is None:
                                print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
                                continue

                            blocked = searchres[0]
                            origin = searchres[1]
                            nodeinfo_url = searchres[2]
                        elif not validators.domain(blocked):
                            print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
                            continue

                        # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
                        # Re-validate: de-obfuscation above may have replaced "blocked".
                        if not validators.domain(blocked):
                            print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
                            continue
                        elif not instances.is_registered(blocked):
                            # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker)
                            instances.add(blocked, blocker, inspect.currentframe().f_code.co_name, nodeinfo_url)

                        if not blocks.is_instance_blocked(blocker, blocked, block_level):
                            blocks.add_instance(blocker, blocked, reason, block_level)

                            if block_level == "reject":
                                # NOTE(review): reconstructed — new "reject" blocks are
                                # queued for the bot announcement below; confirm shape.
                                blockdict.append({
                                    "blocked": blocked,
                                    "reason": reason
                                })
                        else:
                            # DEBUG: print(f"DEBUG: Updating block last seen and reason for blocker='{blocker}',blocked='{blocked}' ...")
                            blocks.update_last_seen(blocker, blocked, block_level)
                            blocks.update_reason(reason, blocker, blocked, block_level)

                # DEBUG: print("DEBUG: Committing changes ...")
                fba.connection.commit()
            except BaseException as exception:
                print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(exception)}]:'{str(exception)}'")
        else:
            print("WARNING: Unknown software:", blocker, software)

        if config.get("bot_enabled") and len(blockdict) > 0:
            network.send_bot_post(blocker, blockdict)

    # DEBUG: print("DEBUG: EXIT!")
def fetch_cs(args: argparse.Namespace):
    """Fetches chaos.social's published federation policy (a Markdown page on
    GitHub), parses the "silenced" and "blocked" instance tables and records
    them as blocks by 'chaos.social', fetching unknown instances afterwards.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    # NOTE(review): the original extension list is not visible in this chunk;
    # "extra" enables the tables syntax the parser below depends on - confirm.
    extensions = [
        "extra",
    ]
    domains = {
        "silenced": list(),
        "reject": list(),
    }
    try:
        raw = fba.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
        # DEBUG: print(f"DEBUG: raw()={len(raw)}[]={type(raw)}")
        doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features='html.parser')

        # DEBUG: print(f"DEBUG: doc()={len(doc)}[]={type(doc)}")
        silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
        # DEBUG: print(f"DEBUG: silenced[]={type(silenced)}")
        domains["silenced"] = domains["silenced"] + fba.find_domains(silenced)

        blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
        # DEBUG: print(f"DEBUG: blocked[]={type(blocked)}")
        domains["reject"] = domains["reject"] + fba.find_domains(blocked)

    except BaseException as exception:
        print(f"ERROR: Cannot fetch from meta.chaos.social,exception[{type(exception)}]:'{str(exception)}'")

    # DEBUG: print(f"DEBUG: domains()={len(domains)}")
    if len(domains) > 0:
        print(f"INFO: Adding {len(domains)} new instances ...")
        for block_level in domains:
            # DEBUG: print(f"DEBUG: block_level='{block_level}'")
            for row in domains[block_level]:
                # DEBUG: print(f"DEBUG: row='{row}'")
                if not blocks.is_instance_blocked('chaos.social', row["domain"], block_level):
                    # DEBUG: print(f"DEBUG: domain='{row['domain']}',block_level='{block_level}' blocked by chaos.social, adding ...")
                    blocks.add_instance('chaos.social', row["domain"], row["reason"], block_level)

                if not instances.is_registered(row["domain"]):
                    print(f"INFO: Fetching instances from domain='{row['domain']}' ...")
                    fba.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)

        # DEBUG: print("DEBUG: Committing changes ...")
        fba.connection.commit()

    # DEBUG: print("DEBUG: EXIT!")
def fetch_fba_rss(args: argparse.Namespace):
    """Fetches an FBA-specific RSS feed (args.feed) and registers every
    linked domain that is not blacklisted, duplicated or already known.
    The domain is expected after '=' in each item link's query string.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    domains = list()
    try:
        print(f"INFO: Fetch FBA-specific RSS args.feed='{args.feed}' ...")
        response = fba.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
        if response.ok and response.status_code < 300 and len(response.text) > 0:
            # DEBUG: print(f"DEBUG: Parsing RSS feed ...")
            rss = atoma.parse_rss_bytes(response.content)

            # DEBUG: print(f"DEBUG: rss[]={type(rss)}")
            for item in rss.items:
                # DEBUG: print(f"DEBUG: item={item}")
                domain = item.link.split("=")[1]

                if blacklist.is_blacklisted(domain):
                    # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
                    continue
                elif domain in domains:
                    # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
                    continue
                elif instances.is_registered(domain):
                    # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
                    continue

                # DEBUG: print(f"DEBUG: Adding domain='{domain}'")
                domains.append(domain)

    except BaseException as exception:
        print(f"ERROR: Cannot fetch args.feed='{args.feed}',exception[{type(exception)}]:'{str(exception)}'")

    # DEBUG: print(f"DEBUG: domains()={len(domains)}")
    if len(domains) > 0:
        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            print(f"INFO: Fetching instances from domain='{domain}' ...")
            fba.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    # DEBUG: print("DEBUG: EXIT!")
def fetch_fbabot_atom(args: argparse.Namespace):
    """Fetches the ATOM feed of the FBA bot account on ryona.agency, extracts
    domains from anchor hrefs in each entry's HTML content and registers any
    that is not blacklisted, duplicated or already known.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    feed = "https://ryona.agency/users/fba/feed.atom"
    domains = list()
    try:
        print(f"INFO: Fetching ATOM feed='{feed}' from FBA bot account ...")
        response = fba.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
        if response.ok and response.status_code < 300 and len(response.text) > 0:
            # DEBUG: print(f"DEBUG: Parsing ATOM feed ...")
            atom = atoma.parse_atom_bytes(response.content)

            # DEBUG: print(f"DEBUG: atom[]={type(atom)}")
            for entry in atom.entries:
                # DEBUG: print(f"DEBUG: entry[]={type(entry)}")
                doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
                # DEBUG: print(f"DEBUG: doc[]={type(doc)}")
                for element in doc.findAll("a"):
                    # Each anchor may carry a comma-separated list of domains.
                    for href in element["href"].split(","):
                        # DEBUG: print(f"DEBUG: href[{type(href)}]={href}")
                        domain = fba.tidyup_domain(href)

                        # DEBUG: print(f"DEBUG: domain='{domain}'")
                        if blacklist.is_blacklisted(domain):
                            # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
                            continue
                        elif domain in domains:
                            # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
                            continue
                        elif instances.is_registered(domain):
                            # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
                            continue

                        # DEBUG: print(f"DEBUG: Adding domain='{domain}',domains()={len(domains)}")
                        domains.append(domain)

    except BaseException as exception:
        print(f"ERROR: Cannot fetch feed='{feed}',exception[{type(exception)}]:'{str(exception)}'")

    # DEBUG: print(f"DEBUG: domains({len(domains)})={domains}")
    if len(domains) > 0:
        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            print(f"INFO: Fetching instances from domain='{domain}' ...")
            fba.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    # DEBUG: print("DEBUG: EXIT!")
def fetch_instances(args: argparse.Namespace):
    """Fetches instance data for args.domain, then (unless args.single is
    set) re-crawls all supported instances whose last_instance_fetch is
    older than config "recheck_instance".
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    # Initial fetch of the given domain
    fba.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)

    if args.single:
        # DEBUG: print(f"DEBUG: Not fetching more instances - EXIT!")
        return

    # Loop through some instances
    fba.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe', 'lemmy') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    )

    rows = fba.cursor.fetchall()
    print(f"INFO: Checking {len(rows)} entries ...")
    for row in rows:
        # DEBUG: print("DEBUG: domain:", row[0])
        if blacklist.is_blacklisted(row[0]):
            print("WARNING: domain is blacklisted:", row[0])
            continue

        print(f"INFO: Fetching instances for instance '{row[0]}' ('{row[2]}') of origin='{row[1]}',nodeinfo_url='{row[3]}'")
        fba.fetch_instances(row[0], row[1], row[2], inspect.currentframe().f_code.co_name, row[3])

    # DEBUG: print("DEBUG: EXIT!")
def fetch_federater(args: argparse.Namespace):
    """Downloads the federater recommended block CSV from GitHub and fetches
    instance data for every '#domain' row that is valid, not blacklisted and
    not yet registered.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    response = fba.fetch_url("https://github.com/federater/blocks_recommended/raw/main/federater.csv", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
    # DEBUG: print(f"DEBUG: response[]='{type(response)}'")
    if response.ok and response.content != "":
        # DEBUG: print(f"DEBUG: Fetched {len(response.content)} Bytes, parsing CSV ...")
        #print(f"DEBUG: response.content={response.content}")
        reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect='unix')
        #, fieldnames='domain,severity,reject_media,reject_reports,public_comment,obfuscate'
        # DEBUG: print(f"DEBUG: reader[]={type(reader)}")
        for row in reader:
            if not validators.domain(row["#domain"]):
                print(f"WARNING: domain='{row['#domain']}' is not a valid domain - skipped!")
                continue
            elif blacklist.is_blacklisted(row["#domain"]):
                print(f"WARNING: domain='{row['#domain']}' is blacklisted - skipped!")
                continue
            elif instances.is_registered(row["#domain"]):
                # DEBUG: print(f"DEBUG: domain='{row['#domain']}' is already registered - skipped!")
                continue

            # Typo fix: message previously read "instane"
            print(f"INFO: Fetching instances for instance='{row['#domain']}' ...")
            fba.fetch_instances(row["#domain"], None, None, inspect.currentframe().f_code.co_name)

    # DEBUG: print("DEBUG: EXIT!")