1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
29 from fba import blacklist
30 from fba import blocks
31 from fba import config
32 from fba import federation
34 from fba import instances
35 from fba import locking
36 from fba import network
38 from fba.helpers import tidyup
40 from fba.networks import friendica
41 from fba.networks import mastodon
42 from fba.networks import misskey
43 from fba.networks import pleroma
45 def check_instance(args: argparse.Namespace) -> int:
# Report on a single domain taken from args.domain: prints a WARNING when the
# domain fails validators.domain(), is blacklisted, or is already registered,
# and an INFO line when none of these apply.
#
# NOTE(review): the signature promises an int and the trailing DEBUG line
# refers to `status`, but no assignment to or return of `status` is visible in
# this view (the embedded line numbering skips 47, 50, 53, 56-57, 59, 61-62).
# Confirm the status codes against the complete file before relying on them.
46 # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - CALLED!")
# The checks run in order: syntax first, then blacklist, then registry.
48 if not validators.domain(args.domain):
49 print(f"WARNING: args.domain='{args.domain}' is not valid")
51 elif blacklist.is_blacklisted(args.domain):
52 print(f"WARNING: args.domain='{args.domain}' is blacklisted")
54 elif instances.is_registered(args.domain):
55 print(f"WARNING: args.domain='{args.domain}' is already registered")
# Presumably the fall-through (else) branch; the branch header is not visible here.
58 print(f"INFO: args.domain='{args.domain}' is not known")
60 # DEBUG: print(f"DEBUG: status={status} - EXIT!")
63 def fetch_bkali(args: argparse.Namespace):
# Collect new domains from the GraphQL API at gql.api.bka.li and fetch
# instances for each of them via federation.fetch_instances().
#
# `args` is not read by any statement visible in this view.
# NOTE(review): the initialisation of `domains`, the opening `try:` and the
# statements that skip rejected entries are not visible here (the embedded
# numbering has gaps at 65-66, 69-70, 72, 83, 86, 89, 92-93, 102-104).
64 # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
# POST a query asking for the `domain` field of every nodeinfo record,
# ordered by domain ascending.
67 fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
68 "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
71 # DEBUG: print(f"DEBUG: fetched({len(fetched)})[]='{type(fetched)}'")
# Sanity-check the response shape. The condition guarding the first raise is
# not visible in this view.
73 raise Exception("WARNING: Returned no records")
74 elif "data" not in fetched:
75 raise Exception(f"WARNING: fetched()={len(fetched)} does not contain key 'data'")
76 elif "nodeinfo" not in fetched["data"]:
77 raise Exception(f"WARNING: fetched()={len(fetched['data'])} does not contain key 'nodeinfo'")
# Keep only entries that carry a syntactically valid domain which is neither
# blacklisted nor already registered.
79 for entry in fetched["data"]["nodeinfo"]:
80 # DEBUG: print(f"DEBUG: entry['{type(entry)}']='{entry}'")
# NOTE(review): `"domain" not in entry` is the idiomatic spelling of this test.
81 if not "domain" in entry:
82 print(f"WARNING: entry()={len(entry)} does not contain 'domain' - SKIPPED!")
84 elif not validators.domain(entry["domain"]):
85 print(f"WARNING: domain='{entry['domain']}' is not a valid domain - SKIPPED!")
87 elif blacklist.is_blacklisted(entry["domain"]):
88 # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is blacklisted - SKIPPED!")
90 elif instances.is_registered(entry["domain"]):
91 # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is already registered - SKIPPED!")
94 # DEBUG: print(f"DEBUG: Adding domain='{entry['domain']}' ...")
95 domains.append(entry["domain"])
# NOTE(review): BaseException also covers KeyboardInterrupt and SystemExit, so
# Ctrl-C during the request would only be printed here. If the raises above sit
# inside the same try (presumably - the `try:` line is not visible), they too
# end up as a printed ERROR rather than propagating. Consider narrowing this.
97 except BaseException as exception:
98 print(f"ERROR: Cannot fetch graphql,exception[{type(exception)}]:'{str(exception)}'")
101 # DEBUG: print(f"DEBUG: domains()={len(domains)}")
# Fetch instances for every collected domain; origin and software are passed as
# None and the name of this function is passed as the fourth argument.
105 print(f"INFO: Adding {len(domains)} new instances ...")
106 for domain in domains:
107 print(f"INFO: Fetching instances from domain='{domain}' ...")
108 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
110 # DEBUG: print("DEBUG: EXIT!")
112 def fetch_blocks(args: argparse.Namespace):
# Re-check block lists of registered instances and record the blocks found.
#
# When args.domain is given (not None and not empty) only that domain is
# checked; otherwise every instance of a supported software whose last_blocked
# is NULL or older than `time.time() - config.get("recheck_block")` is checked.
#
# NOTE(review): several statements are not visible in this view - among them
# the `fba.cursor.execute(` openers of the SQL calls, the statements that leave
# a branch after a WARNING, the initialisation of `blockdict`, and the body of
# the `block_level == "reject"` branch (embedded numbering gaps such as 118,
# 121, 124-127, 130, 135, 146-147, 186-187, 195, 238-243, 255-257).
113 # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
# Validate an explicitly requested domain before doing any work.
114 if args.domain is not None and args.domain != "":
115 # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - checking ...")
116 if not validators.domain(args.domain):
117 print(f"WARNING: domain='{args.domain}' is not valid.")
119 elif blacklist.is_blacklisted(args.domain):
120 print(f"WARNING: domain='{args.domain}' is blacklisted, won't check it!")
122 elif not instances.is_registered(args.domain):
123 print(f"WARNING: domain='{args.domain}' is not registered, please run ./fba.py fetch_instances {args.domain} first.")
128 if args.domain is not None and args.domain != "":
129 # Re-check single domain
# The two lines starting with "SELECT" are the arguments (statement and
# parameter list) of a call whose opening line is not visible here.
131 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe') AND domain = ?", [args.domain]
134 # Re-check after "timeout" (aka. minimum interval)
136 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
139 rows = fba.cursor.fetchall()
140 print(f"INFO: Checking {len(rows)} entries ...")
# Each row is unpacked in SELECT column order.
141 for blocker, software, origin, nodeinfo_url in rows:
142 # DEBUG: print("DEBUG: BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url)
144 blocker = tidyup.domain(blocker)
145 # DEBUG: print("DEBUG: AFTER blocker,software:", blocker, software)
# The condition guarding the next warning is not visible; going by the message
# it presumably tests for an empty blocker after tidying.
148 print("WARNING: blocker is now empty!")
150 elif blacklist.is_blacklisted(blocker):
151 print(f"WARNING: blocker='{blocker}' is blacklisted now!")
154 # DEBUG: print(f"DEBUG: blocker='{blocker}'")
# The timestamp is updated before the block list is fetched.
155 instances.update_last_blocked(blocker)
# Dispatch on the software name. Pleroma and Mastodon are handled entirely by
# their own modules; Friendica and Misskey return data that is processed below.
157 if software == "pleroma":
158 print(f"INFO: blocker='{blocker}',software='{software}'")
159 pleroma.fetch_blocks(blocker, origin, nodeinfo_url)
160 elif software == "mastodon":
161 print(f"INFO: blocker='{blocker}',software='{software}'")
162 mastodon.fetch_blocks(blocker, origin, nodeinfo_url)
163 elif software == "friendica" or software == "misskey":
164 print(f"INFO: blocker='{blocker}',software='{software}'")
# NOTE(review): `rows` is rebound here while the enclosing loop iterates over
# the database result that was also called `rows`. The outer loop keeps using
# its original iterator, but the shadowing is easy to misread; a distinct name
# would be clearer.
165 if software == "friendica":
166 rows = friendica.fetch_blocks(blocker)
167 elif software == "misskey":
168 rows = misskey.fetch_blocks(blocker)
# The result is used as a mapping of block level -> list of block records.
170 print(f"INFO: Checking {len(rows.items())} entries from blocker='{blocker}',software='{software}' ...")
171 for block_level, blocklist in rows.items():
172 # DEBUG: print("DEBUG: blocker,block_level,blocklist():", blocker, block_level, len(blocklist))
# The block level is normalised with the same helper that is used for domains.
173 block_level = tidyup.domain(block_level)
174 # DEBUG: print("DEBUG: AFTER-block_level:", block_level)
175 if block_level == "":
176 print("WARNING: block_level is empty, blocker:", blocker)
179 # DEBUG: print(f"DEBUG: Checking {len(blocklist)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...")
180 for block in blocklist:
# Relies on every record having exactly two values, in the order
# (blocked domain, reason) - TODO confirm against the fetch_blocks() helpers.
181 blocked, reason = block.values()
182 # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - BEFORE!")
183 blocked = tidyup.domain(blocked)
# A reason that is None or empty is stored as None.
184 reason = tidyup.reason(reason) if reason is not None and reason != "" else None
185 # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!")
# The condition guarding the next warning is not visible in this view.
188 print("WARNING: blocked is empty:", blocker)
190 elif blacklist.is_blacklisted(blocked):
191 # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
193 elif blocked.count("*") > 0:
194 # Some friendica servers also obscure domains without hash
# Each "*" becomes "_" (the single-character wildcard of SQL LIKE) and the
# first registered domain matching that pattern, by rowid, is taken.
196 "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")]
199 searchres = fba.cursor.fetchone()
201 if searchres is None:
# NOTE(review): "deobsfucate" in the message is a misspelling of "deobfuscate".
202 print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
# NOTE(review): `origin` and `nodeinfo_url` are the loop variables that describe
# the *blocker*. Overwriting them with the looked-up instance's values means
# later entries of the same blocker see the replaced values (nodeinfo_url is
# passed to instances.add() below) - confirm this is intended.
205 blocked = searchres[0]
206 origin = searchres[1]
207 nodeinfo_url = searchres[2]
208 elif blocked.count("?") > 0:
209 # Some obscure them with question marks, not sure if that's dependent on version or not
# Same lookup as the "*" branch above, with "?" as the masking character.
211 "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("?", "_")]
214 searchres = fba.cursor.fetchone()
216 if searchres is None:
217 print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
220 blocked = searchres[0]
221 origin = searchres[1]
222 nodeinfo_url = searchres[2]
223 elif not validators.domain(blocked):
224 print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
227 # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
# This second validity check also covers a `blocked` value that was replaced by
# one of the wildcard lookups above, which the first check does not see.
228 if not validators.domain(blocked):
229 print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
231 elif not instances.is_registered(blocked):
232 # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker)
# Register the blocked domain, passing the blocker and this function's name.
233 instances.add(blocked, blocker, inspect.currentframe().f_code.co_name, nodeinfo_url)
# Record a new block, or refresh an existing one.
235 if not blocks.is_instance_blocked(blocker, blocked, block_level):
236 blocks.add_instance(blocker, blocked, reason, block_level)
# The body of this branch is not visible in this view.
238 if block_level == "reject":
244 # DEBUG: print(f"DEBUG: Updating block last seen and reason for blocker='{blocker}',blocked='{blocked}' ...")
# Presumably the branch for a block that already exists (the `else:` header is
# not visible here).
245 blocks.update_last_seen(blocker, blocked, block_level)
246 blocks.update_reason(reason, blocker, blocked, block_level)
248 # DEBUG: print("DEBUG: Committing changes ...")
249 fba.connection.commit()
# Presumably the fall-through branch of the software dispatch above.
251 print("WARNING: Unknown software:", blocker, software)
# Post the collected entries through the bot when it is enabled. The
# initialisation and filling of `blockdict` are not visible in this view.
253 if config.get("bot_enabled") and len(blockdict) > 0:
254 network.send_bot_post(blocker, blockdict)
258 # DEBUG: print("DEBUG: EXIT!")
260 def fetch_cs(args: argparse.Namespace):
# Import the silenced and blocked instance tables published by chaos.social in
# the federation.md file of the chaossocial/meta repository on GitHub.
#
# `args` is not read by any statement visible in this view.
# NOTE(review): the definitions of `extensions` and `domains` are not visible
# here (the embedded numbering skips 262-286); from its use below, `domains` is
# keyed by block level ("silenced", "reject") with a list of rows per key.
261 # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
# Download the raw Markdown, using the configured connection/read timeouts.
287 raw = fba.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
288 # DEBUG: print(f"DEBUG: raw()={len(raw)}[]={type(raw)}")
# Render the Markdown to HTML so the tables can be located by heading id.
290 doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features='html.parser')
292 # DEBUG: print(f"DEBUG: doc()={len(doc)}[]={type(doc)}")
# NOTE(review): the results of doc.find(...) and the chained lookups are used
# without a None check; if a heading or table is absent from the document this
# presumably fails with AttributeError - verify and consider guarding.
293 silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
294 # DEBUG: print(f"DEBUG: silenced[]={type(silenced)}")
295 domains["silenced"] = domains["silenced"] + federation.find_domains(silenced)
297 blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
298 # DEBUG: print(f"DEBUG: blocked[]={type(blocked)}")
299 domains["reject"] = domains["reject"] + federation.find_domains(blocked)
301 # DEBUG: print(f"DEBUG: domains()={len(domains)}")
# NOTE(review): `domains` is keyed by block level, so len(domains) appears to
# count block levels rather than instances, although the message says
# "new instances".
305 print(f"INFO: Adding {len(domains)} new instances ...")
306 for block_level in domains:
307 # DEBUG: print(f"DEBUG: block_level='{block_level}'")
# Each row is read through its "domain" and "reason" keys.
309 for row in domains[block_level]:
310 # DEBUG: print(f"DEBUG: row='{row}'")
# Record the block with 'chaos.social' as the blocker when not yet stored.
311 if not blocks.is_instance_blocked('chaos.social', row["domain"], block_level):
312 # DEBUG: print(f"DEBUG: domain='{row['domain']}',block_level='{block_level}' blocked by chaos.social, adding ...")
313 blocks.add_instance('chaos.social', row["domain"], row["reason"], block_level)
# Fetch instances for domains that are not registered yet, passing
# 'chaos.social' as the second argument.
315 if not instances.is_registered(row["domain"]):
316 print(f"INFO: Fetching instances from domain='{row['domain']}' ...")
317 federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)
319 # DEBUG: print("DEBUG: Committing changes ...")
320 fba.connection.commit()
322 # DEBUG: print("DEBUG: EXIT!")
324 def fetch_fba_rss(args: argparse.Namespace):
# Read the RSS feed at args.feed, collect the domains it links to and fetch
# instances for every domain that is new.
#
# NOTE(review): the initialisation of `domains` and the statements that skip an
# item are not visible in this view (embedded numbering gaps at 326-327, 343,
# 346, 349-350, 355-357).
325 # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
328 print(f"INFO: Fetch FBA-specific RSS args.feed='{args.feed}' ...")
329 response = fba.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
331 # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
# Only parse responses that are ok, have a status code below 300 and a
# non-empty body.
332 if response.ok and response.status_code < 300 and len(response.text) > 0:
333 # DEBUG: print(f"DEBUG: Parsing RSS feed ({len(response.text)} Bytes) ...")
334 rss = atoma.parse_rss_bytes(response.content)
336 # DEBUG: print(f"DEBUG: rss[]={type(rss)}")
337 for item in rss.items:
338 # DEBUG: print(f"DEBUG: item={item}")
# The domain is taken as the text after the first "=" in the item link.
# NOTE(review): a link without "=" raises IndexError here; no guard is visible.
339 domain = item.link.split("=")[1]
# Skip domains that are blacklisted, already collected in this run, or already
# registered.
# NOTE(review): unlike fetch_bkali and fetch_federater, no validators.domain()
# check is visible for this externally supplied value.
341 if blacklist.is_blacklisted(domain):
342 # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
344 elif domain in domains:
345 # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
347 elif instances.is_registered(domain):
348 # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
351 # DEBUG: print(f"DEBUG: Adding domain='{domain}'")
352 domains.append(domain)
354 # DEBUG: print(f"DEBUG: domains()={len(domains)}")
# Fetch instances for every collected domain.
358 print(f"INFO: Adding {len(domains)} new instances ...")
359 for domain in domains:
360 print(f"INFO: Fetching instances from domain='{domain}' ...")
361 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
363 # DEBUG: print("DEBUG: EXIT!")
365 def fetch_fbabot_atom(args: argparse.Namespace):
# Read the ATOM feed of the FBA bot account, collect the domains linked from
# its entries and fetch instances for every domain that is new.
#
# `args` is not read by any statement visible in this view; the feed URL is
# hard-coded below.
# NOTE(review): the initialisation of `domains` and the statements that skip a
# link are not visible in this view (embedded numbering gaps at 368-370, 392,
# 395, 398-399, 404-406).
366 # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
367 feed = "https://ryona.agency/users/fba/feed.atom"
371 print(f"INFO: Fetching ATOM feed='{feed}' from FBA bot account ...")
372 response = fba.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
374 # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
# Only parse responses that are ok, have a status code below 300 and a
# non-empty body.
375 if response.ok and response.status_code < 300 and len(response.text) > 0:
376 # DEBUG: print(f"DEBUG: Parsing ATOM feed ({len(response.text)} Bytes) ...")
377 atom = atoma.parse_atom_bytes(response.content)
379 # DEBUG: print(f"DEBUG: atom[]={type(atom)}")
380 for entry in atom.entries:
381 # DEBUG: print(f"DEBUG: entry[]={type(entry)}")
# Each entry's content is parsed as HTML and every <a> element is inspected.
382 doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
383 # DEBUG: print(f"DEBUG: doc[]={type(doc)}")
384 for element in doc.findAll("a"):
# One href may carry several comma-separated values; each is tidied separately.
# NOTE(review): element["href"] presumably raises KeyError for an anchor that
# has no href attribute - verify against the bs4 documentation.
385 for href in element["href"].split(","):
386 # DEBUG: print(f"DEBUG: href[{type(href)}]={href}")
387 domain = tidyup.domain(href)
389 # DEBUG: print(f"DEBUG: domain='{domain}'")
# Skip domains that are blacklisted, already collected in this run, or already
# registered.
# NOTE(review): no validators.domain() check is visible for this externally
# supplied value.
390 if blacklist.is_blacklisted(domain):
391 # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
393 elif domain in domains:
394 # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
396 elif instances.is_registered(domain):
397 # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
400 # DEBUG: print(f"DEBUG: Adding domain='{domain}',domains()={len(domains)}")
401 domains.append(domain)
403 # DEBUG: print(f"DEBUG: domains({len(domains)})={domains}")
# Fetch instances for every collected domain.
407 print(f"INFO: Adding {len(domains)} new instances ...")
408 for domain in domains:
409 print(f"INFO: Fetching instances from domain='{domain}' ...")
410 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
412 # DEBUG: print("DEBUG: EXIT!")
414 def fetch_instances(args: argparse.Namespace):
# Fetch instances starting from args.domain, then re-check registered instances
# whose last_instance_fetch is NULL or older than
# `time.time() - config.get("recheck_instance")`.
#
# NOTE(review): the condition under which the function stops after the first
# fetch, the `fba.cursor.execute(` opener and the loop header over `rows` are
# not visible in this view (embedded numbering gaps at 416-418, 420-421,
# 423-424, 426, 428-429, 432, 436-437).
415 # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
# Initial fetch for the domain given on the command line.
419 federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
# Going by the DEBUG text, an early exit presumably happens here under a
# condition that is not visible.
422 # DEBUG: print("DEBUG: Not fetching more instances - EXIT!")
425 # Loop through some instances
# The line starting with "SELECT" holds the arguments (statement and parameter
# list) of a call whose opening line is not visible here. This software list
# also contains 'lemmy'.
427 "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe', 'lemmy') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
430 rows = fba.cursor.fetchall()
431 print(f"INFO: Checking {len(rows)} entries ...")
# Per the SELECT column order: row[0]=domain, row[1]=origin, row[2]=software,
# row[3]=nodeinfo_url.
433 # DEBUG: print(f"DEBUG: domain='{row[0]}'")
434 if blacklist.is_blacklisted(row[0]):
435 print("WARNING: domain is blacklisted:", row[0])
438 print(f"INFO: Fetching instances for instance '{row[0]}' ('{row[2]}') of origin='{row[1]}',nodeinfo_url='{row[3]}'")
439 federation.fetch_instances(row[0], row[1], row[2], inspect.currentframe().f_code.co_name, row[3])
441 # DEBUG: print("DEBUG: EXIT!")
443 def fetch_federater(args: argparse.Namespace):
# Download the federater.csv block recommendation list from GitHub and fetch
# instances for every listed domain that is valid, not blacklisted and not yet
# registered.
#
# `args` is not read by any statement visible in this view.
# NOTE(review): the loop header over `reader` and the statements that skip a
# row are not visible in this view (embedded numbering gaps at 445-447, 456,
# 459, 462, 465-466).
444 # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
448 response = fba.fetch_url("https://github.com/federater/blocks_recommended/raw/main/federater.csv", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
449 # DEBUG: print(f"DEBUG: response[]='{type(response)}'")
# NOTE(review): response.content is decoded with .decode('utf-8') below, so it
# is bytes; comparing bytes with the str "" is always unequal in Python 3, so
# this test does not detect an empty body. `len(response.content) > 0` would.
450 if response.ok and response.content != "":
451 # DEBUG: print(f"DEBUG: Fetched {len(response.content)} Bytes, parsing CSV ...")
452 ## DEBUG: print(f"DEBUG: response.content={response.content}")
# Column names are taken from the first CSV line; the domain column is read
# through the key "#domain" below.
453 reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect='unix')
454 #, fieldnames='domain,severity,reject_media,reject_reports,public_comment,obfuscate'
455 # DEBUG: print(f"DEBUG: reader[]={type(reader)}")
# Same filter order as elsewhere in this file: syntax, blacklist, registry.
457 if not validators.domain(row["#domain"]):
458 print(f"WARNING: domain='{row['#domain']}' is not a valid domain - skipped!")
460 elif blacklist.is_blacklisted(row["#domain"]):
461 print(f"WARNING: domain='{row['#domain']}' is blacklisted - skipped!")
463 elif instances.is_registered(row["#domain"]):
464 # DEBUG: print(f"DEBUG: domain='{row['#domain']}' is already registered - skipped!")
# NOTE(review): "instane" in the message below is a misspelling of "instance".
467 print(f"INFO: Fetching instances for instane='{row['#domain']}' ...")
468 federation.fetch_instances(row["#domain"], None, None, inspect.currentframe().f_code.co_name)
470 # DEBUG: print("DEBUG: EXIT!")