1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
31 from fba import database
34 from fba.helpers import blacklist
35 from fba.helpers import config
36 from fba.helpers import cookies
37 from fba.helpers import locking
38 from fba.helpers import software as software_helper
39 from fba.helpers import tidyup
41 from fba.http import federation
42 from fba.http import network
44 from fba.models import blocks
45 from fba.models import instances
47 from fba.networks import friendica
48 from fba.networks import lemmy
49 from fba.networks import mastodon
50 from fba.networks import misskey
51 from fba.networks import pleroma
# Module-wide logging setup.  basicConfig() at import time is unusual for a
# library module, but this file doubles as the project's CLI command table.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)
def check_instance(args: argparse.Namespace) -> int:
    """Check whether ``args.domain`` could be added to the instance table.

    Warns when the domain is syntactically invalid, blacklisted or already
    registered; otherwise logs that it is not yet known.

    NOTE(review): this excerpt elides several original lines — the
    assignments to ``status``, the ``else:`` header and the final ``return``
    are not visible here; confirm against the full file.
    """
    logger.debug("args.domain='%s' - CALLED!", args.domain)
    # …elided in excerpt: status initialisation…
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        # …elided in excerpt…
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        # …elided in excerpt…
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        # …elided in excerpt…
    # …elided in excerpt: else: header…
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    # …elided in excerpt: return…
def fetch_pixelfed_api(args: argparse.Namespace) -> int:
    """Fetch the public server list from pixelfed.org's API and run
    federation.fetch_instances() on every new, wanted domain.

    NOTE(review): the original numbering jumps — try:/else:/for: headers,
    some call arguments and all return statements are elided from this
    excerpt; elisions are marked only where they affect structure.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    # …elided in excerpt: headers initialisation + try: header…
        logger.debug("Checking CSRF from pixelfed.org")
        headers = csrf.determine("pixelfed.org", dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
        # …elided in excerpt…

    # …elided in excerpt: try: header…
        logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            # …elided in excerpt: host argument…
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            # …elided in excerpt: headers argument…
            (config.get("connection_timeout"), config.get("read_timeout"))
        # …elided in excerpt: closing parenthesis…

        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            # …elided in excerpt…
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            # …elided in excerpt…

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        # …elided in excerpt: for row in rows: header…
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
            elif not utils.is_domain_wanted(row["domain"]):
                logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
            elif instances.is_registered(row["domain"]):
                logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
            elif instances.is_recent(row["domain"]):
                logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
            # …elided in excerpt: else: header…
                logger.debug("Fetching instances from row[domain]='%s' ...", row["domain"])
                federation.fetch_instances(row["domain"], None, None, inspect.currentframe().f_code.co_name)

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        # …elided in excerpt…

    logger.debug("Success! - EXIT!")
    # …elided in excerpt: return…
def fetch_bkali(args: argparse.Namespace) -> int:
    """Query the gql.api.bka.li GraphQL endpoint for a sorted domain list
    and fetch instance data for every new, wanted domain.

    NOTE(review): original numbering jumps — structural headers, the
    ``domains`` list initialisation and returns are elided in this excerpt.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))
    # …elided in excerpt: domains list initialisation + try: header…
        fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        # …elided in excerpt: closing of the post_json_api() call…

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            # NOTE(review): the format string is missing its closing quote
            # character after '%s' — cosmetic only, the log still renders.
            logger.warning("post_json_api() for 'gql.api.bka.li' returned error message='%s", fetched["error_message"])
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            # NOTE(review): the condition inspects fetched["json"]["error"],
            # but this line reads fetched["error"]["message"] — likely a
            # KeyError if this branch is ever taken; confirm upstream.
            logger.warning("post_json_api() returned error: '%s", fetched["error"]["message"])

        rows = fetched["json"]
        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        # …elided in excerpt: emptiness check (if header)…
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
            elif not utils.is_domain_wanted(entry["domain"]):
                logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
            # …elided in excerpt: else: header…
                logger.debug("Adding domain='%s' ...", entry["domain"])
                domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        # …elided in excerpt…

    logger.debug("domains()=%d", len(domains))
    # …elided in excerpt: emptiness check…
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            # …elided in excerpt: try: header…
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    # …elided in excerpt: return…
def fetch_blocks(args: argparse.Namespace) -> int:
    """Fetch block lists from registered instances.

    Selects blockers either by a single ``args.domain``, by ``args.software``
    or — by default — every supported software whose ``last_blocked`` stamp
    is older than the configured recheck interval.  Each blocker's block
    list is fetched with the software-specific network module, tidied,
    deobfuscated where necessary and stored via utils.process_block().

    NOTE(review): original numbering jumps — many structural headers
    (try:/else:/if:), ``continue`` statements, the ``blockdict`` setup and
    returns are elided from this excerpt.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            # …elided in excerpt…
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            # …elided in excerpt…
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            # …elided in excerpt…

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for single args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
        # …elided in excerpt: closing parenthesis…
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
        # …elided in excerpt: closing parenthesis + else: header…
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        # …elided in excerpt: closing parenthesis…

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
        blocker = tidyup.domain(blocker)
        logger.debug("blocker='%s' - AFTER!", blocker)
        # …elided in excerpt: emptiness check (if header)…
            logger.warning("blocker is now empty!")
            # …elided in excerpt…
        elif nodeinfo_url is None or nodeinfo_url == "":
            logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
            # …elided in excerpt…
        elif not utils.is_domain_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            # …elided in excerpt…

        logger.debug("blocker='%s'", blocker)
        instances.set_last_blocked(blocker)
        # Reset the flag; it is re-raised below when deobfuscation fails.
        instances.set_has_obfuscation(blocker, False)
        # …elided in excerpt: blocking list init / try: header…
        # Dispatch to the software-specific block-list fetcher.
        if software == "pleroma":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
        elif software == "mastodon":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
        elif software == "lemmy":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
        elif software == "friendica":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = friendica.fetch_blocks(blocker)
        elif software == "misskey":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = misskey.fetch_blocks(blocker)
        # …elided in excerpt: else: header…
            logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        # …elided in excerpt…
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])
            # …elided in excerpt…
            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
                # …elided in excerpt…

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                # …elided in excerpt…
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
                # …elided in excerpt…
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
                # …elided in excerpt…
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
                # …elided in excerpt…
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                # …elided in excerpt: None check (if header)…
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    # …elided in excerpt: continue + else: header…
                    block["blocked"] = row["domain"]
                    origin = row["origin"]
                    nodeinfo_url = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some obscure them with question marks, not sure if that's dependent on version or not
                row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                # …elided in excerpt: None check (if header)…
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    # …elided in excerpt: continue + else: header…
                    block["blocked"] = row["domain"]
                    origin = row["origin"]
                    nodeinfo_url = row["nodeinfo_url"]

            # NOTE(review): "domainm" looks like a typo for "domain" in this
            # log message.
            logger.debug("Looking up instance by domainm, blocked='%s'", block["blocked"])
            if not utils.is_domain_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                # …elided in excerpt…
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                # …elided in excerpt…
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            # Normalise the block level to the canonical vocabulary.
            block["block_level"] = utils.alias_block_level(block["block_level"])

            if utils.process_block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                # NOTE(review): the reason='%s' placeholder is fed
                # block["block_level"], not block["reason"] — log-only
                # inconsistency.
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
                # …elided in excerpt: blockdict.append({ …
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                # …elided in excerpt: }) …

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config[bot_enabled]='%s',blockdict()=%d'", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    # …elided in excerpt: return…
def fetch_observer(args: argparse.Namespace) -> int:
    """Scrape fediverse.observer: read its software drop-down menu, then
    fetch the per-software table data and register every wanted domain.

    NOTE(review): original numbering jumps — try:/for:/else: headers, the
    ``types`` list initialisation and returns are elided in this excerpt.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))
    # …elided in excerpt…
    logger.info("Fetching software list ...")
    raw = utils.fetch_url(
        "https://fediverse.observer",
        # …elided in excerpt: headers argument…
        (config.get("connection_timeout"), config.get("read_timeout"))
    # …elided in excerpt: closing of the call…

    logger.debug("raw[%s]()=%d", type(raw), len(raw))
    doc = bs4.BeautifulSoup(raw, features="html.parser")
    logger.debug("doc[]='%s'", type(doc))

    # The software names live in the "Softwares" navbar drop-down.
    items = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"}).findAll("a", {"class": "dropdown-item"})
    logger.debug("items[]='%s'", type(items))
    # …elided in excerpt: types list initialisation…

    logger.info("Checking %d menu items ...", len(items))
    # …elided in excerpt: for item in items: header…
        logger.debug("item[%s]='%s'", type(item), item)
        if item.text.lower() == "all":
            logger.debug("Skipping 'All' menu entry ...")
            # …elided in excerpt: continue + else?…

        logger.debug("Appending item.text='%s' ...", item.text)
        types.append(tidyup.domain(item.text))

    logger.info("Fetching %d different table data ...", len(types))
    for software in types:
        logger.debug("software='%s' - BEFORE!", software)
        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            # …elided in excerpt…

        # …elided in excerpt: try: header…
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://fediverse.observer/app/views/tabledata.php?software={software}",
                # …elided in excerpt: headers argument…
                (config.get("connection_timeout"), config.get("read_timeout"))
            # …elided in excerpt: closing of the call…

            logger.debug("raw[%s]()=%d", type(raw), len(raw))
            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from fediverse.observer: '%s'", software, type(exception))
            # …elided in excerpt…

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        # …elided in excerpt: for item in items: header…
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()

            logger.debug("domain='%s'", domain)
            if not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
            elif instances.is_recent(domain):
                logger.debug("domain='%s' is recently being handled - SKIPPED!", domain)

            # NOTE(review): this reassigns the outer loop variable
            # ``software`` inside the per-item loop — after the first item
            # the aliased value is used for the rest of the iteration;
            # confirm this is intended.
            software = software_helper.alias(software)
            logger.info("Fetching instances for domain='%s'", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    # …elided in excerpt: return…
def fetch_todon_wiki(args: argparse.Namespace) -> int:
    """Scrape wiki.todon.eu's domain-block page and store its
    silenced/limited and suspended lists as blocks for blocker todon.eu.

    NOTE(review): the ``blocklist`` dict and ``blockdict`` list
    initialisations, try: headers and returns are elided in this excerpt.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))
    # …elided in excerpt: blocklist/blockdict initialisation…
    raw = utils.fetch_url("https://wiki.todon.eu/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d'", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)
            # …elided in excerpt…
            if not instances.is_registered(blocked):
                # …elided in excerpt: try: header…
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, 'chaos.social', None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    # NOTE(review): message says "(fetch_cs)" although this
                    # is fetch_todon_wiki — looks copy-pasted; log-only.
                    logger.warning("Exception '%s' during fetching instances (fetch_cs) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if blocks.is_instance_blocked("todon.eu", blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                # …elided in excerpt: continue…

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if utils.process_block("todon.eu", blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='todon.eu' ...", blocked, block_level)
                # …elided in excerpt: blockdict.append({...})…

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='todon.eu',blockdict()=%d ...", len(blockdict))
        network.send_bot_post("todon.eu", blockdict)

    logger.debug("Success! - EXIT!")
    # …elided in excerpt: return…
def fetch_cs(args: argparse.Namespace):
    """Fetch chaos.social's public federation.md, parse its silenced and
    blocked tables and store them as blocks for blocker chaos.social.

    NOTE(review): the ``extensions`` markdown-extension list, the
    ``domains``/``blockdict`` initialisations and returns are defined in
    lines elided from this excerpt.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))
    # …elided in excerpt: extensions/domains/blockdict initialisation…
    raw = utils.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    # The document is Markdown; render it to HTML first, then parse.
    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc()=%d[]='%s'", len(doc), type(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    domains["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    domains["reject"] = federation.find_domains(blocked)

    logger.debug("domains[silenced]()=%d,domains[reject]()=%d", len(domains["silenced"]), len(domains["reject"]))
    # …elided in excerpt…
    for block_level in domains:
        logger.info("block_level='%s' has %d row(s)", block_level, len(domains[block_level]))

        for row in domains[block_level]:
            logger.debug("row[%s]='%s'", type(row), row)
            if instances.is_recent(row["domain"], "last_blocked"):
                logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
                # …elided in excerpt…
            elif not instances.is_registered(row["domain"]):
                # …elided in excerpt: try: header…
                    logger.info("Fetching instances from domain='%s' ...", row["domain"])
                    federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                    instances.set_last_error(row["domain"], exception)

            if utils.process_block("chaos.social", row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='chaos.social' ...", row["domain"], block_level)
                # …elided in excerpt: blockdict.append({ …
                    "blocked": row["domain"],
                    "reason" : row["reason"],
                # …elided in excerpt: }) …

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='chaos.social',blockdict()=%d ...", len(blockdict))
        network.send_bot_post("chaos.social", blockdict)

    logger.debug("Success! - EXIT!")
    # …elided in excerpt: return…
def fetch_fba_rss(args: argparse.Namespace) -> int:
    """Fetch an FBA-style RSS feed (``args.feed``), extract one domain per
    item from the link's query string and register new, wanted domains.

    NOTE(review): the ``domains`` list initialisation, else:/if headers and
    returns are elided from this excerpt.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))
    # …elided in excerpt: domains list initialisation…
    logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item='%s'", item)
            # Domain is carried in the link's query string after "=".
            domain = item.link.split("=")[1]

            if not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                # …elided in excerpt…
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                # …elided in excerpt…
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                # …elided in excerpt…
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                # …elided in excerpt: continue + else: header…

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    # …elided in excerpt: emptiness check…
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            # …elided in excerpt: try: header…
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    # …elided in excerpt: return…
def fetch_fbabot_atom(args: argparse.Namespace) -> int:
    """Fetch the FBA bot account's ATOM feed on ryona.agency, pull domains
    out of each entry's anchor hrefs and register new, wanted domains.

    NOTE(review): the ``domains`` list initialisation, some headers and
    returns are elided from this excerpt.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))
    feed = "https://ryona.agency/users/fba/feed.atom"
    # …elided in excerpt: domains list initialisation…

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            # Entry content is HTML; mine every <a href> for domains.
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s", type(href), href)
                    domain = tidyup.domain(href)

                    logger.debug("domain='%s'", domain)
                    if not utils.is_domain_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                        # …elided in excerpt…
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        # …elided in excerpt…
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        # …elided in excerpt…
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        # …elided in excerpt: continue + else: header…

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    # …elided in excerpt: emptiness check…
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            # …elided in excerpt: try: header…
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, "ryona.agency", None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    # …elided in excerpt: return…
def fetch_instances(args: argparse.Namespace) -> int:
    """Fetch instance (peer) data: first for ``args.domain`` itself, then —
    unless limited by an elided flag check — for every supported-software
    instance whose ``last_instance_fetch`` is older than the recheck
    interval.

    NOTE(review): try:/for:/if headers and returns are elided from this
    excerpt.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))
    # …elided in excerpt: try: header…
        logger.info("Fetching instances from args.domain='%s' ...", args.domain)
        federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update_data(args.domain)
        # …elided in excerpt…

    # …elided in excerpt: single-domain flag check (if header)…
        logger.debug("Not fetching more instances - EXIT!")
        # …elided in excerpt: return…

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    # …elided in excerpt: closing parenthesis…

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    # …elided in excerpt: for row in rows: header…
        logger.debug("domain='%s'", row["domain"])
        if not utils.is_domain_wanted(row["domain"]):
            logger.debug("Domain row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
            # …elided in excerpt: continue + try: header…
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", row["domain"], row["origin"], row["software"], row["nodeinfo_url"])
            federation.fetch_instances(row["domain"], row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[domain]='%s'", type(exception), row["domain"])
            instances.set_last_error(row["domain"], exception)

    logger.debug("Success - EXIT!")
    # …elided in excerpt: return…
def fetch_oliphant(args: argparse.Namespace) -> int:
    """Download oliphant's CSV block lists from codeberg.org and import
    each row as a block (reject, plus optional reject_media /
    reject_reports) for the list's blocker domain.

    NOTE(review): the ``domains``/``blockdict`` initialisations, list/dict
    delimiters of ``blocklists`` and several headers/continues/returns are
    elided from this excerpt.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))
    # …elided in excerpt…

    # Base URL
    base_url = "https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists"

    # …elided in excerpt: blocklists sequence opening + per-entry braces;
    # each visible entry has exactly two keys: "blocker" and "csv_url".…
        "blocker": "artisan.chat",
        "csv_url": "mastodon/artisan.chat.csv",
        "blocker": "mastodon.art",
        "csv_url": "mastodon/mastodon.art.csv",
        "blocker": "pleroma.envs.net",
        "csv_url": "mastodon/pleroma.envs.net.csv",
        "blocker": "oliphant.social",
        "csv_url": "mastodon/_unified_tier3_blocklist.csv",
        "blocker": "mastodon.online",
        "csv_url": "mastodon/mastodon.online.csv",
        "blocker": "mastodon.social",
        "csv_url": "mastodon/mastodon.social.csv",
        "blocker": "mastodon.social",
        "csv_url": "other/missing-tier0-mastodon.social.csv",
        "blocker": "rage.love",
        "csv_url": "mastodon/rage.love.csv",
        "blocker": "sunny.garden",
        "csv_url": "mastodon/sunny.garden.csv",
        "blocker": "solarpunk.moe",
        "csv_url": "mastodon/solarpunk.moe.csv",
        "blocker": "toot.wales",
        "csv_url": "mastodon/toot.wales.csv",
        "blocker": "union.place",
        "csv_url": "mastodon/union.place.csv",
    # …elided in excerpt: closing of blocklists…

    logger.debug("Downloading %d files ...", len(blocklists))
    for block in blocklists:
        # Is domain given and not equal blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            # …elided in excerpt…
        elif args.domain in domains:
            logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
            # …elided in excerpt…
        elif instances.is_recent(block["blocker"]):
            logger.debug("block[blocker]='%s' has been recently crawled - SKIPPED!", block["blocker"])
            # …elided in excerpt…

        logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
        response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if not response.ok or response.status_code > 399 or response.content == "":
            logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
            # …elided in excerpt…

        logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
        reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")

        logger.debug("reader[]='%s'", type(reader))
        # …elided in excerpt: for row in reader: header…
            logger.debug("row[%s]='%s'", type(row), row)
            domain = severity = None
            reject_media = reject_reports = False
            # The CSVs use either commented ("#domain") or plain column names.
            # …elided in excerpt: if "#domain" in row: header…
                domain = row["#domain"]
            elif "domain" in row:
                domain = row["domain"]
            # …elided in excerpt: else: header…
                logger.debug("row='%s' does not contain domain column", row)
                # …elided in excerpt: continue…

            if "#severity" in row:
                severity = row["#severity"]
            elif "severity" in row:
                severity = row["severity"]
            # …elided in excerpt: else: header…
                logger.debug("row='%s' does not contain severity column", row)
                # …elided in excerpt: continue…

            if "#reject_media" in row and row["#reject_media"].lower() == "true":
                # …elided in excerpt: reject_media assignment…
            elif "reject_media" in row and row["reject_media"].lower() == "true":
                # …elided in excerpt: reject_media assignment…

            if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
                reject_reports = True
            elif "reject_reports" in row and row["reject_reports"].lower() == "true":
                reject_reports = True

            logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
            if not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                # …elided in excerpt…

            logger.debug("Marking domain='%s' as handled", domain)
            domains.append(domain)

            logger.debug("Processing domain='%s' ...", domain)
            processed = utils.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
            logger.debug("processed='%s'", processed)

            if utils.process_block(block["blocker"], domain, None, "reject") and config.get("bot_enabled"):
                # NOTE(review): the visible blocklists entries only define
                # "blocker" and "csv_url" — block["block_level"] here and
                # block["reason"] below would raise KeyError unless an
                # elided line adds those keys; confirm against full file.
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", domain, block["block_level"], block["blocker"])
                # …elided in excerpt: blockdict.append({ …
                    "reason" : block["reason"],
                # …elided in excerpt: }) + severity checks…
                utils.process_block(block["blocker"], domain, None, "reject_media")
                # …elided in excerpt: if reject_reports: header…
                utils.process_block(block["blocker"], domain, None, "reject_reports")

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
            network.send_bot_post(block["blocker"], blockdict)

    logger.debug("Success! - EXIT!")
    # …elided in excerpt: return…
def fetch_txt(args: argparse.Namespace) -> int:
    """Fetch plain-text block lists (one domain per line) from a static
    table of URLs and process each wanted domain for the list's blocker.

    NOTE(review): the ``urls`` sequence delimiters, for:/if headers and
    returns are elided from this excerpt; the only visible entry is
    seirdy.one's list.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))
    # …elided in excerpt: urls sequence opening…
        "blocker": "seirdy.one",
        "url" : "https://seirdy.one/pb/bsl.txt",
    # …elided in excerpt: closing of urls…

    logger.info("Checking %d text file(s) ...", len(urls))
    # …elided in excerpt: for row in urls: header…
        logger.debug("Fetching row[url]='%s' ...", row["url"])
        response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code < 300 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s'", domain)
                # …elided in excerpt: tidy-up/emptiness check (if header)…
                    logger.debug("domain is empty - SKIPPED!")
                    # …elided in excerpt…
                elif not utils.is_domain_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    # …elided in excerpt…
                elif instances.is_recent(domain):
                    logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                    # …elided in excerpt…

                logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
                processed = utils.process_domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)
                # …elided in excerpt: if not processed: header…
                    logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)

    logger.debug("Success! - EXIT!")
    # …elided in excerpt: return…
# Scrape https://fedipact.online for instances that signed the pact (listed
# as <li> elements) and fetch peer/instance data for each new, wanted domain.
# Returns int (exit code); the `return` is outside this extraction.
# NOTE(review): the `for row in rows:` header (~line 939) and the `continue`
# statements after each SKIPPED! branch are missing from this paste.
924 def fetch_fedipact(args: argparse.Namespace) -> int:
925 logger.debug("args[]='%s' - CALLED!", type(args))
928 response = utils.fetch_url("https://fedipact.online", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
930 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
# Only parse successful (< 300) non-empty responses.
931 if response.ok and response.status_code < 300 and response.text != "":
932 logger.debug("Parsing %d Bytes ...", len(response.text))
934 doc = bs4.BeautifulSoup(response.text, "html.parser")
935 logger.debug("doc[]='%s'", type(doc))
# Each signatory instance is one <li>; its first content node is the domain.
937 rows = doc.findAll("li")
938 logger.info("Checking %d row(s) ...", len(rows))
940 logger.debug("row[]='%s'", type(row))
941 domain = tidyup.domain(row.contents[0])
943 logger.debug("domain='%s'", domain)
# Guard chain: empty text, unwanted, already registered, or crawled recently.
945 logger.debug("domain is empty - SKIPPED!")
947 elif not utils.is_domain_wanted(domain):
948 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
950 elif instances.is_registered(domain):
951 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
953 elif instances.is_recent(domain):
954 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
# New domain: crawl it with no known origin/software, tagging this command.
957 logger.info("Fetching domain='%s' ...", domain)
958 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
960 logger.debug("Success! - EXIT!")
# Scrape the joinfediverse.wiki "FediBlock" wiki tables, expand per-subdomain
# entries, then record the blocks against every locally-known climatejustice.*
# instance. Returns int (exit code); the `return` is outside this extraction.
# NOTE(review): embedded line numbers show gaps (loop headers, `continue`s,
# the blockdict/blocklist/blocking initializations, and the dict literal
# around lines 1091-1094 are partially missing from this paste).
963 def fetch_joinfediverse(args: argparse.Namespace) -> int:
964 logger.debug("args[]='%s' - CALLED!", type(args))
# Download the wiki page body (text of the fetch_url response).
967 raw = utils.fetch_url("https://joinfediverse.wiki/FediBlock", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
968 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
970 doc = bs4.BeautifulSoup(raw, "html.parser")
971 logger.debug("doc[]='%s'", type(doc))
# The block data lives in MediaWiki "wikitable" tables.
973 tables = doc.findAll("table", {"class": "wikitable"})
975 logger.info("Analyzing %d table(s) ...", len(tables))
978 logger.debug("table[]='%s'", type(table))
980 rows = table.findAll("tr")
981 logger.info("Checking %d row(s) ...", len(rows))
# Maps column index -> recognized header name for the current table.
982 block_headers = dict()
984 logger.debug("row[%s]='%s'", type(row), row)
# A row with <th> cells (re)defines the column layout.
986 headers = row.findAll("th")
987 logger.debug("Found headers()=%d header(s)", len(headers))
989 block_headers = dict()
991 for header in headers:
993 logger.debug("header[]='%s',cnt=%d", type(header), cnt)
994 text = header.contents[0]
996 logger.debug("text[]='%s'", type(text))
# Skip non-text header contents (nested tags) and headers that are
# themselves domains (those are data, not column labels).
997 if not isinstance(text, str):
998 logger.debug("text[]='%s' is not 'str' - SKIPPED!", type(text))
1000 elif validators.domain(text.strip()):
1001 logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
# Normalize and keep only the columns this scraper understands.
1004 text = tidyup.domain(text.strip())
1005 logger.debug("text='%s'", text)
1006 if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
1007 logger.debug("Found header: '%s'=%d", text, cnt)
1008 block_headers[cnt] = text
# No recognized headers yet -> data rows cannot be interpreted.
1010 elif len(block_headers) == 0:
1011 logger.debug("row is not scrapable - SKIPPED!")
# Data row under a known header layout: build one `block` dict from it.
1013 elif len(block_headers) > 0:
1014 logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
1018 for element in row.find_all(["th", "td"]):
1020 logger.debug("element[]='%s',cnt=%d", type(element), cnt)
1021 if cnt in block_headers:
1022 logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])
1024 text = element.text.strip()
# "domain"/"instance" columns both map to the canonical key "blocked".
1025 key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"
1027 logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
1028 if key in ["domain", "instance"]:
1030 elif key == "reason":
1031 block[key] = tidyup.reason(text)
# Subdomains are listed slash-separated in the wiki cell.
1032 elif key == "subdomain(s)":
1035 block[key] = text.split("/")
1037 logger.debug("key='%s'", key)
1040 logger.debug("block()=%d ...", len(block))
1042 logger.debug("Appending block()=%d ...", len(block))
1043 blocklist.append(block)
1045 logger.debug("blocklist()=%d", len(blocklist))
# All climatejustice.* instances known locally act as the "blocker" side.
1047 database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
1048 domains = database.cursor.fetchall()
1050 logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
# Expand entries with subdomains into one block per <subdomain>.<origin>.
# NOTE(review): block["blocked"] is rebound cumulatively inside this loop and
# the same dict object is appended repeatedly — looks intentional here but
# worth confirming against the full file.
1052 for block in blocklist:
1053 logger.debug("block='%s'", block)
1054 if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
1055 origin = block["blocked"]
1056 for subdomain in block["subdomain(s)"]:
1057 block["blocked"] = subdomain + "." + origin
1058 blocking.append(block)
1060 blocking.append(block)
# NOTE(review): defect — `%d` is given the list `blocking`, not
# `len(blocking)`; this raises TypeError if DEBUG formatting ever runs.
1062 logger.debug("blocking()=%d", blocking)
# First pass: make sure every blocked domain is registered/crawled.
1063 for block in blocking:
1064 block["blocked"] = tidyup.domain(block["blocked"])
1066 if not utils.is_domain_wanted(block["blocked"]):
1067 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1069 elif instances.is_recent(block["blocked"]):
1070 logger.debug("blocked='%s' has been recently checked - SKIPPED!", block["blocked"])
# NOTE(review): typo in log message: "Proccessing" -> "Processing".
1073 logger.info("Proccessing blocked='%s' ...", block["blocked"])
1074 utils.process_domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
# Second pass: record each block for every climatejustice.* blocker.
1077 for blocker in domains:
# fetchall() yields row tuples; column 0 is the domain.
1078 blocker = blocker[0]
1079 logger.debug("blocker[%s]='%s'", type(blocker), blocker)
1081 for block in blocking:
1082 block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None
1084 if not utils.is_domain_wanted(block["blocked"]):
1085 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1088 logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
# Record the "reject" block; collect for the bot announcement if enabled.
1089 if utils.process_block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
1090 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
1092 "blocked": block["blocked"],
1093 "reason" : block["reason"],
# Flush any per-instance metadata updates accumulated during processing.
1096 if instances.has_pending(blocker):
1097 logger.debug("Flushing updates for blocker='%s' ...", blocker)
1098 instances.update_data(blocker)
1100 logger.debug("Invoking commit() ...")
1101 database.connection.commit()
# NOTE(review): defect — format string is missing the closing quote after
# %s ("blocker='%s,..."); compare the correct form used elsewhere:
# "Sending bot POST for blocker='%s',blockdict()=%d ...".
1103 if config.get("bot_enabled") and len(blockdict) > 0:
1104 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", blocker, len(blockdict))
1105 network.send_bot_post(blocker, blockdict)
1107 logger.debug("Success! - EXIT!")
# Re-fetch block lists from instances previously flagged has_obfuscation=1
# and try to deobfuscate entries like "exam*le.tld" via utils.deobfuscate_domain.
# Can be narrowed to one --domain or one --software via args. Returns int
# (exit code); the `return` runs past the end of this extraction.
# NOTE(review): embedded line numbers show gaps (initialization of blockdict/
# obfuscated, `for row in rows:` header, several `continue`s are missing).
1110 def recheck_obfuscation(args: argparse.Namespace) -> int:
1111 logger.debug("args[]='%s' - CALLED!", type(args))
# Select which obfuscating instances to recheck: one domain, one software,
# or all flagged rows. NOTE(review): the args.software guard compares
# validators.domain(args.software) == args.software — presumably filtering
# out domain-looking values; confirm intent against the full file.
1115 if isinstance(args.domain, str) and args.domain != "" and utils.is_domain_wanted(args.domain):
1116 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
1117 elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
1118 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
1120 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")
1122 rows = database.cursor.fetchall()
1123 logger.info("Checking %d domains ...", len(rows))
1125 logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
# Dispatch to the software-specific block-list fetcher.
1128 if row["software"] == "pleroma":
1129 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1130 blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
1131 elif row["software"] == "mastodon":
1132 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1133 blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
1134 elif row["software"] == "lemmy":
1135 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1136 blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
1137 elif row["software"] == "friendica":
1138 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1139 blocking = friendica.fetch_blocks(row["domain"])
1140 elif row["software"] == "misskey":
1141 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1142 blocking = misskey.fetch_blocks(row["domain"])
# NOTE(review): typo in log message: "sofware" -> "software".
1144 logger.warning("Unknown sofware: domain='%s',software='%s'", row["domain"], row["software"])
1146 logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1149 for block in blocking:
1150 logger.debug("blocked='%s'", block["blocked"])
# Guard chain: skip reverse-DNS, placeholder and onion pseudo-domains.
1153 if block["blocked"].endswith(".arpa"):
1154 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1156 elif block["blocked"].endswith(".tld"):
1157 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1159 elif block["blocked"].endswith(".onion"):
1160 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
# Wildcard characters mean the entry is obfuscated: count it and try to
# resolve it back to a real domain (using a hash hint when present).
1162 elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
1163 logger.debug("block='%s' is obfuscated.", block["blocked"])
1164 obfuscated = obfuscated + 1
1165 blocked = utils.deobfuscate_domain(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
1166 elif not utils.is_domain_wanted(block["blocked"]):
1167 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1169 elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1170 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
# Successful deobfuscation: undo the counter increment and record the
# resolved block unless it is already known.
1173 logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
1174 if blocked is not None and blocked != block["blocked"]:
1175 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1176 obfuscated = obfuscated - 1
1177 if blocks.is_instance_blocked(row["domain"], blocked):
1178 logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
# Normalize the block level to the project's canonical names.
1181 block["block_level"] = utils.alias_block_level(block["block_level"])
1183 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
1184 if utils.process_block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1185 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
1188 "reason" : block["reason"],
# When every entry could be deobfuscated, clear the instance's flag.
1191 logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
1192 if obfuscated == 0 and len(blocking) > 0:
1193 logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
1194 instances.set_has_obfuscation(row["domain"], False)
# Flush pending per-instance metadata, then commit all block rows at once.
1196 if instances.has_pending(row["domain"]):
1197 logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1198 instances.update_data(row["domain"])
1200 logger.debug("Invoking commit() ...")
1201 database.connection.commit()
# NOTE(review): defect — format string is missing the closing quote after
# %s ("blocker='%s,..."); compare the correct form used elsewhere:
# "Sending bot POST for blocker='%s',blockdict()=%d ...".
1203 if config.get("bot_enabled") and len(blockdict) > 0:
1204 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", row["domain"], len(blockdict))
1205 network.send_bot_post(row["domain"], blockdict)
1207 logger.debug("Success! - EXIT!")