1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
import argparse
import csv
import inspect
import json
import logging
import time

import atoma
import bs4
import markdown
import validators

from fba import csrf
from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import locking
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma
53 logging.basicConfig(level=logging.INFO)
54 logger = logging.getLogger(__name__)
55 #logger.setLevel(logging.DEBUG)
57 def check_instance(args: argparse.Namespace) -> int:
58 logger.debug("args.domain='%s' - CALLED!", args.domain)
60 if not validators.domain(args.domain):
61 logger.warning("args.domain='%s' is not valid", args.domain)
63 elif blacklist.is_blacklisted(args.domain):
64 logger.warning("args.domain='%s' is blacklisted", args.domain)
66 elif instances.is_registered(args.domain):
67 logger.warning("args.domain='%s' is already registered", args.domain)
70 logger.info("args.domain='%s' is not known", args.domain)
72 logger.debug("status=%d - EXIT!", status)
75 def fetch_pixelfed_api(args: argparse.Namespace) -> int:
76 logger.debug("args[]='%s' - CALLED!", type(args))
78 # No CSRF by default, you don't have to add network.api_headers by yourself here
82 logger.debug("Checking CSRF from pixelfed.org")
83 headers = csrf.determine("pixelfed.org", dict())
84 except network.exceptions as exception:
85 logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
89 logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
90 fetched = network.get_json_api(
92 "/api/v1/servers/all.json?scope=All&country=all&language=all",
94 (config.get("connection_timeout"), config.get("read_timeout"))
97 logger.debug("JSON API returned %d elements", len(fetched))
98 if "error_message" in fetched:
99 logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
101 elif "data" not in fetched["json"]:
102 logger.warning("API did not return JSON with 'data' element - EXIT!")
105 rows = fetched["json"]["data"]
106 logger.info("Checking %d fetched rows ...", len(rows))
108 logger.debug("row[]='%s'", type(row))
109 if "domain" not in row:
110 logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
112 elif not utils.is_domain_wanted(row["domain"]):
113 logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
115 elif instances.is_registered(row["domain"]):
116 logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
118 elif instances.is_recent(row["domain"]):
119 logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
122 logger.debug("Fetching instances from row[domain]='%s' ...", row["domain"])
123 federation.fetch_instances(row["domain"], None, None, inspect.currentframe().f_code.co_name)
125 except network.exceptions as exception:
126 logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
129 logger.debug("Success! - EXIT!")
132 def fetch_bkali(args: argparse.Namespace) -> int:
133 logger.debug("args[]='%s' - CALLED!", type(args))
136 fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
137 "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
140 logger.debug("fetched[]='%s'", type(fetched))
141 if "error_message" in fetched:
142 logger.warning("post_json_api() for 'gql.api.bka.li' returned error message='%s", fetched["error_message"])
144 elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
145 logger.warning("post_json_api() returned error: '%s", fetched["error"]["message"])
148 rows = fetched["json"]
150 logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
152 raise Exception("WARNING: Returned no records")
153 elif "data" not in rows:
154 raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
155 elif "nodeinfo" not in rows["data"]:
156 raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")
158 for entry in rows["data"]["nodeinfo"]:
159 logger.debug("entry[%s]='%s'", type(entry), entry)
160 if "domain" not in entry:
161 logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
163 elif not utils.is_domain_wanted(entry["domain"]):
164 logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
166 elif instances.is_registered(entry["domain"]):
167 logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
169 elif instances.is_recent(entry["domain"]):
170 logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
173 logger.debug("Adding domain='%s' ...", entry["domain"])
174 domains.append(entry["domain"])
176 except network.exceptions as exception:
177 logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
180 logger.debug("domains()=%d", len(domains))
184 logger.info("Adding %d new instances ...", len(domains))
185 for domain in domains:
187 logger.info("Fetching instances from domain='%s' ...", domain)
188 federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
189 except network.exceptions as exception:
190 logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
191 instances.set_last_error(domain, exception)
194 logger.debug("Success - EXIT!")
197 def fetch_blocks(args: argparse.Namespace) -> int:
198 logger.debug("args[]='%s' - CALLED!", type(args))
199 if args.domain is not None and args.domain != "":
200 logger.debug("args.domain='%s' - checking ...", args.domain)
201 if not validators.domain(args.domain):
202 logger.warning("args.domain='%s' is not valid.", args.domain)
204 elif blacklist.is_blacklisted(args.domain):
205 logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
207 elif not instances.is_registered(args.domain):
208 logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
213 if args.domain is not None and args.domain != "":
214 # Re-check single domain
215 logger.debug("Querying database for single args.domain='%s' ...", args.domain)
216 database.cursor.execute(
217 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
219 elif args.software is not None and args.software != "":
220 # Re-check single software
221 logger.debug("Querying database for args.software='%s' ...", args.software)
222 database.cursor.execute(
223 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
226 # Re-check after "timeout" (aka. minimum interval)
227 database.cursor.execute(
228 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
231 rows = database.cursor.fetchall()
232 logger.info("Checking %d entries ...", len(rows))
233 for blocker, software, origin, nodeinfo_url in rows:
234 logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
235 blocker = tidyup.domain(blocker)
236 logger.debug("blocker='%s' - AFTER!", blocker)
239 logger.warning("blocker is now empty!")
241 elif nodeinfo_url is None or nodeinfo_url == "":
242 logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
244 elif not utils.is_domain_wanted(blocker):
245 logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
248 logger.debug("blocker='%s'", blocker)
249 instances.set_last_blocked(blocker)
250 instances.set_has_obfuscation(blocker, False)
254 if software == "pleroma":
255 logger.info("blocker='%s',software='%s'", blocker, software)
256 blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
257 elif software == "mastodon":
258 logger.info("blocker='%s',software='%s'", blocker, software)
259 blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
260 elif software == "lemmy":
261 logger.info("blocker='%s',software='%s'", blocker, software)
262 blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
263 elif software == "friendica":
264 logger.info("blocker='%s',software='%s'", blocker, software)
265 blocking = friendica.fetch_blocks(blocker)
266 elif software == "misskey":
267 logger.info("blocker='%s',software='%s'", blocker, software)
268 blocking = misskey.fetch_blocks(blocker)
270 logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)
272 logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
274 for block in blocking:
275 logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])
277 if block["block_level"] == "":
278 logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
281 logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
282 block["blocked"] = tidyup.domain(block["blocked"])
283 block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
284 logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])
286 if block["blocked"] == "":
287 logger.warning("blocked is empty, blocker='%s'", blocker)
289 elif block["blocked"].endswith(".onion"):
290 logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
292 elif block["blocked"].endswith(".arpa"):
293 logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
295 elif block["blocked"].endswith(".tld"):
296 logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
298 elif block["blocked"].find("*") >= 0:
299 logger.debug("blocker='%s' uses obfuscated domains", blocker)
301 # Some friendica servers also obscure domains without hash
302 row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)
304 logger.debug("row[]='%s'", type(row))
306 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
307 instances.set_has_obfuscation(blocker, True)
310 block["blocked"] = row["domain"]
311 origin = row["origin"]
312 nodeinfo_url = row["nodeinfo_url"]
313 elif block["blocked"].find("?") >= 0:
314 logger.debug("blocker='%s' uses obfuscated domains", blocker)
316 # Some obscure them with question marks, not sure if that's dependent on version or not
317 row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)
319 logger.debug("row[]='%s'", type(row))
321 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
322 instances.set_has_obfuscation(blocker, True)
325 block["blocked"] = row["domain"]
326 origin = row["origin"]
327 nodeinfo_url = row["nodeinfo_url"]
329 logger.debug("Looking up instance by domainm, blocked='%s'", block["blocked"])
330 if not utils.is_domain_wanted(block["blocked"]):
331 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
333 elif block["block_level"] in ["accept", "accepted"]:
334 logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
336 elif not instances.is_registered(block["blocked"]):
337 logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
338 federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)
340 block["block_level"] = utils.alias_block_level(block["block_level"])
342 if utils.process_block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
343 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
345 "blocked": block["blocked"],
346 "reason" : block["reason"],
349 logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
350 cookies.clear(block["blocked"])
352 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
353 if instances.has_pending(blocker):
354 logger.debug("Flushing updates for blocker='%s' ...", blocker)
355 instances.update_data(blocker)
357 logger.debug("Invoking commit() ...")
358 database.connection.commit()
360 logger.debug("Invoking cookies.clear(%s) ...", blocker)
361 cookies.clear(blocker)
363 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d'", config.get("bot_enabled"), len(blockdict))
364 if config.get("bot_enabled") and len(blockdict) > 0:
365 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
366 network.send_bot_post(blocker, blockdict)
368 logger.debug("Success! - EXIT!")
371 def fetch_observer(args: argparse.Namespace) -> int:
372 logger.debug("args[]='%s' - CALLED!", type(args))
377 logger.info("Fetching software list ...")
378 raw = utils.fetch_url(
379 "https://fediverse.observer",
381 (config.get("connection_timeout"), config.get("read_timeout"))
383 logger.debug("raw[%s]()=%d", type(raw), len(raw))
385 doc = bs4.BeautifulSoup(raw, features="html.parser")
386 logger.debug("doc[]='%s'", type(doc))
388 items = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"}).findAll("a", {"class": "dropdown-item"})
389 logger.debug("items[]='%s'", type(items))
393 logger.info("Checking %d menu items ...", len(items))
395 logger.debug("item[%s]='%s'", type(item), item)
396 if item.text.lower() == "all":
397 logger.debug("Skipping 'All' menu entry ...")
400 logger.debug("Appending item.text='%s' ...", item.text)
401 types.append(tidyup.domain(item.text))
403 logger.info("Fetching %d different table data ...", len(types))
404 for software in types:
405 logger.debug("software='%s' - BEFORE!", software)
406 if args.software is not None and args.software != software:
407 logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
412 logger.debug("Fetching table data for software='%s' ...", software)
413 raw = utils.fetch_url(
414 f"https://fediverse.observer/app/views/tabledata.php?software={software}",
416 (config.get("connection_timeout"), config.get("read_timeout"))
418 logger.debug("raw[%s]()=%d", type(raw), len(raw))
420 doc = bs4.BeautifulSoup(raw, features="html.parser")
421 logger.debug("doc[]='%s'", type(doc))
422 except network.exceptions as exception:
423 logger.warning("Cannot fetch software='%s' from fediverse.observer: '%s'", software, type(exception))
426 items = doc.findAll("a", {"class": "url"})
427 logger.info("Checking %d items,software='%s' ...", len(items), software)
429 logger.debug("item[]='%s'", type(item))
430 domain = item.decode_contents()
432 logger.debug("domain='%s'", domain)
433 if not utils.is_domain_wanted(domain):
434 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
436 elif instances.is_registered(domain):
437 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
439 elif instances.is_recent(domain):
440 logger.debug("domain='%s' is recently being handled - SKIPPED!", domain)
443 software = software_helper.alias(software)
444 logger.info("Fetching instances for domain='%s'", domain)
445 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
447 logger.debug("Success! - EXIT!")
450 def fetch_todon_wiki(args: argparse.Namespace) -> int:
451 logger.debug("args[]='%s' - CALLED!", type(args))
459 raw = utils.fetch_url("https://wiki.todon.eu/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
460 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
462 doc = bs4.BeautifulSoup(raw, "html.parser")
463 logger.debug("doc[]='%s'", type(doc))
465 silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
466 logger.info("Checking %d silenced/limited entries ...", len(silenced))
467 blocklist["silenced"] = utils.find_domains(silenced, "div")
469 suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
470 logger.info("Checking %d suspended entries ...", len(suspended))
471 blocklist["reject"] = utils.find_domains(suspended, "div")
474 for block_level in blocklist:
475 blockers = blocklist[block_level]
477 logger.debug("block_level='%s',blockers()=%d'", block_level, len(blockers))
478 for blocked in blockers:
479 logger.debug("blocked='%s'", blocked)
481 if not instances.is_registered(blocked):
483 logger.info("Fetching instances from domain='%s' ...", blocked)
484 federation.fetch_instances(blocked, 'chaos.social', None, inspect.currentframe().f_code.co_name)
485 except network.exceptions as exception:
486 logger.warning("Exception '%s' during fetching instances (fetch_cs) from blocked='%s'", type(exception), blocked)
487 instances.set_last_error(blocked, exception)
489 if blocks.is_instance_blocked("todon.eu", blocked, block_level):
490 logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
493 logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
494 if utils.process_block("todon.eu", blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
495 logger.debug("Appending blocked='%s',reason='%s' for blocker='todon.eu' ...", blocked, block_level)
501 logger.debug("Invoking commit() ...")
502 database.connection.commit()
504 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
505 if config.get("bot_enabled") and len(blockdict) > 0:
506 logger.info("Sending bot POST for blocker='todon.eu',blockdict()=%d ...", len(blockdict))
507 network.send_bot_post("todon.eu", blockdict)
509 logger.debug("Success! - EXIT!")
512 def fetch_cs(args: argparse.Namespace):
513 logger.debug("args[]='%s' - CALLED!", type(args))
539 raw = utils.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
540 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
542 doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
543 logger.debug("doc()=%d[]='%s'", len(doc), type(doc))
545 silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
546 logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
547 domains["silenced"] = federation.find_domains(silenced)
549 blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
550 logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
551 domains["reject"] = federation.find_domains(blocked)
553 logger.debug("domains[silenced]()=%d,domains[reject]()=%d", len(domains["silenced"]), len(domains["reject"]))
558 for block_level in domains:
559 logger.info("block_level='%s' has %d row(s)", block_level, len(domains[block_level]))
561 for row in domains[block_level]:
562 logger.debug("row[%s]='%s'", type(row), row)
563 if instances.is_recent(row["domain"], "last_blocked"):
564 logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
566 elif not instances.is_registered(row["domain"]):
568 logger.info("Fetching instances from domain='%s' ...", row["domain"])
569 federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)
570 except network.exceptions as exception:
571 logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
572 instances.set_last_error(row["domain"], exception)
574 if utils.process_block("chaos.social", row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
575 logger.debug("Appending blocked='%s',reason='%s' for blocker='chaos.social' ...", row["domain"], block_level)
577 "blocked": row["domain"],
578 "reason" : row["reason"],
581 logger.debug("Invoking commit() ...")
582 database.connection.commit()
584 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
585 if config.get("bot_enabled") and len(blockdict) > 0:
586 logger.info("Sending bot POST for blocker='chaos.social',blockdict()=%d ...", len(blockdict))
587 network.send_bot_post("chaos.social", blockdict)
589 logger.debug("Success! - EXIT!")
592 def fetch_fba_rss(args: argparse.Namespace) -> int:
593 logger.debug("args[]='%s' - CALLED!", type(args))
596 logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
597 response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
599 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
600 if response.ok and response.status_code < 300 and len(response.text) > 0:
601 logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
602 rss = atoma.parse_rss_bytes(response.content)
604 logger.debug("rss[]='%s'", type(rss))
605 for item in rss.items:
606 logger.debug("item='%s'", item)
607 domain = item.link.split("=")[1]
609 if not utils.is_domain_wanted(domain):
610 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
612 elif domain in domains:
613 logger.debug("domain='%s' is already added - SKIPPED!", domain)
615 elif instances.is_registered(domain):
616 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
618 elif instances.is_recent(domain):
619 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
622 logger.debug("Adding domain='%s'", domain)
623 domains.append(domain)
625 logger.debug("domains()=%d", len(domains))
629 logger.info("Adding %d new instances ...", len(domains))
630 for domain in domains:
632 logger.info("Fetching instances from domain='%s' ...", domain)
633 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
634 except network.exceptions as exception:
635 logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
636 instances.set_last_error(domain, exception)
639 logger.debug("Success! - EXIT!")
642 def fetch_fbabot_atom(args: argparse.Namespace) -> int:
643 logger.debug("args[]='%s' - CALLED!", type(args))
644 feed = "https://ryona.agency/users/fba/feed.atom"
648 logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
649 response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
651 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
652 if response.ok and response.status_code < 300 and len(response.text) > 0:
653 logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
654 atom = atoma.parse_atom_bytes(response.content)
656 logger.debug("atom[]='%s'", type(atom))
657 for entry in atom.entries:
658 logger.debug("entry[]='%s'", type(entry))
659 doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
660 logger.debug("doc[]='%s'", type(doc))
661 for element in doc.findAll("a"):
662 for href in element["href"].split(","):
663 logger.debug("href[%s]='%s", type(href), href)
664 domain = tidyup.domain(href)
666 logger.debug("domain='%s'", domain)
667 if not utils.is_domain_wanted(domain):
668 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
670 elif domain in domains:
671 logger.debug("domain='%s' is already added - SKIPPED!", domain)
673 elif instances.is_registered(domain):
674 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
676 elif instances.is_recent(domain):
677 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
680 logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
681 domains.append(domain)
683 logger.debug("domains()=%d", len(domains))
687 logger.info("Adding %d new instances ...", len(domains))
688 for domain in domains:
690 logger.info("Fetching instances from domain='%s' ...", domain)
691 federation.fetch_instances(domain, "ryona.agency", None, inspect.currentframe().f_code.co_name)
692 except network.exceptions as exception:
693 logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
694 instances.set_last_error(domain, exception)
697 logger.debug("Success! - EXIT!")
700 def fetch_instances(args: argparse.Namespace) -> int:
701 logger.debug("args[]='%s' - CALLED!", type(args))
706 logger.info("Fetching instances from args.domain='%s' ...", args.domain)
707 federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
708 except network.exceptions as exception:
709 logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
710 instances.set_last_error(args.domain, exception)
711 instances.update_data(args.domain)
715 logger.debug("Not fetching more instances - EXIT!")
718 # Loop through some instances
719 database.cursor.execute(
720 "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
723 rows = database.cursor.fetchall()
724 logger.info("Checking %d entries ...", len(rows))
726 logger.debug("domain='%s'", row["domain"])
727 if not utils.is_domain_wanted(row["domain"]):
728 logger.debug("Domain row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
732 logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", row["domain"], row["origin"], row["software"], row["nodeinfo_url"])
733 federation.fetch_instances(row["domain"], row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
734 except network.exceptions as exception:
735 logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[domain]='%s'", type(exception), row["domain"])
736 instances.set_last_error(row["domain"], exception)
738 logger.debug("Success - EXIT!")
741 def fetch_oliphant(args: argparse.Namespace) -> int:
742 logger.debug("args[]='%s' - CALLED!", type(args))
746 base_url = "https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists"
751 "blocker": "artisan.chat",
752 "csv_url": "mastodon/artisan.chat.csv",
754 "blocker": "mastodon.art",
755 "csv_url": "mastodon/mastodon.art.csv",
757 "blocker": "pleroma.envs.net",
758 "csv_url": "mastodon/pleroma.envs.net.csv",
760 "blocker": "oliphant.social",
761 "csv_url": "mastodon/_unified_tier3_blocklist.csv",
763 "blocker": "mastodon.online",
764 "csv_url": "mastodon/mastodon.online.csv",
766 "blocker": "mastodon.social",
767 "csv_url": "mastodon/mastodon.social.csv",
769 "blocker": "mastodon.social",
770 "csv_url": "other/missing-tier0-mastodon.social.csv",
772 "blocker": "rage.love",
773 "csv_url": "mastodon/rage.love.csv",
775 "blocker": "sunny.garden",
776 "csv_url": "mastodon/sunny.garden.csv",
778 "blocker": "solarpunk.moe",
779 "csv_url": "mastodon/solarpunk.moe.csv",
781 "blocker": "toot.wales",
782 "csv_url": "mastodon/toot.wales.csv",
784 "blocker": "union.place",
785 "csv_url": "mastodon/union.place.csv",
791 logger.debug("Downloading %d files ...", len(blocklists))
792 for block in blocklists:
793 # Is domain given and not equal blocker?
794 if isinstance(args.domain, str) and args.domain != block["blocker"]:
795 logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
797 elif args.domain in domains:
798 logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
800 elif instances.is_recent(block["blocker"]):
801 logger.debug("block[blocker]='%s' has been recently crawled - SKIPPED!", block["blocker"])
805 logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
806 response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
808 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
809 if not response.ok or response.status_code > 399 or response.content == "":
810 logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
813 logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
814 reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")
816 logger.debug("reader[]='%s'", type(reader))
819 logger.debug("row[%s]='%s'", type(row), row)
820 domain = severity = None
821 reject_media = reject_reports = False
823 domain = row["#domain"]
824 elif "domain" in row:
825 domain = row["domain"]
827 logger.debug("row='%s' does not contain domain column", row)
830 if "#severity" in row:
831 severity = row["#severity"]
832 elif "severity" in row:
833 severity = row["severity"]
835 logger.debug("row='%s' does not contain severity column", row)
838 if "#reject_media" in row and row["#reject_media"].lower() == "true":
840 elif "reject_media" in row and row["reject_media"].lower() == "true":
843 if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
844 reject_reports = True
845 elif "reject_reports" in row and row["reject_reports"].lower() == "true":
846 reject_reports = True
848 logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
849 if not utils.is_domain_wanted(domain):
850 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
853 logger.debug("Marking domain='%s' as handled", domain)
854 domains.append(domain)
856 logger.debug("Processing domain='%s' ...", domain)
857 processed = utils.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
858 logger.debug("processed='%s'", processed)
860 if utils.process_block(block["blocker"], domain, None, "reject") and config.get("bot_enabled"):
861 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", domain, block["block_level"], block["blocker"])
864 "reason" : block["reason"],
868 utils.process_block(block["blocker"], domain, None, "reject_media")
870 utils.process_block(block["blocker"], domain, None, "reject_reports")
872 logger.debug("Invoking commit() ...")
873 database.connection.commit()
875 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
876 if config.get("bot_enabled") and len(blockdict) > 0:
877 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
878 network.send_bot_post(block["blocker"], blockdict)
880 logger.debug("Success! - EXIT!")
883 def fetch_txt(args: argparse.Namespace) -> int:
# Fetch plain-text block lists (one domain per line) from a hard-coded list of
# URLs and feed each non-empty, wanted, not-recently-crawled domain into the
# generic domain processor. Returns an exit-status int (the return statement is
# not visible in this extraction).
# NOTE(review): this extraction is missing intermediate source lines (the
# `urls` list/loop headers, `continue` statements); comments below only state
# what the visible lines establish — verify against the upstream file.
884 logger.debug("args[]='%s' - CALLED!", type(args))
# Visible entry of the hard-coded URL table: seirdy.one's published block list.
889 "blocker": "seirdy.one",
890 "url" : "https://seirdy.one/pb/bsl.txt",
893 logger.info("Checking %d text file(s) ...", len(urls))
# Presumably inside `for row in urls:` (loop header not visible) — TODO confirm.
895 logger.debug("Fetching row[url]='%s' ...", row["url"])
# Fetch the raw text with the project-wide connect/read timeouts from config.
896 response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
898 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
# Only process successful, non-redirect, non-empty responses.
899 if response.ok and response.status_code < 300 and response.text != "":
900 logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
# The list format is one domain per line.
901 domains = response.text.split("\n")
903 logger.info("Processing %d domains ...", len(domains))
904 for domain in domains:
905 logger.debug("domain='%s'", domain)
# Skip empty lines, unwanted domains, and recently crawled instances
# (the guarding `if`/`continue` lines are partly missing from this view).
907 logger.debug("domain is empty - SKIPPED!")
909 elif not utils.is_domain_wanted(domain):
910 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
912 elif instances.is_recent(domain):
913 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
# Otherwise hand the domain to the generic processor, tagging it with this
# function's own name (via inspect) as the originating command.
916 logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
917 processed = utils.process_domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)
919 logger.debug("processed='%s'", processed)
921 logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
924 logger.debug("Success! - EXIT!")
927 def fetch_fedipact(args: argparse.Namespace) -> int:
# Scrape the fedipact.online signatory page and register every listed
# instance domain that is wanted, not already registered, and not recently
# crawled. Returns an exit-status int (return statement not visible here).
# NOTE(review): intermediate lines (the `for row in rows:` header and
# `continue` statements) are missing from this extraction — verify upstream.
928 logger.debug("args[]='%s' - CALLED!", type(args))
# Fetch the landing page HTML with the configured connect/read timeouts.
931 response = utils.fetch_url("https://fedipact.online", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
933 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
# Only parse successful, non-redirect, non-empty responses.
934 if response.ok and response.status_code < 300 and response.text != "":
935 logger.debug("Parsing %d Bytes ...", len(response.text))
937 doc = bs4.BeautifulSoup(response.text, "html.parser")
938 logger.debug("doc[]='%s'", type(doc))
# Each signatory appears to be rendered as an <li> element — TODO confirm
# against the live page structure.
940 rows = doc.findAll("li")
941 logger.info("Checking %d row(s) ...", len(rows))
943 logger.debug("row[]='%s'", type(row))
# Normalize the list item's first text node into a bare domain name.
944 domain = tidyup.domain(row.contents[0])
946 logger.debug("domain='%s'", domain)
# Skip empty, unwanted, already-registered, and recently-crawled domains.
948 logger.debug("domain is empty - SKIPPED!")
950 elif not utils.is_domain_wanted(domain):
951 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
953 elif instances.is_registered(domain):
954 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
956 elif instances.is_recent(domain):
957 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
# New instance: crawl it, recording this command as the origin.
960 logger.info("Fetching domain='%s' ...", domain)
961 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
963 logger.debug("Success! - EXIT!")
966 def fetch_joinfediverse(args: argparse.Namespace) -> int:
# Scrape the joinfediverse.wiki FediBlock page: parse its wikitable(s) into a
# blocklist, expand subdomain entries, process each blocked domain, and record
# "reject" blocks attributed to climatejustice.* blocker instances found in
# the local database. Returns an exit-status int (return not visible here).
# NOTE(review): this extraction is missing many intermediate lines (loop
# headers, `continue` statements, dict-literal braces) — comments only state
# what the visible lines establish.
967 logger.debug("args[]='%s' - CALLED!", type(args))
# Fetch the wiki page HTML; note `.text` is taken directly off the response.
970 raw = utils.fetch_url("https://joinfediverse.wiki/FediBlock", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
971 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
973 doc = bs4.BeautifulSoup(raw, "html.parser")
974 logger.debug("doc[]='%s'", type(doc))
# The block data lives in MediaWiki "wikitable"-classed tables.
976 tables = doc.findAll("table", {"class": "wikitable"})
978 logger.info("Analyzing %d table(s) ...", len(tables))
981 logger.debug("table[]='%s'", type(table))
983 rows = table.findAll("tr")
984 logger.info("Checking %d row(s) ...", len(rows))
# Maps column index -> normalized header name for scrapable columns.
985 block_headers = dict()
987 logger.debug("row[%s]='%s'", type(row), row)
# A header row (<th> cells) resets and rebuilds the column map.
989 headers = row.findAll("th")
990 logger.debug("Found headers()=%d header(s)", len(headers))
992 block_headers = dict()
994 for header in headers:
996 logger.debug("header[]='%s',cnt=%d", type(header), cnt)
997 text = header.contents[0]
999 logger.debug("text[]='%s'", type(text))
# Only plain-string header cells that are not themselves domains and match
# one of the known column titles are recorded.
1000 if not isinstance(text, str):
1001 logger.debug("text[]='%s' is not 'str' - SKIPPED!", type(text))
1003 elif validators.domain(text.strip()):
1004 logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
1007 text = tidyup.domain(text.strip())
1008 logger.debug("text='%s'", text)
1009 if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
1010 logger.debug("Found header: '%s'=%d", text, cnt)
1011 block_headers[cnt] = text
# Data rows before any header row has been seen cannot be mapped.
1013 elif len(block_headers) == 0:
1014 logger.debug("row is not scrapable - SKIPPED!")
# Data row: walk its cells and pick out the mapped columns.
1016 elif len(block_headers) > 0:
1017 logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
1021 for element in row.find_all(["th", "td"]):
1023 logger.debug("element[]='%s',cnt=%d", type(element), cnt)
1024 if cnt in block_headers:
1025 logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])
1027 text = element.text.strip()
# "domain"/"instance" columns are both stored under the key "blocked".
1028 key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"
1030 logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
1031 if key in ["domain", "instance"]:
1033 elif key == "reason":
1034 block[key] = tidyup.reason(text)
# Subdomain cells list several names separated by "/".
1035 elif key == "subdomain(s)":
1038 block[key] = text.split("/")
1040 logger.debug("key='%s'", key)
1043 logger.debug("block()=%d ...", len(block))
1045 logger.debug("Appending block()=%d ...", len(block))
1046 blocklist.append(block)
1048 logger.debug("blocklist()=%d", len(blocklist))
# All climatejustice.* instances known locally act as the "blocker" side.
1050 database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
1051 domains = database.cursor.fetchall()
1053 logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
# Expand entries with subdomains into one block record per subdomain.
1055 for block in blocklist:
1056 logger.debug("block='%s'", block)
1057 if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
1058 origin = block["blocked"]
1059 for subdomain in block["subdomain(s)"]:
1060 block["blocked"] = subdomain + "." + origin
1061 blocking.append(block)
1063 blocking.append(block)
# NOTE(review): `%d` is given the list itself, not len(blocking) — this
# raises TypeError when DEBUG logging is enabled; should be len(blocking).
1065 logger.debug("blocking()=%d", blocking)
1066 for block in blocking:
1067 block["blocked"] = tidyup.domain(block["blocked"])
# Skip unwanted or recently checked targets; otherwise process the domain
# attributed to climatejustice.social.
1069 if not utils.is_domain_wanted(block["blocked"]):
1070 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1072 elif instances.is_recent(block["blocked"]):
1073 logger.debug("blocked='%s' has been recently checked - SKIPPED!", block["blocked"])
# NOTE(review): "Proccessing" is a typo for "Processing" in this log message.
1076 logger.info("Proccessing blocked='%s' ...", block["blocked"])
1077 utils.process_domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
# Record a "reject" block from every climatejustice.* blocker to every entry.
1080 for blocker in domains:
# fetchall() returns row tuples; column 0 is the domain.
1081 blocker = blocker[0]
1082 logger.debug("blocker[%s]='%s'", type(blocker), blocker)
1084 for block in blocking:
1085 block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None
1087 if not utils.is_domain_wanted(block["blocked"]):
1088 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1091 logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
# New "reject" blocks are queued for the notification bot when enabled.
1092 if utils.process_block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
1093 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
1095 "blocked": block["blocked"],
1096 "reason" : block["reason"],
# Flush any per-instance updates accumulated for this blocker.
1099 if instances.has_pending(blocker):
1100 logger.debug("Flushing updates for blocker='%s' ...", blocker)
1101 instances.update_data(blocker)
1103 logger.debug("Invoking commit() ...")
1104 database.connection.commit()
1106 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1107 if config.get("bot_enabled") and len(blockdict) > 0:
# NOTE(review): format string is missing a closing quote after '%s
# (should read blocker='%s',blockdict()=...).
1108 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", blocker, len(blockdict))
1109 network.send_bot_post(blocker, blockdict)
1111 logger.debug("Success! - EXIT!")
1114 def recheck_obfuscation(args: argparse.Namespace) -> int:
# Re-fetch block lists from instances previously flagged with obfuscated
# entries (wildcarded domains like "*.example.*"), attempt to deobfuscate
# each obfuscated entry against locally known instances, record successfully
# resolved blocks, and clear the has_obfuscation flag when everything
# resolves. Returns an exit-status int (return not visible here; the function
# may continue past this extraction).
# NOTE(review): intermediate lines (loop headers, `continue` statements,
# dict braces, the `blocked = None` initializer presumably present upstream)
# are missing from this extraction — verify against the upstream file.
1115 logger.debug("args[]='%s' - CALLED!", type(args))
# Narrow the candidate set by --domain, then --software, else take all
# instances flagged with obfuscation.
1119 if isinstance(args.domain, str) and args.domain != "" and utils.is_domain_wanted(args.domain):
1120 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
# NOTE(review): validators.domain() on a software name looks like a
# copy-paste oddity upstream — confirm the intended check for args.software.
1121 elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
1122 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
1124 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")
1126 rows = database.cursor.fetchall()
1127 logger.info("Checking %d domains ...", len(rows))
1129 logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
# Dispatch to the per-software block-list fetcher.
1132 if row["software"] == "pleroma":
1133 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1134 blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
1135 elif row["software"] == "mastodon":
1136 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1137 blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
1138 elif row["software"] == "lemmy":
1139 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1140 blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
1141 elif row["software"] == "friendica":
1142 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1143 blocking = friendica.fetch_blocks(row["domain"])
1144 elif row["software"] == "misskey":
1145 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1146 blocking = misskey.fetch_blocks(row["domain"])
# NOTE(review): "sofware" is a typo for "software" in this warning message.
1148 logger.warning("Unknown sofware: domain='%s',software='%s'", row["domain"], row["software"])
1150 logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1153 for block in blocking:
1154 logger.debug("blocked='%s'", block["blocked"])
# Skip pseudo-domains that can never resolve: reversed-IP (.arpa),
# placeholder (.tld) and Tor (.onion) names.
1157 if block["blocked"].endswith(".arpa"):
1158 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1160 elif block["blocked"].endswith(".tld"):
1161 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1163 elif block["blocked"].endswith(".onion"):
1164 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
# "*" or "?" in the name marks an obfuscated entry: count it and try to
# deobfuscate, using the entry's hash when one is provided.
1166 elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
1167 logger.debug("block='%s' is obfuscated.", block["blocked"])
1168 obfuscated = obfuscated + 1
1169 blocked = utils.deobfuscate_domain(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
1170 elif not utils.is_domain_wanted(block["blocked"]):
1171 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1173 elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1174 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
1177 logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
# A successful deobfuscation yields a concrete domain different from the
# wildcarded original; un-count it and record the block if it is new.
1178 if blocked is not None and blocked != block["blocked"]:
1179 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1180 obfuscated = obfuscated - 1
1181 if blocks.is_instance_blocked(row["domain"], blocked):
1182 logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
# Normalize non-standard block-level names to canonical ones.
1185 block["block_level"] = utils.alias_block_level(block["block_level"])
1187 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
# Queue "reject"-level additions for the notification bot when enabled.
1188 if utils.process_block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1189 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
1192 "reason" : block["reason"],
1195 logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
# If every obfuscated entry resolved, the instance's list is fully
# deobfuscated: clear its flag.
1196 if obfuscated == 0 and len(blocking) > 0:
1197 logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
1198 instances.set_has_obfuscation(row["domain"], False)
1200 if instances.has_pending(row["domain"]):
1201 logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1202 instances.update_data(row["domain"])
1204 logger.debug("Invoking commit() ...")
1205 database.connection.commit()
1207 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1208 if config.get("bot_enabled") and len(blockdict) > 0:
# NOTE(review): format string is missing a closing quote after '%s
# (should read blocker='%s',blockdict()=...).
1209 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", row["domain"], len(blockdict))
1210 network.send_bot_post(row["domain"], blockdict)
1212 logger.debug("Success! - EXIT!")