# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import argparse
import csv
import inspect
import json
import logging
import time

import atoma
import bs4
import markdown
import validators

# csrf and utils are referenced throughout; their module paths are assumed
# from the fba package layout.
from fba import csrf
from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import locking
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)

def check_instance(args: argparse.Namespace) -> int:
    logger.debug("args.domain='%s' - CALLED!", args.domain)
    # Non-zero status codes below are per-failure exit codes
    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status

def fetch_pixelfed_api(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF by default, you don't have to add network.api_headers yourself here
    headers = tuple()

    try:
        logger.debug("Checking CSRF from pixelfed.org")
        headers = csrf.determine("pixelfed.org", dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
        return 100

    try:
        logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            "pixelfed.org",
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 101
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 102

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif not utils.is_domain_wanted(row["domain"]):
                logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
                continue
            elif instances.is_registered(row["domain"]):
                logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
                continue
            elif instances.is_recent(row["domain"]):
                logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
                continue

            logger.debug("Fetching instances from row[domain]='%s' ...", row["domain"])
            federation.fetch_instances(row["domain"], None, None, inspect.currentframe().f_code.co_name)

    except network.exceptions as exception:
        logger.warning("Cannot fetch JSON API, exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 103

    logger.debug("Success! - EXIT!")
    return 0

def fetch_bkali(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))
    domains = list()
    try:
        fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        }))

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for 'gql.api.bka.li' returned error message='%s'", fetched["error_message"])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif not utils.is_domain_wanted(entry["domain"]):
                logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
                continue
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql, exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0
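
# The same GraphQL query, for testing the endpoint by hand (sketch; endpoint
# and query are the ones used above):
#
#   curl -X POST https://gql.api.bka.li/v1/graphql \
#        -H 'Content-Type: application/json' \
#        -d '{"query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"}'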

def fetch_blocks(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for single args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
        blocker = tidyup.domain(blocker)
        logger.debug("blocker='%s' - AFTER!", blocker)

        if blocker == "":
            logger.warning("blocker is now empty!")
            continue
        elif nodeinfo_url is None or nodeinfo_url == "":
            logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
            continue
        elif not utils.is_domain_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("blocker='%s'", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        blocking = list()
        blockdict = list()
        if software == "pleroma":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
        elif software == "mastodon":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
        elif software == "lemmy":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
        elif software == "friendica":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = friendica.fetch_blocks(blocker)
        elif software == "misskey":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = misskey.fetch_blocks(blocker)
        else:
            logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)

        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin = row["origin"]
                nodeinfo_url = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some obscure them with question marks, not sure if that's dependent on version or not
                row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin = row["origin"]
                nodeinfo_url = row["nodeinfo_url"]

            logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
            if not utils.is_domain_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            block["block_level"] = utils.alias_block_level(block["block_level"])

            if utils.process_block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config[bot_enabled]='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0
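
# How deobfuscation can work, as a minimal self-contained sketch. The real
# logic lives in instances.deobfuscate()/utils.deobfuscate_domain(); the
# helper below is illustrative only. Mastodon publishes a SHA-256 digest next
# to each masked domain name, so a digest comparison over known candidates is
# usually enough to recover the original domain.
def _deobfuscate_sketch(pattern: str, domain_hash: str, candidates: list) -> str:
    import hashlib

    for candidate in candidates:
        if domain_hash is not None:
            # A digest match is authoritative when a hash is published
            if hashlib.sha256(candidate.encode("utf-8")).hexdigest() == domain_hash:
                return candidate
        elif len(candidate) == len(pattern):
            # Otherwise compare the unmasked characters position by position
            if all(p in ("*", "?") or p == c for p, c in zip(pattern, candidate)):
                return candidate

    return None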

def fetch_observer(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    # The full list of software types known to fediverse.observer is elided in
    # this copy; the entries below are illustrative only.
    types = [
        "friendica",
        "lemmy",
        "mastodon",
        "misskey",
        "peertube",
        "pleroma",
    ]

    locking.acquire()

    logger.info("Fetching %d different table data ...", len(types))
    for software in types:
        logger.debug("software='%s' - BEFORE!", software)
        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            continue

        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://fediverse.observer/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features='html.parser')
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from fediverse.observer: '%s'", software, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()

            logger.debug("domain='%s'", domain)
            if not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' is recently being handled - SKIPPED!", domain)
                continue

            software = software_helper.alias(software)
            logger.info("Fetching instances for domain='%s'", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_todon_wiki(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    locking.acquire()

    blocklist = {
        "silenced": list(),
        "reject"  : list(),
    }

    raw = utils.fetch_url("https://wiki.todon.eu/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blockdict = list()
    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, 'todon.eu', None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if blocks.is_instance_blocked("todon.eu", blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if utils.process_block("todon.eu", blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='%s' for blocker='todon.eu' ...", blocked, block_level)
                blockdict.append({
                    "blocked": blocked,
                    "reason" : None,
                })

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='todon.eu',blockdict()=%d ...", len(blockdict))
        network.send_bot_post("todon.eu", blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_cs(args: argparse.Namespace):
    logger.debug("args[]='%s' - CALLED!", type(args))

    # The full list of markdown extensions is elided in this copy; "extra" is
    # the one that matters here, as it enables table parsing.
    extensions = [
        "extra",
    ]

    domains = {
        "silenced": list(),
        "reject"  : list(),
    }

    raw = utils.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features='html.parser')
    logger.debug("doc()=%d[]='%s'", len(doc), type(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    domains["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    domains["reject"] = federation.find_domains(blocked)

    logger.debug("domains[silenced]()=%d,domains[reject]()=%d", len(domains["silenced"]), len(domains["reject"]))

    blockdict = list()
    if len(domains) > 0:
        locking.acquire()

        for block_level in domains:
            logger.info("block_level='%s' has %d row(s)", block_level, len(domains[block_level]))

            for row in domains[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if instances.is_recent(row["domain"], "last_blocked"):
                    logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
                    continue
                elif not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if utils.process_block("chaos.social", row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',block_level='%s' for blocker='chaos.social' ...", row["domain"], block_level)
                    blockdict.append({
                        "blocked": row["domain"],
                        "reason" : row["reason"],
                    })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='chaos.social',blockdict()=%d ...", len(blockdict))
        network.send_bot_post("chaos.social", blockdict)

    logger.debug("Success! - EXIT!")

def fetch_fba_rss(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))
    domains = list()

    logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item='%s'", item)
            domain = item.link.split("=")[1]

            if not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0
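
# item.link above is expected to look like "https://…?domain=example.social".
# A more robust extraction than split("=") would go through urllib.parse
# (sketch only, not what the code above does):
#
#   from urllib.parse import urlparse, parse_qs
#   domain = parse_qs(urlparse(item.link).query)["domain"][0]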

def fetch_fbabot_atom(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))
    feed = "https://ryona.agency/users/fba/feed.atom"

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s'", type(href), href)
                    domain = tidyup.domain(href)

                    logger.debug("domain='%s'", domain)
                    if not utils.is_domain_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, "ryona.agency", None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_instances(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Initial fetch
    try:
        logger.info("Fetching instances from args.domain='%s' ...", args.domain)
        federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update_data(args.domain)
        return 100

    # A --single flag (assumed name) stops after the initial fetch
    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("domain='%s'", row["domain"])
        if not utils.is_domain_wanted(row["domain"]):
            logger.debug("Domain row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", row["domain"], row["origin"], row["software"], row["nodeinfo_url"])
            federation.fetch_instances(row["domain"], row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[domain]='%s'", type(exception), row["domain"])
            instances.set_last_error(row["domain"], exception)

    logger.debug("Success - EXIT!")
    return 0
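
# Note on row access: loops in this module index result rows both by position
# (fetch_blocks unpacks tuples) and by column name (row["domain"]). Both work
# when the connection uses sqlite3.Row as its row factory, e.g. (sketch; the
# actual connection is set up in fba.database and the file name is assumed):
#
#   connection = sqlite3.connect("blocks.db")
#   connection.row_factory = sqlite3.Row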

def fetch_oliphant(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    locking.acquire()

    # Base URL
    base_url = "https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists"

    # URLs to fetch
    blocklists = (
        {
            "blocker": "artisan.chat",
            "csv_url": "mastodon/artisan.chat.csv",
        }, {
            "blocker": "mastodon.art",
            "csv_url": "mastodon/mastodon.art.csv",
        }, {
            "blocker": "pleroma.envs.net",
            "csv_url": "mastodon/pleroma.envs.net.csv",
        }, {
            "blocker": "oliphant.social",
            "csv_url": "mastodon/_unified_tier3_blocklist.csv",
        }, {
            "blocker": "mastodon.online",
            "csv_url": "mastodon/mastodon.online.csv",
        }, {
            "blocker": "mastodon.social",
            "csv_url": "mastodon/mastodon.social.csv",
        }, {
            "blocker": "mastodon.social",
            "csv_url": "other/missing-tier0-mastodon.social.csv",
        }, {
            "blocker": "rage.love",
            "csv_url": "mastodon/rage.love.csv",
        }, {
            "blocker": "sunny.garden",
            "csv_url": "mastodon/sunny.garden.csv",
        }, {
            "blocker": "solarpunk.moe",
            "csv_url": "mastodon/solarpunk.moe.csv",
        }, {
            "blocker": "toot.wales",
            "csv_url": "mastodon/toot.wales.csv",
        }, {
            "blocker": "union.place",
            "csv_url": "mastodon/union.place.csv",
        },
    )

    domains = list()

    logger.debug("Downloading %d files ...", len(blocklists))
    for block in blocklists:
        # Is domain given and not equal blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue
        elif args.domain in domains:
            logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
            continue
        elif instances.is_recent(block["blocker"]):
            logger.debug("block[blocker]='%s' has been recently crawled - SKIPPED!", block["blocker"])
            continue

        # Fetch this URL
        logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
        response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if not response.ok or response.status_code > 399 or response.content == "":
            logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
            continue

        logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
        reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")

        logger.debug("reader[]='%s'", type(reader))
        blockdict = list()
        for row in reader:
            logger.debug("row[%s]='%s'", type(row), row)
            domain = severity = None
            reject_media = reject_reports = False
            if "#domain" in row:
                domain = row["#domain"]
            elif "domain" in row:
                domain = row["domain"]
            else:
                logger.debug("row='%s' does not contain domain column", row)
                continue

            if "#severity" in row:
                severity = row["#severity"]
            elif "severity" in row:
                severity = row["severity"]
            else:
                logger.debug("row='%s' does not contain severity column", row)
                continue

            if "#reject_media" in row and row["#reject_media"].lower() == "true":
                reject_media = True
            elif "reject_media" in row and row["reject_media"].lower() == "true":
                reject_media = True

            if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
                reject_reports = True
            elif "reject_reports" in row and row["reject_reports"].lower() == "true":
                reject_reports = True

            logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
            if not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue

            logger.debug("Marking domain='%s' as handled", domain)
            domains.append(domain)

            logger.debug("Processing domain='%s' ...", domain)
            processed = utils.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
            logger.debug("processed='%s'", processed)

            if utils.process_block(block["blocker"], domain, None, "reject") and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s' for blocker='%s' ...", domain, block["blocker"])
                blockdict.append({
                    "blocked": domain,
                    "reason" : None,
                })

            if reject_media:
                utils.process_block(block["blocker"], domain, None, "reject_media")
            if reject_reports:
                utils.process_block(block["blocker"], domain, None, "reject_reports")

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
            network.send_bot_post(block["blocker"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0
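
# The oliphant CSV files come in two header dialects ("#domain,#severity,…"
# and "domain,severity,…"), which is why every column above is looked up
# twice. A compact alternative is to normalize each row first (illustrative
# helper, not used above):
def _normalize_csv_row(row: dict) -> dict:
    # Strip a leading '#' from every column name so both dialects read alike;
    # the None guard skips csv.DictReader's restkey for oversized rows
    return {key.lstrip("#"): value for key, value in row.items() if key is not None}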

def fetch_txt(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    locking.acquire()

    # Static URLs
    urls = ({
        "blocker": "seirdy.one",
        "url"    : "https://seirdy.one/pb/bsl.txt",
    },)

    logger.info("Checking %d text file(s) ...", len(urls))
    for row in urls:
        logger.debug("Fetching row[url]='%s' ...", row["url"])
        response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code < 300 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s'", domain)
                if domain == "":
                    logger.debug("domain is empty - SKIPPED!")
                    continue
                elif not utils.is_domain_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue
                elif instances.is_recent(domain):
                    logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                    continue

                logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
                processed = utils.process_domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)
                if not processed:
                    logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
                    continue

    logger.debug("Success! - EXIT!")
    return 0
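
# Note: response.text.split("\n") keeps a trailing "\r" on CRLF-encoded lists
# and yields one empty string after a trailing newline, which the empty-domain
# check above then skips. str.splitlines() would sidestep both:
#
#   domains = response.text.splitlines()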

def fetch_fedipact(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    locking.acquire()

    response = utils.fetch_url("https://fedipact.online", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and response.text != "":
        logger.debug("Parsing %d Bytes ...", len(response.text))

        doc = bs4.BeautifulSoup(response.text, "html.parser")
        logger.debug("doc[]='%s'", type(doc))

        rows = doc.findAll("li")
        logger.info("Checking %d row(s) ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            domain = tidyup.domain(row.contents[0])

            logger.debug("domain='%s'", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.info("Fetching domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_joinfediverse(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    locking.acquire()

    raw = utils.fetch_url("https://joinfediverse.wiki/FediBlock", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    tables = doc.findAll("table", {"class": "wikitable"})

    logger.info("Analyzing %d table(s) ...", len(tables))
    blocklist = list()
    for table in tables:
        logger.debug("table[]='%s'", type(table))

        rows = table.findAll("tr")
        logger.info("Checking %d row(s) ...", len(rows))
        block_headers = dict()
        for row in rows:
            logger.debug("row[%s]='%s'", type(row), row)

            headers = row.findAll("th")
            logger.debug("Found headers()=%d header(s)", len(headers))
            if len(headers) > 1:
                block_headers = dict()
                cnt = 0
                for header in headers:
                    cnt = cnt + 1
                    logger.debug("header[]='%s',cnt=%d", type(header), cnt)
                    text = header.contents[0]

                    logger.debug("text[]='%s'", type(text))
                    if not isinstance(text, str):
                        logger.debug("text[]='%s' is not 'str' - SKIPPED!", type(text))
                        continue
                    elif validators.domain(text.strip()):
                        logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
                        continue

                    text = tidyup.domain(text.strip())
                    logger.debug("text='%s'", text)
                    if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
                        logger.debug("Found header: '%s'=%d", text, cnt)
                        block_headers[cnt] = text

            elif len(block_headers) == 0:
                logger.debug("row is not scrapable - SKIPPED!")
                continue
            elif len(block_headers) > 0:
                logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
                cnt = 0
                block = dict()

                for element in row.find_all(["th", "td"]):
                    cnt = cnt + 1
                    logger.debug("element[]='%s',cnt=%d", type(element), cnt)
                    if cnt in block_headers:
                        logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])

                        text = element.text.strip()
                        key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"

                        logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
                        if key in ["domain", "instance"]:
                            block[key] = text
                        elif key == "reason":
                            block[key] = tidyup.reason(text)
                        elif key == "subdomain(s)":
                            block[key] = list()
                            if text != "":
                                block[key] = text.split("/")
                        else:
                            logger.debug("key='%s'", key)
                            block[key] = text

                logger.debug("block()=%d ...", len(block))
                if len(block) > 0:
                    logger.debug("Appending block()=%d ...", len(block))
                    blocklist.append(block)

    logger.debug("blocklist()=%d", len(blocklist))

    database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
    domains = database.cursor.fetchall()

    logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
    blocking = list()
    for block in blocklist:
        logger.debug("block='%s'", block)
        if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
            origin = block["blocked"]
            for subdomain in block["subdomain(s)"]:
                block["blocked"] = subdomain + "." + origin
                blocking.append(block)
        else:
            blocking.append(block)

    logger.debug("blocking()=%d", len(blocking))
    for block in blocking:
        block["blocked"] = tidyup.domain(block["blocked"])

        if not utils.is_domain_wanted(block["blocked"]):
            logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
            continue
        elif instances.is_recent(block["blocked"]):
            logger.debug("blocked='%s' has been recently checked - SKIPPED!", block["blocked"])
            continue

        logger.info("Processing blocked='%s' ...", block["blocked"])
        utils.process_domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)

    blockdict = list()
    for blocker in domains:
        blocker = blocker[0]
        logger.debug("blocker[%s]='%s'", type(blocker), blocker)

        for block in blocking:
            block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None

            if not utils.is_domain_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
            if utils.process_block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
        network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def recheck_obfuscation(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    locking.acquire()

    if isinstance(args.domain, str) and args.domain != "" and utils.is_domain_wanted(args.domain):
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
    elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
    else:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")

    rows = database.cursor.fetchall()
    logger.info("Checking %d domains ...", len(rows))
    for row in rows:
        logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])

        blocking = list()
        if row["software"] == "pleroma":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
        elif row["software"] == "mastodon":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
        elif row["software"] == "lemmy":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
        elif row["software"] == "friendica":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = friendica.fetch_blocks(row["domain"])
        elif row["software"] == "misskey":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = misskey.fetch_blocks(row["domain"])
        else:
            logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])

        logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])

        obfuscated = 0
        blockdict = list()
        for block in blocking:
            logger.debug("blocked='%s'", block["blocked"])
            blocked = None

            if block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
                logger.debug("block='%s' is obfuscated.", block["blocked"])
                obfuscated = obfuscated + 1
                blocked = utils.deobfuscate_domain(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
            elif not utils.is_domain_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
                logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
                continue

            logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
            if blocked is not None and blocked != block["blocked"]:
                logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
                obfuscated = obfuscated - 1
                if blocks.is_instance_blocked(row["domain"], blocked):
                    logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
                    continue

                block["block_level"] = utils.alias_block_level(block["block_level"])

                logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
                if utils.process_block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], row["domain"])
                    blockdict.append({
                        "blocked": blocked,
                        "reason" : block["reason"],
                    })

        logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
        if obfuscated == 0 and len(blocking) > 0:
            logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
            instances.set_has_obfuscation(row["domain"], False)

        if instances.has_pending(row["domain"]):
            logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
            instances.update_data(row["domain"])

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", row["domain"], len(blockdict))
            network.send_bot_post(row["domain"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0