1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
31 from fba import database
34 from fba.helpers import blacklist
35 from fba.helpers import config
36 from fba.helpers import cookies
37 from fba.helpers import locking
38 from fba.helpers import software as software_helper
39 from fba.helpers import tidyup
41 from fba.http import federation
42 from fba.http import network
44 from fba.models import blocks
45 from fba.models import instances
47 from fba.networks import friendica
48 from fba.networks import lemmy
49 from fba.networks import mastodon
50 from fba.networks import misskey
51 from fba.networks import pleroma
# Module-wide logging setup: INFO by default for the whole process.
# The commented-out setLevel() line can be re-enabled locally for
# verbose DEBUG tracing of this module only.
53 logging.basicConfig(level=logging.INFO)
54 logger = logging.getLogger(__name__)
55 #logger.setLevel(logging.DEBUG)
# Command handler: validate a single domain given via --domain.
# A domain is acceptable only when it is syntactically valid (validators),
# not blacklisted, and not yet registered in the instances table.
# NOTE(review): this listing is line-sampled (source line numbers jump,
# e.g. 59/62/65/68-69/71 are missing), so the `status = ...` assignments
# and the final `return status` are not visible here — confirm against the
# full file before relying on the exit codes.
57 def check_instance(args: argparse.Namespace) -> int:
58 logger.debug("args.domain='%s' - CALLED!", args.domain)
60 if not validators.domain(args.domain):
61 logger.warning("args.domain='%s' is not valid", args.domain)
63 elif blacklist.is_blacklisted(args.domain):
64 logger.warning("args.domain='%s' is blacklisted", args.domain)
66 elif instances.is_registered(args.domain):
67 logger.warning("args.domain='%s' is already registered", args.domain)
# Fall-through case: domain is valid, not blacklisted, not registered.
70 logger.info("args.domain='%s' is not known", args.domain)
72 logger.debug("status=%d - EXIT!", status)
# Command handler: fetch the public server list from pixelfed.org's API
# (/api/v1/servers/all.json) and register every wanted, not-yet-known,
# not-recently-crawled domain via federation.fetch_instances().
# NOTE(review): sampled listing — the `try:` headers matching the two
# `except network.exceptions` clauses, the `return`/`continue` statements
# after the warning branches, and the `for row in rows:` loop header are
# on omitted lines; verify against the full source.
75 def fetch_pixelfed_api(args: argparse.Namespace) -> int:
76 logger.debug("args[]='%s' - CALLED!", type(args))
78 # No CSRF by default, you don't have to add network.api_headers by yourself here
82 logger.debug("Checking CSRF from pixelfed.org")
83 headers = csrf.determine("pixelfed.org", dict())
84 except network.exceptions as exception:
85 logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
89 logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
90 fetched = network.get_json_api(
92 "/api/v1/servers/all.json?scope=All&country=all&language=all",
94 (config.get("connection_timeout"), config.get("read_timeout"))
97 logger.debug("JSON API returned %d elements", len(fetched))
98 if "error_message" in fetched:
99 logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
101 elif "data" not in fetched["json"]:
102 logger.warning("API did not return JSON with 'data' element - EXIT!")
# Happy path: iterate the returned rows, skipping unwanted, already
# registered or recently crawled domains.
105 rows = fetched["json"]["data"]
106 logger.info("Checking %d fetched rows ...", len(rows))
108 logger.debug("row[]='%s'", type(row))
109 if "domain" not in row:
110 logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
112 elif not utils.is_domain_wanted(row["domain"]):
113 logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
115 elif instances.is_registered(row["domain"]):
116 logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
118 elif instances.is_recent(row["domain"]):
119 logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
# Register the new instance; the crawl origin is recorded as this
# function's name via the currentframe() lookup.
122 logger.debug("Fetching instances from row[domain]='%s' ...", row["domain"])
123 federation.fetch_instances(row["domain"], None, None, inspect.currentframe().f_code.co_name)
125 except network.exceptions as exception:
126 logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
129 logger.debug("Success! - EXIT!")
# Command handler: query the GraphQL endpoint at gql.api.bka.li for its
# known nodeinfo domain list, collect all wanted/new domains, then fetch
# instance data for each collected domain.
# NOTE(review): sampled listing — the `domains = list()` initialisation,
# the `try:` headers for both except clauses, and the `continue`/`return`
# statements after the skip branches are on omitted lines.
132 def fetch_bkali(args: argparse.Namespace) -> int:
133 logger.debug("args[]='%s' - CALLED!", type(args))
136 fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
137 "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
140 logger.debug("fetched[]='%s'", type(fetched))
141 if "error_message" in fetched:
142 logger.warning("post_json_api() for 'gql.api.bka.li' returned error message='%s", fetched["error_message"])
144 elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
145 logger.warning("post_json_api() returned error: '%s", fetched["error"]["message"])
# Structural sanity checks on the GraphQL response before iterating.
148 rows = fetched["json"]
150 logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
152 raise Exception("WARNING: Returned no records")
153 elif "data" not in rows:
154 raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
155 elif "nodeinfo" not in rows["data"]:
156 raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")
# Collect wanted, unknown, not-recently-crawled domains.
158 for entry in rows["data"]["nodeinfo"]:
159 logger.debug("entry[%s]='%s'", type(entry), entry)
160 if "domain" not in entry:
161 logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
163 elif not utils.is_domain_wanted(entry["domain"]):
164 logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
166 elif instances.is_registered(entry["domain"]):
167 logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
169 elif instances.is_recent(entry["domain"]):
170 logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
173 logger.debug("Adding domain='%s' ...", entry["domain"])
174 domains.append(entry["domain"])
176 except network.exceptions as exception:
177 logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
# Second phase: crawl each collected domain; per-domain network errors
# are recorded on the instance rather than aborting the whole run.
180 logger.debug("domains()=%d", len(domains))
184 logger.info("Adding %d new instances ...", len(domains))
185 for domain in domains:
187 logger.info("Fetching instances from domain='%s' ...", domain)
188 federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
189 except network.exceptions as exception:
190 logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
191 instances.set_last_error(domain, exception)
193 logger.debug("Success - EXIT!")
# Command handler: (re-)fetch block lists ("defederation" lists) from known
# instances. Scope is either a single --domain, a single --software, or —
# by default — every supported software whose last_blocked timestamp is
# older than the configured recheck_block interval.
# For each blocker the software-specific scraper (pleroma/mastodon/lemmy/
# friendica/misskey) returns block entries which are tidied, deobfuscated
# where needed, normalised (silence->silenced, suspend->suspended) and
# persisted via utils.process_block(); reject-level blocks are additionally
# queued for an optional bot announcement post.
# NOTE(review): sampled listing — `try:` headers, `continue`/`return`
# statements, the blockdict initialisation/append lines and several guard
# lines (e.g. `if blocker == "":`, `if row is None:`) sit on omitted lines;
# do not treat the visible control flow as complete.
196 def fetch_blocks(args: argparse.Namespace) -> int:
197 logger.debug("args[]='%s' - CALLED!", type(args))
198 if args.domain is not None and args.domain != "":
199 logger.debug("args.domain='%s' - checking ...", args.domain)
200 if not validators.domain(args.domain):
201 logger.warning("args.domain='%s' is not valid.", args.domain)
203 elif blacklist.is_blacklisted(args.domain):
204 logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
206 elif not instances.is_registered(args.domain):
207 logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
# Select the set of blockers to re-check: single domain, single software,
# or everything past the recheck interval (parameterised SQL throughout).
212 if args.domain is not None and args.domain != "":
213 # Re-check single domain
214 logger.debug("Querying database for single args.domain='%s' ...", args.domain)
215 database.cursor.execute(
216 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
218 elif args.software is not None and args.software != "":
219 # Re-check single software
220 logger.debug("Querying database for args.software='%s' ...", args.software)
221 database.cursor.execute(
222 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
225 # Re-check after "timeout" (aka. minimum interval)
226 database.cursor.execute(
227 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey', 'peertube') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
230 rows = database.cursor.fetchall()
231 logger.info("Checking %d entries ...", len(rows))
232 for blocker, software, origin, nodeinfo_url in rows:
233 logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
234 blocker = tidyup.domain(blocker)
235 logger.debug("blocker='%s' - AFTER!", blocker)
238 logger.warning("blocker is now empty!")
240 elif nodeinfo_url is None or nodeinfo_url == "":
241 logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
243 elif not utils.is_domain_wanted(blocker):
244 logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
# Mark the blocker as checked now, and optimistically clear the
# obfuscation flag; it is re-set below if deobfuscation fails.
247 logger.debug("blocker='%s'", blocker)
248 instances.set_last_blocked(blocker)
249 instances.set_has_obfuscation(blocker, False)
# Dispatch to the software-specific block-list scraper.
253 if software == "pleroma":
254 logger.info("blocker='%s',software='%s'", blocker, software)
255 blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
256 elif software == "mastodon":
257 logger.info("blocker='%s',software='%s'", blocker, software)
258 blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
259 elif software == "lemmy":
260 logger.info("blocker='%s',software='%s'", blocker, software)
261 blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
262 elif software == "friendica":
263 logger.info("blocker='%s',software='%s'", blocker, software)
264 blocking = friendica.fetch_blocks(blocker)
265 elif software == "misskey":
266 logger.info("blocker='%s',software='%s'", blocker, software)
267 blocking = misskey.fetch_blocks(blocker)
269 logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)
271 logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
273 for block in blocking:
274 logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])
276 if block["block_level"] == "":
277 logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
# Normalise blocked domain and reason text before any further checks.
280 logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
281 block["blocked"] = tidyup.domain(block["blocked"])
282 block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
283 logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])
285 if block["blocked"] == "":
286 logger.warning("blocked is empty, blocker='%s'", blocker)
288 elif block["blocked"].endswith(".onion"):
289 logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
291 elif block["blocked"].endswith(".arpa"):
292 logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
294 elif block["blocked"].endswith(".tld"):
295 logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
297 elif block["blocked"].find("*") >= 0:
298 logger.debug("blocker='%s' uses obfuscated domains", blocker)
300 # Some friendica servers also obscure domains without hash
301 row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)
303 logger.debug("row[]='%s'", type(row))
305 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
306 instances.set_has_obfuscation(blocker, True)
# Deobfuscation succeeded: adopt the resolved domain/nodeinfo URL.
309 block["blocked"] = row[0]
311 nodeinfo_url = row[2]
312 elif block["blocked"].find("?") >= 0:
313 logger.debug("blocker='%s' uses obfuscated domains", blocker)
315 # Some obscure them with question marks, not sure if that's dependent on version or not
316 row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)
318 logger.debug("row[]='%s'", type(row))
320 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
321 instances.set_has_obfuscation(blocker, True)
324 block["blocked"] = row[0]
326 nodeinfo_url = row[2]
328 logger.debug("Looking up instance by domainm, blocked='%s'", block["blocked"])
329 if not utils.is_domain_wanted(block["blocked"]):
330 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
332 elif block["block_level"] in ["accept", "accepted"]:
333 logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
335 elif not instances.is_registered(block["blocked"]):
336 logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
337 federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)
# Map software-specific level names onto the canonical vocabulary.
339 if block["block_level"] == "silence":
340 logger.debug("Block level 'silence' has been changed to 'silenced'")
341 block["block_level"] = "silenced"
342 elif block["block_level"] == "suspend":
343 logger.debug("Block level 'suspend' has been changed to 'suspended'")
344 block["block_level"] = "suspended"
# Persist the block; newly-added "reject" blocks are queued for the bot
# announcement when bot_enabled is configured.
346 if utils.process_block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
347 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
349 "blocked": block["blocked"],
350 "reason" : block["reason"],
353 logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
354 cookies.clear(block["blocked"])
# Flush any accumulated instance-table updates for this blocker.
356 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
357 if instances.has_pending(blocker):
358 logger.debug("Flushing updates for blocker='%s' ...", blocker)
359 instances.update_data(blocker)
361 logger.debug("Invoking commit() ...")
362 database.connection.commit()
364 logger.debug("Invoking cookies.clear(%s) ...", blocker)
365 cookies.clear(blocker)
367 logger.debug("config[bot_enabled]='%s',blockdict()=%d'", config.get("bot_enabled"), len(blockdict))
368 if config.get("bot_enabled") and len(blockdict) > 0:
369 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
370 network.send_bot_post(blocker, blockdict)
372 logger.debug("Success! - EXIT!")
# Command handler: scrape per-software instance tables from
# fediverse.observer and register new, wanted domains.
# NOTE(review): sampled listing — the `types` list definition (source lines
# 377-411), `try:` headers, `continue` statements and the
# `for item in items:` loop header are on omitted lines.
375 def fetch_observer(args: argparse.Namespace) -> int:
376 logger.debug("args[]='%s' - CALLED!", type(args))
412 logger.info("Fetching %d different table data ...", len(types))
413 for software in types:
414 logger.debug("software='%s' - BEFORE!", software)
# Honour an optional --software filter.
415 if args.software is not None and args.software != software:
416 logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
421 logger.debug("Fetching table data for software='%s' ...", software)
422 raw = utils.fetch_url(
423 f"https://fediverse.observer/app/views/tabledata.php?software={software}",
425 (config.get("connection_timeout"), config.get("read_timeout"))
427 logger.debug("raw[%s]()=%d", type(raw), len(raw))
429 doc = bs4.BeautifulSoup(raw, features='html.parser')
430 logger.debug("doc[]='%s'", type(doc))
431 except network.exceptions as exception:
432 logger.warning("Cannot fetch software='%s' from fediverse.observer: '%s'", software, type(exception))
# Each anchor with class "url" holds one instance domain.
435 items = doc.findAll("a", {"class": "url"})
436 logger.info("Checking %d items,software='%s' ...", len(items), software)
438 logger.debug("item[]='%s'", type(item))
439 domain = item.decode_contents()
441 logger.debug("domain='%s'", domain)
442 if not utils.is_domain_wanted(domain):
443 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
445 elif instances.is_registered(domain):
446 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
448 elif instances.is_recent(domain):
449 logger.debug("domain='%s' is recently being handled - SKIPPED!", domain)
# Resolve observer's software label to the canonical software name
# before registering the instance.
452 software = software_helper.alias(software)
453 logger.info("Fetching instances for domain='%s'", domain)
454 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
456 logger.debug("Success! - EXIT!")
# Command handler: import todon.eu's public block list from its wiki page.
# The page lists "silenced/limited" and "suspended" servers; these map to
# the canonical block levels "silenced" and "reject" respectively, recorded
# with todon.eu as the blocker.
# NOTE(review): sampled listing — the `blocklist`/`blockdict`
# initialisations, `try:` headers and the blockdict.append() lines are on
# omitted lines.
459 def fetch_todon_wiki(args: argparse.Namespace) -> int:
460 logger.debug("args[]='%s' - CALLED!", type(args))
468 raw = utils.fetch_url("https://wiki.todon.eu/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
469 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
471 doc = bs4.BeautifulSoup(raw, "html.parser")
472 logger.debug("doc[]='%s'", type(doc))
# Both sections are <h3> headings followed by a <ul> of <li> entries.
474 silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
475 logger.info("Checking %d silenced/limited entries ...", len(silenced))
476 blocklist["silenced"] = utils.find_domains(silenced, "div")
478 suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
479 logger.info("Checking %d suspended entries ...", len(suspended))
480 blocklist["reject"] = utils.find_domains(suspended, "div")
483 for block_level in blocklist:
484 blockers = blocklist[block_level]
486 logger.debug("block_level='%s',blockers()=%d'", block_level, len(blockers))
487 for blocked in blockers:
488 logger.debug("blocked='%s'", blocked)
# Register unknown blocked instances first so the block row has a
# matching instances row; per-domain errors are recorded, not fatal.
490 if not instances.is_registered(blocked):
492 logger.info("Fetching instances from domain='%s' ...", blocked)
493 federation.fetch_instances(blocked, 'chaos.social', None, inspect.currentframe().f_code.co_name)
494 except network.exceptions as exception:
495 logger.warning("Exception '%s' during fetching instances (fetch_cs) from blocked='%s'", type(exception), blocked)
496 instances.set_last_error(blocked, exception)
498 if blocks.is_instance_blocked("todon.eu", blocked, block_level):
499 logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
502 logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
503 if utils.process_block("todon.eu", blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
504 logger.debug("Appending blocked='%s',reason='%s' for blocker='todon.eu' ...", blocked, block_level)
510 logger.debug("Invoking commit() ...")
511 database.connection.commit()
513 if config.get("bot_enabled") and len(blockdict) > 0:
514 logger.info("Sending bot POST for blocker='todon.eu',blockdict()=%d ...", len(blockdict))
515 network.send_bot_post("todon.eu", blockdict)
517 logger.debug("Success! - EXIT!")
# Command handler: import chaos.social's federation policy from its public
# markdown file on GitHub. The rendered markdown contains "Silenced
# instances" and "Blocked instances" tables which map to block levels
# "silenced" and "reject".
# NOTE(review): sampled listing — the markdown `extensions` list, the
# `domains` dict initialisation, `try:` headers and the blockdict append
# lines sit on omitted source lines (522-546 etc.).
520 def fetch_cs(args: argparse.Namespace):
521 logger.debug("args[]='%s' - CALLED!", type(args))
547 raw = utils.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
548 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
# Render the markdown to HTML, then locate each policy table body.
550 doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features='html.parser')
551 logger.debug("doc()=%d[]='%s'", len(doc), type(doc))
553 silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
554 logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
555 domains["silenced"] = federation.find_domains(silenced)
557 blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
558 logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
559 domains["reject"] = federation.find_domains(blocked)
561 logger.debug("domains[silenced]()=%d,domains[reject]()=%d", len(domains["silenced"]), len(domains["reject"]))
566 for block_level in domains:
567 logger.info("block_level='%s' has %d row(s)", block_level, len(domains[block_level]))
569 for row in domains[block_level]:
570 logger.debug("row[%s]='%s'", type(row), row)
571 if instances.is_recent(row["domain"], "last_blocked"):
572 logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
574 elif not instances.is_registered(row["domain"]):
576 logger.info("Fetching instances from domain='%s' ...", row["domain"])
577 federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)
578 except network.exceptions as exception:
579 logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
580 instances.set_last_error(row["domain"], exception)
# Persist the block; new reject-level entries feed the optional bot post.
582 if utils.process_block("chaos.social", row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
583 logger.debug("Appending blocked='%s',reason='%s' for blocker='chaos.social' ...", row["domain"], block_level)
585 "blocked": row["domain"],
586 "reason" : row["reason"],
589 logger.debug("Invoking commit() ...")
590 database.connection.commit()
592 if config.get("bot_enabled") and len(blockdict) > 0:
593 logger.info("Sending bot POST for blocker='chaos.social',blockdict()=%d ...", len(blockdict))
594 network.send_bot_post("chaos.social", blockdict)
596 logger.debug("Success! - EXIT!")
# Command handler: parse an FBA-style RSS feed (URL given via --feed) whose
# item links carry a domain in their query string; collect new, wanted
# domains and fetch instance data for each.
# NOTE(review): sampled listing — the `domains = list()` initialisation and
# the `continue` statements after each skip branch are on omitted lines.
599 def fetch_fba_rss(args: argparse.Namespace) -> int:
600 logger.debug("args[]='%s' - CALLED!", type(args))
603 logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
604 response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
606 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
607 if response.ok and response.status_code < 300 and len(response.text) > 0:
608 logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
609 rss = atoma.parse_rss_bytes(response.content)
611 logger.debug("rss[]='%s'", type(rss))
612 for item in rss.items:
613 logger.debug("item='%s'", item)
# The domain is the value after the first "=" in the item link,
# e.g. ...?domain=example.org
614 domain = item.link.split("=")[1]
616 if not utils.is_domain_wanted(domain):
617 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
619 elif domain in domains:
620 logger.debug("domain='%s' is already added - SKIPPED!", domain)
622 elif instances.is_registered(domain):
623 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
625 elif instances.is_recent(domain):
626 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
629 logger.debug("Adding domain='%s'", domain)
630 domains.append(domain)
# Second phase: crawl collected domains, recording per-domain errors.
632 logger.debug("domains()=%d", len(domains))
636 logger.info("Adding %d new instances ...", len(domains))
637 for domain in domains:
639 logger.info("Fetching instances from domain='%s' ...", domain)
640 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
641 except network.exceptions as exception:
642 logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
643 instances.set_last_error(domain, exception)
645 logger.debug("Success! - EXIT!")
# Command handler: parse the ATOM feed of the FBA bot account on
# ryona.agency; each entry's HTML content holds anchors whose href values
# are comma-separated domain lists. Wanted, new domains are collected and
# crawled.
# NOTE(review): sampled listing — the `domains = list()` initialisation,
# `try:` headers and `continue` statements are on omitted lines.
648 def fetch_fbabot_atom(args: argparse.Namespace) -> int:
649 logger.debug("args[]='%s' - CALLED!", type(args))
650 feed = "https://ryona.agency/users/fba/feed.atom"
654 logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
655 response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
657 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
658 if response.ok and response.status_code < 300 and len(response.text) > 0:
659 logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
660 atom = atoma.parse_atom_bytes(response.content)
662 logger.debug("atom[]='%s'", type(atom))
663 for entry in atom.entries:
664 logger.debug("entry[]='%s'", type(entry))
# The entry body is HTML; extract every <a href> and split the href
# on commas since multiple domains may be packed into one link.
665 doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
666 logger.debug("doc[]='%s'", type(doc))
667 for element in doc.findAll("a"):
668 for href in element["href"].split(","):
669 logger.debug("href[%s]='%s", type(href), href)
670 domain = tidyup.domain(href)
672 logger.debug("domain='%s'", domain)
673 if not utils.is_domain_wanted(domain):
674 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
676 elif domain in domains:
677 logger.debug("domain='%s' is already added - SKIPPED!", domain)
679 elif instances.is_registered(domain):
680 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
682 elif instances.is_recent(domain):
683 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
686 logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
687 domains.append(domain)
# Second phase: crawl collected domains, recording per-domain errors.
689 logger.debug("domains()=%d", len(domains))
693 logger.info("Adding %d new instances ...", len(domains))
694 for domain in domains:
696 logger.info("Fetching instances from domain='%s' ...", domain)
697 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
698 except network.exceptions as exception:
699 logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
700 instances.set_last_error(domain, exception)
702 logger.debug("Success! - EXIT!")
# Command handler: crawl instance data. First fetches args.domain itself,
# then (unless limited — see note) loops over all known instances of
# supported software whose last_instance_fetch is older than the
# configured recheck_instance interval.
# NOTE(review): sampled listing — the `try:` headers, the "--single"-style
# early-exit condition before line 720, the `for row in rows:` loop header
# and the `continue`/`return` statements are on omitted lines.
705 def fetch_instances(args: argparse.Namespace) -> int:
706 logger.debug("args[]='%s' - CALLED!", type(args))
711 logger.info("Fetching instances from args.domain='%s' ...", args.domain)
712 federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
713 except network.exceptions as exception:
714 logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
715 instances.set_last_error(args.domain, exception)
716 instances.update_data(args.domain)
720 logger.debug("Not fetching more instances - EXIT!")
723 # Loop through some instances
724 database.cursor.execute(
725 "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
728 rows = database.cursor.fetchall()
729 logger.info("Checking %d entries ...", len(rows))
731 logger.debug("domain='%s'", row[0])
732 if not utils.is_domain_wanted(row[0]):
733 logger.debug("Domain row[0]='%s' is not wanted - SKIPPED!", row[0])
# row tuple layout: (domain, origin, software, nodeinfo_url).
737 logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", row[0], row[1], row[2], row[3])
738 federation.fetch_instances(row[0], row[1], row[2], inspect.currentframe().f_code.co_name, row[3])
739 except network.exceptions as exception:
740 logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[0]='%s'", type(exception), row[0])
741 instances.set_last_error(row[0], exception)
743 logger.debug("Success - EXIT!")
# Command handler: download the Oliphant CSV blocklists from Codeberg and
# import their entries. Each list entry names a blocker and its CSV path
# relative to base_url; CSV columns vary between lists ("#domain"/"domain",
# "#severity"/"severity", optional reject_media / reject_reports flags).
# NOTE(review): sampled listing — the dict/list braces of `blocklists`,
# the `domains`/`blockdict` initialisations, `try:`/`continue` statements,
# the `for row in reader:` loop header, the reject_media assignment and the
# severity-based processing branch are on omitted lines; the visible
# `utils.process_block(..., "reject")` call is therefore only part of the
# real per-row logic.
746 def fetch_oliphant(args: argparse.Namespace) -> int:
747 logger.debug("args[]='%s' - CALLED!", type(args))
751 base_url = "https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists"
# Static catalogue of blockers and their CSV files under base_url.
756 "blocker": "artisan.chat",
757 "csv_url": "mastodon/artisan.chat.csv",
759 "blocker": "mastodon.art",
760 "csv_url": "mastodon/mastodon.art.csv",
762 "blocker": "pleroma.envs.net",
763 "csv_url": "mastodon/pleroma.envs.net.csv",
765 "blocker": "oliphant.social",
766 "csv_url": "mastodon/_unified_tier3_blocklist.csv",
768 "blocker": "mastodon.online",
769 "csv_url": "mastodon/mastodon.online.csv",
771 "blocker": "mastodon.social",
772 "csv_url": "mastodon/mastodon.social.csv",
774 "blocker": "mastodon.social",
775 "csv_url": "other/missing-tier0-mastodon.social.csv",
777 "blocker": "rage.love",
778 "csv_url": "mastodon/rage.love.csv",
780 "blocker": "sunny.garden",
781 "csv_url": "mastodon/sunny.garden.csv",
783 "blocker": "solarpunk.moe",
784 "csv_url": "mastodon/solarpunk.moe.csv",
786 "blocker": "toot.wales",
787 "csv_url": "mastodon/toot.wales.csv",
789 "blocker": "union.place",
790 "csv_url": "mastodon/union.place.csv",
796 logger.debug("Downloading %d files ...", len(blocklists))
797 for block in blocklists:
798 # Is domain given and not equal blocker?
799 if isinstance(args.domain, str) and args.domain != block["blocker"]:
800 logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
802 elif args.domain in domains:
803 logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
805 elif instances.is_recent(block["blocker"]):
806 logger.debug("block[blocker]='%s' has been recently crawled - SKIPPED!", block["blocker"])
810 logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
811 response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
813 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
814 if not response.ok or response.status_code > 399 or response.content == "":
815 logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
818 logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
819 reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")
821 logger.debug("reader[]='%s'", type(reader))
824 logger.debug("row[%s]='%s'", type(row), row)
# Per-row defaults; column names differ between the CSV variants,
# so both the "#"-prefixed and plain headers are probed below.
825 domain = severity = None
826 reject_media = reject_reports = False
828 domain = row["#domain"]
829 elif "domain" in row:
830 domain = row["domain"]
832 logger.debug("row='%s' does not contain domain column", row)
835 if "#severity" in row:
836 severity = row["#severity"]
837 elif "severity" in row:
838 severity = row["severity"]
840 logger.debug("row='%s' does not contain severity column", row)
843 if "#reject_media" in row and row["#reject_media"].lower() == "true":
845 elif "reject_media" in row and row["reject_media"].lower() == "true":
848 if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
849 reject_reports = True
850 elif "reject_reports" in row and row["reject_reports"].lower() == "true":
851 reject_reports = True
853 logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
854 if not utils.is_domain_wanted(domain):
855 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
858 logger.debug("Marking domain='%s' as handled", domain)
859 domains.append(domain)
861 logger.debug("Processing domain='%s' ...", domain)
862 processed = utils.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
863 logger.debug("processed='%s'", processed)
865 if utils.process_block(block["blocker"], domain, None, "reject") and config.get("bot_enabled"):
866 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", domain, block["block_level"], block["blocker"])
869 "reason" : block["reason"],
# The extra media/report rejection flags are stored as their own
# block levels.
873 utils.process_block(block["blocker"], domain, None, "reject_media")
875 utils.process_block(block["blocker"], domain, None, "reject_reports")
877 logger.debug("Invoking commit() ...")
878 database.connection.commit()
880 if config.get("bot_enabled") and len(blockdict) > 0:
881 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
882 network.send_bot_post(block["blocker"], blockdict)
884 logger.debug("Success! - EXIT!")
# Command handler: download plain-text blocklists (one domain per line)
# and feed each wanted, not-recently-crawled domain through
# utils.process_domain(). Currently the only configured source is
# seirdy.one's bsl.txt.
# NOTE(review): sampled listing — the `urls` list braces, the
# `for row in urls:` loop header, the per-domain tidy/strip step and the
# `continue` statements are on omitted lines.
887 def fetch_txt(args: argparse.Namespace) -> int:
888 logger.debug("args[]='%s' - CALLED!", type(args))
893 "blocker": "seirdy.one",
894 "url" : "https://seirdy.one/pb/bsl.txt",
897 logger.info("Checking %d text file(s) ...", len(urls))
899 logger.debug("Fetching row[url]='%s' ...", row["url"])
900 response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
902 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
903 if response.ok and response.status_code < 300 and response.text != "":
904 logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
# One domain per line in the fetched text file.
905 domains = response.text.split("\n")
907 logger.info("Processing %d domains ...", len(domains))
908 for domain in domains:
909 logger.debug("domain='%s'", domain)
911 logger.debug("domain is empty - SKIPPED!")
913 elif not utils.is_domain_wanted(domain):
914 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
916 elif instances.is_recent(domain):
917 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
920 logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
921 processed = utils.process_domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)
923 logger.debug("processed='%s'", processed)
925 logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
928 logger.debug("Success! - EXIT!")
931 def fetch_fedipact(args: argparse.Namespace) -> int:
# Scrapes the fedipact.online signatory page (one <li> per instance) and
# queues every new, wanted domain for a federation fetch.
# NOTE(review): the file's own line numbering has gaps in this dump - the
# loop header over `rows`, `continue` statements and the trailing `return`
# are elided here.
932 logger.debug("args[]='%s' - CALLED!", type(args))
935 response = utils.fetch_url("https://fedipact.online", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
937 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
# Only parse successful (2xx) responses with a non-empty body.
938 if response.ok and response.status_code < 300 and response.text != "":
939 logger.debug("Parsing %d Bytes ...", len(response.text))
941 doc = bs4.BeautifulSoup(response.text, "html.parser")
942 logger.debug("doc[]='%s'", type(doc))
# Each signatory instance is rendered as one <li> element.
944 rows = doc.findAll("li")
945 logger.info("Checking %d row(s) ...", len(rows))
947 logger.debug("row[]='%s'", type(row))
# First child of the <li> is expected to hold the domain text.
948 domain = tidyup.domain(row.contents[0])
950 logger.debug("domain='%s'", domain)
# Skip empty, unwanted, already-registered and recently-crawled domains.
952 logger.debug("domain is empty - SKIPPED!")
954 elif not utils.is_domain_wanted(domain):
955 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
957 elif instances.is_registered(domain):
958 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
960 elif instances.is_recent(domain):
961 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
# New domain: fetch its instance data, tagging the origin with this
# function's name via the current frame.
964 logger.info("Fetching domain='%s' ...", domain)
965 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
967 logger.debug("Success! - EXIT!")
970 def fetch_joinfediverse(args: argparse.Namespace) -> int:
971 logger.debug("args[]='%s' - CALLED!", type(args))
974 raw = utils.fetch_url("https://joinfediverse.wiki/FediBlock", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
975 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
977 doc = bs4.BeautifulSoup(raw, "html.parser")
978 logger.debug("doc[]='%s'", type(doc))
980 tables = doc.findAll("table", {"class": "wikitable"})
982 logger.info("Analyzing %d table(s) ...", len(tables))
985 logger.debug("table[]='%s'", type(table))
987 rows = table.findAll("tr")
988 logger.info("Checking %d row(s) ...", len(rows))
989 block_headers = dict()
991 logger.debug("row[%s]='%s'", type(row), row)
993 headers = row.findAll("th")
994 logger.debug("Found headers()=%d header(s)", len(headers))
996 block_headers = dict()
998 for header in headers:
1000 logger.debug("header[]='%s',cnt=%d", type(header), cnt)
1001 text = header.contents[0]
1003 logger.debug("text[]='%s'", type(text))
1004 if not isinstance(text, str):
1005 logger.debug("text[]='%s' is not 'str' - SKIPPED!", type(text))
1007 elif validators.domain(text.strip()):
1008 logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
1011 text = tidyup.domain(text.strip())
1012 logger.debug("text='%s'", text)
1013 if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
1014 logger.debug("Found header: '%s'=%d", text, cnt)
1015 block_headers[cnt] = text
1017 elif len(block_headers) == 0:
1018 logger.debug("row is not scrapable - SKIPPED!")
1020 elif len(block_headers) > 0:
1021 logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
1025 for element in row.find_all(["th", "td"]):
1027 logger.debug("element[]='%s',cnt=%d", type(element), cnt)
1028 if cnt in block_headers:
1029 logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])
1031 text = element.text.strip()
1032 key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"
1034 logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
1035 if key in ["domain", "instance"]:
1037 elif key == "reason":
1038 block[key] = tidyup.reason(text)
1039 elif key == "subdomain(s)":
1042 block[key] = text.split("/")
1044 logger.debug("key='%s'", key)
1047 logger.debug("block()=%d ...", len(block))
1049 logger.debug("Appending block()=%d ...", len(block))
1050 blocklist.append(block)
1052 logger.debug("blocklist()=%d", len(blocklist))
1054 database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
1055 domains = database.cursor.fetchall()
1057 logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
1059 for block in blocklist:
1060 logger.debug("block='%s'", block)
1061 if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
1062 origin = block["blocked"]
1063 for subdomain in block["subdomain(s)"]:
1064 block["blocked"] = subdomain + "." + origin
1065 blocking.append(block)
1067 blocking.append(block)
1069 logger.debug("blocking()=%d", blocking)
1070 for block in blocking:
1071 block["blocked"] = tidyup.domain(block["blocked"])
1073 if not utils.is_domain_wanted(block["blocked"]):
1074 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1076 elif instances.is_recent(block["blocked"]):
1077 logger.debug("blocked='%s' has been recently checked - SKIPPED!", block["blocked"])
1080 logger.info("Proccessing blocked='%s' ...", block["blocked"])
1081 utils.process_domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
1084 for blocker in domains:
1085 blocker = blocker[0]
1086 logger.debug("blocker[%s]='%s'", type(blocker), blocker)
1088 for block in blocking:
1089 block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None
1091 if not utils.is_domain_wanted(block["blocked"]):
1092 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1095 logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
1096 if utils.process_block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
1097 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
1099 "blocked": block["blocked"],
1100 "reason" : block["reason"],
1103 if instances.has_pending(blocker):
1104 logger.debug("Flushing updates for blocker='%s' ...", blocker)
1105 instances.update_data(blocker)
1107 logger.debug("Invoking commit() ...")
1108 database.connection.commit()
1110 if config.get("bot_enabled") and len(blockdict) > 0:
1111 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", blocker, len(blockdict))
1112 network.send_bot_post(blocker, blockdict)
1114 logger.debug("Success! - EXIT!")
1117 def recheck_obfuscation(args: argparse.Namespace) -> int:
1118 logger.debug("args[]='%s' - CALLED!", type(args))
1122 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")
1123 rows = database.cursor.fetchall()
1124 logger.info("Checking %d domains ...", len(rows))
1126 logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row[0], row[1], row[2])
1129 if row[1] == "pleroma":
1130 logger.debug("domain='%s',software='%s'", row[0], row[1])
1131 blocking = pleroma.fetch_blocks(row[0], row[2])
1132 elif row[1] == "mastodon":
1133 logger.debug("domain='%s',software='%s'", row[0], row[1])
1134 blocking = mastodon.fetch_blocks(row[0], row[2])
1135 elif row[1] == "lemmy":
1136 logger.debug("domain='%s',software='%s'", row[0], row[1])
1137 blocking = lemmy.fetch_blocks(row[0], row[2])
1138 elif row[1] == "friendica":
1139 logger.debug("domain='%s',software='%s'", row[0], row[1])
1140 blocking = friendica.fetch_blocks(row[0])
1141 elif row[1] == "misskey":
1142 logger.debug("domain='%s',software='%s'", row[0], row[1])
1143 blocking = misskey.fetch_blocks(row[0])
1145 logger.warning("Unknown sofware: domain='%s',software='%s'", row[0], row[1])
1147 logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row[0])
1150 for block in blocking:
1151 logger.debug("blocked='%s'", block["blocked"])
1154 if block["blocked"].endswith(".arpa"):
1155 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1157 elif block["blocked"].endswith(".tld"):
1158 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1160 elif block["blocked"].endswith(".onion"):
1161 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
1163 elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
1164 logger.debug("block='%s' is obfuscated.", block["blocked"])
1165 obfuscated = obfuscated + 1
1166 blocked = utils.deobfuscate_domain(block["blocked"], row[0], block["hash"] if "hash" in block else None)
1167 elif not utils.is_domain_wanted(block["blocked"]):
1168 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1170 elif blocks.is_instance_blocked(row[0], block["blocked"]):
1171 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
1174 if blocked is not None and blocked != block["blocked"]:
1175 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1176 obfuscated = obfuscated - 1
1177 if blocks.is_instance_blocked(row[0], blocked):
1178 logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row[0])
1181 if block["block_level"] == "silence":
1182 logger.debug("Block level 'silence' has been changed to 'silenced'")
1183 block["block_level"] = "silenced"
1184 elif block["block_level"] == "suspend":
1185 logger.debug("Block level 'suspend' has been changed to 'suspended'")
1186 block["block_level"] = "suspended"
1188 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
1189 if utils.process_block(row[0], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1190 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row[0])
1193 "reason" : block["reason"],
1196 logger.info("domain='%s' has %d obfuscated domain(s)", row[0], obfuscated)
1197 if obfuscated == 0 and len(blocking) > 0:
1198 logger.info("Block list from domain='%s' has been fully deobfuscated.", row[0])
1199 instances.set_has_obfuscation(row[0], False)
1201 if instances.has_pending(row[0]):
1202 logger.debug("Flushing updates for blocker='%s' ...", row[0])
1203 instances.update_data(row[0])
1205 logger.debug("Invoking commit() ...")
1206 database.connection.commit()
1208 if config.get("bot_enabled") and len(blockdict) > 0:
1209 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", row[0], len(blockdict))
1210 network.send_bot_post(row[0], blockdict)
1212 logger.debug("Success! - EXIT!")