1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
31 from fba import database
34 from fba.helpers import blacklist
35 from fba.helpers import config
36 from fba.helpers import cookies
37 from fba.helpers import locking
38 from fba.helpers import software as software_helper
39 from fba.helpers import tidyup
41 from fba.http import federation
42 from fba.http import network
44 from fba.models import blocks
45 from fba.models import instances
47 from fba.networks import friendica
48 from fba.networks import lemmy
49 from fba.networks import mastodon
50 from fba.networks import misskey
51 from fba.networks import pleroma
# Module-level logging setup: INFO by default; the commented-out line below is
# the conventional switch for per-module DEBUG output during development.
53 logging.basicConfig(level=logging.INFO)
54 logger = logging.getLogger(__name__)
55 #logger.setLevel(logging.DEBUG)
# Validates a single domain given on the command line (args.domain) and reports
# whether it is valid, blacklisted, already registered, or unknown.
# Returns an int status code.
# NOTE(review): this listing has elided lines (gaps in the embedded numbering) —
# the `status = ...` assignments, the final `else:` branch and the `return status`
# statement are not visible here.
57 def check_instance(args: argparse.Namespace) -> int:
58 logger.debug("args.domain='%s' - CALLED!", args.domain)
60 if not validators.domain(args.domain):
61 logger.warning("args.domain='%s' is not valid", args.domain)
63 elif blacklist.is_blacklisted(args.domain):
64 logger.warning("args.domain='%s' is blacklisted", args.domain)
66 elif instances.is_registered(args.domain):
67 logger.warning("args.domain='%s' is already registered", args.domain)
# Fall-through case: domain is valid, not blacklisted and not yet registered.
70 logger.info("args.domain='%s' is not known", args.domain)
72 logger.debug("status=%d - EXIT!", status)
# Fetches the public server list from the pixelfed.org API
# (/api/v1/servers/all.json) and registers every new, wanted domain via
# federation.fetch_instances().
# NOTE(review): elided lines (embedded-numbering gaps) hide the `try:` headers,
# the `return` statements and the `for row in rows:` loop header.
75 def fetch_pixelfed_api(args: argparse.Namespace) -> int:
76 logger.debug("args[]='%s' - CALLED!", type(args))
78 # No CSRF by default, you don't have to add network.api_headers by yourself here
82 logger.debug("Checking CSRF from pixelfed.org")
83 headers = csrf.determine("pixelfed.org", dict())
84 except network.exceptions as exception:
85 logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
89 logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
90 fetched = network.get_json_api(
92 "/api/v1/servers/all.json?scope=All&country=all&language=all",
94 (config.get("connection_timeout"), config.get("read_timeout"))
97 logger.debug("JSON API returned %d elements", len(fetched))
98 if "error_message" in fetched:
99 logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
101 elif "data" not in fetched["json"]:
102 logger.warning("API did not return JSON with 'data' element - EXIT!")
105 rows = fetched["json"]["data"]
106 logger.info("Checking %d fetched rows ...", len(rows))
# Per-row guards: skip rows without a 'domain' key, unwanted domains,
# already-registered domains and recently crawled ones.
108 logger.debug("row[]='%s'", type(row))
109 if "domain" not in row:
110 logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
112 elif not utils.is_domain_wanted(row["domain"]):
113 logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
115 elif instances.is_registered(row["domain"]):
116 logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
118 elif instances.is_recent(row["domain"]):
119 logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
122 logger.debug("Fetching instances from row[domain]='%s' ...", row["domain"])
123 federation.fetch_instances(row["domain"], None, None, inspect.currentframe().f_code.co_name)
# NOTE(review): message mentions "graphql" but this function talks to a REST
# endpoint — looks copy-pasted from fetch_bkali(); confirm before relying on it.
125 except network.exceptions as exception:
126 logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
129 logger.debug("Success! - EXIT!")
132 def fetch_bkali(args: argparse.Namespace) -> int:
133 logger.debug("args[]='%s' - CALLED!", type(args))
136 fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
137 "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
140 logger.debug("fetched[]='%s'", type(fetched))
141 if "error_message" in fetched:
142 logger.warning("post_json_api() for 'gql.api.bka.li' returned error message='%s", fetched["error_message"])
144 elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
145 logger.warning("post_json_api() returned error: '%s", fetched["error"]["message"])
148 rows = fetched["json"]
150 logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
152 raise Exception("WARNING: Returned no records")
153 elif "data" not in rows:
154 raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
155 elif "nodeinfo" not in rows["data"]:
156 raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")
158 for entry in rows["data"]["nodeinfo"]:
159 logger.debug("entry[%s]='%s'", type(entry), entry)
160 if "domain" not in entry:
161 logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
163 elif not utils.is_domain_wanted(entry["domain"]):
164 logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
166 elif instances.is_registered(entry["domain"]):
167 logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
169 elif instances.is_recent(entry["domain"]):
170 logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
173 logger.debug("Adding domain='%s' ...", entry["domain"])
174 domains.append(entry["domain"])
176 except network.exceptions as exception:
177 logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
180 logger.debug("domains()=%d", len(domains))
184 logger.info("Adding %d new instances ...", len(domains))
185 for domain in domains:
187 logger.info("Fetching instances from domain='%s' ...", domain)
188 federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
189 except network.exceptions as exception:
190 logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
191 instances.set_last_error(domain, exception)
193 logger.debug("Success - EXIT!")
# Core crawler: selects blocker instances from the database (a single domain,
# a single software type, or all supported softwares past the re-check
# interval), fetches each blocker's block list with the software-specific
# module (pleroma/mastodon/lemmy/friendica/misskey), normalises and
# deobfuscates blocked domains, records blocks via utils.process_block() and
# optionally collects entries for a bot announcement post.
# NOTE(review): many lines are elided in this listing (embedded-numbering
# gaps): `return` statements, `continue` statements, `try:` headers, the
# `blockdict = list()` initialisation and several `else:` lines are not
# visible. Comments below are limited to what the visible lines establish.
196 def fetch_blocks(args: argparse.Namespace) -> int:
197 logger.debug("args[]='%s' - CALLED!", type(args))
# Up-front validation when a single domain was requested on the command line.
198 if args.domain is not None and args.domain != "":
199 logger.debug("args.domain='%s' - checking ...", args.domain)
200 if not validators.domain(args.domain):
201 logger.warning("args.domain='%s' is not valid.", args.domain)
203 elif blacklist.is_blacklisted(args.domain):
204 logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
206 elif not instances.is_registered(args.domain):
207 logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
# Three mutually exclusive queries: by domain, by software, or the periodic
# re-check of all supported softwares (parameterised, no string building).
212 if args.domain is not None and args.domain != "":
213 # Re-check single domain
214 logger.debug("Querying database for single args.domain='%s' ...", args.domain)
215 database.cursor.execute(
216 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
218 elif args.software is not None and args.software != "":
219 # Re-check single software
220 logger.debug("Querying database for args.software='%s' ...", args.software)
221 database.cursor.execute(
222 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
225 # Re-check after "timeout" (aka. minimum interval)
226 database.cursor.execute(
227 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey', 'peertube') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
230 rows = database.cursor.fetchall()
231 logger.info("Checking %d entries ...", len(rows))
232 for blocker, software, origin, nodeinfo_url in rows:
233 logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
234 blocker = tidyup.domain(blocker)
235 logger.debug("blocker='%s' - AFTER!", blocker)
# Skip rows that tidy up to nothing, lack a nodeinfo URL, or are unwanted.
238 logger.warning("blocker is now empty!")
240 elif nodeinfo_url is None or nodeinfo_url == "":
241 logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
243 elif not utils.is_domain_wanted(blocker):
244 logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
247 logger.debug("blocker='%s'", blocker)
248 instances.set_last_blocked(blocker)
# Reset the obfuscation flag; it is re-set below if obfuscated entries appear.
249 instances.set_has_obfuscation(blocker, False)
# Dispatch to the software-specific block-list fetcher.
253 if software == "pleroma":
254 logger.info("blocker='%s',software='%s'", blocker, software)
255 blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
256 elif software == "mastodon":
257 logger.info("blocker='%s',software='%s'", blocker, software)
258 blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
259 elif software == "lemmy":
260 logger.info("blocker='%s',software='%s'", blocker, software)
261 blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
262 elif software == "friendica":
263 logger.info("blocker='%s',software='%s'", blocker, software)
264 blocking = friendica.fetch_blocks(blocker)
265 elif software == "misskey":
266 logger.info("blocker='%s',software='%s'", blocker, software)
267 blocking = misskey.fetch_blocks(blocker)
269 logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)
271 logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
273 for block in blocking:
274 logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])
276 if block["block_level"] == "":
277 logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
# Normalise blocked domain and reason text before any further checks.
280 logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
281 block["blocked"] = tidyup.domain(block["blocked"])
282 block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
283 logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])
# Skip empty, .onion, .arpa and fake ".tld" entries.
285 if block["blocked"] == "":
286 logger.warning("blocked is empty, blocker='%s'", blocker)
288 elif block["blocked"].endswith(".onion"):
289 logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
291 elif block["blocked"].endswith(".arpa"):
292 logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
294 elif block["blocked"].endswith(".tld"):
295 logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
# Obfuscated entries ('*' wildcards or '?' placeholders): mark the blocker as
# obfuscating and try to resolve the real domain via instances.deobfuscate().
297 elif block["blocked"].find("*") >= 0:
298 logger.debug("blocker='%s' uses obfuscated domains, marking ...", blocker)
299 instances.set_has_obfuscation(blocker, True)
301 # Some friendica servers also obscure domains without hash
302 row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)
304 logger.debug("row[]='%s'", type(row))
306 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
309 block["blocked"] = row[0]
311 nodeinfo_url = row[2]
312 elif block["blocked"].find("?") >= 0:
313 logger.debug("blocker='%s' uses obfuscated domains, marking ...", blocker)
314 instances.set_has_obfuscation(blocker, True)
316 # Some obscure them with question marks, not sure if that's dependent on version or not
317 row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)
319 logger.debug("row[]='%s'", type(row))
321 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
324 block["blocked"] = row[0]
326 nodeinfo_url = row[2]
328 logger.debug("Looking up instance by domainm, blocked='%s'", block["blocked"])
329 if not utils.is_domain_wanted(block["blocked"]):
330 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
332 elif block["block_level"] in ["accept", "accepted"]:
333 logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
335 elif not instances.is_registered(block["blocked"]):
336 logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
337 federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)
# Canonicalise Mastodon-style level names to past-tense forms used in the DB.
339 if block["block_level"] == "silence":
340 logger.debug("Block level 'silence' has been changed to 'silenced'")
341 block["block_level"] = "silenced"
342 elif block["block_level"] == "suspend":
343 logger.debug("Block level 'suspend' has been changed to 'suspended'")
344 block["block_level"] = "suspended"
346 if utils.process_block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
# NOTE(review): the message says reason='%s' but block["block_level"] is
# passed as that placeholder — message and argument disagree; confirm intent.
347 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
349 "blocked": block["blocked"],
350 "reason" : block["reason"],
353 logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
354 cookies.clear(block["blocked"])
# Flush any pending instance updates and commit once per blocker.
356 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
357 if instances.has_pending(blocker):
358 logger.debug("Flushing updates for blocker='%s' ...", blocker)
359 instances.update_data(blocker)
361 logger.debug("Invoking commit() ...")
362 database.connection.commit()
364 logger.debug("Invoking cookies.clear(%s) ...", blocker)
365 cookies.clear(blocker)
# Optional bot announcement of newly found "reject" blocks.
367 logger.debug("config[bot_enabled]='%s',blockdict()=%d'", config.get("bot_enabled"), len(blockdict))
368 if config.get("bot_enabled") and len(blockdict) > 0:
369 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
370 network.send_bot_post(blocker, blockdict)
372 logger.debug("Success! - EXIT!")
# Scrapes fediverse.observer's per-software table pages and registers every
# new, wanted domain found in the anchors with class "url".
# NOTE(review): elided lines hide the `types` initialisation, `try:` headers,
# `continue` statements, the item loop header and the `return` statements.
375 def fetch_observer(args: argparse.Namespace) -> int:
376 logger.debug("args[]='%s' - CALLED!", type(args))
412 logger.info("Fetching %d different table data ...", len(types))
413 for software in types:
414 logger.debug("software='%s' - BEFORE!", software)
# --software filter: only process the requested software when given.
415 if args.software is not None and args.software != software:
416 logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
421 logger.debug("Fetching table data for software='%s' ...", software)
422 raw = utils.fetch_url(
423 f"https://fediverse.observer/app/views/tabledata.php?software={software}",
425 (config.get("connection_timeout"), config.get("read_timeout"))
427 logger.debug("raw[%s]()=%d", type(raw), len(raw))
429 doc = bs4.BeautifulSoup(raw, features='html.parser')
430 logger.debug("doc[]='%s'", type(doc))
431 except network.exceptions as exception:
432 logger.warning("Cannot fetch software='%s' from fediverse.observer: '%s'", software, type(exception))
435 items = doc.findAll("a", {"class": "url"})
436 logger.info("Checking %d items,software='%s' ...", len(items), software)
438 logger.debug("item[]='%s'", type(item))
439 domain = item.decode_contents()
441 logger.debug("domain='%s'", domain)
442 if not utils.is_domain_wanted(domain):
443 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
445 elif instances.is_registered(domain):
446 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
448 elif instances.is_recent(domain):
449 logger.debug("domain='%s' is recently being handled - SKIPPED!", domain)
# NOTE(review): this rebinds the OUTER loop variable `software` to its alias;
# later items in the same table then see the aliased value — confirm whether
# a separate local was intended.
452 software = software_helper.alias(software)
453 logger.info("Fetching instances for domain='%s'", domain)
454 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
456 logger.debug("Success! - EXIT!")
# Imports the todon.eu block list from wiki.todon.eu/todon/domainblocks:
# parses the "silenced/limited" and "suspended" sections, registers unknown
# instances and records blocks with blocker fixed to "todon.eu".
# NOTE(review): elided lines hide the `blocklist`/`blockdict` initialisations,
# `try:` headers, `continue` statements and the blockdict.append(...) body.
459 def fetch_todon_wiki(args: argparse.Namespace) -> int:
460 logger.debug("args[]='%s' - CALLED!", type(args))
468 raw = utils.fetch_url("https://wiki.todon.eu/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
469 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
471 doc = bs4.BeautifulSoup(raw, "html.parser")
472 logger.debug("doc[]='%s'", type(doc))
# The wiki page structures each block level as an <h3> followed by a <ul>.
474 silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
475 logger.info("Checking %d silenced/limited entries ...", len(silenced))
476 blocklist["silenced"] = utils.find_domains(silenced, "div")
478 suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
479 logger.info("Checking %d suspended entries ...", len(suspended))
480 blocklist["reject"] = utils.find_domains(suspended, "div")
483 for block_level in blocklist:
484 blockers = blocklist[block_level]
486 logger.debug("block_level='%s',blockers()=%d'", block_level, len(blockers))
487 for blocked in blockers:
488 logger.debug("blocked='%s'", blocked)
490 if not instances.is_registered(blocked):
492 logger.info("Fetching instances from domain='%s' ...", blocked)
# NOTE(review): origin 'chaos.social' and the "(fetch_cs)" wording below look
# copy-pasted from fetch_cs(); confirm they are intended inside this function.
493 federation.fetch_instances(blocked, 'chaos.social', None, inspect.currentframe().f_code.co_name)
494 except network.exceptions as exception:
495 logger.warning("Exception '%s' during fetching instances (fetch_cs) from blocked='%s'", type(exception), blocked)
496 instances.set_last_error(blocked, exception)
498 if blocks.is_instance_blocked("todon.eu", blocked, block_level):
499 logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
502 logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
503 if utils.process_block("todon.eu", blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
504 logger.debug("Appending blocked='%s',reason='%s' for blocker='todon.eu' ...", blocked, block_level)
510 logger.debug("Invoking commit() ...")
511 database.connection.commit()
513 if config.get("bot_enabled") and len(blockdict) > 0:
514 logger.info("Sending bot POST for blocker='todon.eu',blockdict()=%d ...", len(blockdict))
515 network.send_bot_post("todon.eu", blockdict)
517 logger.debug("Success! - EXIT!")
# Imports chaos.social's published federation policy: fetches federation.md
# from their GitHub meta repository, renders it to HTML with the markdown
# module, extracts the "silenced" and "blocked" tables and records blocks with
# blocker fixed to "chaos.social".
# NOTE(review): elided lines hide the `extensions` list, the `domains` and
# `blockdict` initialisations, `try:` headers and `continue` statements; no
# explicit return type annotation is visible on this def.
520 def fetch_cs(args: argparse.Namespace):
521 logger.debug("args[]='%s' - CALLED!", type(args))
547 raw = utils.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
548 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
550 doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features='html.parser')
551 logger.debug("doc()=%d[]='%s'", len(doc), type(doc))
# Each block level is an <h2> followed by a table; find_domains() pulls the
# per-row domain/reason pairs out of its <tbody>.
553 silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
554 logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
555 domains["silenced"] = federation.find_domains(silenced)
557 blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
558 logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
559 domains["reject"] = federation.find_domains(blocked)
561 logger.debug("domains[silenced]()=%d,domains[reject]()=%d", len(domains["silenced"]), len(domains["reject"]))
566 for block_level in domains:
567 logger.info("block_level='%s' has %d row(s)", block_level, len(domains[block_level]))
569 for row in domains[block_level]:
570 logger.debug("row[%s]='%s'", type(row), row)
571 if instances.is_recent(row["domain"], "last_blocked"):
572 logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
574 elif not instances.is_registered(row["domain"]):
576 logger.info("Fetching instances from domain='%s' ...", row["domain"])
577 federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)
578 except network.exceptions as exception:
579 logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
580 instances.set_last_error(row["domain"], exception)
582 if utils.process_block("chaos.social", row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
583 logger.debug("Appending blocked='%s',reason='%s' for blocker='chaos.social' ...", row["domain"], block_level)
585 "blocked": row["domain"],
586 "reason" : row["reason"],
589 logger.debug("Invoking commit() ...")
590 database.connection.commit()
592 if config.get("bot_enabled") and len(blockdict) > 0:
593 logger.info("Sending bot POST for blocker='chaos.social',blockdict()=%d ...", len(blockdict))
594 network.send_bot_post("chaos.social", blockdict)
596 logger.debug("Success! - EXIT!")
# Fetches an FBA-style RSS feed (URL from args.feed), extracts a domain from
# each item's link (the value after the first '='), deduplicates/filters, and
# registers each new domain via federation.fetch_instances().
# NOTE(review): elided lines hide the `domains = list()` initialisation,
# `continue` statements, `try:` headers and the `return` statements.
599 def fetch_fba_rss(args: argparse.Namespace) -> int:
600 logger.debug("args[]='%s' - CALLED!", type(args))
603 logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
604 response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
606 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
607 if response.ok and response.status_code < 300 and len(response.text) > 0:
608 logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
609 rss = atoma.parse_rss_bytes(response.content)
611 logger.debug("rss[]='%s'", type(rss))
612 for item in rss.items:
613 logger.debug("item='%s'", item)
# The feed links encode the domain as a query value, e.g. ...?domain=<name>.
614 domain = item.link.split("=")[1]
616 if not utils.is_domain_wanted(domain):
617 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
619 elif domain in domains:
620 logger.debug("domain='%s' is already added - SKIPPED!", domain)
622 elif instances.is_registered(domain):
623 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
625 elif instances.is_recent(domain):
626 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
629 logger.debug("Adding domain='%s'", domain)
630 domains.append(domain)
632 logger.debug("domains()=%d", len(domains))
# Second pass: crawl collected domains; network errors are recorded per domain.
636 logger.info("Adding %d new instances ...", len(domains))
637 for domain in domains:
639 logger.info("Fetching instances from domain='%s' ...", domain)
640 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
641 except network.exceptions as exception:
642 logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
643 instances.set_last_error(domain, exception)
645 logger.debug("Success! - EXIT!")
# Fetches the FBA bot account's ATOM feed (ryona.agency), parses each entry's
# HTML content for <a href> values (comma-separated lists supported), tidies
# the domains, and registers each new, wanted one.
# NOTE(review): elided lines hide the `domains = list()` initialisation,
# `continue` statements, `try:` headers and the `return` statements.
648 def fetch_fbabot_atom(args: argparse.Namespace) -> int:
649 logger.debug("args[]='%s' - CALLED!", type(args))
650 feed = "https://ryona.agency/users/fba/feed.atom"
654 logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
655 response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
657 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
658 if response.ok and response.status_code < 300 and len(response.text) > 0:
659 logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
660 atom = atoma.parse_atom_bytes(response.content)
662 logger.debug("atom[]='%s'", type(atom))
663 for entry in atom.entries:
664 logger.debug("entry[]='%s'", type(entry))
# Entry content is HTML; anchors may carry several domains comma-joined in
# one href attribute, hence the inner split(",") loop.
665 doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
666 logger.debug("doc[]='%s'", type(doc))
667 for element in doc.findAll("a"):
668 for href in element["href"].split(","):
669 logger.debug("href[%s]='%s", type(href), href)
670 domain = tidyup.domain(href)
672 logger.debug("domain='%s'", domain)
673 if not utils.is_domain_wanted(domain):
674 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
676 elif domain in domains:
677 logger.debug("domain='%s' is already added - SKIPPED!", domain)
679 elif instances.is_registered(domain):
680 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
682 elif instances.is_recent(domain):
683 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
686 logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
687 domains.append(domain)
689 logger.debug("domains()=%d", len(domains))
# Second pass: crawl collected domains; network errors are recorded per domain.
693 logger.info("Adding %d new instances ...", len(domains))
694 for domain in domains:
696 logger.info("Fetching instances from domain='%s' ...", domain)
697 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
698 except network.exceptions as exception:
699 logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
700 instances.set_last_error(domain, exception)
702 logger.debug("Success! - EXIT!")
# Crawls instances: first the single args.domain, then (unless an elided
# early-exit triggers, presumably a --single flag) every supported-software
# instance whose last_instance_fetch is older than the recheck interval.
# NOTE(review): elided lines hide the `try:` headers, the early-`return`
# branch condition before "Not fetching more instances", the row loop header
# and `continue` statements.
705 def fetch_instances(args: argparse.Namespace) -> int:
706 logger.debug("args[]='%s' - CALLED!", type(args))
711 logger.info("Fetching instances from args.domain='%s' ...", args.domain)
712 federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
713 except network.exceptions as exception:
714 logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
715 instances.set_last_error(args.domain, exception)
719 logger.debug("Not fetching more instances - EXIT!")
722 # Loop through some instances
723 database.cursor.execute(
724 "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
727 rows = database.cursor.fetchall()
728 logger.info("Checking %d entries ...", len(rows))
# row tuple layout: (domain, origin, software, nodeinfo_url) per the SELECT.
730 logger.debug("domain='%s'", row[0])
731 if not utils.is_domain_wanted(row[0]):
732 logger.debug("Domain row[0]='%s' is not wanted - SKIPPED!", row[0])
736 logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", row[0], row[1], row[2], row[3])
737 federation.fetch_instances(row[0], row[1], row[2], inspect.currentframe().f_code.co_name, row[3])
738 except network.exceptions as exception:
739 logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[0]='%s'", type(exception), row[0])
740 instances.set_last_error(row[0], exception)
742 logger.debug("Success - EXIT!")
# Downloads the Oliphant blocklist CSVs (codeberg.org/oliphant/blocklists) for
# a fixed set of well-known blockers, parses each CSV (supporting both
# '#domain'/'domain'-style column headers), and records reject/reject_media/
# reject_reports blocks attributed to each list's blocker.
# NOTE(review): heavily elided listing — the `domains`/`blockdict`
# initialisations, list/dict brackets of `blocklists`, `continue` statements,
# the CSV row loop header, several `if`/`else:` lines and the `return`
# statements are not visible.
745 def fetch_oliphant(args: argparse.Namespace) -> int:
746 logger.debug("args[]='%s' - CALLED!", type(args))
750 base_url = "https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists"
# Static mapping of blocker domain -> CSV path relative to base_url.
755 "blocker": "artisan.chat",
756 "csv_url": "mastodon/artisan.chat.csv",
758 "blocker": "mastodon.art",
759 "csv_url": "mastodon/mastodon.art.csv",
761 "blocker": "pleroma.envs.net",
762 "csv_url": "mastodon/pleroma.envs.net.csv",
764 "blocker": "oliphant.social",
765 "csv_url": "mastodon/_unified_tier3_blocklist.csv",
767 "blocker": "mastodon.online",
768 "csv_url": "mastodon/mastodon.online.csv",
770 "blocker": "mastodon.social",
771 "csv_url": "mastodon/mastodon.social.csv",
773 "blocker": "mastodon.social",
774 "csv_url": "other/missing-tier0-mastodon.social.csv",
776 "blocker": "rage.love",
777 "csv_url": "mastodon/rage.love.csv",
779 "blocker": "sunny.garden",
780 "csv_url": "mastodon/sunny.garden.csv",
782 "blocker": "solarpunk.moe",
783 "csv_url": "mastodon/solarpunk.moe.csv",
785 "blocker": "toot.wales",
786 "csv_url": "mastodon/toot.wales.csv",
788 "blocker": "union.place",
789 "csv_url": "mastodon/union.place.csv",
795 logger.debug("Downloading %d files ...", len(blocklists))
796 for block in blocklists:
797 # Is domain given and not equal blocker?
798 if isinstance(args.domain, str) and args.domain != block["blocker"]:
799 logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
801 elif args.domain in domains:
802 logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
804 elif instances.is_recent(block["blocker"]):
805 logger.debug("block[blocker]='%s' has been recently crawled - SKIPPED!", block["blocker"])
809 logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
810 response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
812 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
813 if not response.ok or response.status_code > 399 or response.content == "":
814 logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
817 logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
818 reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")
820 logger.debug("reader[]='%s'", type(reader))
823 logger.debug("row[%s]='%s'", type(row), row)
824 domain = severity = None
825 reject_media = reject_reports = False
# Column names differ between lists: some prefix headers with '#'.
827 domain = row["#domain"]
828 elif "domain" in row:
829 domain = row["domain"]
831 logger.debug("row='%s' does not contain domain column", row)
834 if "#severity" in row:
835 severity = row["#severity"]
836 elif "severity" in row:
837 severity = row["severity"]
839 logger.debug("row='%s' does not contain severity column", row)
842 if "#reject_media" in row and row["#reject_media"].lower() == "true":
844 elif "reject_media" in row and row["reject_media"].lower() == "true":
847 if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
848 reject_reports = True
849 elif "reject_reports" in row and row["reject_reports"].lower() == "true":
850 reject_reports = True
852 logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
853 if not utils.is_domain_wanted(domain):
854 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
857 logger.debug("Marking domain='%s' as handled", domain)
858 domains.append(domain)
860 logger.debug("Processing domain='%s' ...", domain)
861 processed = utils.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
862 logger.debug("processed='%s'", processed)
864 if utils.process_block(block["blocker"], domain, None, "reject") and config.get("bot_enabled"):
# NOTE(review): the visible `blocklists` entries define only 'blocker' and
# 'csv_url' — block["block_level"] (here) and block["reason"] (below) would
# raise KeyError unless an elided line adds those keys; verify upstream.
865 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", domain, block["block_level"], block["blocker"])
868 "reason" : block["reason"],
872 utils.process_block(block["blocker"], domain, None, "reject_media")
874 utils.process_block(block["blocker"], domain, None, "reject_reports")
876 logger.debug("Invoking commit() ...")
877 database.connection.commit()
879 if config.get("bot_enabled") and len(blockdict) > 0:
880 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
881 network.send_bot_post(block["blocker"], blockdict)
883 logger.debug("Success! - EXIT!")
# Fetches plain-text blocklists (currently only seirdy.one's bsl.txt), splits
# them into one domain per line, and runs each wanted domain through
# utils.process_domain() attributed to the list's blocker.
# NOTE(review): elided lines hide the `urls` list brackets, the url loop
# header, `continue` statements and the `return` statements.
886 def fetch_txt(args: argparse.Namespace) -> int:
887 logger.debug("args[]='%s' - CALLED!", type(args))
# Static list of text-file sources: blocker domain + URL of its list.
892 "blocker": "seirdy.one",
893 "url" : "https://seirdy.one/pb/bsl.txt",
896 logger.info("Checking %d text file(s) ...", len(urls))
898 logger.debug("Fetching row[url]='%s' ...", row["url"])
899 response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
901 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
902 if response.ok and response.status_code < 300 and response.text != "":
903 logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
904 domains = response.text.split("\n")
906 logger.info("Processing %d domains ...", len(domains))
907 for domain in domains:
908 logger.debug("domain='%s'", domain)
910 logger.debug("domain is empty - SKIPPED!")
912 elif not utils.is_domain_wanted(domain):
913 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
915 elif instances.is_recent(domain):
916 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
919 logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
920 processed = utils.process_domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)
922 logger.debug("processed='%s'", processed)
924 logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
927 logger.debug("Success! - EXIT!")
# NOTE(review): line-numbered extract - leading integers are artifacts and
# gaps in the numbering indicate elided lines (blank lines, `continue`s,
# loop headers). Comments annotate only the visible code.
#
# fetch_fedipact: scrapes the signatory list from https://fedipact.online
# (each instance is an <li> element) and queues every wanted, unregistered,
# not-recently-crawled domain for a full instance fetch.
# Declared to return int; the return statement is elided from this view.
930 def fetch_fedipact(args: argparse.Namespace) -> int:
931 logger.debug("args[]='%s' - CALLED!", type(args))
934 response = utils.fetch_url("https://fedipact.online", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
936 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
# Only parse successful, non-redirect, non-empty responses.
937 if response.ok and response.status_code < 300 and response.text != "":
938 logger.debug("Parsing %d Bytes ...", len(response.text))
940 doc = bs4.BeautifulSoup(response.text, "html.parser")
941 logger.debug("doc[]='%s'", type(doc))
# NOTE(review): findAll() is the deprecated bs4 alias of find_all().
943 rows = doc.findAll("li")
944 logger.info("Checking %d row(s) ...", len(rows))
# The `for row in rows:` header is elided between these lines.
946 logger.debug("row[]='%s'", type(row))
# First child of the <li> is expected to be the domain text; tidyup.domain()
# normalizes it (empty string on failure, judging by the guard below).
947 domain = tidyup.domain(row.contents[0])
949 logger.debug("domain='%s'", domain)
# Guard chain (the `if`/`continue` lines between branches are elided):
951 logger.debug("domain is empty - SKIPPED!")
953 elif not utils.is_domain_wanted(domain):
954 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
956 elif instances.is_registered(domain):
957 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
959 elif instances.is_recent(domain):
960 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
# New, wanted domain: crawl it (no origin/software known yet, hence None, None).
963 logger.info("Fetching domain='%s' ...", domain)
964 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
966 logger.debug("Success! - EXIT!")
# NOTE(review): line-numbered extract - leading integers are artifacts; gaps
# in the numbering indicate elided lines (`for` headers, `continue`s,
# dict-literal bodies, `return`). Comments annotate only the visible code.
#
# fetch_joinfediverse: scrapes the FediBlock wiki page's "wikitable" tables,
# reconstructs a blocklist from the table headers/cells, expands per-entry
# subdomain lists into full blocked domains, registers the blocked domains,
# and records the blocks for every locally known climatejustice.* instance.
969 def fetch_joinfediverse(args: argparse.Namespace) -> int:
970 logger.debug("args[]='%s' - CALLED!", type(args))
# --- Phase 1: fetch and parse the wiki page -------------------------------
973 raw = utils.fetch_url("https://joinfediverse.wiki/FediBlock", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
974 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
976 doc = bs4.BeautifulSoup(raw, "html.parser")
977 logger.debug("doc[]='%s'", type(doc))
979 tables = doc.findAll("table", {"class": "wikitable"})
981 logger.info("Analyzing %d table(s) ...", len(tables))
# `for table in tables:` header is elided before this line.
984 logger.debug("table[]='%s'", type(table))
986 rows = table.findAll("tr")
987 logger.info("Checking %d row(s) ...", len(rows))
# Maps column index -> recognized header name for the current table.
988 block_headers = dict()
# `for row in rows:` header is elided before this line.
990 logger.debug("row[%s]='%s'", type(row), row)
992 headers = row.findAll("th")
993 logger.debug("Found headers()=%d header(s)", len(headers))
# A header row resets the column map (the `if len(headers) > 1:`-style guard
# between these lines is elided - TODO confirm the exact condition).
995 block_headers = dict()
997 for header in headers:
# `cnt` increment/initialization lines are elided around this loop.
999 logger.debug("header[]='%s',cnt=%d", type(header), cnt)
1000 text = header.contents[0]
1002 logger.debug("text[]='%s'", type(text))
# Skip non-string header contents (e.g. nested tags) and header cells that
# are themselves domains (the `continue` lines are elided).
1003 if not isinstance(text, str):
1004 logger.debug("text[]='%s' is not 'str' - SKIPPED!", type(text))
1006 elif validators.domain(text.strip()):
1007 logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
# Only these four normalized header names are scrapable columns.
1010 text = tidyup.domain(text.strip())
1011 logger.debug("text='%s'", text)
1012 if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
1013 logger.debug("Found header: '%s'=%d", text, cnt)
1014 block_headers[cnt] = text
# Data rows before any recognized header row are unusable.
1016 elif len(block_headers) == 0:
1017 logger.debug("row is not scrapable - SKIPPED!")
1019 elif len(block_headers) > 0:
1020 logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
# --- Phase 2: turn a data row's cells into one `block` dict ----------------
1024 for element in row.find_all(["th", "td"]):
1026 logger.debug("element[]='%s',cnt=%d", type(element), cnt)
1027 if cnt in block_headers:
1028 logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])
1030 text = element.text.strip()
# Both "domain" and "instance" columns map to the canonical key "blocked".
1031 key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"
1033 logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
# NOTE(review): because of the remapping above, key is "blocked" here, so
# this branch's body (elided) presumably stores the tidied domain - confirm.
1034 if key in ["domain", "instance"]:
1036 elif key == "reason":
1037 block[key] = tidyup.reason(text)
1038 elif key == "subdomain(s)":
# Subdomains are listed slash-separated in a single cell.
1041 block[key] = text.split("/")
1043 logger.debug("key='%s'", key)
1046 logger.debug("block()=%d ...", len(block))
1048 logger.debug("Appending block()=%d ...", len(block))
1049 blocklist.append(block)
1051 logger.debug("blocklist()=%d", len(blocklist))
# --- Phase 3: expand subdomains and register blocked domains ---------------
# All climatejustice.* instances we already know act as the blockers below.
1053 database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
1054 domains = database.cursor.fetchall()
1056 logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
1058 for block in blocklist:
1059 logger.debug("block='%s'", block)
# One blocklist entry fans out to subdomain.origin entries when a
# "subdomain(s)" list is present; otherwise it is taken as-is.
# NOTE(review): the same `block` dict is mutated and appended repeatedly in
# the subdomain loop, so all appended entries share (and end with) the last
# subdomain's "blocked" value - looks like an aliasing bug; confirm intent.
1060 if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
1061 origin = block["blocked"]
1062 for subdomain in block["subdomain(s)"]:
1063 block["blocked"] = subdomain + "." + origin
1064 blocking.append(block)
1066 blocking.append(block)
# NOTE(review): '%d' is formatted with the list itself; should very likely
# be len(blocking) - as written the log call's formatting fails.
1068 logger.debug("blocking()=%d", blocking)
1069 for block in blocking:
1070 block["blocked"] = tidyup.domain(block["blocked"])
1072 if not utils.is_domain_wanted(block["blocked"]):
1073 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1075 elif instances.is_recent(block["blocked"]):
1076 logger.debug("blocked='%s' has been recently checked - SKIPPED!", block["blocked"])
# NOTE(review): "Proccessing" typo in runtime log string (left as-is here).
1079 logger.info("Proccessing blocked='%s' ...", block["blocked"])
1080 utils.process_domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
# --- Phase 4: record the blocks for each known climatejustice.* blocker ----
1083 for blocker in domains:
# fetchall() returns row tuples; unwrap the single "domain" column.
1084 blocker = blocker[0]
1085 logger.debug("blocker[%s]='%s'", type(blocker), blocker)
1087 for block in blocking:
1088 block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None
1090 if not utils.is_domain_wanted(block["blocked"]):
1091 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1094 logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
1095 if utils.process_block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
# NOTE(review): label says reason='%s' but the argument is
# block["block_level"] - and that key is not set anywhere visible here.
1096 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
# Elided: the blockdict.append({...}) wrapper around these entries.
1098 "blocked": block["blocked"],
1099 "reason" : block["reason"],
1102 if instances.has_pending(blocker):
1103 logger.debug("Flushing updates for blocker='%s' ...", blocker)
1104 instances.update_data(blocker)
1106 logger.debug("Invoking commit() ...")
1107 database.connection.commit()
# Notify the bot about newly recorded rejects, if any.
1109 if config.get("bot_enabled") and len(blockdict) > 0:
# NOTE(review): format string is missing the closing quote after the first
# %s (compare the correct form used elsewhere: blocker='%s',blockdict()=...).
1110 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", blocker, len(blockdict))
1111 network.send_bot_post(blocker, blockdict)
1113 logger.debug("Success! - EXIT!")
# NOTE(review): line-numbered extract - leading integers are artifacts; gaps
# in the numbering indicate elided lines (`for` headers, `continue`s,
# `else:` lines, dict-literal bodies). Comments annotate only visible code.
#
# recheck_obfuscation: revisits every instance flagged with has_obfuscation,
# re-fetches its blocklist with the software-specific fetcher, tries to
# deobfuscate wildcard-masked blocked domains, records newly resolved blocks,
# and clears the obfuscation flag once a list is fully deobfuscated.
1116 def recheck_obfuscation(args: argparse.Namespace) -> int:
1117 logger.debug("args[]='%s' - CALLED!", type(args))
1121 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")
1122 rows = database.cursor.fetchall()
1123 logger.info("Checking %d domains ...", len(rows))
# `for row in rows:` header is elided. row = (domain, software, nodeinfo_url).
1125 logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row[0], row[1], row[2])
# Dispatch to the network-specific blocklist fetcher by software name.
# NOTE(review): friendica/misskey fetchers take no nodeinfo_url, unlike the rest.
1128 if row[1] == "pleroma":
1129 logger.debug("domain='%s',software='%s'", row[0], row[1])
1130 blocking = pleroma.fetch_blocks(row[0], row[2])
1131 elif row[1] == "mastodon":
1132 logger.debug("domain='%s',software='%s'", row[0], row[1])
1133 blocking = mastodon.fetch_blocks(row[0], row[2])
1134 elif row[1] == "lemmy":
1135 logger.debug("domain='%s',software='%s'", row[0], row[1])
1136 blocking = lemmy.fetch_blocks(row[0], row[2])
1137 elif row[1] == "friendica":
1138 logger.debug("domain='%s',software='%s'", row[0], row[1])
1139 blocking = friendica.fetch_blocks(row[0])
1140 elif row[1] == "misskey":
1141 logger.debug("domain='%s',software='%s'", row[0], row[1])
1142 blocking = misskey.fetch_blocks(row[0])
# Elided `else:` before this warning; note the "sofware" typo is in the
# runtime string and is left untouched here.
1144 logger.warning("Unknown sofware: domain='%s',software='%s'", row[0], row[1])
1146 logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row[0])
# Per-domain counter of still-obfuscated entries (initialization elided).
1149 for block in blocking:
1150 logger.debug("blocked='%s'", block["blocked"])
# Guard chain: drop reversed-IP (.arpa), placeholder (.tld) and Tor (.onion)
# entries; the `continue` lines between branches are elided.
1153 if block["blocked"].endswith(".arpa"):
1154 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1156 elif block["blocked"].endswith(".tld"):
1157 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1159 elif block["blocked"].endswith(".onion"):
1160 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
# Wildcard characters mark an obfuscated entry: count it and try to resolve
# the real domain (optionally using a provided hash).
1162 elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
1163 logger.debug("block='%s' is obfuscated.", block["blocked"])
1164 obfuscated = obfuscated + 1
1165 blocked = utils.deobfuscate_domain(block["blocked"], row[0], block["hash"] if "hash" in block else None)
1166 elif not utils.is_domain_wanted(block["blocked"]):
1167 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1169 elif blocks.is_instance_blocked(row[0], block["blocked"]):
1170 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
# Successful deobfuscation yields a different domain: un-count it, then skip
# if that resolved domain is already recorded as blocked.
1173 if blocked is not None and blocked != block["blocked"]:
1174 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1175 obfuscated = obfuscated - 1
1176 if blocks.is_instance_blocked(row[0], blocked):
1177 logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row[0])
# Normalize API block levels to the locally used past-tense names.
1180 if block["block_level"] == "silence":
1181 logger.debug("Block level 'silence' has been changed to 'silenced'")
1182 block["block_level"] = "silenced"
1183 elif block["block_level"] == "suspend":
1184 logger.debug("Block level 'suspend' has been changed to 'suspended'")
1185 block["block_level"] = "suspended"
1187 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
# Record the resolved block; bot notification only for hard rejects.
1188 if utils.process_block(row[0], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
# NOTE(review): label says reason='%s' but the argument is block["block_level"].
1189 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row[0])
# Elided: the blockdict.append({...}) wrapper around this entry.
1192 "reason" : block["reason"],
1195 logger.info("domain='%s' has %d obfuscated domain(s)", row[0], obfuscated)
# Nothing left obfuscated in a non-empty list: clear the instance flag.
1196 if obfuscated == 0 and len(blocking) > 0:
1197 logger.info("Block list from domain='%s' has been fully deobfuscated.", row[0])
1198 instances.set_has_obfuscation(row[0], False)
1200 if instances.has_pending(row[0]):
1201 logger.debug("Flushing updates for blocker='%s' ...", row[0])
1202 instances.update_data(row[0])
1204 logger.debug("Invoking commit() ...")
1205 database.connection.commit()
1207 if config.get("bot_enabled") and len(blockdict) > 0:
# NOTE(review): format string is missing the closing quote after the first
# %s (compare the correct blocker='%s',blockdict()=... form used elsewhere).
1208 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", row[0], len(blockdict))
1209 network.send_bot_post(row[0], blockdict)
1211 logger.debug("Success! - EXIT!")