# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import csv
import inspect
import json
import logging
import time

from urllib.parse import urlparse

import argparse
import atoma
import bs4
import markdown
import reqto
import validators

# The csrf and utils module paths are assumed from their usage below
# (csrf.determine(), utils.fetch_url(), ...).
from fba import csrf
from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import locking
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import federation
from fba.http import network

from fba.models import apis
from fba.models import blocks
from fba.models import instances

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)
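
# Each function below implements one CLI sub-command: it receives the parsed
# argparse.Namespace and returns a numeric exit code (0 on success).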

def check_instance(args: argparse.Namespace) -> int:
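    """Checks whether args.domain is valid, not blacklisted and not already
    registered. Returns a non-zero status code for each failed check."""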
    logger.debug("args.domain='%s' - CALLED!", args.domain)

    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100  # non-zero codes for the error branches are assumed
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status

def check_nodeinfo(args: argparse.Namespace) -> int:
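    """Reports instances whose stored nodeinfo_url matches neither their
    domain nor its punycode form (relative URLs always match)."""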
    logger.debug("args[]='%s' - CALLED!", type(args))

    # Fetch rows
    database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")

    cnt = 0
    for row in database.cursor.fetchall():
        logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        punycode = row["domain"].encode("idna").decode("utf-8")

        if row["nodeinfo_url"].startswith("/"):
            logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
            continue
        elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
            logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
            cnt = cnt + 1

    logger.info("Found %d row(s)", cnt)

    logger.debug("EXIT!")
    return 0

def fetch_pixelfed_api(args: argparse.Namespace) -> int:
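    """Fetches the public server list from the pixelfed.org API and registers
    all new, wanted instances."""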
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    headers = tuple()
    api_domain = "pixelfed.org"

    if apis.is_recent(api_domain):
        logger.info("API from api_domain='%s' has recently been accessed - EXIT!", api_domain)
        return 0
    else:
        logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
        apis.update(api_domain)

    try:
        logger.debug("Checking CSRF from api_domain='%s' ...", api_domain)
        headers = csrf.determine(api_domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_pixelfed_api,%s) - EXIT!", type(exception), __name__)
        return 1  # assumed exit code

    try:
        logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            api_domain,
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 101
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 102

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif row["domain"] == "":
                logger.debug("row[domain] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(row["domain"]):
                logger.warning("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
                continue
            elif instances.is_registered(row["domain"]):
                logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
                continue
            elif instances.is_recent(row["domain"]):
                logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
                continue

            logger.debug("Fetching instances from row[domain]='%s' ...", row["domain"])
            federation.fetch_instances(row["domain"], None, None, inspect.currentframe().f_code.co_name)

    except network.exceptions as exception:
        logger.warning("Cannot fetch JSON,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 103

    logger.debug("Success! - EXIT!")
    return 0

def fetch_bkali(args: argparse.Namespace) -> int:
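    """Fetches a domain list from the gql.apis.bka.li GraphQL API and
    registers all new, wanted instances."""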
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    api_domain = "gql.apis.bka.li"
    if apis.is_recent(api_domain):
        logger.info("API from api_domain='%s' has recently been accessed - EXIT!", api_domain)
        return 0
    else:
        logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
        apis.update(api_domain)

    domains = list()
    try:
        logger.info("Fetching domainlist from api_domain='%s' ...", api_domain)
        fetched = network.post_json_api(
            api_domain,
            "/v1/graphql",  # endpoint path assumed
            json.dumps({
                "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
            })
        )

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for 'gql.apis.bka.li' returned error message='%s'", fetched["error_message"])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif entry["domain"] == "":
                logger.debug("entry[domain] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(entry["domain"]):
                logger.warning("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
                continue
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0

def fetch_blocks(args: argparse.Namespace) -> int:
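    """Fetches block lists from registered instances, optionally restricted
    to --domain or --software, and records new blocks. Obfuscated
    (wildcarded) entries are deobfuscated where possible."""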
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for single args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
        blocker = tidyup.domain(blocker)
        logger.debug("blocker='%s' - AFTER!", blocker)

        if blocker == "":
            logger.warning("blocker is now empty!")
            continue
        elif nodeinfo_url is None or nodeinfo_url == "":
            logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
            continue
        elif not utils.is_domain_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("blocker='%s'", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        blocking = list()
        if software == "pleroma":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
        elif software == "mastodon":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
        elif software == "lemmy":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
        elif software == "friendica":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = friendica.fetch_blocks(blocker)
        elif software == "misskey":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = misskey.fetch_blocks(blocker)
        else:
            logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
        instances.set_total_blocks(blocker, blocking)

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        blockdict = list()
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"]  = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some obscure them with question marks, not sure if that's dependent on version or not
                row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]

            logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(block["blocked"]):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            block["block_level"] = utils.alias_block_level(block["block_level"])

            if utils.process_block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_observer(args: argparse.Namespace) -> int:
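    """Scrapes fediverse.observer for instance domains, per software type,
    and registers all new, wanted instances."""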
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    api_domain = "fediverse.observer"
    if apis.is_recent(api_domain):
        logger.info("API from api_domain='%s' has recently been accessed - EXIT!", api_domain)
        return 0
    else:
        logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
        apis.update(api_domain)

    types = list()
    if args.software is None:
        logger.info("Fetching software list ...")
        raw = utils.fetch_url(
            f"https://{api_domain}",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        ).text
        logger.debug("raw[%s]()=%d", type(raw), len(raw))

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        items = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"}).findAll("a", {"class": "dropdown-item"})
        logger.debug("items[]='%s'", type(items))

        logger.info("Checking %d menu items ...", len(items))
        for item in items:
            logger.debug("item[%s]='%s'", type(item), item)
            if item.text.lower() == "all":
                logger.debug("Skipping 'All' menu entry ...")
                continue

            logger.debug("Appending item.text='%s' ...", item.text)
            types.append(tidyup.domain(item.text))
    else:
        logger.info("Adding args.software='%s' as type ...", args.software)
        types.append(args.software)

    logger.info("Fetching %d different table data ...", len(types))
    for software in types:
        logger.debug("software='%s' - BEFORE!", software)
        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            continue

        doc = None
        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://{api_domain}/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from api_domain='%s': '%s'", software, api_domain, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()
            domain = tidyup.domain(domain)
            logger.debug("domain='%s' - AFTER!", domain)

            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has recently been handled - SKIPPED!", domain)
                continue

            software = software_helper.alias(software)
            logger.info("Fetching instances for domain='%s'", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_todon_wiki(args: argparse.Namespace) -> int:
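    """Scrapes the silenced/limited and suspended server lists from
    wiki.todon.eu and records them as blocks by todon.eu."""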
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    api_domain = "wiki.todon.eu"
    if apis.is_recent(api_domain):
        logger.info("API from api_domain='%s' has recently been accessed - EXIT!", api_domain)
        return 0
    else:
        logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
        apis.update(api_domain)

    blocklist = {
        "silenced": list(),
        "reject"  : list(),
    }

    raw = utils.fetch_url(f"https://{api_domain}/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "todon.eu"  # the instance whose blocks this wiki documents

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_total_blocks(blocker, blocking)

    blockdict = list()
    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if blocks.is_instance_blocked(blocker, blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if utils.process_block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", blocked, block_level, blocker)
                blockdict.append({
                    "blocked": blocked,
                    "reason" : None,
                })

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
        network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update_data(blocker)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_cs(args: argparse.Namespace) -> int:
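    """Parses chaos.social's federation.md from raw.githubusercontent.com and
    records the 'silenced' and 'blocked' instance tables as blocks."""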
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Markdown extensions for rendering federation.md (the exact extension
    # set used here is an assumption).
    extensions = [
        "extra",
        "abbr",
        "attr_list",
        "def_list",
        "fenced_code",
        "footnotes",
        "md_in_html",
        "admonition",
        "codehilite",
        "legacy_attrs",
        "legacy_em",
        "meta",
        "nl2br",
        "sane_lists",
        "smarty",
        "toc",
        "wikilinks"
    ]

    domains = {
        "silenced": list(),
        "reject"  : list(),
    }

    api_domain = "raw.githubusercontent.com"
    if apis.is_recent(api_domain):
        logger.info("API from api_domain='%s' has recently been accessed - EXIT!", api_domain)
        return 0
    else:
        logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
        apis.update(api_domain)

    raw = utils.fetch_url(f"https://{api_domain}/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc()=%d[]='%s'", len(doc), type(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    domains["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    domains["reject"] = federation.find_domains(blocked)

    blocking = domains["silenced"] + domains["reject"]
    blocker = "chaos.social"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_total_blocks(blocker, blocking)

    logger.debug("domains[silenced]()=%d,domains[reject]()=%d", len(domains["silenced"]), len(domains["reject"]))

    blockdict = list()
    for block_level in domains:
        logger.info("block_level='%s' has %d row(s)", block_level, len(domains[block_level]))

        for row in domains[block_level]:
            logger.debug("row[%s]='%s'", type(row), row)
            if instances.is_recent(row["domain"], "last_blocked"):
                logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
                continue
            elif not instances.is_registered(row["domain"]):
                try:
                    logger.info("Fetching instances from domain='%s' ...", row["domain"])
                    federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                    instances.set_last_error(row["domain"], exception)

            if utils.process_block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
                blockdict.append({
                    "blocked": row["domain"],
                    "reason" : row["reason"],
                })

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
        network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update_data(blocker)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fba_rss(args: argparse.Namespace) -> int:
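    """Reads an FBA-specific RSS feed given via --feed and registers all
    newly seen, wanted domains."""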
    logger.debug("args[]='%s' - CALLED!", type(args))

    domains = list()

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    components = urlparse(args.feed)

    if apis.is_recent(components.netloc):
        logger.info("API from components.netloc='%s' has recently been accessed - EXIT!", components.netloc)
        return 0
    else:
        logger.debug("components.netloc='%s' has not been recently used, marking ...", components.netloc)
        apis.update(components.netloc)

    logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item='%s'", item)
            domain = tidyup.domain(item.link.split("=")[1])

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fbabot_atom(args: argparse.Namespace) -> int:
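    """Reads the FBA bot's ATOM feed on ryana.agency and registers all newly
    seen, wanted domains."""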
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    api_domain = "ryana.agency"
    if apis.is_recent(api_domain):
        logger.info("API from api_domain='%s' has recently been accessed - EXIT!", api_domain)
        return 0
    else:
        logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
        apis.update(api_domain)

    feed = f"https://{api_domain}/users/fba/feed.atom"

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                logger.debug("element[]='%s'", type(element))
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href)

                    logger.debug("domain='%s' - AFTER!", domain)
                    if domain == "":
                        logger.debug("domain is empty - SKIPPED!")
                        continue
                    elif not utils.is_domain_wanted(domain):
                        logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, api_domain, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_instances(args: argparse.Namespace) -> int:
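    """Fetches instances from args.domain, then (unless --single is given)
    re-crawls registered instances that are due for a re-check."""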
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("args.domain='%s' - checking ...", args.domain)
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid.", args.domain)
        return 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
        return 101

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Initial fetch
    try:
        logger.info("Fetching instances from args.domain='%s' ...", args.domain)
        federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update_data(args.domain)
        return 100

    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] == "":
            logger.debug("row[domain] is empty - SKIPPED!")
            continue
        elif not utils.is_domain_wanted(row["domain"]):
            logger.warning("Domain row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", row["domain"], row["origin"], row["software"], row["nodeinfo_url"])
            federation.fetch_instances(row["domain"], row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[domain]='%s'", type(exception), row["domain"])
            instances.set_last_error(row["domain"], exception)

    logger.debug("Success - EXIT!")
    return 0

def fetch_oliphant(args: argparse.Namespace) -> int:
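    """Downloads a hard-coded set of CSV block lists from the
    oliphant/blocklists repository on codeberg.org and imports each row."""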
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    api_domain = "codeberg.org"
    if apis.is_recent(api_domain):
        logger.info("API from api_domain='%s' has recently been accessed - EXIT!", api_domain)
        return 0
    else:
        logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
        apis.update(api_domain)

    # Base URL
    base_url = f"https://{api_domain}/oliphant/blocklists/raw/branch/main/blocklists"

    # URLs to fetch
    blocklists = (
        {
            "blocker": "artisan.chat",
            "csv_url": "mastodon/artisan.chat.csv",
        }, {
            "blocker": "mastodon.art",
            "csv_url": "mastodon/mastodon.art.csv",
        }, {
            "blocker": "pleroma.envs.net",
            "csv_url": "mastodon/pleroma.envs.net.csv",
        }, {
            "blocker": "oliphant.social",
            "csv_url": "mastodon/_unified_tier3_blocklist.csv",
        }, {
            "blocker": "mastodon.online",
            "csv_url": "mastodon/mastodon.online.csv",
        }, {
            "blocker": "mastodon.social",
            "csv_url": "mastodon/mastodon.social.csv",
        }, {
            "blocker": "mastodon.social",
            "csv_url": "other/missing-tier0-mastodon.social.csv",
        }, {
            "blocker": "rage.love",
            "csv_url": "mastodon/rage.love.csv",
        }, {
            "blocker": "sunny.garden",
            "csv_url": "mastodon/sunny.garden.csv",
        }, {
            "blocker": "solarpunk.moe",
            "csv_url": "mastodon/solarpunk.moe.csv",
        }, {
            "blocker": "toot.wales",
            "csv_url": "mastodon/toot.wales.csv",
        }, {
            "blocker": "union.place",
            "csv_url": "mastodon/union.place.csv",
        },
    )

    domains = list()

    logger.debug("Downloading %d files ...", len(blocklists))
    for block in blocklists:
        # Is domain given and not equal blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue
        elif args.domain in domains:
            logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
            continue
        elif instances.is_recent(block["blocker"]):
            logger.debug("block[blocker]='%s' has been recently crawled - SKIPPED!", block["blocker"])
            continue

        # Fetch this URL
        logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
        response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.content()=%d", response.ok, response.status_code, len(response.content))
        if not response.ok or response.status_code >= 300 or response.content == "":
            logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
            continue

        logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
        # Materialize the reader so the row count can be logged (csv.DictReader has no len()).
        rows = list(csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix"))

        blockdict = list()
        cnt = 0

        logger.info("Processing %d rows ...", len(rows))
        for row in rows:
            logger.debug("row[%s]='%s'", type(row), row)
            domain = severity = None
            reject_media = reject_reports = False

            if "#domain" in row:
                domain = row["#domain"]
            elif "domain" in row:
                domain = row["domain"]
            else:
                logger.debug("row='%s' does not contain domain column", row)
                continue

            if "#severity" in row:
                severity = row["#severity"]
            elif "severity" in row:
                severity = row["severity"]
            else:
                logger.debug("row='%s' does not contain severity column", row)
                continue

            if "#reject_media" in row and row["#reject_media"].lower() == "true":
                reject_media = True
            elif "reject_media" in row and row["reject_media"].lower() == "true":
                reject_media = True

            if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
                reject_reports = True
            elif "reject_reports" in row and row["reject_reports"].lower() == "true":
                reject_reports = True

            logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
            if domain is None or domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue

            logger.debug("Marking domain='%s' as handled", domain)
            domains.append(domain)

            logger.debug("Processing domain='%s' ...", domain)
            processed = utils.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
            logger.debug("processed='%s'", processed)

            if utils.process_block(block["blocker"], domain, None, "reject") and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='reject' for blocker='%s' ...", domain, block["blocker"])
                blockdict.append({
                    "blocked": domain,
                    "reason" : None,
                })

            if reject_media:
                utils.process_block(block["blocker"], domain, None, "reject_media")
            if reject_reports:
                utils.process_block(block["blocker"], domain, None, "reject_reports")

            cnt = cnt + 1

        logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", block["blocker"], cnt)
        instances.set_total_blocks(block["blocker"], cnt)

        logger.debug("Checking if blocker='%s' has pending updates ...", block["blocker"])
        if instances.has_pending(block["blocker"]):
            logger.debug("Flushing updates for block[blocker]='%s' ...", block["blocker"])
            instances.update_data(block["blocker"])

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
            network.send_bot_post(block["blocker"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_txt(args: argparse.Namespace) -> int:
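    """Downloads static plain-text block lists (currently seirdy.one's
    bsl.txt) and processes every listed, wanted domain."""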
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Static URLs
    urls = ({
        "blocker": "seirdy.one",
        "url"    : "https://seirdy.one/pb/bsl.txt",
    },)

    logger.info("Checking %d text file(s) ...", len(urls))
    for row in urls:
        logger.debug("Fetching row[url]='%s' ...", row["url"])
        response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code < 300 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain)

                logger.debug("domain='%s' - AFTER!", domain)
                if domain == "":
                    logger.debug("domain is empty - SKIPPED!")
                    continue
                elif not utils.is_domain_wanted(domain):
                    logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                    continue
                elif instances.is_recent(domain):
                    logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                    continue

                logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
                processed = utils.process_domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)
                if not processed:
                    logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
                    continue

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fedipact(args: argparse.Namespace) -> int:
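    """Scrapes fedipact.online for participating instances and registers all
    new, wanted ones."""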
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    api_domain = "fedipact.online"
    if apis.is_recent(api_domain):
        logger.info("API from api_domain='%s' has recently been accessed - EXIT!", api_domain)
        return 0
    else:
        logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
        apis.update(api_domain)

    response = utils.fetch_url(
        f"https://{api_domain}",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and response.text != "":
        logger.debug("Parsing %d Bytes ...", len(response.text))

        doc = bs4.BeautifulSoup(response.text, "html.parser")
        logger.debug("doc[]='%s'", type(doc))

        rows = doc.findAll("li")
        logger.info("Checking %d row(s) ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            domain = tidyup.domain(row.contents[0])

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.info("Fetching domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_joinfediverse(args: argparse.Namespace) -> int:
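    """Scrapes the FediBlock wiki tables on joinfediverse.wiki, expands
    listed subdomains and records the blocks for the climatejustice.*
    instances."""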
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    api_domain = "joinfediverse.wiki"
    if apis.is_recent(api_domain):
        logger.info("API from api_domain='%s' has recently been accessed - EXIT!", api_domain)
        return 0
    else:
        logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
        apis.update(api_domain)

    raw = utils.fetch_url(
        f"https://{api_domain}/FediBlock",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    tables = doc.findAll("table", {"class": "wikitable"})

    logger.info("Analyzing %d table(s) ...", len(tables))
    blocklist = list()
    for table in tables:
        logger.debug("table[]='%s'", type(table))

        rows = table.findAll("tr")
        logger.info("Checking %d row(s) ...", len(rows))
        block_headers = dict()
        for row in rows:
            logger.debug("row[%s]='%s'", type(row), row)

            headers = row.findAll("th")
            logger.debug("Found headers()=%d header(s)", len(headers))
            if len(headers) > 1:
                block_headers = dict()
                cnt = 0
                for header in headers:
                    cnt = cnt + 1
                    logger.debug("header[]='%s',cnt=%d", type(header), cnt)
                    text = header.contents[0]

                    logger.debug("text[]='%s'", type(text))
                    if not isinstance(text, str):
                        logger.debug("text[]='%s' is not of type 'str' - SKIPPED!", type(text))
                        continue
                    elif validators.domain(text.strip()):
                        logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
                        continue

                    text = tidyup.domain(text.strip())
                    logger.debug("text='%s'", text)
                    if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
                        logger.debug("Found header: '%s'=%d", text, cnt)
                        block_headers[cnt] = text

            elif len(block_headers) == 0:
                logger.debug("row is not scrapable - SKIPPED!")
                continue
            elif len(block_headers) > 0:
                logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
                cnt = 0
                block = dict()

                for element in row.find_all(["th", "td"]):
                    cnt = cnt + 1
                    logger.debug("element[]='%s',cnt=%d", type(element), cnt)
                    if cnt in block_headers:
                        logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])

                        text = element.text.strip()
                        key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"

                        logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
                        if key == "blocked":
                            block[key] = text
                        elif key == "reason":
                            block[key] = tidyup.reason(text)
                        elif key == "subdomain(s)":
                            block[key] = list()
                            if text != "":
                                block[key] = text.split("/")
                        else:
                            logger.debug("key='%s'", key)
                            block[key] = text

                logger.debug("block()=%d ...", len(block))
                if len(block) > 0:
                    logger.debug("Appending block()=%d ...", len(block))
                    blocklist.append(block)

    logger.debug("blocklist()=%d", len(blocklist))

    database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
    domains = database.cursor.fetchall()

    logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
    blocking = list()
    for block in blocklist:
        logger.debug("block='%s'", block)
        if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
            origin = block["blocked"]
            for subdomain in block["subdomain(s)"]:
                # Append a copy per subdomain so earlier entries aren't overwritten.
                entry = dict(block)
                entry["blocked"] = subdomain + "." + origin
                blocking.append(entry)
        else:
            blocking.append(block)

    logger.debug("blocking()=%d", len(blocking))
    for block in blocking:
        logger.debug("block[]='%s'", type(block))
        block["blocked"] = tidyup.domain(block["blocked"])

        logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
        if block["blocked"] == "":
            logger.debug("block[blocked] is empty - SKIPPED!")
            continue
        elif not utils.is_domain_wanted(block["blocked"]):
            logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
            continue
        elif instances.is_recent(block["blocked"]):
            logger.debug("blocked='%s' has been recently checked - SKIPPED!", block["blocked"])
            continue

        logger.info("Processing blocked='%s' ...", block["blocked"])
        utils.process_domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)

    blockdict = list()
    for blocker in domains:
        blocker = blocker[0]
        logger.debug("blocker[%s]='%s'", type(blocker), blocker)

        for block in blocking:
            logger.debug("block[blocked]='%s',block[reason]='%s' - BEFORE!", block["blocked"], block.get("reason"))
            block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None

            logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(block["blocked"]):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
            if utils.process_block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='reject' for blocker='%s' ...", block["blocked"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def recheck_obfuscation(args: argparse.Namespace) -> int:
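    """Re-fetches block lists from instances flagged with has_obfuscation and
    tries to deobfuscate wildcarded entries; clears the flag once a list is
    fully deobfuscated."""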
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if isinstance(args.domain, str) and args.domain != "" and utils.is_domain_wanted(args.domain):
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
    elif isinstance(args.software, str) and args.software != "":
        # Non-empty check (simplified from a broken validators.domain() comparison)
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
    else:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")

    rows = database.cursor.fetchall()
    logger.info("Checking %d domains ...", len(rows))
    for row in rows:
        logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        if (args.all is None or not args.all) and instances.is_recent(row["domain"]) and args.domain is None and args.software is None:
            logger.debug("row[domain]='%s' has been recently checked, args.all[]='%s' - SKIPPED!", row["domain"], type(args.all))
            continue

        blocking = list()
        if row["software"] == "pleroma":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
        elif row["software"] == "mastodon":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
        elif row["software"] == "lemmy":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
        elif row["software"] == "friendica":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = friendica.fetch_blocks(row["domain"])
        elif row["software"] == "misskey":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = misskey.fetch_blocks(row["domain"])
        else:
            logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])
            continue

        logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
        instances.set_total_blocks(row["domain"], blocking)

        logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
        obfuscated = 0
        blockdict = list()
        for block in blocking:
            logger.debug("block[blocked]='%s'", block["blocked"])
            blocked = None

            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
                logger.debug("block='%s' is obfuscated.", block["blocked"])
                obfuscated = obfuscated + 1
                blocked = utils.deobfuscate_domain(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
            elif not utils.is_domain_wanted(block["blocked"]):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
                logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
                continue

            logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
            if blocked is not None and blocked != block["blocked"]:
                logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
                obfuscated = obfuscated - 1
                if blocks.is_instance_blocked(row["domain"], blocked):
                    logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
                    continue

                block["block_level"] = utils.alias_block_level(block["block_level"])

                logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
                if utils.process_block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
                    blockdict.append({
                        "blocked": blocked,
                        "reason" : block["reason"],
                    })

        logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
        if obfuscated == 0 and len(blocking) > 0:
            logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
            instances.set_has_obfuscation(row["domain"], False)

        if instances.has_pending(row["domain"]):
            logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
            instances.update_data(row["domain"])

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", row["domain"], len(blockdict))
            network.send_bot_post(row["domain"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fedilist(args: argparse.Namespace) -> int:
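    """Fetches the instance CSV from demo.fedilist.com, optionally filtered
    by --software, and registers all new, wanted instances."""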
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    api_domain = "demo.fedilist.com"
    if apis.is_recent(api_domain):
        logger.info("API from api_domain='%s' has recently been accessed - EXIT!", api_domain)
        return 0
    else:
        logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
        apis.update(api_domain)

    url = f"http://{api_domain}/instance/csv?onion=not"
    if args.software is not None and args.software != "":
        logger.debug("args.software='%s'", args.software)
        url = f"http://{api_domain}/instance/csv?software={args.software}&onion=not"

    logger.info("Fetching url='%s' ...", url)
    response = reqto.get(
        url,
        headers=network.web_headers,
        timeout=(config.get("connection_timeout"), config.get("read_timeout")),
        allow_redirects=False
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if not response.ok or response.status_code >= 300 or len(response.content) == 0:
        logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.content))
        return 1  # assumed exit code

    reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")

    logger.debug("reader[]='%s'", type(reader))
    for row in reader:
        logger.debug("row[]='%s'", type(row))
        domain = tidyup.domain(row["hostname"])
        logger.debug("domain='%s' - AFTER!", domain)

        if domain == "":
            logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"])
            continue
        elif not utils.is_domain_wanted(domain):
            logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
            continue
        elif (args.all is None or not args.all) and instances.is_registered(domain):
            logger.debug("domain='%s' is already registered, --all not specified: args.all[]='%s'", domain, type(args.all))
            continue
        elif instances.is_recent(domain):
            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
            continue

        logger.info("Fetching instances from domain='%s' ...", domain)
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def update_nodeinfo(args: argparse.Namespace) -> int:
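    """Re-determines the software type of instances, either for --domain,
    --software or all instances whose nodeinfo data is stale."""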
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        logger.debug("Fetching args.domain='%s'", args.domain)
        database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ?", [args.domain])
    elif args.software is not None and args.software != "":
        logger.info("Fetching domains for args.software='%s'", args.software)
        database.cursor.execute("SELECT domain, software FROM instances WHERE software = ?", [args.software])
    else:
        logger.info("Fetching domains for recently updated ...")
        database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL", [time.time() - config.get("recheck_nodeinfo")])

    domains = database.cursor.fetchall()

    logger.info("Checking %d domain(s) ...", len(domains))
    cnt = 0
    for row in domains:
        logger.debug("row[]='%s'", type(row))
        try:
            logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
            software = federation.determine_software(row["domain"])

            logger.debug("Determined software='%s'", software)
            if software != row["software"] and software is not None:
                logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
                instances.set_software(row["domain"], software)

            instances.set_success(row["domain"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
            instances.set_last_error(row["domain"], exception)

        instances.set_last_nodeinfo(row["domain"])
        instances.update_data(row["domain"])
        cnt = cnt + 1

    logger.debug("Success! - EXIT!")
    return 0

def fetch_instances_social(args: argparse.Namespace) -> int:
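    """Fetches the instance list from the instances.social API (requires
    instances_social_api_key in config.json) and registers all new, wanted
    instances."""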
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    api_domain = "instances.social"

    if config.get("instances_social_api_key") == "":
        logger.error("API key not set. Please set in your config.json file.")
        return 1  # error exit codes below are assumed
    elif apis.is_recent(api_domain):
        logger.info("API from api_domain='%s' has recently been accessed - EXIT!", api_domain)
        return 0
    else:
        logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
        apis.update(api_domain)

    headers = {
        "Authorization": f"Bearer {config.get('instances_social_api_key')}",
    }

    fetched = network.get_json_api(
        api_domain,
        "/api/1.0/instances/list?count=0&sort_by=name",
        headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )
    logger.debug("fetched[]='%s'", type(fetched))

    if "error_message" in fetched:
        logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
        return 2
    elif "exception" in fetched:
        logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
        return 3
    elif "json" not in fetched:
        logger.warning("fetched has no element 'json' - EXIT!")
        return 4
    elif "instances" not in fetched["json"]:
        logger.warning("fetched[json] has no element 'instances' - EXIT!")
        return 5

    domains = list()
    rows = fetched["json"]["instances"]

    logger.info("Checking %d row(s) ...", len(rows))
    for row in rows:
        logger.debug("row[]='%s'", type(row))
        domain = tidyup.domain(row["name"])

        logger.debug("domain='%s' - AFTER!", domain)
        if domain == "":
            logger.debug("domain is empty - SKIPPED!")
            continue
        elif not utils.is_domain_wanted(domain):
            logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
            continue
        elif domain in domains:
            logger.debug("domain='%s' is already added - SKIPPED!", domain)
            continue
        elif instances.is_registered(domain):
            logger.debug("domain='%s' is already registered - SKIPPED!", domain)
            continue
        elif instances.is_recent(domain):
            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
            continue

        # Remember handled domains so duplicates from the API are skipped above.
        domains.append(domain)

        logger.info("Fetching instances from domain='%s'", domain)
        federation.fetch_instances(domain, api_domain, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0