# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
import argparse
import inspect
import json
import logging
import time

import atoma
import bs4
import markdown
import validators

from fba import csrf
from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import locking
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import federation
from fba.http import network

from fba.models import apis
from fba.models import blocks
from fba.models import instances

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma
# Module-wide logger; INFO by default, flip the commented line for debugging.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)
def check_instance(args: argparse.Namespace) -> int:
    """Check whether args.domain is valid, not blacklisted and not yet registered.

    Returns a non-zero status when the domain cannot be added (invalid,
    blacklisted or already registered), 0 when it is unknown and addable.
    NOTE(review): reconstructed from a mangled listing (status codes restored
    from control flow) — confirm exact codes against upstream history.
    """
    logger.debug("args.domain='%s' - CALLED!", args.domain)

    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status
def check_nodeinfo(args: argparse.Namespace) -> int:
    """Sanity-check stored nodeinfo URLs: each absolute URL must mention its
    own domain (or its IDNA/punycode form); relative URLs always match.

    Returns 0; mismatches are only logged and counted.
    NOTE(review): reconstructed from a mangled listing — verify counter and
    return value against upstream history.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")

    cnt = 0
    for row in database.cursor.fetchall():
        logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        # IDNA form so internationalized domains still match their URL
        punycode = row["domain"].encode("idna").decode("utf-8")

        if row["nodeinfo_url"].startswith("/"):
            logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
            continue
        elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
            logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
            cnt = cnt + 1

    logger.info("Found %d row(s)", cnt)

    logger.debug("EXIT!")
    return 0
def fetch_pixelfed_api(args: argparse.Namespace) -> int:
    """Fetch the server list from the pixelfed.org API and crawl new instances.

    Returns 0 on success (or recent API access), non-zero on API errors.
    NOTE(review): reconstructed from a mangled listing — confirm the exact
    error return codes against upstream history.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    api_domain = "pixelfed.org"

    if apis.is_recent(api_domain):
        logger.info("API from api_domain='%s' has recently being accessed - EXIT!", api_domain)
        return 0
    else:
        logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
        apis.update(api_domain)

    try:
        logger.debug("Checking CSRF from api_domain='%s' ...", api_domain)
        headers = csrf.determine(api_domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
        # NOTE(review): must return an int here (signature says -> int)
        return 1

    try:
        logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            api_domain,
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 101
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 102

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif row["domain"] == "":
                logger.debug("row[domain] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(row["domain"]):
                logger.warning("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
                continue
            elif instances.is_registered(row["domain"]):
                logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
                continue
            elif instances.is_recent(row["domain"]):
                logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
                continue

            logger.debug("Fetching instances from row[domain]='%s' ...", row["domain"])
            federation.fetch_instances(row["domain"], None, None, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 103

    logger.debug("Success! - EXIT!")
    return 0
def fetch_bkali(args: argparse.Namespace) -> int:
    """Fetch a domain list from the gql.apis.bka.li GraphQL API and crawl the
    returned domains.

    Returns 0 on success (or recent API access), non-zero on API errors.
    NOTE(review): reconstructed from a mangled listing — confirm the GraphQL
    endpoint path and error return codes against upstream history.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    api_domain = "gql.apis.bka.li"
    if apis.is_recent(api_domain):
        logger.info("API from api_domain='%s' has recently being accessed - EXIT!", api_domain)
        return 0
    else:
        logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
        apis.update(api_domain)

    domains = list()
    try:
        logger.info("Fetching domainlist from api_domain='%s' ...", api_domain)
        fetched = network.post_json_api(
            api_domain,
            "/v1/graphql",
            json.dumps({
                "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
            })
        )

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for 'gql.apis.bka.li' returned error message='%s", fetched["error_message"])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s", fetched["error"]["message"])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif entry["domain"] == "":
                logger.debug("entry[domain] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(entry["domain"]):
                logger.warning("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
                continue
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])
    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0
def fetch_blocks(args: argparse.Namespace) -> int:
    """Fetch and persist block lists from all known blocker instances.

    With args.domain set, only that instance is re-checked; with
    args.software set, all instances running that software; otherwise every
    supported instance whose last check is older than the configured
    "recheck_block" interval.

    Returns 0 on success, non-zero when an explicit args.domain is invalid,
    blacklisted or unregistered.
    NOTE(review): reconstructed from a mangled listing (restored `continue`,
    list initializations and lock acquisition) — verify against upstream.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    # Only one block-fetch run at a time
    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for single args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
        blocker = tidyup.domain(blocker)
        logger.debug("blocker='%s' - AFTER!", blocker)

        if blocker == "":
            logger.warning("blocker is now empty!")
            continue
        elif nodeinfo_url is None or nodeinfo_url == "":
            logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
            continue
        elif not utils.is_domain_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("blocker='%s'", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        blocking = list()
        blockdict = list()

        # Dispatch on software; each network module implements its own block API.
        if software == "pleroma":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
        elif software == "mastodon":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
        elif software == "lemmy":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
        elif software == "friendica":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = friendica.fetch_blocks(blocker)
        elif software == "misskey":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = misskey.fetch_blocks(blocker)
        else:
            logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
        instances.set_total_blocks(blocker, blocking)

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)

        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"]  = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some obscure them with question marks, not sure if that's dependent on version or not
                row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]

            logger.debug("Looking up instance by domainm, blocked='%s'", block["blocked"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(block["blocked"]):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            block["block_level"] = utils.alias_block_level(block["block_level"])

            if utils.process_block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d'", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_observer(args: argparse.Namespace) -> int:
    """Scrape fediverse.observer's per-software tables and crawl new domains.

    Without args.software the software list is scraped from the site's
    dropdown menu; with it, only that software's table is fetched.

    Returns 0 on success or recent API access.
    NOTE(review): reconstructed from a mangled listing — verify restored
    `types` initialization and `.text` access against upstream history.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    api_domain = "fediverse.observer"
    if apis.is_recent(api_domain):
        logger.info("API from api_domain='%s' has recently being accessed - EXIT!", api_domain)
        return 0
    else:
        logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
        apis.update(api_domain)

    types = list()
    if args.software is None:
        logger.info("Fetching software list ...")
        raw = utils.fetch_url(
            f"https://{api_domain}",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        ).text
        logger.debug("raw[%s]()=%d", type(raw), len(raw))

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        items = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"}).findAll("a", {"class": "dropdown-item"})
        logger.debug("items[]='%s'", type(items))

        logger.info("Checking %d menu items ...", len(items))
        for item in items:
            logger.debug("item[%s]='%s'", type(item), item)
            if item.text.lower() == "all":
                logger.debug("Skipping 'All' menu entry ...")
                continue

            logger.debug("Appending item.text='%s' ...", item.text)
            types.append(tidyup.domain(item.text))
    else:
        logger.info("Adding args.software='%s' as type ...", args.software)
        types.append(args.software)

    logger.info("Fetching %d different table data ...", len(types))
    for software in types:
        logger.debug("software='%s' - BEFORE!", software)
        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            continue

        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://{api_domain}/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from api_domain='%s': '%s'", software, api_domain, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' is recently being handled - SKIPPED!", domain)
                continue

            software = software_helper.alias(software)
            logger.info("Fetching instances for domain='%s'", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_todon_wiki(args: argparse.Namespace) -> int:
    """Scrape wiki.todon.eu's domain-block page and store todon.eu's blocks.

    Returns 0 on success or recent API access.
    NOTE(review): reconstructed from a mangled listing — verify the restored
    blocklist/blockdict initializations and blocker domain against upstream.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    api_domain = "wiki.todon.eu"
    if apis.is_recent(api_domain):
        logger.info("API from api_domain='%s' has recently being accessed - EXIT!", api_domain)
        return 0
    else:
        logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
        apis.update(api_domain)

    blocklist = {
        "silenced": list(),
        "reject"  : list(),
    }

    raw = utils.fetch_url(f"https://{api_domain}/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "todon.eu"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_total_blocks(blocker, blocking)

    blockdict = list()
    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d'", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_cs) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if blocks.is_instance_blocked(blocker, blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if utils.process_block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", blocked, block_level, blocker)
                blockdict.append({
                    "blocked": blocked,
                    "reason" : None,
                })

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
        network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update_data(blocker)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_cs(args: argparse.Namespace):
    """Fetch chaos.social's federation.md blocklist from GitHub and store it.

    NOTE(review): reconstructed from a mangled listing; the `extensions` list
    was dropped by extraction and is restored from memory of upstream — verify.
    Also fixed a NameError: the visible code computed `blocking` from an
    undefined `blocklist` dict; this function's dict is `domains`.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    # Markdown extensions used to render federation.md into parseable HTML
    extensions = [
        "attr_list",
        "def_list",
        "fenced_code",
        "footnotes",
        "legacy_attrs",
        "legacy_em",
        "meta",
        "nl2br",
        "sane_lists",
        "smarty",
        "toc",
        "wikilinks",
    ]

    domains = {
        "silenced": list(),
        "reject"  : list(),
    }

    api_domain = "raw.githubusercontent.com"
    if apis.is_recent(api_domain):
        logger.info("API from api_domain='%s' has recently being accessed - EXIT!", api_domain)
        return 0
    else:
        logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
        apis.update(api_domain)

    raw = utils.fetch_url(f"https://{api_domain}/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc()=%d[]='%s'", len(doc), type(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    domains["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    domains["reject"] = federation.find_domains(blocked)

    # BUGFIX: was `blocklist["silenced"] + blocklist["reject"]` — undefined name
    blocking = domains["silenced"] + domains["reject"]
    blocker = "chaos.social"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_total_blocks(blocker, blocking)

    logger.debug("domains[silenced]()=%d,domains[reject]()=%d", len(domains["silenced"]), len(domains["reject"]))
    if len(domains) > 0:
        blockdict = list()
        for block_level in domains:
            logger.info("block_level='%s' has %d row(s)", block_level, len(domains[block_level]))

            for row in domains[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if instances.is_recent(row["domain"], "last_blocked"):
                    logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
                    continue
                elif not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if utils.process_block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
                    blockdict.append({
                        "blocked": row["domain"],
                        "reason" : row["reason"],
                    })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update_data(blocker)

    logger.debug("Success! - EXIT!")
def fetch_fba_rss(args: argparse.Namespace) -> int:
    """Fetch an FBA-specific RSS feed (args.feed) and crawl new domains from it.

    Domains are taken from each item's link query string (text after '=').
    Returns 0.
    NOTE(review): reconstructed from a mangled listing — verify restored
    `domains` initialization and guard structure against upstream history.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    domains = list()

    logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item='%s'", item)
            # Domain is the value part of the link's query string
            domain = tidyup.domain(item.link.split("=")[1])

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_fbabot_atom(args: argparse.Namespace) -> int:
    """Fetch the FBA bot account's ATOM feed from ryana.agency and crawl the
    domains linked in each entry's HTML content.

    Returns 0 on success or recent API access.
    NOTE(review): reconstructed from a mangled listing — verify restored
    `domains` initialization and guard structure against upstream history.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    api_domain = "ryana.agency"
    if apis.is_recent(api_domain):
        logger.info("API from api_domain='%s' has recently being accessed - EXIT!", api_domain)
        return 0
    else:
        logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
        apis.update(api_domain)

    feed = f"https://{api_domain}/users/fba/feed.atom"

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                logger.debug("element[]='%s'", type(element))
                # A single href may carry several comma-separated domains
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href)

                    logger.debug("domain='%s' - AFTER!", domain)
                    if domain == "":
                        logger.debug("domain is empty - SKIPPED!")
                        continue
                    elif not utils.is_domain_wanted(domain):
                        logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, api_domain, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_instances(args: argparse.Namespace) -> int:
    """Crawl args.domain and (unless args.single) re-crawl known instances
    whose last fetch is older than the configured "recheck_instance" interval.

    Returns 0 on success, non-zero when the initial fetch fails.
    NOTE(review): reconstructed from a mangled listing — verify restored lock
    acquisition, `args.single` guard and return codes against upstream.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    # Only one crawler run at a time
    locking.acquire()

    # Initial fetch of the explicitly given domain
    try:
        logger.info("Fetching instances from args.domain='%s' ...", args.domain)
        federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update_data(args.domain)
        return 100

    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] == "":
            logger.debug("row[domain] is empty - SKIPPED!")
            continue
        elif not utils.is_domain_wanted(row["domain"]):
            logger.warning("Domain row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", row["domain"], row["origin"], row["software"], row["nodeinfo_url"])
            federation.fetch_instances(row["domain"], row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[domain]='%s'", type(exception), row["domain"])
            instances.set_last_error(row["domain"], exception)

    logger.debug("Success - EXIT!")
    return 0
# Download the "oliphant" CSV blocklists from codeberg.org for a fixed set of
# blocker instances, parse each CSV (columns may be prefixed with '#'), and
# record the resulting blocks.
# NOTE(review): this excerpt elides original lines (try:/return/continue,
# loop headers, dict/list braces), so comments describe only the visible code.
874 def fetch_oliphant(args: argparse.Namespace) -> int:
875 logger.debug("args[]='%s' - CALLED!", type(args))
# Rate-limit access to the remote API host.
877 api_domain = "codeberg.org"
878 if apis.is_recent(api_domain):
879 logger.info("API from api_domain='%s' has recently being accessed - EXIT!", api_domain)
882 logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
883 apis.update(api_domain)
888 base_url = f"https://{api_domain}/oliphant/blocklists/raw/branch/main/blocklists"
# Hard-coded list of blocker -> CSV path pairs in the oliphant repository.
893 "blocker": "artisan.chat",
894 "csv_url": "mastodon/artisan.chat.csv",
896 "blocker": "mastodon.art",
897 "csv_url": "mastodon/mastodon.art.csv",
899 "blocker": "pleroma.envs.net",
900 "csv_url": "mastodon/pleroma.envs.net.csv",
902 "blocker": "oliphant.social",
903 "csv_url": "mastodon/_unified_tier3_blocklist.csv",
905 "blocker": "mastodon.online",
906 "csv_url": "mastodon/mastodon.online.csv",
908 "blocker": "mastodon.social",
909 "csv_url": "mastodon/mastodon.social.csv",
911 "blocker": "mastodon.social",
912 "csv_url": "other/missing-tier0-mastodon.social.csv",
914 "blocker": "rage.love",
915 "csv_url": "mastodon/rage.love.csv",
917 "blocker": "sunny.garden",
918 "csv_url": "mastodon/sunny.garden.csv",
920 "blocker": "solarpunk.moe",
921 "csv_url": "mastodon/solarpunk.moe.csv",
923 "blocker": "toot.wales",
924 "csv_url": "mastodon/toot.wales.csv",
926 "blocker": "union.place",
927 "csv_url": "mastodon/union.place.csv",
933 logger.debug("Downloading %d files ...", len(blocklists))
934 for block in blocklists:
935 # Is domain given and not equal blocker?
936 if isinstance(args.domain, str) and args.domain != block["blocker"]:
937 logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
939 elif args.domain in domains:
940 logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
942 elif instances.is_recent(block["blocker"]):
943 logger.debug("block[blocker]='%s' has been recently crawled - SKIPPED!", block["blocker"])
947 logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
948 response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
950 logger.debug("response.ok='%s',response.status_code=%d,response.content()=%d", response.ok, response.status_code, len(response.content))
# NOTE(review): response.content is bytes; the == "" comparison can never be
# True for a bytes object - probably intended b"" or len(...) == 0. Verify.
951 if not response.ok or response.status_code >= 300 or response.content == "":
952 logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
955 logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
956 reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")
# NOTE(review): len() on a csv.DictReader raises TypeError - unless an elided
# line materialized it into a list first. Confirm against the full source.
960 logger.info("Processing %d rows ...", len(reader))
963 logger.debug("row[%s]='%s'", type(row), row)
964 domain = severity = None
965 reject_media = reject_reports = False
# CSV column names may or may not carry a leading '#' depending on the list.
968 domain = row["#domain"]
969 elif "domain" in row:
970 domain = row["domain"]
972 logger.debug("row='%s' does not contain domain column", row)
975 if "#severity" in row:
976 severity = row["#severity"]
977 elif "severity" in row:
978 severity = row["severity"]
980 logger.debug("row='%s' does not contain severity column", row)
983 if "#reject_media" in row and row["#reject_media"].lower() == "true":
985 elif "reject_media" in row and row["reject_media"].lower() == "true":
988 if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
989 reject_reports = True
990 elif "reject_reports" in row and row["reject_reports"].lower() == "true":
991 reject_reports = True
994 logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
996 logger.debug("domain is empty - SKIPPED!")
998 elif not utils.is_domain_wanted(domain):
999 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1002 logger.debug("Marking domain='%s' as handled", domain)
1003 domains.append(domain)
1005 logger.debug("Processing domain='%s' ...", domain)
1006 processed = utils.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
1007 logger.debug("processed='%s'", processed)
1009 if utils.process_block(block["blocker"], domain, None, "reject") and config.get("bot_enabled"):
# NOTE(review): block["block_level"] / block["reason"] are not keys of the
# visible blocklist dicts (only "blocker"/"csv_url") - potential KeyError
# unless elided lines add them. Verify against the full source.
1010 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", domain, block["block_level"], block["blocker"])
1013 "reason" : block["reason"],
1017 utils.process_block(block["blocker"], domain, None, "reject_media")
1019 utils.process_block(block["blocker"], domain, None, "reject_reports")
1021 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", block["blocker"], cnt)
1022 instances.set_total_blocks(block["blocker"], cnt)
# Flush pending instance updates and commit per blocker.
1024 logger.debug("Checking if blocker='%s' has pending updates ...", block["blocker"])
1025 if instances.has_pending(block["blocker"]):
1026 logger.debug("Flushing updates for block[blocker]='%s' ...", block["blocker"])
1027 instances.update_data(block["blocker"])
1029 logger.debug("Invoking commit() ...")
1030 database.connection.commit()
1032 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1033 if config.get("bot_enabled") and len(blockdict) > 0:
1034 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
1035 network.send_bot_post(block["blocker"], blockdict)
1037 logger.debug("Success! - EXIT!")
# Fetch newline-separated plain-text blocklists (currently only seirdy.one's
# bsl.txt) and feed each listed domain into the generic processing pipeline.
# NOTE(review): this excerpt elides original lines (for/continue/braces), so
# comments describe only the visible statements.
1040 def fetch_txt(args: argparse.Namespace) -> int:
1041 logger.debug("args[]='%s' - CALLED!", type(args))
# Static list of text blocklist sources: blocker domain + URL.
1047 "blocker": "seirdy.one",
1048 "url" : "https://seirdy.one/pb/bsl.txt",
1051 logger.info("Checking %d text file(s) ...", len(urls))
1053 logger.debug("Fetching row[url]='%s' ...", row["url"])
1054 response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
1056 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1057 if response.ok and response.status_code < 300 and response.text != "":
1058 logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
# One domain per line; tidyup.domain() normalizes before the guards below.
1059 domains = response.text.split("\n")
1061 logger.info("Processing %d domains ...", len(domains))
1062 for domain in domains:
1063 logger.debug("domain='%s' - BEFORE!", domain)
1064 domain = tidyup.domain(domain)
1066 logger.debug("domain='%s' - AFTER!", domain)
1068 logger.debug("domain is empty - SKIPPED!")
1070 elif not utils.is_domain_wanted(domain):
1071 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1073 elif instances.is_recent(domain):
1074 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1077 logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
1078 processed = utils.process_domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)
1080 logger.debug("processed='%s'", processed)
1082 logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
1085 logger.debug("Success! - EXIT!")
# Scrape fedipact.online's HTML, treating each <li> element's first content
# node as an instance domain, and crawl any that are new and wanted.
# NOTE(review): this excerpt elides original lines (for/continue/return), so
# comments describe only the visible statements.
1088 def fetch_fedipact(args: argparse.Namespace) -> int:
1089 logger.debug("args[]='%s' - CALLED!", type(args))
# Rate-limit access to the remote host.
1091 api_domain = "fedipact.online"
1092 if apis.is_recent(api_domain):
1093 logger.info("API from api_domain='%s' has recently being accessed - EXIT!", api_domain)
1096 logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
1097 apis.update(api_domain)
1101 response = utils.fetch_url(
1102 f"https://{api_domain}",
1103 network.web_headers,
1104 (config.get("connection_timeout"), config.get("read_timeout"))
1107 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1108 if response.ok and response.status_code < 300 and response.text != "":
1109 logger.debug("Parsing %d Bytes ...", len(response.text))
1111 doc = bs4.BeautifulSoup(response.text, "html.parser")
1112 logger.debug("doc[]='%s'", type(doc))
# Every <li> on the page is assumed to hold one instance domain - TODO confirm
# the page structure still matches.
1114 rows = doc.findAll("li")
1115 logger.info("Checking %d row(s) ...", len(rows))
1117 logger.debug("row[]='%s'", type(row))
1118 domain = tidyup.domain(row.contents[0])
1120 logger.debug("domain='%s' - AFTER!", domain)
1122 logger.debug("domain is empty - SKIPPED!")
1124 elif not utils.is_domain_wanted(domain):
1125 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1127 elif instances.is_registered(domain):
1128 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1130 elif instances.is_recent(domain):
1131 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1134 logger.info("Fetching domain='%s' ...", domain)
1135 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1137 logger.debug("Success! - EXIT!")
# Scrape the joinfediverse.wiki "FediBlock" page: parse its wikitable(s) into
# block records (header row -> column map, data rows -> block dicts), expand
# subdomain lists, then apply the blocks for every local climatejustice.*
# blocker instance found in the database.
# NOTE(review): this excerpt elides original lines (try:/for/continue/braces),
# so comments describe only the visible statements.
1140 def fetch_joinfediverse(args: argparse.Namespace) -> int:
1141 logger.debug("args[]='%s' - CALLED!", type(args))
# Rate-limit access to the wiki.
1143 api_domain = "joinfediverse.wiki"
1144 if apis.is_recent(api_domain):
1145 logger.info("API from api_domain='%s' has recently being accessed - EXIT!", api_domain)
1148 logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
1149 apis.update(api_domain)
1153 raw = utils.fetch_url(
1154 f"https://{api_domain}/FediBlock",
1155 network.web_headers,
1156 (config.get("connection_timeout"), config.get("read_timeout"))
1158 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1160 doc = bs4.BeautifulSoup(raw, "html.parser")
1161 logger.debug("doc[]='%s'", type(doc))
1163 tables = doc.findAll("table", {"class": "wikitable"})
1165 logger.info("Analyzing %d table(s) ...", len(tables))
1167 for table in tables:
1168 logger.debug("table[]='%s'", type(table))
1170 rows = table.findAll("tr")
1171 logger.info("Checking %d row(s) ...", len(rows))
# Maps column index -> recognized header name ("domain", "instance", ...).
1172 block_headers = dict()
1174 logger.debug("row[%s]='%s'", type(row), row)
1176 headers = row.findAll("th")
1177 logger.debug("Found headers()=%d header(s)", len(headers))
# A row with more than one <th> is treated as the table's header row.
1178 if len(headers) > 1:
1179 block_headers = dict()
1181 for header in headers:
1183 logger.debug("header[]='%s',cnt=%d", type(header), cnt)
1184 text = header.contents[0]
1186 logger.debug("text[]='%s'", type(text))
1187 if not isinstance(text, str):
1188 logger.debug("text[]='%s' is not 'str' - SKIPPED!", type(text))
1190 elif validators.domain(text.strip()):
1191 logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
1194 text = tidyup.domain(text.strip())
1195 logger.debug("text='%s'", text)
1196 if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
1197 logger.debug("Found header: '%s'=%d", text, cnt)
1198 block_headers[cnt] = text
1200 elif len(block_headers) == 0:
1201 logger.debug("row is not scrapable - SKIPPED!")
1203 elif len(block_headers) > 0:
1204 logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
1208 for element in row.find_all(["th", "td"]):
1210 logger.debug("element[]='%s',cnt=%d", type(element), cnt)
1211 if cnt in block_headers:
1212 logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])
1214 text = element.text.strip()
# "domain"/"instance" columns both map to the canonical key "blocked".
1215 key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"
1217 logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
1218 if key in ["domain", "instance"]:
1220 elif key == "reason":
1221 block[key] = tidyup.reason(text)
1222 elif key == "subdomain(s)":
# Subdomains are listed slash-separated in a single cell.
1225 block[key] = text.split("/")
1227 logger.debug("key='%s'", key)
1230 logger.debug("block()=%d ...", len(block))
1232 logger.debug("Appending block()=%d ...", len(block))
1233 blocklist.append(block)
1235 logger.debug("blocklist()=%d", len(blocklist))
# Blockers are all locally-known climatejustice.* instances.
1237 database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
1238 domains = database.cursor.fetchall()
1240 logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
1242 for block in blocklist:
1243 logger.debug("block='%s'", block)
# Expand "subdomain(s)" entries into one block per fully-qualified subdomain.
# NOTE(review): block["blocked"] is overwritten each iteration while the same
# dict object is appended repeatedly - every appended reference ends up with
# the LAST subdomain. Likely needs a per-subdomain copy; confirm intent.
1244 if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
1245 origin = block["blocked"]
1246 for subdomain in block["subdomain(s)"]:
1247 block["blocked"] = subdomain + "." + origin
1248 blocking.append(block)
1250 blocking.append(block)
# NOTE(review): '%d' with the list `blocking` raises TypeError when DEBUG is
# enabled - should be len(blocking).
1252 logger.debug("blocking()=%d", blocking)
1253 for block in blocking:
1254 logger.debug("block[]='%s'", type(block))
1255 block["blocked"] = tidyup.domain(block["blocked"])
1257 logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
1258 if block["blocked"] == "":
1259 logger.debug("block[blocked] is empty - SKIPPED!")
1261 elif not utils.is_domain_wanted(block["blocked"]):
1262 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1264 elif instances.is_recent(block["blocked"]):
1265 logger.debug("blocked='%s' has been recently checked - SKIPPED!", block["blocked"])
1268 logger.info("Proccessing blocked='%s' ...", block["blocked"])
1269 utils.process_domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
1272 for blocker in domains:
# fetchall() rows are tuples; unwrap the single domain column.
1273 blocker = blocker[0]
1274 logger.debug("blocker[%s]='%s'", type(blocker), blocker)
1276 for block in blocking:
1277 logger.debug("block[blocked]='%s',block[reason]='%s' - BEFORE!", block["blocked"], block["reason"])
1278 block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None
1280 logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
1281 if block["blocked"] == "":
1282 logger.debug("block[blocked] is empty - SKIPPED!")
1284 elif not utils.is_domain_wanted(block["blocked"]):
1285 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1288 logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
1289 if utils.process_block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
1290 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
1292 "blocked": block["blocked"],
1293 "reason" : block["reason"],
1296 if instances.has_pending(blocker):
1297 logger.debug("Flushing updates for blocker='%s' ...", blocker)
1298 instances.update_data(blocker)
1300 logger.debug("Invoking commit() ...")
1301 database.connection.commit()
1303 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1304 if config.get("bot_enabled") and len(blockdict) > 0:
# NOTE(review): the format string is missing the closing quote after %s
# ("blocker='%s,..." should be "blocker='%s',...") - cosmetic log defect.
1305 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", blocker, len(blockdict))
1306 network.send_bot_post(blocker, blockdict)
1308 logger.debug("Success! - EXIT!")
# Re-fetch block lists from instances flagged with has_obfuscation=1 and try
# to deobfuscate wildcard-masked blocked domains (e.g. "*.example.?") back to
# real domains, recording any newly resolved blocks.
# NOTE(review): this excerpt elides original lines (try:/for/continue/braces),
# so comments describe only the visible statements.
1311 def recheck_obfuscation(args: argparse.Namespace) -> int:
1312 logger.debug("args[]='%s' - CALLED!", type(args))
# Scope selection: a single domain, a single software type, or everything.
1316 if isinstance(args.domain, str) and args.domain != "" and utils.is_domain_wanted(args.domain):
1317 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
# NOTE(review): validators.domain() returns True/ValidationFailure, so the
# == args.software comparison looks wrong (always False) - verify intent.
1318 elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
1319 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
1321 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")
1323 rows = database.cursor.fetchall()
1324 logger.info("Checking %d domains ...", len(rows))
1326 logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
# --all overrides the recency skip; explicit --domain/--software also do.
1327 if (args.all is None or not args.all) and instances.is_recent(row["domain"]) and args.domain is None and args.software is None:
1328 logger.debug("row[domain]='%s' has been recently checked, args.all[]='%s' - SKIPPED!", row["domain"], type(args.all))
# Dispatch to the software-specific block-list fetcher.
1332 if row["software"] == "pleroma":
1333 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1334 blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
1335 elif row["software"] == "mastodon":
1336 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1337 blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
1338 elif row["software"] == "lemmy":
1339 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1340 blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
1341 elif row["software"] == "friendica":
1342 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1343 blocking = friendica.fetch_blocks(row["domain"])
1344 elif row["software"] == "misskey":
1345 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1346 blocking = misskey.fetch_blocks(row["domain"])
1348 logger.warning("Unknown sofware: domain='%s',software='%s'", row["domain"], row["software"])
# NOTE(review): the log line formats len(blocking) but the call passes the
# list itself - presumably set_total_blocks() accepts the list; confirm.
1350 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
1351 instances.set_total_blocks(row["domain"], blocking)
1353 logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1356 for block in blocking:
1357 logger.debug("block[blocked]='%s'", block["blocked"])
# Guard chain: skip empty, reverse-DNS (.arpa), fake (.tld) and TOR (.onion)
# entries before attempting deobfuscation.
1360 if block["blocked"] == "":
1361 logger.debug("block[blocked] is empty - SKIPPED!")
1363 elif block["blocked"].endswith(".arpa"):
1364 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1366 elif block["blocked"].endswith(".tld"):
1367 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1369 elif block["blocked"].endswith(".onion"):
1370 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
# '*' or '?' marks an obfuscated entry; try to resolve it via its hash.
1372 elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
1373 logger.debug("block='%s' is obfuscated.", block["blocked"])
1374 obfuscated = obfuscated + 1
1375 blocked = utils.deobfuscate_domain(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
1376 elif not utils.is_domain_wanted(block["blocked"]):
1377 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1379 elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1380 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
1383 logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
# A successful deobfuscation yields a different, concrete domain.
1384 if blocked is not None and blocked != block["blocked"]:
1385 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1386 obfuscated = obfuscated - 1
1387 if blocks.is_instance_blocked(row["domain"], blocked):
1388 logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
1391 block["block_level"] = utils.alias_block_level(block["block_level"])
1393 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
1394 if utils.process_block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1395 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
1398 "reason" : block["reason"],
1401 logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
# Fully deobfuscated -> clear the flag so this domain is not rechecked.
1402 if obfuscated == 0 and len(blocking) > 0:
1403 logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
1404 instances.set_has_obfuscation(row["domain"], False)
1406 if instances.has_pending(row["domain"]):
1407 logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1408 instances.update_data(row["domain"])
1410 logger.debug("Invoking commit() ...")
1411 database.connection.commit()
1413 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1414 if config.get("bot_enabled") and len(blockdict) > 0:
# NOTE(review): format string missing closing quote after %s ("blocker='%s,"
# should be "blocker='%s',") - cosmetic log defect.
1415 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", row["domain"], len(blockdict))
1416 network.send_bot_post(row["domain"], blockdict)
1418 logger.debug("Success! - EXIT!")
# Fetch the instance list from demo.fedilist.com as CSV (optionally filtered
# by --software) and crawl every new, wanted, non-recent domain found.
# NOTE(review): this excerpt elides original lines (for/continue/return), so
# comments describe only the visible statements.
1421 def fetch_fedilist(args: argparse.Namespace) -> int:
1422 logger.debug("args[]='%s' - CALLED!", type(args))
# Rate-limit access to the remote API host.
1424 api_domain = "demo.fedilist.com"
1425 if apis.is_recent(api_domain):
1426 logger.info("API from api_domain='%s' has recently being accessed - EXIT!", api_domain)
1429 logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
1430 apis.update(api_domain)
# onion=not excludes TOR-only instances; --software narrows the export.
1432 url = f"http://{api_domain}/instance/csv?onion=not"
1433 if args.software is not None and args.software != "":
1434 logger.debug("args.software='%s'", args.software)
1435 url = f"http://{api_domain}/instance/csv?software={args.software}&onion=not"
1439 logger.info("Fetching url='%s' from fedilist.com ...", url)
1440 response = reqto.get(
1442 headers=network.web_headers,
1443 timeout=(config.get("connection_timeout"), config.get("read_timeout")),
1444 allow_redirects=False
1447 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1448 reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")
1450 logger.debug("reader[]='%s'", type(reader))
1453 logger.debug("row[]='%s'", type(row))
1454 domain = tidyup.domain(row["hostname"])
1455 logger.debug("domain='%s' - AFTER!", domain)
1458 logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"])
1460 elif not utils.is_domain_wanted(domain):
1461 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1463 elif (args.all is None or not args.all) and instances.is_registered(domain):
# NOTE(review): this debug call has two %s placeholders but only one argument
# (type(args.all)) - `domain` is missing, so logging raises a formatting
# error when DEBUG is enabled.
1464 logger.debug("domain='%s' is already registered, --all not specified: args.all[]='%s'", type(args.all))
1466 elif instances.is_recent(domain):
1467 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1470 logger.info("Fetching instances from domain='%s' ...", domain)
1471 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1473 logger.debug("Success! - EXIT!")
# Re-determine the software type of instances (one domain, one software type,
# or all with stale nodeinfo) and persist any detected change.
# NOTE(review): this excerpt elides original lines (try:/for), so comments
# describe only the visible statements.
1476 def update_nodeinfo(args: argparse.Namespace) -> int:
1477 logger.debug("args[]='%s' - CALLED!", type(args))
# Scope selection: explicit --domain, explicit --software, or all instances
# whose last_nodeinfo is older than config "recheck_nodeinfo".
1481 if args.domain is not None and args.domain != "":
1482 logger.debug("Fetching args.domain='%s'", args.domain)
1483 database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ?", [args.domain])
1484 elif args.software is not None and args.software != "":
1485 logger.info("Fetching domains for args.software='%s'", args.software)
1486 database.cursor.execute("SELECT domain, software FROM instances WHERE software = ?", [args.software])
1488 logger.info("Fetching domains for recently updated ...")
1489 database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL", [time.time() - config.get("recheck_nodeinfo")])
1491 domains = database.cursor.fetchall()
1493 logger.info("Checking %d domain(s) ...", len(domains))
1496 logger.debug("row[]='%s'", type(row))
# cnt/len(domains)*100 is a progress percentage for the log line.
1498 logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
1499 software = federation.determine_software(row["domain"])
1501 logger.debug("Determined software='%s'", software)
# Only write when the detected software differs from the stored value.
1502 if software != row["software"]:
1503 logger.warning("Software type has changed from '%s' to '%s'!", row["software"], software)
1504 instances.set_software(row["domain"], software)
1506 instances.set_success(row["domain"])
1507 except network.exceptions as exception:
1508 logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1509 instances.set_last_error(row["domain"], exception)
# Always stamp last_nodeinfo and flush, success or failure.
1511 instances.set_last_nodeinfo(row["domain"])
1512 instances.update_data(row["domain"])
1515 logger.debug("Success! - EXIT!")
# Fetch the full instance list from the instances.social API (requires an API
# key in config "instances_social_api_key") and crawl every new, wanted,
# non-recent domain it returns.
# NOTE(review): this excerpt elides original lines (for/continue/return and
# dict/list braces), so comments describe only the visible statements.
1518 def fetch_instances_social(args: argparse.Namespace) -> int:
1519 logger.debug("args[]='%s' - CALLED!", type(args))
1521 api_domain = "instances.social"
# Abort early without an API key; also rate-limit access to the API host.
1523 if config.get("instances_social_api_key") == "":
1524 logger.error("API key not set. Please set in your config.json file.")
1526 elif apis.is_recent(api_domain):
1527 logger.info("API from api_domain='%s' has recently being accessed - EXIT!", api_domain)
1530 logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
1531 apis.update(api_domain)
# Bearer-token header for the instances.social REST API.
1535 "Authorization": f"Bearer {config.get('instances_social_api_key')}",
1538 fetched = network.get_json_api(
1540 "/api/1.0/instances/list?count=0&sort_by=name",
1542 (config.get("connection_timeout"), config.get("read_timeout"))
1544 logger.debug("fetched[]='%s'", type(fetched))
# Validate the API envelope before touching fetched["json"]["instances"].
1546 if "error_message" in fetched:
1547 logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1549 elif "exception" in fetched:
1550 logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1552 elif "json" not in fetched:
1553 logger.warning("fetched has no element 'json' - EXIT!")
1555 elif "instances" not in fetched["json"]:
1556 logger.warning("fetched[row] has no element 'instances' - EXIT!")
1560 rows = fetched["json"]["instances"]
1562 logger.info("Checking %d row(s) ...", len(rows))
1564 logger.debug("row[]='%s'", type(row))
1565 domain = tidyup.domain(row["name"])
1567 logger.debug("domain='%s' - AFTER!", domain)
1569 logger.debug("domain is empty - SKIPPED!")
1571 elif not utils.is_domain_wanted(domain):
1572 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
# `domains` presumably accumulates handled domains in an elided line - verify.
1574 elif domain in domains:
1575 logger.debug("domain='%s' is already added - SKIPPED!", domain)
1577 elif instances.is_registered(domain):
1578 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1580 elif instances.is_recent(domain):
1581 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1584 logger.info("Fetching instances from domain='%s'", domain)
1585 federation.fetch_instances(domain, api_domain, None, inspect.currentframe().f_code.co_name)
1587 logger.debug("Success! - EXIT!")