1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
23 from urllib.parse import urlparse
33 from fba import database
36 from fba.helpers import blacklist
37 from fba.helpers import config
38 from fba.helpers import cookies
39 from fba.helpers import locking
40 from fba.helpers import processing
41 from fba.helpers import software as software_helper
42 from fba.helpers import tidyup
44 from fba.http import federation
45 from fba.http import network
47 from fba.models import blocks
48 from fba.models import instances
49 from fba.models import sources
51 from fba.networks import friendica
52 from fba.networks import lemmy
53 from fba.networks import mastodon
54 from fba.networks import misskey
55 from fba.networks import pleroma
# Module-level logging setup: root logging configured at INFO, plus a
# module-scoped logger. The commented-out line can be re-enabled locally
# to get DEBUG output from this module only.
# NOTE(review): this extract carries stray original-line-number prefixes
# (e.g. "57 ") and stripped indentation from the extraction process.
57 logging.basicConfig(level=logging.INFO)
58 logger = logging.getLogger(__name__)
59 #logger.setLevel(logging.DEBUG)
# Command: validate a single domain given via --domain.
# Checks, in order: syntactic validity (validators.domain), blacklist
# membership, and prior registration in the instances table; logs a
# warning for each failure case, otherwise logs that the domain is unknown.
# NOTE(review): the embedded numbering jumps (62->64, 71->74->76), so the
# `status = ...` assignments and the final `return status` are elided from
# this extract — do not assume the visible branches are contiguous.
61 def check_instance(args: argparse.Namespace) -> int:
62 logger.debug("args.domain='%s' - CALLED!", args.domain)
64 if not validators.domain(args.domain):
65 logger.warning("args.domain='%s' is not valid", args.domain)
67 elif blacklist.is_blacklisted(args.domain):
68 logger.warning("args.domain='%s' is blacklisted", args.domain)
70 elif instances.is_registered(args.domain):
71 logger.warning("args.domain='%s' is already registered", args.domain)
# Fall-through: none of the rejection conditions matched.
74 logger.info("args.domain='%s' is not known", args.domain)
76 logger.debug("status=%d - EXIT!", status)
# Command: sanity-check stored nodeinfo URLs.
# Iterates all instances with a non-NULL nodeinfo_url and warns when the
# URL is absolute yet contains neither the stored domain nor its punycode
# (IDNA) form — i.e. the nodeinfo URL points at a different host.
# NOTE(review): elided lines (81->83, 94->97) hide the `cnt` initialization
# / increment and the return — `cnt` is presumably a mismatch counter;
# confirm against the full file.
79 def check_nodeinfo(args: argparse.Namespace) -> int:
80 logger.debug("args[]='%s' - CALLED!", type(args))
83 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")
86 for row in database.cursor.fetchall():
87 logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
# IDNA-encode the stored domain so internationalized domains are compared
# in the same (punycode) form they would appear in a URL.
88 punycode = row["domain"].encode("idna").decode("utf-8")
90 if row["nodeinfo_url"].startswith("/"):
91 logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
93 elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
94 logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
97 logger.info("Found %d row(s)", cnt)
# Command: import the public server list from the pixelfed.org API.
# Skips entirely if the source was queried recently, otherwise determines
# CSRF headers, fetches /api/v1/servers/all.json, and feeds each wanted,
# unregistered, not-recently-crawled domain into federation.fetch_instances().
# NOTE(review): numbering gaps (110->113, 114->117, 162->165) hide the
# early-return, try:, continue and return statements — branch bodies below
# are not contiguous in the real file.
102 def fetch_pixelfed_api(args: argparse.Namespace) -> int:
103 logger.debug("args[]='%s' - CALLED!", type(args))
105 # No CSRF by default, you don't have to add network.source_headers by yourself here
107 source_domain = "pixelfed.org"
# Rate-limit guard: bail out if this source was hit recently.
109 if sources.is_recent(source_domain):
110 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
113 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
114 sources.update(source_domain)
117 logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
118 headers = csrf.determine(source_domain, dict())
119 except network.exceptions as exception:
120 logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
124 logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
125 fetched = network.get_json_api(
127 "/api/v1/servers/all.json?scope=All&country=all&language=all",
129 (config.get("connection_timeout"), config.get("read_timeout"))
132 logger.debug("JSON API returned %d elements", len(fetched))
# Error handling on the API envelope before touching the payload.
133 if "error_message" in fetched:
134 logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
136 elif "data" not in fetched["json"]:
137 logger.warning("API did not return JSON with 'data' element - EXIT!")
140 rows = fetched["json"]["data"]
141 logger.info("Checking %d fetched rows ...", len(rows))
# Per-row filtering; the `for row in rows:` header is elided (141->143).
143 logger.debug("row[]='%s'", type(row))
144 if "domain" not in row:
145 logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
147 elif row["domain"] == "":
148 logger.debug("row[domain] is empty - SKIPPED!")
# Normalize to punycode before the wanted/registered/recent checks.
151 logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
152 domain = row["domain"].encode("idna").decode("utf-8")
153 logger.debug("domain='%s' - AFTER!", domain)
155 if not utils.is_domain_wanted(domain):
156 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
158 elif instances.is_registered(domain):
159 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
161 elif instances.is_recent(domain):
162 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
165 logger.debug("Fetching instances from domain='%s' ...", domain)
166 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
168 except network.exceptions as exception:
169 logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
172 logger.debug("Success! - EXIT!")
# Command: import a domain list from the gql.api.bka.li GraphQL API.
# Posts a `domainlist` GraphQL query, validates the response envelope,
# collects wanted/new/not-recent domains into `domains`, then registers
# each via federation.fetch_instances() with 'tak.teleyal.blog' as origin.
# NOTE(review): numbering gaps hide locking.acquire(), the try: headers,
# `domains = []` initialization and return statements — the visible lines
# are a sampled subset of the real function body.
175 def fetch_bkali(args: argparse.Namespace) -> int:
176 logger.debug("args[]='%s' - CALLED!", type(args))
178 logger.debug("Invoking locking.acquire() ...")
181 source_domain = "gql.api.bka.li"
# Rate-limit guard on the source domain.
182 if sources.is_recent(source_domain):
183 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
186 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
187 sources.update(source_domain)
191 logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
192 fetched = network.post_json_api(
196 "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
200 logger.debug("fetched[]='%s'", type(fetched))
# Two error shapes are checked: a transport-level error_message and a
# GraphQL-style {"error": {"message": ...}} payload.
201 if "error_message" in fetched:
202 logger.warning("post_json_api() for 'gql.sources.bka.li' returned error message='%s", fetched["error_message"])
204 elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
205 logger.warning("post_json_api() returned error: '%s", fetched["error"]["message"])
208 rows = fetched["json"]
210 logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
# Structural validation of the GraphQL payload; failures raise so the
# surrounding (elided) try/except can report them.
212 raise Exception("WARNING: Returned no records")
213 elif "data" not in rows:
214 raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
215 elif "nodeinfo" not in rows["data"]:
216 raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")
218 for entry in rows["data"]["nodeinfo"]:
219 logger.debug("entry[%s]='%s'", type(entry), entry)
220 if "domain" not in entry:
221 logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
223 elif entry["domain"] == "":
224 logger.debug("entry[domain] is empty - SKIPPED!")
226 elif not utils.is_domain_wanted(entry["domain"]):
227 logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
229 elif instances.is_registered(entry["domain"]):
230 logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
232 elif instances.is_recent(entry["domain"]):
233 logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
236 logger.debug("Adding domain='%s' ...", entry["domain"])
237 domains.append(entry["domain"])
239 except network.exceptions as exception:
240 logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
243 logger.debug("domains()=%d", len(domains))
245 logger.info("Adding %d new instances ...", len(domains))
246 for domain in domains:
# Punycode-normalize each collected domain before fetching.
247 logger.debug("domain='%s' - BEFORE!", domain)
248 domain = domain.encode("idna").decode("utf-8")
249 logger.debug("domain='%s' - AFTER!", domain)
252 logger.info("Fetching instances from domain='%s' ...", domain)
253 federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
254 except network.exceptions as exception:
255 logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
256 instances.set_last_error(domain, exception)
259 logger.debug("Success - EXIT!")
# Command: (re-)fetch block lists from known instances.
# Selection: a single --domain, a single --software, or all supported
# software ('pleroma','mastodon','lemmy','friendica','misskey') whose
# last_blocked timestamp is older than the 'recheck_block' interval.
# For each blocker it dispatches to the matching network module's
# fetch_blocks(), deobfuscates '*'/'?'-masked blocked domains via
# instances.deobfuscate(), records blocks through processing.block(),
# and optionally accumulates entries for a bot POST.
# NOTE(review): this extract skips many original lines (continue/return,
# try: headers, `blockdict = []` init, etc.); the code below is a sampled
# subset and is NOT contiguous — treat it as read-only documentation
# scaffolding, not runnable code.
262 def fetch_blocks(args: argparse.Namespace) -> int:
263 logger.debug("args[]='%s' - CALLED!", type(args))
# Up-front validation when a single domain was requested.
264 if args.domain is not None and args.domain != "":
265 logger.debug("args.domain='%s' - checking ...", args.domain)
266 if not validators.domain(args.domain):
267 logger.warning("args.domain='%s' is not valid.", args.domain)
269 elif blacklist.is_blacklisted(args.domain):
270 logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
272 elif not instances.is_registered(args.domain):
273 logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
276 logger.debug("Invoking locking.acquire() ...")
# Build the worklist: one of three SELECT variants depending on args.
279 if args.domain is not None and args.domain != "":
280 # Re-check single domain
281 logger.debug("Querying database for single args.domain='%s' ...", args.domain)
282 database.cursor.execute(
283 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
285 elif args.software is not None and args.software != "":
286 # Re-check single software
287 logger.debug("Querying database for args.software='%s' ...", args.software)
288 database.cursor.execute(
289 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
292 # Re-check after "timeout" (aka. minimum interval)
293 database.cursor.execute(
294 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
297 rows = database.cursor.fetchall()
298 logger.info("Checking %d entries ...", len(rows))
299 for blocker, software, origin, nodeinfo_url in rows:
300 logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
301 blocker = tidyup.domain(blocker)
302 logger.debug("blocker='%s' - AFTER!", blocker)
305 logger.warning("blocker is now empty!")
307 elif nodeinfo_url is None or nodeinfo_url == "":
308 logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
310 elif not utils.is_domain_wanted(blocker):
311 logger.debug("blocker='%s' is not wanted - SKIPPED!", blocker)
314 logger.debug("blocker='%s'", blocker)
# Mark the blocker as freshly checked and reset its obfuscation flag
# before (re-)detecting obfuscated entries below.
315 instances.set_last_blocked(blocker)
316 instances.set_has_obfuscation(blocker, False)
# Software-specific block-list fetchers.
319 if software == "pleroma":
320 logger.info("blocker='%s',software='%s'", blocker, software)
321 blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
322 elif software == "mastodon":
323 logger.info("blocker='%s',software='%s'", blocker, software)
324 blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
325 elif software == "lemmy":
326 logger.info("blocker='%s',software='%s'", blocker, software)
327 blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
328 elif software == "friendica":
329 logger.info("blocker='%s',software='%s'", blocker, software)
330 blocking = friendica.fetch_blocks(blocker)
331 elif software == "misskey":
332 logger.info("blocker='%s',software='%s'", blocker, software)
333 blocking = misskey.fetch_blocks(blocker)
335 logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)
337 logger.debug("blocker='%s'", blocker)
# chaos.social is special-cased: its totals are maintained by fetch_cs()
# from its published markdown list, so skip the counter update here —
# presumably to avoid double counting; confirm against full file.
338 if blocker != "chaos.social":
339 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
340 instances.set_total_blocks(blocker, blocking)
342 logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
344 for block in blocking:
345 logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])
347 if block["block_level"] == "":
348 logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
# Normalize blocked domain and reason text.
351 logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
352 block["blocked"] = tidyup.domain(block["blocked"])
353 block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
354 logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])
# Skip unusable blocked values: empty, Tor .onion, reverse-DNS .arpa,
# and placeholder '.tld' fakes.
356 if block["blocked"] == "":
357 logger.warning("blocked is empty, blocker='%s'", blocker)
359 elif block["blocked"].endswith(".onion"):
360 logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
362 elif block["blocked"].endswith(".arpa"):
363 logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
365 elif block["blocked"].endswith(".tld"):
366 logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
368 elif block["blocked"].find("*") >= 0:
369 logger.debug("blocker='%s' uses obfuscated domains", blocker)
371 # Some friendica servers also obscure domains without hash
372 row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)
374 logger.debug("row[]='%s'", type(row))
376 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
377 instances.set_has_obfuscation(blocker, True)
# Deobfuscation succeeded: substitute the real domain/origin/nodeinfo.
380 block["blocked"] = row["domain"]
381 origin = row["origin"]
382 nodeinfo_url = row["nodeinfo_url"]
383 elif block["blocked"].find("?") >= 0:
384 logger.debug("blocker='%s' uses obfuscated domains", blocker)
386 # Some obscure them with question marks, not sure if that's dependent on version or not
387 row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)
389 logger.debug("row[]='%s'", type(row))
391 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
392 instances.set_has_obfuscation(blocker, True)
395 block["blocked"] = row["domain"]
396 origin = row["origin"]
397 nodeinfo_url = row["nodeinfo_url"]
399 logger.debug("Looking up instance by domainm, blocked='%s'", block["blocked"])
400 if block["blocked"] == "":
401 logger.debug("block[blocked] is empty - SKIPPED!")
# Punycode-normalize, dropping any leading dot first.
404 logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
405 block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
406 logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
408 if not utils.is_domain_wanted(block["blocked"]):
409 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
411 elif block["block_level"] in ["accept", "accepted"]:
412 logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
414 elif not instances.is_registered(block["blocked"]):
415 logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
416 federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)
# Canonicalize the block level, persist the block, and queue a bot
# notification for hard rejects when the bot is enabled.
418 block["block_level"] = blocks.alias_block_level(block["block_level"])
420 if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
421 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
423 "blocked": block["blocked"],
424 "reason" : block["reason"],
427 logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
428 cookies.clear(block["blocked"])
# Per-blocker epilogue: flush pending instance updates, commit, clear
# cookies, and send the accumulated bot POST if any blocks were queued.
430 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
431 if instances.has_pending(blocker):
432 logger.debug("Flushing updates for blocker='%s' ...", blocker)
433 instances.update_data(blocker)
435 logger.debug("Invoking commit() ...")
436 database.connection.commit()
438 logger.debug("Invoking cookies.clear(%s) ...", blocker)
439 cookies.clear(blocker)
441 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d'", config.get("bot_enabled"), len(blockdict))
442 if config.get("bot_enabled") and len(blockdict) > 0:
443 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
444 network.send_bot_post(blocker, blockdict)
446 logger.debug("Success! - EXIT!")
# Command: crawl fediverse.observer for instance domains.
# Without --software it scrapes the site's software dropdown menu to build
# the list of software types; with --software only that type is used. For
# each type it fetches the tabledata.php page, extracts anchor elements of
# class "url", and feeds each wanted/new/not-recent domain into
# federation.fetch_instances().
# NOTE(review): numbering gaps hide locking.acquire(), `types = []` init,
# loop headers (`for item in items:`), continue statements and the return —
# lines below are a sampled subset of the real body.
449 def fetch_observer(args: argparse.Namespace) -> int:
450 logger.debug("args[]='%s' - CALLED!", type(args))
452 logger.debug("Invoking locking.acquire() ...")
455 source_domain = "fediverse.observer"
# Rate-limit guard on the source domain.
456 if sources.is_recent(source_domain):
457 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
460 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
461 sources.update(source_domain)
# Discover available software types by scraping the navbar dropdown.
464 if args.software is None:
465 logger.info("Fetching software list ...")
466 raw = utils.fetch_url(
467 f"https://{source_domain}",
469 (config.get("connection_timeout"), config.get("read_timeout"))
471 logger.debug("raw[%s]()=%d", type(raw), len(raw))
473 doc = bs4.BeautifulSoup(raw, features="html.parser")
474 logger.debug("doc[]='%s'", type(doc))
476 navbar = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"})
477 logger.debug("navbar[]='%s'", type(navbar))
479 logger.warning("Cannot find navigation bar, cannot continue!")
482 items = navbar.findAll("a", {"class": "dropdown-item"})
483 logger.debug("items[]='%s'", type(items))
485 logger.info("Checking %d menu items ...", len(items))
487 logger.debug("item[%s]='%s'", type(item), item)
# The aggregate "All" entry is not a software type — skip it.
488 if item.text.lower() == "all":
489 logger.debug("Skipping 'All' menu entry ...")
492 logger.debug("Appending item.text='%s' ...", item.text)
493 types.append(tidyup.domain(item.text))
495 logger.info("Adding args.software='%s' as type ...", args.software)
496 types.append(args.software)
498 logger.info("Fetching %d different table data ...", len(types))
499 for software in types:
500 logger.debug("software='%s' - BEFORE!", software)
501 if args.software is not None and args.software != software:
502 logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
507 logger.debug("Fetching table data for software='%s' ...", software)
508 raw = utils.fetch_url(
509 f"https://{source_domain}/app/views/tabledata.php?software={software}",
511 (config.get("connection_timeout"), config.get("read_timeout"))
513 logger.debug("raw[%s]()=%d", type(raw), len(raw))
515 doc = bs4.BeautifulSoup(raw, features="html.parser")
516 logger.debug("doc[]='%s'", type(doc))
517 except network.exceptions as exception:
518 logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
521 items = doc.findAll("a", {"class": "url"})
522 logger.info("Checking %d items,software='%s' ...", len(items), software)
524 logger.debug("item[]='%s'", type(item))
525 domain = item.decode_contents()
526 logger.debug("domain='%s' - AFTER!", domain)
529 logger.debug("domain is empty - SKIPPED!")
# Punycode-normalize before the wanted/registered/recent checks.
532 logger.debug("domain='%s' - BEFORE!", domain)
533 domain = domain.encode("idna").decode("utf-8")
534 logger.debug("domain='%s' - AFTER!", domain)
536 if not utils.is_domain_wanted(domain):
537 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
539 elif instances.is_registered(domain):
540 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
542 elif instances.is_recent(domain):
543 logger.debug("domain='%s' is recently being handled - SKIPPED!", domain)
# Map scraped software label to its canonical alias before fetching.
546 software = software_helper.alias(software)
547 logger.info("Fetching instances for domain='%s'", domain)
548 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
550 logger.debug("Success! - EXIT!")
# Command: import todon.eu's published domain-block wiki page.
# Scrapes wiki.todon.eu/todon/domainblocks, splitting entries into
# "silenced" (silenced/limited servers) and "reject" (suspended servers),
# registers unknown blocked instances, records new blocks via
# processing.block() with todon as the blocker, and optionally queues a
# bot POST for rejects.
# NOTE(review): `blocker` is used but its assignment is elided (numbering
# jumps 565->572, 586->589) — presumably "todon.eu"; likewise the
# `blocklist = {...}` / `blockdict = []` initializers and try: headers are
# not visible in this extract.
553 def fetch_todon_wiki(args: argparse.Namespace) -> int:
554 logger.debug("args[]='%s' - CALLED!", type(args))
556 logger.debug("Invoking locking.acquire() ...")
559 source_domain = "wiki.todon.eu"
# Rate-limit guard on the source domain.
560 if sources.is_recent(source_domain):
561 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
564 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
565 sources.update(source_domain)
572 raw = utils.fetch_url(f"https://{source_domain}/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
573 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
575 doc = bs4.BeautifulSoup(raw, "html.parser")
576 logger.debug("doc[]='%s'", type(doc))
# The wiki page groups domains under two <h3> headings; collect the <li>
# entries that follow each heading.
578 silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
579 logger.info("Checking %d silenced/limited entries ...", len(silenced))
580 blocklist["silenced"] = utils.find_domains(silenced, "div")
582 suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
583 logger.info("Checking %d suspended entries ...", len(suspended))
584 blocklist["reject"] = utils.find_domains(suspended, "div")
586 blocking = blocklist["silenced"] + blocklist["reject"]
589 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
590 instances.set_total_blocks(blocker, blocking)
593 for block_level in blocklist:
594 blockers = blocklist[block_level]
596 logger.debug("block_level='%s',blockers()=%d'", block_level, len(blockers))
597 for blocked in blockers:
598 logger.debug("blocked='%s'", blocked)
# Register previously unknown blocked instances first.
600 if not instances.is_registered(blocked):
602 logger.info("Fetching instances from domain='%s' ...", blocked)
603 federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
604 except network.exceptions as exception:
605 logger.warning("Exception '%s' during fetching instances (fetch_cs) from blocked='%s'", type(exception), blocked)
606 instances.set_last_error(blocked, exception)
608 if blocks.is_instance_blocked(blocker, blocked, block_level):
609 logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
612 logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
# No reason text is available from the wiki, hence reason=None.
613 if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
614 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", blocked, block_level, blocker)
620 logger.debug("Invoking commit() ...")
621 database.connection.commit()
623 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
624 if config.get("bot_enabled") and len(blockdict) > 0:
625 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
626 network.send_bot_post(blocker, blockdict)
628 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
629 if instances.has_pending(blocker):
630 logger.debug("Flushing updates for blocker='%s' ...", blocker)
631 instances.update_data(blocker)
633 logger.debug("Success! - EXIT!")
# Command: import chaos.social's federation policy from its public
# markdown file on raw.githubusercontent.com.
# Renders the markdown to HTML, extracts the "silenced-instances" and
# "blocked-instances" tables into blocklist["silenced"]/["reject"],
# registers unknown domains and records blocks with blocker
# "chaos.social" (note: fetch_blocks() deliberately skips this blocker's
# totals since this function maintains them).
# NOTE(review): large numbering gaps (639->667) hide the `extensions`
# list, `blocklist` initializer and try: headers; visible lines are a
# sampled subset of the real body.
636 def fetch_cs(args: argparse.Namespace):
637 logger.debug("args[]='%s' - CALLED!", type(args))
639 logger.debug("Invoking locking.acquire() ...")
667 source_domain = "raw.githubusercontent.com"
# Rate-limit guard on the source domain.
668 if sources.is_recent(source_domain):
669 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
672 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
673 sources.update(source_domain)
675 raw = utils.fetch_url(f"https://{source_domain}/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
676 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
# The markdown is rendered to HTML first so the policy tables can be
# located by their heading ids.
678 doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
679 logger.debug("doc()=%d[]='%s'", len(doc), type(doc))
681 silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
682 logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
683 blocklist["silenced"] = federation.find_domains(silenced)
685 blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
686 logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
687 blocklist["reject"] = federation.find_domains(blocked)
689 blocking = blocklist["silenced"] + blocklist["reject"]
690 blocker = "chaos.social"
692 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
693 instances.set_total_blocks(blocker, blocking)
695 logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
696 if len(blocking) > 0:
698 for block_level in blocklist:
699 logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))
701 for row in blocklist[block_level]:
702 logger.debug("row[%s]='%s'", type(row), row)
703 if not "domain" in row:
704 logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
706 elif not instances.is_registered(row["domain"]):
708 logger.info("Fetching instances from domain='%s' ...", row["domain"])
709 federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
710 except network.exceptions as exception:
711 logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
712 instances.set_last_error(row["domain"], exception)
# Record the block; queue a bot notification for hard rejects.
714 if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
715 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
717 "blocked": row["domain"],
718 "reason" : row["reason"],
721 logger.debug("Invoking commit() ...")
722 database.connection.commit()
724 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
725 if config.get("bot_enabled") and len(blockdict) > 0:
726 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
727 network.send_bot_post(blocker, blockdict)
729 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
730 if instances.has_pending(blocker):
731 logger.debug("Flushing updates for blocker='%s' ...", blocker)
732 instances.update_data(blocker)
734 logger.debug("Success! - EXIT!")
# Command: import domains from an FBA-specific RSS feed (--feed URL).
# Rate-limits per feed host (urlparse netloc), fetches and parses the RSS
# with atoma, extracts a domain from each item's link (the text after the
# first '='), filters to wanted/new/not-recent domains, then registers
# each via federation.fetch_instances().
# NOTE(review): elided lines hide `domains = []`, continue statements,
# try: headers and the return — visible lines are a sampled subset.
737 def fetch_fba_rss(args: argparse.Namespace) -> int:
738 logger.debug("args[]='%s' - CALLED!", type(args))
742 logger.debug("Invoking locking.acquire() ...")
745 components = urlparse(args.feed)
# Rate-limit guard keyed on the feed's hostname, not the full URL.
747 if sources.is_recent(components.netloc):
748 logger.info("API from components.netloc='%s' has recently being accessed - EXIT!", components.netloc)
751 logger.debug("components.netloc='%s' has not been recently used, marking ...", components.netloc)
752 sources.update(components.netloc)
754 logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
755 response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
757 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
758 if response.ok and response.status_code < 300 and len(response.text) > 0:
759 logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
760 rss = atoma.parse_rss_bytes(response.content)
762 logger.debug("rss[]='%s'", type(rss))
763 for item in rss.items:
764 logger.debug("item[%s]='%s'", type(item), item)
# The feed encodes the domain as a query value in the item link —
# everything after the first '='.
765 domain = tidyup.domain(item.link.split("=")[1])
767 logger.debug("domain='%s' - AFTER!", domain)
769 logger.debug("domain is empty - SKIPPED!")
# Punycode-normalize before the filter chain.
772 logger.debug("domain='%s' - BEFORE!", domain)
773 domain = domain.encode("idna").decode("utf-8")
774 logger.debug("domain='%s' - AFTER!", domain)
776 if not utils.is_domain_wanted(domain):
777 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
779 elif domain in domains:
780 logger.debug("domain='%s' is already added - SKIPPED!", domain)
782 elif instances.is_registered(domain):
783 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
785 elif instances.is_recent(domain):
786 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
789 logger.debug("Adding domain='%s'", domain)
790 domains.append(domain)
792 logger.debug("domains()=%d", len(domains))
794 logger.info("Adding %d new instances ...", len(domains))
795 for domain in domains:
796 logger.debug("domain='%s'", domain)
798 logger.info("Fetching instances from domain='%s' ...", domain)
799 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
800 except network.exceptions as exception:
801 logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
802 instances.set_last_error(domain, exception)
805 logger.debug("Success! - EXIT!")
# Command: import domains from the FBA bot account's ATOM feed on
# ryona.agency. Parses each entry's HTML content with BeautifulSoup,
# walks every <a> element's comma-separated href values, tidies and
# punycode-normalizes each candidate domain, filters to wanted/new/
# not-recent ones, then registers them via federation.fetch_instances()
# with the source domain as origin.
# NOTE(review): elided lines hide `domains = []`, continue statements,
# try: headers and the return — visible lines are a sampled subset.
808 def fetch_fbabot_atom(args: argparse.Namespace) -> int:
809 logger.debug("args[]='%s' - CALLED!", type(args))
811 logger.debug("Invoking locking.acquire() ...")
814 source_domain = "ryona.agency"
# Rate-limit guard on the source domain.
815 if sources.is_recent(source_domain):
816 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
819 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
820 sources.update(source_domain)
822 feed = f"https://{source_domain}/users/fba/feed.atom"
826 logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
827 response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
829 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
830 if response.ok and response.status_code < 300 and len(response.text) > 0:
831 logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
832 atom = atoma.parse_atom_bytes(response.content)
834 logger.debug("atom[]='%s'", type(atom))
835 for entry in atom.entries:
836 logger.debug("entry[]='%s'", type(entry))
# Entry content is HTML; domains are embedded in anchor hrefs, each of
# which may itself hold several comma-separated values.
837 doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
838 logger.debug("doc[]='%s'", type(doc))
839 for element in doc.findAll("a"):
840 logger.debug("element[]='%s'", type(element))
841 for href in element["href"].split(","):
842 logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
843 domain = tidyup.domain(href)
845 logger.debug("domain='%s' - AFTER!", domain)
847 logger.debug("domain is empty - SKIPPED!")
# Punycode-normalize before the filter chain.
850 logger.debug("domain='%s' - BEFORE!", domain)
851 domain = domain.encode("idna").decode("utf-8")
852 logger.debug("domain='%s' - AFTER!", domain)
854 if not utils.is_domain_wanted(domain):
855 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
857 elif domain in domains:
858 logger.debug("domain='%s' is already added - SKIPPED!", domain)
860 elif instances.is_registered(domain):
861 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
863 elif instances.is_recent(domain):
864 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
867 logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
868 domains.append(domain)
870 logger.debug("domains()=%d", len(domains))
872 logger.info("Adding %d new instances ...", len(domains))
873 for domain in domains:
874 logger.debug("domain='%s'", domain)
876 logger.info("Fetching instances from domain='%s' ...", domain)
877 federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
878 except network.exceptions as exception:
879 logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
880 instances.set_last_error(domain, exception)
883 logger.debug("Success! - EXIT!")
def fetch_instances(args: argparse.Namespace) -> int:
    """Fetch peer instances, starting from args.domain and then from stale DB rows.

    First crawls args.domain directly; unless a single-domain run was requested,
    it then re-crawls all known instances of supported software whose
    last_instance_fetch is older than the configured recheck interval.

    Returns 0 on success, a non-zero error code on invalid/blacklisted input or
    when the initial fetch fails.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("args.domain='%s' - checking ...", args.domain)
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid.", args.domain)
        return 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
        return 101

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Name of this command, recorded as the origin of fetched data.
    command = inspect.currentframe().f_code.co_name

    # Initial fetch of the requested domain itself.
    try:
        logger.info("Fetching instances from args.domain='%s' ...", args.domain)
        federation.fetch_instances(args.domain, None, None, command)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update_data(args.domain)
        return 100

    # NOTE(review): assumes the CLI defines a --single flag - confirm against argparser setup.
    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] == "":
            logger.debug("row[domain] is empty - SKIPPED!")
            continue

        logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
        # Normalize to punycode so lookups/requests use the ASCII form.
        domain = row["domain"].encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not utils.is_domain_wanted(domain):
            logger.debug("Domain domain='%s' is not wanted - SKIPPED!", domain)
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", domain, row["origin"], row["software"], row["nodeinfo_url"])
            federation.fetch_instances(domain, row["origin"], row["software"], command, row["nodeinfo_url"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0
def fetch_oliphant(args: argparse.Namespace) -> int:
    """Import the oliphant CSV block lists hosted on codeberg.org.

    Downloads a fixed set of per-instance CSV block lists, parses each row
    (domain, severity, reject_media, reject_reports - with or without a '#'
    column prefix) and records the blocks. Optionally restricted to one
    blocker via args.domain.

    Returns 0 on success (also when the source was accessed too recently).
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "codeberg.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    # Base URL
    base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"

    # Static list of CSV block lists to fetch, keyed by the blocking instance.
    blocklists = (
        {
            "blocker": "artisan.chat",
            "csv_url": "mastodon/artisan.chat.csv",
        }, {
            "blocker": "mastodon.art",
            "csv_url": "mastodon/mastodon.art.csv",
        }, {
            "blocker": "pleroma.envs.net",
            "csv_url": "mastodon/pleroma.envs.net.csv",
        }, {
            "blocker": "oliphant.social",
            "csv_url": "mastodon/_unified_tier3_blocklist.csv",
        }, {
            "blocker": "mastodon.online",
            "csv_url": "mastodon/mastodon.online.csv",
        }, {
            "blocker": "mastodon.social",
            "csv_url": "mastodon/mastodon.social.csv",
        }, {
            "blocker": "mastodon.social",
            "csv_url": "other/missing-tier0-mastodon.social.csv",
        }, {
            "blocker": "rage.love",
            "csv_url": "mastodon/rage.love.csv",
        }, {
            "blocker": "sunny.garden",
            "csv_url": "mastodon/sunny.garden.csv",
        }, {
            "blocker": "sunny.garden",
            "csv_url": "mastodon/gardenfence.csv",
        }, {
            "blocker": "solarpunk.moe",
            "csv_url": "mastodon/solarpunk.moe.csv",
        }, {
            "blocker": "toot.wales",
            "csv_url": "mastodon/toot.wales.csv",
        }, {
            "blocker": "union.place",
            "csv_url": "mastodon/union.place.csv",
        }, {
            "blocker": "oliphant.social",
            "csv_url": "mastodon/birdsite.csv",
        },
    )

    # Domains handled so far across all lists (also used for total-block counts).
    domains = list()

    logger.debug("Downloading %d files ...", len(blocklists))
    for block in blocklists:
        # Is domain given and not equal blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue
        elif args.domain in domains:
            logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
            continue

        logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
        response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.content()=%d", response.ok, response.status_code, len(response.content))
        if not response.ok or response.status_code >= 300 or response.content == "":
            logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
            continue

        logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
        reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")

        # Blocks to announce via the bot for this blocker.
        blockdict = list()

        for row in reader:
            logger.debug("row[%s]='%s'", type(row), row)
            domain = severity = None
            reject_media = reject_reports = False

            # Columns may or may not carry a leading '#' depending on the list.
            if "#domain" in row:
                domain = row["#domain"]
            elif "domain" in row:
                domain = row["domain"]
            else:
                logger.debug("row='%s' does not contain domain column", row)
                continue

            if "#severity" in row:
                severity = blocks.alias_block_level(row["#severity"])
            elif "severity" in row:
                severity = blocks.alias_block_level(row["severity"])
            else:
                logger.debug("row='%s' does not contain severity column", row)
                continue

            if "#reject_media" in row and row["#reject_media"].lower() == "true":
                reject_media = True
            elif "reject_media" in row and row["reject_media"].lower() == "true":
                reject_media = True

            if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
                reject_reports = True
            elif "reject_reports" in row and row["reject_reports"].lower() == "true":
                reject_reports = True

            logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif domain.endswith(".onion"):
                logger.debug("domain='%s' is a TOR .onion domain - SKIPPED", domain)
                continue
            elif domain.endswith(".arpa"):
                logger.debug("domain='%s' is a reverse IP address - SKIPPED", domain)
                continue
            elif domain.endswith(".tld"):
                logger.debug("domain='%s' is a fake domain - SKIPPED", domain)
                continue
            elif domain.find("*") >= 0 or domain.find("?") >= 0:
                # Obfuscated entry (wildcards) - try to resolve the real domain.
                logger.debug("domain='%s' is obfuscated - Invoking utils.deobfuscate(%s, %s) ...", domain, domain, block["blocker"])
                domain = utils.deobfuscate(domain, block["blocker"])
                logger.debug("domain='%s' - AFTER!", domain)

            # BUGFIX: the original debug call had a '%s' placeholder but no argument.
            if not validators.domain(domain):
                logger.debug("domain='%s' is not a valid domain - SKIPPED!", domain)
                continue
            elif blacklist.is_blacklisted(domain):
                logger.warning("domain='%s' is blacklisted - SKIPPED!", domain)
                continue
            elif blocks.is_instance_blocked(block["blocker"], domain, severity):
                logger.debug("block[blocker]='%s' has already blocked domain='%s' with severity='%s' - SKIPPED!", block["blocker"], domain, severity)
                continue

            logger.debug("Marking domain='%s' as handled", domain)
            domains.append(domain)

            logger.debug("Processing domain='%s' ...", domain)
            processed = processing.domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
            logger.debug("processed='%s'", processed)

            if processing.block(block["blocker"], domain, None, severity) and config.get("bot_enabled"):
                # BUGFIX: these static blocklist dicts have no 'block_level'/'reason'
                # keys; referencing them raised KeyError. Use local values instead.
                logger.debug("Appending blocked='%s',severity='%s' for blocker='%s' ...", domain, severity, block["blocker"])
                blockdict.append({
                    "blocked": domain,
                    "reason" : None,
                })

            if reject_media:
                processing.block(block["blocker"], domain, None, "reject_media")
            if reject_reports:
                processing.block(block["blocker"], domain, None, "reject_reports")

        logger.debug("block[blocker]='%s'", block["blocker"])
        # chaos.social is excluded from total-block accounting by convention here.
        if block["blocker"] != "chaos.social":
            logger.debug("Invoking instances.set_total_blocks(%s, domains()=%d) ...", block["blocker"], len(domains))
            instances.set_total_blocks(block["blocker"], domains)

        logger.debug("Checking if blocker='%s' has pending updates ...", block["blocker"])
        if instances.has_pending(block["blocker"]):
            logger.debug("Flushing updates for block[blocker]='%s' ...", block["blocker"])
            instances.update_data(block["blocker"])

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
            network.send_bot_post(block["blocker"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_txt(args: argparse.Namespace) -> int:
    """Import plain-text block lists (one domain per line).

    Currently only seirdy.one's published list is configured. Each line is
    tidied up and handed to generic domain processing under the publishing
    blocker. Returns 0.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Static list of text files and the instance that publishes them.
    urls = (
        {
            "blocker": "seirdy.one",
            "url"    : "https://seirdy.one/pb/bsl.txt",
        },
    )

    logger.info("Checking %d text file(s) ...", len(urls))
    for row in urls:
        logger.debug("Fetching row[url]='%s' ...", row["url"])
        response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code < 300 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain)

                logger.debug("domain='%s' - AFTER!", domain)
                if domain == "":
                    logger.debug("domain is empty - SKIPPED!")
                    continue
                elif not utils.is_domain_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue
                elif instances.is_recent(domain):
                    logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                    continue

                logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
                processed = processing.domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)
                if not processed:
                    logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
                    continue

    logger.debug("Success! - EXIT!")
    return 0
def fetch_fedipact(args: argparse.Namespace) -> int:
    """Crawl instances listed on fedipact.online.

    Scrapes the signatory list (one <li> per instance), normalizes each
    domain to punycode and fetches its peers. Returns 0.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fedipact.online"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    response = utils.fetch_url(
        f"https://{source_domain}",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and response.text != "":
        logger.debug("Parsing %d Bytes ...", len(response.text))

        doc = bs4.BeautifulSoup(response.text, "html.parser")
        logger.debug("doc[]='%s'", type(doc))

        rows = doc.findAll("li")
        logger.info("Checking %d row(s) ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            # First text node of the list item is the instance's domain.
            domain = tidyup.domain(row.contents[0])

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.info("Fetching domain='%s' ...", domain)
            federation.fetch_instances(domain, "beach.city", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_joinmisskey(args: argparse.Namespace) -> int:
    """Crawl Misskey instances listed by instanceapp.misskey.page.

    Fetches instances.json and runs a peer fetch for every wanted,
    not-yet-registered entry. Returns 0 on success, non-zero when the
    JSON lacks the expected 'instancesInfos' element.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "instanceapp.misskey.page"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    # NOTE(review): json.loads() needs the response body, hence .text here - confirm
    # against utils.fetch_url()'s return type.
    raw = utils.fetch_url(
        f"https://{source_domain}/instances.json",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    parsed = json.loads(raw)
    logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))

    if "instancesInfos" not in parsed:
        # BUGFIX: the '%d' placeholder previously had no argument supplied.
        logger.warning("parsed()=%d does not contain element 'instancesInfos'", len(parsed))
        return 1

    logger.info("Checking %d instance(s) ...", len(parsed["instancesInfos"]))
    for row in parsed["instancesInfos"]:
        logger.debug("row[%s]='%s'", type(row), row)
        if "url" not in row:
            logger.warning("row()=%d does not have element 'url' - SKIPPED!", len(row))
            continue
        elif not utils.is_domain_wanted(row["url"]):
            logger.debug("row[url]='%s' is not wanted - SKIPPED!", row["url"])
            continue
        elif instances.is_registered(row["url"]):
            logger.debug("row[url]='%s' is already registered - SKIPPED!", row["url"])
            continue

        logger.info("Fetching row[url]='%s' ...", row["url"])
        federation.fetch_instances(row["url"], "misskey.io", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_joinfediverse(args: argparse.Namespace) -> int:
    """Scrape the joinfediverse.wiki FediBlock tables and record the blocks.

    Parses every 'wikitable': header rows define which columns are scrapable
    (domain/instance/subdomain(s)/block reason(s)); data rows become block
    records. Subdomain entries are expanded to full domains. The resulting
    list is attributed to all locally known 'climatejustice.*' blockers.
    Returns 0.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "joinfediverse.wiki"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    # NOTE(review): bs4 needs the HTML body, hence .text - confirm against
    # utils.fetch_url()'s return type.
    raw = utils.fetch_url(
        f"https://{source_domain}/FediBlock",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    tables = doc.findAll("table", {"class": "wikitable"})

    logger.info("Analyzing %d table(s) ...", len(tables))
    blocklist = list()

    for table in tables:
        logger.debug("table[]='%s'", type(table))

        rows = table.findAll("tr")
        logger.info("Checking %d row(s) ...", len(rows))
        # Maps column position -> scrapable column name, per table.
        block_headers = dict()
        for row in rows:
            logger.debug("row[%s]='%s'", type(row), row)

            headers = row.findAll("th")
            logger.debug("Found headers()=%d header(s)", len(headers))
            if len(headers) > 1:
                # Header row: (re-)learn which columns we can scrape.
                block_headers = dict()
                cnt = 0
                for header in headers:
                    cnt = cnt + 1
                    logger.debug("header[]='%s',cnt=%d", type(header), cnt)
                    text = header.contents[0]

                    logger.debug("text[]='%s'", type(text))
                    if not isinstance(text, str):
                        logger.debug("text[]='%s' is not of type 'str' - SKIPPED!", type(text))
                        continue
                    elif validators.domain(text.strip()):
                        logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
                        continue

                    text = tidyup.domain(text.strip())
                    logger.debug("text='%s' - AFTER!", text)
                    if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
                        logger.debug("Found header: '%s'=%d", text, cnt)
                        block_headers[cnt] = text

            elif len(block_headers) == 0:
                logger.debug("row is not scrapable - SKIPPED!")
                continue
            elif len(block_headers) > 0:
                # Data row under known headers: collect one block record.
                logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
                cnt = 0
                block = dict()

                for element in row.find_all(["th", "td"]):
                    cnt = cnt + 1
                    logger.debug("element[]='%s',cnt=%d", type(element), cnt)
                    if cnt in block_headers:
                        logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])

                        text = element.text.strip()
                        # domain/instance columns both map to the 'blocked' key.
                        key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"

                        logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
                        if key == "blocked":
                            block[key] = text
                        elif key == "reason":
                            block[key] = tidyup.reason(text)
                        elif key == "subdomain(s)":
                            block[key] = list()
                            if text != "":
                                block[key] = text.split("/")
                        else:
                            logger.debug("key='%s'", key)
                            block[key] = text

                logger.debug("block()=%d ...", len(block))
                if len(block) > 0:
                    logger.debug("Appending block()=%d ...", len(block))
                    blocklist.append(block)

    logger.debug("blocklist()=%d", len(blocklist))

    database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
    domains = database.cursor.fetchall()

    logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
    blocking = list()
    for block in blocklist:
        logger.debug("block='%s'", block)
        if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
            origin = block["blocked"]
            logger.debug("origin='%s'", origin)
            for subdomain in block["subdomain(s)"]:
                # BUGFIX: copy the record per subdomain; appending the shared
                # dict left every entry aliased to the last subdomain.
                entry = dict(block)
                entry["blocked"] = subdomain + "." + origin
                logger.debug("block[blocked]='%s'", entry["blocked"])
                blocking.append(entry)
        else:
            blocking.append(block)

    # BUGFIX: '%d' was given the list itself; it needs the length.
    logger.debug("blocking()=%d", len(blocking))
    for block in blocking:
        logger.debug("block[]='%s'", type(block))
        if "blocked" not in block:
            raise KeyError(f"block()={len(block)} does not have element 'blocked'")

        block["blocked"] = tidyup.domain(block["blocked"]).encode("idna").decode("utf-8")
        logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])

        if block["blocked"] == "":
            logger.debug("block[blocked] is empty - SKIPPED!")
            continue
        elif not utils.is_domain_wanted(block["blocked"]):
            logger.debug("block[blocked]='%s' is not wanted - SKIPPED!", block["blocked"])
            continue
        elif instances.is_recent(block["blocked"]):
            logger.debug("block[blocked]='%s' has been recently checked - SKIPPED!", block["blocked"])
            continue

        logger.info("Proccessing blocked='%s' ...", block["blocked"])
        processing.domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)

    blockdict = list()
    for blocker in domains:
        blocker = blocker[0]
        logger.debug("blocker[%s]='%s'", type(blocker), blocker)

        for block in blocking:
            logger.debug("block[blocked]='%s',block[block reason(s)]='%s' - BEFORE!", block["blocked"], block["block reason(s)"] if "block reason(s)" in block else None)
            block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None

            logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
            if processing.block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
                # BUGFIX: wiki records carry no 'block_level' key (KeyError);
                # the level here is always "reject".
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            # BUGFIX: message previously had an unbalanced quote ("blocker='%s,").
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0
def recheck_obfuscation(args: argparse.Namespace) -> int:
    """Re-check instances flagged with obfuscated block lists.

    For each flagged instance (optionally filtered by args.domain or
    args.software) the block list is re-fetched with the software-specific
    fetcher, wildcard-obfuscated entries are deobfuscated, and newly resolved
    blocks are recorded. Clears the has_obfuscation flag once a list is
    fully deobfuscated. Returns 0.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if isinstance(args.domain, str) and args.domain != "" and utils.is_domain_wanted(args.domain):
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
    # NOTE(review): comparing validators.domain() (truthy object) to the string
    # looks suspicious but is kept as-is to preserve behavior - confirm intent.
    elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
    else:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")

    rows = database.cursor.fetchall()
    logger.info("Checking %d domains ...", len(rows))
    for row in rows:
        logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        if (args.force is None or not args.force) and args.domain is None and args.software is None and instances.is_recent(row["domain"], "last_blocked"):
            logger.debug("row[domain]='%s' has been recently checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force))
            continue

        # Dispatch to the software-specific block-list fetcher.
        blocking = list()
        if row["software"] == "pleroma":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
        elif row["software"] == "mastodon":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
        elif row["software"] == "lemmy":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
        elif row["software"] == "friendica":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = friendica.fetch_blocks(row["domain"])
        elif row["software"] == "misskey":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = misskey.fetch_blocks(row["domain"])
        else:
            # BUGFIX: typo "sofware" in log message.
            logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])

        logger.debug("row[domain]='%s'", row["domain"])
        # chaos.social requires special care ...
        if row["domain"] != "chaos.social":
            logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
            instances.set_total_blocks(row["domain"], blocking)

        # Per-instance counters: still-obfuscated entries and bot announcements.
        obfuscated = 0
        blockdict = list()

        logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
        for block in blocking:
            logger.debug("block[blocked]='%s'", block["blocked"])
            blocked = None

            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
                logger.debug("block='%s' is obfuscated.", block["blocked"])
                obfuscated = obfuscated + 1
                blocked = utils.deobfuscate(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
            elif not utils.is_domain_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
                logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
                continue

            logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
            if blocked is not None and blocked != block["blocked"]:
                logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
                # Successfully resolved, so it no longer counts as obfuscated.
                obfuscated = obfuscated - 1
                if blocks.is_instance_blocked(row["domain"], blocked):
                    logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
                    continue

                block["block_level"] = blocks.alias_block_level(block["block_level"])

                logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
                if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
                    blockdict.append({
                        "blocked": blocked,
                        "reason" : block["reason"],
                    })

        logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
        if obfuscated == 0 and len(blocking) > 0:
            logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
            instances.set_has_obfuscation(row["domain"], False)

        if instances.has_pending(row["domain"]):
            logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
            instances.update_data(row["domain"])

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            # BUGFIX: message previously had an unbalanced quote ("blocker='%s,").
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", row["domain"], len(blockdict))
            network.send_bot_post(row["domain"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_fedilist(args: argparse.Namespace) -> int:
    """Crawl instances from demo.fedilist.com's CSV export.

    Optionally filters the export by args.software; each row's hostname is
    tidied, punycode-normalized and fed to the peer fetcher. Returns 0 on
    success, non-zero when the download or CSV parse fails.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "demo.fedilist.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    # Onion instances are excluded server-side via onion=not.
    url = f"http://{source_domain}/instance/csv?onion=not"
    if args.software is not None and args.software != "":
        logger.debug("args.software='%s'", args.software)
        url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"

    logger.info("Fetching url='%s' ...", url)
    response = reqto.get(
        url,
        headers=network.web_headers,
        timeout=(config.get("connection_timeout"), config.get("read_timeout")),
        allow_redirects=False
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if not response.ok or response.status_code >= 300 or len(response.content) == 0:
        logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.text))
        return 1

    reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")

    logger.debug("reader[]='%s'", type(reader))
    if reader is None:
        logger.warning("Failed parsing response.content()=%d as CSV content", len(response.content))
        return 2

    # Materialize so we can log the row count up front.
    rows = list(reader)

    logger.info("Checking %d rows ...", len(rows))
    for row in rows:
        logger.debug("row[]='%s'", type(row))
        if "hostname" not in row:
            logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(row))
            continue

        logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"])
        domain = tidyup.domain(row["hostname"])
        logger.debug("domain='%s' - AFTER!", domain)

        if domain == "":
            logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"])
            continue

        logger.debug("domain='%s' - BEFORE!", domain)
        domain = domain.encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not utils.is_domain_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            continue
        elif (args.force is None or not args.force) and instances.is_registered(domain):
            logger.debug("domain='%s' is already registered, --force not specified: args.force[]='%s'", domain, type(args.force))
            continue
        elif instances.is_recent(domain):
            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
            continue

        logger.info("Fetching instances from domain='%s' ...", domain)
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
def update_nodeinfo(args: argparse.Namespace) -> int:
    """Re-determine the software type of instances via their nodeinfo.

    Selection: a single domain (args.domain), all rows of one software
    (args.software), or every row with a stale/missing last_nodeinfo.
    Records software changes, success/error state and the nodeinfo
    timestamp per row. Returns 0.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        logger.debug("Fetching args.domain='%s'", args.domain)
        database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ?", [args.domain])
    elif args.software is not None and args.software != "":
        logger.info("Fetching domains for args.software='%s'", args.software)
        database.cursor.execute("SELECT domain, software FROM instances WHERE software = ?", [args.software])
    else:
        logger.info("Fetching domains for recently updated ...")
        database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL", [time.time() - config.get("recheck_nodeinfo")])

    domains = database.cursor.fetchall()

    logger.info("Checking %d domain(s) ...", len(domains))
    cnt = 0
    for row in domains:
        logger.debug("row[]='%s'", type(row))
        try:
            # Progress percentage is derived from how many rows are done so far.
            logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
            software = federation.determine_software(row["domain"])

            logger.debug("Determined software='%s'", software)
            if (software != row["software"] and software is not None) or args.force is True:
                logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
                instances.set_software(row["domain"], software)

            instances.set_success(row["domain"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
            instances.set_last_error(row["domain"], exception)

        instances.set_last_nodeinfo(row["domain"])
        instances.update_data(row["domain"])
        cnt = cnt + 1

    logger.debug("Success! - EXIT!")
    return 0
def fetch_instances_social(args: argparse.Namespace) -> int:
    """Crawl instances listed by the instances.social API.

    Requires the 'instances_social_api_key' config value (Bearer token).
    Every wanted, new, not-recently-crawled instance name from the list
    endpoint is punycode-normalized and fed to the peer fetcher.
    Returns 0 on success, non-zero on configuration or API errors.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "instances.social"

    if config.get("instances_social_api_key") == "":
        logger.error("API key not set. Please set in your config.json file.")
        return 1
    elif sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    headers = {
        "Authorization": f"Bearer {config.get('instances_social_api_key')}",
    }

    fetched = network.get_json_api(
        source_domain,
        "/api/1.0/instances/list?count=0&sort_by=name",
        headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )
    logger.debug("fetched[]='%s'", type(fetched))

    if "error_message" in fetched:
        logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
        return 2
    elif "exception" in fetched:
        logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
        return 3
    elif "json" not in fetched:
        logger.warning("fetched has no element 'json' - EXIT!")
        return 4
    elif "instances" not in fetched["json"]:
        logger.warning("fetched[row] has no element 'instances' - EXIT!")
        return 5

    # De-duplicates domains within this run.
    domains = list()
    rows = fetched["json"]["instances"]

    logger.info("Checking %d row(s) ...", len(rows))
    for row in rows:
        logger.debug("row[]='%s'", type(row))
        domain = tidyup.domain(row["name"])
        logger.debug("domain='%s' - AFTER!", domain)

        if domain == "":
            logger.debug("domain is empty - SKIPPED!")
            continue

        logger.debug("domain='%s' - BEFORE!", domain)
        domain = domain.encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not utils.is_domain_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            continue
        elif domain in domains:
            logger.debug("domain='%s' is already added - SKIPPED!", domain)
            continue
        elif instances.is_registered(domain):
            logger.debug("domain='%s' is already registered - SKIPPED!", domain)
            continue
        elif instances.is_recent(domain):
            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
            continue

        logger.info("Fetching instances from domain='%s'", domain)
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
def convert_idna(args: argparse.Namespace) -> int:
    """
    Convert all non-punycode domain columns in the database to their IDNA
    (ASCII/punycode) representation.

    Processes four columns in turn: instances.domain, instances.origin,
    blocks.blocker and blocks.blocked. Rows already containing 'xn--' are
    excluded by the SQL filter.

    Parameters:
        args: Parsed command-line arguments (not inspected beyond logging).

    Returns:
        0 on success.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    database.cursor.execute("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    instances.translate_idnas(rows, "domain")

    database.cursor.execute("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    instances.translate_idnas(rows, "origin")

    database.cursor.execute("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    blocks.translate_idnas(rows, "blocker")

    database.cursor.execute("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    blocks.translate_idnas(rows, "blocked")

    logger.debug("Success! - EXIT!")
    # Explicit success code so the annotated int return type always holds.
    return 0