# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
import argparse
import csv
import inspect
import json
import logging
import time

from urllib.parse import urlparse

import atoma
import bs4
import markdown
import reqto
import validators

from fba import csrf
from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import locking
from fba.helpers import processing
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances
from fba.models import sources

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)
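
# Command-level sanity check: verifies that the given args.domain is a valid,
# non-blacklisted and not yet registered domain name.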
def check_instance(args: argparse.Namespace) -> int:
    logger.debug("args.domain='%s' - CALLED!", args.domain)

    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status

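# Scans all registered instances and reports rows whose nodeinfo URL does not
# point to the instance's own (punycode-aware) domain.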
def check_nodeinfo(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")

    cnt = 0
    for row in database.cursor.fetchall():
        logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        punycode = row["domain"].encode("idna").decode("utf-8")

        if row["nodeinfo_url"].startswith("/"):
            logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
            continue
        elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
            logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
            cnt = cnt + 1

    logger.info("Found %d row(s)", cnt)

    logger.debug("EXIT!")
    return 0

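# Imports new instances from the pixelfed.org server directory API.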
def fetch_pixelfed_api(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF by default, you don't need to add network.source_headers here yourself
    source_domain = "pixelfed.org"

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    try:
        logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
        headers = csrf.determine(source_domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
        return 1

    try:
        logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            source_domain,
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 1
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 1

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif row["domain"] == "":
                logger.debug("row[domain] is empty - SKIPPED!")
                continue

            logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
            domain = row["domain"].encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Fetching instances from domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    except network.exceptions as exception:
        logger.warning("Cannot fetch JSON API,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 1

    logger.debug("Success! - EXIT!")
    return 0

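# Fetches a domain list from the gql.api.bka.li GraphQL endpoint and registers
# any new, wanted instances.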
def fetch_bkali(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "gql.api.bka.li"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()
    try:
        logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
        fetched = network.post_json_api(
            source_domain,
            "/v1/graphql",
            json.dumps({
                "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
            })
        )

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for source_domain='%s' returned error message='%s'", source_domain, fetched["error_message"])
            return 1
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
            return 1

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif entry["domain"] == "":
                logger.debug("entry[domain] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(entry["domain"]):
                logger.warning("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
                continue
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 1

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0

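# Crawls block ("defederation") lists from registered instances and stores the
# individual blocks, deobfuscating wildcarded entries where possible.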
def fetch_blocks(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for single args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
        blocker = tidyup.domain(blocker)
        logger.debug("blocker='%s' - AFTER!", blocker)

        if blocker == "":
            logger.warning("blocker is now empty!")
            continue
        elif nodeinfo_url is None or nodeinfo_url == "":
            logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
            continue
        elif not utils.is_domain_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("blocker='%s'", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        if software == "pleroma":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
        elif software == "mastodon":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
        elif software == "lemmy":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
        elif software == "friendica":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = friendica.fetch_blocks(blocker)
        elif software == "misskey":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = misskey.fetch_blocks(blocker)
        else:
            logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)
            continue

        logger.debug("blocker='%s'", blocker)
        if blocker != "chaos.social":
            logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
            instances.set_total_blocks(blocker, blocking)

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        blockdict = list()
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"]  = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some obscure them with question marks, not sure if that's dependent on version or not
                row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]

            logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue

            logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
            block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
            logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])

            if not utils.is_domain_wanted(block["blocked"]):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            block["block_level"] = blocks.alias_block_level(block["block_level"])

            if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0

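# Imports instances per software type from fediverse.observer.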
def fetch_observer(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fediverse.observer"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    types = list()
    if args.software is None:
        logger.info("Fetching software list ...")
        raw = utils.fetch_url(
            f"https://{source_domain}",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        ).text
        logger.debug("raw[%s]()=%d", type(raw), len(raw))

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        navbar = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"})
        logger.debug("navbar[]='%s'", type(navbar))
        if navbar is None:
            logger.warning("Cannot find navigation bar, cannot continue!")
            return 1

        items = navbar.findAll("a", {"class": "dropdown-item"})
        logger.debug("items[]='%s'", type(items))

        logger.info("Checking %d menu items ...", len(items))
        for item in items:
            logger.debug("item[%s]='%s'", type(item), item)
            if item.text.lower() == "all":
                logger.debug("Skipping 'All' menu entry ...")
                continue

            logger.debug("Appending item.text='%s' ...", item.text)
            types.append(tidyup.domain(item.text))
    else:
        logger.info("Adding args.software='%s' as type ...", args.software)
        types.append(args.software)

    logger.info("Fetching table data for %d software type(s) ...", len(types))
    for software in types:
        logger.debug("software='%s' - BEFORE!", software)
        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            continue

        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://{source_domain}/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()
            logger.debug("domain='%s' - AFTER!", domain)

            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' is recently being handled - SKIPPED!", domain)
                continue

            software = software_helper.alias(software)
            logger.info("Fetching instances for domain='%s'", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

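# Imports the silenced/suspended server lists published in the todon.eu wiki.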
def fetch_todon_wiki(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "wiki.todon.eu"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    blocklist = {
        "silenced": list(),
        "reject"  : list(),
    }

    raw = utils.fetch_url(f"https://{source_domain}/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "todon.eu"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_total_blocks(blocker, blocking)

    blockdict = list()
    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if blocks.is_instance_blocked(blocker, blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", blocked, block_level, blocker)
                blockdict.append({
                    "blocked": blocked,
                    "reason" : None,
                })

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
        network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update_data(blocker)

    logger.debug("Success! - EXIT!")
    return 0

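# Imports chaos.social's federation.md block list from raw.githubusercontent.com.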
def fetch_cs(args: argparse.Namespace):
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Markdown extensions needed to render the tables in federation.md
    extensions = ["tables"]

    blocklist = {
        "silenced": list(),
        "reject"  : list(),
    }

    source_domain = "raw.githubusercontent.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    raw = utils.fetch_url(f"https://{source_domain}/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc()=%d[]='%s'", len(doc), type(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    blocklist["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    blocklist["reject"] = federation.find_domains(blocked)

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "chaos.social"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_total_blocks(blocker, blocking)

    logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
    if len(blocking) > 0:
        blockdict = list()
        for block_level in blocklist:
            logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))

            for row in blocklist[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if "domain" not in row:
                    logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
                    continue
                elif not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
                    blockdict.append({
                        "blocked": row["domain"],
                        "reason" : row["reason"],
                    })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update_data(blocker)

    logger.debug("Success! - EXIT!")

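# Imports instance domains from an FBA-specific RSS feed given via args.feed.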
def fetch_fba_rss(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    domains = list()

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    components = urlparse(args.feed)

    if sources.is_recent(components.netloc):
        logger.info("API from components.netloc='%s' has recently been accessed - EXIT!", components.netloc)
        return 0
    else:
        logger.debug("components.netloc='%s' has not been recently used, marking ...", components.netloc)
        sources.update(components.netloc)

    logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item='%s'", item)
            domain = tidyup.domain(item.link.split("=")[1])

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)

            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0

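# Imports instance domains linked from the FBA bot account's ATOM feed.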
def fetch_fbabot_atom(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "ryona.agency"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    feed = f"https://{source_domain}/users/fba/feed.atom"

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                logger.debug("element[]='%s'", type(element))
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href)

                    logger.debug("domain='%s' - AFTER!", domain)
                    if domain == "":
                        logger.debug("domain is empty - SKIPPED!")
                        continue

                    logger.debug("domain='%s' - BEFORE!", domain)
                    domain = domain.encode("idna").decode("utf-8")
                    logger.debug("domain='%s' - AFTER!", domain)

                    if not utils.is_domain_wanted(domain):
                        logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)

            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0

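# Fetches peer instances, starting from args.domain and then looping over
# already registered instances that are due for a re-check.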
def fetch_instances(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("args.domain='%s' - checking ...", args.domain)
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid.", args.domain)
        return 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
        return 101

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    try:
        logger.info("Fetching instances from args.domain='%s' ...", args.domain)
        federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update_data(args.domain)
        return 100

    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] == "":
            logger.debug("row[domain] is empty - SKIPPED!")
            continue

        logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
        domain = row["domain"].encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not utils.is_domain_wanted(domain):
            logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", domain, row["origin"], row["software"], row["nodeinfo_url"])
            federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0

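# Imports the CSV block lists maintained in the oliphant/blocklists repository
# on codeberg.org.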
def fetch_oliphant(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "codeberg.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"

    blocklists = (
        {
            "blocker": "artisan.chat",
            "csv_url": "mastodon/artisan.chat.csv",
        }, {
            "blocker": "mastodon.art",
            "csv_url": "mastodon/mastodon.art.csv",
        }, {
            "blocker": "pleroma.envs.net",
            "csv_url": "mastodon/pleroma.envs.net.csv",
        }, {
            "blocker": "oliphant.social",
            "csv_url": "mastodon/_unified_tier3_blocklist.csv",
        }, {
            "blocker": "mastodon.online",
            "csv_url": "mastodon/mastodon.online.csv",
        }, {
            "blocker": "mastodon.social",
            "csv_url": "mastodon/mastodon.social.csv",
        }, {
            "blocker": "mastodon.social",
            "csv_url": "other/missing-tier0-mastodon.social.csv",
        }, {
            "blocker": "rage.love",
            "csv_url": "mastodon/rage.love.csv",
        }, {
            "blocker": "sunny.garden",
            "csv_url": "mastodon/sunny.garden.csv",
        }, {
            "blocker": "sunny.garden",
            "csv_url": "mastodon/gardenfence.csv",
        }, {
            "blocker": "solarpunk.moe",
            "csv_url": "mastodon/solarpunk.moe.csv",
        }, {
            "blocker": "toot.wales",
            "csv_url": "mastodon/toot.wales.csv",
        }, {
            "blocker": "union.place",
            "csv_url": "mastodon/union.place.csv",
        }, {
            "blocker": "oliphant.social",
            "csv_url": "mastodon/birdsite.csv",
        },
    )

    domains = list()

    logger.debug("Downloading %d files ...", len(blocklists))
    for block in blocklists:
        # Is domain given and not equal blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue
        elif args.domain in domains:
            logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
            continue

        logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
        response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.content()=%d", response.ok, response.status_code, len(response.content))
        if not response.ok or response.status_code >= 300 or response.content == "":
            logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
            continue

        logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
        reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")

        blockdict = list()
        for row in reader:
            logger.debug("row[%s]='%s'", type(row), row)
            domain = severity = None
            reject_media = reject_reports = False

            if "#domain" in row:
                domain = row["#domain"]
            elif "domain" in row:
                domain = row["domain"]
            else:
                logger.debug("row='%s' does not contain domain column", row)
                continue

            if "#severity" in row:
                severity = blocks.alias_block_level(row["#severity"])
            elif "severity" in row:
                severity = blocks.alias_block_level(row["severity"])
            else:
                logger.debug("row='%s' does not contain severity column", row)
                continue

            if "#reject_media" in row and row["#reject_media"].lower() == "true":
                reject_media = True
            elif "reject_media" in row and row["reject_media"].lower() == "true":
                reject_media = True

            if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
                reject_reports = True
            elif "reject_reports" in row and row["reject_reports"].lower() == "true":
                reject_reports = True

            logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif domain.endswith(".onion"):
                logger.debug("domain='%s' is a TOR .onion domain - SKIPPED", domain)
                continue
            elif domain.endswith(".arpa"):
                logger.debug("domain='%s' is a reverse IP address - SKIPPED", domain)
                continue
            elif domain.endswith(".tld"):
                logger.debug("domain='%s' is a fake domain - SKIPPED", domain)
                continue
            elif domain.find("*") >= 0 or domain.find("?") >= 0:
                logger.debug("domain='%s' is obfuscated - Invoking utils.deobfuscate(%s, %s) ...", domain, domain, block["blocker"])
                domain = utils.deobfuscate(domain, block["blocker"])
                logger.debug("domain='%s' - AFTER!", domain)

            if not validators.domain(domain):
                logger.debug("domain='%s' is not a valid domain - SKIPPED!", domain)
                continue
            elif blacklist.is_blacklisted(domain):
                logger.warning("domain='%s' is blacklisted - SKIPPED!", domain)
                continue

            logger.debug("Marking domain='%s' as handled", domain)
            domains.append(domain)

            logger.debug("Processing domain='%s' ...", domain)
            processed = processing.domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
            logger.debug("processed='%s'", processed)

            if processing.block(block["blocker"], domain, None, severity) and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',severity='%s' for blocker='%s' ...", domain, severity, block["blocker"])
                blockdict.append({
                    "blocked": domain,
                    "reason" : None,
                })

            if reject_media:
                processing.block(block["blocker"], domain, None, "reject_media")
            if reject_reports:
                processing.block(block["blocker"], domain, None, "reject_reports")

        logger.debug("block[blocker]='%s'", block["blocker"])
        if block["blocker"] != "chaos.social":
            logger.debug("Invoking instances.set_total_blocks(%s, domains()=%d) ...", block["blocker"], len(domains))
            instances.set_total_blocks(block["blocker"], domains)

        logger.debug("Checking if blocker='%s' has pending updates ...", block["blocker"])
        if instances.has_pending(block["blocker"]):
            logger.debug("Flushing updates for block[blocker]='%s' ...", block["blocker"])
            instances.update_data(block["blocker"])

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
            network.send_bot_post(block["blocker"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0

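# Imports plain-text block lists (one domain per line), e.g. seirdy.one's list.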
def fetch_txt(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    urls = ({
        "blocker": "seirdy.one",
        "url"    : "https://seirdy.one/pb/bsl.txt",
    },)

    logger.info("Checking %d text file(s) ...", len(urls))
    for row in urls:
        logger.debug("Fetching row[url]='%s' ...", row["url"])
        response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code < 300 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain)

                logger.debug("domain='%s' - AFTER!", domain)
                if domain == "":
                    logger.debug("domain is empty - SKIPPED!")
                    continue
                elif not utils.is_domain_wanted(domain):
                    logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                    continue
                elif instances.is_recent(domain):
                    logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                    continue

                logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
                processed = processing.domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)
                if not processed:
                    logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
                    continue

    logger.debug("Success! - EXIT!")
    return 0

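# Imports instances that signed the fedipact.online pledge.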
def fetch_fedipact(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fedipact.online"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    response = utils.fetch_url(
        f"https://{source_domain}",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and response.text != "":
        logger.debug("Parsing %d Bytes ...", len(response.text))

        doc = bs4.BeautifulSoup(response.text, "html.parser")
        logger.debug("doc[]='%s'", type(doc))

        rows = doc.findAll("li")
        logger.info("Checking %d row(s) ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            domain = tidyup.domain(row.contents[0])

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.info("Fetching domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

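# Scrapes the FediBlock tables on joinfediverse.wiki and records the listed
# blocks for the climatejustice.* instances.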
def fetch_joinfediverse(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "joinfediverse.wiki"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    raw = utils.fetch_url(
        f"https://{source_domain}/FediBlock",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    tables = doc.findAll("table", {"class": "wikitable"})

    logger.info("Analyzing %d table(s) ...", len(tables))
    blocklist = list()
    for table in tables:
        logger.debug("table[]='%s'", type(table))

        rows = table.findAll("tr")
        logger.info("Checking %d row(s) ...", len(rows))
        block_headers = dict()
        for row in rows:
            logger.debug("row[%s]='%s'", type(row), row)

            headers = row.findAll("th")
            logger.debug("Found headers()=%d header(s)", len(headers))
            if len(headers) > 1:
                block_headers = dict()
                cnt = 0
                for header in headers:
                    cnt = cnt + 1
                    logger.debug("header[]='%s',cnt=%d", type(header), cnt)
                    text = header.contents[0]

                    logger.debug("text[]='%s'", type(text))
                    if not isinstance(text, str):
                        logger.debug("text[]='%s' is not of type 'str' - SKIPPED!", type(text))
                        continue
                    elif validators.domain(text.strip()):
                        logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
                        continue

                    text = tidyup.domain(text.strip())
                    logger.debug("text='%s' - AFTER!", text)
                    if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
                        logger.debug("Found header: '%s'=%d", text, cnt)
                        block_headers[cnt] = text

            elif len(block_headers) == 0:
                logger.debug("row is not scrapable - SKIPPED!")
                continue
            elif len(block_headers) > 0:
                logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
                cnt = 0
                block = dict()

                for element in row.find_all(["th", "td"]):
                    cnt = cnt + 1
                    logger.debug("element[]='%s',cnt=%d", type(element), cnt)
                    if cnt in block_headers:
                        logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])

                        text = element.text.strip()
                        key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"

                        logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
                        if key in ["domain", "instance"]:
                            block[key] = text
                        elif key == "reason":
                            block[key] = tidyup.reason(text)
                        elif key == "subdomain(s)":
                            block[key] = list()
                            if text != "":
                                block[key] = text.split("/")
                        else:
                            logger.debug("key='%s'", key)
                            block[key] = text

                logger.debug("block()=%d ...", len(block))
                if len(block) > 0:
                    logger.debug("Appending block()=%d ...", len(block))
                    blocklist.append(block)

    logger.debug("blocklist()=%d", len(blocklist))

    database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
    domains = database.cursor.fetchall()

    logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
    blocking = list()
    for block in blocklist:
        logger.debug("block='%s'", block)
        if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
            origin = block["blocked"]
            logger.debug("origin='%s'", origin)
            for subdomain in block["subdomain(s)"]:
                block["blocked"] = subdomain + "." + origin
                logger.debug("block[blocked]='%s'", block["blocked"])
                blocking.append(block)
        else:
            blocking.append(block)

    logger.debug("blocking()=%d", len(blocking))
    for block in blocking:
        logger.debug("block[]='%s'", type(block))
        if "blocked" not in block:
            raise KeyError(f"block()={len(block)} does not have element 'blocked'")

        block["blocked"] = tidyup.domain(block["blocked"]).encode("idna").decode("utf-8")
        logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])

        if block["blocked"] == "":
            logger.debug("block[blocked] is empty - SKIPPED!")
            continue
        elif not utils.is_domain_wanted(block["blocked"]):
            logger.warning("block[blocked]='%s' is not wanted - SKIPPED!", block["blocked"])
            continue
        elif instances.is_recent(block["blocked"]):
            logger.debug("block[blocked]='%s' has been recently checked - SKIPPED!", block["blocked"])
            continue

        logger.info("Processing blocked='%s' ...", block["blocked"])
        processing.domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)

    blockdict = list()
    for blocker in domains:
        blocker = blocker[0]
        logger.debug("blocker[%s]='%s'", type(blocker), blocker)

        for block in blocking:
            logger.debug("block[blocked]='%s',block[block reason(s)]='%s' - BEFORE!", block["blocked"], block["block reason(s)"] if "block reason(s)" in block else None)
            block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None

            logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(block["blocked"]):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
            if processing.block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0

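# Re-checks instances flagged with obfuscated block lists and tries to
# deobfuscate wildcarded/hashed entries against known domains.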
def recheck_obfuscation(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if isinstance(args.domain, str) and args.domain != "" and utils.is_domain_wanted(args.domain):
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
    elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
    else:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")

    rows = database.cursor.fetchall()
    logger.info("Checking %d domains ...", len(rows))
    for row in rows:
        logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        if (args.all is None or not args.all) and instances.is_recent(row["domain"]) and args.domain is None and args.software is None:
            logger.debug("row[domain]='%s' has been recently checked, args.all[]='%s' - SKIPPED!", row["domain"], type(args.all))
            continue

        if row["software"] == "pleroma":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
        elif row["software"] == "mastodon":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
        elif row["software"] == "lemmy":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
        elif row["software"] == "friendica":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = friendica.fetch_blocks(row["domain"])
        elif row["software"] == "misskey":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = misskey.fetch_blocks(row["domain"])
        else:
            logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])
            continue

        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] != "chaos.social":
            logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
            instances.set_total_blocks(row["domain"], blocking)

        obfuscated = 0
        blockdict = list()

        logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
        for block in blocking:
            logger.debug("block[blocked]='%s'", block["blocked"])

            blocked = None
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
                logger.debug("block='%s' is obfuscated.", block["blocked"])
                obfuscated = obfuscated + 1
                blocked = utils.deobfuscate(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
            elif not utils.is_domain_wanted(block["blocked"]):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
                logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
                continue

            logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
            if blocked is not None and blocked != block["blocked"]:
                logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
                obfuscated = obfuscated - 1
                if blocks.is_instance_blocked(row["domain"], blocked):
                    logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
                    continue

                block["block_level"] = blocks.alias_block_level(block["block_level"])

                logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
                if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
                    blockdict.append({
                        "blocked": blocked,
                        "reason" : block["reason"],
                    })

        logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
        if obfuscated == 0 and len(blocking) > 0:
            logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
            instances.set_has_obfuscation(row["domain"], False)

        if instances.has_pending(row["domain"]):
            logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
            instances.update_data(row["domain"])

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", row["domain"], len(blockdict))
            network.send_bot_post(row["domain"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0

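# Imports instances from the demo.fedilist.com CSV export.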
1531 def fetch_fedilist(args: argparse.Namespace) -> int:
1532 logger.debug("args[]='%s' - CALLED!", type(args))
1534 logger.debug("Invoking locking.acquire() ...")
1537 source_domain = "demo.fedilist.com"
1538 if sources.is_recent(source_domain):
1539 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1542 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1543 sources.update(source_domain)
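# Build the CSV export URL; when args.software is given, the list is narrowed to that software type.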
1545 url = f"http://{source_domain}/instance/csv?onion=not"
1546 if args.software is not None and args.software != "":
1547 logger.debug("args.software='%s'", args.software)
1548 url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"
1550 logger.info("Fetching url='%s' ...", url)
1551 response = reqto.get(
1553 headers=network.web_headers,
1554 timeout=(config.get("connection_timeout"), config.get("read_timeout")),
1555 allow_redirects=False
1558 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1559 if not response.ok or response.status_code >= 300 or len(response.content) == 0:
1560 logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.text))
1563 reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
1565 logger.debug("reader[]='%s'", type(reader))
1567 logger.debug("row[]='%s'", type(row))
1568 if "hostname" not in row:
1569 logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(row))
1572 logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"])
1573 domain = tidyup.domain(row["hostname"])
1574 logger.debug("domain='%s' - AFTER!", domain)
1577 logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"])
1580 logger.debug("domain='%s' - BEFORE!", domain)
1581 domain = domain.encode("idna").decode("utf-8")
1582 logger.debug("domain='%s' - AFTER!", domain)
1584 if not utils.is_domain_wanted(domain):
1585 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1587 elif (args.all is None or not args.all) and instances.is_registered(domain):
1588 logger.debug("domain='%s' is already registered, --all not specified: args.all[]='%s'", domain, type(args.all))
1590 elif instances.is_recent(domain):
1591 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1594 logger.info("Fetching instances from domain='%s' ...", domain)
1595 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1597 logger.debug("Success! - EXIT!")
1600 def update_nodeinfo(args: argparse.Namespace) -> int:
1601 logger.debug("args[]='%s' - CALLED!", type(args))
1603 logger.debug("Invoking locking.acquire() ...")
1606 if args.domain is not None and args.domain != "":
1607 logger.debug("Fetching args.domain='%s'", args.domain)
1608 database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ?", [args.domain])
1609 elif args.software is not None and args.software != "":
1610 logger.info("Fetching domains for args.software='%s'", args.software)
1611 database.cursor.execute("SELECT domain, software FROM instances WHERE software = ?", [args.software])
1613 logger.info("Fetching domains for recently updated ...")
1614 database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL", [time.time() - config.get("recheck_nodeinfo")])
1616 domains = database.cursor.fetchall()
1618 logger.info("Checking %d domain(s) ...", len(domains))
1621 logger.debug("row[]='%s'", type(row))
1623 logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
1624 software = federation.determine_software(row["domain"])
1626 logger.debug("Determined software='%s'", software)
1627 if (software != row["software"] and software is not None) or args.force is True:
1628 logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
1629 instances.set_software(row["domain"], software)
1631 instances.set_success(row["domain"])
1632 except network.exceptions as exception:
1633 logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1634 instances.set_last_error(row["domain"], exception)
1636 instances.set_last_nodeinfo(row["domain"])
1637 instances.update_data(row["domain"])
1640 logger.debug("Success! - EXIT!")
1643 def fetch_instances_social(args: argparse.Namespace) -> int:
1644 logger.debug("args[]='%s' - CALLED!", type(args))
1646 logger.debug("Invoking locking.acquire() ...")
1649 source_domain = "instances.social"
1651 if config.get("instances_social_api_key") == "":
1652 logger.error("API key not set. Please set in your config.json file.")
1654 elif sources.is_recent(source_domain):
1655 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1658 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1659 sources.update(source_domain)
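# The list endpoint is queried with a bearer token taken from the
# instances_social_api_key configuration value.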
1662 "Authorization": f"Bearer {config.get('instances_social_api_key')}",
1665 fetched = network.get_json_api(
1667 "/api/1.0/instances/list?count=0&sort_by=name",
1669 (config.get("connection_timeout"), config.get("read_timeout"))
1671 logger.debug("fetched[]='%s'", type(fetched))
1673 if "error_message" in fetched:
1674 logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1676 elif "exception" in fetched:
1677 logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1679 elif "json" not in fetched:
1680 logger.warning("fetched has no element 'json' - EXIT!")
1682 elif "instances" not in fetched["json"]:
1683 logger.warning("fetched[row] has no element 'instances' - EXIT!")
1687 rows = fetched["json"]["instances"]
1689 logger.info("Checking %d row(s) ...", len(rows))
1691 logger.debug("row[]='%s'", type(row))
1692 domain = tidyup.domain(row["name"])
1693 logger.debug("domain='%s' - AFTER!", domain)
1696 logger.debug("domain is empty - SKIPPED!")
1699 logger.debug("domain='%s' - BEFORE!", domain)
1700 domain = domain.encode("idna").decode("utf-8")
1701 logger.debug("domain='%s' - AFTER!", domain)
1703 if not utils.is_domain_wanted(domain):
1704 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1706 elif domain in domains:
1707 logger.debug("domain='%s' is already added - SKIPPED!", domain)
1709 elif instances.is_registered(domain):
1710 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1712 elif instances.is_recent(domain):
1713 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1716 logger.info("Fetching instances from domain='%s'", domain)
1717 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1719 logger.debug("Success! - EXIT!")
1722 def convert_idna(args: argparse.Namespace) -> int:
1723 logger.debug("args[]='%s' - CALLED!", type(args))
1725 database.cursor.execute("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC")
1726 rows = database.cursor.fetchall()
1728 logger.debug("rows[]='%s'", type(rows))
1729 instances.translate_idnas(rows, "domain")
1731 database.cursor.execute("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC")
1732 rows = database.cursor.fetchall()
1734 logger.debug("rows[]='%s'", type(rows))
1735 instances.translate_idnas(rows, "origin")
1737 database.cursor.execute("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC")
1738 rows = database.cursor.fetchall()
1740 logger.debug("rows[]='%s'", type(rows))
1741 blocks.translate_idnas(rows, "blocker")
1743 database.cursor.execute("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC")
1744 rows = database.cursor.fetchall()
1746 logger.debug("rows[]='%s'", type(rows))
1747 blocks.translate_idnas(rows, "blocked")
1749 logger.debug("Success! - EXIT!")