1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
23 from urllib.parse import urlparse
33 from fba import database
36 from fba.helpers import blacklist
37 from fba.helpers import config
38 from fba.helpers import cookies
39 from fba.helpers import locking
40 from fba.helpers import processing
41 from fba.helpers import software as software_helper
42 from fba.helpers import tidyup
44 from fba.http import federation
45 from fba.http import network
47 from fba.models import blocks
48 from fba.models import instances
49 from fba.models import sources
51 from fba.networks import friendica
52 from fba.networks import lemmy
53 from fba.networks import mastodon
54 from fba.networks import misskey
55 from fba.networks import pleroma
57 logging.basicConfig(level=logging.INFO)
58 logger = logging.getLogger(__name__)
59 #logger.setLevel(logging.DEBUG)
def check_instance(args: argparse.Namespace) -> int:
    """Check whether a single domain (args.domain) could be added to the
    database.

    Returns 0 when the domain is syntactically valid, not blacklisted and
    not yet registered; a distinct non-zero status code otherwise.
    """
    logger.debug("args.domain='%s' - CALLED!", args.domain)

    # NOTE(review): the status codes below reconstruct lines lost from this
    # chunk - confirm exact values against VCS history.
    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        # Domain is usable and unknown - caller may proceed to fetch it
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status
def check_nodeinfo(args: argparse.Namespace) -> int:
    """Consistency check over the instances table: warn about rows whose
    stored nodeinfo URL mentions neither the instance's domain nor its
    punycode form. Informational only; always returns 0.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    # Fetch all rows that have a nodeinfo URL recorded
    database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")

    cnt = 0
    for row in database.cursor.fetchall():
        logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        # IDNA-encode to compare against URLs that use the ASCII form
        punycode = row["domain"].encode("idna").decode("utf-8")

        if row["nodeinfo_url"].startswith("/"):
            logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
            continue
        elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
            logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
            cnt = cnt + 1

    logger.info("Found %d row(s)", cnt)

    logger.debug("EXIT!")
    return 0
def fetch_pixelfed_api(args: argparse.Namespace) -> int:
    """Fetch the public server list from the pixelfed.org API and register
    any new, wanted instances. Returns 0 on success, non-zero on error or
    when the source was queried too recently.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF by default, you don't have to add network.source_headers by yourself here
    headers = tuple()
    source_domain = "pixelfed.org"

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    try:
        logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
        headers = csrf.determine(source_domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
        # NOTE(review): reconstructed - function is annotated to return int,
        # so an int error code is returned here; confirm against VCS history.
        return 1

    try:
        logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            source_domain,
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 101
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 102

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif row["domain"] == "":
                logger.debug("row[domain] is empty - SKIPPED!")
                continue

            logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
            domain = row["domain"].encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Fetching instances from domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 103

    logger.debug("Success! - EXIT!")
    return 0
def fetch_bkali(args: argparse.Namespace) -> int:
    """Fetch the instance list from the gql.api.bka.li GraphQL API and
    register any new, wanted domains. Returns 0 on success, non-zero on
    error or when the source was queried too recently.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "gql.api.bka.li"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()
    try:
        logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
        fetched = network.post_json_api(
            source_domain,
            "/v1/graphql",
            json.dumps({
                "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
            })
        )

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for 'gql.sources.bka.li' returned error message='%s", fetched["error_message"])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s", fetched["error"]["message"])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif entry["domain"] == "":
                logger.debug("entry[domain] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(entry["domain"]):
                logger.warning("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
                continue
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0
def fetch_blocks(args: argparse.Namespace) -> int:
    """Fetch block lists from known blocker instances and record new blocks.

    Scope is selected by args: a single domain (args.domain), all instances
    of one software (args.software), or - by default - every supported
    software whose block list is older than the configured recheck interval.
    Returns 0 on success, non-zero when args.domain fails validation.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for single args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
        blocker = tidyup.domain(blocker)
        logger.debug("blocker='%s' - AFTER!", blocker)

        if blocker == "":
            logger.warning("blocker is now empty!")
            continue
        elif nodeinfo_url is None or nodeinfo_url == "":
            logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
            continue
        elif not utils.is_domain_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("blocker='%s'", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        # Dispatch to the software-specific block-list fetcher
        blocking = list()
        if software == "pleroma":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
        elif software == "mastodon":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
        elif software == "lemmy":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
        elif software == "friendica":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = friendica.fetch_blocks(blocker)
        elif software == "misskey":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = misskey.fetch_blocks(blocker)
        else:
            logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)
            continue

        logger.debug("blocker='%s'", blocker)
        # chaos.social's total is maintained by fetch_cs() from its markdown list
        if blocker != "chaos.social":
            logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
            instances.set_total_blocks(blocker, blocking)

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        blockdict = list()
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin = row["origin"]
                nodeinfo_url = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some obscure them with question marks, not sure if that's dependent on version or not
                row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin = row["origin"]
                nodeinfo_url = row["nodeinfo_url"]

            logger.debug("Looking up instance by domainm, blocked='%s'", block["blocked"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue

            logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
            block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
            logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])

            if not utils.is_domain_wanted(block["blocked"]):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            block["block_level"] = blocks.alias_block_level(block["block_level"])

            if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d'", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_observer(args: argparse.Namespace) -> int:
    """Scrape fediverse.observer for instance domains, per software type,
    and register any new, wanted instances. When args.software is set, only
    that software's table is fetched. Returns 0 on success.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fediverse.observer"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    types = list()
    if args.software is None:
        logger.info("Fetching software list ...")
        raw = utils.fetch_url(
            f"https://{source_domain}",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        ).text
        logger.debug("raw[%s]()=%d", type(raw), len(raw))

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        # The software names are the entries of the "Softwares" dropdown menu
        navbar = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"})
        logger.debug("navbar[]='%s'", type(navbar))
        if navbar is None:
            logger.warning("Cannot find navigation bar, cannot continue!")
            return 1

        items = navbar.findAll("a", {"class": "dropdown-item"})
        logger.debug("items[]='%s'", type(items))

        logger.info("Checking %d menu items ...", len(items))
        for item in items:
            logger.debug("item[%s]='%s'", type(item), item)
            if item.text.lower() == "all":
                logger.debug("Skipping 'All' menu entry ...")
                continue

            logger.debug("Appending item.text='%s' ...", item.text)
            types.append(tidyup.domain(item.text))
    else:
        logger.info("Adding args.software='%s' as type ...", args.software)
        types.append(args.software)

    logger.info("Fetching %d different table data ...", len(types))
    for software in types:
        logger.debug("software='%s' - BEFORE!", software)
        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            continue

        doc = None
        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://{source_domain}/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()
            logger.debug("domain='%s' - AFTER!", domain)

            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' is recently being handled - SKIPPED!", domain)
                continue

            # Normalize the software name before registering the instance
            software = software_helper.alias(software)
            logger.info("Fetching instances for domain='%s'", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_todon_wiki(args: argparse.Namespace) -> int:
    """Scrape wiki.todon.eu's domain-block page and record todon.eu's
    silenced/limited and suspended instances as blocks. Returns 0 on
    success.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "wiki.todon.eu"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    # The wiki lists two block levels; the blocker itself is todon.eu
    # NOTE(review): initializers reconstructed from usage below - confirm.
    blocker = "todon.eu"
    blocklist = {
        "silenced": list(),
        "reject"  : list(),
    }

    raw = utils.fetch_url(f"https://{source_domain}/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blocking = blocklist["silenced"] + blocklist["reject"]

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_total_blocks(blocker, blocking)

    blockdict = list()
    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d'", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_cs) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if blocks.is_instance_blocked(blocker, blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", blocked, block_level, blocker)
                blockdict.append({
                    "blocked": blocked,
                    "reason" : None,
                })

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
        network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update_data(blocker)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_cs(args: argparse.Namespace) -> int:
    """Fetch chaos.social's federation.md from GitHub, parse its silenced
    and blocked instance tables and record them as blocks. Returns 0 on
    success.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Markdown extensions needed to parse the tables in federation.md
    # NOTE(review): this list reconstructs lines lost from this chunk -
    # confirm against VCS history.
    extensions = [
        "extra",
        "abbr",
        "attr_list",
        "def_list",
        "fenced_code",
        "footnotes",
        "md_in_html",
        "admonition",
        "codehilite",
        "legacy_attrs",
        "legacy_em",
        "meta",
        "nl2br",
        "sane_lists",
        "smarty",
        "toc",
        "wikilinks"
    ]

    blocklist = {
        "silenced": list(),
        "reject"  : list(),
    }

    source_domain = "raw.githubusercontent.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    raw = utils.fetch_url(f"https://{source_domain}/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc()=%d[]='%s'", len(doc), type(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    blocklist["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    blocklist["reject"] = federation.find_domains(blocked)

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "chaos.social"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_total_blocks(blocker, blocking)

    logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
    blockdict = list()
    if len(blocking) > 0:
        for block_level in blocklist:
            logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))

            for row in blocklist[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if not "domain" in row:
                    logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
                    continue
                elif not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
                    blockdict.append({
                        "blocked": row["domain"],
                        "reason" : row["reason"],
                    })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

    logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
        network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update_data(blocker)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_fba_rss(args: argparse.Namespace) -> int:
    """Fetch an FBA-specific RSS feed (args.feed), extract instance domains
    from the item links and register any new, wanted instances. Returns 0
    on success.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    domains = list()

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    components = urlparse(args.feed)

    if sources.is_recent(components.netloc):
        logger.info("API from components.netloc='%s' has recently being accessed - EXIT!", components.netloc)
        return 0
    else:
        logger.debug("components.netloc='%s' has not been recently used, marking ...", components.netloc)
        sources.update(components.netloc)

    logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item='%s'", item)
            # The domain is the value of the link's query parameter
            domain = tidyup.domain(item.link.split("=")[1])

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_fbabot_atom(args: argparse.Namespace) -> int:
    """Fetch the FBA bot account's ATOM feed from ryona.agency, extract
    instance domains from the entry links and register any new, wanted
    instances. Returns 0 on success.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "ryona.agency"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    feed = f"https://{source_domain}/users/fba/feed.atom"

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                logger.debug("element[]='%s'", type(element))
                # A link may list several domains separated by commas
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href)

                    logger.debug("domain='%s' - AFTER!", domain)
                    if domain == "":
                        logger.debug("domain is empty - SKIPPED!")
                        continue

                    logger.debug("domain='%s' - BEFORE!", domain)
                    domain = domain.encode("idna").decode("utf-8")
                    logger.debug("domain='%s' - AFTER!", domain)

                    if not utils.is_domain_wanted(domain):
                        logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0
# Crawl peer instances. First fetches instances for args.domain (after
# validity and blacklist checks), then re-crawls every already-known instance
# of the whitelisted software types whose last_instance_fetch is older than
# the configured "recheck_instance" interval.
# NOTE(review): blanks, `return`, `continue` and `try:` lines were stripped by
# the extraction — confirm control flow against the full file.
886 def fetch_instances(args: argparse.Namespace) -> int:
887 logger.debug("args[]='%s' - CALLED!", type(args))
889 logger.debug("args.domain='%s' - checking ...", args.domain)
890 if not validators.domain(args.domain):
891 logger.warning("args.domain='%s' is not valid.", args.domain)
893 elif blacklist.is_blacklisted(args.domain):
894 logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
897 logger.debug("Invoking locking.acquire() ...")
902 logger.info("Fetching instances from args.domain='%s' ...", args.domain)
903 federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
904 except network.exceptions as exception:
905 logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
906 instances.set_last_error(args.domain, exception)
# Flush any pending fields gathered for args.domain even after a failure.
907 instances.update_data(args.domain)
911 logger.debug("Not fetching more instances - EXIT!")
914 # Loop through some instances
# Only instances of these known software types are re-crawled, oldest first
# (last_instance_fetch NULL or older than the recheck_instance window).
915 database.cursor.execute(
916 "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
919 rows = database.cursor.fetchall()
920 logger.info("Checking %d entries ...", len(rows))
922 logger.debug("row[domain]='%s'", row["domain"])
923 if row["domain"] == "":
924 logger.debug("row[domain] is empty - SKIPPED!")
927 logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
# Normalize stored domain to punycode before use.
928 domain = row["domain"].encode("idna").decode("utf-8")
929 logger.debug("domain='%s' - AFTER!", domain)
931 if not utils.is_domain_wanted(domain):
932 logger.warning("Domain domain='%s' is not wanted - SKIPPED!", domain)
936 logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", domain, row["origin"], row["software"], row["nodeinfo_url"])
937 federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
938 except network.exceptions as exception:
939 logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
940 instances.set_last_error(domain, exception)
942 logger.debug("Success - EXIT!")
# Import the curated "oliphant" CSV blocklists hosted on codeberg.org.
# Downloads one CSV per configured blocker, parses each row into
# (domain, severity, reject_media, reject_reports), deobfuscates wildcard
# entries, records blocks via processing.block() and finally posts a summary
# via the bot when enabled.
# NOTE(review): dict-literal braces, `continue` lines and the blockdict
# append body were stripped by the extraction — confirm against the full file.
945 def fetch_oliphant(args: argparse.Namespace) -> int:
946 logger.debug("args[]='%s' - CALLED!", type(args))
948 logger.debug("Invoking locking.acquire() ...")
951 source_domain = "codeberg.org"
# Rate-limit: skip entirely if this source was polled recently.
952 if sources.is_recent(source_domain):
953 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
956 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
957 sources.update(source_domain)
960 base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"
# Static list of blocker -> CSV path pairs (relative to base_url).
965 "blocker": "artisan.chat",
966 "csv_url": "mastodon/artisan.chat.csv",
968 "blocker": "mastodon.art",
969 "csv_url": "mastodon/mastodon.art.csv",
971 "blocker": "pleroma.envs.net",
972 "csv_url": "mastodon/pleroma.envs.net.csv",
974 "blocker": "oliphant.social",
975 "csv_url": "mastodon/_unified_tier3_blocklist.csv",
977 "blocker": "mastodon.online",
978 "csv_url": "mastodon/mastodon.online.csv",
980 "blocker": "mastodon.social",
981 "csv_url": "mastodon/mastodon.social.csv",
983 "blocker": "mastodon.social",
984 "csv_url": "other/missing-tier0-mastodon.social.csv",
986 "blocker": "rage.love",
987 "csv_url": "mastodon/rage.love.csv",
989 "blocker": "sunny.garden",
990 "csv_url": "mastodon/sunny.garden.csv",
992 "blocker": "sunny.garden",
993 "csv_url": "mastodon/gardenfence.csv",
995 "blocker": "solarpunk.moe",
996 "csv_url": "mastodon/solarpunk.moe.csv",
998 "blocker": "toot.wales",
999 "csv_url": "mastodon/toot.wales.csv",
1001 "blocker": "union.place",
1002 "csv_url": "mastodon/union.place.csv",
1004 "blocker": "oliphant.social",
1005 "csv_url": "mastodon/birdsite.csv",
1011 logger.debug("Downloading %d files ...", len(blocklists))
1012 for block in blocklists:
1013 # Is domain given and not equal blocker?
1014 if isinstance(args.domain, str) and args.domain != block["blocker"]:
1015 logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
1017 elif args.domain in domains:
1018 logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
1022 logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
1023 response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
1025 logger.debug("response.ok='%s',response.status_code=%d,response.content()=%d", response.ok, response.status_code, len(response.content))
1026 if not response.ok or response.status_code >= 300 or response.content == "":
1027 logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
1030 logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
1031 reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
1037 logger.debug("row[%s]='%s'", type(row), row)
1038 domain = severity = None
1039 reject_media = reject_reports = False
# Column names vary between lists: some prefix headers with '#'.
1041 if "#domain" in row:
1042 domain = row["#domain"]
1043 elif "domain" in row:
1044 domain = row["domain"]
1046 logger.debug("row='%s' does not contain domain column", row)
1049 if "#severity" in row:
1050 severity = blocks.alias_block_level(row["#severity"])
1051 elif "severity" in row:
1052 severity = blocks.alias_block_level(row["severity"])
1054 logger.debug("row='%s' does not contain severity column", row)
1057 if "#reject_media" in row and row["#reject_media"].lower() == "true":
1059 elif "reject_media" in row and row["reject_media"].lower() == "true":
1062 if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
1063 reject_reports = True
1064 elif "reject_reports" in row and row["reject_reports"].lower() == "true":
1065 reject_reports = True
1068 logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
1070 logger.debug("domain is empty - SKIPPED!")
1072 elif domain.endswith(".onion"):
1073 logger.debug("domain='%s' is a TOR .onion domain - SKIPPED", domain)
1075 elif domain.endswith(".arpa"):
1076 logger.debug("domain='%s' is a reverse IP address - SKIPPED", domain)
1078 elif domain.endswith(".tld"):
1079 logger.debug("domain='%s' is a fake domain - SKIPPED", domain)
# Wildcard/obfuscated entries ('*' or '?') are resolved against the blocker.
1081 elif domain.find("*") >= 0 or domain.find("?") >= 0:
1082 logger.debug("domain='%s' is obfuscated - Invoking utils.deobfuscate(%s, %s) ...", domain, domain, block["blocker"])
1083 domain = utils.deobfuscate(domain, block["blocker"])
1084 logger.debug("domain='%s' - AFTER!", domain)
1086 if not validators.domain(domain):
# NOTE(review): BUG — this format string has a '%s' placeholder but no
# argument is passed; `domain` is missing from the logger.debug() call.
1087 logger.debug("domain='%s' is not a valid domain - SKIPPED!")
1089 elif blacklist.is_blacklisted(domain):
1090 logger.warning("domain='%s' is blacklisted - SKIPPED!", domain)
1093 logger.debug("Marking domain='%s' as handled", domain)
1094 domains.append(domain)
1096 logger.debug("Processing domain='%s' ...", domain)
1097 processed = processing.domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
1098 logger.debug("processed='%s'", processed)
1100 if processing.block(block["blocker"], domain, None, severity) and config.get("bot_enabled"):
# NOTE(review): the literal blocklist entries above only define 'blocker'
# and 'csv_url'; the block["block_level"] / block["reason"] lookups here
# look suspect — verify these keys exist before relying on them.
1101 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", domain, block["block_level"], block["blocker"])
1104 "reason" : block["reason"],
# Record the media/report-rejection flags as separate block levels.
1108 processing.block(block["blocker"], domain, None, "reject_media")
1110 processing.block(block["blocker"], domain, None, "reject_reports")
1112 logger.debug("block[blocker]='%s'", block["blocker"])
# chaos.social is excluded from total-block bookkeeping (special case).
1113 if block["blocker"] != "chaos.social":
1114 logger.debug("Invoking instances.set_total_blocks(%s, domains()=%d) ...", block["blocker"], len(domains))
1115 instances.set_total_blocks(block["blocker"], domains)
1117 logger.debug("Checking if blocker='%s' has pending updates ...", block["blocker"])
1118 if instances.has_pending(block["blocker"]):
1119 logger.debug("Flushing updates for block[blocker]='%s' ...", block["blocker"])
1120 instances.update_data(block["blocker"])
1122 logger.debug("Invoking commit() ...")
1123 database.connection.commit()
1125 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1126 if config.get("bot_enabled") and len(blockdict) > 0:
1127 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
1128 network.send_bot_post(block["blocker"], blockdict)
1130 logger.debug("Success! - EXIT!")
# Import plain-text blocklists (one domain per line). Currently a single
# source is configured: seirdy.one's bsl.txt. Each tidied, wanted,
# not-recently-crawled domain is handed to processing.domain() with the
# list's blocker as origin.
1133 def fetch_txt(args: argparse.Namespace) -> int:
1134 logger.debug("args[]='%s' - CALLED!", type(args))
1136 logger.debug("Invoking locking.acquire() ...")
# Static source list entry (blocker + raw text URL).
1141 "blocker": "seirdy.one",
1142 "url" : "https://seirdy.one/pb/bsl.txt",
1145 logger.info("Checking %d text file(s) ...", len(urls))
1147 logger.debug("Fetching row[url]='%s' ...", row["url"])
1148 response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
1150 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1151 if response.ok and response.status_code < 300 and response.text != "":
1152 logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
# One domain per line in the fetched text file.
1153 domains = response.text.split("\n")
1155 logger.info("Processing %d domains ...", len(domains))
1156 for domain in domains:
1157 logger.debug("domain='%s' - BEFORE!", domain)
1158 domain = tidyup.domain(domain)
1160 logger.debug("domain='%s' - AFTER!", domain)
1162 logger.debug("domain is empty - SKIPPED!")
1164 elif not utils.is_domain_wanted(domain):
1165 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1167 elif instances.is_recent(domain):
1168 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1171 logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
1172 processed = processing.domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)
1174 logger.debug("processed='%s'", processed)
1176 logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
1179 logger.debug("Success! - EXIT!")
# Scrape fedipact.online for participating instances: every <li> element on
# the page is expected to start with a domain name. New, wanted,
# not-recently-crawled domains are crawled via federation.fetch_instances().
1182 def fetch_fedipact(args: argparse.Namespace) -> int:
1183 logger.debug("args[]='%s' - CALLED!", type(args))
1185 logger.debug("Invoking locking.acquire() ...")
1188 source_domain = "fedipact.online"
# Rate-limit: skip entirely if this source was polled recently.
1189 if sources.is_recent(source_domain):
1190 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1193 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1194 sources.update(source_domain)
1196 response = utils.fetch_url(
1197 f"https://{source_domain}",
1198 network.web_headers,
1199 (config.get("connection_timeout"), config.get("read_timeout"))
1202 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1203 if response.ok and response.status_code < 300 and response.text != "":
1204 logger.debug("Parsing %d Bytes ...", len(response.text))
1206 doc = bs4.BeautifulSoup(response.text, "html.parser")
1207 logger.debug("doc[]='%s'", type(doc))
# Each list item's first child node is the instance's domain text.
1209 rows = doc.findAll("li")
1210 logger.info("Checking %d row(s) ...", len(rows))
1212 logger.debug("row[]='%s'", type(row))
1213 domain = tidyup.domain(row.contents[0])
1215 logger.debug("domain='%s' - AFTER!", domain)
1217 logger.debug("domain is empty - SKIPPED!")
1220 logger.debug("domain='%s' - BEFORE!", domain)
# Normalize to punycode ("idna" codec) and back to a plain str.
1221 domain = domain.encode("idna").decode("utf-8")
1222 logger.debug("domain='%s' - AFTER!", domain)
1224 if not utils.is_domain_wanted(domain):
1225 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1227 elif instances.is_registered(domain):
1228 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1230 elif instances.is_recent(domain):
1231 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1234 logger.info("Fetching domain='%s' ...", domain)
1235 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1237 logger.debug("Success! - EXIT!")
# Scrape the FediBlock page of joinfediverse.wiki. Parses "wikitable" tables
# into block dicts (blocked domain, reason, subdomain list), expands
# "subdomain(s)" entries into full blocked domains, registers the blocked
# instances, then records the blocks for every local instance matching
# 'climatejustice.%' and optionally posts a bot summary.
# NOTE(review): several original lines (blanks, `continue`, loop headers,
# blockdict scaffolding) were stripped by the extraction — confirm control
# flow against the full file before editing.
1240 def fetch_joinfediverse(args: argparse.Namespace) -> int:
1241 logger.debug("args[]='%s' - CALLED!", type(args))
1243 logger.debug("Invoking locking.acquire() ...")
1246 source_domain = "joinfediverse.wiki"
1247 if sources.is_recent(source_domain):
1248 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1251 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1252 sources.update(source_domain)
1254 raw = utils.fetch_url(
1255 f"https://{source_domain}/FediBlock",
1256 network.web_headers,
1257 (config.get("connection_timeout"), config.get("read_timeout"))
1259 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1261 doc = bs4.BeautifulSoup(raw, "html.parser")
1262 logger.debug("doc[]='%s'", type(doc))
1264 tables = doc.findAll("table", {"class": "wikitable"})
1266 logger.info("Analyzing %d table(s) ...", len(tables))
1268 for table in tables:
1269 logger.debug("table[]='%s'", type(table))
1271 rows = table.findAll("tr")
1272 logger.info("Checking %d row(s) ...", len(rows))
# Maps column index -> header name for the columns worth scraping.
1273 block_headers = dict()
1275 logger.debug("row[%s]='%s'", type(row), row)
1277 headers = row.findAll("th")
1278 logger.debug("Found headers()=%d header(s)", len(headers))
# A row with multiple <th> cells (re)defines the table's header mapping.
1279 if len(headers) > 1:
1280 block_headers = dict()
1282 for header in headers:
1284 logger.debug("header[]='%s',cnt=%d", type(header), cnt)
1285 text = header.contents[0]
1287 logger.debug("text[]='%s'", type(text))
1288 if not isinstance(text, str):
1289 logger.debug("text[]='%s' is not of type 'str' - SKIPPED!", type(text))
1291 elif validators.domain(text.strip()):
1292 logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
1295 text = tidyup.domain(text.strip())
1296 logger.debug("text='%s' - AFTER!", text)
# Only these column headers are scraped from the wiki table.
1297 if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
1298 logger.debug("Found header: '%s'=%d", text, cnt)
1299 block_headers[cnt] = text
1301 elif len(block_headers) == 0:
1302 logger.debug("row is not scrapable - SKIPPED!")
1304 elif len(block_headers) > 0:
1305 logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
1309 for element in row.find_all(["th", "td"]):
1311 logger.debug("element[]='%s',cnt=%d", type(element), cnt)
1312 if cnt in block_headers:
1313 logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])
1315 text = element.text.strip()
# 'domain'/'instance' columns are unified under the key "blocked".
1316 key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"
1318 logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
1319 if key in ["domain", "instance"]:
1321 elif key == "reason":
1322 block[key] = tidyup.reason(text)
1323 elif key == "subdomain(s)":
# The wiki lists multiple subdomains separated by "/".
1326 block[key] = text.split("/")
1328 logger.debug("key='%s'", key)
1331 logger.debug("block()=%d ...", len(block))
1333 logger.debug("Appending block()=%d ...", len(block))
1334 blocklist.append(block)
1336 logger.debug("blocklist()=%d", len(blocklist))
# The scraped blocks are applied on behalf of local climatejustice.* hosts.
1338 database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
1339 domains = database.cursor.fetchall()
1341 logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
1343 for block in blocklist:
1344 logger.debug("block='%s'", block)
# Expand "sub1/sub2" rows into one block entry per subdomain.
1345 if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
1346 origin = block["blocked"]
1347 logger.debug("origin='%s'", origin)
1348 for subdomain in block["subdomain(s)"]:
1349 block["blocked"] = subdomain + "." + origin
1350 logger.debug("block[blocked]='%s'", block["blocked"])
1351 blocking.append(block)
1353 blocking.append(block)
# NOTE(review): BUG — '%d' is given the list `blocking` itself, not
# len(blocking); this logging call will raise a format error internally.
1355 logger.debug("blocking()=%d", blocking)
1356 for block in blocking:
1357 logger.debug("block[]='%s'", type(block))
1358 if "blocked" not in block:
1359 raise KeyError(f"block()={len(block)} does not have element 'blocked'")
1361 block["blocked"] = tidyup.domain(block["blocked"]).encode("idna").decode("utf-8")
1362 logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
1364 if block["blocked"] == "":
1365 logger.debug("block[blocked] is empty - SKIPPED!")
1367 elif not utils.is_domain_wanted(block["blocked"]):
1368 logger.warning("block[blocked]='%s' is not wanted - SKIPPED!", block["blocked"])
1370 elif instances.is_recent(block["blocked"]):
1371 logger.debug("block[blocked]='%s' has been recently checked - SKIPPED!", block["blocked"])
1374 logger.info("Proccessing blocked='%s' ...", block["blocked"])
1375 processing.domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
1378 for blocker in domains:
# fetchall() yields row tuples/rows; take the first (only) column.
1379 blocker = blocker[0]
1380 logger.debug("blocker[%s]='%s'", type(blocker), blocker)
1382 for block in blocking:
1383 logger.debug("block[blocked]='%s',block[block reason(s)]='%s' - BEFORE!", block["blocked"], block["block reason(s)"] if "block reason(s)" in block else None)
1384 block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None
1386 logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
1387 if block["blocked"] == "":
1388 logger.debug("block[blocked] is empty - SKIPPED!")
1390 elif not utils.is_domain_wanted(block["blocked"]):
1391 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1394 logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
1395 if processing.block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
# NOTE(review): block["block_level"] is logged here but never set by the
# scraper above — verify this key exists (likely KeyError risk).
1396 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
1398 "blocked": block["blocked"],
1399 "reason" : block["reason"],
1402 if instances.has_pending(blocker):
1403 logger.debug("Flushing updates for blocker='%s' ...", blocker)
1404 instances.update_data(blocker)
1406 logger.debug("Invoking commit() ...")
1407 database.connection.commit()
1409 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1410 if config.get("bot_enabled") and len(blockdict) > 0:
# NOTE(review): cosmetic defect — the format string is missing the closing
# quote after blocker='%s (compare the matching info-call in fetch_oliphant).
1411 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", blocker, len(blockdict))
1412 network.send_bot_post(blocker, blockdict)
1414 logger.debug("Success! - EXIT!")
# Re-fetch block lists of instances flagged has_obfuscation=1 and try to
# resolve their obfuscated ("*"/"?") entries via utils.deobfuscate(). Newly
# resolved blocks are recorded; when no obfuscated entries remain, the
# instance's has_obfuscation flag is cleared.
1417 def recheck_obfuscation(args: argparse.Namespace) -> int:
1418 logger.debug("args[]='%s' - CALLED!", type(args))
1420 logger.debug("Invoking locking.acquire() ...")
# Narrow the candidate set by --domain or --software when given.
1423 if isinstance(args.domain, str) and args.domain != "" and utils.is_domain_wanted(args.domain):
1424 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
# NOTE(review): comparing validators.domain(...) to args.software looks
# wrong — the validator returns True/ValidationFailure, not the input
# string, so this branch can never match; verify intent.
1425 elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
1426 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
1428 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")
1430 rows = database.cursor.fetchall()
1431 logger.info("Checking %d domains ...", len(rows))
1433 logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
# Recently-checked instances are skipped unless --force or an explicit
# --domain/--software selection was given.
1434 if (args.force is None or not args.force) and instances.is_recent(row["domain"]) and args.domain is None and args.software is None:
1435 logger.debug("row[domain]='%s' has been recently checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force))
# Fetch the raw block list with the software-specific backend.
1439 if row["software"] == "pleroma":
1440 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1441 blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
1442 elif row["software"] == "mastodon":
1443 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1444 blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
1445 elif row["software"] == "lemmy":
1446 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1447 blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
1448 elif row["software"] == "friendica":
1449 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1450 blocking = friendica.fetch_blocks(row["domain"])
1451 elif row["software"] == "misskey":
1452 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1453 blocking = misskey.fetch_blocks(row["domain"])
1455 logger.warning("Unknown sofware: domain='%s',software='%s'", row["domain"], row["software"])
1457 logger.debug("row[domain]='%s'", row["domain"])
1458 # chaos.social requires special care ...
1459 if row["domain"] != "chaos.social":
1460 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
1461 instances.set_total_blocks(row["domain"], blocking)
1466 logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1467 for block in blocking:
1468 logger.debug("block[blocked]='%s'", block["blocked"])
1471 if block["blocked"] == "":
1472 logger.debug("block[blocked] is empty - SKIPPED!")
1474 elif block["blocked"].endswith(".arpa"):
1475 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1477 elif block["blocked"].endswith(".tld"):
1478 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1480 elif block["blocked"].endswith(".onion"):
1481 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
# Count and try to resolve obfuscated entries; an optional per-entry
# "hash" aids deobfuscation when present.
1483 elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
1484 logger.debug("block='%s' is obfuscated.", block["blocked"])
1485 obfuscated = obfuscated + 1
1486 blocked = utils.deobfuscate(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
1487 elif not utils.is_domain_wanted(block["blocked"]):
1488 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1490 elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1491 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
1494 logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
# A successful deobfuscation yields a different, concrete domain.
1495 if blocked is not None and blocked != block["blocked"]:
1496 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1497 obfuscated = obfuscated - 1
1498 if blocks.is_instance_blocked(row["domain"], blocked):
1499 logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
1502 block["block_level"] = blocks.alias_block_level(block["block_level"])
1504 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
1505 if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1506 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
1509 "reason" : block["reason"],
1512 logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
# All entries resolved -> instance no longer counts as obfuscated.
1513 if obfuscated == 0 and len(blocking) > 0:
1514 logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
1515 instances.set_has_obfuscation(row["domain"], False)
1517 if instances.has_pending(row["domain"]):
1518 logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1519 instances.update_data(row["domain"])
1521 logger.debug("Invoking commit() ...")
1522 database.connection.commit()
1524 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1525 if config.get("bot_enabled") and len(blockdict) > 0:
# NOTE(review): cosmetic defect — format string missing the closing quote
# after blocker='%s (same slip as in other bot-POST log lines).
1526 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", row["domain"], len(blockdict))
1527 network.send_bot_post(row["domain"], blockdict)
1529 logger.debug("Success! - EXIT!")
# Import instances from demo.fedilist.com's CSV export (onion hosts
# excluded), optionally filtered by --software. Each new, wanted,
# not-recently-crawled hostname is crawled via federation.fetch_instances().
1532 def fetch_fedilist(args: argparse.Namespace) -> int:
1533 logger.debug("args[]='%s' - CALLED!", type(args))
1535 logger.debug("Invoking locking.acquire() ...")
1538 source_domain = "demo.fedilist.com"
1539 if sources.is_recent(source_domain):
1540 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1543 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1544 sources.update(source_domain)
# NOTE: plain http here — presumably the service offers no TLS; verify.
1546 url = f"http://{source_domain}/instance/csv?onion=not"
1547 if args.software is not None and args.software != "":
1548 logger.debug("args.software='%s'", args.software)
1549 url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"
1551 logger.info("Fetching url='%s' ...", url)
1552 response = reqto.get(
1554 headers=network.web_headers,
1555 timeout=(config.get("connection_timeout"), config.get("read_timeout")),
1556 allow_redirects=False
1559 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1560 if not response.ok or response.status_code >= 300 or len(response.content) == 0:
1561 logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.text))
1564 reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
1566 logger.debug("reader[]='%s'", type(reader))
1568 logger.debug("row[]='%s'", type(row))
1569 if "hostname" not in row:
1570 logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(row))
1573 logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"])
1574 domain = tidyup.domain(row["hostname"])
1575 logger.debug("domain='%s' - AFTER!", domain)
1578 logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"])
1581 logger.debug("domain='%s' - BEFORE!", domain)
# Normalize to punycode ("idna" codec) and back to a plain str.
1582 domain = domain.encode("idna").decode("utf-8")
1583 logger.debug("domain='%s' - AFTER!", domain)
1585 if not utils.is_domain_wanted(domain):
1586 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
# --force overrides the already-registered skip, but not the recent skip.
1588 elif (args.force is None or not args.force) and instances.is_registered(domain):
1589 logger.debug("domain='%s' is already registered, --force not specified: args.force[]='%s'", domain, type(args.force))
1591 elif instances.is_recent(domain):
1592 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1595 logger.info("Fetching instances from domain='%s' ...", domain)
1596 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1598 logger.debug("Success! - EXIT!")
# Re-determine the software type of instances via their nodeinfo. Scope is
# a single --domain, all instances of a --software, or (default) everything
# whose last_nodeinfo is stale per the "recheck_nodeinfo" config interval.
1601 def update_nodeinfo(args: argparse.Namespace) -> int:
1602 logger.debug("args[]='%s' - CALLED!", type(args))
1604 logger.debug("Invoking locking.acquire() ...")
1607 if args.domain is not None and args.domain != "":
1608 logger.debug("Fetching args.domain='%s'", args.domain)
1609 database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ?", [args.domain])
1610 elif args.software is not None and args.software != "":
1611 logger.info("Fetching domains for args.software='%s'", args.software)
1612 database.cursor.execute("SELECT domain, software FROM instances WHERE software = ?", [args.software])
1614 logger.info("Fetching domains for recently updated ...")
1615 database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL", [time.time() - config.get("recheck_nodeinfo")])
1617 domains = database.cursor.fetchall()
1619 logger.info("Checking %d domain(s) ...", len(domains))
1622 logger.debug("row[]='%s'", type(row))
# Progress percentage relies on `cnt` maintained by the (stripped) loop.
1624 logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
1625 software = federation.determine_software(row["domain"])
1627 logger.debug("Determined software='%s'", software)
# Update on a real change, or unconditionally when --force is given.
# NOTE(review): with --force and unchanged software this still logs
# "has changed" — message is misleading in that case.
1628 if (software != row["software"] and software is not None) or args.force is True:
1629 logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
1630 instances.set_software(row["domain"], software)
1632 instances.set_success(row["domain"])
1633 except network.exceptions as exception:
1634 logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1635 instances.set_last_error(row["domain"], exception)
# Stamp last_nodeinfo and flush pending fields regardless of outcome.
1637 instances.set_last_nodeinfo(row["domain"])
1638 instances.update_data(row["domain"])
1641 logger.debug("Success! - EXIT!")
# Import instances from the instances.social list API (bearer-token
# authenticated; key comes from config "instances_social_api_key"). Each
# new, wanted, not-recently-crawled instance name is crawled via
# federation.fetch_instances().
1644 def fetch_instances_social(args: argparse.Namespace) -> int:
1645 logger.debug("args[]='%s' - CALLED!", type(args))
1647 logger.debug("Invoking locking.acquire() ...")
1650 source_domain = "instances.social"
# Hard requirement: the API key must be configured.
1652 if config.get("instances_social_api_key") == "":
1653 logger.error("API key not set. Please set in your config.json file.")
1655 elif sources.is_recent(source_domain):
1656 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1659 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1660 sources.update(source_domain)
1663 "Authorization": f"Bearer {config.get('instances_social_api_key')}",
# count=0 means "no limit" per the API; results sorted by name.
1666 fetched = network.get_json_api(
1668 "/api/1.0/instances/list?count=0&sort_by=name",
1670 (config.get("connection_timeout"), config.get("read_timeout"))
1672 logger.debug("fetched[]='%s'", type(fetched))
# Bail out on transport errors or an unexpected response shape.
1674 if "error_message" in fetched:
1675 logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1677 elif "exception" in fetched:
1678 logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1680 elif "json" not in fetched:
1681 logger.warning("fetched has no element 'json' - EXIT!")
1683 elif "instances" not in fetched["json"]:
1684 logger.warning("fetched[row] has no element 'instances' - EXIT!")
1688 rows = fetched["json"]["instances"]
1690 logger.info("Checking %d row(s) ...", len(rows))
1692 logger.debug("row[]='%s'", type(row))
1693 domain = tidyup.domain(row["name"])
1694 logger.debug("domain='%s' - AFTER!", domain)
1697 logger.debug("domain is empty - SKIPPED!")
1700 logger.debug("domain='%s' - BEFORE!", domain)
# Normalize to punycode ("idna" codec) and back to a plain str.
1701 domain = domain.encode("idna").decode("utf-8")
1702 logger.debug("domain='%s' - AFTER!", domain)
1704 if not utils.is_domain_wanted(domain):
1705 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1707 elif domain in domains:
1708 logger.debug("domain='%s' is already added - SKIPPED!", domain)
1710 elif instances.is_registered(domain):
1711 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1713 elif instances.is_recent(domain):
1714 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1717 logger.info("Fetching instances from domain='%s'", domain)
1718 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1720 logger.debug("Success! - EXIT!")
# One-shot maintenance command: convert all non-punycode domain columns
# (instances.domain/origin, blocks.blocker/blocked) to IDNA form via the
# models' translate_idnas() helpers. Rows already containing 'xn--' are
# skipped by the SQL filter. `args` is accepted for CLI uniformity only.
1723 def convert_idna(args: argparse.Namespace) -> int:
1724 logger.debug("args[]='%s' - CALLED!", type(args))
1726 database.cursor.execute("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC")
1727 rows = database.cursor.fetchall()
1729 logger.debug("rows[]='%s'", type(rows))
1730 instances.translate_idnas(rows, "domain")
1732 database.cursor.execute("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC")
1733 rows = database.cursor.fetchall()
1735 logger.debug("rows[]='%s'", type(rows))
1736 instances.translate_idnas(rows, "origin")
1738 database.cursor.execute("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC")
1739 rows = database.cursor.fetchall()
1741 logger.debug("rows[]='%s'", type(rows))
1742 blocks.translate_idnas(rows, "blocker")
1744 database.cursor.execute("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC")
1745 rows = database.cursor.fetchall()
1747 logger.debug("rows[]='%s'", type(rows))
1748 blocks.translate_idnas(rows, "blocked")
1750 logger.debug("Success! - EXIT!")