# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
# Standard library
import argparse
import inspect
import json
import logging
import time

from urllib.parse import urlparse

# Third-party
# NOTE(review): the imports below (atoma, bs4, markdown, validators) are
# referenced by functions in this module but were lost from the damaged
# import section — verify the exact list against upstream.
import atoma
import bs4
import markdown
import validators

# Project-local
from fba import csrf
from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import locking
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances
from fba.models import sources

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma
# Module-wide logger; INFO by default, flip the commented line to DEBUG for tracing.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)
def check_instance(args: argparse.Namespace) -> int:
    """Validate a single command-line domain.

    Checks that ``args.domain`` is a syntactically valid domain, not
    blacklisted and not already registered.  Returns 0 when the domain is
    unknown (i.e. could be added), otherwise a non-zero status.
    """
    logger.debug("args.domain='%s' - CALLED!", args.domain)

    # NOTE(review): status codes reconstructed from a damaged paste — the
    # surviving log lines fix the branch order but not the exact values.
    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status
def check_nodeinfo(args: argparse.Namespace) -> int:
    """Report instances whose stored nodeinfo URL points at a foreign host.

    A relative nodeinfo URL always matches its instance.  An absolute URL
    must contain either the instance's domain or its punycode form;
    otherwise a warning is logged and the row counted.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    # Fetch all rows that have a nodeinfo URL recorded
    database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")

    cnt = 0
    for row in database.cursor.fetchall():
        logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        # IDNA-encode so internationalized domains compare against punycode URLs
        punycode = row["domain"].encode("idna").decode("utf-8")

        if row["nodeinfo_url"].startswith("/"):
            logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
            continue
        elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
            logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
            cnt = cnt + 1

    logger.info("Found %d row(s)", cnt)

    logger.debug("EXIT!")
    return 0
def fetch_pixelfed_api(args: argparse.Namespace) -> int:
    """Fetch the public server list from the pixelfed.org API.

    Skips the run entirely when the source was queried recently.  Each
    returned domain is validated, filtered and — when new — crawled via
    federation.fetch_instances().  Returns 0 on success, non-zero on error.

    NOTE(review): reconstructed from a partially-damaged paste — verify
    return codes and the exact skip chain against upstream.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF by default, you don't have to add network.source_headers by yourself here
    headers = tuple()
    source_domain = "pixelfed.org"

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    try:
        logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
        headers = csrf.determine(source_domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
        return 100

    try:
        logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            source_domain,
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 101
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 102

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif row["domain"] == "":
                logger.debug("row[domain] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(row["domain"]):
                logger.warning("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
                continue
            elif instances.is_registered(row["domain"]):
                logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
                continue
            elif instances.is_recent(row["domain"]):
                logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
                continue

            logger.debug("Fetching instances from row[domain]='%s' ...", row["domain"])
            federation.fetch_instances(row["domain"], None, None, inspect.currentframe().f_code.co_name)

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 103

    logger.debug("Success! - EXIT!")
    return 0
def fetch_bkali(args: argparse.Namespace) -> int:
    """Fetch a domain list from the gql.api.bka.li GraphQL API.

    Collects new, wanted, not-yet-registered domains and then crawls each
    through federation.fetch_instances().  Returns 0 on success, non-zero
    on API error.

    NOTE(review): reconstructed from a partially-damaged paste — verify
    return codes and the POST call arguments against upstream.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "gql.api.bka.li"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()
    try:
        logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
        fetched = network.post_json_api(
            source_domain,
            "/v1/graphql",
            json.dumps({
                "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
            })
        )

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for 'gql.sources.bka.li' returned error message='%s", fetched["error_message"])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s", fetched["error"]["message"])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif entry["domain"] == "":
                logger.debug("entry[domain] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(entry["domain"]):
                logger.warning("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
                continue
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0
def fetch_blocks(args: argparse.Namespace) -> int:
    """Fetch and record block lists from registered instances.

    Without arguments, re-checks all instances of supported software whose
    last block fetch is older than the configured interval; ``--domain`` or
    ``--software`` restrict the scan.  Discovered blocks are normalised,
    deobfuscated where possible and stored; new "reject" blocks are
    optionally announced through the bot account.

    NOTE(review): reconstructed from a partially-damaged paste — verify
    return codes and dropped lines against upstream.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for single args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
        blocker = tidyup.domain(blocker)
        logger.debug("blocker='%s' - AFTER!", blocker)

        if blocker == "":
            logger.warning("blocker is now empty!")
            continue
        elif nodeinfo_url is None or nodeinfo_url == "":
            logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
            continue
        elif not utils.is_domain_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("blocker='%s'", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        # Dispatch on software type; unknown software yields an empty list
        blocking = list()
        if software == "pleroma":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
        elif software == "mastodon":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
        elif software == "lemmy":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
        elif software == "friendica":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = friendica.fetch_blocks(blocker)
        elif software == "misskey":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = misskey.fetch_blocks(blocker)
        else:
            logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
        instances.set_total_blocks(blocker, blocking)

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        blockdict = list()
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin = row["origin"]
                nodeinfo_url = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some obscure them with question marks, not sure if that's dependent on version or not
                row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin = row["origin"]
                nodeinfo_url = row["nodeinfo_url"]

            logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(block["blocked"]):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            block["block_level"] = utils.alias_block_level(block["block_level"])

            if utils.process_block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d'", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_observer(args: argparse.Namespace) -> int:
    """Fetch instance domains from fediverse.observer, per software type.

    Without ``--software`` the list of software types is scraped from the
    site's dropdown menu; otherwise only the given type is fetched.  New,
    wanted domains are crawled via federation.fetch_instances().

    NOTE(review): reconstructed from a partially-damaged paste — verify
    dropped lines against upstream.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fediverse.observer"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    types = list()
    if args.software is None:
        logger.info("Fetching software list ...")
        raw = utils.fetch_url(
            f"https://{source_domain}",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        ).text
        logger.debug("raw[%s]()=%d", type(raw), len(raw))

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        items = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"}).findAll("a", {"class": "dropdown-item"})
        logger.debug("items[]='%s'", type(items))

        logger.info("Checking %d menu items ...", len(items))
        for item in items:
            logger.debug("item[%s]='%s'", type(item), item)
            if item.text.lower() == "all":
                logger.debug("Skipping 'All' menu entry ...")
                continue

            logger.debug("Appending item.text='%s' ...", item.text)
            types.append(tidyup.domain(item.text))
    else:
        logger.info("Adding args.software='%s' as type ...", args.software)
        types.append(args.software)

    logger.info("Fetching %d different table data ...", len(types))
    for software in types:
        logger.debug("software='%s' - BEFORE!", software)
        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            continue

        doc = None
        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://{source_domain}/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' is recently being handled - SKIPPED!", domain)
                continue

            software = software_helper.alias(software)
            logger.info("Fetching instances for domain='%s'", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_todon_wiki(args: argparse.Namespace) -> int:
    """Import the domain block list published on wiki.todon.eu.

    Scrapes the silenced/limited and suspended sections of the wiki page,
    registers unknown blocked domains, stores new blocks, and optionally
    announces new "reject" blocks through the bot account.

    NOTE(review): reconstructed from a partially-damaged paste — the
    blocker domain ("todon.eu") was inferred from the source domain and
    should be verified against upstream.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "wiki.todon.eu"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    blocker = "todon.eu"
    blocklist = {
        "silenced": list(),
        "reject"  : list(),
    }

    raw = utils.fetch_url(f"https://{source_domain}/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blocking = blocklist["silenced"] + blocklist["reject"]

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_total_blocks(blocker, blocking)

    blockdict = list()
    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d'", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_cs) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if blocks.is_instance_blocked(blocker, blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if utils.process_block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", blocked, block_level, blocker)
                blockdict.append({
                    "blocked": blocked,
                    "reason" : None,
                })

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
        network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update_data(blocker)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_cs(args: argparse.Namespace):
    """Import chaos.social's published federation block list.

    Downloads federation.md from the chaossocial/meta repository, renders
    it to HTML, extracts the silenced and blocked tables, registers
    unknown domains and stores the blocks.

    NOTE(review): reconstructed from a partially-damaged paste — the
    markdown extension list was lost and is rebuilt here; verify it
    against upstream before relying on exact parser behaviour.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Markdown extensions needed to parse the federation.md tables
    extensions = [
        "extra",
        "abbr",
        "attr_list",
        "def_list",
        "fenced_code",
        "footnotes",
        "md_in_html",
        "admonition",
        "codehilite",
        "legacy_attrs",
        "legacy_em",
        "meta",
        "nl2br",
        "sane_lists",
        "smarty",
        "toc",
        "wikilinks"
    ]

    blocklist = {
        "silenced": list(),
        "reject"  : list(),
    }

    source_domain = "raw.githubusercontent.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    raw = utils.fetch_url(f"https://{source_domain}/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc()=%d[]='%s'", len(doc), type(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    blocklist["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    blocklist["reject"] = federation.find_domains(blocked)

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "chaos.social"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_total_blocks(blocker, blocking)

    logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
    blockdict = list()
    if len(blocking) > 0:
        for block_level in blocklist:
            logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))

            for row in blocklist[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if instances.is_recent(row["domain"], "last_blocked"):
                    logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
                    continue
                elif not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if utils.process_block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
                    blockdict.append({
                        "blocked": row["domain"],
                        "reason" : row["reason"],
                    })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update_data(blocker)

    logger.debug("Success! - EXIT!")
def fetch_fba_rss(args: argparse.Namespace) -> int:
    """Fetch new instance domains from an FBA-specific RSS feed.

    ``args.feed`` is the feed URL; each item's link is expected to carry
    the domain after an ``=`` sign.  New, wanted domains are collected and
    then crawled via federation.fetch_instances().

    NOTE(review): reconstructed from a partially-damaged paste — verify
    dropped lines against upstream.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    domains = list()

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    components = urlparse(args.feed)

    if sources.is_recent(components.netloc):
        logger.info("API from components.netloc='%s' has recently being accessed - EXIT!", components.netloc)
        return 0
    else:
        logger.debug("components.netloc='%s' has not been recently used, marking ...", components.netloc)
        sources.update(components.netloc)

    logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item='%s'", item)
            # Feed links carry the domain after the '=' sign
            domain = tidyup.domain(item.link.split("=")[1])

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_fbabot_atom(args: argparse.Namespace) -> int:
    """Fetch new instance domains from the FBA bot's ATOM feed.

    Parses the bot account's feed on ryona.agency, extracts domains from
    the anchor elements of each entry and crawls new, wanted domains via
    federation.fetch_instances().

    NOTE(review): reconstructed from a partially-damaged paste — verify
    dropped lines against upstream.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "ryona.agency"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    feed = f"https://{source_domain}/users/fba/feed.atom"

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                logger.debug("element[]='%s'", type(element))
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href)

                    logger.debug("domain='%s' - AFTER!", domain)
                    if domain == "":
                        logger.debug("domain is empty - SKIPPED!")
                        continue
                    elif not utils.is_domain_wanted(domain):
                        logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_instances(args: argparse.Namespace) -> int:
    """Crawl instances, starting from ``args.domain``.

    First fetches the given domain, then — unless ``--single`` was given —
    loops over all registered instances of supported software whose last
    instance fetch is older than the configured interval.

    NOTE(review): reconstructed from a partially-damaged paste — verify
    return codes and the ``args.single`` handling against upstream.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("args.domain='%s' - checking ...", args.domain)
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid.", args.domain)
        return 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
        return 101

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Initial fetch of the given domain
    try:
        logger.info("Fetching instances from args.domain='%s' ...", args.domain)
        federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update_data(args.domain)
        return 102

    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] == "":
            logger.debug("row[domain] is empty - SKIPPED!")
            continue
        elif not utils.is_domain_wanted(row["domain"]):
            logger.warning("Domain row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", row["domain"], row["origin"], row["software"], row["nodeinfo_url"])
            federation.fetch_instances(row["domain"], row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[domain]='%s'", type(exception), row["domain"])
            instances.set_last_error(row["domain"], exception)

    logger.debug("Success - EXIT!")
    return 0
# Download and import the "oliphant" CSV blocklists hosted on codeberg.org.
# For each configured blocker/CSV pair: fetch the CSV, parse its rows (column
# names may or may not carry a leading '#'), and feed each blocked domain
# through utils.process_domain()/utils.process_block(). Skips blockers that
# were crawled recently and exits early if the source was accessed recently.
902 def fetch_oliphant(args: argparse.Namespace) -> int:
903 logger.debug("args[]='%s' - CALLED!", type(args))
905 logger.debug("Invoking locking.acquire() ...")
908 source_domain = "codeberg.org"
# Rate-limit access to the source host via the sources table.
909 if sources.is_recent(source_domain):
910 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
913 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
914 sources.update(source_domain)
917 base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"
# Static list of blocker instances and their CSV paths relative to base_url.
922 "blocker": "artisan.chat",
923 "csv_url": "mastodon/artisan.chat.csv",
925 "blocker": "mastodon.art",
926 "csv_url": "mastodon/mastodon.art.csv",
928 "blocker": "pleroma.envs.net",
929 "csv_url": "mastodon/pleroma.envs.net.csv",
931 "blocker": "oliphant.social",
932 "csv_url": "mastodon/_unified_tier3_blocklist.csv",
934 "blocker": "mastodon.online",
935 "csv_url": "mastodon/mastodon.online.csv",
937 "blocker": "mastodon.social",
938 "csv_url": "mastodon/mastodon.social.csv",
940 "blocker": "mastodon.social",
941 "csv_url": "other/missing-tier0-mastodon.social.csv",
943 "blocker": "rage.love",
944 "csv_url": "mastodon/rage.love.csv",
946 "blocker": "sunny.garden",
947 "csv_url": "mastodon/sunny.garden.csv",
949 "blocker": "solarpunk.moe",
950 "csv_url": "mastodon/solarpunk.moe.csv",
952 "blocker": "toot.wales",
953 "csv_url": "mastodon/toot.wales.csv",
955 "blocker": "union.place",
956 "csv_url": "mastodon/union.place.csv",
962 logger.debug("Downloading %d files ...", len(blocklists))
963 for block in blocklists:
964 # Is domain given and not equal blocker?
965 if isinstance(args.domain, str) and args.domain != block["blocker"]:
966 logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
968 elif args.domain in domains:
969 logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
971 elif instances.is_recent(block["blocker"]):
972 logger.debug("block[blocker]='%s' has been recently crawled - SKIPPED!", block["blocker"])
976 logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
977 response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
979 logger.debug("response.ok='%s',response.status_code=%d,response.content()=%d", response.ok, response.status_code, len(response.content))
# Skip (do not abort) on any failed or empty download.
980 if not response.ok or response.status_code >= 300 or response.content == "":
981 logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
984 logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
985 reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
991 logger.debug("row[%s]='%s'", type(row), row)
992 domain = severity = None
993 reject_media = reject_reports = False
# Some CSVs prefix their header names with '#' - check both variants.
996 domain = row["#domain"]
997 elif "domain" in row:
998 domain = row["domain"]
1000 logger.debug("row='%s' does not contain domain column", row)
1003 if "#severity" in row:
1004 severity = row["#severity"]
1005 elif "severity" in row:
1006 severity = row["severity"]
1008 logger.debug("row='%s' does not contain severity column", row)
1011 if "#reject_media" in row and row["#reject_media"].lower() == "true":
1013 elif "reject_media" in row and row["reject_media"].lower() == "true":
1016 if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
1017 reject_reports = True
1018 elif "reject_reports" in row and row["reject_reports"].lower() == "true":
1019 reject_reports = True
1022 logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
1024 logger.debug("domain is empty - SKIPPED!")
1026 elif not utils.is_domain_wanted(domain):
1027 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1030 logger.debug("Marking domain='%s' as handled", domain)
1031 domains.append(domain)
1033 logger.debug("Processing domain='%s' ...", domain)
1034 processed = utils.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
1035 logger.debug("processed='%s'", processed)
1037 if utils.process_block(block["blocker"], domain, None, "reject") and config.get("bot_enabled"):
# NOTE(review): block["block_level"] (below) and block["reason"] are not keys
# of the blocklists entries defined above (only "blocker"/"csv_url" are
# visible) - looks like a KeyError if this branch is reached; confirm.
1038 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", domain, block["block_level"], block["blocker"])
1041 "reason" : block["reason"],
1045 utils.process_block(block["blocker"], domain, None, "reject_media")
1047 utils.process_block(block["blocker"], domain, None, "reject_reports")
1049 logger.debug("Invoking instances.set_total_blocks(%s, domains()=%d) ...", block["blocker"], len(domains))
1050 instances.set_total_blocks(block["blocker"], domains)
1052 logger.debug("Checking if blocker='%s' has pending updates ...", block["blocker"])
1053 if instances.has_pending(block["blocker"]):
1054 logger.debug("Flushing updates for block[blocker]='%s' ...", block["blocker"])
1055 instances.update_data(block["blocker"])
1057 logger.debug("Invoking commit() ...")
1058 database.connection.commit()
# Optionally announce newly found "reject" blocks via the bot account.
1060 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1061 if config.get("bot_enabled") and len(blockdict) > 0:
1062 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
1063 network.send_bot_post(block["blocker"], blockdict)
1065 logger.debug("Success! - EXIT!")
# Import plain-text blocklists (one domain per line) from a static list of
# URLs (currently only seirdy.one's "bsl.txt"). Each listed domain is tidied,
# filtered (empty / unwanted / recently crawled) and then handed to
# utils.process_domain() with the list's blocker as origin.
1068 def fetch_txt(args: argparse.Namespace) -> int:
1069 logger.debug("args[]='%s' - CALLED!", type(args))
1071 logger.debug("Invoking locking.acquire() ...")
# Static source list: blocker instance plus the text file it publishes.
1076 "blocker": "seirdy.one",
1077 "url" : "https://seirdy.one/pb/bsl.txt",
1080 logger.info("Checking %d text file(s) ...", len(urls))
1082 logger.debug("Fetching row[url]='%s' ...", row["url"])
1083 response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
1085 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
# Only parse successful, non-empty responses; one domain per line.
1086 if response.ok and response.status_code < 300 and response.text != "":
1087 logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
1088 domains = response.text.split("\n")
1090 logger.info("Processing %d domains ...", len(domains))
1091 for domain in domains:
1092 logger.debug("domain='%s' - BEFORE!", domain)
# Normalize the raw line (whitespace, case, etc.) before any checks.
1093 domain = tidyup.domain(domain)
1095 logger.debug("domain='%s' - AFTER!", domain)
1097 logger.debug("domain is empty - SKIPPED!")
1099 elif not utils.is_domain_wanted(domain):
1100 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1102 elif instances.is_recent(domain):
1103 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1106 logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
1107 processed = utils.process_domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)
1109 logger.debug("processed='%s'", processed)
1111 logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
1114 logger.debug("Success! - EXIT!")
# Scrape the Fedipact signatory list from fedipact.online. Each <li> element
# on the page is expected to contain a domain name; new, wanted, not recently
# crawled domains are handed to federation.fetch_instances().
1117 def fetch_fedipact(args: argparse.Namespace) -> int:
1118 logger.debug("args[]='%s' - CALLED!", type(args))
1120 logger.debug("Invoking locking.acquire() ...")
1123 source_domain = "fedipact.online"
# Rate-limit access to the source host via the sources table.
1124 if sources.is_recent(source_domain):
1125 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1128 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1129 sources.update(source_domain)
1131 response = utils.fetch_url(
1132 f"https://{source_domain}",
1133 network.web_headers,
1134 (config.get("connection_timeout"), config.get("read_timeout"))
1137 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1138 if response.ok and response.status_code < 300 and response.text != "":
1139 logger.debug("Parsing %d Bytes ...", len(response.text))
1141 doc = bs4.BeautifulSoup(response.text, "html.parser")
1142 logger.debug("doc[]='%s'", type(doc))
# The signatory list is rendered as plain <li> elements.
1144 rows = doc.findAll("li")
1145 logger.info("Checking %d row(s) ...", len(rows))
1147 logger.debug("row[]='%s'", type(row))
# First text child of the <li> is assumed to be the domain name.
1148 domain = tidyup.domain(row.contents[0])
1150 logger.debug("domain='%s' - AFTER!", domain)
1152 logger.debug("domain is empty - SKIPPED!")
1154 elif not utils.is_domain_wanted(domain):
1155 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1157 elif instances.is_registered(domain):
1158 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1160 elif instances.is_recent(domain):
1161 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1164 logger.info("Fetching domain='%s' ...", domain)
1165 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1167 logger.debug("Success! - EXIT!")
# Scrape the FediBlock page of joinfediverse.wiki. Parses the "wikitable"
# tables into block records (domain/instance, subdomain(s), block reason(s)),
# expands subdomain entries into full domains, registers the blocked domains,
# and finally records the blocks for each local climatejustice.* blocker.
1170 def fetch_joinfediverse(args: argparse.Namespace) -> int:
1171 logger.debug("args[]='%s' - CALLED!", type(args))
1173 logger.debug("Invoking locking.acquire() ...")
1176 source_domain = "joinfediverse.wiki"
1177 if sources.is_recent(source_domain):
1178 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1181 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1182 sources.update(source_domain)
1184 raw = utils.fetch_url(
1185 f"https://{source_domain}/FediBlock",
1186 network.web_headers,
1187 (config.get("connection_timeout"), config.get("read_timeout"))
1189 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1191 doc = bs4.BeautifulSoup(raw, "html.parser")
1192 logger.debug("doc[]='%s'", type(doc))
1194 tables = doc.findAll("table", {"class": "wikitable"})
1196 logger.info("Analyzing %d table(s) ...", len(tables))
1198 for table in tables:
1199 logger.debug("table[]='%s'", type(table))
1201 rows = table.findAll("tr")
1202 logger.info("Checking %d row(s) ...", len(rows))
# Maps column index -> recognized header name for the current table.
1203 block_headers = dict()
1205 logger.debug("row[%s]='%s'", type(row), row)
1207 headers = row.findAll("th")
1208 logger.debug("Found headers()=%d header(s)", len(headers))
# A row with several <th> cells is a header row: (re)build the column map.
1209 if len(headers) > 1:
1210 block_headers = dict()
1212 for header in headers:
1214 logger.debug("header[]='%s',cnt=%d", type(header), cnt)
1215 text = header.contents[0]
1217 logger.debug("text[]='%s'", type(text))
1218 if not isinstance(text, str):
1219 logger.debug("text[]='%s' is not of type 'str' - SKIPPED!", type(text))
# A header cell that is itself a domain name is not a column label.
1221 elif validators.domain(text.strip()):
1222 logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
1225 text = tidyup.domain(text.strip())
1226 logger.debug("text='%s'", text)
# Only these column labels are scraped; others are ignored.
1227 if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
1228 logger.debug("Found header: '%s'=%d", text, cnt)
1229 block_headers[cnt] = text
1231 elif len(block_headers) == 0:
1232 logger.debug("row is not scrapable - SKIPPED!")
# Data row under a recognized header: pull cell text by column index.
1234 elif len(block_headers) > 0:
1235 logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
1239 for element in row.find_all(["th", "td"]):
1241 logger.debug("element[]='%s',cnt=%d", type(element), cnt)
1242 if cnt in block_headers:
1243 logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])
1245 text = element.text.strip()
# "domain"/"instance" columns are both normalized to the key "blocked".
1246 key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"
1248 logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
1249 if key in ["domain", "instance"]:
1251 elif key == "reason":
1252 block[key] = tidyup.reason(text)
1253 elif key == "subdomain(s)":
# Subdomains are listed slash-separated in a single cell.
1256 block[key] = text.split("/")
1258 logger.debug("key='%s'", key)
1261 logger.debug("block()=%d ...", len(block))
1263 logger.debug("Appending block()=%d ...", len(block))
1264 blocklist.append(block)
1266 logger.debug("blocklist()=%d", len(blocklist))
# Local blockers to attribute the scraped blocks to (fetched below).
1268 database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
1269 domains = database.cursor.fetchall()
1271 logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
1273 for block in blocklist:
1274 logger.debug("block='%s'", block)
# Expand "subdomain(s)" entries into one block record per full domain.
1275 if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
1276 origin = block["blocked"]
1277 for subdomain in block["subdomain(s)"]:
1278 block["blocked"] = subdomain + "." + origin
1279 blocking.append(block)
1281 blocking.append(block)
# NOTE(review): '%d' below is given the list `blocking` itself, not its
# length - logging will raise a formatting error here; likely meant
# len(blocking). Confirm and fix.
1283 logger.debug("blocking()=%d", blocking)
1284 for block in blocking:
1285 logger.debug("block[]='%s'", type(block))
1286 block["blocked"] = tidyup.domain(block["blocked"])
1288 logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
1289 if block["blocked"] == "":
1290 logger.debug("block[blocked] is empty - SKIPPED!")
1292 elif not utils.is_domain_wanted(block["blocked"]):
1293 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1295 elif instances.is_recent(block["blocked"]):
1296 logger.debug("blocked='%s' has been recently checked - SKIPPED!", block["blocked"])
1299 logger.info("Proccessing blocked='%s' ...", block["blocked"])
1300 utils.process_domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
# Record every scraped block once per local climatejustice.* blocker.
1303 for blocker in domains:
1304 blocker = blocker[0]
1305 logger.debug("blocker[%s]='%s'", type(blocker), blocker)
1307 for block in blocking:
1308 logger.debug("block[blocked]='%s',block[reason]='%s' - BEFORE!", block["blocked"], block["reason"])
1309 block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None
1311 logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
1312 if block["blocked"] == "":
1313 logger.debug("block[blocked] is empty - SKIPPED!")
1315 elif not utils.is_domain_wanted(block["blocked"]):
1316 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1319 logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
1320 if utils.process_block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
1321 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
1323 "blocked": block["blocked"],
1324 "reason" : block["reason"],
1327 if instances.has_pending(blocker):
1328 logger.debug("Flushing updates for blocker='%s' ...", blocker)
1329 instances.update_data(blocker)
1331 logger.debug("Invoking commit() ...")
1332 database.connection.commit()
1334 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1335 if config.get("bot_enabled") and len(blockdict) > 0:
# NOTE(review): the format string below has an unbalanced quote
# ("blocker='%s," - missing closing apostrophe); message-only defect.
1336 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", blocker, len(blockdict))
1337 network.send_bot_post(blocker, blockdict)
1339 logger.debug("Success! - EXIT!")
# Re-fetch block lists of instances flagged with has_obfuscation = 1 and try
# to deobfuscate entries containing wildcards ('*' / '?') via
# utils.deobfuscate_domain(). Scope can be narrowed by args.domain or
# args.software; args.all forces rechecking recently-checked instances.
# Clears the has_obfuscation flag when a list is fully deobfuscated.
1342 def recheck_obfuscation(args: argparse.Namespace) -> int:
1343 logger.debug("args[]='%s' - CALLED!", type(args))
1345 logger.debug("Invoking locking.acquire() ...")
# Choose the candidate set: one domain, one software type, or everything
# flagged as obfuscating.
1348 if isinstance(args.domain, str) and args.domain != "" and utils.is_domain_wanted(args.domain):
1349 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
1350 elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
1351 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
1353 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")
1355 rows = database.cursor.fetchall()
1356 logger.info("Checking %d domains ...", len(rows))
1358 logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
# Skip recently-checked instances unless --all or an explicit filter is set.
1359 if (args.all is None or not args.all) and instances.is_recent(row["domain"]) and args.domain is None and args.software is None:
1360 logger.debug("row[domain]='%s' has been recently checked, args.all[]='%s' - SKIPPED!", row["domain"], type(args.all))
# Dispatch to the software-specific block-list fetcher.
1364 if row["software"] == "pleroma":
1365 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1366 blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
1367 elif row["software"] == "mastodon":
1368 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1369 blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
1370 elif row["software"] == "lemmy":
1371 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1372 blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
1373 elif row["software"] == "friendica":
1374 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1375 blocking = friendica.fetch_blocks(row["domain"])
1376 elif row["software"] == "misskey":
1377 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1378 blocking = misskey.fetch_blocks(row["domain"])
# NOTE(review): "sofware" typo in the runtime log message below.
1380 logger.warning("Unknown sofware: domain='%s',software='%s'", row["domain"], row["software"])
1382 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
1383 instances.set_total_blocks(row["domain"], blocking)
1385 logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1388 for block in blocking:
1389 logger.debug("block[blocked]='%s'", block["blocked"])
# Filter out empty, pseudo (.arpa/.tld), and Tor (.onion) entries.
1392 if block["blocked"] == "":
1393 logger.debug("block[blocked] is empty - SKIPPED!")
1395 elif block["blocked"].endswith(".arpa"):
1396 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1398 elif block["blocked"].endswith(".tld"):
1399 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1401 elif block["blocked"].endswith(".onion"):
1402 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
# Wildcard characters mean the entry is obfuscated: count it and attempt
# deobfuscation, optionally using the entry's hash if provided.
1404 elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
1405 logger.debug("block='%s' is obfuscated.", block["blocked"])
1406 obfuscated = obfuscated + 1
1407 blocked = utils.deobfuscate_domain(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
1408 elif not utils.is_domain_wanted(block["blocked"]):
1409 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1411 elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1412 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
1415 logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
# A successful deobfuscation returns a different, concrete domain.
1416 if blocked is not None and blocked != block["blocked"]:
1417 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1418 obfuscated = obfuscated - 1
1419 if blocks.is_instance_blocked(row["domain"], blocked):
1420 logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
# Normalize non-standard block level names (e.g. aliases of "reject").
1423 block["block_level"] = utils.alias_block_level(block["block_level"])
1425 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
1426 if utils.process_block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1427 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
1430 "reason" : block["reason"],
1433 logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
# All former wildcard entries resolved: clear the obfuscation flag.
1434 if obfuscated == 0 and len(blocking) > 0:
1435 logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
1436 instances.set_has_obfuscation(row["domain"], False)
1438 if instances.has_pending(row["domain"]):
1439 logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1440 instances.update_data(row["domain"])
1442 logger.debug("Invoking commit() ...")
1443 database.connection.commit()
1445 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1446 if config.get("bot_enabled") and len(blockdict) > 0:
# NOTE(review): unbalanced quote in the format string below
# ("blocker='%s," - missing closing apostrophe); message-only defect.
1447 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", row["domain"], len(blockdict))
1448 network.send_bot_post(row["domain"], blockdict)
1450 logger.debug("Success! - EXIT!")
# Import the instance list from demo.fedilist.com's CSV endpoint (optionally
# filtered by args.software; onion hosts excluded via onion=not). Each listed
# hostname is tidied, filtered, and crawled via federation.fetch_instances().
1453 def fetch_fedilist(args: argparse.Namespace) -> int:
1454 logger.debug("args[]='%s' - CALLED!", type(args))
1456 logger.debug("Invoking locking.acquire() ...")
1459 source_domain = "demo.fedilist.com"
1460 if sources.is_recent(source_domain):
1461 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1464 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1465 sources.update(source_domain)
# NOTE(review): plain http:// is used here - confirm the host really does
# not serve https before keeping it this way.
1467 url = f"http://{source_domain}/instance/csv?onion=not"
1468 if args.software is not None and args.software != "":
1469 logger.debug("args.software='%s'", args.software)
1470 url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"
1472 logger.info("Fetching url='%s' ...", url)
1473 response = reqto.get(
1475 headers=network.web_headers,
1476 timeout=(config.get("connection_timeout"), config.get("read_timeout")),
1477 allow_redirects=False
1480 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
# NOTE(review): the message says response.content()=%d but len(response.text)
# is passed; harmless mismatch, but confirm which was intended.
1481 if not response.ok or response.status_code >= 300 or len(response.content) == 0:
1482 logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", response.ok, response.status_code, len(response.text))
1485 reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
1487 logger.debug("reader[]='%s'", type(reader))
1490 logger.debug("row[]='%s'", type(row))
# The CSV exposes the instance name in the "hostname" column.
1491 domain = tidyup.domain(row["hostname"])
1492 logger.debug("domain='%s' - AFTER!", domain)
1495 logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"])
1497 elif not utils.is_domain_wanted(domain):
1498 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1500 elif (args.all is None or not args.all) and instances.is_registered(domain):
# NOTE(review): this format string has two placeholders ('%s' twice) but only
# one argument (type(args.all)) - the domain argument is missing, so logging
# will raise a formatting error when this branch is hit. Confirm and fix.
1501 logger.debug("domain='%s' is already registered, --all not specified: args.all[]='%s'", type(args.all))
1503 elif instances.is_recent(domain):
1504 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1507 logger.info("Fetching instances from domain='%s' ...", domain)
1508 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1510 logger.debug("Success! - EXIT!")
# Re-determine the software type of known instances via their nodeinfo.
# Scope: a single args.domain, all instances of args.software, or every
# instance whose last_nodeinfo is missing/older than "recheck_nodeinfo".
# Updates the stored software type when it changed and stamps last_nodeinfo.
1513 def update_nodeinfo(args: argparse.Namespace) -> int:
1514 logger.debug("args[]='%s' - CALLED!", type(args))
1516 logger.debug("Invoking locking.acquire() ...")
1519 if args.domain is not None and args.domain != "":
1520 logger.debug("Fetching args.domain='%s'", args.domain)
1521 database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ?", [args.domain])
1522 elif args.software is not None and args.software != "":
1523 logger.info("Fetching domains for args.software='%s'", args.software)
1524 database.cursor.execute("SELECT domain, software FROM instances WHERE software = ?", [args.software])
1526 logger.info("Fetching domains for recently updated ...")
1527 database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL", [time.time() - config.get("recheck_nodeinfo")])
1529 domains = database.cursor.fetchall()
1531 logger.info("Checking %d domain(s) ...", len(domains))
1534 logger.debug("row[]='%s'", type(row))
# cnt/len(domains) renders a progress percentage in the log line.
1536 logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
1537 software = federation.determine_software(row["domain"])
1539 logger.debug("Determined software='%s'", software)
# Only persist a change when a concrete (non-None) new type was detected.
1540 if software != row["software"] and software is not None:
1541 logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
1542 instances.set_software(row["domain"], software)
1544 instances.set_success(row["domain"])
1545 except network.exceptions as exception:
# Per-row error handling: record the failure and continue with the next row.
1546 logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1547 instances.set_last_error(row["domain"], exception)
1549 instances.set_last_nodeinfo(row["domain"])
1550 instances.update_data(row["domain"])
1553 logger.debug("Success! - EXIT!")
# Import the instance list from the instances.social API. Requires the
# "instances_social_api_key" config value (sent as a Bearer token). Each
# returned instance name is tidied, filtered (empty / unwanted / duplicate /
# registered / recently crawled) and then crawled via
# federation.fetch_instances().
1556 def fetch_instances_social(args: argparse.Namespace) -> int:
1557 logger.debug("args[]='%s' - CALLED!", type(args))
1559 logger.debug("Invoking locking.acquire() ...")
1562 source_domain = "instances.social"
# Hard requirement: the API cannot be queried without a configured key.
1564 if config.get("instances_social_api_key") == "":
1565 logger.error("API key not set. Please set in your config.json file.")
1567 elif sources.is_recent(source_domain):
1568 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1571 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1572 sources.update(source_domain)
1575 "Authorization": f"Bearer {config.get('instances_social_api_key')}",
# count=0 requests the full, unpaginated instance list sorted by name.
1578 fetched = network.get_json_api(
1580 "/api/1.0/instances/list?count=0&sort_by=name",
1582 (config.get("connection_timeout"), config.get("read_timeout"))
1584 logger.debug("fetched[]='%s'", type(fetched))
# Validate the API envelope before touching fetched["json"]["instances"].
1586 if "error_message" in fetched:
1587 logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1589 elif "exception" in fetched:
1590 logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1592 elif "json" not in fetched:
1593 logger.warning("fetched has no element 'json' - EXIT!")
1595 elif "instances" not in fetched["json"]:
1596 logger.warning("fetched[row] has no element 'instances' - EXIT!")
1600 rows = fetched["json"]["instances"]
1602 logger.info("Checking %d row(s) ...", len(rows))
1604 logger.debug("row[]='%s'", type(row))
1605 domain = tidyup.domain(row["name"])
1607 logger.debug("domain='%s' - AFTER!", domain)
1609 logger.debug("domain is empty - SKIPPED!")
1611 elif not utils.is_domain_wanted(domain):
1612 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1614 elif domain in domains:
1615 logger.debug("domain='%s' is already added - SKIPPED!", domain)
1617 elif instances.is_registered(domain):
1618 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1620 elif instances.is_recent(domain):
1621 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1624 logger.info("Fetching instances from domain='%s'", domain)
1625 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1627 logger.debug("Success! - EXIT!")