1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
import argparse
import inspect
import json
import logging
import time

from urllib.parse import urlparse

import atoma
import bs4
import markdown
import validators

from fba import csrf
from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import locking
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances
from fba.models import sources

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma
# Module-wide logger; INFO by default, flip the commented line for verbose tracing.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)
def check_instance(args: argparse.Namespace) -> int:
    """Check whether args.domain is valid, not blacklisted and not yet registered.

    Returns 0 when the domain is unknown (i.e. may be added), a non-zero
    status code otherwise.

    NOTE(review): reconstructed from a mangled listing — the status
    assignments/`return` were missing; verify codes against upstream history.
    """
    logger.debug("args.domain='%s' - CALLED!", args.domain)

    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status
def check_nodeinfo(args: argparse.Namespace) -> int:
    """Sanity-check stored nodeinfo URLs: warn when a nodeinfo URL does not
    contain the instance's domain (neither its unicode nor punycode form).

    Relative URLs (starting with "/") always match by definition.

    NOTE(review): reconstructed from a mangled listing — `cnt` bookkeeping and
    the final return were missing; verify against upstream history.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    # Fetch all rows that have a nodeinfo URL recorded.
    database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")

    cnt = 0
    for row in database.cursor.fetchall():
        logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        # IDNA-encode to compare against punycoded URLs as well.
        punycode = row["domain"].encode("idna").decode("utf-8")

        if row["nodeinfo_url"].startswith("/"):
            logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
            continue
        elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
            logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
            cnt = cnt + 1

    logger.info("Found %d row(s)", cnt)

    logger.debug("EXIT!")
    return 0
def fetch_pixelfed_api(args: argparse.Namespace) -> int:
    """Fetch the public server list from the pixelfed.org API and register any
    new, wanted instances.

    Returns 0 on success, non-zero on API/CSRF errors.

    NOTE(review): reconstructed from a mangled listing — `try:`/`continue`/
    `return` lines were missing; verify status codes against upstream history.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF by default, you don't have to add network.source_headers by yourself here
    source_domain = "pixelfed.org"

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    try:
        logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
        headers = csrf.determine(source_domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
        return 1

    try:
        logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            source_domain,
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 101
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 102

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif row["domain"] == "":
                logger.debug("row[domain] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(row["domain"]):
                logger.warning("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
                continue
            elif instances.is_registered(row["domain"]):
                logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
                continue
            elif instances.is_recent(row["domain"]):
                logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
                continue

            logger.debug("Fetching instances from row[domain]='%s' ...", row["domain"])
            federation.fetch_instances(row["domain"], None, None, inspect.currentframe().f_code.co_name)

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 103

    logger.debug("Success! - EXIT!")
    return 0
def fetch_bkali(args: argparse.Namespace) -> int:
    """Fetch the domain list from the gql.api.bka.li GraphQL API and register
    any new, wanted instances.

    Returns 0 on success, non-zero on API errors.

    NOTE(review): reconstructed from a mangled listing — `try:`/`continue`/
    `return` and the `domains` accumulator init were missing; verify against
    upstream history.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "gql.api.bka.li"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()
    try:
        logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
        fetched = network.post_json_api(
            source_domain,
            "/v1/graphql",
            json.dumps({
                "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
            })
        )

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for 'gql.sources.bka.li' returned error message='%s", fetched["error_message"])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s", fetched["error"]["message"])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif entry["domain"] == "":
                logger.debug("entry[domain] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(entry["domain"]):
                logger.warning("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
                continue
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0
def fetch_blocks(args: argparse.Namespace) -> int:
    """Fetch block lists from known instances and record them.

    Selection: a single domain (args.domain), all instances of one software
    (args.software), or every supported software whose last check is older
    than the configured re-check interval. Per blocker the software-specific
    fetcher is dispatched, each block entry is tidied, deobfuscated where
    possible, recorded via utils.process_block(), and optionally reported via
    the bot.

    Returns 0 on success, non-zero on argument errors.

    NOTE(review): reconstructed from a mangled listing — `continue`/`return`,
    accumulator inits and the `else:` arms were missing; verify against
    upstream history before relying on exact status codes.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for single args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
        blocker = tidyup.domain(blocker)
        logger.debug("blocker='%s' - AFTER!", blocker)

        if blocker == "":
            logger.warning("blocker is now empty!")
            continue
        elif nodeinfo_url is None or nodeinfo_url == "":
            logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
            continue
        elif not utils.is_domain_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("blocker='%s'", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        # Dispatch the software-specific block-list fetcher.
        blocking = list()
        blockdict = list()
        if software == "pleroma":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
        elif software == "mastodon":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
        elif software == "lemmy":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
        elif software == "friendica":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = friendica.fetch_blocks(blocker)
        elif software == "misskey":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = misskey.fetch_blocks(blocker)
        else:
            logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
        instances.set_total_blocks(blocker, blocking)

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)

        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin = row["origin"]
                nodeinfo_url = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some obscure them with question marks, not sure if that's dependent on version or not
                row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin = row["origin"]
                nodeinfo_url = row["nodeinfo_url"]

            # Typo fix: original log message said "domainm".
            logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(block["blocked"]):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            block["block_level"] = utils.alias_block_level(block["block_level"])

            if utils.process_block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d'", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_observer(args: argparse.Namespace) -> int:
    """Scrape fediverse.observer table data per software type and register new
    instances.

    When args.software is unset, the software menu is scraped from the front
    page; otherwise only that one software is fetched.

    NOTE(review): reconstructed from a mangled listing — `try:`/`continue`/
    accumulator inits were missing; verify against upstream history.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fediverse.observer"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    types = list()
    if args.software is None:
        logger.info("Fetching software list ...")
        raw = utils.fetch_url(
            f"https://{source_domain}",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        ).text
        logger.debug("raw[%s]()=%d", type(raw), len(raw))

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        items = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"}).findAll("a", {"class": "dropdown-item"})
        logger.debug("items[]='%s'", type(items))

        logger.info("Checking %d menu items ...", len(items))
        for item in items:
            logger.debug("item[%s]='%s'", type(item), item)
            if item.text.lower() == "all":
                logger.debug("Skipping 'All' menu entry ...")
                continue

            logger.debug("Appending item.text='%s' ...", item.text)
            types.append(tidyup.domain(item.text))
    else:
        logger.info("Adding args.software='%s' as type ...", args.software)
        types.append(args.software)

    logger.info("Fetching %d different table data ...", len(types))
    for software in types:
        logger.debug("software='%s' - BEFORE!", software)
        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            continue

        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://{source_domain}/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()
            domain = tidyup.domain(domain)

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' is recently being handled - SKIPPED!", domain)
                continue

            # NOTE(review): this rebinds the outer loop variable `software`
            # for all remaining items — kept as in original listing.
            software = software_helper.alias(software)
            logger.info("Fetching instances for domain='%s'", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_todon_wiki(args: argparse.Namespace) -> int:
    """Scrape wiki.todon.eu's domain-block page and record the silenced and
    suspended (reject) lists for blocker todon.eu.

    NOTE(review): reconstructed from a mangled listing — `blocklist`/`blocker`/
    `blockdict` inits and `try:`/`continue` lines were missing; the blocker
    domain "todon.eu" is inferred from the source wiki — verify upstream.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "wiki.todon.eu"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    blocker = "todon.eu"
    blocklist = {
        "silenced": list(),
        "reject": list(),
    }

    raw = utils.fetch_url(f"https://{source_domain}/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blocking = blocklist["silenced"] + blocklist["reject"]

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_total_blocks(blocker, blocking)

    blockdict = list()
    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d'", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_cs) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if blocks.is_instance_blocked(blocker, blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if utils.process_block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", blocked, block_level, blocker)
                blockdict.append({
                    "blocked": blocked,
                    "reason" : None,
                })

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
        network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update_data(blocker)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_cs(args: argparse.Namespace):
    """Fetch chaos.social's published federation.md block list (silenced +
    blocked tables) and record it for blocker chaos.social.

    NOTE(review): reconstructed from a mangled listing — the markdown
    `extensions` list, `blocklist`/`blockdict` inits and `try:`/`continue`
    lines were missing; the extensions list below is a best-effort
    reconstruction — verify against upstream history.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Markdown extensions needed to render federation.md into parseable HTML.
    extensions = [
        "extra",
        "abbr",
        "attr_list",
        "def_list",
        "fenced_code",
        "footnotes",
        "md_in_html",
        "admonition",
        "codehilite",
        "legacy_attrs",
        "legacy_em",
        "meta",
        "nl2br",
        "sane_lists",
        "smarty",
        "toc",
        "wikilinks"
    ]

    blocklist = {
        "silenced": list(),
        "reject": list(),
    }

    source_domain = "raw.githubusercontent.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    raw = utils.fetch_url(f"https://{source_domain}/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc()=%d[]='%s'", len(doc), type(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    blocklist["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    blocklist["reject"] = federation.find_domains(blocked)

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "chaos.social"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_total_blocks(blocker, blocking)

    logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
    blockdict = list()
    if len(blocking) > 0:
        for block_level in blocklist:
            logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))

            for row in blocklist[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if instances.is_recent(row["domain"], "last_blocked"):
                    logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
                    continue
                elif not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if utils.process_block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
                    blockdict.append({
                        "blocked": row["domain"],
                        "reason" : row["reason"],
                    })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

    logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
        network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update_data(blocker)

    logger.debug("Success! - EXIT!")
def fetch_fba_rss(args: argparse.Namespace) -> int:
    """Fetch an FBA-specific RSS feed (args.feed) and register every new,
    wanted domain found in the item links.

    NOTE(review): reconstructed from a mangled listing — `domains` init,
    `continue`/`return` and the `try:` around fetch_instances were missing;
    verify against upstream history.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    domains = list()

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    components = urlparse(args.feed)

    if sources.is_recent(components.netloc):
        logger.info("API from components.netloc='%s' has recently being accessed - EXIT!", components.netloc)
        return 0
    else:
        logger.debug("components.netloc='%s' has not been recently used, marking ...", components.netloc)
        sources.update(components.netloc)

    logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item='%s'", item)
            # Feed links carry the domain as a query value after "=".
            domain = tidyup.domain(item.link.split("=")[1])

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_fbabot_atom(args: argparse.Namespace) -> int:
    """Fetch the FBA bot account's ATOM feed on ryona.agency and register
    every new, wanted domain linked from the entries.

    NOTE(review): reconstructed from a mangled listing — `domains` init,
    `continue` lines and the `try:` around fetch_instances were missing;
    verify against upstream history.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "ryona.agency"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    feed = f"https://{source_domain}/users/fba/feed.atom"

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                logger.debug("element[]='%s'", type(element))
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href)

                    logger.debug("domain='%s' - AFTER!", domain)
                    if domain == "":
                        logger.debug("domain is empty - SKIPPED!")
                        continue
                    elif not utils.is_domain_wanted(domain):
                        logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_instances(args: argparse.Namespace) -> int:
    """Fetch peers for args.domain, then (unless a single-domain run) re-crawl
    every supported instance whose last fetch is older than the configured
    re-check interval.

    NOTE(review): reconstructed from a mangled listing — `try:`/`continue`/
    `return` lines and the single-run guard were missing; the `args.single`
    flag is inferred from the "Not fetching more instances" branch — verify
    against upstream history.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("args.domain='%s' - checking ...", args.domain)
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid.", args.domain)
        return 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
        return 101

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Initial fetch of the given domain.
    try:
        logger.info("Fetching instances from args.domain='%s' ...", args.domain)
        federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update_data(args.domain)
        return 100

    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] == "":
            logger.debug("row[domain] is empty - SKIPPED!")
            continue
        elif not utils.is_domain_wanted(row["domain"]):
            logger.warning("Domain row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", row["domain"], row["origin"], row["software"], row["nodeinfo_url"])
            federation.fetch_instances(row["domain"], row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[domain]='%s'", type(exception), row["domain"])
            instances.set_last_error(row["domain"], exception)

    logger.debug("Success - EXIT!")
    return 0
def fetch_oliphant(args: argparse.Namespace) -> int:
    """Download the Oliphant CSV block lists from codeberg.org and import
    each row as a block entry for its configured blocker instance.

    :param args: parsed CLI arguments; ``args.domain`` restricts to one blocker
    :return: exit status code
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")

    # Rate-limit hits against the hosting forge.
    source_domain = "codeberg.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)

    logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
    sources.update(source_domain)

    base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"

    # Static blocker -> CSV-path entries (list of dicts relative to base_url).
        "blocker": "artisan.chat",
        "csv_url": "mastodon/artisan.chat.csv",
        "blocker": "mastodon.art",
        "csv_url": "mastodon/mastodon.art.csv",
        "blocker": "pleroma.envs.net",
        "csv_url": "mastodon/pleroma.envs.net.csv",
        "blocker": "oliphant.social",
        "csv_url": "mastodon/_unified_tier3_blocklist.csv",
        "blocker": "mastodon.online",
        "csv_url": "mastodon/mastodon.online.csv",
        "blocker": "mastodon.social",
        "csv_url": "mastodon/mastodon.social.csv",
        "blocker": "mastodon.social",
        "csv_url": "other/missing-tier0-mastodon.social.csv",
        "blocker": "rage.love",
        "csv_url": "mastodon/rage.love.csv",
        "blocker": "sunny.garden",
        "csv_url": "mastodon/sunny.garden.csv",
        "blocker": "sunny.garden",
        "csv_url": "mastodon/gardenfence.csv",
        "blocker": "solarpunk.moe",
        "csv_url": "mastodon/solarpunk.moe.csv",
        "blocker": "toot.wales",
        "csv_url": "mastodon/toot.wales.csv",
        "blocker": "union.place",
        "csv_url": "mastodon/union.place.csv",
        "blocker": "oliphant.social",
        "csv_url": "mastodon/birdsite.csv",

    logger.debug("Downloading %d files ...", len(blocklists))
    for block in blocklists:
        # Is domain given and not equal blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
        elif args.domain in domains:
            logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)

            logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
            response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

            logger.debug("response.ok='%s',response.status_code=%d,response.content()=%d", response.ok, response.status_code, len(response.content))
            if not response.ok or response.status_code >= 300 or response.content == "":
                logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])

            logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
            reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")

            logger.debug("row[%s]='%s'", type(row), row)
            # Oliphant CSVs use either commented ("#domain") or plain headers;
            # accept both spellings for every column.
            domain = severity = None
            reject_media = reject_reports = False

                domain = row["#domain"]
            elif "domain" in row:
                domain = row["domain"]
                logger.debug("row='%s' does not contain domain column", row)

            if "#severity" in row:
                severity = utils.alias_block_level(row["#severity"])
            elif "severity" in row:
                severity = utils.alias_block_level(row["severity"])
                logger.debug("row='%s' does not contain severity column", row)

            if "#reject_media" in row and row["#reject_media"].lower() == "true":
            elif "reject_media" in row and row["reject_media"].lower() == "true":

            if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
                reject_reports = True
            elif "reject_reports" in row and row["reject_reports"].lower() == "true":
                reject_reports = True

            logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
                logger.debug("domain is empty - SKIPPED!")
            elif domain.endswith(".onion"):
                logger.debug("domain='%s' is a TOR .onion domain - SKIPPED", domain)
            elif domain.endswith(".arpa"):
                logger.debug("domain='%s' is a reverse IP address - SKIPPED", domain)
            elif domain.endswith(".tld"):
                logger.debug("domain='%s' is a fake domain - SKIPPED", domain)
            elif domain.find("*") >= 0 or domain.find("?") >= 0:
                # Obfuscated entry (wildcards) - try to resolve the real domain.
                logger.debug("domain='%s' is obfuscated - Invoking utils.deobfuscate(%s, %s) ...", domain, domain, block["blocker"])
                domain = utils.deobfuscate(domain, block["blocker"])
                logger.debug("domain='%s' - AFTER!", domain)

            if not validators.domain(domain):
                # NOTE(review): format string has a '%s' placeholder but no
                # argument is supplied - 'domain' is missing here; confirm
                # against full file and fix.
                logger.debug("domain='%s' is not a valid domain - SKIPPED!")
            elif blacklist.is_blacklisted(domain):
                logger.warning("domain='%s' is blacklisted - SKIPPED!", domain)

            logger.debug("Marking domain='%s' as handled", domain)
            domains.append(domain)

            logger.debug("Processing domain='%s' ...", domain)
            processed = utils.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
            logger.debug("processed='%s'", processed)

            if utils.process_block(block["blocker"], domain, None, severity) and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", domain, block["block_level"], block["blocker"])
                    "reason" : block["reason"],

                # Record the extra severities when flagged in the CSV row.
                utils.process_block(block["blocker"], domain, None, "reject_media")
                utils.process_block(block["blocker"], domain, None, "reject_reports")

        logger.debug("Invoking instances.set_total_blocks(%s, domains()=%d) ...", block["blocker"], len(domains))
        instances.set_total_blocks(block["blocker"], domains)

        logger.debug("Checking if blocker='%s' has pending updates ...", block["blocker"])
        if instances.has_pending(block["blocker"]):
            logger.debug("Flushing updates for block[blocker]='%s' ...", block["blocker"])
            instances.update_data(block["blocker"])

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
            network.send_bot_post(block["blocker"], blockdict)

    logger.debug("Success! - EXIT!")
def fetch_txt(args: argparse.Namespace) -> int:
    """Fetch plain-text block lists (one domain per line) from a static set
    of URLs and run each tidied domain through the generic processor.

    :param args: parsed CLI arguments (not inspected in the visible body)
    :return: exit status code
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")

    # Static list of text block lists (blocker domain + raw URL).
        "blocker": "seirdy.one",
        "url" : "https://seirdy.one/pb/bsl.txt",

    logger.info("Checking %d text file(s) ...", len(urls))
        logger.debug("Fetching row[url]='%s' ...", row["url"])
        response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code < 300 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            # One candidate domain per line.
            domains = response.text.split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain)

                logger.debug("domain='%s' - AFTER!", domain)
                    logger.debug("domain is empty - SKIPPED!")
                elif not utils.is_domain_wanted(domain):
                    logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                elif instances.is_recent(domain):
                    logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)

                    logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
                    processed = utils.process_domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)

                    logger.debug("processed='%s'", processed)
                        logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)

    logger.debug("Success! - EXIT!")
def fetch_fedipact(args: argparse.Namespace) -> int:
    """Scrape fedipact.online for participating instances (listed as
    ``<li>`` elements) and crawl each new, wanted domain.

    :param args: parsed CLI arguments (not inspected in the visible body)
    :return: exit status code
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")

    # Rate-limit hits against the source site.
    source_domain = "fedipact.online"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)

    logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
    sources.update(source_domain)

    response = utils.fetch_url(
        f"https://{source_domain}",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and response.text != "":
        logger.debug("Parsing %d Bytes ...", len(response.text))

        doc = bs4.BeautifulSoup(response.text, "html.parser")
        logger.debug("doc[]='%s'", type(doc))

        # Each listed instance is an <li>; its first content node is the domain.
        rows = doc.findAll("li")
        logger.info("Checking %d row(s) ...", len(rows))
            logger.debug("row[]='%s'", type(row))
            domain = tidyup.domain(row.contents[0])

            logger.debug("domain='%s' - AFTER!", domain)
                logger.debug("domain is empty - SKIPPED!")
            elif not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)

                logger.info("Fetching domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
def fetch_joinfediverse(args: argparse.Namespace) -> int:
    """Scrape the joinfediverse.wiki FediBlock page's wikitable(s), build a
    block list (expanding listed subdomains), process every blocked domain,
    and attribute the blocks to the local climatejustice.* instances.

    :param args: parsed CLI arguments (not inspected in the visible body)
    :return: exit status code
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")

    # Rate-limit hits against the wiki.
    source_domain = "joinfediverse.wiki"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)

    logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
    sources.update(source_domain)

    raw = utils.fetch_url(
        f"https://{source_domain}/FediBlock",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))

    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    # The block data lives in MediaWiki "wikitable" tables.
    tables = doc.findAll("table", {"class": "wikitable"})

    logger.info("Analyzing %d table(s) ...", len(tables))

    for table in tables:
        logger.debug("table[]='%s'", type(table))

        rows = table.findAll("tr")
        logger.info("Checking %d row(s) ...", len(rows))
        # Maps column index -> recognized header name for this table.
        block_headers = dict()
            logger.debug("row[%s]='%s'", type(row), row)

            headers = row.findAll("th")
            logger.debug("Found headers()=%d header(s)", len(headers))
            if len(headers) > 1:
                # Header row: rebuild the column-index map.
                block_headers = dict()

                for header in headers:
                    logger.debug("header[]='%s',cnt=%d", type(header), cnt)
                    text = header.contents[0]

                    logger.debug("text[]='%s'", type(text))
                    if not isinstance(text, str):
                        logger.debug("text[]='%s' is not of type 'str' - SKIPPED!", type(text))
                    elif validators.domain(text.strip()):
                        logger.debug("text='%s' is a domain - SKIPPED!", text.strip())

                    text = tidyup.domain(text.strip())
                    logger.debug("text='%s'", text)
                    if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
                        logger.debug("Found header: '%s'=%d", text, cnt)
                        block_headers[cnt] = text

            elif len(block_headers) == 0:
                logger.debug("row is not scrapable - SKIPPED!")
            elif len(block_headers) > 0:
                # Data row: pull the cells for the recognized columns.
                logger.debug("Found a row with %d scrapable headers ...", len(block_headers))

                for element in row.find_all(["th", "td"]):
                    logger.debug("element[]='%s',cnt=%d", type(element), cnt)
                    if cnt in block_headers:
                        logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])

                        text = element.text.strip()
                        # domain/instance columns both map onto the "blocked" key.
                        key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"

                        logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
                        if key in ["domain", "instance"]:
                        elif key == "reason":
                            block[key] = tidyup.reason(text)
                        elif key == "subdomain(s)":
                            # Multiple subdomains are separated with "/".
                            block[key] = text.split("/")
                            logger.debug("key='%s'", key)

                logger.debug("block()=%d ...", len(block))
                    logger.debug("Appending block()=%d ...", len(block))
                    blocklist.append(block)

    logger.debug("blocklist()=%d", len(blocklist))

    # The wiki data is attributed to the local climatejustice.* blockers.
    database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
    domains = database.cursor.fetchall()

    logger.debug("domains(%d)[]='%s'", len(domains), type(domains))

    for block in blocklist:
        logger.debug("block='%s'", block)
        if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
            # Expand each listed subdomain into its own block entry.
            origin = block["blocked"]
            for subdomain in block["subdomain(s)"]:
                block["blocked"] = subdomain + "." + origin
                blocking.append(block)
            blocking.append(block)

    # NOTE(review): '%d' is given the list itself; len(blocking) is
    # presumably intended - confirm against full file.
    logger.debug("blocking()=%d", blocking)
    for block in blocking:
        logger.debug("block[]='%s'", type(block))
        block["blocked"] = tidyup.domain(block["blocked"])

        logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
        if block["blocked"] == "":
            logger.debug("block[blocked] is empty - SKIPPED!")
        elif not utils.is_domain_wanted(block["blocked"]):
            logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
        elif instances.is_recent(block["blocked"]):
            logger.debug("blocked='%s' has been recently checked - SKIPPED!", block["blocked"])

            logger.info("Proccessing blocked='%s' ...", block["blocked"])
            utils.process_domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)

    for blocker in domains:
        # fetchall() returns tuples; first element is the domain.
        blocker = blocker[0]
        logger.debug("blocker[%s]='%s'", type(blocker), blocker)

        for block in blocking:
            logger.debug("block[blocked]='%s',block[reason]='%s' - BEFORE!", block["blocked"], block["reason"])
            block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None

            logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
            elif not utils.is_domain_wanted(block["blocked"]):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])

                logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
                if utils.process_block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
                        "blocked": block["blocked"],
                        "reason" : block["reason"],

        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            # NOTE(review): message has an unbalanced quote - "blocker='%s,"
            # should read "blocker='%s'," (compare fetch_oliphant).
            logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
def recheck_obfuscation(args: argparse.Namespace) -> int:
    """Re-fetch block lists from instances flagged ``has_obfuscation`` and
    try to deobfuscate their wildcarded entries into real domains.

    :param args: parsed CLI arguments; ``args.domain``/``args.software``
                 narrow the selection, ``args.all`` forces recently-checked
                 instances to be included
    :return: exit status code
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")

    # Narrow the candidate set by domain, by software, or take all flagged rows.
    if isinstance(args.domain, str) and args.domain != "" and utils.is_domain_wanted(args.domain):
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
    elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")

    rows = database.cursor.fetchall()
    logger.info("Checking %d domains ...", len(rows))
        logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        # Skip recently-checked instances unless --all or an explicit filter was given.
        if (args.all is None or not args.all) and instances.is_recent(row["domain"]) and args.domain is None and args.software is None:
            logger.debug("row[domain]='%s' has been recently checked, args.all[]='%s' - SKIPPED!", row["domain"], type(args.all))

        # Dispatch to the software-specific block-list fetcher.
        if row["software"] == "pleroma":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
        elif row["software"] == "mastodon":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
        elif row["software"] == "lemmy":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
        elif row["software"] == "friendica":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = friendica.fetch_blocks(row["domain"])
        elif row["software"] == "misskey":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = misskey.fetch_blocks(row["domain"])
            # NOTE(review): "sofware" typo in log message ("software").
            logger.warning("Unknown sofware: domain='%s',software='%s'", row["domain"], row["software"])

        logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
        instances.set_total_blocks(row["domain"], blocking)

        logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])

        for block in blocking:
            logger.debug("block[blocked]='%s'", block["blocked"])

            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
            elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
                # Wildcarded entry: count it and attempt deobfuscation,
                # optionally helped by a per-entry hash when present.
                logger.debug("block='%s' is obfuscated.", block["blocked"])
                obfuscated = obfuscated + 1
                blocked = utils.deobfuscate(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
            elif not utils.is_domain_wanted(block["blocked"]):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
            elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
                logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])

            logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
            if blocked is not None and blocked != block["blocked"]:
                # Deobfuscation succeeded: undo the counter increment.
                logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
                obfuscated = obfuscated - 1
                if blocks.is_instance_blocked(row["domain"], blocked):
                    logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])

                block["block_level"] = utils.alias_block_level(block["block_level"])

                logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
                if utils.process_block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
                        "reason" : block["reason"],

        logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
        if obfuscated == 0 and len(blocking) > 0:
            # Every entry resolved: clear the flag so this instance is not rechecked.
            logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
            instances.set_has_obfuscation(row["domain"], False)

        if instances.has_pending(row["domain"]):
            logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
            instances.update_data(row["domain"])

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            # NOTE(review): message has an unbalanced quote - "blocker='%s,"
            # should read "blocker='%s'," (compare fetch_oliphant).
            logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", row["domain"], len(blockdict))
            network.send_bot_post(row["domain"], blockdict)

    logger.debug("Success! - EXIT!")
def fetch_fedilist(args: argparse.Namespace) -> int:
    """Fetch the instance CSV from demo.fedilist.com (optionally filtered by
    ``args.software``) and crawl each new, wanted domain.

    :param args: parsed CLI arguments; ``args.software`` filters the CSV,
                 ``args.all`` includes already-registered domains
    :return: exit status code
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")

    # Rate-limit hits against the source site.
    source_domain = "demo.fedilist.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)

    logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
    sources.update(source_domain)

    # onion=not excludes TOR hidden services from the export.
    url = f"http://{source_domain}/instance/csv?onion=not"
    if args.software is not None and args.software != "":
        logger.debug("args.software='%s'", args.software)
        url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"

    logger.info("Fetching url='%s' ...", url)
    response = reqto.get(
        headers=network.web_headers,
        timeout=(config.get("connection_timeout"), config.get("read_timeout")),
        allow_redirects=False

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if not response.ok or response.status_code >= 300 or len(response.content) == 0:
        logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", response.ok, response.status_code, len(response.text))

    reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")

    logger.debug("reader[]='%s'", type(reader))
        logger.debug("row[]='%s'", type(row))
        domain = tidyup.domain(row["hostname"])
        logger.debug("domain='%s' - AFTER!", domain)
            logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"])
        elif not utils.is_domain_wanted(domain):
            logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
        elif (args.all is None or not args.all) and instances.is_registered(domain):
            # NOTE(review): format string has two '%s' placeholders but only
            # one argument - 'domain' is missing before type(args.all).
            logger.debug("domain='%s' is already registered, --all not specified: args.all[]='%s'", type(args.all))
        elif instances.is_recent(domain):
            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)

            logger.info("Fetching instances from domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
def update_nodeinfo(args: argparse.Namespace) -> int:
    """Re-determine the software type of registered instances via nodeinfo,
    selected by ``args.domain``, ``args.software``, or staleness.

    :param args: parsed CLI arguments; ``args.domain``/``args.software``
                 narrow the selection
    :return: exit status code
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")

    # Select by explicit domain, by software type, or by stale nodeinfo timestamp.
    if args.domain is not None and args.domain != "":
        logger.debug("Fetching args.domain='%s'", args.domain)
        database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ?", [args.domain])
    elif args.software is not None and args.software != "":
        logger.info("Fetching domains for args.software='%s'", args.software)
        database.cursor.execute("SELECT domain, software FROM instances WHERE software = ?", [args.software])
        logger.info("Fetching domains for recently updated ...")
        database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL", [time.time() - config.get("recheck_nodeinfo")])

    domains = database.cursor.fetchall()

    logger.info("Checking %d domain(s) ...", len(domains))
        logger.debug("row[]='%s'", type(row))
            # Log progress as a percentage of the selected rows.
            logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
            software = federation.determine_software(row["domain"])

            logger.debug("Determined software='%s'", software)
            if software != row["software"] and software is not None:
                # Persist a detected software change.
                logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
                instances.set_software(row["domain"], software)

            instances.set_success(row["domain"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
            instances.set_last_error(row["domain"], exception)

        # Always stamp the attempt and flush pending instance data.
        instances.set_last_nodeinfo(row["domain"])
        instances.update_data(row["domain"])

    logger.debug("Success! - EXIT!")
1576 def fetch_instances_social(args: argparse.Namespace) -> int:
1577 logger.debug("args[]='%s' - CALLED!", type(args))
1579 logger.debug("Invoking locking.acquire() ...")
1582 source_domain = "instances.social"
1584 if config.get("instances_social_api_key") == "":
1585 logger.error("API key not set. Please set in your config.json file.")
1587 elif sources.is_recent(source_domain):
1588 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1591 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1592 sources.update(source_domain)
1595 "Authorization": f"Bearer {config.get('instances_social_api_key')}",
1598 fetched = network.get_json_api(
1600 "/api/1.0/instances/list?count=0&sort_by=name",
1602 (config.get("connection_timeout"), config.get("read_timeout"))
1604 logger.debug("fetched[]='%s'", type(fetched))
1606 if "error_message" in fetched:
1607 logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1609 elif "exception" in fetched:
1610 logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1612 elif "json" not in fetched:
1613 logger.warning("fetched has no element 'json' - EXIT!")
1615 elif "instances" not in fetched["json"]:
1616 logger.warning("fetched[row] has no element 'instances' - EXIT!")
1620 rows = fetched["json"]["instances"]
1622 logger.info("Checking %d row(s) ...", len(rows))
1624 logger.debug("row[]='%s'", type(row))
1625 domain = tidyup.domain(row["name"])
1627 logger.debug("domain='%s' - AFTER!", domain)
1629 logger.debug("domain is empty - SKIPPED!")
1631 elif not utils.is_domain_wanted(domain):
1632 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1634 elif domain in domains:
1635 logger.debug("domain='%s' is already added - SKIPPED!", domain)
1637 elif instances.is_registered(domain):
1638 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1640 elif instances.is_recent(domain):
1641 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1644 logger.info("Fetching instances from domain='%s'", domain)
1645 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1647 logger.debug("Success! - EXIT!")