1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
23 from urllib.parse import urlparse
33 from fba import database
36 from fba.helpers import blacklist
37 from fba.helpers import config
38 from fba.helpers import cookies
39 from fba.helpers import locking
40 from fba.helpers import software as software_helper
41 from fba.helpers import tidyup
43 from fba.http import federation
44 from fba.http import network
46 from fba.models import blocks
47 from fba.models import instances
48 from fba.models import sources
50 from fba.networks import friendica
51 from fba.networks import lemmy
52 from fba.networks import mastodon
53 from fba.networks import misskey
54 from fba.networks import pleroma
# Module-wide logger: INFO by default; the commented setLevel line below can be
# re-enabled for verbose per-call tracing (every function here logs CALLED!/EXIT!).
56 logging.basicConfig(level=logging.INFO)
57 logger = logging.getLogger(__name__)
58 #logger.setLevel(logging.DEBUG)
60 def check_instance(args: argparse.Namespace) -> int:
# Check a single command-line domain (args.domain): it must be a syntactically
# valid domain name, not blacklisted, and not already registered before it is
# reported as "not known". Returns an int status code.
# NOTE(review): this chunk is line-sampled — the original lines that assign
# `status` in each branch and the final `return status` are missing here
# (status is referenced by the EXIT! debug line below). Restore from VCS.
61     logger.debug("args.domain='%s' - CALLED!", args.domain)
# Guard chain: syntax -> blacklist -> already-registered; only a domain passing
# all three is logged as "not known".
63     if not validators.domain(args.domain):
64         logger.warning("args.domain='%s' is not valid", args.domain)
66     elif blacklist.is_blacklisted(args.domain):
67         logger.warning("args.domain='%s' is blacklisted", args.domain)
69     elif instances.is_registered(args.domain):
70         logger.warning("args.domain='%s' is already registered", args.domain)
73         logger.info("args.domain='%s' is not known", args.domain)
75     logger.debug("status=%d - EXIT!", status)
78 def check_nodeinfo(args: argparse.Namespace) -> int:
# Sanity-check stored nodeinfo URLs: for every registered instance with a
# nodeinfo_url, warn when the URL neither is relative nor mentions the domain
# (in plain or IDNA/punycode form) — a likely stale or foreign URL.
# NOTE(review): sampled chunk — the initialization/increment of `cnt` (logged
# below) and the function's return line are missing here. Restore from VCS.
79     logger.debug("args[]='%s' - CALLED!", type(args))
82     database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")
85     for row in database.cursor.fetchall():
86         logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
# IDNA-encode then decode to get the punycode (xn--) form of the domain so
# internationalized domains can be matched inside the URL string as well.
87         punycode = row["domain"].encode("idna").decode("utf-8")
89         if row["nodeinfo_url"].startswith("/"):
# Relative URLs are host-agnostic, so they trivially "match" the domain.
90             logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
92         elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
93             logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
96     logger.info("Found %d row(s)", cnt)
101 def fetch_pixelfed_api(args: argparse.Namespace) -> int:
# Fetch the public server list from the pixelfed.org API and register any new,
# wanted, not-recently-crawled domains via federation.fetch_instances().
# NOTE(review): sampled chunk — the `try:` openers paired with the `except`
# lines below, the early `return` statements after EXIT! logs, and the
# `for row in rows:` loop header are missing here. Restore from VCS.
102     logger.debug("args[]='%s' - CALLED!", type(args))
104     # No CSRF by default, you don't have to add network.source_headers by yourself here
106     source_domain = "pixelfed.org"
# Rate-limit against the source: skip entirely if it was queried recently.
108     if sources.is_recent(source_domain):
109         logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
112         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
113         sources.update(source_domain)
116         logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
117         headers = csrf.determine(source_domain, dict())
118     except network.exceptions as exception:
119         logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
123         logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
124         fetched = network.get_json_api(
126             "/api/v1/servers/all.json?scope=All&country=all&language=all",
128             (config.get("connection_timeout"), config.get("read_timeout"))
131         logger.debug("JSON API returned %d elements", len(fetched))
# Bail out on transport-level errors or on a payload without the expected
# "data" element before iterating.
132         if "error_message" in fetched:
133             logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
135         elif "data" not in fetched["json"]:
136             logger.warning("API did not return JSON with 'data' element - EXIT!")
139         rows = fetched["json"]["data"]
140         logger.info("Checking %d fetched rows ...", len(rows))
142             logger.debug("row[]='%s'", type(row))
# Per-row guard chain: key present -> non-empty -> wanted -> not registered ->
# not recently crawled; only then fetch the instance.
143             if "domain" not in row:
144                 logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
146             elif row["domain"] == "":
147                 logger.debug("row[domain] is empty - SKIPPED!")
149             elif not utils.is_domain_wanted(row["domain"]):
150                 logger.warning("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
152             elif instances.is_registered(row["domain"]):
153                 logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
155             elif instances.is_recent(row["domain"]):
156                 logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
159             logger.debug("Fetching instances from row[domain]='%s' ...", row["domain"])
# The current function's name is passed as the discovery "command" origin.
160             federation.fetch_instances(row["domain"], None, None, inspect.currentframe().f_code.co_name)
162     except network.exceptions as exception:
163         logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
166     logger.debug("Success! - EXIT!")
169 def fetch_bkali(args: argparse.Namespace) -> int:
# Query the gql.api.bka.li GraphQL endpoint for a domain list, filter it down
# to new/wanted domains, then fetch instance data for each collected domain.
# NOTE(review): sampled chunk — the `locking.acquire()` call, `domains = []`
# initialization, `try:` openers, `continue` statements and returns are
# missing from this view. Restore from VCS before editing.
170     logger.debug("args[]='%s' - CALLED!", type(args))
172     logger.debug("Invoking locking.acquire() ...")
175     source_domain = "gql.api.bka.li"
# Skip entirely if this source was queried recently (rate limiting).
176     if sources.is_recent(source_domain):
177         logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
180         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
181         sources.update(source_domain)
185         logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
186         fetched = network.post_json_api(
# GraphQL query: all nodeinfo records ordered by domain, domain field only.
190             "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
194         logger.debug("fetched[]='%s'", type(fetched))
195         if "error_message" in fetched:
196             logger.warning("post_json_api() for 'gql.sources.bka.li' returned error message='%s", fetched["error_message"])
198         elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
199             logger.warning("post_json_api() returned error: '%s", fetched["error"]["message"])
202         rows = fetched["json"]
204         logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
# Treat an empty or malformed GraphQL response as a hard error.
206             raise Exception("WARNING: Returned no records")
207         elif "data" not in rows:
208             raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
209         elif "nodeinfo" not in rows["data"]:
210             raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")
212         for entry in rows["data"]["nodeinfo"]:
213             logger.debug("entry[%s]='%s'", type(entry), entry)
# Per-entry guard chain mirrors fetch_pixelfed_api(): key present -> non-empty
# -> wanted -> not registered -> not recently crawled.
214             if "domain" not in entry:
215                 logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
217             elif entry["domain"] == "":
218                 logger.debug("entry[domain] is empty - SKIPPED!")
220             elif not utils.is_domain_wanted(entry["domain"]):
221                 logger.warning("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
223             elif instances.is_registered(entry["domain"]):
224                 logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
226             elif instances.is_recent(entry["domain"]):
227                 logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
230             logger.debug("Adding domain='%s' ...", entry["domain"])
231             domains.append(entry["domain"])
233     except network.exceptions as exception:
234         logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
237     logger.debug("domains()=%d", len(domains))
239         logger.info("Adding %d new instances ...", len(domains))
240         for domain in domains:
242             logger.info("Fetching instances from domain='%s' ...", domain)
# 'tak.teleyal.blog' is recorded as the originating instance for these domains.
243             federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
244         except network.exceptions as exception:
245             logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
# Record the failure on the instance so it can be inspected/retried later.
246             instances.set_last_error(domain, exception)
249     logger.debug("Success - EXIT!")
252 def fetch_blocks(args: argparse.Namespace) -> int:
# Core crawler: fetch block lists from registered instances (optionally a
# single --domain or one --software), normalize and deobfuscate each blocked
# entry, persist blocks, and optionally announce new "reject" blocks via bot.
# Dispatches to per-software fetchers: pleroma, mastodon, lemmy, friendica,
# misskey.
# NOTE(review): sampled chunk — missing from this view: early `return`s,
# `locking.acquire()`, `try:` openers, `continue` statements, `blockdict = []`
# initialization and the dict-literal braces around the appended block record.
# Restore from VCS before editing.
253     logger.debug("args[]='%s' - CALLED!", type(args))
# When a single domain is requested, validate it up front.
254     if args.domain is not None and args.domain != "":
255         logger.debug("args.domain='%s' - checking ...", args.domain)
256         if not validators.domain(args.domain):
257             logger.warning("args.domain='%s' is not valid.", args.domain)
259         elif blacklist.is_blacklisted(args.domain):
260             logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
262         elif not instances.is_registered(args.domain):
263             logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
266     logger.debug("Invoking locking.acquire() ...")
# Three selection modes: single domain, single software, or the periodic
# re-check of all supported software past the recheck_block interval.
269     if args.domain is not None and args.domain != "":
270         # Re-check single domain
271         logger.debug("Querying database for single args.domain='%s' ...", args.domain)
272         database.cursor.execute(
273             "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
275     elif args.software is not None and args.software != "":
276         # Re-check single software
277         logger.debug("Querying database for args.software='%s' ...", args.software)
278         database.cursor.execute(
279             "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
282         # Re-check after "timeout" (aka. minimum interval)
283         database.cursor.execute(
284             "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
287     rows = database.cursor.fetchall()
288     logger.info("Checking %d entries ...", len(rows))
289     for blocker, software, origin, nodeinfo_url in rows:
290         logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
# Normalize the blocker domain before any further checks.
291         blocker = tidyup.domain(blocker)
292         logger.debug("blocker='%s' - AFTER!", blocker)
295             logger.warning("blocker is now empty!")
297         elif nodeinfo_url is None or nodeinfo_url == "":
298             logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
300         elif not utils.is_domain_wanted(blocker):
301             logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
304         logger.debug("blocker='%s'", blocker)
# Record the crawl attempt and optimistically reset the obfuscation flag;
# it is re-set below if deobfuscation fails for any entry.
305         instances.set_last_blocked(blocker)
306         instances.set_has_obfuscation(blocker, False)
# Per-software dispatch to the matching block-list fetcher.
310         if software == "pleroma":
311             logger.info("blocker='%s',software='%s'", blocker, software)
312             blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
313         elif software == "mastodon":
314             logger.info("blocker='%s',software='%s'", blocker, software)
315             blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
316         elif software == "lemmy":
317             logger.info("blocker='%s',software='%s'", blocker, software)
318             blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
319         elif software == "friendica":
320             logger.info("blocker='%s',software='%s'", blocker, software)
321             blocking = friendica.fetch_blocks(blocker)
323         elif software == "misskey":
323             logger.info("blocker='%s',software='%s'", blocker, software)
324             blocking = misskey.fetch_blocks(blocker)
326             logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)
328         logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
329         instances.set_total_blocks(blocker, blocking)
331         logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
333         for block in blocking:
334             logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])
336             if block["block_level"] == "":
337                 logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
# Normalize blocked domain and reason text before filtering.
340             logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
341             block["blocked"] = tidyup.domain(block["blocked"])
342             block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
343             logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])
# Skip non-resolvable pseudo-domains (.onion, .arpa, .tld placeholders).
345             if block["blocked"] == "":
346                 logger.warning("blocked is empty, blocker='%s'", blocker)
348             elif block["blocked"].endswith(".onion"):
349                 logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
351             elif block["blocked"].endswith(".arpa"):
352                 logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
354             elif block["blocked"].endswith(".tld"):
355                 logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
# Obfuscated entries: '*' or '?' wildcards are resolved against known
# instances (optionally via a hash); failure marks the blocker as obfuscating.
357             elif block["blocked"].find("*") >= 0:
358                 logger.debug("blocker='%s' uses obfuscated domains", blocker)
360                 # Some friendica servers also obscure domains without hash
361                 row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)
363                 logger.debug("row[]='%s'", type(row))
365                     logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
366                     instances.set_has_obfuscation(blocker, True)
369                 block["blocked"] = row["domain"]
370                 origin = row["origin"]
371                 nodeinfo_url = row["nodeinfo_url"]
372             elif block["blocked"].find("?") >= 0:
373                 logger.debug("blocker='%s' uses obfuscated domains", blocker)
375                 # Some obscure them with question marks, not sure if that's dependent on version or not
376                 row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)
378                 logger.debug("row[]='%s'", type(row))
380                     logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
381                     instances.set_has_obfuscation(blocker, True)
384                 block["blocked"] = row["domain"]
385                 origin = row["origin"]
386                 nodeinfo_url = row["nodeinfo_url"]
388             logger.debug("Looking up instance by domainm, blocked='%s'", block["blocked"])
389             if block["blocked"] == "":
390                 logger.debug("block[blocked] is empty - SKIPPED!")
392             elif not utils.is_domain_wanted(block["blocked"]):
393                 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
395             elif block["block_level"] in ["accept", "accepted"]:
396                 logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
398             elif not instances.is_registered(block["blocked"]):
399                 logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
400                 federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)
# Map software-specific block level names onto the canonical set.
402             block["block_level"] = utils.alias_block_level(block["block_level"])
# Only freshly-processed "reject" blocks are queued for the bot announcement.
404             if utils.process_block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
405                 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
407                     "blocked": block["blocked"],
408                     "reason" : block["reason"],
411             logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
412             cookies.clear(block["blocked"])
414         logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
415         if instances.has_pending(blocker):
416             logger.debug("Flushing updates for blocker='%s' ...", blocker)
417             instances.update_data(blocker)
419         logger.debug("Invoking commit() ...")
420         database.connection.commit()
422         logger.debug("Invoking cookies.clear(%s) ...", blocker)
423         cookies.clear(blocker)
425         logger.debug("config.get(bot_enabled)='%s',blockdict()=%d'", config.get("bot_enabled"), len(blockdict))
426         if config.get("bot_enabled") and len(blockdict) > 0:
427             logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
428             network.send_bot_post(blocker, blockdict)
430     logger.debug("Success! - EXIT!")
433 def fetch_observer(args: argparse.Namespace) -> int:
# Scrape fediverse.observer: discover the list of software types from the
# site's dropdown menu (or use --software), then fetch each software's table
# data page and register all new, wanted domains found there.
# NOTE(review): sampled chunk — missing from this view: `locking.acquire()`,
# `types = []` initialization, `try:` openers, the `for item in items:` loop
# header, `continue` statements and the return. Restore from VCS.
434     logger.debug("args[]='%s' - CALLED!", type(args))
436     logger.debug("Invoking locking.acquire() ...")
439     source_domain = "fediverse.observer"
# Rate-limit: skip if this source was scraped recently.
440     if sources.is_recent(source_domain):
441         logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
444         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
445         sources.update(source_domain)
# Without --software, scrape the site's software dropdown for all types.
448     if args.software is None:
449         logger.info("Fetching software list ...")
450         raw = utils.fetch_url(
451             f"https://{source_domain}",
453             (config.get("connection_timeout"), config.get("read_timeout"))
455         logger.debug("raw[%s]()=%d", type(raw), len(raw))
457         doc = bs4.BeautifulSoup(raw, features="html.parser")
458         logger.debug("doc[]='%s'", type(doc))
# The dropdown labelled "navbarDropdownMenuSoftwares" holds one <a> per software.
460         items = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"}).findAll("a", {"class": "dropdown-item"})
461         logger.debug("items[]='%s'", type(items))
463         logger.info("Checking %d menu items ...", len(items))
465             logger.debug("item[%s]='%s'", type(item), item)
466             if item.text.lower() == "all":
467                 logger.debug("Skipping 'All' menu entry ...")
470             logger.debug("Appending item.text='%s' ...", item.text)
471             types.append(tidyup.domain(item.text))
473         logger.info("Adding args.software='%s' as type ...", args.software)
474         types.append(args.software)
476     logger.info("Fetching %d different table data ...", len(types))
477     for software in types:
478         logger.debug("software='%s' - BEFORE!", software)
479         if args.software is not None and args.software != software:
480             logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
485             logger.debug("Fetching table data for software='%s' ...", software)
# Per-software table data endpoint; parsed with BeautifulSoup below.
486             raw = utils.fetch_url(
487                 f"https://{source_domain}/app/views/tabledata.php?software={software}",
489                 (config.get("connection_timeout"), config.get("read_timeout"))
491             logger.debug("raw[%s]()=%d", type(raw), len(raw))
493             doc = bs4.BeautifulSoup(raw, features="html.parser")
494             logger.debug("doc[]='%s'", type(doc))
495         except network.exceptions as exception:
496             logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
# Each listed instance is an <a class="url"> element.
499         items = doc.findAll("a", {"class": "url"})
500         logger.info("Checking %d items,software='%s' ...", len(items), software)
502             logger.debug("item[]='%s'", type(item))
503             domain = item.decode_contents()
505             logger.debug("domain='%s' - AFTER!", domain)
507                 logger.debug("domain is empty - SKIPPED!")
509             elif not utils.is_domain_wanted(domain):
510                 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
512             elif instances.is_registered(domain):
513                 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
515             elif instances.is_recent(domain):
516                 logger.debug("domain='%s' is recently being handled - SKIPPED!", domain)
# Canonicalize software aliases (e.g. forks) before registering.
519             software = software_helper.alias(software)
520             logger.info("Fetching instances for domain='%s'", domain)
521             federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
523     logger.debug("Success! - EXIT!")
526 def fetch_todon_wiki(args: argparse.Namespace) -> int:
# Import the todon.eu wiki block list: scrape the silenced/limited and
# suspended server sections, register unknown domains, persist new blocks and
# optionally announce new "reject" blocks via the bot account.
# NOTE(review): sampled chunk — missing from this view: `locking.acquire()`,
# the `blocker` and `blockdict`/`blocklist` initializations (blocker is used
# below but never assigned in the visible lines), `try:` openers, `continue`
# statements, the blockdict append literal and the return. Restore from VCS.
527     logger.debug("args[]='%s' - CALLED!", type(args))
529     logger.debug("Invoking locking.acquire() ...")
532     source_domain = "wiki.todon.eu"
# Rate-limit: skip if this source was fetched recently.
533     if sources.is_recent(source_domain):
534         logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
537         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
538         sources.update(source_domain)
545     raw = utils.fetch_url(f"https://{source_domain}/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
546     logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
548     doc = bs4.BeautifulSoup(raw, "html.parser")
549     logger.debug("doc[]='%s'", type(doc))
# The wiki page groups domains under two <h3> headings, each followed by a <ul>.
551     silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
552     logger.info("Checking %d silenced/limited entries ...", len(silenced))
553     blocklist["silenced"] = utils.find_domains(silenced, "div")
555     suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
556     logger.info("Checking %d suspended entries ...", len(suspended))
557     blocklist["reject"] = utils.find_domains(suspended, "div")
559     blocking = blocklist["silenced"] + blocklist["reject"]
562     logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
563     instances.set_total_blocks(blocker, blocking)
566     for block_level in blocklist:
567         blockers = blocklist[block_level]
569         logger.debug("block_level='%s',blockers()=%d'", block_level, len(blockers))
570         for blocked in blockers:
571             logger.debug("blocked='%s'", blocked)
# Register unknown blocked domains before recording the block itself.
573             if not instances.is_registered(blocked):
575                 logger.info("Fetching instances from domain='%s' ...", blocked)
576                 federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
577             except network.exceptions as exception:
# NOTE(review): message says "fetch_cs" but this is fetch_todon_wiki —
# looks like a copy/paste slip in the log text; confirm before changing.
578                 logger.warning("Exception '%s' during fetching instances (fetch_cs) from blocked='%s'", type(exception), blocked)
579                 instances.set_last_error(blocked, exception)
581             if blocks.is_instance_blocked(blocker, blocked, block_level):
582                 logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
585             logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
# Wiki entries carry no reason text, hence the None reason argument.
586             if utils.process_block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
587                 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", blocked, block_level, blocker)
593     logger.debug("Invoking commit() ...")
594     database.connection.commit()
596     logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
597     if config.get("bot_enabled") and len(blockdict) > 0:
598         logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
599         network.send_bot_post(blocker, blockdict)
601     logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
602     if instances.has_pending(blocker):
603         logger.debug("Flushing updates for blocker='%s' ...", blocker)
604         instances.update_data(blocker)
606     logger.debug("Success! - EXIT!")
609 def fetch_cs(args: argparse.Namespace):
# Import chaos.social's published federation policy: fetch the markdown page
# from raw.githubusercontent.com, render it to HTML, extract the silenced and
# blocked instance tables, and persist the blocks for blocker "chaos.social".
# NOTE(review): sampled chunk — missing from this view: `locking.acquire()`,
# the `extensions` list used by markdown.markdown(), the `blocklist`/
# `blockdict` initializations, `continue` statements, `try:` openers and the
# return. Restore from VCS before editing.
610     logger.debug("args[]='%s' - CALLED!", type(args))
612     logger.debug("Invoking locking.acquire() ...")
640     source_domain = "raw.githubusercontent.com"
# Rate-limit: skip if this source was fetched recently.
641     if sources.is_recent(source_domain):
642         logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
645     logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
646     sources.update(source_domain)
648     raw = utils.fetch_url(f"https://{source_domain}/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
649     logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
# Markdown is rendered to HTML first so the tables can be parsed with bs4.
651     doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
652     logger.debug("doc()=%d[]='%s'", len(doc), type(doc))
654     silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
655     logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
656     blocklist["silenced"] = federation.find_domains(silenced)
658     blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
659     logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
660     blocklist["reject"] = federation.find_domains(blocked)
662     blocking = blocklist["silenced"] + blocklist["reject"]
663     blocker = "chaos.social"
665     logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
666     instances.set_total_blocks(blocker, blocking)
668     logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
670     if len(blocking) > 0:
671         for block_level in blocklist:
672             logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))
674             for row in blocklist[block_level]:
675                 logger.debug("row[%s]='%s'", type(row), row)
676                 if instances.is_recent(row["domain"], "last_blocked"):
677                     logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
679                 elif not instances.is_registered(row["domain"]):
681                     logger.info("Fetching instances from domain='%s' ...", row["domain"])
682                     federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
683                 except network.exceptions as exception:
684                     logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
685                     instances.set_last_error(row["domain"], exception)
# New "reject" blocks are queued for the bot announcement when enabled.
687                 if utils.process_block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
688                     logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
690                         "blocked": row["domain"],
691                         "reason" : row["reason"],
694         logger.debug("Invoking commit() ...")
695         database.connection.commit()
697     logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
698     if config.get("bot_enabled") and len(blockdict) > 0:
699         logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
700         network.send_bot_post(blocker, blockdict)
702     logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
703     if instances.has_pending(blocker):
704         logger.debug("Flushing updates for blocker='%s' ...", blocker)
705         instances.update_data(blocker)
707     logger.debug("Success! - EXIT!")
710 def fetch_fba_rss(args: argparse.Namespace) -> int:
# Parse an FBA-specific RSS feed (args.feed): each item's link carries a
# domain after '='; collect new, wanted, not-yet-seen domains and register
# them via federation.fetch_instances().
# NOTE(review): sampled chunk — missing from this view: `domains = []`
# initialization, `locking.acquire()`, `continue` statements, `try:` opener
# and the return. Restore from VCS before editing.
711     logger.debug("args[]='%s' - CALLED!", type(args))
715     logger.debug("Invoking locking.acquire() ...")
# The feed host (netloc) is the rate-limited "source", not the full URL.
718     components = urlparse(args.feed)
720     if sources.is_recent(components.netloc):
721         logger.info("API from components.netloc='%s' has recently being accessed - EXIT!", components.netloc)
724     logger.debug("components.netloc='%s' has not been recently used, marking ...", components.netloc)
725     sources.update(components.netloc)
727     logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
728     response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
730     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
731     if response.ok and response.status_code < 300 and len(response.text) > 0:
732         logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
733         rss = atoma.parse_rss_bytes(response.content)
735         logger.debug("rss[]='%s'", type(rss))
736         for item in rss.items:
737             logger.debug("item='%s'", item)
# The domain is encoded after '=' in the item link's query string.
738             domain = tidyup.domain(item.link.split("=")[1])
740             logger.debug("domain='%s' - AFTER!", domain)
742                 logger.debug("domain is empty - SKIPPED!")
744             elif not utils.is_domain_wanted(domain):
745                 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
747             elif domain in domains:
748                 logger.debug("domain='%s' is already added - SKIPPED!", domain)
750             elif instances.is_registered(domain):
751                 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
753             elif instances.is_recent(domain):
754                 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
757             logger.debug("Adding domain='%s'", domain)
758             domains.append(domain)
760     logger.debug("domains()=%d", len(domains))
762         logger.info("Adding %d new instances ...", len(domains))
763         for domain in domains:
765             logger.info("Fetching instances from domain='%s' ...", domain)
766             federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
767         except network.exceptions as exception:
768             logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
769             instances.set_last_error(domain, exception)
772     logger.debug("Success! - EXIT!")
775 def fetch_fbabot_atom(args: argparse.Namespace) -> int:
# Parse the FBA bot's ATOM feed on ryona.agency: each entry's HTML content is
# scanned for <a href> links whose comma-separated hrefs contain domains;
# new, wanted, unseen domains are registered via federation.fetch_instances().
# NOTE(review): sampled chunk — missing from this view: `domains = []`
# initialization, `locking.acquire()`, `continue` statements and the return.
# Restore from VCS before editing.
776     logger.debug("args[]='%s' - CALLED!", type(args))
778     logger.debug("Invoking locking.acquire() ...")
781     source_domain = "ryona.agency"
# Rate-limit: skip if this source was fetched recently.
782     if sources.is_recent(source_domain):
783         logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
786     logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
787     sources.update(source_domain)
789     feed = f"https://{source_domain}/users/fba/feed.atom"
793     logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
794     response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
796     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
797     if response.ok and response.status_code < 300 and len(response.text) > 0:
798         logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
799         atom = atoma.parse_atom_bytes(response.content)
801         logger.debug("atom[]='%s'", type(atom))
802         for entry in atom.entries:
803             logger.debug("entry[]='%s'", type(entry))
# Entry content is HTML; parse it to walk the anchor elements.
804             doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
805             logger.debug("doc[]='%s'", type(doc))
806             for element in doc.findAll("a"):
807                 logger.debug("element[]='%s'", type(element))
# A single href may list several domains separated by commas.
808                 for href in element["href"].split(","):
809                     logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
810                     domain = tidyup.domain(href)
812                     logger.debug("domain='%s' - AFTER!", domain)
814                         logger.debug("domain is empty - SKIPPED!")
816                     elif not utils.is_domain_wanted(domain):
817                         logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
819                     elif domain in domains:
820                         logger.debug("domain='%s' is already added - SKIPPED!", domain)
822                     elif instances.is_registered(domain):
823                         logger.debug("domain='%s' is already registered - SKIPPED!", domain)
825                     elif instances.is_recent(domain):
826                         logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
829                     logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
830                     domains.append(domain)
832     logger.debug("domains()=%d", len(domains))
834         logger.info("Adding %d new instances ...", len(domains))
835         for domain in domains:
836             logger.debug("domain='%s'", domain)
838             logger.info("Fetching instances from domain='%s' ...", domain)
# source_domain is recorded as the origin for discovered instances.
839             federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
840         except network.exceptions as exception:
841             logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
842             instances.set_last_error(domain, exception)
845     logger.debug("Success! - EXIT!")
848 def fetch_instances(args: argparse.Namespace) -> int:
# Crawl instances: first fetch args.domain itself, then (unless limited by a
# missing --single flag line not visible here) loop over registered instances
# of supported software types whose last fetch is older than recheck_instance.
# NOTE(review): sampled chunk — missing from this view: `locking.acquire()`,
# `try:` openers, early returns, the `for row in rows:` loop header and
# `continue` statements. Restore from VCS before editing.
849     logger.debug("args[]='%s' - CALLED!", type(args))
851     logger.debug("args.domain='%s' - checking ...", args.domain)
852     if not validators.domain(args.domain):
853         logger.warning("args.domain='%s' is not valid.", args.domain)
855     elif blacklist.is_blacklisted(args.domain):
856         logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
859     logger.debug("Invoking locking.acquire() ...")
864     logger.info("Fetching instances from args.domain='%s' ...", args.domain)
865     federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
866     except network.exceptions as exception:
867         logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
868         instances.set_last_error(args.domain, exception)
869         instances.update_data(args.domain)
873         logger.debug("Not fetching more instances - EXIT!")
876     # Loop through some instances
877     database.cursor.execute(
# Re-crawl instances whose last_instance_fetch predates the configured
# recheck_instance interval, newest rowid first.
878         "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
881     rows = database.cursor.fetchall()
882     logger.info("Checking %d entries ...", len(rows))
884         logger.debug("row[domain]='%s'", row["domain"])
885         if row["domain"] == "":
886             logger.debug("row[domain] is empty - SKIPPED!")
888         elif not utils.is_domain_wanted(row["domain"]):
889             logger.warning("Domain row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
893         logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", row["domain"], row["origin"], row["software"], row["nodeinfo_url"])
894         federation.fetch_instances(row["domain"], row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
895         except network.exceptions as exception:
896             logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[domain]='%s'", type(exception), row["domain"])
897             instances.set_last_error(row["domain"], exception)
899     logger.debug("Success - EXIT!")
# Import block lists from the oliphant/blocklists repository on codeberg.org.
# Downloads one CSV per configured blocker, parses each row (tolerating both
# "#domain"/"domain" style column names), deobfuscates wildcarded entries and
# records blocks via utils.process_block(); optionally notifies a bot.
# NOTE(review): numbered listing with flow-control lines (try:, continue,
# list/dict braces, loop headers) elided; comments describe visible lines only.
902 def fetch_oliphant(args: argparse.Namespace) -> int:
903 logger.debug("args[]='%s' - CALLED!", type(args))
905 logger.debug("Invoking locking.acquire() ...")
# Rate-limit access to the source host: bail out if checked too recently,
# otherwise record this access timestamp.
908 source_domain = "codeberg.org"
909 if sources.is_recent(source_domain):
910 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
913 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
914 sources.update(source_domain)
917 base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"
# Static list of blocker -> CSV path pairs (surrounding list/dict syntax
# elided in this listing). csv_url is relative to base_url above.
922 "blocker": "artisan.chat",
923 "csv_url": "mastodon/artisan.chat.csv",
925 "blocker": "mastodon.art",
926 "csv_url": "mastodon/mastodon.art.csv",
928 "blocker": "pleroma.envs.net",
929 "csv_url": "mastodon/pleroma.envs.net.csv",
931 "blocker": "oliphant.social",
932 "csv_url": "mastodon/_unified_tier3_blocklist.csv",
934 "blocker": "mastodon.online",
935 "csv_url": "mastodon/mastodon.online.csv",
937 "blocker": "mastodon.social",
938 "csv_url": "mastodon/mastodon.social.csv",
940 "blocker": "mastodon.social",
941 "csv_url": "other/missing-tier0-mastodon.social.csv",
943 "blocker": "rage.love",
944 "csv_url": "mastodon/rage.love.csv",
946 "blocker": "sunny.garden",
947 "csv_url": "mastodon/sunny.garden.csv",
949 "blocker": "solarpunk.moe",
950 "csv_url": "mastodon/solarpunk.moe.csv",
952 "blocker": "toot.wales",
953 "csv_url": "mastodon/toot.wales.csv",
955 "blocker": "union.place",
956 "csv_url": "mastodon/union.place.csv",
958 "blocker": "oliphant.social",
959 "csv_url": "mastodon/birdsite.csv",
965 logger.debug("Downloading %d files ...", len(blocklists))
966 for block in blocklists:
967 # Is domain given and not equal blocker?
# When --domain is given, only the matching blocker entry is processed.
968 if isinstance(args.domain, str) and args.domain != block["blocker"]:
969 logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
971 elif args.domain in domains:
972 logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
976 logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
977 response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
979 logger.debug("response.ok='%s',response.status_code=%d,response.content()=%d", response.ok, response.status_code, len(response.content))
# NOTE(review): response.content is bytes; comparing it to "" can never be
# True — the emptiness check likely never fires. Confirm against utils.fetch_url.
980 if not response.ok or response.status_code >= 300 or response.content == "":
981 logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
984 logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
985 reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
# Per-row loop header elided; each CSV row may use "#"-prefixed or plain
# column names depending on the source file.
991 logger.debug("row[%s]='%s'", type(row), row)
992 domain = severity = None
993 reject_media = reject_reports = False
996 domain = row["#domain"]
997 elif "domain" in row:
998 domain = row["domain"]
1000 logger.debug("row='%s' does not contain domain column", row)
1003 if "#severity" in row:
1004 severity = utils.alias_block_level(row["#severity"])
1005 elif "severity" in row:
1006 severity = utils.alias_block_level(row["severity"])
1008 logger.debug("row='%s' does not contain severity column", row)
1011 if "#reject_media" in row and row["#reject_media"].lower() == "true":
1013 elif "reject_media" in row and row["reject_media"].lower() == "true":
1016 if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
1017 reject_reports = True
1018 elif "reject_reports" in row and row["reject_reports"].lower() == "true":
1019 reject_reports = True
1022 logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
# Skip non-routable/fake entries; deobfuscate wildcard ("*"/"?") domains.
1024 logger.debug("domain is empty - SKIPPED!")
1026 elif domain.endswith(".onion"):
1027 logger.debug("domain='%s' is a TOR .onion domain - SKIPPED", domain)
1029 elif domain.endswith(".arpa"):
1030 logger.debug("domain='%s' is a reverse IP address - SKIPPED", domain)
1032 elif domain.endswith(".tld"):
1033 logger.debug("domain='%s' is a fake domain - SKIPPED", domain)
1035 elif domain.find("*") >= 0 or domain.find("?") >= 0:
1036 logger.debug("domain='%s' is obfuscated - Invoking utils.deobfuscate(%s, %s) ...", domain, domain, block["blocker"])
1037 domain = utils.deobfuscate(domain, block["blocker"])
1038 logger.debug("domain='%s' - AFTER!", domain)
1040 if not validators.domain(domain):
# NOTE(review): the format string below has a %s placeholder but no argument
# is passed — should be logger.debug("...", domain). Logging will report a
# formatting error instead of raising, but the message is lost.
1041 logger.debug("domain='%s' is not a valid domain - SKIPPED!")
1043 elif blacklist.is_blacklisted(domain):
1044 logger.warning("domain='%s' is blacklisted - SKIPPED!", domain)
1047 logger.debug("Marking domain='%s' as handled", domain)
1048 domains.append(domain)
1050 logger.debug("Processing domain='%s' ...", domain)
1051 processed = utils.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
1052 logger.debug("processed='%s'", processed)
# Record the block; when the bot is enabled, collect it for a later POST
# (the appended dict literal is partially elided below).
1054 if utils.process_block(block["blocker"], domain, None, severity) and config.get("bot_enabled"):
1055 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", domain, block["block_level"], block["blocker"])
1058 "reason" : block["reason"],
# Additional block levels derived from the CSV's boolean columns.
1062 utils.process_block(block["blocker"], domain, None, "reject_media")
1064 utils.process_block(block["blocker"], domain, None, "reject_reports")
1066 logger.debug("Invoking instances.set_total_blocks(%s, domains()=%d) ...", block["blocker"], len(domains))
1067 instances.set_total_blocks(block["blocker"], domains)
1069 logger.debug("Checking if blocker='%s' has pending updates ...", block["blocker"])
1070 if instances.has_pending(block["blocker"]):
1071 logger.debug("Flushing updates for block[blocker]='%s' ...", block["blocker"])
1072 instances.update_data(block["blocker"])
1074 logger.debug("Invoking commit() ...")
1075 database.connection.commit()
1077 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1078 if config.get("bot_enabled") and len(blockdict) > 0:
1079 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
1080 network.send_bot_post(block["blocker"], blockdict)
1082 logger.debug("Success! - EXIT!")
# Import block lists published as plain-text files (one domain per line).
# Currently configured for seirdy.one's bsl.txt; each listed domain is tidied,
# filtered, and handed to utils.process_domain() with the blocker as origin.
# NOTE(review): numbered listing with flow-control lines (urls list braces,
# try:, continue, return) elided; comments describe visible lines only.
1085 def fetch_txt(args: argparse.Namespace) -> int:
1086 logger.debug("args[]='%s' - CALLED!", type(args))
1088 logger.debug("Invoking locking.acquire() ...")
# Static source list entry (list/dict syntax elided in this listing).
1093 "blocker": "seirdy.one",
1094 "url" : "https://seirdy.one/pb/bsl.txt",
1097 logger.info("Checking %d text file(s) ...", len(urls))
# Per-source loop header elided.
1099 logger.debug("Fetching row[url]='%s' ...", row["url"])
1100 response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
1102 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
# Only process successful, non-empty responses; split into one domain per line.
1103 if response.ok and response.status_code < 300 and response.text != "":
1104 logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
1105 domains = response.text.split("\n")
1107 logger.info("Processing %d domains ...", len(domains))
1108 for domain in domains:
1109 logger.debug("domain='%s' - BEFORE!", domain)
# Normalize the raw line (strip whitespace/markup) before filtering.
1110 domain = tidyup.domain(domain)
1112 logger.debug("domain='%s' - AFTER!", domain)
1114 logger.debug("domain is empty - SKIPPED!")
1116 elif not utils.is_domain_wanted(domain):
1117 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1119 elif instances.is_recent(domain):
1120 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1123 logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
1124 processed = utils.process_domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)
1126 logger.debug("processed='%s'", processed)
1128 logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
1131 logger.debug("Success! - EXIT!")
# Discover new instances from the fedipact.online signatory list. Scrapes the
# page's <li> elements with BeautifulSoup, tidies each entry into a domain and
# crawls previously unknown, non-recent ones via federation.fetch_instances().
# NOTE(review): numbered listing with flow-control lines (continue, return,
# loop headers, closing parens) elided; comments describe visible lines only.
1134 def fetch_fedipact(args: argparse.Namespace) -> int:
1135 logger.debug("args[]='%s' - CALLED!", type(args))
1137 logger.debug("Invoking locking.acquire() ...")
# Rate-limit access to the source host; record this access otherwise.
1140 source_domain = "fedipact.online"
1141 if sources.is_recent(source_domain):
1142 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1145 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1146 sources.update(source_domain)
1148 response = utils.fetch_url(
1149 f"https://{source_domain}",
1150 network.web_headers,
1151 (config.get("connection_timeout"), config.get("read_timeout"))
1154 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1155 if response.ok and response.status_code < 300 and response.text != "":
1156 logger.debug("Parsing %d Bytes ...", len(response.text))
1158 doc = bs4.BeautifulSoup(response.text, "html.parser")
1159 logger.debug("doc[]='%s'", type(doc))
# Each signatory is rendered as a list item; the first text node is assumed
# to be the instance's domain.
1161 rows = doc.findAll("li")
1162 logger.info("Checking %d row(s) ...", len(rows))
1164 logger.debug("row[]='%s'", type(row))
1165 domain = tidyup.domain(row.contents[0])
1167 logger.debug("domain='%s' - AFTER!", domain)
# Skip empties, unwanted, already-known, and recently crawled domains.
1169 logger.debug("domain is empty - SKIPPED!")
1171 elif not utils.is_domain_wanted(domain):
1172 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1174 elif instances.is_registered(domain):
1175 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1177 elif instances.is_recent(domain):
1178 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1181 logger.info("Fetching domain='%s' ...", domain)
1182 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1184 logger.debug("Success! - EXIT!")
# Scrape the FediBlock wiki page on joinfediverse.wiki. Parses "wikitable"
# tables into block dicts (keyed by detected header columns), expands
# "subdomain(s)" entries, registers the blocked domains, then records the
# blocks for every locally known climatejustice.* blocker instance.
# NOTE(review): numbered listing with flow-control lines (try:, continue,
# for-loop headers, dict/list initialization) elided; comments describe
# visible lines only.
1187 def fetch_joinfediverse(args: argparse.Namespace) -> int:
1188 logger.debug("args[]='%s' - CALLED!", type(args))
1190 logger.debug("Invoking locking.acquire() ...")
# Rate-limit access to the wiki; record this access otherwise.
1193 source_domain = "joinfediverse.wiki"
1194 if sources.is_recent(source_domain):
1195 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1198 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1199 sources.update(source_domain)
1201 raw = utils.fetch_url(
1202 f"https://{source_domain}/FediBlock",
1203 network.web_headers,
1204 (config.get("connection_timeout"), config.get("read_timeout"))
1206 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1208 doc = bs4.BeautifulSoup(raw, "html.parser")
1209 logger.debug("doc[]='%s'", type(doc))
1211 tables = doc.findAll("table", {"class": "wikitable"})
1213 logger.info("Analyzing %d table(s) ...", len(tables))
1215 for table in tables:
1216 logger.debug("table[]='%s'", type(table))
1218 rows = table.findAll("tr")
1219 logger.info("Checking %d row(s) ...", len(rows))
# block_headers maps column index -> recognized header name for this table.
1220 block_headers = dict()
1222 logger.debug("row[%s]='%s'", type(row), row)
1224 headers = row.findAll("th")
1225 logger.debug("Found headers()=%d header(s)", len(headers))
# A row with multiple <th> cells is a header row: rebuild the column map.
1226 if len(headers) > 1:
1227 block_headers = dict()
1229 for header in headers:
1231 logger.debug("header[]='%s',cnt=%d", type(header), cnt)
1232 text = header.contents[0]
1234 logger.debug("text[]='%s'", type(text))
1235 if not isinstance(text, str):
1236 logger.debug("text[]='%s' is not of type 'str' - SKIPPED!", type(text))
1238 elif validators.domain(text.strip()):
1239 logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
1242 text = tidyup.domain(text.strip())
1243 logger.debug("text='%s'", text)
# Only these header captions are scraped; others are ignored.
1244 if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
1245 logger.debug("Found header: '%s'=%d", text, cnt)
1246 block_headers[cnt] = text
1248 elif len(block_headers) == 0:
1249 logger.debug("row is not scrapable - SKIPPED!")
1251 elif len(block_headers) > 0:
1252 logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
# Data row: map each cell to its header; domain/instance columns become the
# "blocked" key, reasons are tidied, subdomains split on "/".
1256 for element in row.find_all(["th", "td"]):
1258 logger.debug("element[]='%s',cnt=%d", type(element), cnt)
1259 if cnt in block_headers:
1260 logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])
1262 text = element.text.strip()
1263 key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"
1265 logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
1266 if key in ["domain", "instance"]:
1268 elif key == "reason":
1269 block[key] = tidyup.reason(text)
1270 elif key == "subdomain(s)":
1273 block[key] = text.split("/")
1275 logger.debug("key='%s'", key)
1278 logger.debug("block()=%d ...", len(block))
1280 logger.debug("Appending block()=%d ...", len(block))
1281 blocklist.append(block)
1283 logger.debug("blocklist()=%d", len(blocklist))
# The wiki's blocks are attributed to locally known climatejustice.* instances.
1285 database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
1286 domains = database.cursor.fetchall()
1288 logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
# Expand "subdomain(s)" entries into one block per subdomain.origin.
# NOTE(review): the same dict object appears to be mutated and re-appended for
# each subdomain, so earlier appends alias the last "blocked" value — confirm
# whether a copy per subdomain was intended.
1290 for block in blocklist:
1291 logger.debug("block='%s'", block)
1292 if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
1293 origin = block["blocked"]
1294 for subdomain in block["subdomain(s)"]:
1295 block["blocked"] = subdomain + "." + origin
1296 blocking.append(block)
1298 blocking.append(block)
# NOTE(review): %d with a list argument below — should be len(blocking).
1300 logger.debug("blocking()=%d", blocking)
# First pass over blocking: register each blocked domain as an instance.
1301 for block in blocking:
1302 logger.debug("block[]='%s'", type(block))
1303 block["blocked"] = tidyup.domain(block["blocked"])
1305 logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
1306 if block["blocked"] == "":
1307 logger.debug("block[blocked] is empty - SKIPPED!")
1309 elif not utils.is_domain_wanted(block["blocked"]):
1310 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1312 elif instances.is_recent(block["blocked"]):
1313 logger.debug("blocked='%s' has been recently checked - SKIPPED!", block["blocked"])
1316 logger.info("Proccessing blocked='%s' ...", block["blocked"])
1317 utils.process_domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
# Second pass: record every block against each climatejustice.* blocker row.
1320 for blocker in domains:
# DB rows are 1-tuples; unwrap to the plain domain string.
1321 blocker = blocker[0]
1322 logger.debug("blocker[%s]='%s'", type(blocker), blocker)
1324 for block in blocking:
1325 logger.debug("block[blocked]='%s',block[reason]='%s' - BEFORE!", block["blocked"], block["reason"])
1326 block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None
1328 logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
1329 if block["blocked"] == "":
1330 logger.debug("block[blocked] is empty - SKIPPED!")
1332 elif not utils.is_domain_wanted(block["blocked"]):
1333 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1336 logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
# All wiki blocks are recorded at "reject" level; collect for the bot when
# enabled (appended dict literal partially elided).
1337 if utils.process_block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
1338 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
1340 "blocked": block["blocked"],
1341 "reason" : block["reason"],
1344 if instances.has_pending(blocker):
1345 logger.debug("Flushing updates for blocker='%s' ...", blocker)
1346 instances.update_data(blocker)
1348 logger.debug("Invoking commit() ...")
1349 database.connection.commit()
1351 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1352 if config.get("bot_enabled") and len(blockdict) > 0:
# NOTE(review): unbalanced quote in the format string ("blocker='%s,") —
# cosmetic log typo, compare with the matching message in fetch_oliphant.
1353 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", blocker, len(blockdict))
1354 network.send_bot_post(blocker, blockdict)
1356 logger.debug("Success! - EXIT!")
# Re-check instances flagged with has_obfuscation=1: re-fetch their block
# lists with the software-specific fetcher, attempt utils.deobfuscate() on
# wildcarded/hashed entries, and record any newly resolved blocks. Clears the
# has_obfuscation flag once a list is fully deobfuscated.
# NOTE(review): numbered listing with flow-control lines (try:, continue,
# for-loop headers, counters' initialization) elided; comments describe
# visible lines only.
1359 def recheck_obfuscation(args: argparse.Namespace) -> int:
1360 logger.debug("args[]='%s' - CALLED!", type(args))
1362 logger.debug("Invoking locking.acquire() ...")
# Row selection: by --domain, by --software, or all flagged instances.
1365 if isinstance(args.domain, str) and args.domain != "" and utils.is_domain_wanted(args.domain):
1366 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
# NOTE(review): comparing validators.domain(args.software) to args.software
# looks wrong — software names like "pleroma" are not domains and
# validators.domain() does not return its input. Verify intent.
1367 elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
1368 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
1370 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")
1372 rows = database.cursor.fetchall()
1373 logger.info("Checking %d domains ...", len(rows))
# Per-row loop header elided.
1375 logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
# Without --all, recently checked rows are skipped unless explicitly selected.
1376 if (args.all is None or not args.all) and instances.is_recent(row["domain"]) and args.domain is None and args.software is None:
1377 logger.debug("row[domain]='%s' has been recently checked, args.all[]='%s' - SKIPPED!", row["domain"], type(args.all))
# Dispatch to the software-specific block-list fetcher.
1381 if row["software"] == "pleroma":
1382 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1383 blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
1384 elif row["software"] == "mastodon":
1385 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1386 blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
1387 elif row["software"] == "lemmy":
1388 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1389 blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
1390 elif row["software"] == "friendica":
1391 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1392 blocking = friendica.fetch_blocks(row["domain"])
1393 elif row["software"] == "misskey":
1394 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1395 blocking = misskey.fetch_blocks(row["domain"])
1397 logger.warning("Unknown sofware: domain='%s',software='%s'", row["domain"], row["software"])
1399 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
1400 instances.set_total_blocks(row["domain"], blocking)
1402 logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1405 for block in blocking:
1406 logger.debug("block[blocked]='%s'", block["blocked"])
# Skip empty/fake/onion entries; count and try to deobfuscate wildcarded or
# hashed ones (optional "hash" key passed through to utils.deobfuscate).
1409 if block["blocked"] == "":
1410 logger.debug("block[blocked] is empty - SKIPPED!")
1412 elif block["blocked"].endswith(".arpa"):
1413 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1415 elif block["blocked"].endswith(".tld"):
1416 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1418 elif block["blocked"].endswith(".onion"):
1419 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
1421 elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
1422 logger.debug("block='%s' is obfuscated.", block["blocked"])
1423 obfuscated = obfuscated + 1
1424 blocked = utils.deobfuscate(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
1425 elif not utils.is_domain_wanted(block["blocked"]):
1426 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1428 elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1429 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
1432 logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
# A successful deobfuscation yields a different, concrete domain: decrement
# the obfuscated counter and record the block unless already known.
1433 if blocked is not None and blocked != block["blocked"]:
1434 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1435 obfuscated = obfuscated - 1
1436 if blocks.is_instance_blocked(row["domain"], blocked):
1437 logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
1440 block["block_level"] = utils.alias_block_level(block["block_level"])
1442 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
# Only "reject"-level blocks are collected for the bot notification.
1443 if utils.process_block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1444 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
1447 "reason" : block["reason"],
1450 logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
# Fully deobfuscated (and non-empty) list: clear the instance's flag.
1451 if obfuscated == 0 and len(blocking) > 0:
1452 logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
1453 instances.set_has_obfuscation(row["domain"], False)
1455 if instances.has_pending(row["domain"]):
1456 logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1457 instances.update_data(row["domain"])
1459 logger.debug("Invoking commit() ...")
1460 database.connection.commit()
1462 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1463 if config.get("bot_enabled") and len(blockdict) > 0:
# NOTE(review): unbalanced quote in the format string ("blocker='%s,") —
# same cosmetic log typo as in fetch_joinfediverse.
1464 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", row["domain"], len(blockdict))
1465 network.send_bot_post(row["domain"], blockdict)
1467 logger.debug("Success! - EXIT!")
# Discover instances from demo.fedilist.com's CSV export (optionally filtered
# by --software). Each "hostname" column value is tidied, filtered, and
# crawled via federation.fetch_instances(). Note: the source only offers
# plain http:// URLs here.
# NOTE(review): numbered listing with flow-control lines (return, continue,
# loop headers, closing parens) elided; comments describe visible lines only.
1470 def fetch_fedilist(args: argparse.Namespace) -> int:
1471 logger.debug("args[]='%s' - CALLED!", type(args))
1473 logger.debug("Invoking locking.acquire() ...")
# Rate-limit access to the source host; record this access otherwise.
1476 source_domain = "demo.fedilist.com"
1477 if sources.is_recent(source_domain):
1478 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1481 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1482 sources.update(source_domain)
# onion=not excludes Tor hidden services server-side.
1484 url = f"http://{source_domain}/instance/csv?onion=not"
1485 if args.software is not None and args.software != "":
1486 logger.debug("args.software='%s'", args.software)
1487 url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"
1489 logger.info("Fetching url='%s' ...", url)
1490 response = reqto.get(
1492 headers=network.web_headers,
1493 timeout=(config.get("connection_timeout"), config.get("read_timeout")),
1494 allow_redirects=False
# NOTE(review): the check uses len(response.content) but the warning logs
# len(response.text) as "response.content()" — inconsistent, though harmless.
1497 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1498 if not response.ok or response.status_code >= 300 or len(response.content) == 0:
1499 logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", response.ok, response.status_code, len(response.text))
1502 reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
1504 logger.debug("reader[]='%s'", type(reader))
# Per-row loop header elided; "hostname" is the CSV's domain column.
1507 logger.debug("row[]='%s'", type(row))
1508 domain = tidyup.domain(row["hostname"])
1509 logger.debug("domain='%s' - AFTER!", domain)
1512 logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"])
1514 elif not utils.is_domain_wanted(domain):
1515 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1516 
1517 elif (args.all is None or not args.all) and instances.is_registered(domain):
# NOTE(review): the format string below has two %s placeholders but only
# type(args.all) is passed — the domain argument is missing.
1518 logger.debug("domain='%s' is already registered, --all not specified: args.all[]='%s'", type(args.all))
1520 elif instances.is_recent(domain):
1521 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1524 logger.info("Fetching instances from domain='%s' ...", domain)
1525 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1527 logger.debug("Success! - EXIT!")
# Re-detect the software type of instances via federation.determine_software().
# Targets a single --domain, all rows of a given --software, or every row whose
# last_nodeinfo is NULL/older than the "recheck_nodeinfo" config interval.
# Updates the stored software on change and always refreshes last_nodeinfo.
# NOTE(review): numbered listing with flow-control lines (try:, loop header,
# counter initialization, return) elided; comments describe visible lines only.
1530 def update_nodeinfo(args: argparse.Namespace) -> int:
1531 logger.debug("args[]='%s' - CALLED!", type(args))
1533 logger.debug("Invoking locking.acquire() ...")
# Row selection: explicit domain, explicit software, or stale nodeinfo rows.
1536 if args.domain is not None and args.domain != "":
1537 logger.debug("Fetching args.domain='%s'", args.domain)
1538 database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ?", [args.domain])
1539 elif args.software is not None and args.software != "":
1540 logger.info("Fetching domains for args.software='%s'", args.software)
1541 database.cursor.execute("SELECT domain, software FROM instances WHERE software = ?", [args.software])
1543 logger.info("Fetching domains for recently updated ...")
1544 database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL", [time.time() - config.get("recheck_nodeinfo")])
1546 domains = database.cursor.fetchall()
1548 logger.info("Checking %d domain(s) ...", len(domains))
# Per-row loop header and cnt initialization elided; cnt drives the
# percentage shown in the progress log line below.
1551 logger.debug("row[]='%s'", type(row))
1553 logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
1554 software = federation.determine_software(row["domain"])
1556 logger.debug("Determined software='%s'", software)
# Persist only genuine changes; None means detection failed and the stored
# value is kept.
1557 if software != row["software"] and software is not None:
1558 logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
1559 instances.set_software(row["domain"], software)
1561 instances.set_success(row["domain"])
# This except belongs to an elided try: around the detection above.
1562 except network.exceptions as exception:
1563 logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1564 instances.set_last_error(row["domain"], exception)
# Executed for success and failure alike: bump last_nodeinfo and flush.
1566 instances.set_last_nodeinfo(row["domain"])
1567 instances.update_data(row["domain"])
1570 logger.debug("Success! - EXIT!")
1573 def fetch_instances_social(args: argparse.Namespace) -> int:
1574 logger.debug("args[]='%s' - CALLED!", type(args))
1576 logger.debug("Invoking locking.acquire() ...")
1579 source_domain = "instances.social"
1581 if config.get("instances_social_api_key") == "":
1582 logger.error("API key not set. Please set in your config.json file.")
1584 elif sources.is_recent(source_domain):
1585 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1588 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1589 sources.update(source_domain)
1592 "Authorization": f"Bearer {config.get('instances_social_api_key')}",
1595 fetched = network.get_json_api(
1597 "/api/1.0/instances/list?count=0&sort_by=name",
1599 (config.get("connection_timeout"), config.get("read_timeout"))
1601 logger.debug("fetched[]='%s'", type(fetched))
1603 if "error_message" in fetched:
1604 logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1606 elif "exception" in fetched:
1607 logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1609 elif "json" not in fetched:
1610 logger.warning("fetched has no element 'json' - EXIT!")
1612 elif "instances" not in fetched["json"]:
1613 logger.warning("fetched[row] has no element 'instances' - EXIT!")
1617 rows = fetched["json"]["instances"]
1619 logger.info("Checking %d row(s) ...", len(rows))
1621 logger.debug("row[]='%s'", type(row))
1622 domain = tidyup.domain(row["name"])
1624 logger.debug("domain='%s' - AFTER!", domain)
1626 logger.debug("domain is empty - SKIPPED!")
1628 elif not utils.is_domain_wanted(domain):
1629 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1631 elif domain in domains:
1632 logger.debug("domain='%s' is already added - SKIPPED!", domain)
1634 elif instances.is_registered(domain):
1635 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1637 elif instances.is_recent(domain):
1638 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1641 logger.info("Fetching instances from domain='%s'", domain)
1642 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1644 logger.debug("Success! - EXIT!")