1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
23 from urllib.parse import urlparse
33 from fba import database
36 from fba.helpers import blacklist
37 from fba.helpers import config
38 from fba.helpers import cookies
39 from fba.helpers import locking
40 from fba.helpers import software as software_helper
41 from fba.helpers import tidyup
43 from fba.http import federation
44 from fba.http import network
46 from fba.models import blocks
47 from fba.models import instances
48 from fba.models import sources
50 from fba.networks import friendica
51 from fba.networks import lemmy
52 from fba.networks import mastodon
53 from fba.networks import misskey
54 from fba.networks import pleroma
# Module-wide logging: INFO by default; the module logger is named after
# this module so downstream configs can filter it individually.
56 logging.basicConfig(level=logging.INFO)
57 logger = logging.getLogger(__name__)
# Deliberately kept as an easy debug toggle for local troubleshooting.
58 #logger.setLevel(logging.DEBUG)
# Validate a single domain given on the command line and report whether it
# could be added: syntactically valid, not blacklisted, not yet registered.
# NOTE(review): this listing's embedded line numbers skip (62, 65, 68, 71-72,
# 74, 76-77); the `status` assignments and the final `return status` are not
# visible here — confirm against the full source.
60 def check_instance(args: argparse.Namespace) -> int:
61 logger.debug("args.domain='%s' - CALLED!", args.domain)
# Reject syntactically invalid domain names first.
63 if not validators.domain(args.domain):
64 logger.warning("args.domain='%s' is not valid", args.domain)
# Refuse anything on the local blacklist.
66 elif blacklist.is_blacklisted(args.domain):
67 logger.warning("args.domain='%s' is blacklisted", args.domain)
# Already-known instances need no re-registration.
69 elif instances.is_registered(args.domain):
70 logger.warning("args.domain='%s' is already registered", args.domain)
73 logger.info("args.domain='%s' is not known", args.domain)
75 logger.debug("status=%d - EXIT!", status)
# Consistency check: walk all instances with a stored nodeinfo_url and warn
# when that URL references neither the instance's domain nor its punycode
# (IDNA) form — a hint the stored URL belongs to a different host.
# NOTE(review): lines 80-81, 83-84, 88, 91, 94-95, 97-100 are missing from
# this listing; `cnt` is presumably initialised/incremented there — confirm.
78 def check_nodeinfo(args: argparse.Namespace) -> int:
79 logger.debug("args[]='%s' - CALLED!", type(args))
82 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")
85 for row in database.cursor.fetchall():
86 logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
# IDNA-encode the domain so internationalized domains match their
# punycode (xn--...) representation inside the URL.
87 punycode = row["domain"].encode("idna").decode("utf-8")
# Relative URLs carry no host at all, so they trivially "match".
89 if row["nodeinfo_url"].startswith("/"):
90 logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
# Neither the punycode form nor the raw domain appears in the URL.
92 elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
93 logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
96 logger.info("Found %d row(s)", cnt)
# Fetch the public server directory from the pixelfed.org API and register
# every new, wanted domain by crawling it via federation.fetch_instances().
# NOTE(review): this listing omits several lines (103, 105, 107, 110-111,
# 114-115, 120-122, 125, 127, 129-130, 134, 137-138, 141, 145, 148, 151,
# 154, 157-158, 161, 164-165, 167-168); early returns, the `try:` headers
# and the row loop header are among them — confirm against the full source.
101 def fetch_pixelfed_api(args: argparse.Namespace) -> int:
102 logger.debug("args[]='%s' - CALLED!", type(args))
104 # No CSRF by default, you don't have to add network.source_headers by yourself here
106 source_domain = "pixelfed.org"
# Rate-limit: skip this source entirely if it was polled recently.
108 if sources.is_recent(source_domain):
109 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
112 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
113 sources.update(source_domain)
# Determine CSRF headers before talking to the API (may raise).
116 logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
117 headers = csrf.determine(source_domain, dict())
118 except network.exceptions as exception:
119 logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
123 logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
124 fetched = network.get_json_api(
126 "/api/v1/servers/all.json?scope=All&country=all&language=all",
128 (config.get("connection_timeout"), config.get("read_timeout"))
131 logger.debug("JSON API returned %d elements", len(fetched))
# Bail out on transport-level errors or a malformed payload.
132 if "error_message" in fetched:
133 logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
135 elif "data" not in fetched["json"]:
136 logger.warning("API did not return JSON with 'data' element - EXIT!")
139 rows = fetched["json"]["data"]
140 logger.info("Checking %d fetched rows ...", len(rows))
# Per-row filtering: skip malformed, empty, unwanted, known or
# recently-crawled domains; everything else gets crawled.
142 logger.debug("row[]='%s'", type(row))
143 if "domain" not in row:
144 logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
146 elif row["domain"] == "":
147 logger.debug("row[domain] is empty - SKIPPED!")
149 elif not utils.is_domain_wanted(row["domain"]):
150 logger.warning("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
152 elif instances.is_registered(row["domain"]):
153 logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
155 elif instances.is_recent(row["domain"]):
156 logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
159 logger.debug("Fetching instances from row[domain]='%s' ...", row["domain"])
# The current function name is recorded as the crawl origin/command.
160 federation.fetch_instances(row["domain"], None, None, inspect.currentframe().f_code.co_name)
162 except network.exceptions as exception:
163 logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
166 logger.debug("Success! - EXIT!")
# Fetch a domain list from the gql.api.bka.li GraphQL endpoint and crawl
# every new, wanted domain found there.
# NOTE(review): lines missing from this listing include the locking.acquire()
# call, `domains = []`/`try:` headers, `continue` statements and the return —
# confirm against the full source.
169 def fetch_bkali(args: argparse.Namespace) -> int:
170 logger.debug("args[]='%s' - CALLED!", type(args))
172 logger.debug("Invoking locking.acquire() ...")
175 source_domain = "gql.api.bka.li"
# Rate-limit: skip this source entirely if it was polled recently.
176 if sources.is_recent(source_domain):
177 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
180 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
181 sources.update(source_domain)
# Single GraphQL query returning all known domains, ordered ascending.
185 logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
186 fetched = network.post_json_api(
190 "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
194 logger.debug("fetched[]='%s'", type(fetched))
# Two error shapes: transport-level and GraphQL-level.
195 if "error_message" in fetched:
196 logger.warning("post_json_api() for 'gql.sources.bka.li' returned error message='%s", fetched["error_message"])
198 elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
199 logger.warning("post_json_api() returned error: '%s", fetched["error"]["message"])
202 rows = fetched["json"]
204 logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
# Payload sanity: raising here aborts the whole command on bad data.
206 raise Exception("WARNING: Returned no records")
207 elif "data" not in rows:
208 raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
209 elif "nodeinfo" not in rows["data"]:
210 raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")
# Collect new, wanted, not-yet-registered domains into `domains`.
212 for entry in rows["data"]["nodeinfo"]:
213 logger.debug("entry[%s]='%s'", type(entry), entry)
214 if "domain" not in entry:
215 logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
217 elif entry["domain"] == "":
218 logger.debug("entry[domain] is empty - SKIPPED!")
220 elif not utils.is_domain_wanted(entry["domain"]):
221 logger.warning("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
223 elif instances.is_registered(entry["domain"]):
224 logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
226 elif instances.is_recent(entry["domain"]):
227 logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
230 logger.debug("Adding domain='%s' ...", entry["domain"])
231 domains.append(entry["domain"])
233 except network.exceptions as exception:
234 logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
237 logger.debug("domains()=%d", len(domains))
# Second pass: crawl each collected domain; per-domain failures are
# recorded on the instance instead of aborting the loop.
239 logger.info("Adding %d new instances ...", len(domains))
240 for domain in domains:
242 logger.info("Fetching instances from domain='%s' ...", domain)
243 federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
244 except network.exceptions as exception:
245 logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
246 instances.set_last_error(domain, exception)
249 logger.debug("Success - EXIT!")
# Core crawler: fetch each known blocker's block list (software-specific
# fetchers for pleroma/mastodon/lemmy/friendica/misskey), normalise and
# deobfuscate the blocked domains, persist the blocks, and optionally
# collect "reject"-level entries for a bot announcement post.
# NOTE(review): many lines are missing from this listing (locking.acquire(),
# `try:` headers, `continue`/`raise` statements, `blockdict = []`, the final
# return); statement order of the missing parts cannot be confirmed here.
252 def fetch_blocks(args: argparse.Namespace) -> int:
253 logger.debug("args[]='%s' - CALLED!", type(args))
# Optional pre-validation when a single domain was requested.
254 if args.domain is not None and args.domain != "":
255 logger.debug("args.domain='%s' - checking ...", args.domain)
256 if not validators.domain(args.domain):
257 logger.warning("args.domain='%s' is not valid.", args.domain)
259 elif blacklist.is_blacklisted(args.domain):
260 logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
262 elif not instances.is_registered(args.domain):
263 logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
266 logger.debug("Invoking locking.acquire() ...")
# Select work: one domain, one software, or everything past the
# recheck_block interval (most recent rowids first).
269 if args.domain is not None and args.domain != "":
270 # Re-check single domain
271 logger.debug("Querying database for single args.domain='%s' ...", args.domain)
272 database.cursor.execute(
273 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
275 elif args.software is not None and args.software != "":
276 # Re-check single software
277 logger.debug("Querying database for args.software='%s' ...", args.software)
278 database.cursor.execute(
279 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
282 # Re-check after "timeout" (aka. minimum interval)
283 database.cursor.execute(
284 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
287 rows = database.cursor.fetchall()
288 logger.info("Checking %d entries ...", len(rows))
289 for blocker, software, origin, nodeinfo_url in rows:
290 logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
# Normalise the blocker domain before any further checks.
291 blocker = tidyup.domain(blocker)
292 logger.debug("blocker='%s' - AFTER!", blocker)
295 logger.warning("blocker is now empty!")
297 elif nodeinfo_url is None or nodeinfo_url == "":
298 logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
300 elif not utils.is_domain_wanted(blocker):
301 logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
# Mark this blocker as checked now; obfuscation flag is reset and
# re-raised below only if deobfuscation actually fails.
304 logger.debug("blocker='%s'", blocker)
305 instances.set_last_blocked(blocker)
306 instances.set_has_obfuscation(blocker, False)
# Dispatch to the software-specific block-list fetcher.
310 if software == "pleroma":
311 logger.info("blocker='%s',software='%s'", blocker, software)
312 blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
313 elif software == "mastodon":
314 logger.info("blocker='%s',software='%s'", blocker, software)
315 blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
316 elif software == "lemmy":
317 logger.info("blocker='%s',software='%s'", blocker, software)
318 blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
319 elif software == "friendica":
320 logger.info("blocker='%s',software='%s'", blocker, software)
321 blocking = friendica.fetch_blocks(blocker)
322 elif software == "misskey":
323 logger.info("blocker='%s',software='%s'", blocker, software)
324 blocking = misskey.fetch_blocks(blocker)
326 logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)
328 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
329 instances.set_total_blocks(blocker, blocking)
331 logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
333 for block in blocking:
334 logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])
336 if block["block_level"] == "":
337 logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
# Normalise blocked domain and reason text; empty reason -> None.
340 logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
341 block["blocked"] = tidyup.domain(block["blocked"])
342 block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
343 logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])
# Skip non-resolvable pseudo-domains (.onion/.arpa/.tld).
345 if block["blocked"] == "":
346 logger.warning("blocked is empty, blocker='%s'", blocker)
348 elif block["blocked"].endswith(".onion"):
349 logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
351 elif block["blocked"].endswith(".arpa"):
352 logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
354 elif block["blocked"].endswith(".tld"):
355 logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
# Obfuscated entries ("*" or "?" wildcards): try to resolve them
# back to a real registered domain via instances.deobfuscate().
357 elif block["blocked"].find("*") >= 0:
358 logger.debug("blocker='%s' uses obfuscated domains", blocker)
360 # Some friendica servers also obscure domains without hash
361 row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)
363 logger.debug("row[]='%s'", type(row))
365 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
366 instances.set_has_obfuscation(blocker, True)
369 block["blocked"] = row["domain"]
370 origin = row["origin"]
371 nodeinfo_url = row["nodeinfo_url"]
372 elif block["blocked"].find("?") >= 0:
373 logger.debug("blocker='%s' uses obfuscated domains", blocker)
375 # Some obscure them with question marks, not sure if that's dependent on version or not
376 row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)
378 logger.debug("row[]='%s'", type(row))
380 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
381 instances.set_has_obfuscation(blocker, True)
384 block["blocked"] = row["domain"]
385 origin = row["origin"]
386 nodeinfo_url = row["nodeinfo_url"]
# NOTE(review): "domainm" below is a typo in a runtime log string;
# left untouched here because this edit changes comments only.
388 logger.debug("Looking up instance by domainm, blocked='%s'", block["blocked"])
389 if block["blocked"] == "":
390 logger.debug("block[blocked] is empty - SKIPPED!")
392 elif not utils.is_domain_wanted(block["blocked"]):
393 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
395 elif block["block_level"] in ["accept", "accepted"]:
396 logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
398 elif not instances.is_registered(block["blocked"]):
399 logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
400 federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)
# Canonicalise the block level, persist, and queue "reject"
# blocks for the bot post when the bot is enabled.
402 block["block_level"] = utils.alias_block_level(block["block_level"])
404 if utils.process_block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
405 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
407 "blocked": block["blocked"],
408 "reason" : block["reason"],
# Per-blocked-domain cookie cleanup to bound memory/cookie jars.
411 logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
412 cookies.clear(block["blocked"])
414 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
415 if instances.has_pending(blocker):
416 logger.debug("Flushing updates for blocker='%s' ...", blocker)
417 instances.update_data(blocker)
419 logger.debug("Invoking commit() ...")
420 database.connection.commit()
422 logger.debug("Invoking cookies.clear(%s) ...", blocker)
423 cookies.clear(blocker)
425 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d'", config.get("bot_enabled"), len(blockdict))
426 if config.get("bot_enabled") and len(blockdict) > 0:
427 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
428 network.send_bot_post(blocker, blockdict)
430 logger.debug("Success! - EXIT!")
# Crawl fediverse.observer: scrape the software dropdown (or use
# args.software), then fetch each software's table data and register any
# new, wanted domains found in it.
# NOTE(review): missing lines from this listing include locking.acquire(),
# `types = []`, loop headers over items, `continue` statements, the
# `try:` header around the per-software fetch and the final return.
433 def fetch_observer(args: argparse.Namespace) -> int:
434 logger.debug("args[]='%s' - CALLED!", type(args))
436 logger.debug("Invoking locking.acquire() ...")
439 source_domain = "fediverse.observer"
# Rate-limit this source like all the others.
440 if sources.is_recent(source_domain):
441 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
444 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
445 sources.update(source_domain)
# No software given: scrape the site's dropdown menu for the full list.
448 if args.software is None:
449 logger.info("Fetching software list ...")
450 raw = utils.fetch_url(
451 f"https://{source_domain}",
453 (config.get("connection_timeout"), config.get("read_timeout"))
455 logger.debug("raw[%s]()=%d", type(raw), len(raw))
457 doc = bs4.BeautifulSoup(raw, features="html.parser")
458 logger.debug("doc[]='%s'", type(doc))
460 items = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"}).findAll("a", {"class": "dropdown-item"})
461 logger.debug("items[]='%s'", type(items))
463 logger.info("Checking %d menu items ...", len(items))
465 logger.debug("item[%s]='%s'", type(item), item)
# The aggregate "All" entry is not a software type.
466 if item.text.lower() == "all":
467 logger.debug("Skipping 'All' menu entry ...")
470 logger.debug("Appending item.text='%s' ...", item.text)
471 types.append(tidyup.domain(item.text))
473 logger.info("Adding args.software='%s' as type ...", args.software)
474 types.append(args.software)
# Per-software table scrape; each entry is an <a class="url"> element.
476 logger.info("Fetching %d different table data ...", len(types))
477 for software in types:
478 logger.debug("software='%s' - BEFORE!", software)
479 if args.software is not None and args.software != software:
480 logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
485 logger.debug("Fetching table data for software='%s' ...", software)
486 raw = utils.fetch_url(
487 f"https://{source_domain}/app/views/tabledata.php?software={software}",
489 (config.get("connection_timeout"), config.get("read_timeout"))
491 logger.debug("raw[%s]()=%d", type(raw), len(raw))
493 doc = bs4.BeautifulSoup(raw, features="html.parser")
494 logger.debug("doc[]='%s'", type(doc))
# Network errors skip this software, not the whole run.
495 except network.exceptions as exception:
496 logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
499 items = doc.findAll("a", {"class": "url"})
500 logger.info("Checking %d items,software='%s' ...", len(items), software)
502 logger.debug("item[]='%s'", type(item))
503 domain = item.decode_contents()
505 logger.debug("domain='%s' - AFTER!", domain)
507 logger.debug("domain is empty - SKIPPED!")
509 elif not utils.is_domain_wanted(domain):
510 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
512 elif instances.is_registered(domain):
513 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
515 elif instances.is_recent(domain):
516 logger.debug("domain='%s' is recently being handled - SKIPPED!", domain)
# Map the observer's software label to the canonical name, then crawl.
519 software = software_helper.alias(software)
520 logger.info("Fetching instances for domain='%s'", domain)
521 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
523 logger.debug("Success! - EXIT!")
# Import todon.eu's published domain-block wiki page: parse the
# silenced/limited and suspended lists and persist them as blocks.
# NOTE(review): `blocker` and `blocklist` are used below without a visible
# assignment; their initialisation is in lines missing from this listing
# (539-544 etc.) — confirm against the full source.
526 def fetch_todon_wiki(args: argparse.Namespace) -> int:
527 logger.debug("args[]='%s' - CALLED!", type(args))
529 logger.debug("Invoking locking.acquire() ...")
532 source_domain = "wiki.todon.eu"
# Rate-limit this source like all the others.
533 if sources.is_recent(source_domain):
534 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
537 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
538 sources.update(source_domain)
545 raw = utils.fetch_url(f"https://{source_domain}/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
546 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
548 doc = bs4.BeautifulSoup(raw, "html.parser")
549 logger.debug("doc[]='%s'", type(doc))
# Two page sections, keyed by their heading ids; each list item holds
# the blocked domain inside a <div>.
551 silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
552 logger.info("Checking %d silenced/limited entries ...", len(silenced))
553 blocklist["silenced"] = utils.find_domains(silenced, "div")
555 suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
556 logger.info("Checking %d suspended entries ...", len(suspended))
557 blocklist["reject"] = utils.find_domains(suspended, "div")
559 blocking = blocklist["silenced"] + blocklist["reject"]
562 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
563 instances.set_total_blocks(blocker, blocking)
566 for block_level in blocklist:
567 blockers = blocklist[block_level]
569 logger.debug("block_level='%s',blockers()=%d'", block_level, len(blockers))
570 for blocked in blockers:
571 logger.debug("blocked='%s'", blocked)
# Unknown blocked domains get crawled first; failures are stored
# on the instance rather than aborting the import.
573 if not instances.is_registered(blocked):
575 logger.info("Fetching instances from domain='%s' ...", blocked)
576 federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
577 except network.exceptions as exception:
578 logger.warning("Exception '%s' during fetching instances (fetch_cs) from blocked='%s'", type(exception), blocked)
579 instances.set_last_error(blocked, exception)
581 if blocks.is_instance_blocked(blocker, blocked, block_level):
582 logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
# Persist the block; "reject" entries feed the optional bot post.
585 logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
586 if utils.process_block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
587 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", blocked, block_level, blocker)
593 logger.debug("Invoking commit() ...")
594 database.connection.commit()
596 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
597 if config.get("bot_enabled") and len(blockdict) > 0:
598 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
599 network.send_bot_post(blocker, blockdict)
601 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
602 if instances.has_pending(blocker):
603 logger.debug("Flushing updates for blocker='%s' ...", blocker)
604 instances.update_data(blocker)
606 logger.debug("Success! - EXIT!")
# Import chaos.social's federation.md blocklist from its GitHub raw mirror:
# the markdown is rendered to HTML, then the "silenced" and "blocked"
# tables are parsed into a two-level blocklist and persisted.
# NOTE(review): `extensions`, `blocklist` and `blockdict` are used without a
# visible assignment; the listing skips lines 611-639 where they are
# presumably defined — confirm against the full source. Note also this
# function is annotated with no return type, unlike its siblings.
609 def fetch_cs(args: argparse.Namespace):
610 logger.debug("args[]='%s' - CALLED!", type(args))
612 logger.debug("Invoking locking.acquire() ...")
640 source_domain = "raw.githubusercontent.com"
# Rate-limit this source like all the others.
641 if sources.is_recent(source_domain):
642 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
645 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
646 sources.update(source_domain)
648 raw = utils.fetch_url(f"https://{source_domain}/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
649 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
# Render the markdown to HTML so the tables can be located by id.
651 doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
652 logger.debug("doc()=%d[]='%s'", len(doc), type(doc))
654 silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
655 logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
656 blocklist["silenced"] = federation.find_domains(silenced)
658 blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
659 logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
660 blocklist["reject"] = federation.find_domains(blocked)
662 blocking = blocklist["silenced"] + blocklist["reject"]
663 blocker = "chaos.social"
665 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
666 instances.set_total_blocks(blocker, blocking)
668 logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
670 if len(blocking) > 0:
671 for block_level in blocklist:
672 logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))
674 for row in blocklist[block_level]:
675 logger.debug("row[%s]='%s'", type(row), row)
# Recently re-checked entries are skipped; unknown domains are
# crawled first, with failures recorded on the instance.
676 if instances.is_recent(row["domain"], "last_blocked"):
677 logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
679 elif not instances.is_registered(row["domain"]):
681 logger.info("Fetching instances from domain='%s' ...", row["domain"])
682 federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
683 except network.exceptions as exception:
684 logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
685 instances.set_last_error(row["domain"], exception)
# Persist the block; "reject" entries feed the optional bot post.
687 if utils.process_block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
688 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
690 "blocked": row["domain"],
691 "reason" : row["reason"],
694 logger.debug("Invoking commit() ...")
695 database.connection.commit()
697 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
698 if config.get("bot_enabled") and len(blockdict) > 0:
699 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
700 network.send_bot_post(blocker, blockdict)
702 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
703 if instances.has_pending(blocker):
704 logger.debug("Flushing updates for blocker='%s' ...", blocker)
705 instances.update_data(blocker)
707 logger.debug("Success! - EXIT!")
# Fetch an FBA-specific RSS feed (URL given via args.feed), extract one
# domain per item from the link's query string, and crawl new domains.
# NOTE(review): `domains = []`, `continue` statements and the return are in
# lines missing from this listing — confirm against the full source.
710 def fetch_fba_rss(args: argparse.Namespace) -> int:
711 logger.debug("args[]='%s' - CALLED!", type(args))
715 logger.debug("Invoking locking.acquire() ...")
# Rate-limiting keys on the feed's host, not a fixed source domain.
718 components = urlparse(args.feed)
720 if sources.is_recent(components.netloc):
721 logger.info("API from components.netloc='%s' has recently being accessed - EXIT!", components.netloc)
724 logger.debug("components.netloc='%s' has not been recently used, marking ...", components.netloc)
725 sources.update(components.netloc)
727 logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
728 response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
730 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
731 if response.ok and response.status_code < 300 and len(response.text) > 0:
732 logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
733 rss = atoma.parse_rss_bytes(response.content)
735 logger.debug("rss[]='%s'", type(rss))
736 for item in rss.items:
737 logger.debug("item='%s'", item)
# The domain is the value after the first "=" in the item link.
738 domain = tidyup.domain(item.link.split("=")[1])
740 logger.debug("domain='%s' - AFTER!", domain)
742 logger.debug("domain is empty - SKIPPED!")
744 elif not utils.is_domain_wanted(domain):
745 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
747 elif domain in domains:
748 logger.debug("domain='%s' is already added - SKIPPED!", domain)
750 elif instances.is_registered(domain):
751 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
753 elif instances.is_recent(domain):
754 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
757 logger.debug("Adding domain='%s'", domain)
758 domains.append(domain)
760 logger.debug("domains()=%d", len(domains))
# Second pass: crawl each collected domain; failures are recorded
# per-instance instead of aborting the loop.
762 logger.info("Adding %d new instances ...", len(domains))
763 for domain in domains:
765 logger.info("Fetching instances from domain='%s' ...", domain)
766 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
767 except network.exceptions as exception:
768 logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
769 instances.set_last_error(domain, exception)
772 logger.debug("Success! - EXIT!")
# Fetch the FBA bot account's ATOM feed from ryona.agency, extract domains
# from the <a href> lists embedded in each entry's HTML content, and crawl
# any new, wanted domains.
# NOTE(review): `domains = []`, `continue` statements, the `try:` header of
# the crawl loop and the return are missing from this listing — confirm.
775 def fetch_fbabot_atom(args: argparse.Namespace) -> int:
776 logger.debug("args[]='%s' - CALLED!", type(args))
778 logger.debug("Invoking locking.acquire() ...")
781 source_domain = "ryona.agency"
# Rate-limit this source like all the others.
782 if sources.is_recent(source_domain):
783 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
786 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
787 sources.update(source_domain)
789 feed = f"https://{source_domain}/users/fba/feed.atom"
793 logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
794 response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
796 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
797 if response.ok and response.status_code < 300 and len(response.text) > 0:
798 logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
799 atom = atoma.parse_atom_bytes(response.content)
801 logger.debug("atom[]='%s'", type(atom))
# Entry content is HTML; domains live in comma-separated hrefs.
802 for entry in atom.entries:
803 logger.debug("entry[]='%s'", type(entry))
804 doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
805 logger.debug("doc[]='%s'", type(doc))
806 for element in doc.findAll("a"):
807 logger.debug("element[]='%s'", type(element))
808 for href in element["href"].split(","):
809 logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
810 domain = tidyup.domain(href)
812 logger.debug("domain='%s' - AFTER!", domain)
814 logger.debug("domain is empty - SKIPPED!")
816 elif not utils.is_domain_wanted(domain):
817 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
819 elif domain in domains:
820 logger.debug("domain='%s' is already added - SKIPPED!", domain)
822 elif instances.is_registered(domain):
823 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
825 elif instances.is_recent(domain):
826 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
829 logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
830 domains.append(domain)
832 logger.debug("domains()=%d", len(domains))
# Second pass: crawl each collected domain with the source domain
# recorded as origin; failures are stored per-instance.
834 logger.info("Adding %d new instances ...", len(domains))
835 for domain in domains:
836 logger.debug("domain='%s'", domain)
838 logger.info("Fetching instances from domain='%s' ...", domain)
839 federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
840 except network.exceptions as exception:
841 logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
842 instances.set_last_error(domain, exception)
845 logger.debug("Success! - EXIT!")
# Crawl one instance given on the command line, then (unless limited by a
# missing flag in this listing, presumably args.single) walk all known
# instances of supported software past the recheck_instance interval and
# re-crawl them.
# NOTE(review): the `try:` headers, `return` statements, the row-loop
# header and the condition guarding the "single" early-exit are in lines
# missing from this listing — confirm against the full source.
848 def fetch_instances(args: argparse.Namespace) -> int:
849 logger.debug("args[]='%s' - CALLED!", type(args))
# Validate the requested domain before locking and crawling.
851 logger.debug("args.domain='%s' - checking ...", args.domain)
852 if not validators.domain(args.domain):
853 logger.warning("args.domain='%s' is not valid.", args.domain)
855 elif blacklist.is_blacklisted(args.domain):
856 logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
859 logger.debug("Invoking locking.acquire() ...")
# Initial crawl of the explicitly requested domain.
864 logger.info("Fetching instances from args.domain='%s' ...", args.domain)
865 federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
866 except network.exceptions as exception:
867 logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
868 instances.set_last_error(args.domain, exception)
869 instances.update_data(args.domain)
873 logger.debug("Not fetching more instances - EXIT!")
876 # Loop through some instances
877 database.cursor.execute(
878 "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
881 rows = database.cursor.fetchall()
882 logger.info("Checking %d entries ...", len(rows))
884 logger.debug("row[domain]='%s'", row["domain"])
885 if row["domain"] == "":
886 logger.debug("row[domain] is empty - SKIPPED!")
888 elif not utils.is_domain_wanted(row["domain"]):
889 logger.warning("Domain row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
# Re-crawl with the stored origin/software/nodeinfo_url as hints;
# failures are recorded per-instance instead of aborting the loop.
893 logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", row["domain"], row["origin"], row["software"], row["nodeinfo_url"])
894 federation.fetch_instances(row["domain"], row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
895 except network.exceptions as exception:
896 logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[domain]='%s'", type(exception), row["domain"])
897 instances.set_last_error(row["domain"], exception)
899 logger.debug("Success - EXIT!")
902 def fetch_oliphant(args: argparse.Namespace) -> int:
903 logger.debug("args[]='%s' - CALLED!", type(args))
905 logger.debug("Invoking locking.acquire() ...")
908 source_domain = "codeberg.org"
909 if sources.is_recent(source_domain):
910 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
913 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
914 sources.update(source_domain)
917 base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"
922 "blocker": "artisan.chat",
923 "csv_url": "mastodon/artisan.chat.csv",
925 "blocker": "mastodon.art",
926 "csv_url": "mastodon/mastodon.art.csv",
928 "blocker": "pleroma.envs.net",
929 "csv_url": "mastodon/pleroma.envs.net.csv",
931 "blocker": "oliphant.social",
932 "csv_url": "mastodon/_unified_tier3_blocklist.csv",
934 "blocker": "mastodon.online",
935 "csv_url": "mastodon/mastodon.online.csv",
937 "blocker": "mastodon.social",
938 "csv_url": "mastodon/mastodon.social.csv",
940 "blocker": "mastodon.social",
941 "csv_url": "other/missing-tier0-mastodon.social.csv",
943 "blocker": "rage.love",
944 "csv_url": "mastodon/rage.love.csv",
946 "blocker": "sunny.garden",
947 "csv_url": "mastodon/sunny.garden.csv",
949 "blocker": "sunny.garden",
950 "csv_url": "mastodon/gardenfence.csv",
952 "blocker": "solarpunk.moe",
953 "csv_url": "mastodon/solarpunk.moe.csv",
955 "blocker": "toot.wales",
956 "csv_url": "mastodon/toot.wales.csv",
958 "blocker": "union.place",
959 "csv_url": "mastodon/union.place.csv",
961 "blocker": "oliphant.social",
962 "csv_url": "mastodon/birdsite.csv",
968 logger.debug("Downloading %d files ...", len(blocklists))
969 for block in blocklists:
970 # Is domain given and not equal blocker?
971 if isinstance(args.domain, str) and args.domain != block["blocker"]:
972 logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
974 elif args.domain in domains:
975 logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
979 logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
980 response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
982 logger.debug("response.ok='%s',response.status_code=%d,response.content()=%d", response.ok, response.status_code, len(response.content))
983 if not response.ok or response.status_code >= 300 or response.content == "":
984 logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
987 logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
988 reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
994 logger.debug("row[%s]='%s'", type(row), row)
995 domain = severity = None
996 reject_media = reject_reports = False
999 domain = row["#domain"]
1000 elif "domain" in row:
1001 domain = row["domain"]
1003 logger.debug("row='%s' does not contain domain column", row)
1006 if "#severity" in row:
1007 severity = utils.alias_block_level(row["#severity"])
1008 elif "severity" in row:
1009 severity = utils.alias_block_level(row["severity"])
1011 logger.debug("row='%s' does not contain severity column", row)
1014 if "#reject_media" in row and row["#reject_media"].lower() == "true":
1016 elif "reject_media" in row and row["reject_media"].lower() == "true":
1019 if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
1020 reject_reports = True
1021 elif "reject_reports" in row and row["reject_reports"].lower() == "true":
1022 reject_reports = True
1025 logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
1027 logger.debug("domain is empty - SKIPPED!")
1029 elif domain.endswith(".onion"):
1030 logger.debug("domain='%s' is a TOR .onion domain - SKIPPED", domain)
1032 elif domain.endswith(".arpa"):
1033 logger.debug("domain='%s' is a reverse IP address - SKIPPED", domain)
1035 elif domain.endswith(".tld"):
1036 logger.debug("domain='%s' is a fake domain - SKIPPED", domain)
1038 elif domain.find("*") >= 0 or domain.find("?") >= 0:
1039 logger.debug("domain='%s' is obfuscated - Invoking utils.deobfuscate(%s, %s) ...", domain, domain, block["blocker"])
1040 domain = utils.deobfuscate(domain, block["blocker"])
1041 logger.debug("domain='%s' - AFTER!", domain)
1043 if not validators.domain(domain):
1044 logger.debug("domain='%s' is not a valid domain - SKIPPED!")
1046 elif blacklist.is_blacklisted(domain):
1047 logger.warning("domain='%s' is blacklisted - SKIPPED!", domain)
1050 logger.debug("Marking domain='%s' as handled", domain)
1051 domains.append(domain)
1053 logger.debug("Processing domain='%s' ...", domain)
1054 processed = utils.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
1055 logger.debug("processed='%s'", processed)
1057 if utils.process_block(block["blocker"], domain, None, severity) and config.get("bot_enabled"):
1058 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", domain, block["block_level"], block["blocker"])
1061 "reason" : block["reason"],
1065 utils.process_block(block["blocker"], domain, None, "reject_media")
1067 utils.process_block(block["blocker"], domain, None, "reject_reports")
1069 logger.debug("Invoking instances.set_total_blocks(%s, domains()=%d) ...", block["blocker"], len(domains))
1070 instances.set_total_blocks(block["blocker"], domains)
1072 logger.debug("Checking if blocker='%s' has pending updates ...", block["blocker"])
1073 if instances.has_pending(block["blocker"]):
1074 logger.debug("Flushing updates for block[blocker]='%s' ...", block["blocker"])
1075 instances.update_data(block["blocker"])
1077 logger.debug("Invoking commit() ...")
1078 database.connection.commit()
1080 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1081 if config.get("bot_enabled") and len(blockdict) > 0:
1082 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
1083 network.send_bot_post(block["blocker"], blockdict)
1085 logger.debug("Success! - EXIT!")
# Fetch plain-text blocklists (one domain per line) from the static `urls`
# list (currently only seirdy.one's bsl.txt) and process each listed domain
# as blocked by that list's "blocker".
# NOTE(review): this listing is elided — the `urls` list delimiters and guard
# continue/return lines are not visible here.
1088 def fetch_txt(args: argparse.Namespace) -> int:
1089 logger.debug("args[]='%s' - CALLED!", type(args))
1091 logger.debug("Invoking locking.acquire() ...")
1096 "blocker": "seirdy.one",
1097 "url" : "https://seirdy.one/pb/bsl.txt",
1100 logger.info("Checking %d text file(s) ...", len(urls))
1102 logger.debug("Fetching row[url]='%s' ...", row["url"])
1103 response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
1105 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1106 if response.ok and response.status_code < 300 and response.text != "":
1107 logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
# One domain per line in the fetched text file.
1108 domains = response.text.split("\n")
1110 logger.info("Processing %d domains ...", len(domains))
1111 for domain in domains:
1112 logger.debug("domain='%s' - BEFORE!", domain)
# Normalize the raw line (whitespace etc.) before any checks.
1113 domain = tidyup.domain(domain)
1115 logger.debug("domain='%s' - AFTER!", domain)
1117 logger.debug("domain is empty - SKIPPED!")
1119 elif not utils.is_domain_wanted(domain):
1120 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1122 elif instances.is_recent(domain):
1123 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1126 logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
1127 processed = utils.process_domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)
1129 logger.debug("processed='%s'", processed)
1131 logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
1134 logger.debug("Success! - EXIT!")
# Scrape fedipact.online (an HTML page listing participating instances as
# <li> elements) and register any new, wanted, not-recently-crawled domains.
# NOTE(review): this listing is elided — guard continue/return lines are not
# visible here.
1137 def fetch_fedipact(args: argparse.Namespace) -> int:
1138 logger.debug("args[]='%s' - CALLED!", type(args))
1140 logger.debug("Invoking locking.acquire() ...")
# Rate-limit hits against the source host: bail out if accessed recently.
1143 source_domain = "fedipact.online"
1144 if sources.is_recent(source_domain):
1145 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1148 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1149 sources.update(source_domain)
1151 response = utils.fetch_url(
1152 f"https://{source_domain}",
1153 network.web_headers,
1154 (config.get("connection_timeout"), config.get("read_timeout"))
1157 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1158 if response.ok and response.status_code < 300 and response.text != "":
1159 logger.debug("Parsing %d Bytes ...", len(response.text))
1161 doc = bs4.BeautifulSoup(response.text, "html.parser")
1162 logger.debug("doc[]='%s'", type(doc))
# Each participating instance is listed as an <li> whose first child is the domain.
1164 rows = doc.findAll("li")
1165 logger.info("Checking %d row(s) ...", len(rows))
1167 logger.debug("row[]='%s'", type(row))
1168 domain = tidyup.domain(row.contents[0])
1170 logger.debug("domain='%s' - AFTER!", domain)
1172 logger.debug("domain is empty - SKIPPED!")
1174 elif not utils.is_domain_wanted(domain):
1175 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1177 elif instances.is_registered(domain):
1178 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1180 elif instances.is_recent(domain):
1181 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1184 logger.info("Fetching domain='%s' ...", domain)
1185 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1187 logger.debug("Success! - EXIT!")
1190 def fetch_joinfediverse(args: argparse.Namespace) -> int:
1191 logger.debug("args[]='%s' - CALLED!", type(args))
1193 logger.debug("Invoking locking.acquire() ...")
1196 source_domain = "joinfediverse.wiki"
1197 if sources.is_recent(source_domain):
1198 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1201 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1202 sources.update(source_domain)
1204 raw = utils.fetch_url(
1205 f"https://{source_domain}/FediBlock",
1206 network.web_headers,
1207 (config.get("connection_timeout"), config.get("read_timeout"))
1209 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1211 doc = bs4.BeautifulSoup(raw, "html.parser")
1212 logger.debug("doc[]='%s'", type(doc))
1214 tables = doc.findAll("table", {"class": "wikitable"})
1216 logger.info("Analyzing %d table(s) ...", len(tables))
1218 for table in tables:
1219 logger.debug("table[]='%s'", type(table))
1221 rows = table.findAll("tr")
1222 logger.info("Checking %d row(s) ...", len(rows))
1223 block_headers = dict()
1225 logger.debug("row[%s]='%s'", type(row), row)
1227 headers = row.findAll("th")
1228 logger.debug("Found headers()=%d header(s)", len(headers))
1229 if len(headers) > 1:
1230 block_headers = dict()
1232 for header in headers:
1234 logger.debug("header[]='%s',cnt=%d", type(header), cnt)
1235 text = header.contents[0]
1237 logger.debug("text[]='%s'", type(text))
1238 if not isinstance(text, str):
1239 logger.debug("text[]='%s' is not of type 'str' - SKIPPED!", type(text))
1241 elif validators.domain(text.strip()):
1242 logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
1245 text = tidyup.domain(text.strip())
1246 logger.debug("text='%s'", text)
1247 if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
1248 logger.debug("Found header: '%s'=%d", text, cnt)
1249 block_headers[cnt] = text
1251 elif len(block_headers) == 0:
1252 logger.debug("row is not scrapable - SKIPPED!")
1254 elif len(block_headers) > 0:
1255 logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
1259 for element in row.find_all(["th", "td"]):
1261 logger.debug("element[]='%s',cnt=%d", type(element), cnt)
1262 if cnt in block_headers:
1263 logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])
1265 text = element.text.strip()
1266 key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"
1268 logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
1269 if key in ["domain", "instance"]:
1271 elif key == "reason":
1272 block[key] = tidyup.reason(text)
1273 elif key == "subdomain(s)":
1276 block[key] = text.split("/")
1278 logger.debug("key='%s'", key)
1281 logger.debug("block()=%d ...", len(block))
1283 logger.debug("Appending block()=%d ...", len(block))
1284 blocklist.append(block)
1286 logger.debug("blocklist()=%d", len(blocklist))
1288 database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
1289 domains = database.cursor.fetchall()
1291 logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
1293 for block in blocklist:
1294 logger.debug("block='%s'", block)
1295 if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
1296 origin = block["blocked"]
1297 logger.debug("origin='%s'", origin)
1298 for subdomain in block["subdomain(s)"]:
1299 block["blocked"] = subdomain + "." + origin
1300 logger.debug("block[blocked]='%s'", block["blocked"])
1301 blocking.append(block)
1303 blocking.append(block)
1305 logger.debug("blocking()=%d", blocking)
1306 for block in blocking:
1307 logger.debug("block[]='%s'", type(block))
1308 block["blocked"] = tidyup.domain(block["blocked"])
1310 logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
1311 if block["blocked"] == "":
1312 logger.debug("block[blocked] is empty - SKIPPED!")
1314 elif not utils.is_domain_wanted(block["blocked"]):
1315 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1317 elif instances.is_recent(block["blocked"]):
1318 logger.debug("blocked='%s' has been recently checked - SKIPPED!", block["blocked"])
1321 logger.info("Proccessing blocked='%s' ...", block["blocked"])
1322 utils.process_domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
1325 for blocker in domains:
1326 blocker = blocker[0]
1327 logger.debug("blocker[%s]='%s'", type(blocker), blocker)
1329 for block in blocking:
1330 logger.debug("block[blocked]='%s',block[block reason(s)]='%s' - BEFORE!", block["blocked"], block["block reason(s)"] if "block reason(s)" in block else None)
1331 block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None
1333 logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
1334 if block["blocked"] == "":
1335 logger.debug("block[blocked] is empty - SKIPPED!")
1337 elif not utils.is_domain_wanted(block["blocked"]):
1338 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1341 logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
1342 if utils.process_block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
1343 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
1345 "blocked": block["blocked"],
1346 "reason" : block["reason"],
1349 if instances.has_pending(blocker):
1350 logger.debug("Flushing updates for blocker='%s' ...", blocker)
1351 instances.update_data(blocker)
1353 logger.debug("Invoking commit() ...")
1354 database.connection.commit()
1356 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1357 if config.get("bot_enabled") and len(blockdict) > 0:
1358 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", blocker, len(blockdict))
1359 network.send_bot_post(blocker, blockdict)
1361 logger.debug("Success! - EXIT!")
1364 def recheck_obfuscation(args: argparse.Namespace) -> int:
1365 logger.debug("args[]='%s' - CALLED!", type(args))
1367 logger.debug("Invoking locking.acquire() ...")
1370 if isinstance(args.domain, str) and args.domain != "" and utils.is_domain_wanted(args.domain):
1371 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
1372 elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
1373 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
1375 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")
1377 rows = database.cursor.fetchall()
1378 logger.info("Checking %d domains ...", len(rows))
1380 logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
1381 if (args.all is None or not args.all) and instances.is_recent(row["domain"]) and args.domain is None and args.software is None:
1382 logger.debug("row[domain]='%s' has been recently checked, args.all[]='%s' - SKIPPED!", row["domain"], type(args.all))
1386 if row["software"] == "pleroma":
1387 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1388 blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
1389 elif row["software"] == "mastodon":
1390 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1391 blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
1392 elif row["software"] == "lemmy":
1393 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1394 blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
1395 elif row["software"] == "friendica":
1396 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1397 blocking = friendica.fetch_blocks(row["domain"])
1398 elif row["software"] == "misskey":
1399 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1400 blocking = misskey.fetch_blocks(row["domain"])
1402 logger.warning("Unknown sofware: domain='%s',software='%s'", row["domain"], row["software"])
1404 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
1405 instances.set_total_blocks(row["domain"], blocking)
1407 logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1410 for block in blocking:
1411 logger.debug("block[blocked]='%s'", block["blocked"])
1414 if block["blocked"] == "":
1415 logger.debug("block[blocked] is empty - SKIPPED!")
1417 elif block["blocked"].endswith(".arpa"):
1418 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1420 elif block["blocked"].endswith(".tld"):
1421 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1423 elif block["blocked"].endswith(".onion"):
1424 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
1426 elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
1427 logger.debug("block='%s' is obfuscated.", block["blocked"])
1428 obfuscated = obfuscated + 1
1429 blocked = utils.deobfuscate(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
1430 elif not utils.is_domain_wanted(block["blocked"]):
1431 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1433 elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1434 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
1437 logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
1438 if blocked is not None and blocked != block["blocked"]:
1439 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1440 obfuscated = obfuscated - 1
1441 if blocks.is_instance_blocked(row["domain"], blocked):
1442 logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
1445 block["block_level"] = utils.alias_block_level(block["block_level"])
1447 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
1448 if utils.process_block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1449 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
1452 "reason" : block["reason"],
1455 logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
1456 if obfuscated == 0 and len(blocking) > 0:
1457 logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
1458 instances.set_has_obfuscation(row["domain"], False)
1460 if instances.has_pending(row["domain"]):
1461 logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1462 instances.update_data(row["domain"])
1464 logger.debug("Invoking commit() ...")
1465 database.connection.commit()
1467 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1468 if config.get("bot_enabled") and len(blockdict) > 0:
1469 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", row["domain"], len(blockdict))
1470 network.send_bot_post(row["domain"], blockdict)
1472 logger.debug("Success! - EXIT!")
1475 def fetch_fedilist(args: argparse.Namespace) -> int:
1476 logger.debug("args[]='%s' - CALLED!", type(args))
1478 logger.debug("Invoking locking.acquire() ...")
1481 source_domain = "demo.fedilist.com"
1482 if sources.is_recent(source_domain):
1483 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1486 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1487 sources.update(source_domain)
1489 url = f"http://{source_domain}/instance/csv?onion=not"
1490 if args.software is not None and args.software != "":
1491 logger.debug("args.software='%s'", args.software)
1492 url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"
1494 logger.info("Fetching url='%s' ...", url)
1495 response = reqto.get(
1497 headers=network.web_headers,
1498 timeout=(config.get("connection_timeout"), config.get("read_timeout")),
1499 allow_redirects=False
1502 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1503 if not response.ok or response.status_code >= 300 or len(response.content) == 0:
1504 logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", response.ok, response.status_code, len(response.text))
1507 reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
1509 logger.debug("reader[]='%s'", type(reader))
1512 logger.debug("row[]='%s'", type(row))
1513 domain = tidyup.domain(row["hostname"])
1514 logger.debug("domain='%s' - AFTER!", domain)
1517 logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"])
1519 elif not utils.is_domain_wanted(domain):
1520 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1522 elif (args.all is None or not args.all) and instances.is_registered(domain):
1523 logger.debug("domain='%s' is already registered, --all not specified: args.all[]='%s'", type(args.all))
1525 elif instances.is_recent(domain):
1526 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1529 logger.info("Fetching instances from domain='%s' ...", domain)
1530 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1532 logger.debug("Success! - EXIT!")
# Re-determine the software type for instances via their nodeinfo: a single
# --domain, all of one --software, or every instance whose last_nodeinfo is
# older than config "recheck_nodeinfo".  Updates the stored software when it
# changed and stamps last_nodeinfo either way.
# NOTE(review): this listing is elided — the try: opener matched by the except
# below and the cnt initialization/increment lines are not visible here.
1535 def update_nodeinfo(args: argparse.Namespace) -> int:
1536 logger.debug("args[]='%s' - CALLED!", type(args))
1538 logger.debug("Invoking locking.acquire() ...")
# Select targets by precedence: --domain, then --software, then stale rows.
1541 if args.domain is not None and args.domain != "":
1542 logger.debug("Fetching args.domain='%s'", args.domain)
1543 database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ?", [args.domain])
1544 elif args.software is not None and args.software != "":
1545 logger.info("Fetching domains for args.software='%s'", args.software)
1546 database.cursor.execute("SELECT domain, software FROM instances WHERE software = ?", [args.software])
1548 logger.info("Fetching domains for recently updated ...")
1549 database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL", [time.time() - config.get("recheck_nodeinfo")])
1551 domains = database.cursor.fetchall()
1553 logger.info("Checking %d domain(s) ...", len(domains))
1556 logger.debug("row[]='%s'", type(row))
# Progress percentage relies on an elided cnt counter.
1558 logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
1559 software = federation.determine_software(row["domain"])
1561 logger.debug("Determined software='%s'", software)
# Only overwrite when detection succeeded AND differs from the stored value.
1562 if software != row["software"] and software is not None:
1563 logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
1564 instances.set_software(row["domain"], software)
1566 instances.set_success(row["domain"])
1567 except network.exceptions as exception:
1568 logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1569 instances.set_last_error(row["domain"], exception)
# Stamp last_nodeinfo and flush pending changes regardless of outcome.
1571 instances.set_last_nodeinfo(row["domain"])
1572 instances.update_data(row["domain"])
1575 logger.debug("Success! - EXIT!")
# Fetch the full instance list from the instances.social REST API (requires
# the "instances_social_api_key" config value) and crawl every new, wanted,
# not-recently-seen domain.
# NOTE(review): this listing is elided — guard continue/return lines, the
# headers dict braces and the get_json_api() argument lines are not visible;
# the function may also continue past the end of this chunk.
1578 def fetch_instances_social(args: argparse.Namespace) -> int:
1579 logger.debug("args[]='%s' - CALLED!", type(args))
1581 logger.debug("Invoking locking.acquire() ...")
1584 source_domain = "instances.social"
# The API needs a bearer token; abort early when it is not configured.
1586 if config.get("instances_social_api_key") == "":
1587 logger.error("API key not set. Please set in your config.json file.")
1589 elif sources.is_recent(source_domain):
1590 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1593 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1594 sources.update(source_domain)
1597 "Authorization": f"Bearer {config.get('instances_social_api_key')}",
# count=0 requests the unlimited/full list per the API's convention.
1600 fetched = network.get_json_api(
1602 "/api/1.0/instances/list?count=0&sort_by=name",
1604 (config.get("connection_timeout"), config.get("read_timeout"))
1606 logger.debug("fetched[]='%s'", type(fetched))
# Validate the API envelope before touching fetched["json"]["instances"].
1608 if "error_message" in fetched:
1609 logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1611 elif "exception" in fetched:
1612 logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1614 elif "json" not in fetched:
1615 logger.warning("fetched has no element 'json' - EXIT!")
1617 elif "instances" not in fetched["json"]:
1618 logger.warning("fetched[row] has no element 'instances' - EXIT!")
1622 rows = fetched["json"]["instances"]
1624 logger.info("Checking %d row(s) ...", len(rows))
1626 logger.debug("row[]='%s'", type(row))
1627 domain = tidyup.domain(row["name"])
1629 logger.debug("domain='%s' - AFTER!", domain)
1631 logger.debug("domain is empty - SKIPPED!")
1633 elif not utils.is_domain_wanted(domain):
1634 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1636 elif domain in domains:
1637 logger.debug("domain='%s' is already added - SKIPPED!", domain)
1639 elif instances.is_registered(domain):
1640 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1642 elif instances.is_recent(domain):
1643 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1646 logger.info("Fetching instances from domain='%s'", domain)
1647 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1649 logger.debug("Success! - EXIT!")