# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
import argparse
import inspect
import json
import logging
import time

from urllib.parse import urlparse

import atoma
import bs4
import markdown
import validators

from fba import csrf
from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import locking
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances
from fba.models import sources

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma
# Module-wide logger; INFO by default, uncomment the setLevel() line below
# for verbose per-step debugging output.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)
def check_instance(args: argparse.Namespace) -> int:
    """Check whether args.domain is valid, not blacklisted and not yet
    registered. Returns 0 when the domain is unknown (i.e. may be added),
    a non-zero status code otherwise."""
    logger.debug("args.domain='%s' - CALLED!", args.domain)

    # NOTE(review): non-zero status codes reconstructed — confirm exact values
    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status
def check_nodeinfo(args: argparse.Namespace) -> int:
    """Sanity-check stored nodeinfo URLs: warn about every instance whose
    nodeinfo_url points at neither the domain nor its punycode form."""
    logger.debug("args[]='%s' - CALLED!", type(args))

    # Fetch rows
    database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")

    cnt = 0
    for row in database.cursor.fetchall():
        logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        # IDNA-encode so internationalized domains compare against ASCII URLs
        punycode = row["domain"].encode("idna").decode("utf-8")

        if row["nodeinfo_url"].startswith("/"):
            logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
            continue
        elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
            logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
            cnt = cnt + 1

    logger.info("Found %d row(s)", cnt)

    logger.debug("EXIT!")
    return 0
def fetch_pixelfed_api(args: argparse.Namespace) -> int:
    """Fetch the public server list from the pixelfed.org API and queue any
    new, wanted domains for instance fetching."""
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF by default, you don't have to add network.source_headers by yourself here
    headers = tuple()
    source_domain = "pixelfed.org"

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    try:
        logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
        headers = csrf.determine(source_domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
        # NOTE(review): return code reconstructed — confirm exact value
        return 100

    try:
        logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            source_domain,
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 101
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 102

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif row["domain"] == "":
                logger.debug("row[domain] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(row["domain"]):
                logger.warning("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
                continue
            elif instances.is_registered(row["domain"]):
                logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
                continue
            elif instances.is_recent(row["domain"]):
                logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
                continue

            logger.debug("Fetching instances from row[domain]='%s' ...", row["domain"])
            federation.fetch_instances(row["domain"], None, None, inspect.currentframe().f_code.co_name)

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 103

    logger.debug("Success! - EXIT!")
    return 0
def fetch_bkali(args: argparse.Namespace) -> int:
    """Fetch a domain list from the gql.api.bka.li GraphQL endpoint and add
    any new, wanted domains as instances."""
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "gql.api.bka.li"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()
    try:
        logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
        fetched = network.post_json_api(
            source_domain,
            "/v1/graphql",
            json.dumps({
                "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
            })
        )

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for 'gql.sources.bka.li' returned error message='%s", fetched["error_message"])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s", fetched["error"]["message"])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif entry["domain"] == "":
                logger.debug("entry[domain] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(entry["domain"]):
                logger.warning("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
                continue
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0
def fetch_blocks(args: argparse.Namespace) -> int:
    """Fetch block lists from known blockers (single domain, single software
    or all due instances), deobfuscate obscured entries where possible and
    persist every new block. Optionally notifies a bot about new rejects."""
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for single args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
        blocker = tidyup.domain(blocker)
        logger.debug("blocker='%s' - AFTER!", blocker)

        if blocker == "":
            logger.warning("blocker is now empty!")
            continue
        elif nodeinfo_url is None or nodeinfo_url == "":
            logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
            continue
        elif not utils.is_domain_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("blocker='%s'", blocker)
        instances.set_last_blocked(blocker)
        # Reset obfuscation flag; it is re-set below when deobfuscation fails
        instances.set_has_obfuscation(blocker, False)

        blocking = list()
        if software == "pleroma":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
        elif software == "mastodon":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
        elif software == "lemmy":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
        elif software == "friendica":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = friendica.fetch_blocks(blocker)
        elif software == "misskey":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = misskey.fetch_blocks(blocker)
        else:
            logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
        instances.set_total_blocks(blocker, blocking)

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        blockdict = list()
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin = row["origin"]
                nodeinfo_url = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some obscure them with question marks, not sure if that's dependent on version or not
                row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin = row["origin"]
                nodeinfo_url = row["nodeinfo_url"]

            logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(block["blocked"]):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            block["block_level"] = utils.alias_block_level(block["block_level"])

            if utils.process_block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d'", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_observer(args: argparse.Namespace) -> int:
    """Scrape fediverse.observer table data per software type and queue new,
    wanted domains for instance fetching."""
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fediverse.observer"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    types = list()
    if args.software is None:
        logger.info("Fetching software list ...")
        raw = utils.fetch_url(
            f"https://{source_domain}",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        ).text
        logger.debug("raw[%s]()=%d", type(raw), len(raw))

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        items = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"}).findAll("a", {"class": "dropdown-item"})
        logger.debug("items[]='%s'", type(items))

        logger.info("Checking %d menu items ...", len(items))
        for item in items:
            logger.debug("item[%s]='%s'", type(item), item)
            if item.text.lower() == "all":
                logger.debug("Skipping 'All' menu entry ...")
                continue

            logger.debug("Appending item.text='%s' ...", item.text)
            types.append(tidyup.domain(item.text))
    else:
        logger.info("Adding args.software='%s' as type ...", args.software)
        types.append(args.software)

    logger.info("Fetching %d different table data ...", len(types))
    for software in types:
        logger.debug("software='%s' - BEFORE!", software)
        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            continue

        doc = None
        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://{source_domain}/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()
            domain = tidyup.domain(domain)

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' is recently being handled - SKIPPED!", domain)
                continue

            # Map the scraped menu label onto the canonical software name
            software = software_helper.alias(software)
            logger.info("Fetching instances for domain='%s'", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_todon_wiki(args: argparse.Namespace) -> int:
    """Scrape wiki.todon.eu's domain-block page and persist its
    silenced/limited and suspended entries as blocks for todon.eu."""
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "wiki.todon.eu"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    # NOTE(review): blocker domain reconstructed — confirm against project history
    blocker = "todon.eu"
    blocklist = {
        "silenced": list(),
        "reject": list(),
    }

    raw = utils.fetch_url(f"https://{source_domain}/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blocking = blocklist["silenced"] + blocklist["reject"]

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_total_blocks(blocker, blocking)

    blockdict = list()
    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d'", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_cs) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if blocks.is_instance_blocked(blocker, blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if utils.process_block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", blocked, block_level, blocker)
                blockdict.append({
                    "blocked": blocked,
                    "reason" : None,
                })

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
        network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update_data(blocker)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_cs(args: argparse.Namespace):
    """Fetch chaos.social's markdown federation page from GitHub, parse its
    silenced/blocked tables and persist them as blocks for chaos.social."""
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # NOTE(review): markdown extension list reconstructed — confirm exact set
    extensions = [
        "extra",
        "abbr",
        "attr_list",
        "def_list",
        "fenced_code",
        "footnotes",
        "md_in_html",
        "admonition",
        "codehilite",
        "legacy_attrs",
        "legacy_em",
        "meta",
        "nl2br",
        "sane_lists",
        "smarty",
        "toc",
        "wikilinks"
    ]

    blocklist = {
        "silenced": list(),
        "reject": list(),
    }

    source_domain = "raw.githubusercontent.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    raw = utils.fetch_url(f"https://{source_domain}/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc()=%d[]='%s'", len(doc), type(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    blocklist["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    blocklist["reject"] = federation.find_domains(blocked)

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "chaos.social"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_total_blocks(blocker, blocking)

    logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
    blockdict = list()
    if len(blocking) > 0:
        for block_level in blocklist:
            logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))

            for row in blocklist[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if instances.is_recent(row["domain"], "last_blocked"):
                    logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
                    continue
                elif not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if utils.process_block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
                    blockdict.append({
                        "blocked": row["domain"],
                        "reason" : row["reason"],
                    })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

    logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
        network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update_data(blocker)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_fba_rss(args: argparse.Namespace) -> int:
    """Fetch an FBA-specific RSS feed (args.feed) and queue every new, wanted
    domain found in the item links for instance fetching."""
    logger.debug("args[]='%s' - CALLED!", type(args))

    domains = list()

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    components = urlparse(args.feed)

    if sources.is_recent(components.netloc):
        logger.info("API from components.netloc='%s' has recently being accessed - EXIT!", components.netloc)
        return 0
    else:
        logger.debug("components.netloc='%s' has not been recently used, marking ...", components.netloc)
        sources.update(components.netloc)

    logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item='%s'", item)
            # Item links carry the domain as the value of the first query parameter
            domain = tidyup.domain(item.link.split("=")[1])

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_fbabot_atom(args: argparse.Namespace) -> int:
    """Fetch the FBA bot's ATOM feed from ryona.agency and queue every new,
    wanted domain linked in its entries for instance fetching."""
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "ryona.agency"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    feed = f"https://{source_domain}/users/fba/feed.atom"

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                logger.debug("element[]='%s'", type(element))
                # One anchor may list several comma-separated domains
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href)

                    logger.debug("domain='%s' - AFTER!", domain)
                    if domain == "":
                        logger.debug("domain is empty - SKIPPED!")
                        continue
                    elif not utils.is_domain_wanted(domain):
                        logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_instances(args: argparse.Namespace) -> int:
    """Fetch instances starting from args.domain, then (unless a single run
    was requested) re-crawl all due instances from the database."""
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("args.domain='%s' - checking ...", args.domain)
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid.", args.domain)
        return 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
        return 101

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Initial fetch
    try:
        logger.info("Fetching instances from args.domain='%s' ...", args.domain)
        federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update_data(args.domain)
        return 100

    # NOTE(review): guard flag reconstructed as args.single — confirm CLI flag name
    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] == "":
            logger.debug("row[domain] is empty - SKIPPED!")
            continue
        elif not utils.is_domain_wanted(row["domain"]):
            logger.warning("Domain row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", row["domain"], row["origin"], row["software"], row["nodeinfo_url"])
            federation.fetch_instances(row["domain"], row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[domain]='%s'", type(exception), row["domain"])
            instances.set_last_error(row["domain"], exception)

    logger.debug("Success - EXIT!")
    return 0
# Command handler: download the "oliphant" CSV blocklists from
# codeberg.org and import each row as a block record, optionally
# notifying a bot about newly-added "reject" blocks.
# NOTE(review): excerpt retains original line numbers; `continue`,
# `return` and several initializers (e.g. `domains`, `blockdict`, `cnt`)
# are elided — confirm against the full file.
902 def fetch_oliphant(args: argparse.Namespace) -> int:
903 logger.debug("args[]='%s' - CALLED!", type(args))
905 logger.debug("Invoking locking.acquire() ...")
# Skip the whole run if this source was polled recently (rate limiting).
908 source_domain = "codeberg.org"
909 if sources.is_recent(source_domain):
910 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
913 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
914 sources.update(source_domain)
917 base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"
# Static table of blocker domain -> CSV path (relative to base_url).
922 "blocker": "artisan.chat",
923 "csv_url": "mastodon/artisan.chat.csv",
925 "blocker": "mastodon.art",
926 "csv_url": "mastodon/mastodon.art.csv",
928 "blocker": "pleroma.envs.net",
929 "csv_url": "mastodon/pleroma.envs.net.csv",
931 "blocker": "oliphant.social",
932 "csv_url": "mastodon/_unified_tier3_blocklist.csv",
934 "blocker": "mastodon.online",
935 "csv_url": "mastodon/mastodon.online.csv",
937 "blocker": "mastodon.social",
938 "csv_url": "mastodon/mastodon.social.csv",
940 "blocker": "mastodon.social",
941 "csv_url": "other/missing-tier0-mastodon.social.csv",
943 "blocker": "rage.love",
944 "csv_url": "mastodon/rage.love.csv",
946 "blocker": "sunny.garden",
947 "csv_url": "mastodon/sunny.garden.csv",
949 "blocker": "solarpunk.moe",
950 "csv_url": "mastodon/solarpunk.moe.csv",
952 "blocker": "toot.wales",
953 "csv_url": "mastodon/toot.wales.csv",
955 "blocker": "union.place",
956 "csv_url": "mastodon/union.place.csv",
962 logger.debug("Downloading %d files ...", len(blocklists))
963 for block in blocklists:
964 # Is domain given and not equal blocker?
# When --domain is given, only process the matching blocker entry.
965 if isinstance(args.domain, str) and args.domain != block["blocker"]:
966 logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
968 elif args.domain in domains:
969 logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
971 elif instances.is_recent(block["blocker"]):
972 logger.debug("block[blocker]='%s' has been recently crawled - SKIPPED!", block["blocker"])
976 logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
977 response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
979 logger.debug("response.ok='%s',response.status_code=%d,response.content()=%d", response.ok, response.status_code, len(response.content))
# NOTE(review): response.content is bytes, so `response.content == ""`
# is always False (bytes never equal str) — likely meant b"" or
# `len(response.content) == 0`; confirm intent.
980 if not response.ok or response.status_code >= 300 or response.content == "":
981 logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
# Parse the body as a UNIX-dialect CSV with a header row.
984 logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
985 reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
# NOTE(review): csv.DictReader has no __len__; `len(reader)` raises
# TypeError — probably needs `list(reader)` first or a different count.
989 logger.info("Processing %d rows ...", len(reader))
# Per-row loop: the lists use either "#domain"/"#severity" or plain
# "domain"/"severity" column names, so both are tried.
992 logger.debug("row[%s]='%s'", type(row), row)
993 domain = severity = None
994 reject_media = reject_reports = False
997 domain = row["#domain"]
998 elif "domain" in row:
999 domain = row["domain"]
1001 logger.debug("row='%s' does not contain domain column", row)
1004 if "#severity" in row:
1005 severity = row["#severity"]
1006 elif "severity" in row:
1007 severity = row["severity"]
1009 logger.debug("row='%s' does not contain severity column", row)
# Boolean columns arrive as the strings "true"/"false".
1012 if "#reject_media" in row and row["#reject_media"].lower() == "true":
1014 elif "reject_media" in row and row["reject_media"].lower() == "true":
1017 if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
1018 reject_reports = True
1019 elif "reject_reports" in row and row["reject_reports"].lower() == "true":
1020 reject_reports = True
1023 logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
1025 logger.debug("domain is empty - SKIPPED!")
1027 elif not utils.is_domain_wanted(domain):
1028 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1031 logger.debug("Marking domain='%s' as handled", domain)
1032 domains.append(domain)
1034 logger.debug("Processing domain='%s' ...", domain)
1035 processed = utils.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
1036 logger.debug("processed='%s'", processed)
1038 if utils.process_block(block["blocker"], domain, None, "reject") and config.get("bot_enabled"):
# NOTE(review): here `block` is the outer {"blocker","csv_url"} entry,
# which has no "block_level"/"reason" keys — logger args are evaluated
# eagerly, so these lookups would raise KeyError; verify keys exist.
1039 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", domain, block["block_level"], block["blocker"])
1042 "reason" : block["reason"],
# Record the extra per-domain flags as separate block levels.
1046 utils.process_block(block["blocker"], domain, None, "reject_media")
1048 utils.process_block(block["blocker"], domain, None, "reject_reports")
# `cnt` is accumulated in lines elided from this excerpt.
1050 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", block["blocker"], cnt)
1051 instances.set_total_blocks(block["blocker"], cnt)
1053 logger.debug("Checking if blocker='%s' has pending updates ...", block["blocker"])
1054 if instances.has_pending(block["blocker"]):
1055 logger.debug("Flushing updates for block[blocker]='%s' ...", block["blocker"])
1056 instances.update_data(block["blocker"])
1058 logger.debug("Invoking commit() ...")
1059 database.connection.commit()
# Notify the bot once per blocker when new "reject" blocks were found.
1061 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1062 if config.get("bot_enabled") and len(blockdict) > 0:
1063 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
1064 network.send_bot_post(block["blocker"], blockdict)
1066 logger.debug("Success! - EXIT!")
# Command handler: download plain-text blocklists (one domain per line)
# from a static list of URLs and process each listed domain.
# NOTE(review): excerpt retains original line numbers; the `urls`
# initializer, loop header and `continue` lines are elided.
1069 def fetch_txt(args: argparse.Namespace) -> int:
1070 logger.debug("args[]='%s' - CALLED!", type(args))
1072 logger.debug("Invoking locking.acquire() ...")
# Static source list: blocker domain + URL of its published text list.
1077 "blocker": "seirdy.one",
1078 "url" : "https://seirdy.one/pb/bsl.txt",
1081 logger.info("Checking %d text file(s) ...", len(urls))
1083 logger.debug("Fetching row[url]='%s' ...", row["url"])
1084 response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
1086 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
# Only process successful, non-empty responses.
1087 if response.ok and response.status_code < 300 and response.text != "":
1088 logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
# One domain per line in the fetched text file.
1089 domains = response.text.split("\n")
1091 logger.info("Processing %d domains ...", len(domains))
1092 for domain in domains:
1093 logger.debug("domain='%s' - BEFORE!", domain)
# Normalize the raw line (strip markup/whitespace) before validation.
1094 domain = tidyup.domain(domain)
1096 logger.debug("domain='%s' - AFTER!", domain)
1098 logger.debug("domain is empty - SKIPPED!")
1100 elif not utils.is_domain_wanted(domain):
1101 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1103 elif instances.is_recent(domain):
1104 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1107 logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
1108 processed = utils.process_domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)
1110 logger.debug("processed='%s'", processed)
1112 logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
1115 logger.debug("Success! - EXIT!")
# Command handler: scrape fedipact.online's HTML (list items contain
# instance domains) and fetch each new, wanted instance.
# NOTE(review): excerpt retains original line numbers; `return` and
# `continue` lines are elided.
1118 def fetch_fedipact(args: argparse.Namespace) -> int:
1119 logger.debug("args[]='%s' - CALLED!", type(args))
1121 logger.debug("Invoking locking.acquire() ...")
# Rate-limit: abort if this source was polled recently.
1124 source_domain = "fedipact.online"
1125 if sources.is_recent(source_domain):
1126 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1129 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1130 sources.update(source_domain)
1132 response = utils.fetch_url(
1133 f"https://{source_domain}",
1134 network.web_headers,
1135 (config.get("connection_timeout"), config.get("read_timeout"))
1138 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1139 if response.ok and response.status_code < 300 and response.text != "":
1140 logger.debug("Parsing %d Bytes ...", len(response.text))
1142 doc = bs4.BeautifulSoup(response.text, "html.parser")
1143 logger.debug("doc[]='%s'", type(doc))
# Each <li> element's first child is expected to be the domain text.
1145 rows = doc.findAll("li")
1146 logger.info("Checking %d row(s) ...", len(rows))
1148 logger.debug("row[]='%s'", type(row))
1149 domain = tidyup.domain(row.contents[0])
1151 logger.debug("domain='%s' - AFTER!", domain)
1153 logger.debug("domain is empty - SKIPPED!")
1155 elif not utils.is_domain_wanted(domain):
1156 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1158 elif instances.is_registered(domain):
1159 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1161 elif instances.is_recent(domain):
1162 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1165 logger.info("Fetching domain='%s' ...", domain)
1166 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1168 logger.debug("Success! - EXIT!")
# Command handler: scrape the joinfediverse.wiki "FediBlock" wikitable,
# turn table rows into block records, expand "subdomain(s)" entries,
# then register the blocks for each climatejustice.* blocker domain.
# NOTE(review): excerpt retains original line numbers; several lines
# (initializers, `continue`, loop headers) are elided.
1171 def fetch_joinfediverse(args: argparse.Namespace) -> int:
1172 logger.debug("args[]='%s' - CALLED!", type(args))
1174 logger.debug("Invoking locking.acquire() ...")
1177 source_domain = "joinfediverse.wiki"
1178 if sources.is_recent(source_domain):
1179 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1182 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1183 sources.update(source_domain)
1185 raw = utils.fetch_url(
1186 f"https://{source_domain}/FediBlock",
1187 network.web_headers,
1188 (config.get("connection_timeout"), config.get("read_timeout"))
1190 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1192 doc = bs4.BeautifulSoup(raw, "html.parser")
1193 logger.debug("doc[]='%s'", type(doc))
1195 tables = doc.findAll("table", {"class": "wikitable"})
1197 logger.info("Analyzing %d table(s) ...", len(tables))
1199 for table in tables:
1200 logger.debug("table[]='%s'", type(table))
1202 rows = table.findAll("tr")
1203 logger.info("Checking %d row(s) ...", len(rows))
# Maps header column index -> normalized header text for this table.
1204 block_headers = dict()
1206 logger.debug("row[%s]='%s'", type(row), row)
# A row with multiple <th> cells is a header row: record which column
# positions carry the fields we can scrape.
1208 headers = row.findAll("th")
1209 logger.debug("Found headers()=%d header(s)", len(headers))
1210 if len(headers) > 1:
1211 block_headers = dict()
1213 for header in headers:
1215 logger.debug("header[]='%s',cnt=%d", type(header), cnt)
1216 text = header.contents[0]
1218 logger.debug("text[]='%s'", type(text))
1219 if not isinstance(text, str):
1220 logger.debug("text[]='%s' is not of type 'str' - SKIPPED!", type(text))
# A header cell containing a bare domain is data, not a column name.
1222 elif validators.domain(text.strip()):
1223 logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
1226 text = tidyup.domain(text.strip())
1227 logger.debug("text='%s'", text)
1228 if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
1229 logger.debug("Found header: '%s'=%d", text, cnt)
1230 block_headers[cnt] = text
1232 elif len(block_headers) == 0:
1233 logger.debug("row is not scrapable - SKIPPED!")
# Data row under a known header layout: collect cell values by column.
1235 elif len(block_headers) > 0:
1236 logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
1240 for element in row.find_all(["th", "td"]):
1242 logger.debug("element[]='%s',cnt=%d", type(element), cnt)
1243 if cnt in block_headers:
1244 logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])
1246 text = element.text.strip()
# "domain"/"instance" columns are stored under the unified key "blocked".
1247 key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"
1249 logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
1250 if key in ["domain", "instance"]:
1252 elif key == "reason":
1253 block[key] = tidyup.reason(text)
1254 elif key == "subdomain(s)":
# Multiple subdomains are listed slash-separated in one cell.
1257 block[key] = text.split("/")
1259 logger.debug("key='%s'", key)
1262 logger.debug("block()=%d ...", len(block))
1264 logger.debug("Appending block()=%d ...", len(block))
1265 blocklist.append(block)
1267 logger.debug("blocklist()=%d", len(blocklist))
# The wiki list is applied on behalf of all local climatejustice.* rows.
1269 database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
1270 domains = database.cursor.fetchall()
1272 logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
# Expand "subdomain(s)" entries into one block record per subdomain.
1274 for block in blocklist:
1275 logger.debug("block='%s'", block)
1276 if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
1277 origin = block["blocked"]
1278 for subdomain in block["subdomain(s)"]:
# NOTE(review): this mutates and appends the SAME dict each iteration,
# so all appended entries share the last subdomain value — verify.
1279 block["blocked"] = subdomain + "." + origin
1280 blocking.append(block)
1282 blocking.append(block)
# NOTE(review): "%d" with a list argument raises a formatting error in
# logging — probably meant len(blocking).
1284 logger.debug("blocking()=%d", blocking)
1285 for block in blocking:
1286 logger.debug("block[]='%s'", type(block))
1287 block["blocked"] = tidyup.domain(block["blocked"])
1289 logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
1290 if block["blocked"] == "":
1291 logger.debug("block[blocked] is empty - SKIPPED!")
1293 elif not utils.is_domain_wanted(block["blocked"]):
1294 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1296 elif instances.is_recent(block["blocked"]):
1297 logger.debug("blocked='%s' has been recently checked - SKIPPED!", block["blocked"])
1300 logger.info("Proccessing blocked='%s' ...", block["blocked"])
1301 utils.process_domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
1304 for blocker in domains:
# fetchall() returns row tuples; unwrap the single "domain" column.
1305 blocker = blocker[0]
1306 logger.debug("blocker[%s]='%s'", type(blocker), blocker)
1308 for block in blocking:
# NOTE(review): block["reason"] is logged here BEFORE being assigned on
# the next line — KeyError if the key is absent; confirm ordering.
1309 logger.debug("block[blocked]='%s',block[reason]='%s' - BEFORE!", block["blocked"], block["reason"])
1310 block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None
1312 logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
1313 if block["blocked"] == "":
1314 logger.debug("block[blocked] is empty - SKIPPED!")
1316 elif not utils.is_domain_wanted(block["blocked"]):
1317 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1320 logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
1321 if utils.process_block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
# NOTE(review): "block_level" is never set on these wiki-derived dicts —
# this debug lookup would raise KeyError; verify key exists.
1322 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
1324 "blocked": block["blocked"],
1325 "reason" : block["reason"],
1328 if instances.has_pending(blocker):
1329 logger.debug("Flushing updates for blocker='%s' ...", blocker)
1330 instances.update_data(blocker)
1332 logger.debug("Invoking commit() ...")
1333 database.connection.commit()
1335 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1336 if config.get("bot_enabled") and len(blockdict) > 0:
# NOTE(review): format string is malformed — missing closing apostrophe
# after the first %s ("blocker='%s,").
1337 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", blocker, len(blockdict))
1338 network.send_bot_post(blocker, blockdict)
1340 logger.debug("Success! - EXIT!")
# Command handler: re-fetch block lists of instances flagged with
# has_obfuscation=1, try to de-obfuscate wildcarded blocked domains
# (e.g. "*.example.?") and record any newly-resolved blocks. Clears the
# obfuscation flag when everything could be de-obfuscated.
# NOTE(review): excerpt retains original line numbers; loop headers,
# `continue` lines and some initializers (obfuscated, blockdict) are
# elided.
1343 def recheck_obfuscation(args: argparse.Namespace) -> int:
1344 logger.debug("args[]='%s' - CALLED!", type(args))
1346 logger.debug("Invoking locking.acquire() ...")
# Narrow the candidate set by --domain or --software when given.
1349 if isinstance(args.domain, str) and args.domain != "" and utils.is_domain_wanted(args.domain):
1350 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
# NOTE(review): `validators.domain(args.software) == args.software`
# compares a validation result to the input string — looks copy-pasted
# from a domain check; confirm intended condition for --software.
1351 elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
1352 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
1354 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")
1356 rows = database.cursor.fetchall()
1357 logger.info("Checking %d domains ...", len(rows))
1359 logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
# Unless --all is set, skip recently-checked rows (but never when an
# explicit --domain/--software filter was requested).
1360 if (args.all is None or not args.all) and instances.is_recent(row["domain"]) and args.domain is None and args.software is None:
1361 logger.debug("row[domain]='%s' has been recently checked, args.all[]='%s' - SKIPPED!", row["domain"], type(args.all))
# Dispatch to the software-specific block-list fetcher.
1365 if row["software"] == "pleroma":
1366 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1367 blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
1368 elif row["software"] == "mastodon":
1369 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1370 blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
1371 elif row["software"] == "lemmy":
1372 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1373 blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
1374 elif row["software"] == "friendica":
1375 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1376 blocking = friendica.fetch_blocks(row["domain"])
1377 elif row["software"] == "misskey":
1378 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1379 blocking = misskey.fetch_blocks(row["domain"])
1381 logger.warning("Unknown sofware: domain='%s',software='%s'", row["domain"], row["software"])
# NOTE(review): debug message uses %d/len(blocking) but the call passes
# the LIST `blocking` — confirm set_total_blocks accepts a list here.
1383 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
1384 instances.set_total_blocks(row["domain"], blocking)
1386 logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1389 for block in blocking:
1390 logger.debug("block[blocked]='%s'", block["blocked"])
# Filter out empty, fake and anonymizing pseudo-domains first.
1393 if block["blocked"] == "":
1394 logger.debug("block[blocked] is empty - SKIPPED!")
1396 elif block["blocked"].endswith(".arpa"):
1397 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1399 elif block["blocked"].endswith(".tld"):
1400 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1402 elif block["blocked"].endswith(".onion"):
1403 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
# Wildcard characters mark an obfuscated entry: count it and attempt to
# recover the real domain (optionally using a provided hash).
1405 elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
1406 logger.debug("block='%s' is obfuscated.", block["blocked"])
1407 obfuscated = obfuscated + 1
1408 blocked = utils.deobfuscate_domain(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
1409 elif not utils.is_domain_wanted(block["blocked"]):
1410 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1412 elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1413 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
1416 logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
# A successful de-obfuscation yields a different, concrete domain.
1417 if blocked is not None and blocked != block["blocked"]:
1418 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1419 obfuscated = obfuscated - 1
1420 if blocks.is_instance_blocked(row["domain"], blocked):
1421 logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
# Normalize software-specific block level names to canonical ones.
1424 block["block_level"] = utils.alias_block_level(block["block_level"])
1426 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
1427 if utils.process_block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1428 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
1431 "reason" : block["reason"],
1434 logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
# All wildcard entries resolved -> the instance no longer obfuscates.
1435 if obfuscated == 0 and len(blocking) > 0:
1436 logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
1437 instances.set_has_obfuscation(row["domain"], False)
1439 if instances.has_pending(row["domain"]):
1440 logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1441 instances.update_data(row["domain"])
1443 logger.debug("Invoking commit() ...")
1444 database.connection.commit()
1446 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1447 if config.get("bot_enabled") and len(blockdict) > 0:
# NOTE(review): format string is malformed — missing closing apostrophe
# after the first %s ("blocker='%s,").
1448 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", row["domain"], len(blockdict))
1449 network.send_bot_post(row["domain"], blockdict)
1451 logger.debug("Success! - EXIT!")
# Command handler: download the demo.fedilist.com instance CSV
# (optionally filtered by --software) and fetch each new, wanted
# instance it lists.
# NOTE(review): excerpt retains original line numbers; `return` and
# `continue` lines are elided.
1454 def fetch_fedilist(args: argparse.Namespace) -> int:
1455 logger.debug("args[]='%s' - CALLED!", type(args))
1457 logger.debug("Invoking locking.acquire() ...")
1460 source_domain = "demo.fedilist.com"
1461 if sources.is_recent(source_domain):
1462 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1465 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1466 sources.update(source_domain)
# onion=not excludes Tor hidden services from the export.
1468 url = f"http://{source_domain}/instance/csv?onion=not"
1469 if args.software is not None and args.software != "":
1470 logger.debug("args.software='%s'", args.software)
1471 url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"
1473 logger.info("Fetching url='%s' ...", url)
1474 response = reqto.get(
1476 headers=network.web_headers,
1477 timeout=(config.get("connection_timeout"), config.get("read_timeout")),
1478 allow_redirects=False
1481 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
# NOTE(review): the check uses response.content but the message labels
# it response.content() while passing len(response.text) — inconsistent.
1482 if not response.ok or response.status_code >= 300 or len(response.content) == 0:
1483 logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", response.ok, response.status_code, len(response.text))
1486 reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
1488 logger.debug("reader[]='%s'", type(reader))
1491 logger.debug("row[]='%s'", type(row))
# The CSV's "hostname" column carries the instance domain.
1492 domain = tidyup.domain(row["hostname"])
1493 logger.debug("domain='%s' - AFTER!", domain)
1496 logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"])
1498 elif not utils.is_domain_wanted(domain):
1499 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1501 elif (args.all is None or not args.all) and instances.is_registered(domain):
# NOTE(review): the format string has TWO %s placeholders but only ONE
# argument (type(args.all)) — the `domain` argument is missing, so this
# debug call produces a logging formatting error.
1502 logger.debug("domain='%s' is already registered, --all not specified: args.all[]='%s'", type(args.all))
1504 elif instances.is_recent(domain):
1505 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1508 logger.info("Fetching instances from domain='%s' ...", domain)
1509 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1511 logger.debug("Success! - EXIT!")
# Command handler: re-determine the software type (nodeinfo) for one
# domain, all domains of one software type, or all domains whose
# nodeinfo is stale, and persist the result.
# NOTE(review): excerpt retains original line numbers; the per-row loop
# header, `cnt` bookkeeping and `try:` lines are elided.
1514 def update_nodeinfo(args: argparse.Namespace) -> int:
1515 logger.debug("args[]='%s' - CALLED!", type(args))
1517 logger.debug("Invoking locking.acquire() ...")
# Candidate selection: --domain beats --software beats staleness window.
1520 if args.domain is not None and args.domain != "":
1521 logger.debug("Fetching args.domain='%s'", args.domain)
1522 database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ?", [args.domain])
1523 elif args.software is not None and args.software != "":
1524 logger.info("Fetching domains for args.software='%s'", args.software)
1525 database.cursor.execute("SELECT domain, software FROM instances WHERE software = ?", [args.software])
1527 logger.info("Fetching domains for recently updated ...")
1528 database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL", [time.time() - config.get("recheck_nodeinfo")])
1530 domains = database.cursor.fetchall()
1532 logger.info("Checking %d domain(s) ...", len(domains))
1535 logger.debug("row[]='%s'", type(row))
# Progress percentage; `cnt` is maintained in elided lines.
1537 logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
1538 software = federation.determine_software(row["domain"])
1540 logger.debug("Determined software='%s'", software)
# Only overwrite the stored software type on a definite change.
1541 if software != row["software"] and software is not None:
1542 logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
1543 instances.set_software(row["domain"], software)
1545 instances.set_success(row["domain"])
1546 except network.exceptions as exception:
1547 logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1548 instances.set_last_error(row["domain"], exception)
# Timestamp + flush happen regardless of success or failure.
1550 instances.set_last_nodeinfo(row["domain"])
1551 instances.update_data(row["domain"])
1554 logger.debug("Success! - EXIT!")
1557 def fetch_instances_social(args: argparse.Namespace) -> int:
1558 logger.debug("args[]='%s' - CALLED!", type(args))
1560 logger.debug("Invoking locking.acquire() ...")
1563 source_domain = "instances.social"
1565 if config.get("instances_social_api_key") == "":
1566 logger.error("API key not set. Please set in your config.json file.")
1568 elif sources.is_recent(source_domain):
1569 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1572 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1573 sources.update(source_domain)
1576 "Authorization": f"Bearer {config.get('instances_social_api_key')}",
1579 fetched = network.get_json_api(
1581 "/api/1.0/instances/list?count=0&sort_by=name",
1583 (config.get("connection_timeout"), config.get("read_timeout"))
1585 logger.debug("fetched[]='%s'", type(fetched))
1587 if "error_message" in fetched:
1588 logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1590 elif "exception" in fetched:
1591 logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1593 elif "json" not in fetched:
1594 logger.warning("fetched has no element 'json' - EXIT!")
1596 elif "instances" not in fetched["json"]:
1597 logger.warning("fetched[row] has no element 'instances' - EXIT!")
1601 rows = fetched["json"]["instances"]
1603 logger.info("Checking %d row(s) ...", len(rows))
1605 logger.debug("row[]='%s'", type(row))
1606 domain = tidyup.domain(row["name"])
1608 logger.debug("domain='%s' - AFTER!", domain)
1610 logger.debug("domain is empty - SKIPPED!")
1612 elif not utils.is_domain_wanted(domain):
1613 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1615 elif domain in domains:
1616 logger.debug("domain='%s' is already added - SKIPPED!", domain)
1618 elif instances.is_registered(domain):
1619 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1621 elif instances.is_recent(domain):
1622 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1625 logger.info("Fetching instances from domain='%s'", domain)
1626 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1628 logger.debug("Success! - EXIT!")