1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
import argparse
import csv
import inspect
import logging
import time

import atoma
import bs4
import markdown
import reqto
import validators

23 from urllib.parse import urlparse

from fba import csrf
33 from fba import database
from fba import utils
36 from fba.helpers import blacklist
37 from fba.helpers import config
38 from fba.helpers import cookies
39 from fba.helpers import locking
40 from fba.helpers import software as software_helper
41 from fba.helpers import tidyup
43 from fba.http import federation
44 from fba.http import network
46 from fba.models import blocks
47 from fba.models import instances
48 from fba.models import sources
50 from fba.networks import friendica
51 from fba.networks import lemmy
52 from fba.networks import mastodon
53 from fba.networks import misskey
54 from fba.networks import pleroma
56 logging.basicConfig(level=logging.INFO)
57 logger = logging.getLogger(__name__)
58 #logger.setLevel(logging.DEBUG)
60 def check_instance(args: argparse.Namespace) -> int:
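"""Checks a single domain given via --domain: it must be syntactically valid, not blacklisted and not already registered; the resulting status code is returned."""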
61 logger.debug("args.domain='%s' - CALLED!", args.domain)
63 if not validators.domain(args.domain):
64 logger.warning("args.domain='%s' is not valid", args.domain)
66 elif blacklist.is_blacklisted(args.domain):
67 logger.warning("args.domain='%s' is blacklisted", args.domain)
69 elif instances.is_registered(args.domain):
70 logger.warning("args.domain='%s' is already registered", args.domain)
73 logger.info("args.domain='%s' is not known", args.domain)
75 logger.debug("status=%d - EXIT!", status)
78 def check_nodeinfo(args: argparse.Namespace) -> int:
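"""Walks over all instances with a stored nodeinfo_url and warns when the URL contains neither the domain nor its punycode form."""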
79 logger.debug("args[]='%s' - CALLED!", type(args))
82 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")
85 for row in database.cursor.fetchall():
86 logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
87 punycode = row["domain"].encode("idna").decode("utf-8")
89 if row["nodeinfo_url"].startswith("/"):
90 logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
92 elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
93 logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
96 logger.info("Found %d row(s)", cnt)
101 def fetch_pixelfed_api(args: argparse.Namespace) -> int:
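"""Fetches the public server list from the pixelfed.org API and hands unknown, wanted domains over to federation.fetch_instances()."""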
102 logger.debug("args[]='%s' - CALLED!", type(args))
104 # No CSRF token by default; there is no need to add network.source_headers here
106 source_domain = "pixelfed.org"
108 if sources.is_recent(source_domain):
109 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
112 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
113 sources.update(source_domain)
116 logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
117 headers = csrf.determine(source_domain, dict())
118 except network.exceptions as exception:
119 logger.warning("Exception '%s' during checking CSRF (fetch_pixelfed_api,%s) - EXIT!", type(exception), __name__)
123 logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
124 fetched = network.get_json_api(
126 "/api/v1/servers/all.json?scope=All&country=all&language=all",
128 (config.get("connection_timeout"), config.get("read_timeout"))
131 logger.debug("JSON API returned %d elements", len(fetched))
132 if "error_message" in fetched:
133 logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
135 elif "data" not in fetched["json"]:
136 logger.warning("API did not return JSON with 'data' element - EXIT!")
139 rows = fetched["json"]["data"]
140 logger.info("Checking %d fetched rows ...", len(rows))
142 logger.debug("row[]='%s'", type(row))
143 if "domain" not in row:
144 logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
146 elif row["domain"] == "":
147 logger.debug("row[domain] is empty - SKIPPED!")
150 logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
151 row["domain"] = row["domain"].encode("idna").decode("utf-8")
152 logger.debug("row[domain]='%s' - AFTER!", row["domain"])
154 if not utils.is_domain_wanted(row["domain"]):
155 logger.warning("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
157 elif instances.is_registered(row["domain"]):
158 logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
160 elif instances.is_recent(row["domain"]):
161 logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
164 logger.debug("Fetching instances from row[domain]='%s' ...", row["domain"])
165 federation.fetch_instances(row["domain"], None, None, inspect.currentframe().f_code.co_name)
167 except network.exceptions as exception:
168 logger.warning("Cannot fetch pixelfed.org API,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
171 logger.debug("Success! - EXIT!")
174 def fetch_bkali(args: argparse.Namespace) -> int:
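"""Queries the gql.api.bka.li GraphQL API for a domain list and fetches instance data for all new, wanted domains."""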
175 logger.debug("args[]='%s' - CALLED!", type(args))
177 logger.debug("Invoking locking.acquire() ...")
180 source_domain = "gql.api.bka.li"
181 if sources.is_recent(source_domain):
182 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
185 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
186 sources.update(source_domain)
190 logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
191 fetched = network.post_json_api(
195 "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
199 logger.debug("fetched[]='%s'", type(fetched))
200 if "error_message" in fetched:
201 logger.warning("post_json_api() for source_domain='%s' returned error_message='%s'", source_domain, fetched["error_message"])
203 elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
204 logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
207 rows = fetched["json"]
209 logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
211 raise Exception("WARNING: Returned no records")
212 elif "data" not in rows:
213 raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
214 elif "nodeinfo" not in rows["data"]:
215 raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")
217 for entry in rows["data"]["nodeinfo"]:
218 logger.debug("entry[%s]='%s'", type(entry), entry)
219 if "domain" not in entry:
220 logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
222 elif entry["domain"] == "":
223 logger.debug("entry[domain] is empty - SKIPPED!")
225 elif not utils.is_domain_wanted(entry["domain"]):
226 logger.warning("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
228 elif instances.is_registered(entry["domain"]):
229 logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
231 elif instances.is_recent(entry["domain"]):
232 logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
235 logger.debug("Adding domain='%s' ...", entry["domain"])
236 domains.append(entry["domain"])
238 except network.exceptions as exception:
239 logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
242 logger.debug("domains()=%d", len(domains))
244 logger.info("Adding %d new instances ...", len(domains))
245 for domain in domains:
246 logger.debug("domain='%s' - BEFORE!", domain)
247 domain = domain.encode("idna").decode("utf-8")
248 logger.debug("domain='%s' - AFTER!", domain)
251 logger.info("Fetching instances from domain='%s' ...", domain)
252 federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
253 except network.exceptions as exception:
254 logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
255 instances.set_last_error(domain, exception)
258 logger.debug("Success - EXIT!")
261 def fetch_blocks(args: argparse.Namespace) -> int:
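"""Fetches block lists from registered Pleroma, Mastodon, Lemmy, Friendica and Misskey instances, deobfuscates obfuscated entries where possible and stores the resulting blocks."""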
262 logger.debug("args[]='%s' - CALLED!", type(args))
263 if args.domain is not None and args.domain != "":
264 logger.debug("args.domain='%s' - checking ...", args.domain)
265 if not validators.domain(args.domain):
266 logger.warning("args.domain='%s' is not valid.", args.domain)
268 elif blacklist.is_blacklisted(args.domain):
269 logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
271 elif not instances.is_registered(args.domain):
272 logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
275 logger.debug("Invoking locking.acquire() ...")
278 if args.domain is not None and args.domain != "":
279 # Re-check single domain
280 logger.debug("Querying database for single args.domain='%s' ...", args.domain)
281 database.cursor.execute(
282 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
284 elif args.software is not None and args.software != "":
285 # Re-check single software
286 logger.debug("Querying database for args.software='%s' ...", args.software)
287 database.cursor.execute(
288 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
291 # Re-check after "timeout" (aka. minimum interval)
292 database.cursor.execute(
293 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
296 rows = database.cursor.fetchall()
297 logger.info("Checking %d entries ...", len(rows))
298 for blocker, software, origin, nodeinfo_url in rows:
299 logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
300 blocker = tidyup.domain(blocker)
301 logger.debug("blocker='%s' - AFTER!", blocker)
304 logger.warning("blocker is now empty!")
306 elif nodeinfo_url is None or nodeinfo_url == "":
307 logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
309 elif not utils.is_domain_wanted(blocker):
310 logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
313 logger.debug("blocker='%s'", blocker)
314 instances.set_last_blocked(blocker)
315 instances.set_has_obfuscation(blocker, False)
319 if software == "pleroma":
320 logger.info("blocker='%s',software='%s'", blocker, software)
321 blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
322 elif software == "mastodon":
323 logger.info("blocker='%s',software='%s'", blocker, software)
324 blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
325 elif software == "lemmy":
326 logger.info("blocker='%s',software='%s'", blocker, software)
327 blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
328 elif software == "friendica":
329 logger.info("blocker='%s',software='%s'", blocker, software)
330 blocking = friendica.fetch_blocks(blocker)
331 elif software == "misskey":
332 logger.info("blocker='%s',software='%s'", blocker, software)
333 blocking = misskey.fetch_blocks(blocker)
335 logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)
337 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
338 instances.set_total_blocks(blocker, blocking)
340 logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
342 for block in blocking:
343 logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])
345 if block["block_level"] == "":
346 logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
349 logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
350 block["blocked"] = tidyup.domain(block["blocked"])
351 block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
352 logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])
354 if block["blocked"] == "":
355 logger.warning("blocked is empty, blocker='%s'", blocker)
357 elif block["blocked"].endswith(".onion"):
358 logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
360 elif block["blocked"].endswith(".arpa"):
361 logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
363 elif block["blocked"].endswith(".tld"):
364 logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
366 elif block["blocked"].find("*") >= 0:
367 logger.debug("blocker='%s' uses obfuscated domains", blocker)
369 # Some Friendica servers also obfuscate domains without a hash
370 row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)
372 logger.debug("row[]='%s'", type(row))
374 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
375 instances.set_has_obfuscation(blocker, True)
378 block["blocked"] = row["domain"]
379 origin = row["origin"]
380 nodeinfo_url = row["nodeinfo_url"]
381 elif block["blocked"].find("?") >= 0:
382 logger.debug("blocker='%s' uses obfuscated domains", blocker)
384 # Some servers obfuscate domains with question marks; it is unclear whether that depends on the software version
385 row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)
387 logger.debug("row[]='%s'", type(row))
389 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
390 instances.set_has_obfuscation(blocker, True)
393 block["blocked"] = row["domain"]
394 origin = row["origin"]
395 nodeinfo_url = row["nodeinfo_url"]
397 logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
398 if block["blocked"] == "":
399 logger.debug("block[blocked] is empty - SKIPPED!")
402 logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
403 block["blocked"] = block["blocked"].encode("idna").decode("utf-8")
404 logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
406 if not utils.is_domain_wanted(block["blocked"]):
407 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
409 elif block["block_level"] in ["accept", "accepted"]:
410 logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
412 elif not instances.is_registered(block["blocked"]):
413 logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
414 federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)
416 block["block_level"] = utils.alias_block_level(block["block_level"])
418 if utils.process_block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
419 logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
421 "blocked": block["blocked"],
422 "reason" : block["reason"],
425 logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
426 cookies.clear(block["blocked"])
428 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
429 if instances.has_pending(blocker):
430 logger.debug("Flushing updates for blocker='%s' ...", blocker)
431 instances.update_data(blocker)
433 logger.debug("Invoking commit() ...")
434 database.connection.commit()
436 logger.debug("Invoking cookies.clear(%s) ...", blocker)
437 cookies.clear(blocker)
439 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
440 if config.get("bot_enabled") and len(blockdict) > 0:
441 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
442 network.send_bot_post(blocker, blockdict)
444 logger.debug("Success! - EXIT!")
447 def fetch_observer(args: argparse.Namespace) -> int:
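"""Scrapes fediverse.observer: determines the list of software types (or uses --software) and registers all wanted domains found in the per-software tables."""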
448 logger.debug("args[]='%s' - CALLED!", type(args))
450 logger.debug("Invoking locking.acquire() ...")
453 source_domain = "fediverse.observer"
454 if sources.is_recent(source_domain):
455 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
458 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
459 sources.update(source_domain)
462 if args.software is None:
463 logger.info("Fetching software list ...")
464 raw = utils.fetch_url(
465 f"https://{source_domain}",
467 (config.get("connection_timeout"), config.get("read_timeout"))
469 logger.debug("raw[%s]()=%d", type(raw), len(raw))
471 doc = bs4.BeautifulSoup(raw, features="html.parser")
472 logger.debug("doc[]='%s'", type(doc))
474 items = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"}).findAll("a", {"class": "dropdown-item"})
475 logger.debug("items[]='%s'", type(items))
477 logger.info("Checking %d menu items ...", len(items))
479 logger.debug("item[%s]='%s'", type(item), item)
480 if item.text.lower() == "all":
481 logger.debug("Skipping 'All' menu entry ...")
484 logger.debug("Appending item.text='%s' ...", item.text)
485 types.append(tidyup.domain(item.text))
487 logger.info("Adding args.software='%s' as type ...", args.software)
488 types.append(args.software)
490 logger.info("Fetching table data for %d software type(s) ...", len(types))
491 for software in types:
492 logger.debug("software='%s' - BEFORE!", software)
493 if args.software is not None and args.software != software:
494 logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
499 logger.debug("Fetching table data for software='%s' ...", software)
500 raw = utils.fetch_url(
501 f"https://{source_domain}/app/views/tabledata.php?software={software}",
503 (config.get("connection_timeout"), config.get("read_timeout"))
505 logger.debug("raw[%s]()=%d", type(raw), len(raw))
507 doc = bs4.BeautifulSoup(raw, features="html.parser")
508 logger.debug("doc[]='%s'", type(doc))
509 except network.exceptions as exception:
510 logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
513 items = doc.findAll("a", {"class": "url"})
514 logger.info("Checking %d items,software='%s' ...", len(items), software)
516 logger.debug("item[]='%s'", type(item))
517 domain = item.decode_contents()
518 logger.debug("domain='%s' - AFTER!", domain)
521 logger.debug("domain is empty - SKIPPED!")
524 logger.debug("domain='%s' - BEFORE!", domain)
525 domain = domain.encode("idna").decode("utf-8")
526 logger.debug("domain='%s' - AFTER!", domain)
528 if not utils.is_domain_wanted(domain):
529 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
531 elif instances.is_registered(domain):
532 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
534 elif instances.is_recent(domain):
535 logger.debug("domain='%s' has recently been handled - SKIPPED!", domain)
538 software = software_helper.alias(software)
539 logger.info("Fetching instances for domain='%s'", domain)
540 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
542 logger.debug("Success! - EXIT!")
545 def fetch_todon_wiki(args: argparse.Namespace) -> int:
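"""Scrapes the wiki.todon.eu domain-block page and records the silenced/limited and suspended servers listed there as blocks."""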
546 logger.debug("args[]='%s' - CALLED!", type(args))
548 logger.debug("Invoking locking.acquire() ...")
551 source_domain = "wiki.todon.eu"
552 if sources.is_recent(source_domain):
553 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
556 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
557 sources.update(source_domain)
564 raw = utils.fetch_url(f"https://{source_domain}/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
565 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
567 doc = bs4.BeautifulSoup(raw, "html.parser")
568 logger.debug("doc[]='%s'", type(doc))
570 silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
571 logger.info("Checking %d silenced/limited entries ...", len(silenced))
572 blocklist["silenced"] = utils.find_domains(silenced, "div")
574 suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
575 logger.info("Checking %d suspended entries ...", len(suspended))
576 blocklist["reject"] = utils.find_domains(suspended, "div")
578 blocking = blocklist["silenced"] + blocklist["reject"]
581 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
582 instances.set_total_blocks(blocker, blocking)
585 for block_level in blocklist:
586 blockers = blocklist[block_level]
588 logger.debug("block_level='%s',blockers()=%d", block_level, len(blockers))
589 for blocked in blockers:
590 logger.debug("blocked='%s'", blocked)
592 if not instances.is_registered(blocked):
594 logger.info("Fetching instances from domain='%s' ...", blocked)
595 federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
596 except network.exceptions as exception:
597 logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
598 instances.set_last_error(blocked, exception)
600 if blocks.is_instance_blocked(blocker, blocked, block_level):
601 logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
604 logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
605 if utils.process_block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
606 logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", blocked, block_level, blocker)
612 logger.debug("Invoking commit() ...")
613 database.connection.commit()
615 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
616 if config.get("bot_enabled") and len(blockdict) > 0:
617 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
618 network.send_bot_post(blocker, blockdict)
620 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
621 if instances.has_pending(blocker):
622 logger.debug("Flushing updates for blocker='%s' ...", blocker)
623 instances.update_data(blocker)
625 logger.debug("Success! - EXIT!")
628 def fetch_cs(args: argparse.Namespace):
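"""Parses chaos.social's federation.md (via raw.githubusercontent.com) and records the silenced and blocked instances as blocks by chaos.social."""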
629 logger.debug("args[]='%s' - CALLED!", type(args))
631 logger.debug("Invoking locking.acquire() ...")
659 source_domain = "raw.githubusercontent.com"
660 if sources.is_recent(source_domain):
661 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
664 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
665 sources.update(source_domain)
667 raw = utils.fetch_url(f"https://{source_domain}/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
668 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
670 doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
671 logger.debug("doc()=%d[]='%s'", len(doc), type(doc))
673 silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
674 logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
675 blocklist["silenced"] = federation.find_domains(silenced)
677 blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
678 logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
679 blocklist["reject"] = federation.find_domains(blocked)
681 blocking = blocklist["silenced"] + blocklist["reject"]
682 blocker = "chaos.social"
684 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
685 instances.set_total_blocks(blocker, blocking)
687 logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
689 if len(blocking) > 0:
690 for block_level in blocklist:
691 logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))
693 for row in blocklist[block_level]:
694 logger.debug("row[%s]='%s'", type(row), row)
695 if not "domain" in row:
696 logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
698 elif instances.is_recent(row["domain"], "last_blocked"):
699 logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
701 elif not instances.is_registered(row["domain"]):
703 logger.info("Fetching instances from domain='%s' ...", row["domain"])
704 federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
705 except network.exceptions as exception:
706 logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
707 instances.set_last_error(row["domain"], exception)
709 if utils.process_block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
710 logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
712 "blocked": row["domain"],
713 "reason" : row["reason"],
716 logger.debug("Invoking commit() ...")
717 database.connection.commit()
719 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
720 if config.get("bot_enabled") and len(blockdict) > 0:
721 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
722 network.send_bot_post(blocker, blockdict)
724 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
725 if instances.has_pending(blocker):
726 logger.debug("Flushing updates for blocker='%s' ...", blocker)
727 instances.update_data(blocker)
729 logger.debug("Success! - EXIT!")
732 def fetch_fba_rss(args: argparse.Namespace) -> int:
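"""Fetches an FBA-specific RSS feed given via --feed and registers all new, wanted domains found in it."""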
733 logger.debug("args[]='%s' - CALLED!", type(args))
737 logger.debug("Invoking locking.acquire() ...")
740 components = urlparse(args.feed)
742 if sources.is_recent(components.netloc):
743 logger.info("API from components.netloc='%s' has recently been accessed - EXIT!", components.netloc)
746 logger.debug("components.netloc='%s' has not been recently used, marking ...", components.netloc)
747 sources.update(components.netloc)
749 logger.info("Fetching FBA-specific RSS feed args.feed='%s' ...", args.feed)
750 response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
752 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
753 if response.ok and response.status_code < 300 and len(response.text) > 0:
754 logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
755 rss = atoma.parse_rss_bytes(response.content)
757 logger.debug("rss[]='%s'", type(rss))
758 for item in rss.items:
759 logger.debug("item='%s'", item)
760 domain = tidyup.domain(item.link.split("=")[1])
762 logger.debug("domain='%s' - AFTER!", domain)
764 logger.debug("domain is empty - SKIPPED!")
767 logger.debug("domain='%s' - BEFORE!", domain)
768 domain = domain.encode("idna").decode("utf-8")
769 logger.debug("domain='%s' - AFTER!", domain)
771 if not utils.is_domain_wanted(domain):
772 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
774 elif domain in domains:
775 logger.debug("domain='%s' is already added - SKIPPED!", domain)
777 elif instances.is_registered(domain):
778 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
780 elif instances.is_recent(domain):
781 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
784 logger.debug("Adding domain='%s'", domain)
785 domains.append(domain)
787 logger.debug("domains()=%d", len(domains))
789 logger.info("Adding %d new instances ...", len(domains))
790 for domain in domains:
791 logger.debug("domain='%s'", domain)
793 logger.info("Fetching instances from domain='%s' ...", domain)
794 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
795 except network.exceptions as exception:
796 logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
797 instances.set_last_error(domain, exception)
800 logger.debug("Success! - EXIT!")
803 def fetch_fbabot_atom(args: argparse.Namespace) -> int:
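"""Parses the ATOM feed of the FBA bot account on ryona.agency and registers all new, wanted domains linked from it."""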
804 logger.debug("args[]='%s' - CALLED!", type(args))
806 logger.debug("Invoking locking.acquire() ...")
809 source_domain = "ryona.agency"
810 if sources.is_recent(source_domain):
811 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
814 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
815 sources.update(source_domain)
817 feed = f"https://{source_domain}/users/fba/feed.atom"
821 logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
822 response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
824 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
825 if response.ok and response.status_code < 300 and len(response.text) > 0:
826 logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
827 atom = atoma.parse_atom_bytes(response.content)
829 logger.debug("atom[]='%s'", type(atom))
830 for entry in atom.entries:
831 logger.debug("entry[]='%s'", type(entry))
832 doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
833 logger.debug("doc[]='%s'", type(doc))
834 for element in doc.findAll("a"):
835 logger.debug("element[]='%s'", type(element))
836 for href in element["href"].split(","):
837 logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
838 domain = tidyup.domain(href)
840 logger.debug("domain='%s' - AFTER!", domain)
842 logger.debug("domain is empty - SKIPPED!")
845 logger.debug("domain='%s' - BEFORE!", domain)
846 domain = domain.encode("idna").decode("utf-8")
847 logger.debug("domain='%s' - AFTER!", domain)
849 if not utils.is_domain_wanted(domain):
850 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
852 elif domain in domains:
853 logger.debug("domain='%s' is already added - SKIPPED!", domain)
855 elif instances.is_registered(domain):
856 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
858 elif instances.is_recent(domain):
859 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
862 logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
863 domains.append(domain)
865 logger.debug("domains()=%d", len(domains))
867 logger.info("Adding %d new instances ...", len(domains))
868 for domain in domains:
869 logger.debug("domain='%s'", domain)
871 logger.info("Fetching instances from domain='%s' ...", domain)
872 federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
873 except network.exceptions as exception:
874 logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
875 instances.set_last_error(domain, exception)
878 logger.debug("Success! - EXIT!")
881 def fetch_instances(args: argparse.Namespace) -> int:
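"""Fetches instances starting from --domain and then re-fetches peers from all registered instances whose last instance fetch is older than the configured interval."""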
882 logger.debug("args[]='%s' - CALLED!", type(args))
884 logger.debug("args.domain='%s' - checking ...", args.domain)
885 if not validators.domain(args.domain):
886 logger.warning("args.domain='%s' is not valid.", args.domain)
888 elif blacklist.is_blacklisted(args.domain):
889 logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
892 logger.debug("Invoking locking.acquire() ...")
897 logger.info("Fetching instances from args.domain='%s' ...", args.domain)
898 federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
899 except network.exceptions as exception:
900 logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
901 instances.set_last_error(args.domain, exception)
902 instances.update_data(args.domain)
906 logger.debug("Not fetching more instances - EXIT!")
909 # Loop through instances that are due for a re-fetch
910 database.cursor.execute(
911 "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
914 rows = database.cursor.fetchall()
915 logger.info("Checking %d entries ...", len(rows))
917 logger.debug("row[domain]='%s'", row["domain"])
918 if row["domain"] == "":
919 logger.debug("row[domain] is empty - SKIPPED!")
922 logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
923 row["domain"] = row["domain"].encode("idna").decode("utf-8")
924 logger.debug("row[domain]='%s' - AFTER!", row["domain"])
926 if not utils.is_domain_wanted(row["domain"]):
927 logger.warning("Domain row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
931 logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", row["domain"], row["origin"], row["software"], row["nodeinfo_url"])
932 federation.fetch_instances(row["domain"], row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
933 except network.exceptions as exception:
934 logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[domain]='%s'", type(exception), row["domain"])
935 instances.set_last_error(row["domain"], exception)
937 logger.debug("Success - EXIT!")
940 def fetch_oliphant(args: argparse.Namespace) -> int:
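"""Downloads the CSV block lists from the oliphant/blocklists repository on codeberg.org and imports the listed blocks per blocker."""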
941 logger.debug("args[]='%s' - CALLED!", type(args))
943 logger.debug("Invoking locking.acquire() ...")
946 source_domain = "codeberg.org"
947 if sources.is_recent(source_domain):
948 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
951 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
952 sources.update(source_domain)
955 base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"
960 "blocker": "artisan.chat",
961 "csv_url": "mastodon/artisan.chat.csv",
963 "blocker": "mastodon.art",
964 "csv_url": "mastodon/mastodon.art.csv",
966 "blocker": "pleroma.envs.net",
967 "csv_url": "mastodon/pleroma.envs.net.csv",
969 "blocker": "oliphant.social",
970 "csv_url": "mastodon/_unified_tier3_blocklist.csv",
972 "blocker": "mastodon.online",
973 "csv_url": "mastodon/mastodon.online.csv",
975 "blocker": "mastodon.social",
976 "csv_url": "mastodon/mastodon.social.csv",
978 "blocker": "mastodon.social",
979 "csv_url": "other/missing-tier0-mastodon.social.csv",
981 "blocker": "rage.love",
982 "csv_url": "mastodon/rage.love.csv",
984 "blocker": "sunny.garden",
985 "csv_url": "mastodon/sunny.garden.csv",
987 "blocker": "sunny.garden",
988 "csv_url": "mastodon/gardenfence.csv",
990 "blocker": "solarpunk.moe",
991 "csv_url": "mastodon/solarpunk.moe.csv",
993 "blocker": "toot.wales",
994 "csv_url": "mastodon/toot.wales.csv",
996 "blocker": "union.place",
997 "csv_url": "mastodon/union.place.csv",
999 "blocker": "oliphant.social",
1000 "csv_url": "mastodon/birdsite.csv",
1006 logger.debug("Downloading %d files ...", len(blocklists))
1007 for block in blocklists:
1008 # Is a domain given and not equal to this blocker?
1009 if isinstance(args.domain, str) and args.domain != block["blocker"]:
1010 logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
1012 elif args.domain in domains:
1013 logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
1017 logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
1018 response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
1020 logger.debug("response.ok='%s',response.status_code=%d,response.content()=%d", response.ok, response.status_code, len(response.content))
1021 if not response.ok or response.status_code >= 300 or len(response.content) == 0:
1022 logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
1025 logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
1026 reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
1032 logger.debug("row[%s]='%s'", type(row), row)
1033 domain = severity = None
1034 reject_media = reject_reports = False
1036 if "#domain" in row:
1037 domain = row["#domain"]
1038 elif "domain" in row:
1039 domain = row["domain"]
1041 logger.debug("row='%s' does not contain domain column", row)
1044 if "#severity" in row:
1045 severity = utils.alias_block_level(row["#severity"])
1046 elif "severity" in row:
1047 severity = utils.alias_block_level(row["severity"])
1049 logger.debug("row='%s' does not contain severity column", row)
1052 if "#reject_media" in row and row["#reject_media"].lower() == "true":
1054 elif "reject_media" in row and row["reject_media"].lower() == "true":
1057 if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
1058 reject_reports = True
1059 elif "reject_reports" in row and row["reject_reports"].lower() == "true":
1060 reject_reports = True
1063 logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
1065 logger.debug("domain is empty - SKIPPED!")
1067 elif domain.endswith(".onion"):
1068 logger.debug("domain='%s' is a TOR .onion domain - SKIPPED", domain)
1070 elif domain.endswith(".arpa"):
1071 logger.debug("domain='%s' is a reverse IP address - SKIPPED", domain)
1073 elif domain.endswith(".tld"):
1074 logger.debug("domain='%s' is a fake domain - SKIPPED", domain)
1076 elif domain.find("*") >= 0 or domain.find("?") >= 0:
1077 logger.debug("domain='%s' is obfuscated - Invoking utils.deobfuscate(%s, %s) ...", domain, domain, block["blocker"])
1078 domain = utils.deobfuscate(domain, block["blocker"])
1079 logger.debug("domain='%s' - AFTER!", domain)
1081 if not validators.domain(domain):
1082 logger.debug("domain='%s' is not a valid domain - SKIPPED!", domain)
1084 elif blacklist.is_blacklisted(domain):
1085 logger.warning("domain='%s' is blacklisted - SKIPPED!", domain)
1088 logger.debug("Marking domain='%s' as handled", domain)
1089 domains.append(domain)
1091 logger.debug("Processing domain='%s' ...", domain)
1092 processed = utils.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
1093 logger.debug("processed='%s'", processed)
1095 if utils.process_block(block["blocker"], domain, None, severity) and config.get("bot_enabled"):
1096 logger.debug("Appending blocked='%s',severity='%s' for blocker='%s' ...", domain, severity, block["blocker"])
1099 "reason" : None,
1103 utils.process_block(block["blocker"], domain, None, "reject_media")
1105 utils.process_block(block["blocker"], domain, None, "reject_reports")
1107 logger.debug("Invoking instances.set_total_blocks(%s, domains()=%d) ...", block["blocker"], len(domains))
1108 instances.set_total_blocks(block["blocker"], domains)
1110 logger.debug("Checking if blocker='%s' has pending updates ...", block["blocker"])
1111 if instances.has_pending(block["blocker"]):
1112 logger.debug("Flushing updates for block[blocker]='%s' ...", block["blocker"])
1113 instances.update_data(block["blocker"])
1115 logger.debug("Invoking commit() ...")
1116 database.connection.commit()
1118 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1119 if config.get("bot_enabled") and len(blockdict) > 0:
1120 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
1121 network.send_bot_post(block["blocker"], blockdict)
1123 logger.debug("Success! - EXIT!")
1126 def fetch_txt(args: argparse.Namespace) -> int:
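"""Downloads plain-text block lists (currently seirdy.one's bsl.txt) and processes every listed domain."""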
1127 logger.debug("args[]='%s' - CALLED!", type(args))
1129 logger.debug("Invoking locking.acquire() ...")
1134 "blocker": "seirdy.one",
1135 "url" : "https://seirdy.one/pb/bsl.txt",
1138 logger.info("Checking %d text file(s) ...", len(urls))
1140 logger.debug("Fetching row[url]='%s' ...", row["url"])
1141 response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
1143 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1144 if response.ok and response.status_code < 300 and response.text != "":
1145 logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
1146 domains = response.text.split("\n")
1148 logger.info("Processing %d domains ...", len(domains))
1149 for domain in domains:
1150 logger.debug("domain='%s' - BEFORE!", domain)
1151 domain = tidyup.domain(domain)
1153 logger.debug("domain='%s' - AFTER!", domain)
1155 logger.debug("domain is empty - SKIPPED!")
1157 elif not utils.is_domain_wanted(domain):
1158 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1160 elif instances.is_recent(domain):
1161 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1164 logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
1165 processed = utils.process_domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)
1167 logger.debug("processed='%s'", processed)
1169 logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
1172 logger.debug("Success! - EXIT!")
1175 def fetch_fedipact(args: argparse.Namespace) -> int:
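"""Scrapes fedipact.online for participating instances and fetches instance data for all new, wanted domains."""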
1176 logger.debug("args[]='%s' - CALLED!", type(args))
1178 logger.debug("Invoking locking.acquire() ...")
1181 source_domain = "fedipact.online"
1182 if sources.is_recent(source_domain):
1183 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1186 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1187 sources.update(source_domain)
1189 response = utils.fetch_url(
1190 f"https://{source_domain}",
1191 network.web_headers,
1192 (config.get("connection_timeout"), config.get("read_timeout"))
1195 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1196 if response.ok and response.status_code < 300 and response.text != "":
1197 logger.debug("Parsing %d Bytes ...", len(response.text))
1199 doc = bs4.BeautifulSoup(response.text, "html.parser")
1200 logger.debug("doc[]='%s'", type(doc))
1202 rows = doc.findAll("li")
1203 logger.info("Checking %d row(s) ...", len(rows))
1205 logger.debug("row[]='%s'", type(row))
1206 domain = tidyup.domain(row.contents[0])
1208 logger.debug("domain='%s' - AFTER!", domain)
1210 logger.debug("domain is empty - SKIPPED!")
1213 logger.debug("domain='%s' - BEFORE!", domain)
1214 domain = domain.encode("idna").decode("utf-8")
1215 logger.debug("domain='%s' - AFTER!", domain)
1217 if not utils.is_domain_wanted(domain):
1218 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1220 elif instances.is_registered(domain):
1221 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1223 elif instances.is_recent(domain):
1224 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1227 logger.info("Fetching domain='%s' ...", domain)
1228 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1230 logger.debug("Success! - EXIT!")
1233 def fetch_joinfediverse(args: argparse.Namespace) -> int:
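"""Scrapes the FediBlock wiki tables on joinfediverse.wiki, expands listed subdomains and records the entries as blocks."""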
1234 logger.debug("args[]='%s' - CALLED!", type(args))
1236 logger.debug("Invoking locking.acquire() ...")
1239 source_domain = "joinfediverse.wiki"
1240 if sources.is_recent(source_domain):
1241 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1244 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1245 sources.update(source_domain)
1247 raw = utils.fetch_url(
1248 f"https://{source_domain}/FediBlock",
1249 network.web_headers,
1250 (config.get("connection_timeout"), config.get("read_timeout"))
1252 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1254 doc = bs4.BeautifulSoup(raw, "html.parser")
1255 logger.debug("doc[]='%s'", type(doc))
1257 tables = doc.findAll("table", {"class": "wikitable"})
1259 logger.info("Analyzing %d table(s) ...", len(tables))
1261 for table in tables:
1262 logger.debug("table[]='%s'", type(table))
1264 rows = table.findAll("tr")
1265 logger.info("Checking %d row(s) ...", len(rows))
1266 block_headers = dict()
1268 logger.debug("row[%s]='%s'", type(row), row)
1270 headers = row.findAll("th")
1271 logger.debug("Found headers()=%d header(s)", len(headers))
1272 if len(headers) > 1:
1273 block_headers = dict()
1275 for header in headers:
1277 logger.debug("header[]='%s',cnt=%d", type(header), cnt)
1278 text = header.contents[0]
1280 logger.debug("text[]='%s'", type(text))
1281 if not isinstance(text, str):
1282 logger.debug("text[]='%s' is not of type 'str' - SKIPPED!", type(text))
1284 elif validators.domain(text.strip()):
1285 logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
1288 text = tidyup.domain(text.strip())
1289 logger.debug("text='%s'", text)
1290 if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
1291 logger.debug("Found header: '%s'=%d", text, cnt)
1292 block_headers[cnt] = text
1294 elif len(block_headers) == 0:
1295 logger.debug("row is not scrapable - SKIPPED!")
1297 elif len(block_headers) > 0:
1298 logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
1302 for element in row.find_all(["th", "td"]):
1304 logger.debug("element[]='%s',cnt=%d", type(element), cnt)
1305 if cnt in block_headers:
1306 logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])
1308 text = element.text.strip()
1309 key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"
1311 logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
1312 if key in ["domain", "instance"]:
1314 elif key == "reason":
1315 block[key] = tidyup.reason(text)
1316 elif key == "subdomain(s)":
1319 block[key] = text.split("/")
1321 logger.debug("key='%s'", key)
1324 logger.debug("block()=%d ...", len(block))
1326 logger.debug("Appending block()=%d ...", len(block))
1327 blocklist.append(block)
1329 logger.debug("blocklist()=%d", len(blocklist))
1331 database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
1332 domains = database.cursor.fetchall()
1334 logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
1336 for block in blocklist:
1337 logger.debug("block='%s'", block)
1338 if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
1339 origin = block["blocked"]
1340 logger.debug("origin='%s'", origin)
1341 for subdomain in block["subdomain(s)"]:
1342 block["blocked"] = subdomain + "." + origin
1343 logger.debug("block[blocked]='%s'", block["blocked"])
1344 blocking.append(block)
1346 blocking.append(block)
1348 logger.debug("blocking()=%d", len(blocking))
1349 for block in blocking:
1350 logger.debug("block[]='%s'", type(block))
1351 block["blocked"] = tidyup.domain(block["blocked"])
1353 logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
1354 if block["blocked"] == "":
1355 logger.debug("block[blocked] is empty - SKIPPED!")
1357 elif not utils.is_domain_wanted(block["blocked"]):
1358 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1360 elif instances.is_recent(block["blocked"]):
1361 logger.debug("blocked='%s' has been recently checked - SKIPPED!", block["blocked"])
1364 logger.info("Processing blocked='%s' ...", block["blocked"])
1365 utils.process_domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
1368 for blocker in domains:
1369 blocker = blocker[0]
1370 logger.debug("blocker[%s]='%s'", type(blocker), blocker)
1372 for block in blocking:
1373 logger.debug("block[blocked]='%s',block[block reason(s)]='%s' - BEFORE!", block["blocked"], block["block reason(s)"] if "block reason(s)" in block else None)
1374 block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None
1376 logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
1377 if block["blocked"] == "":
1378 logger.debug("block[blocked] is empty - SKIPPED!")
1380 elif not utils.is_domain_wanted(block["blocked"]):
1381 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1384 logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
1385 if utils.process_block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
1386 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
1388 "blocked": block["blocked"],
1389 "reason" : block["reason"],
1392 if instances.has_pending(blocker):
1393 logger.debug("Flushing updates for blocker='%s' ...", blocker)
1394 instances.update_data(blocker)
1396 logger.debug("Invoking commit() ...")
1397 database.connection.commit()
1399 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1400 if config.get("bot_enabled") and len(blockdict) > 0:
1401 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
1402 network.send_bot_post(blocker, blockdict)
1404 logger.debug("Success! - EXIT!")
1407 def recheck_obfuscation(args: argparse.Namespace) -> int:
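"""Re-fetches block lists from instances flagged with has_obfuscation and tries to deobfuscate wildcard/question-mark entries; clears the flag once a list is fully deobfuscated."""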
1408 logger.debug("args[]='%s' - CALLED!", type(args))
1410 logger.debug("Invoking locking.acquire() ...")
1413 if isinstance(args.domain, str) and args.domain != "" and utils.is_domain_wanted(args.domain):
1414 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
1415 elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
1416 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
1418 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")
1420 rows = database.cursor.fetchall()
1421 logger.info("Checking %d domains ...", len(rows))
1423 logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
1424 if (args.all is None or not args.all) and instances.is_recent(row["domain"]) and args.domain is None and args.software is None:
1425 logger.debug("row[domain]='%s' has been recently checked, args.all[]='%s' - SKIPPED!", row["domain"], type(args.all))
1429 if row["software"] == "pleroma":
1430 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1431 blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
1432 elif row["software"] == "mastodon":
1433 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1434 blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
1435 elif row["software"] == "lemmy":
1436 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1437 blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
1438 elif row["software"] == "friendica":
1439 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1440 blocking = friendica.fetch_blocks(row["domain"])
1441 elif row["software"] == "misskey":
1442 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1443 blocking = misskey.fetch_blocks(row["domain"])
1445 logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])
1447 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
1448 instances.set_total_blocks(row["domain"], blocking)
1450 logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1453 for block in blocking:
1454 logger.debug("block[blocked]='%s'", block["blocked"])
1457 if block["blocked"] == "":
1458 logger.debug("block[blocked] is empty - SKIPPED!")
1460 elif block["blocked"].endswith(".arpa"):
1461 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1463 elif block["blocked"].endswith(".tld"):
1464 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1466 elif block["blocked"].endswith(".onion"):
1467 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
1469 elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
1470 logger.debug("block='%s' is obfuscated.", block["blocked"])
1471 obfuscated = obfuscated + 1
1472 blocked = utils.deobfuscate(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
1473 elif not utils.is_domain_wanted(block["blocked"]):
1474 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1476 elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1477 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
1480 logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
1481 if blocked is not None and blocked != block["blocked"]:
1482 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1483 obfuscated = obfuscated - 1
1484 if blocks.is_instance_blocked(row["domain"], blocked):
1485 logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
1488 block["block_level"] = utils.alias_block_level(block["block_level"])
1490 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
1491 if utils.process_block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1492 logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
1495 "reason" : block["reason"],
1498 logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
1499 if obfuscated == 0 and len(blocking) > 0:
1500 logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
1501 instances.set_has_obfuscation(row["domain"], False)
1503 if instances.has_pending(row["domain"]):
1504 logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1505 instances.update_data(row["domain"])
1507 logger.debug("Invoking commit() ...")
1508 database.connection.commit()
1510 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1511 if config.get("bot_enabled") and len(blockdict) > 0:
1512 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", row["domain"], len(blockdict))
1513 network.send_bot_post(row["domain"], blockdict)
1515 logger.debug("Success! - EXIT!")
1518 def fetch_fedilist(args: argparse.Namespace) -> int:
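"""Downloads the instance CSV from demo.fedilist.com (optionally filtered by --software) and fetches instance data for new, wanted domains."""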
1519 logger.debug("args[]='%s' - CALLED!", type(args))
1521 logger.debug("Invoking locking.acquire() ...")
1524 source_domain = "demo.fedilist.com"
1525 if sources.is_recent(source_domain):
1526 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1529 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1530 sources.update(source_domain)
1532 url = f"http://{source_domain}/instance/csv?onion=not"
1533 if args.software is not None and args.software != "":
1534 logger.debug("args.software='%s'", args.software)
1535 url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"
1537 logger.info("Fetching url='%s' ...", url)
1538 response = reqto.get(
1539 url,
1540 headers=network.web_headers,
1541 timeout=(config.get("connection_timeout"), config.get("read_timeout")),
1542 allow_redirects=False
1543 )
1545 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1546 if not response.ok or response.status_code >= 300 or len(response.content) == 0:
1547 logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.content))
1548 return 1
1550 reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
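# Each CSV row describes one instance; its "hostname" column is tidied up and
# IDNA-encoded before the usual wanted/registered/recent checks.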
1552 logger.debug("reader[]='%s'", type(reader))
1554 for row in reader:
1555 logger.debug("row[]='%s'", type(row))
1556 domain = tidyup.domain(row["hostname"])
1557 logger.debug("domain='%s' - AFTER!", domain)
1559 if domain == "":
1560 logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"])
1561 continue
1563 logger.debug("domain='%s' - BEFORE!", domain)
1564 domain = domain.encode("idna").decode("utf-8")
1565 logger.debug("domain='%s' - AFTER!", domain)
1567 if not utils.is_domain_wanted(domain):
1568 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1569 continue
1570 elif (args.all is None or not args.all) and instances.is_registered(domain):
1571 logger.debug("domain='%s' is already registered, --all not specified: args.all[]='%s'", domain, type(args.all))
1572 continue
1573 elif instances.is_recent(domain):
1574 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1575 continue
1577 logger.info("Fetching instances from domain='%s' ...", domain)
1578 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1580 logger.debug("Success! - EXIT!")
1581 return 0
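# Re-checks nodeinfo for known instances: a single --domain, all instances of
# one --software, or anything whose last_nodeinfo is older than the configured
# recheck_nodeinfo interval. A changed software type is stored.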
1583 def update_nodeinfo(args: argparse.Namespace) -> int:
1584 logger.debug("args[]='%s' - CALLED!", type(args))
1586 logger.debug("Invoking locking.acquire() ...")
1587 locking.acquire()
1589 if args.domain is not None and args.domain != "":
1590 logger.debug("Fetching args.domain='%s'", args.domain)
1591 database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ?", [args.domain])
1592 elif args.software is not None and args.software != "":
1593 logger.info("Fetching domains for args.software='%s'", args.software)
1594 database.cursor.execute("SELECT domain, software FROM instances WHERE software = ?", [args.software])
1596 logger.info("Fetching domains for recently updated ...")
1597 database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL", [time.time() - config.get("recheck_nodeinfo")])
1599 domains = database.cursor.fetchall()
1601 logger.info("Checking %d domain(s) ...", len(domains))
1602 cnt = 0
1603 for row in domains:
1604 logger.debug("row[]='%s'", type(row))
1605 try:
1606 logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
1607 software = federation.determine_software(row["domain"])
1609 logger.debug("Determined software='%s'", software)
1610 if software != row["software"] and software is not None:
1611 logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
1612 instances.set_software(row["domain"], software)
1614 instances.set_success(row["domain"])
1615 except network.exceptions as exception:
1616 logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1617 instances.set_last_error(row["domain"], exception)
1619 instances.set_last_nodeinfo(row["domain"])
1620 instances.update_data(row["domain"])
1621 cnt = cnt + 1
1623 logger.debug("Success! - EXIT!")
1624 return 0
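# Queries the instances.social API (needs instances_social_api_key in
# config.json) and queues new, wanted domains for crawling.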
1626 def fetch_instances_social(args: argparse.Namespace) -> int:
1627 logger.debug("args[]='%s' - CALLED!", type(args))
1629 logger.debug("Invoking locking.acquire() ...")
1630 locking.acquire()
1632 source_domain = "instances.social"
1634 if config.get("instances_social_api_key") == "":
1635 logger.error("API key not set. Please set it in your config.json file.")
1636 return 1
1637 elif sources.is_recent(source_domain):
1638 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1639 return 0
1641 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1642 sources.update(source_domain)
1645 "Authorization": f"Bearer {config.get('instances_social_api_key')}",
1648 fetched = network.get_json_api(
1650 "/api/1.0/instances/list?count=0&sort_by=name",
1652 (config.get("connection_timeout"), config.get("read_timeout"))
1654 logger.debug("fetched[]='%s'", type(fetched))
1656 if "error_message" in fetched:
1657 logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1659 elif "exception" in fetched:
1660 logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1662 elif "json" not in fetched:
1663 logger.warning("fetched has no element 'json' - EXIT!")
1665 elif "instances" not in fetched["json"]:
1666 logger.warning("fetched[json] has no element 'instances' - EXIT!")
1670 rows = fetched["json"]["instances"]
1672 logger.info("Checking %d row(s) ...", len(rows))
1673 for row in rows:
1674 logger.debug("row[]='%s'", type(row))
1675 domain = tidyup.domain(row["name"])
1676 logger.debug("domain='%s' - AFTER!", domain)
1678 if domain == "":
1679 logger.debug("domain is empty - SKIPPED!")
1680 continue
1682 logger.debug("domain='%s' - BEFORE!", domain)
1683 domain = domain.encode("idna").decode("utf-8")
1684 logger.debug("domain='%s' - AFTER!", domain)
1686 if not utils.is_domain_wanted(domain):
1687 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1688 continue
1689 elif domain in domains:
1690 logger.debug("domain='%s' is already added - SKIPPED!", domain)
1691 continue
1692 elif instances.is_registered(domain):
1693 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1694 continue
1695 elif instances.is_recent(domain):
1696 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1697 continue
1699 logger.info("Fetching instances from domain='%s'", domain)
1700 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1702 logger.debug("Success! - EXIT!")
1703 return 0
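# Maintenance command: rewrites all non-punycode domain columns to their IDNA
# ("xn--") form.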
1705 def convert_idna(args: argparse.Namespace) -> int:
1706 logger.debug("args[]='%s' - CALLED!", type(args))
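# Four columns are translated one after another: instances.domain,
# instances.origin, blocks.blocker and blocks.blocked.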
1708 database.cursor.execute("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC")
1709 rows = database.cursor.fetchall()
1711 logger.debug("rows[]='%s'", type(rows))
1712 instances.translate_idnas(rows, "domain")
1714 database.cursor.execute("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC")
1715 rows = database.cursor.fetchall()
1717 logger.debug("rows[]='%s'", type(rows))
1718 instances.translate_idnas(rows, "origin")
1720 database.cursor.execute("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC")
1721 rows = database.cursor.fetchall()
1723 logger.debug("rows[]='%s'", type(rows))
1724 blocks.translate_idnas(rows, "blocker")
1726 database.cursor.execute("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC")
1727 rows = database.cursor.fetchall()
1729 logger.debug("rows[]='%s'", type(rows))
1730 blocks.translate_idnas(rows, "blocked")
1732 logger.debug("Success! - EXIT!")
1733 return 0