1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
23 from urllib.parse import urlparse
33 from fba import database
36 from fba.helpers import blacklist
37 from fba.helpers import config
38 from fba.helpers import cookies
39 from fba.helpers import locking
40 from fba.helpers import processing
41 from fba.helpers import software as software_helper
42 from fba.helpers import tidyup
44 from fba.http import federation
45 from fba.http import network
47 from fba.models import blocks
48 from fba.models import instances
49 from fba.models import sources
51 from fba.networks import friendica
52 from fba.networks import lemmy
53 from fba.networks import mastodon
54 from fba.networks import misskey
55 from fba.networks import pleroma
57 logging.basicConfig(level=logging.INFO)
58 logger = logging.getLogger(__name__)
59 #logger.setLevel(logging.DEBUG)
61 def check_instance(args: argparse.Namespace) -> int:
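# Sanity-checks the single domain given via --domain: it must be syntactically valid, not blacklisted and not already registered; the result is logged and returned as a status code.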
62 logger.debug("args.domain='%s' - CALLED!", args.domain)
64 if not validators.domain(args.domain):
65 logger.warning("args.domain='%s' is not valid", args.domain)
67 elif blacklist.is_blacklisted(args.domain):
68 logger.warning("args.domain='%s' is blacklisted", args.domain)
70 elif instances.is_registered(args.domain):
71 logger.warning("args.domain='%s' is already registered", args.domain)
74 logger.info("args.domain='%s' is not known", args.domain)
76 logger.debug("status=%d - EXIT!", status)
79 def check_nodeinfo(args: argparse.Namespace) -> int:
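# Walks over all instances with a stored nodeinfo_url and warns when that URL contains neither the domain nor its punycode form (relative URLs always match).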
80 logger.debug("args[]='%s' - CALLED!", type(args))
83 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")
86 for row in database.cursor.fetchall():
87 logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
88 punycode = row["domain"].encode("idna").decode("utf-8")
90 if row["nodeinfo_url"].startswith("/"):
91 logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
93 elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
94 logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
97 logger.info("Found %d row(s)", cnt)
102 def fetch_pixelfed_api(args: argparse.Namespace) -> int:
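# Fetches the public server list from the pixelfed.org API and fetches instance data for every new, wanted domain found in its 'data' element.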
103 logger.debug("args[]='%s' - CALLED!", type(args))
105 # No CSRF token is required by default, so there is no need to add network.source_headers manually here
107 source_domain = "pixelfed.org"
109 if sources.is_recent(source_domain):
110 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
113 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
114 sources.update(source_domain)
117 logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
118 headers = csrf.determine(source_domain, dict())
119 except network.exceptions as exception:
120 logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
124 logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
125 fetched = network.get_json_api(
127 "/api/v1/servers/all.json?scope=All&country=all&language=all",
129 (config.get("connection_timeout"), config.get("read_timeout"))
132 logger.debug("JSON API returned %d elements", len(fetched))
133 if "error_message" in fetched:
134 logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
136 elif "data" not in fetched["json"]:
137 logger.warning("API did not return JSON with 'data' element - EXIT!")
140 rows = fetched["json"]["data"]
141 logger.info("Checking %d fetched rows ...", len(rows))
143 logger.debug("row[]='%s'", type(row))
144 if "domain" not in row:
145 logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
147 elif row["domain"] == "":
148 logger.debug("row[domain] is empty - SKIPPED!")
151 logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
152 domain = row["domain"].encode("idna").decode("utf-8")
153 logger.debug("domain='%s' - AFTER!", domain)
155 if not utils.is_domain_wanted(domain):
156 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
158 elif instances.is_registered(domain):
159 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
161 elif instances.is_recent(domain):
162 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
165 logger.debug("Fetching instances from domain='%s' ...", domain)
166 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
168 except network.exceptions as exception:
169 logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
172 logger.debug("Success! - EXIT!")
175 def fetch_bkali(args: argparse.Namespace) -> int:
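# Queries the GraphQL API at gql.api.bka.li for a domain list and fetches instance data for every new, wanted domain.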
176 logger.debug("args[]='%s' - CALLED!", type(args))
178 logger.debug("Invoking locking.acquire() ...")
181 source_domain = "gql.api.bka.li"
182 if sources.is_recent(source_domain):
183 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
186 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
187 sources.update(source_domain)
191 logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
192 fetched = network.post_json_api(
196 "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
200 logger.debug("fetched[]='%s'", type(fetched))
201 if "error_message" in fetched:
202 logger.warning("post_json_api() for 'gql.sources.bka.li' returned error message='%s", fetched["error_message"])
204 elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
205 logger.warning("post_json_api() returned error: '%s", fetched["error"]["message"])
208 rows = fetched["json"]
210 logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
212 raise Exception("WARNING: Returned no records")
213 elif "data" not in rows:
214 raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
215 elif "nodeinfo" not in rows["data"]:
216 raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")
218 for entry in rows["data"]["nodeinfo"]:
219 logger.debug("entry[%s]='%s'", type(entry), entry)
220 if "domain" not in entry:
221 logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
223 elif entry["domain"] == "":
224 logger.debug("entry[domain] is empty - SKIPPED!")
226 elif not utils.is_domain_wanted(entry["domain"]):
227 logger.warning("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
229 elif instances.is_registered(entry["domain"]):
230 logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
232 elif instances.is_recent(entry["domain"]):
233 logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
236 logger.debug("Adding domain='%s' ...", entry["domain"])
237 domains.append(entry["domain"])
239 except network.exceptions as exception:
240 logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
243 logger.debug("domains()=%d", len(domains))
245 logger.info("Adding %d new instances ...", len(domains))
246 for domain in domains:
247 logger.debug("domain='%s' - BEFORE!", domain)
248 domain = domain.encode("idna").decode("utf-8")
249 logger.debug("domain='%s' - AFTER!", domain)
252 logger.info("Fetching instances from domain='%s' ...", domain)
253 federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
254 except network.exceptions as exception:
255 logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
256 instances.set_last_error(domain, exception)
259 logger.debug("Success - EXIT!")
262 def fetch_blocks(args: argparse.Namespace) -> int:
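# Fetches block lists from registered instances (optionally limited to --domain or --software), normalizes and deobfuscates the blocked entries and records them as blocks.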
263 logger.debug("args[]='%s' - CALLED!", type(args))
264 if args.domain is not None and args.domain != "":
265 logger.debug("args.domain='%s' - checking ...", args.domain)
266 if not validators.domain(args.domain):
267 logger.warning("args.domain='%s' is not valid.", args.domain)
269 elif blacklist.is_blacklisted(args.domain):
270 logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
272 elif not instances.is_registered(args.domain):
273 logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
276 logger.debug("Invoking locking.acquire() ...")
279 if args.domain is not None and args.domain != "":
280 # Re-check single domain
281 logger.debug("Querying database for single args.domain='%s' ...", args.domain)
282 database.cursor.execute(
283 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
285 elif args.software is not None and args.software != "":
286 # Re-check single software
287 logger.debug("Querying database for args.software='%s' ...", args.software)
288 database.cursor.execute(
289 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
292 # Re-check after "timeout" (aka. minimum interval)
293 database.cursor.execute(
294 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
297 rows = database.cursor.fetchall()
298 logger.info("Checking %d entries ...", len(rows))
299 for blocker, software, origin, nodeinfo_url in rows:
300 logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
301 blocker = tidyup.domain(blocker)
302 logger.debug("blocker='%s' - AFTER!", blocker)
305 logger.warning("blocker is now empty!")
307 elif nodeinfo_url is None or nodeinfo_url == "":
308 logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
310 elif not utils.is_domain_wanted(blocker):
311 logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
314 logger.debug("blocker='%s'", blocker)
315 instances.set_last_blocked(blocker)
316 instances.set_has_obfuscation(blocker, False)
320 if software == "pleroma":
321 logger.info("blocker='%s',software='%s'", blocker, software)
322 blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
323 elif software == "mastodon":
324 logger.info("blocker='%s',software='%s'", blocker, software)
325 blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
326 elif software == "lemmy":
327 logger.info("blocker='%s',software='%s'", blocker, software)
328 blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
329 elif software == "friendica":
330 logger.info("blocker='%s',software='%s'", blocker, software)
331 blocking = friendica.fetch_blocks(blocker)
332 elif software == "misskey":
333 logger.info("blocker='%s',software='%s'", blocker, software)
334 blocking = misskey.fetch_blocks(blocker)
336 logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)
338 logger.debug("blocker='%s'", blocker)
339 if blocker != "chaos.social":
340 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
341 instances.set_total_blocks(blocker, blocking)
343 logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
345 for block in blocking:
346 logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])
348 if block["block_level"] == "":
349 logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
352 logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
353 block["blocked"] = tidyup.domain(block["blocked"])
354 block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
355 logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])
357 if block["blocked"] == "":
358 logger.warning("blocked is empty, blocker='%s'", blocker)
360 elif block["blocked"].endswith(".onion"):
361 logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
363 elif block["blocked"].endswith(".arpa"):
364 logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
366 elif block["blocked"].endswith(".tld"):
367 logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
369 elif block["blocked"].find("*") >= 0:
370 logger.debug("blocker='%s' uses obfuscated domains", blocker)
372 # Some Friendica servers also obfuscate domains without providing a hash
373 row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)
375 logger.debug("row[]='%s'", type(row))
377 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
378 instances.set_has_obfuscation(blocker, True)
381 block["blocked"] = row["domain"]
382 origin = row["origin"]
383 nodeinfo_url = row["nodeinfo_url"]
384 elif block["blocked"].find("?") >= 0:
385 logger.debug("blocker='%s' uses obfuscated domains", blocker)
387 # Some instances obfuscate domains with question marks; it is unclear whether this depends on the software version
388 row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)
390 logger.debug("row[]='%s'", type(row))
392 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
393 instances.set_has_obfuscation(blocker, True)
396 block["blocked"] = row["domain"]
397 origin = row["origin"]
398 nodeinfo_url = row["nodeinfo_url"]
400 logger.debug("Looking up instance by domainm, blocked='%s'", block["blocked"])
401 if block["blocked"] == "":
402 logger.debug("block[blocked] is empty - SKIPPED!")
405 logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
406 block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
407 logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
409 if not utils.is_domain_wanted(block["blocked"]):
410 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
412 elif block["block_level"] in ["accept", "accepted"]:
413 logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
415 elif not instances.is_registered(block["blocked"]):
416 logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
417 federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)
419 block["block_level"] = utils.alias_block_level(block["block_level"])
421 if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
422 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
424 "blocked": block["blocked"],
425 "reason" : block["reason"],
428 logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
429 cookies.clear(block["blocked"])
431 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
432 if instances.has_pending(blocker):
433 logger.debug("Flushing updates for blocker='%s' ...", blocker)
434 instances.update_data(blocker)
436 logger.debug("Invoking commit() ...")
437 database.connection.commit()
439 logger.debug("Invoking cookies.clear(%s) ...", blocker)
440 cookies.clear(blocker)
442 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d'", config.get("bot_enabled"), len(blockdict))
443 if config.get("bot_enabled") and len(blockdict) > 0:
444 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
445 network.send_bot_post(blocker, blockdict)
447 logger.debug("Success! - EXIT!")
450 def fetch_observer(args: argparse.Namespace) -> int:
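# Scrapes fediverse.observer per software type (or only args.software) and fetches instance data for every new, wanted domain found in the table data.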
451 logger.debug("args[]='%s' - CALLED!", type(args))
453 logger.debug("Invoking locking.acquire() ...")
456 source_domain = "fediverse.observer"
457 if sources.is_recent(source_domain):
458 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
461 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
462 sources.update(source_domain)
465 if args.software is None:
466 logger.info("Fetching software list ...")
467 raw = utils.fetch_url(
468 f"https://{source_domain}",
470 (config.get("connection_timeout"), config.get("read_timeout"))
472 logger.debug("raw[%s]()=%d", type(raw), len(raw))
474 doc = bs4.BeautifulSoup(raw, features="html.parser")
475 logger.debug("doc[]='%s'", type(doc))
477 items = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"}).findAll("a", {"class": "dropdown-item"})
478 logger.debug("items[]='%s'", type(items))
480 logger.info("Checking %d menu items ...", len(items))
482 logger.debug("item[%s]='%s'", type(item), item)
483 if item.text.lower() == "all":
484 logger.debug("Skipping 'All' menu entry ...")
487 logger.debug("Appending item.text='%s' ...", item.text)
488 types.append(tidyup.domain(item.text))
490 logger.info("Adding args.software='%s' as type ...", args.software)
491 types.append(args.software)
493 logger.info("Fetching %d different table data ...", len(types))
494 for software in types:
495 logger.debug("software='%s' - BEFORE!", software)
496 if args.software is not None and args.software != software:
497 logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
502 logger.debug("Fetching table data for software='%s' ...", software)
503 raw = utils.fetch_url(
504 f"https://{source_domain}/app/views/tabledata.php?software={software}",
506 (config.get("connection_timeout"), config.get("read_timeout"))
508 logger.debug("raw[%s]()=%d", type(raw), len(raw))
510 doc = bs4.BeautifulSoup(raw, features="html.parser")
511 logger.debug("doc[]='%s'", type(doc))
512 except network.exceptions as exception:
513 logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
516 items = doc.findAll("a", {"class": "url"})
517 logger.info("Checking %d items,software='%s' ...", len(items), software)
519 logger.debug("item[]='%s'", type(item))
520 domain = item.decode_contents()
521 logger.debug("domain='%s' - AFTER!", domain)
524 logger.debug("domain is empty - SKIPPED!")
527 logger.debug("domain='%s' - BEFORE!", domain)
528 domain = domain.encode("idna").decode("utf-8")
529 logger.debug("domain='%s' - AFTER!", domain)
531 if not utils.is_domain_wanted(domain):
532 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
534 elif instances.is_registered(domain):
535 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
537 elif instances.is_recent(domain):
538 logger.debug("domain='%s' is recently being handled - SKIPPED!", domain)
541 software = software_helper.alias(software)
542 logger.info("Fetching instances for domain='%s'", domain)
543 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
545 logger.debug("Success! - EXIT!")
548 def fetch_todon_wiki(args: argparse.Namespace) -> int:
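# Parses the silenced/limited and suspended server lists from wiki.todon.eu and records them as blocks.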
549 logger.debug("args[]='%s' - CALLED!", type(args))
551 logger.debug("Invoking locking.acquire() ...")
554 source_domain = "wiki.todon.eu"
555 if sources.is_recent(source_domain):
556 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
559 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
560 sources.update(source_domain)
567 raw = utils.fetch_url(f"https://{source_domain}/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
568 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
570 doc = bs4.BeautifulSoup(raw, "html.parser")
571 logger.debug("doc[]='%s'", type(doc))
573 silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
574 logger.info("Checking %d silenced/limited entries ...", len(silenced))
575 blocklist["silenced"] = utils.find_domains(silenced, "div")
577 suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
578 logger.info("Checking %d suspended entries ...", len(suspended))
579 blocklist["reject"] = utils.find_domains(suspended, "div")
581 blocking = blocklist["silenced"] + blocklist["reject"]
584 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
585 instances.set_total_blocks(blocker, blocking)
588 for block_level in blocklist:
589 blockers = blocklist[block_level]
591 logger.debug("block_level='%s',blockers()=%d'", block_level, len(blockers))
592 for blocked in blockers:
593 logger.debug("blocked='%s'", blocked)
595 if not instances.is_registered(blocked):
597 logger.info("Fetching instances from domain='%s' ...", blocked)
598 federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
599 except network.exceptions as exception:
600 logger.warning("Exception '%s' during fetching instances (fetch_cs) from blocked='%s'", type(exception), blocked)
601 instances.set_last_error(blocked, exception)
603 if blocks.is_instance_blocked(blocker, blocked, block_level):
604 logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
607 logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
608 if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
609 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", blocked, block_level, blocker)
615 logger.debug("Invoking commit() ...")
616 database.connection.commit()
618 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
619 if config.get("bot_enabled") and len(blockdict) > 0:
620 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
621 network.send_bot_post(blocker, blockdict)
623 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
624 if instances.has_pending(blocker):
625 logger.debug("Flushing updates for blocker='%s' ...", blocker)
626 instances.update_data(blocker)
628 logger.debug("Success! - EXIT!")
631 def fetch_cs(args: argparse.Namespace):
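# Parses chaos.social's federation.md (Markdown) for silenced and blocked instances and records them as blocks.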
632 logger.debug("args[]='%s' - CALLED!", type(args))
634 logger.debug("Invoking locking.acquire() ...")
662 source_domain = "raw.githubusercontent.com"
663 if sources.is_recent(source_domain):
664 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
667 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
668 sources.update(source_domain)
670 raw = utils.fetch_url(f"https://{source_domain}/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
671 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
673 doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
674 logger.debug("doc()=%d[]='%s'", len(doc), type(doc))
676 silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
677 logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
678 blocklist["silenced"] = federation.find_domains(silenced)
680 blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
681 logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
682 blocklist["reject"] = federation.find_domains(blocked)
684 blocking = blocklist["silenced"] + blocklist["reject"]
685 blocker = "chaos.social"
687 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
688 instances.set_total_blocks(blocker, blocking)
690 logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
691 if len(blocking) > 0:
693 for block_level in blocklist:
694 logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))
696 for row in blocklist[block_level]:
697 logger.debug("row[%s]='%s'", type(row), row)
698 if not "domain" in row:
699 logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
701 elif instances.is_recent(row["domain"], "last_blocked"):
702 logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
704 elif not instances.is_registered(row["domain"]):
706 logger.info("Fetching instances from domain='%s' ...", row["domain"])
707 federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
708 except network.exceptions as exception:
709 logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
710 instances.set_last_error(row["domain"], exception)
712 if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
713 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
715 "blocked": row["domain"],
716 "reason" : row["reason"],
719 logger.debug("Invoking commit() ...")
720 database.connection.commit()
722 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
723 if config.get("bot_enabled") and len(blockdict) > 0:
724 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
725 network.send_bot_post(blocker, blockdict)
727 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
728 if instances.has_pending(blocker):
729 logger.debug("Flushing updates for blocker='%s' ...", blocker)
730 instances.update_data(blocker)
732 logger.debug("Success! - EXIT!")
735 def fetch_fba_rss(args: argparse.Namespace) -> int:
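# Parses an FBA-specific RSS feed given via --feed and fetches instance data for every new, wanted domain found in the item links.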
736 logger.debug("args[]='%s' - CALLED!", type(args))
740 logger.debug("Invoking locking.acquire() ...")
743 components = urlparse(args.feed)
745 if sources.is_recent(components.netloc):
746 logger.info("API from components.netloc='%s' has recently being accessed - EXIT!", components.netloc)
749 logger.debug("components.netloc='%s' has not been recently used, marking ...", components.netloc)
750 sources.update(components.netloc)
752 logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
753 response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
755 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
756 if response.ok and response.status_code < 300 and len(response.text) > 0:
757 logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
758 rss = atoma.parse_rss_bytes(response.content)
760 logger.debug("rss[]='%s'", type(rss))
761 for item in rss.items:
762 logger.debug("item='%s'", item)
763 domain = tidyup.domain(item.link.split("=")[1])
765 logger.debug("domain='%s' - AFTER!", domain)
767 logger.debug("domain is empty - SKIPPED!")
770 logger.debug("domain='%s' - BEFORE!", domain)
771 domain = domain.encode("idna").decode("utf-8")
772 logger.debug("domain='%s' - AFTER!", domain)
774 if not utils.is_domain_wanted(domain):
775 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
777 elif domain in domains:
778 logger.debug("domain='%s' is already added - SKIPPED!", domain)
780 elif instances.is_registered(domain):
781 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
783 elif instances.is_recent(domain):
784 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
787 logger.debug("Adding domain='%s'", domain)
788 domains.append(domain)
790 logger.debug("domains()=%d", len(domains))
792 logger.info("Adding %d new instances ...", len(domains))
793 for domain in domains:
794 logger.debug("domain='%s'", domain)
796 logger.info("Fetching instances from domain='%s' ...", domain)
797 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
798 except network.exceptions as exception:
799 logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
800 instances.set_last_error(domain, exception)
803 logger.debug("Success! - EXIT!")
806 def fetch_fbabot_atom(args: argparse.Namespace) -> int:
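# Parses the ATOM feed of the FBA bot account on ryona.agency and fetches instance data for every new, wanted domain linked from its entries.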
807 logger.debug("args[]='%s' - CALLED!", type(args))
809 logger.debug("Invoking locking.acquire() ...")
812 source_domain = "ryona.agency"
813 if sources.is_recent(source_domain):
814 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
817 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
818 sources.update(source_domain)
820 feed = f"https://{source_domain}/users/fba/feed.atom"
824 logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
825 response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
827 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
828 if response.ok and response.status_code < 300 and len(response.text) > 0:
829 logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
830 atom = atoma.parse_atom_bytes(response.content)
832 logger.debug("atom[]='%s'", type(atom))
833 for entry in atom.entries:
834 logger.debug("entry[]='%s'", type(entry))
835 doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
836 logger.debug("doc[]='%s'", type(doc))
837 for element in doc.findAll("a"):
838 logger.debug("element[]='%s'", type(element))
839 for href in element["href"].split(","):
840 logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
841 domain = tidyup.domain(href)
843 logger.debug("domain='%s' - AFTER!", domain)
845 logger.debug("domain is empty - SKIPPED!")
848 logger.debug("domain='%s' - BEFORE!", domain)
849 domain = domain.encode("idna").decode("utf-8")
850 logger.debug("domain='%s' - AFTER!", domain)
852 if not utils.is_domain_wanted(domain):
853 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
855 elif domain in domains:
856 logger.debug("domain='%s' is already added - SKIPPED!", domain)
858 elif instances.is_registered(domain):
859 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
861 elif instances.is_recent(domain):
862 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
865 logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
866 domains.append(domain)
868 logger.debug("domains()=%d", len(domains))
870 logger.info("Adding %d new instances ...", len(domains))
871 for domain in domains:
872 logger.debug("domain='%s'", domain)
874 logger.info("Fetching instances from domain='%s' ...", domain)
875 federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
876 except network.exceptions as exception:
877 logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
878 instances.set_last_error(domain, exception)
881 logger.debug("Success! - EXIT!")
884 def fetch_instances(args: argparse.Namespace) -> int:
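# Fetches instances starting from args.domain and, depending on the given arguments, also re-crawls registered instances whose last fetch is older than the recheck interval.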
885 logger.debug("args[]='%s' - CALLED!", type(args))
887 logger.debug("args.domain='%s' - checking ...", args.domain)
888 if not validators.domain(args.domain):
889 logger.warning("args.domain='%s' is not valid.", args.domain)
891 elif blacklist.is_blacklisted(args.domain):
892 logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
895 logger.debug("Invoking locking.acquire() ...")
900 logger.info("Fetching instances from args.domain='%s' ...", args.domain)
901 federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
902 except network.exceptions as exception:
903 logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
904 instances.set_last_error(args.domain, exception)
905 instances.update_data(args.domain)
909 logger.debug("Not fetching more instances - EXIT!")
912 # Loop through instances that are due for a re-fetch (last_instance_fetch missing or older than the recheck interval)
913 database.cursor.execute(
914 "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
917 rows = database.cursor.fetchall()
918 logger.info("Checking %d entries ...", len(rows))
920 logger.debug("row[domain]='%s'", row["domain"])
921 if row["domain"] == "":
922 logger.debug("row[domain] is empty - SKIPPED!")
925 logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
926 domain = row["domain"].encode("idna").decode("utf-8")
927 logger.debug("domain='%s' - AFTER!", domain)
929 if not utils.is_domain_wanted(domain):
930 logger.warning("Domain domain='%s' is not wanted - SKIPPED!", domain)
934 logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", domain, row["origin"], row["software"], row["nodeinfo_url"])
935 federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
936 except network.exceptions as exception:
937 logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
938 instances.set_last_error(domain, exception)
940 logger.debug("Success - EXIT!")
943 def fetch_oliphant(args: argparse.Namespace) -> int:
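# Downloads a set of CSV block lists from the oliphant/blocklists repository on codeberg.org and imports the contained blocks per blocker.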
944 logger.debug("args[]='%s' - CALLED!", type(args))
946 logger.debug("Invoking locking.acquire() ...")
949 source_domain = "codeberg.org"
950 if sources.is_recent(source_domain):
951 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
954 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
955 sources.update(source_domain)
958 base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"
963 "blocker": "artisan.chat",
964 "csv_url": "mastodon/artisan.chat.csv",
966 "blocker": "mastodon.art",
967 "csv_url": "mastodon/mastodon.art.csv",
969 "blocker": "pleroma.envs.net",
970 "csv_url": "mastodon/pleroma.envs.net.csv",
972 "blocker": "oliphant.social",
973 "csv_url": "mastodon/_unified_tier3_blocklist.csv",
975 "blocker": "mastodon.online",
976 "csv_url": "mastodon/mastodon.online.csv",
978 "blocker": "mastodon.social",
979 "csv_url": "mastodon/mastodon.social.csv",
981 "blocker": "mastodon.social",
982 "csv_url": "other/missing-tier0-mastodon.social.csv",
984 "blocker": "rage.love",
985 "csv_url": "mastodon/rage.love.csv",
987 "blocker": "sunny.garden",
988 "csv_url": "mastodon/sunny.garden.csv",
990 "blocker": "sunny.garden",
991 "csv_url": "mastodon/gardenfence.csv",
993 "blocker": "solarpunk.moe",
994 "csv_url": "mastodon/solarpunk.moe.csv",
996 "blocker": "toot.wales",
997 "csv_url": "mastodon/toot.wales.csv",
999 "blocker": "union.place",
1000 "csv_url": "mastodon/union.place.csv",
1002 "blocker": "oliphant.social",
1003 "csv_url": "mastodon/birdsite.csv",
1009 logger.debug("Downloading %d files ...", len(blocklists))
1010 for block in blocklists:
1011 # Is domain given and not equal blocker?
1012 if isinstance(args.domain, str) and args.domain != block["blocker"]:
1013 logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
1015 elif args.domain in domains:
1016 logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
1020 logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
1021 response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
1023 logger.debug("response.ok='%s',response.status_code=%d,response.content()=%d", response.ok, response.status_code, len(response.content))
1024 if not response.ok or response.status_code >= 300 or len(response.content) == 0:
1025 logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
1028 logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
1029 reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
1035 logger.debug("row[%s]='%s'", type(row), row)
1036 domain = severity = None
1037 reject_media = reject_reports = False
1039 if "#domain" in row:
1040 domain = row["#domain"]
1041 elif "domain" in row:
1042 domain = row["domain"]
1044 logger.debug("row='%s' does not contain domain column", row)
1047 if "#severity" in row:
1048 severity = utils.alias_block_level(row["#severity"])
1049 elif "severity" in row:
1050 severity = utils.alias_block_level(row["severity"])
1052 logger.debug("row='%s' does not contain severity column", row)
1055 if "#reject_media" in row and row["#reject_media"].lower() == "true":
1057 elif "reject_media" in row and row["reject_media"].lower() == "true":
1060 if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
1061 reject_reports = True
1062 elif "reject_reports" in row and row["reject_reports"].lower() == "true":
1063 reject_reports = True
1066 logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
1068 logger.debug("domain is empty - SKIPPED!")
1070 elif domain.endswith(".onion"):
1071 logger.debug("domain='%s' is a TOR .onion domain - SKIPPED", domain)
1073 elif domain.endswith(".arpa"):
1074 logger.debug("domain='%s' is a reverse IP address - SKIPPED", domain)
1076 elif domain.endswith(".tld"):
1077 logger.debug("domain='%s' is a fake domain - SKIPPED", domain)
1079 elif domain.find("*") >= 0 or domain.find("?") >= 0:
1080 logger.debug("domain='%s' is obfuscated - Invoking utils.deobfuscate(%s, %s) ...", domain, domain, block["blocker"])
1081 domain = utils.deobfuscate(domain, block["blocker"])
1082 logger.debug("domain='%s' - AFTER!", domain)
1084 if not validators.domain(domain):
1085 logger.debug("domain='%s' is not a valid domain - SKIPPED!")
1087 elif blacklist.is_blacklisted(domain):
1088 logger.warning("domain='%s' is blacklisted - SKIPPED!", domain)
1091 logger.debug("Marking domain='%s' as handled", domain)
1092 domains.append(domain)
1094 logger.debug("Processing domain='%s' ...", domain)
1095 processed = processing.domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
1096 logger.debug("processed='%s'", processed)
1098 if processing.block(block["blocker"], domain, None, severity) and config.get("bot_enabled"):
1099 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", domain, block["block_level"], block["blocker"])
1102 "reason" : block["reason"],
1106 processing.block(block["blocker"], domain, None, "reject_media")
1108 processing.block(block["blocker"], domain, None, "reject_reports")
1110 logger.debug("block[blocker]='%s'", block["blocker"])
1111 if block["blocker"] != "chaos.social":
1112 logger.debug("Invoking instances.set_total_blocks(%s, domains()=%d) ...", block["blocker"], len(domains))
1113 instances.set_total_blocks(block["blocker"], domains)
1115 logger.debug("Checking if blocker='%s' has pending updates ...", block["blocker"])
1116 if instances.has_pending(block["blocker"]):
1117 logger.debug("Flushing updates for block[blocker]='%s' ...", block["blocker"])
1118 instances.update_data(block["blocker"])
1120 logger.debug("Invoking commit() ...")
1121 database.connection.commit()
1123 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1124 if config.get("bot_enabled") and len(blockdict) > 0:
1125 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
1126 network.send_bot_post(block["blocker"], blockdict)
1128 logger.debug("Success! - EXIT!")
1131 def fetch_txt(args: argparse.Namespace) -> int:
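# Downloads plain-text block lists (currently seirdy.one's bsl.txt) and processes each listed, wanted domain.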
1132 logger.debug("args[]='%s' - CALLED!", type(args))
1134 logger.debug("Invoking locking.acquire() ...")
1139 "blocker": "seirdy.one",
1140 "url" : "https://seirdy.one/pb/bsl.txt",
1143 logger.info("Checking %d text file(s) ...", len(urls))
1145 logger.debug("Fetching row[url]='%s' ...", row["url"])
1146 response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
1148 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1149 if response.ok and response.status_code < 300 and response.text != "":
1150 logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
1151 domains = response.text.split("\n")
1153 logger.info("Processing %d domains ...", len(domains))
1154 for domain in domains:
1155 logger.debug("domain='%s' - BEFORE!", domain)
1156 domain = tidyup.domain(domain)
1158 logger.debug("domain='%s' - AFTER!", domain)
1160 logger.debug("domain is empty - SKIPPED!")
1162 elif not utils.is_domain_wanted(domain):
1163 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1165 elif instances.is_recent(domain):
1166 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1169 logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
1170 processed = processing.domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)
1172 logger.debug("processed='%s'", processed)
1174 logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
1177 logger.debug("Success! - EXIT!")
1180 def fetch_fedipact(args: argparse.Namespace) -> int:
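# Scrapes the participant list from fedipact.online and fetches instance data for every new, wanted domain.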
1181 logger.debug("args[]='%s' - CALLED!", type(args))
1183 logger.debug("Invoking locking.acquire() ...")
1186 source_domain = "fedipact.online"
1187 if sources.is_recent(source_domain):
1188 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1191 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1192 sources.update(source_domain)
1194 response = utils.fetch_url(
1195 f"https://{source_domain}",
1196 network.web_headers,
1197 (config.get("connection_timeout"), config.get("read_timeout"))
1200 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1201 if response.ok and response.status_code < 300 and response.text != "":
1202 logger.debug("Parsing %d Bytes ...", len(response.text))
1204 doc = bs4.BeautifulSoup(response.text, "html.parser")
1205 logger.debug("doc[]='%s'", type(doc))
1207 rows = doc.findAll("li")
1208 logger.info("Checking %d row(s) ...", len(rows))
1210 logger.debug("row[]='%s'", type(row))
1211 domain = tidyup.domain(row.contents[0])
1213 logger.debug("domain='%s' - AFTER!", domain)
1215 logger.debug("domain is empty - SKIPPED!")
1218 logger.debug("domain='%s' - BEFORE!", domain)
1219 domain = domain.encode("idna").decode("utf-8")
1220 logger.debug("domain='%s' - AFTER!", domain)
1222 if not utils.is_domain_wanted(domain):
1223 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1225 elif instances.is_registered(domain):
1226 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1228 elif instances.is_recent(domain):
1229 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1232 logger.info("Fetching domain='%s' ...", domain)
1233 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1235 logger.debug("Success! - EXIT!")
1238 def fetch_joinfediverse(args: argparse.Namespace) -> int:
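# Scrapes the FediBlock wiki tables on joinfediverse.wiki and records the listed domains as 'reject' blocks for the climatejustice.* instances found in the database.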
1239 logger.debug("args[]='%s' - CALLED!", type(args))
1241 logger.debug("Invoking locking.acquire() ...")
1244 source_domain = "joinfediverse.wiki"
1245 if sources.is_recent(source_domain):
1246 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1249 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1250 sources.update(source_domain)
1252 raw = utils.fetch_url(
1253 f"https://{source_domain}/FediBlock",
1254 network.web_headers,
1255 (config.get("connection_timeout"), config.get("read_timeout"))
1257 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1259 doc = bs4.BeautifulSoup(raw, "html.parser")
1260 logger.debug("doc[]='%s'", type(doc))
1262 tables = doc.findAll("table", {"class": "wikitable"})
1264 logger.info("Analyzing %d table(s) ...", len(tables))
1266 for table in tables:
1267 logger.debug("table[]='%s'", type(table))
1269 rows = table.findAll("tr")
1270 logger.info("Checking %d row(s) ...", len(rows))
1271 block_headers = dict()
1273 logger.debug("row[%s]='%s'", type(row), row)
1275 headers = row.findAll("th")
1276 logger.debug("Found headers()=%d header(s)", len(headers))
1277 if len(headers) > 1:
1278 block_headers = dict()
1280 for header in headers:
1282 logger.debug("header[]='%s',cnt=%d", type(header), cnt)
1283 text = header.contents[0]
1285 logger.debug("text[]='%s'", type(text))
1286 if not isinstance(text, str):
1287 logger.debug("text[]='%s' is not of type 'str' - SKIPPED!", type(text))
1289 elif validators.domain(text.strip()):
1290 logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
1293 text = tidyup.domain(text.strip())
1294 logger.debug("text='%s'", text)
1295 if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
1296 logger.debug("Found header: '%s'=%d", text, cnt)
1297 block_headers[cnt] = text
1299 elif len(block_headers) == 0:
1300 logger.debug("row is not scrapable - SKIPPED!")
1302 elif len(block_headers) > 0:
1303 logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
1307 for element in row.find_all(["th", "td"]):
1309 logger.debug("element[]='%s',cnt=%d", type(element), cnt)
1310 if cnt in block_headers:
1311 logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])
1313 text = element.text.strip()
1314 key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"
1316 logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
1317 if key in ["domain", "instance"]:
1319 elif key == "reason":
1320 block[key] = tidyup.reason(text)
1321 elif key == "subdomain(s)":
1324 block[key] = text.split("/")
1326 logger.debug("key='%s'", key)
1329 logger.debug("block()=%d ...", len(block))
1331 logger.debug("Appending block()=%d ...", len(block))
1332 blocklist.append(block)
1334 logger.debug("blocklist()=%d", len(blocklist))
1336 database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
1337 domains = database.cursor.fetchall()
1339 logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
1341 for block in blocklist:
1342 logger.debug("block='%s'", block)
1343 if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
1344 origin = block["blocked"]
1345 logger.debug("origin='%s'", origin)
1346 for subdomain in block["subdomain(s)"]:
1347 block["blocked"] = subdomain + "." + origin
1348 logger.debug("block[blocked]='%s'", block["blocked"])
1349 blocking.append(block)
1351 blocking.append(block)
1353 logger.debug("blocking()=%d", blocking)
1354 for block in blocking:
1355 logger.debug("block[]='%s'", type(block))
1356 block["blocked"] = tidyup.domain(block["blocked"])
1358 logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
1359 if block["blocked"] == "":
1360 logger.debug("block[blocked] is empty - SKIPPED!")
1362 elif not utils.is_domain_wanted(block["blocked"]):
1363 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1365 elif instances.is_recent(block["blocked"]):
1366 logger.debug("blocked='%s' has been recently checked - SKIPPED!", block["blocked"])
1369 logger.info("Proccessing blocked='%s' ...", block["blocked"])
1370 processing.domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
1373 for blocker in domains:
1374 blocker = blocker[0]
1375 logger.debug("blocker[%s]='%s'", type(blocker), blocker)
1377 for block in blocking:
1378 logger.debug("block[blocked]='%s',block[block reason(s)]='%s' - BEFORE!", block["blocked"], block["block reason(s)"] if "block reason(s)" in block else None)
1379 block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None
1381 logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
1382 if block["blocked"] == "":
1383 logger.debug("block[blocked] is empty - SKIPPED!")
1385 elif not utils.is_domain_wanted(block["blocked"]):
1386 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1389 logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
1390 if processing.block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
1391 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
1393 "blocked": block["blocked"],
1394 "reason" : block["reason"],
1397 if instances.has_pending(blocker):
1398 logger.debug("Flushing updates for blocker='%s' ...", blocker)
1399 instances.update_data(blocker)
1401 logger.debug("Invoking commit() ...")
1402 database.connection.commit()
1404 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1405 if config.get("bot_enabled") and len(blockdict) > 0:
1406 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", blocker, len(blockdict))
1407 network.send_bot_post(blocker, blockdict)
1409 logger.debug("Success! - EXIT!")
1412 def recheck_obfuscation(args: argparse.Namespace) -> int:
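# Re-fetches block lists from instances flagged with has_obfuscation, tries to deobfuscate wildcard/question-mark entries and clears the flag once every entry could be resolved.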
1413 logger.debug("args[]='%s' - CALLED!", type(args))
1415 logger.debug("Invoking locking.acquire() ...")
1418 if isinstance(args.domain, str) and args.domain != "" and utils.is_domain_wanted(args.domain):
1419 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
1420 elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
1421 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
1423 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")
1425 rows = database.cursor.fetchall()
1426 logger.info("Checking %d domains ...", len(rows))
1428 logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
1429 if (args.all is None or not args.all) and instances.is_recent(row["domain"]) and args.domain is None and args.software is None:
1430 logger.debug("row[domain]='%s' has been recently checked, args.all[]='%s' - SKIPPED!", row["domain"], type(args.all))
1434 if row["software"] == "pleroma":
1435 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1436 blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
1437 elif row["software"] == "mastodon":
1438 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1439 blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
1440 elif row["software"] == "lemmy":
1441 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1442 blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
1443 elif row["software"] == "friendica":
1444 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1445 blocking = friendica.fetch_blocks(row["domain"])
1446 elif row["software"] == "misskey":
1447 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1448 blocking = misskey.fetch_blocks(row["domain"])
1450 logger.warning("Unknown sofware: domain='%s',software='%s'", row["domain"], row["software"])
1452 logger.debug("row[domain]='%s'", row["domain"])
1453 if row["domain"] != "chaos.social":
1454 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
1455 instances.set_total_blocks(row["domain"], blocking)
1457 logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1460 for block in blocking:
1461 logger.debug("block[blocked]='%s'", block["blocked"])
1464 if block["blocked"] == "":
1465 logger.debug("block[blocked] is empty - SKIPPED!")
1467 elif block["blocked"].endswith(".arpa"):
1468 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1470 elif block["blocked"].endswith(".tld"):
1471 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1473 elif block["blocked"].endswith(".onion"):
1474 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
1476 elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
1477 logger.debug("block='%s' is obfuscated.", block["blocked"])
1478 obfuscated = obfuscated + 1
1479 blocked = utils.deobfuscate(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
1480 elif not utils.is_domain_wanted(block["blocked"]):
1481 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1483 elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1484 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
1487 logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
1488 if blocked is not None and blocked != block["blocked"]:
1489 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1490 obfuscated = obfuscated - 1
1491 if blocks.is_instance_blocked(row["domain"], blocked):
1492 logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
1495 block["block_level"] = utils.alias_block_level(block["block_level"])
1497 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
1498 if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1499 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
1502 "reason" : block["reason"],
1505 logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
1506 if obfuscated == 0 and len(blocking) > 0:
1507 logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
1508 instances.set_has_obfuscation(row["domain"], False)
1510 if instances.has_pending(row["domain"]):
1511 logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1512 instances.update_data(row["domain"])
1514 logger.debug("Invoking commit() ...")
1515 database.connection.commit()
1517 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1518 if config.get("bot_enabled") and len(blockdict) > 0:
1519 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", row["domain"], len(blockdict))
1520 network.send_bot_post(row["domain"], blockdict)
1522 logger.debug("Success! - EXIT!")
1525 def fetch_fedilist(args: argparse.Namespace) -> int:
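# Downloads the instance CSV from demo.fedilist.com (optionally filtered by --software) and fetches instance data for every new, wanted domain.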
1526 logger.debug("args[]='%s' - CALLED!", type(args))
1528 logger.debug("Invoking locking.acquire() ...")
1531 source_domain = "demo.fedilist.com"
1532 if sources.is_recent(source_domain):
1533 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1536 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1537 sources.update(source_domain)
1539 url = f"http://{source_domain}/instance/csv?onion=not"
1540 if args.software is not None and args.software != "":
1541 logger.debug("args.software='%s'", args.software)
1542 url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"
1544 logger.info("Fetching url='%s' ...", url)
1545 response = reqto.get(
1546 url,
1547 headers=network.web_headers,
1548 timeout=(config.get("connection_timeout"), config.get("read_timeout")),
1549 allow_redirects=False
1552 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1553 if not response.ok or response.status_code >= 300 or len(response.content) == 0:
1554 logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", response.ok, response.status_code, len(response.text))
1557 reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
1559 logger.debug("reader[]='%s'", type(reader))
1562 logger.debug("row[]='%s'", type(row))
1563 domain = tidyup.domain(row["hostname"])
1564 logger.debug("domain='%s' - AFTER!", domain)
1567 logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"])
1570 logger.debug("domain='%s' - BEFORE!", domain)
1571 domain = domain.encode("idna").decode("utf-8")
1572 logger.debug("domain='%s' - AFTER!", domain)
1574 if not utils.is_domain_wanted(domain):
1575 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1577 elif (args.all is None or not args.all) and instances.is_registered(domain):
1578 logger.debug("domain='%s' is already registered, --all not specified: args.all[]='%s'", type(args.all))
1580 elif instances.is_recent(domain):
1581 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1584 logger.info("Fetching instances from domain='%s' ...", domain)
1585 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1587 logger.debug("Success! - EXIT!")
1590 def update_nodeinfo(args: argparse.Namespace) -> int:
1591 logger.debug("args[]='%s' - CALLED!", type(args))
1593 logger.debug("Invoking locking.acquire() ...")
1596 if args.domain is not None and args.domain != "":
1597 logger.debug("Fetching args.domain='%s'", args.domain)
1598 database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ?", [args.domain])
1599 elif args.software is not None and args.software != "":
1600 logger.info("Fetching domains for args.software='%s'", args.software)
1601 database.cursor.execute("SELECT domain, software FROM instances WHERE software = ?", [args.software])
1603 logger.info("Fetching domains for recently updated ...")
1604 database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL", [time.time() - config.get("recheck_nodeinfo")])
1606 domains = database.cursor.fetchall()
1608 logger.info("Checking %d domain(s) ...", len(domains))
1611 logger.debug("row[]='%s'", type(row))
1613 logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
1614 software = federation.determine_software(row["domain"])
1616 logger.debug("Determined software='%s'", software)
1617 if (software != row["software"] and software is not None) or args.force:
1618 logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
1619 instances.set_software(row["domain"], software)
1621 instances.set_success(row["domain"])
1622 except network.exceptions as exception:
1623 logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1624 instances.set_last_error(row["domain"], exception)
1626 instances.set_last_nodeinfo(row["domain"])
1627 instances.update_data(row["domain"])
1630 logger.debug("Success! - EXIT!")
1633 def fetch_instances_social(args: argparse.Namespace) -> int:
1634 logger.debug("args[]='%s' - CALLED!", type(args))
1636 logger.debug("Invoking locking.acquire() ...")
1639 source_domain = "instances.social"
1641 if config.get("instances_social_api_key") == "":
1642 logger.error("API key not set. Please set in your config.json file.")
1644 elif sources.is_recent(source_domain):
1645 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1648 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1649 sources.update(source_domain)
1652 "Authorization": f"Bearer {config.get('instances_social_api_key')}",
1655 fetched = network.get_json_api(
1656 source_domain,
1657 "/api/1.0/instances/list?count=0&sort_by=name",
1658 headers,
1659 (config.get("connection_timeout"), config.get("read_timeout"))
1661 logger.debug("fetched[]='%s'", type(fetched))
1663 if "error_message" in fetched:
1664 logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1666 elif "exception" in fetched:
1667 logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1669 elif "json" not in fetched:
1670 logger.warning("fetched has no element 'json' - EXIT!")
1672 elif "instances" not in fetched["json"]:
1673 logger.warning("fetched[row] has no element 'instances' - EXIT!")
1677 rows = fetched["json"]["instances"]
1679 logger.info("Checking %d row(s) ...", len(rows))
1681 logger.debug("row[]='%s'", type(row))
1682 domain = tidyup.domain(row["name"])
1683 logger.debug("domain='%s' - AFTER!", domain)
1686 logger.debug("domain is empty - SKIPPED!")
1689 logger.debug("domain='%s' - BEFORE!", domain)
1690 domain = domain.encode("idna").decode("utf-8")
1691 logger.debug("domain='%s' - AFTER!", domain)
1693 if not utils.is_domain_wanted(domain):
1694 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1696 elif domain in domains:
1697 logger.debug("domain='%s' is already added - SKIPPED!", domain)
1699 elif instances.is_registered(domain):
1700 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1702 elif instances.is_recent(domain):
1703 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1706 logger.info("Fetching instances from domain='%s'", domain)
1707 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1709 logger.debug("Success! - EXIT!")
1712 def convert_idna(args: argparse.Namespace) -> int:
1713 logger.debug("args[]='%s' - CALLED!", type(args))
1715 database.cursor.execute("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC")
1716 rows = database.cursor.fetchall()
1718 logger.debug("rows[]='%s'", type(rows))
1719 instances.translate_idnas(rows, "domain")
1721 database.cursor.execute("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC")
1722 rows = database.cursor.fetchall()
1724 logger.debug("rows[]='%s'", type(rows))
1725 instances.translate_idnas(rows, "origin")
1727 database.cursor.execute("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC")
1728 rows = database.cursor.fetchall()
1730 logger.debug("rows[]='%s'", type(rows))
1731 blocks.translate_idnas(rows, "blocker")
1733 database.cursor.execute("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC")
1734 rows = database.cursor.fetchall()
1736 logger.debug("rows[]='%s'", type(rows))
1737 blocks.translate_idnas(rows, "blocked")
1739 logger.debug("Success! - EXIT!")