# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
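"""Command implementations for the fba (Fedi API Block) aggregator.

Each function below implements one command-line sub-command: it receives the
parsed argparse.Namespace and returns an integer exit status (zero on
success, non-zero otherwise)."""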
import argparse
import csv
import inspect
import json
import logging
import time

from urllib.parse import urlparse

import atoma
import bs4
import markdown
import validators
from fba import database
from fba import utils
from fba.helpers import blacklist
from fba.helpers import blocklists
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import dicts as dict_helper
from fba.helpers import domain as domain_helper
from fba.helpers import locking
from fba.helpers import processing
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import csrf
from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances
from fba.models import sources

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)
def check_instance(args: argparse.Namespace) -> int:
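    """Sanity-checks args.domain: it has to be a syntactically valid domain
    name that is neither blacklisted nor already registered. Returns zero
    when the domain is still unknown, a non-zero status otherwise."""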
    logger.debug("args.domain='%s' - CALLED!", args.domain)

    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status
def check_nodeinfo(args: argparse.Namespace) -> int:
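    """Compares each registered instance's nodeinfo URL against its domain
    (and the punycode form of it) and warns about mismatches. Relative
    nodeinfo URLs always match."""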
    logger.debug("args[]='%s' - CALLED!", type(args))

    # Fetch all instances that have a nodeinfo URL set
    database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")

    cnt = 0
    for row in database.cursor.fetchall():
        logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        punycode = row["domain"].encode("idna").decode("utf-8")

        if row["nodeinfo_url"].startswith("/"):
            logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
            continue
        elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
            logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
            cnt = cnt + 1

    logger.info("Found %d row(s)", cnt)

    logger.debug("EXIT!")
    return 0
def fetch_pixelfed_api(args: argparse.Namespace) -> int:
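    """Fetches the public server list from the pixelfed.org API and invokes
    federation.fetch_instances() for every new, wanted domain."""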
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF by default, you don't need to add network.source_headers yourself here
    headers = tuple()
    source_domain = "pixelfed.org"

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    try:
        logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
        headers = csrf.determine(source_domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
        return 1

    try:
        logger.info("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            source_domain,
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 1
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 1

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif row["domain"] == "":
                logger.debug("row[domain] is empty - SKIPPED!")
                continue

            logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
            domain = row["domain"].encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Fetching instances from domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Cannot fetch JSON,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 1

    logger.debug("Success! - EXIT!")
    return 0
def fetch_bkali(args: argparse.Namespace) -> int:
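    """Fetches a domain list from the gql.api.bka.li GraphQL API and invokes
    federation.fetch_instances() for every new, wanted domain."""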
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "gql.api.bka.li"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()
    try:
        logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
        fetched = network.post_json_api(
            source_domain,
            "/v1/graphql",
            json.dumps({
                "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
            })
        )

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for source_domain='%s' returned error message='%s'", source_domain, fetched["error_message"])
            return 1
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
            return 1

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif entry["domain"] == "":
                logger.debug("entry[domain] is empty - SKIPPED!")
                continue
            elif not domain_helper.is_wanted(entry["domain"]):
                logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
                continue
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])
    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 1

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0
def fetch_blocks(args: argparse.Namespace) -> int:
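    """Fetches and records the blocklists of registered instances: either a
    single domain (args.domain), all instances running one software
    (args.software), or all supported instances that are due a re-check.
    Optionally notifies a bot about newly found 'reject' blocks."""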
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ? LIMIT 1", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_updated ASC", [args.software]
        )
    elif args.force:  # assumption: CLI flag forcing a full re-check
        logger.debug("Re-checking all instances ...")
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_updated ASC"
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_updated ASC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)

        if not domain_helper.is_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("Setting last_blocked,has_obfuscation=false for blocker='%s' ...", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        # c.s isn't part of oliphant's "hidden" blocklists
        if blocker == "chaos.social" or blocklists.has(blocker):
            logger.debug("Skipping blocker='%s', run ./fba.py fetch_cs or fetch_oliphant instead!", blocker)
            continue

        logger.debug("Invoking federation.fetch_blocks(%s) ...", blocker)
        blocking = federation.fetch_blocks(blocker)

        logger.debug("blocking()=%d,nodeinfo_url='%s'", len(blocking), nodeinfo_url)
        if len(blocking) == 0:
            logger.debug("blocker='%s',software='%s'", blocker, software)
            if software == "pleroma":
                logger.info("blocker='%s',software='%s'", blocker, software)
                blocking = pleroma.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "mastodon":
                logger.info("blocker='%s',software='%s'", blocker, software)
                blocking = mastodon.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "lemmy":
                logger.info("blocker='%s',software='%s'", blocker, software)
                blocking = lemmy.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "friendica":
                logger.info("blocker='%s',software='%s'", blocker, software)
                blocking = friendica.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "misskey":
                logger.info("blocker='%s',software='%s'", blocker, software)
                blocking = misskey.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            else:
                logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
        instances.set_total_blocks(blocker, blocking)

        blockdict = list()

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"]  = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some obscure them with question marks, not sure if that's dependent on version or not
                row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]

            logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue

            logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
            block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
            logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])

            if not domain_helper.is_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            block["block_level"] = blocks.alias_block_level(block["block_level"])

            if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_observer(args: argparse.Namespace) -> int:
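    """Scrapes the instance tables on fediverse.observer (optionally limited
    to one software type via args.software) and invokes
    federation.fetch_instances() for every new, wanted domain."""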
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fediverse.observer"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    types = list()
    if args.software is None:
        logger.info("Fetching software list ...")
        raw = utils.fetch_url(
            f"https://{source_domain}",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        ).text
        logger.debug("raw[%s]()=%d", type(raw), len(raw))

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        navbar = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"})
        logger.debug("navbar[]='%s'", type(navbar))
        if navbar is None:
            logger.warning("Cannot find navigation bar, cannot continue!")
            return 1

        items = navbar.findAll("a", {"class": "dropdown-item"})
        logger.debug("items[]='%s'", type(items))

        logger.info("Checking %d menu items ...", len(items))
        for item in items:
            logger.debug("item[%s]='%s'", type(item), item)
            if item.text.lower() == "all":
                logger.debug("Skipping 'All' menu entry ...")
                continue

            logger.debug("Appending item.text='%s' ...", item.text)
            types.append(tidyup.domain(item.text))
    else:
        logger.info("Adding args.software='%s' as type ...", args.software)
        types.append(args.software)

    logger.info("Fetching table data for %d software type(s) ...", len(types))
    for software in types:
        logger.debug("software='%s' - BEFORE!", software)
        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            continue

        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://{source_domain}/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()
            logger.debug("domain='%s' - AFTER!", domain)

            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue

            software = software_helper.alias(software)
            logger.info("Fetching instances for domain='%s'", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_todon_wiki(args: argparse.Namespace) -> int:
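    """Scrapes wiki.todon.eu's list of silenced/limited and suspended servers
    and records them as blocks of the todon.eu instance."""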
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "wiki.todon.eu"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    blocker = "todon.eu"
    blockdict = list()
    blocklist = {
        "silenced": list(),
        "reject"  : list(),
    }

    logger.debug("Fetching domainblocks from source_domain='%s'", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/todon/domainblocks",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blocking = blocklist["silenced"] + blocklist["reject"]

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_last_blocked(blocker)
    instances.set_total_blocks(blocker, blocking)

    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if blocks.is_instance_blocked(blocker, blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", blocked, block_level, blocker)
                blockdict.append({
                    "blocked": blocked,
                    "reason" : None,
                })

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
        network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_cs(args: argparse.Namespace) -> int:
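    """Fetches chaos.social's federation.md from raw.githubusercontent.com,
    renders its Markdown tables of silenced and blocked instances and records
    them as blocks of chaos.social."""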
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Markdown extensions used when rendering federation.md; "extra" enables
    # table support (the full extension list is assumed here)
    extensions = ["extra"]

    blocker = "chaos.social"
    blockdict = list()
    blocklist = {
        "silenced": list(),
        "reject"  : list(),
    }

    source_domain = "raw.githubusercontent.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching federation.md from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/chaossocial/meta/master/federation.md",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc[%s]()=%d", type(doc), len(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    blocklist["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    blocklist["reject"] = federation.find_domains(blocked)

    blocking = blocklist["silenced"] + blocklist["reject"]

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_last_blocked(blocker)
    instances.set_total_blocks(blocker, blocking)

    logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
    if len(blocking) > 0:
        for block_level in blocklist:
            logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))

            for row in blocklist[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if "domain" not in row:
                    logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
                    continue
                elif not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", row["domain"], row["reason"], blocker)
                    blockdict.append({
                        "blocked": row["domain"],
                        "reason" : row["reason"],
                    })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_fba_rss(args: argparse.Namespace) -> int:
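    """Fetches an FBA-specific RSS feed (args.feed), extracts one domain per
    item link and invokes federation.fetch_instances() for every new, wanted
    domain."""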
    logger.debug("args[]='%s' - CALLED!", type(args))

    domains = list()

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    components = urlparse(args.feed)

    if sources.is_recent(components.netloc):
        logger.info("API from components.netloc='%s' has recently been accessed - EXIT!", components.netloc)
        return 1
    else:
        logger.debug("components.netloc='%s' has not been recently used, marking ...", components.netloc)
        sources.update(components.netloc)

    logger.info("Fetching FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item[%s]='%s'", type(item), item)
            domain = tidyup.domain(item.link.split("=")[1])

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)

            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_fbabot_atom(args: argparse.Namespace) -> int:
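    """Parses the ATOM feed of an FBA bot account (ryona.agency by default,
    overridable via a valid args.feed URL), extracts linked domains and
    invokes federation.fetch_instances() for every new, wanted domain."""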
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "ryona.agency"
    feed = f"https://{source_domain}/users/fba/feed.atom"

    logger.debug("args.feed[%s]='%s'", type(args.feed), args.feed)
    if args.feed is not None and validators.url(args.feed):
        logger.debug("Setting feed='%s' ...", args.feed)
        feed = str(args.feed)
        source_domain = urlparse(args.feed).netloc

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                logger.debug("element[]='%s'", type(element))
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href)

                    logger.debug("domain='%s' - AFTER!", domain)
                    if domain == "":
                        logger.debug("domain is empty - SKIPPED!")
                        continue

                    logger.debug("domain='%s' - BEFORE!", domain)
                    domain = domain.encode("idna").decode("utf-8")
                    logger.debug("domain='%s' - AFTER!", domain)

                    if not domain_helper.is_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)

            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_instances(args: argparse.Namespace) -> int:
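    """Fetches the peer list of args.domain first and then, unless the run is
    limited to a single domain, re-crawls known instances whose peer list is
    due for a refresh."""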
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("args.domain='%s' - checking ...", args.domain)
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid.", args.domain)
        return 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
        return 101

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Initialize values
    domain = tidyup.domain(args.domain)
    origin = software = None

    # Fetch record
    database.cursor.execute("SELECT origin, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
    row = database.cursor.fetchone()
    if row is not None:
        origin = row["origin"]
        software = row["software"]

    try:
        logger.info("Fetching instances from args.domain='%s',origin='%s',software='%s' ...", domain, origin, software)
        federation.fetch_instances(domain, origin, software, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update(args.domain)
        return 100

    if args.single:  # assumption: CLI flag limiting the run to one domain
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus', 'neodb') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY total_peers DESC, last_updated ASC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] == "":
            logger.debug("row[domain] is empty - SKIPPED!")
            continue

        logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
        domain = row["domain"].encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", domain, row["origin"], row["software"], row["nodeinfo_url"])
            federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0
def fetch_oliphant(args: argparse.Namespace) -> int:
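    """Downloads the per-blocker CSV blocklists from oliphant's repository on
    codeberg.org and records the listed blocks (including media/report
    rejections) for each blocker."""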
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "codeberg.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    # Base URL
    base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"

    domains = list()
    blockdict = list()

    logger.debug("Downloading %d files ...", len(blocklists.oliphant_blocklists))
    for block in blocklists.oliphant_blocklists:
        # Is a domain given and not equal to the blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue
        elif args.domain in domains:
            logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
            continue

        instances.set_last_blocked(block["blocker"])

        # Fetch this URL
        logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
        response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.content()=%d", response.ok, response.status_code, len(response.content))
        if not response.ok or response.status_code > 200 or response.content == "":
            logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
            continue

        logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
        reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")

        for row in reader:
            logger.debug("row[%s]='%s'", type(row), row)
            domain = severity = None
            reject_media = reject_reports = False

            if "#domain" in row:
                domain = row["#domain"]
            elif "domain" in row:
                domain = row["domain"]
            else:
                logger.debug("row='%s' does not contain domain column", row)
                continue

            if "#severity" in row:
                severity = blocks.alias_block_level(row["#severity"])
            elif "severity" in row:
                severity = blocks.alias_block_level(row["severity"])
            else:
                logger.debug("row='%s' does not contain severity column", row)
                continue

            if "#reject_media" in row and row["#reject_media"].lower() == "true":
                reject_media = True
            elif "reject_media" in row and row["reject_media"].lower() == "true":
                reject_media = True

            if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
                reject_reports = True
            elif "reject_reports" in row and row["reject_reports"].lower() == "true":
                reject_reports = True

            logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif domain.endswith(".onion"):
                logger.debug("domain='%s' is a TOR .onion domain - SKIPPED", domain)
                continue
            elif domain.endswith(".arpa"):
                logger.debug("domain='%s' is a reverse IP address - SKIPPED", domain)
                continue
            elif domain.endswith(".tld"):
                logger.debug("domain='%s' is a fake domain - SKIPPED", domain)
                continue
            elif domain.find("*") >= 0 or domain.find("?") >= 0:
                logger.debug("domain='%s' is obfuscated - Invoking utils.deobfuscate(%s, %s) ...", domain, domain, block["blocker"])
                domain = utils.deobfuscate(domain, block["blocker"])
                logger.debug("domain='%s' - AFTER!", domain)

            if not validators.domain(domain):
                logger.debug("domain='%s' is not a valid domain - SKIPPED!", domain)
                continue
            elif blacklist.is_blacklisted(domain):
                logger.warning("domain='%s' is blacklisted - SKIPPED!", domain)
                continue
            elif blocks.is_instance_blocked(block["blocker"], domain, severity):
                logger.debug("block[blocker]='%s' has already blocked domain='%s' with severity='%s' - SKIPPED!", block["blocker"], domain, severity)
                continue

            logger.debug("Marking domain='%s' as handled", domain)
            domains.append(domain)

            logger.debug("Processing domain='%s' ...", domain)
            processed = processing.domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
            logger.debug("processed='%s'", processed)

            if processing.block(block["blocker"], domain, None, severity) and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',severity='%s' for blocker='%s' ...", domain, severity, block["blocker"])
                blockdict.append({
                    "blocked": domain,
                    "reason" : None,
                })

            if reject_media:
                processing.block(block["blocker"], domain, None, "reject_media")
            if reject_reports:
                processing.block(block["blocker"], domain, None, "reject_reports")

        logger.debug("block[blocker]='%s'", block["blocker"])
        if not blocklists.has(block["blocker"]):
            logger.debug("Invoking instances.set_total_blocks(%s, domains()=%d) ...", block["blocker"], len(domains))
            instances.set_total_blocks(block["blocker"], domains)

        logger.debug("Checking if blocker='%s' has pending updates ...", block["blocker"])
        if instances.has_pending(block["blocker"]):
            logger.debug("Flushing updates for block[blocker]='%s' ...", block["blocker"])
            instances.update(block["blocker"])

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
            network.send_bot_post(block["blocker"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_txt(args: argparse.Namespace) -> int:
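    """Downloads plain-text blocklists from a static list of URLs (currently
    seirdy.one's bsl.txt) and processes every listed domain."""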
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Static URLs
    urls = ({
        "blocker": "seirdy.one",
        "url"    : "https://seirdy.one/pb/bsl.txt",
    },)

    logger.info("Checking %d text file(s) ...", len(urls))
    for row in urls:
        logger.debug("Fetching row[url]='%s' ...", row["url"])
        response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code == 200 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain)

                logger.debug("domain='%s' - AFTER!", domain)
                if domain == "":
                    logger.debug("domain is empty - SKIPPED!")
                    continue
                elif not domain_helper.is_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue
                elif instances.is_recent(domain):
                    logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                    continue

                logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
                processed = processing.domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)
                if not processed:
                    logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
                    continue

    logger.debug("Success! - EXIT!")
    return 0
def fetch_fedipact(args: argparse.Namespace) -> int:
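    """Scrapes the participant list from fedipact.online and invokes
    federation.fetch_instances() for every new, wanted domain."""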
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fedipact.online"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching / from source_domain='%s' ...", source_domain)
    response = utils.fetch_url(
        f"https://{source_domain}",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and response.text != "":
        logger.debug("Parsing %d Bytes ...", len(response.text))

        doc = bs4.BeautifulSoup(response.text, "html.parser")
        logger.debug("doc[]='%s'", type(doc))

        rows = doc.findAll("li")
        logger.info("Checking %d row(s) ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            domain = tidyup.domain(row.contents[0])

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.info("Fetching domain='%s' ...", domain)
            federation.fetch_instances(domain, "beach.city", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_joinmobilizon(args: argparse.Namespace) -> int:
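    """Fetches the public instance list from instances.joinmobilizon.org and
    invokes federation.fetch_instances() for every new, wanted host."""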
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "instances.joinmobilizon.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching instances from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/api/v1/instances",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    parsed = json.loads(raw)
    logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))

    if "data" not in parsed:
        logger.warning("parsed()=%d does not contain key 'data'", len(parsed))
        return 1

    logger.info("Checking %d instances ...", len(parsed["data"]))
    for row in parsed["data"]:
        logger.debug("row[]='%s'", type(row))
        if "host" not in row:
            logger.warning("row='%s' does not contain key 'host' - SKIPPED!", row)
            continue
        elif not domain_helper.is_wanted(row["host"]):
            logger.debug("row[host]='%s' is not wanted - SKIPPED!", row["host"])
            continue
        elif instances.is_registered(row["host"]):
            logger.debug("row[host]='%s' is already registered - SKIPPED!", row["host"])
            continue

        logger.info("Fetching row[host]='%s' ...", row["host"])
        federation.fetch_instances(row["host"], "demo.mobilizon.org", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_joinmisskey(args: argparse.Namespace) -> int:
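    """Fetches instances.json from instanceapp.misskey.page and invokes
    federation.fetch_instances() for every new, wanted Misskey instance."""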
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "instanceapp.misskey.page"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching instances.json from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/instances.json",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    parsed = json.loads(raw)
    logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))

    if "instancesInfos" not in parsed:
        logger.warning("parsed()=%d does not contain element 'instancesInfos'", len(parsed))
        return 1

    logger.info("Checking %d instance(s) ...", len(parsed["instancesInfos"]))
    for row in parsed["instancesInfos"]:
        logger.debug("row[%s]='%s'", type(row), row)
        if "url" not in row:
            logger.warning("row()=%d does not have element 'url' - SKIPPED!", len(row))
            continue
        elif not domain_helper.is_wanted(row["url"]):
            logger.debug("row[url]='%s' is not wanted - SKIPPED!", row["url"])
            continue
        elif instances.is_registered(row["url"]):
            logger.debug("row[url]='%s' is already registered - SKIPPED!", row["url"])
            continue

        logger.info("Fetching row[url]='%s' ...", row["url"])
        federation.fetch_instances(row["url"], "misskey.io", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_joinfediverse(args: argparse.Namespace) -> int:
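    """Scrapes the FediBlock tables from joinfediverse.wiki, expands
    sub-domain rows into full domains and records the resulting blocks for
    the local climatejustice.* instances."""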
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "joinfediverse.wiki"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching /FediBlock wiki page from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/FediBlock",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    tables = doc.findAll("table", {"class": "wikitable"})

    logger.info("Analyzing %d table(s) ...", len(tables))
    blocklist = list()
    for table in tables:
        logger.debug("table[]='%s'", type(table))

        rows = table.findAll("tr")
        logger.info("Checking %d row(s) ...", len(rows))
        block_headers = dict()
        for row in rows:
            logger.debug("row[%s]='%s'", type(row), row)

            headers = row.findAll("th")
            logger.debug("Found headers()=%d header(s)", len(headers))
            if len(headers) > 1:
                block_headers = dict()
                cnt = 0
                for header in headers:
                    cnt = cnt + 1
                    logger.debug("header[]='%s',cnt=%d", type(header), cnt)
                    text = header.contents[0]

                    logger.debug("text[]='%s'", type(text))
                    if not isinstance(text, str):
                        logger.debug("text[]='%s' is not of type 'str' - SKIPPED!", type(text))
                        continue
                    elif validators.domain(text.strip()):
                        logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
                        continue

                    text = tidyup.domain(text.strip())
                    logger.debug("text='%s' - AFTER!", text)
                    if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
                        logger.debug("Found header: '%s'=%d", text, cnt)
                        block_headers[cnt] = text
            elif len(block_headers) == 0:
                logger.debug("row is not scrapable - SKIPPED!")
                continue
            elif len(block_headers) > 0:
                logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
                block = dict()
                cnt = 0
                for element in row.find_all(["th", "td"]):
                    cnt = cnt + 1
                    logger.debug("element[]='%s',cnt=%d", type(element), cnt)
                    if cnt in block_headers:
                        logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])

                        text = element.text.strip()
                        key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"

                        logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
                        if key in ["domain", "instance"]:
                            block[key] = text
                        elif key == "reason":
                            block[key] = tidyup.reason(text)
                        elif key == "subdomain(s)":
                            # Sub-domains come as a "/"-separated list
                            block[key] = text.split("/")
                        else:
                            logger.debug("key='%s'", key)
                            block[key] = text

                logger.debug("block()=%d ...", len(block))
                if len(block) > 0:
                    logger.debug("Appending block()=%d ...", len(block))
                    blocklist.append(block)

    logger.debug("blocklist()=%d", len(blocklist))

    database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
    domains = database.cursor.fetchall()

    logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
    blocking = list()
    for block in blocklist:
        logger.debug("block='%s'", block)
        if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
            origin = block["blocked"]
            logger.debug("origin='%s'", origin)
            for subdomain in block["subdomain(s)"]:
                block["blocked"] = subdomain + "." + origin
                logger.debug("block[blocked]='%s'", block["blocked"])
                blocking.append(block)
        else:
            blocking.append(block)

    logger.debug("blocking()=%d", len(blocking))
    for block in blocking:
        logger.debug("block[]='%s'", type(block))
        if "blocked" not in block:
            raise KeyError(f"block()={len(block)} does not have element 'blocked'")

        block["blocked"] = tidyup.domain(block["blocked"]).encode("idna").decode("utf-8")
        logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])

        if block["blocked"] == "":
            logger.debug("block[blocked] is empty - SKIPPED!")
            continue
        elif not domain_helper.is_wanted(block["blocked"]):
            logger.debug("block[blocked]='%s' is not wanted - SKIPPED!", block["blocked"])
            continue
        elif instances.is_recent(block["blocked"]):
            logger.debug("block[blocked]='%s' has been recently checked - SKIPPED!", block["blocked"])
            continue

        logger.debug("Processing blocked='%s' ...", block["blocked"])
        processing.domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)

    blockdict = list()
    for blocker in domains:
        blocker = blocker[0]
        logger.debug("blocker[%s]='%s'", type(blocker), blocker)
        instances.set_last_blocked(blocker)

        for block in blocking:
            logger.debug("block[blocked]='%s',block[block reason(s)]='%s' - BEFORE!", block["blocked"], block["block reason(s)"] if "block reason(s)" in block else None)
            block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None

            logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif not domain_helper.is_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
            if processing.block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update(blocker)

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
        network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0
def recheck_obfuscation(args: argparse.Namespace) -> int:
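    """Re-fetches the blocklists of instances flagged with has_obfuscation=1
    and tries to map obfuscated entries (containing '*' or '?') back to real
    domain names."""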
1517 logger.debug("args[]='%s' - CALLED!", type(args))
1519 logger.debug("Invoking locking.acquire() ...")
1522 if isinstance(args.domain, str) and args.domain != "" and domain_helper.is_wanted(args.domain):
1523 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
1524 elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
1525 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
1527 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")
1529 rows = database.cursor.fetchall()
1530 logger.info("Checking %d domains ...", len(rows))
1532 logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
1533 if (args.force is None or not args.force) and args.domain is None and args.software is None and instances.is_recent(row["domain"], "last_blocked"):
1534 logger.debug("row[domain]='%s' has been recently checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force))
1537 logger.debug("Invoking federation.fetch_blocks(%s) ...", row["domain"])
1538 blocking = federation.fetch_blocks(row["domain"])
1540 logger.debug("blocking()=%d", len(blocking))
1541 if len(blocking) == 0:
1542 if row["software"] == "pleroma":
1543 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1544 blocking = pleroma.fetch_blocks(row["domain"])
1545 elif row["software"] == "mastodon":
1546 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1547 blocking = mastodon.fetch_blocks(row["domain"])
1548 elif row["software"] == "lemmy":
1549 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1550 blocking = lemmy.fetch_blocks(row["domain"])
1551 elif row["software"] == "friendica":
1552 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1553 blocking = friendica.fetch_blocks(row["domain"])
1554 elif row["software"] == "misskey":
1555 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1556 blocking = misskey.fetch_blocks(row["domain"])
1558 logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])
1560 # c.s isn't part of oliphant's "hidden" blocklists
1561 logger.debug("row[domain]='%s'", row["domain"])
1562 if row["domain"] != "chaos.social" and not blocklists.has(row["domain"]):
1563 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
1564 instances.set_last_blocked(row["domain"])
1565 instances.set_total_blocks(row["domain"], blocking)
1567 obfuscated = 0
1568 blockdict = list()
1570 logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1571 for block in blocking:
1572 logger.debug("block[blocked]='%s'", block["blocked"])
1575 if block["blocked"] == "":
1576 logger.debug("block[blocked] is empty - SKIPPED!")
1578 elif block["blocked"].endswith(".arpa"):
1579 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1581 elif block["blocked"].endswith(".tld"):
1582 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1584 elif block["blocked"].endswith(".onion"):
1585 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
1587 elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
1588 logger.debug("block='%s' is obfuscated.", block["blocked"])
1589 obfuscated = obfuscated + 1
1590 blocked = utils.deobfuscate(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
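# Sketch of the idea behind utils.deobfuscate(), not its exact contract: it
# attempts to match the wildcard pattern (and, when present, the "hash" field)
# against domains already known to this instance in order to recover the
# clear-text domain name, and presumably returns None (or the input unchanged)
# when nothing matches.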
1591 elif not domain_helper.is_wanted(block["blocked"]):
1592 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1594 elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1595 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
1598 logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
1599 if blocked is not None and blocked != block["blocked"]:
1600 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1601 obfuscated = obfuscated - 1
1603 if blocks.is_instance_blocked(row["domain"], blocked):
1604 logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
1606 elif blacklist.is_blacklisted(blocked):
1607 logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
1610 block["block_level"] = blocks.alias_block_level(block["block_level"])
1612 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
1613 if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1614 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
1617 "reason" : block["reason"],
1620 logger.debug("Settings obfuscated=%d for row[domain]='%s' ...", obfuscated, row["domain"])
1621 instances.set_obfuscated_blocks(row["domain"], obfuscated)
1623 logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
1624 if obfuscated == 0 and len(blocking) > 0:
1625 logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
1626 instances.set_has_obfuscation(row["domain"], False)
1628 if instances.has_pending(row["domain"]):
1629 logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1630 instances.update(row["domain"])
1632 logger.debug("Invoking commit() ...")
1633 database.connection.commit()
1635 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1636 if config.get("bot_enabled") and len(blockdict) > 0:
1637 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", row["domain"], len(blockdict))
1638 network.send_bot_post(row["domain"], blockdict)
1640 logger.debug("Success! - EXIT!")
1643 def fetch_fedilist(args: argparse.Namespace) -> int:
1644 logger.debug("args[]='%s' - CALLED!", type(args))
1646 logger.debug("Invoking locking.acquire() ...")
1649 source_domain = "demo.fedilist.com"
1650 if sources.is_recent(source_domain):
1651 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1654 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1655 sources.update(source_domain)
1657 url = f"http://{source_domain}/instance/csv?onion=not"
1658 if args.software is not None and args.software != "":
1659 logger.debug("args.software='%s'", args.software)
1660 url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"
1662 logger.info("Fetching url='%s' ...", url)
1663 response = reqto.get(
1664 url,
1665 headers=network.web_headers,
1666 timeout=(config.get("connection_timeout"), config.get("read_timeout")),
1667 allow_redirects=False
1668 )
1670 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1671 if not response.ok or response.status_code > 200 or len(response.content) == 0:
1672 logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.text))
1675 reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
1677 logger.debug("reader[]='%s'", type(reader))
1679 logger.warning("Failed parsing response.content()=%d as CSV content", len(response.content))
1684 logger.info("Checking %d rows ...", len(rows))
1686 logger.debug("row[]='%s'", type(row))
1687 if "hostname" not in row:
1688 logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(row))
1691 logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"])
1692 domain = tidyup.domain(row["hostname"])
1693 logger.debug("domain='%s' - AFTER!", domain)
1696 logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"])
1699 logger.debug("domain='%s' - BEFORE!", domain)
1700 domain = domain.encode("idna").decode("utf-8")
1701 logger.debug("domain='%s' - AFTER!", domain)
1703 if not domain_helper.is_wanted(domain):
1704 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1706 elif (args.force is None or not args.force) and instances.is_registered(domain):
1707 logger.debug("domain='%s' is already registered, --force not specified: args.force[]='%s'", domain, type(args.force))
1709 elif instances.is_recent(domain):
1710 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1713 logger.info("Fetching instances from domain='%s' ...", domain)
1714 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1716 logger.debug("Success! - EXIT!")
1719 def update_nodeinfo(args: argparse.Namespace) -> int:
1720 logger.debug("args[]='%s' - CALLED!", type(args))
1722 logger.debug("Invoking locking.acquire() ...")
1725 if args.domain is not None and args.domain != "":
1726 logger.debug("Fetching args.domain='%s'", args.domain)
1727 database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ?", [args.domain])
1728 elif args.software is not None and args.software != "":
1729 logger.info("Fetching domains for args.software='%s'", args.software)
1730 database.cursor.execute("SELECT domain, software FROM instances WHERE software = ? AND (last_nodeinfo < ? OR last_nodeinfo IS NULL)", [args.software.lower(), time.time() - config.get("recheck_nodeinfo")])
1731 elif args.mode is not None and args.mode != "":
1732 logger.info("Fetching domains for args.mode='%s'", args.mode.upper())
1733 database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode = ? AND (last_nodeinfo < ? OR last_nodeinfo IS NULL)", [args.mode.upper(), time.time() - config.get("recheck_nodeinfo")])
1734 elif args.no_software:
1735 logger.info("Fetching domains with no software type detected ...")
1736 database.cursor.execute("SELECT domain, software FROM instances WHERE software IS NULL AND (last_nodeinfo < ? OR last_nodeinfo IS NULL)", [time.time() - config.get("recheck_nodeinfo")])
1738 logger.info("Fetching domains for recently updated ...")
1739 database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL", [time.time() - config.get("recheck_nodeinfo")])
1741 domains = database.cursor.fetchall()
1743 logger.info("Checking %d domain(s) ...", len(domains))
1746 logger.debug("row[]='%s'", type(row))
1747 if not args.force and instances.is_recent(row["domain"], "last_nodeinfo"):
1748 logger.debug("row[domain]='%s' has been recently checked - SKIPPED!", row["domain"])
1752 logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
1753 software = federation.determine_software(row["domain"])
1755 logger.debug("Determined software='%s'", software)
1756 if (software != row["software"] and software is not None) or args.force is True:
1757 logger.debug("software='%s'", software)
1758 if software is None:
1759 logger.debug("Setting nodeinfo_url to 'None' for row[domain]='%s' ...", row["domain"])
1760 instances.set_nodeinfo_url(row["domain"], None)
1762 logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
1763 instances.set_software(row["domain"], software)
1765 if software is not None:
1766 logger.debug("Setting row[domain]='%s' as successfully determined ...", row["domain"])
1767 instances.set_success(row["domain"])
1768 except network.exceptions as exception:
1769 logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1770 instances.set_last_error(row["domain"], exception)
1772 instances.set_last_nodeinfo(row["domain"])
1773 instances.update(row["domain"])
1774 cnt = cnt + 1
1776 logger.debug("Success! - EXIT!")
1779 def fetch_instances_social(args: argparse.Namespace) -> int:
1780 logger.debug("args[]='%s' - CALLED!", type(args))
1782 logger.debug("Invoking locking.acquire() ...")
1785 source_domain = "instances.social"
1787 if config.get("instances_social_api_key") == "":
1788 logger.error("API key not set. Please set in your config.json file.")
1790 elif sources.is_recent(source_domain):
1791 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1794 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1795 sources.update(source_domain)
1798 "Authorization": f"Bearer {config.get('instances_social_api_key')}",
1801 logger.info("Fetching list from source_domain='%s' ...", source_domain)
1802 fetched = network.get_json_api(
1803 source_domain,
1804 "/api/1.0/instances/list?count=0&sort_by=name",
1805 headers,
1806 (config.get("connection_timeout"), config.get("read_timeout"))
1807 )
1808 logger.debug("fetched[]='%s'", type(fetched))
1810 if "error_message" in fetched:
1811 logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1813 elif "exception" in fetched:
1814 logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1816 elif "json" not in fetched:
1817 logger.warning("fetched has no element 'json' - EXIT!")
1819 elif "instances" not in fetched["json"]:
1820 logger.warning("fetched[row] has no element 'instances' - EXIT!")
1823 domains = list()
1824 rows = fetched["json"]["instances"]
1826 logger.info("Checking %d row(s) ...", len(rows))
1827 for row in rows:
1828 logger.debug("row[]='%s'", type(row))
1829 domain = tidyup.domain(row["name"])
1830 logger.debug("domain='%s' - AFTER!", domain)
1833 logger.debug("domain is empty - SKIPPED!")
1836 logger.debug("domain='%s' - BEFORE!", domain)
1837 domain = domain.encode("idna").decode("utf-8")
1838 logger.debug("domain='%s' - AFTER!", domain)
1840 if not domain_helper.is_wanted(domain):
1841 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1843 elif domain in domains:
1844 logger.debug("domain='%s' is already added - SKIPPED!", domain)
1846 elif instances.is_registered(domain):
1847 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1849 elif instances.is_recent(domain):
1850 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1853 logger.info("Fetching instances from domain='%s'", domain)
1854 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1856 logger.debug("Success! - EXIT!")
1859 def fetch_relays(args: argparse.Namespace) -> int:
1860 logger.debug("args[]='%s' - CALLED!", type(args))
1862 logger.debug("Invoking locking.acquire() ...")
1865 if args.domain is not None and args.domain != "":
1866 database.cursor.execute("SELECT domain, software FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay') AND domain = ? LIMIT 1", [args.domain])
1868 database.cursor.execute("SELECT domain, software FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay')")
1871 rows = database.cursor.fetchall()
1872 domains = list()
1873 logger.info("Checking %d relays ...", len(rows))
1874 for row in rows:
1875 logger.debug("row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1877 if not args.force and instances.is_recent(row["domain"]):
1878 logger.debug("row[domain]='%s' has been recently fetched - SKIPPED!", row["domain"])
1882 logger.info("Fetching / from relay row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1883 raw = utils.fetch_url(
1884 f"https://{row['domain']}",
1885 network.web_headers,
1886 (config.get("connection_timeout"), config.get("read_timeout"))
1887 ).text
1888 logger.debug("raw[%s]()=%d", type(raw), len(raw))
1889 except network.exceptions as exception:
1890 logger.warning("Exception '%s' during fetching from relay '%s': '%s'", type(exception), row["domain"], str(exception))
1891 instances.set_last_error(row["domain"], exception)
1892 instances.set_last_instance_fetch(row["domain"])
1893 instances.update(row["domain"])
1896 doc = bs4.BeautifulSoup(raw, features="html.parser")
1897 logger.debug("doc[]='%s'", type(doc))
1899 logger.debug("row[software]='%s'", row["software"])
1900 if row["software"] == "activityrelay":
1901 logger.debug("Checking row[domain]='%s' ...", row["domain"])
1902 tags = doc.findAll("p")
1904 logger.debug("Checking %d paragraphs ...", len(tags))
1906 logger.debug("tag[]='%s'", type(tag))
1907 if len(tag.contents) == 0:
1908 logger.debug("tag='%s' is an empty tag - SKIPPED!", tag)
1910 elif "registered instances" not in tag.contents[0]:
1911 logger.debug("Skipping paragraph, text not found.")
1914 logger.debug("Found tag.contents[0][]='%s'", tag.contents[0])
1915 for domain in tag.contents:
1916 logger.debug("domain[%s]='%s'", type(domain), domain)
1917 if not isinstance(domain, bs4.element.NavigableString) or "registered instances" in domain:
1920 domain = str(domain)
1921 logger.debug("domain='%s'", domain)
1922 if not domain_helper.is_wanted(domain):
1923 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1926 logger.debug("domain='%s' - BEFORE!", domain)
1927 domain = tidyup.domain(domain)
1928 logger.debug("domain='%s' - AFTER!", domain)
1931 logger.debug("Empty domain after tidyup.domain() from origin='%s' - SKIPPED!", row["domain"])
1933 elif domain not in peers:
1934 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1935 peers.append(domain)
1937 if dict_helper.has_key(domains, "domain", domain):
1938 logger.debug("domain='%s' already added", domain)
1941 logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
1944 "origin": row["domain"],
1946 elif row["software"] in ["aoderelay", "selective-relay"]:
1947 logger.debug("Checking row[domain]='%s' ...", row["domain"])
1948 if row["software"] == "aoderelay":
1949 tags = doc.findAll("section", {"class": "instance"})
1951 tags = doc.find("div", {"id": "instances"}).findAll("li")
1953 logger.debug("Checking %d tags ...", len(tags))
1955 logger.debug("tag[]='%s'", type(tag))
1957 link = tag.find("a")
1958 logger.debug("link[%s]='%s'", type(link), link)
1960 logger.warning("tag='%s' has no a-tag ...", tag)
1963 components = urlparse(link["href"])
1964 domain = components.netloc.lower()
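# Example: urlparse("https://relay.example/actor").netloc.lower() yields "relay.example"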
1966 if not domain_helper.is_wanted(domain):
1967 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1970 logger.debug("domain='%s' - BEFORE!", domain)
1971 domain = tidyup.domain(domain)
1972 logger.debug("domain='%s' - AFTER!", domain)
1975 logger.debug("Empty domain after tidyup.domain() from origin='%s' - SKIPPED!", row["domain"])
1977 elif domain not in peers:
1978 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1979 peers.append(domain)
1981 if dict_helper.has_key(domains, "domain", domain):
1982 logger.debug("domain='%s' already added", domain)
1985 logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
1988 "origin": row["domain"],
1991 logger.warning("row[domain]='%s',row[software]='%s' is not supported", row["domain"], row["software"])
1993 logger.debug("Updating last_instance_fetch for row[domain]='%s' ...", row["domain"])
1994 instances.set_last_instance_fetch(row["domain"])
1996 logger.info("Relay '%s' has %d peer(s) registered.", row["domain"], len(peers))
1997 instances.set_total_peers(row["domain"], peers)
1999 logger.debug("Flushing data for row[domain]='%s'", row["domain"])
2000 instances.update(row["domain"])
2002 logger.info("Checking %d domains ...", len(domains))
2004 logger.debug("row[domain]='%s',row[origin]='%s'", row["domain"], row["origin"])
2005 if instances.is_registered(row["domain"]):
2006 logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
2009 logger.info("Fetching row[domain]='%s',row[origin]='%s' ...", row["domain"], row["origin"])
2010 federation.fetch_instances(row["domain"], row["origin"], None, inspect.currentframe().f_code.co_name)
2012 logger.debug("Success! - EXIT!")
2015 def convert_idna(args: argparse.Namespace) -> int:
2016 logger.debug("args[]='%s' - CALLED!", type(args))
2018 database.cursor.execute("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC")
2019 rows = database.cursor.fetchall()
2021 logger.debug("rows[]='%s'", type(rows))
2022 instances.translate_idnas(rows, "domain")
2024 database.cursor.execute("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC")
2025 rows = database.cursor.fetchall()
2027 logger.debug("rows[]='%s'", type(rows))
2028 instances.translate_idnas(rows, "origin")
2030 database.cursor.execute("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC")
2031 rows = database.cursor.fetchall()
2033 logger.debug("rows[]='%s'", type(rows))
2034 blocks.translate_idnas(rows, "blocker")
2036 database.cursor.execute("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC")
2037 rows = database.cursor.fetchall()
2039 logger.debug("rows[]='%s'", type(rows))
2040 blocks.translate_idnas(rows, "blocked")
2042 logger.debug("Success! - EXIT!")
2045 def remove_invalid(args: argparse.Namespace) -> int:
2046 logger.debug("args[]='%s' - CALLED!", type(args))
2048 logger.debug("Invoking locking.acquire() ...")
2051 database.cursor.execute("SELECT domain FROM instances ORDER BY domain ASC")
2052 rows = database.cursor.fetchall()
2054 logger.info("Checking %d domains ...", len(rows))
2056 logger.debug("row[domain]='%s'", row["domain"])
2057 if not validators.domain(row["domain"].split("/")[0]):
2058 logger.info("Invalid row[domain]='%s' found, removing ...", row["domain"])
2059 database.cursor.execute("DELETE FROM blocks WHERE blocker = ? OR blocked = ?", [row["domain"], row["domain"]])
2060 database.cursor.execute("DELETE FROM instances WHERE domain = ? LIMIT 1", [row["domain"]])
2062 logger.debug("Invoking commit() ...")
2063 database.connection.commit()
2065 logger.info("Vaccum cleaning database ...")
2066 database.cursor.execute("VACUUM")
2068 logger.debug("Success! - EXIT!")