1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
23 from urllib.parse import urlparse
32 from fba import database
35 from fba.helpers import blacklist
36 from fba.helpers import blocklists
37 from fba.helpers import config
38 from fba.helpers import cookies
39 from fba.helpers import dicts as dict_helper
40 from fba.helpers import domain as domain_helper
41 from fba.helpers import locking
42 from fba.helpers import processing
43 from fba.helpers import software as software_helper
44 from fba.helpers import tidyup
46 from fba.http import csrf
47 from fba.http import federation
48 from fba.http import network
50 from fba.models import blocks
51 from fba.models import instances
52 from fba.models import sources
54 from fba.networks import friendica
55 from fba.networks import lemmy
56 from fba.networks import mastodon
57 from fba.networks import misskey
58 from fba.networks import pleroma
60 logging.basicConfig(level=logging.INFO)
61 logger = logging.getLogger(__name__)
62 #logger.setLevel(logging.DEBUG)
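# Checks a single domain passed via --domain and reports whether it is invalid, blacklisted, already registered or not yet known.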
64 def check_instance(args: argparse.Namespace) -> int:
65 logger.debug("args.domain='%s' - CALLED!", args.domain)
68 if not validators.domain(args.domain):
69 logger.warning("args.domain='%s' is not valid", args.domain)
71 elif blacklist.is_blacklisted(args.domain):
72 logger.warning("args.domain='%s' is blacklisted", args.domain)
74 elif instances.is_registered(args.domain):
75 logger.warning("args.domain='%s' is already registered", args.domain)
78 logger.info("args.domain='%s' is not known", args.domain)
80 logger.debug("status=%d - EXIT!", status)
83 def check_nodeinfo(args: argparse.Namespace) -> int:
84 logger.debug("args[]='%s' - CALLED!", type(args))
87 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")
90 for row in database.cursor.fetchall():
91 logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
92 punycode = row["domain"].encode("idna").decode("utf-8")
94 if row["nodeinfo_url"].startswith("/"):
95 logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
97 elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
98 logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
101 logger.info("Found %d row(s)", cnt)
103 logger.debug("EXIT!")
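# Fetches the public server list from the pixelfed.org API and fetches instance data for all new, wanted domains.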
106 def fetch_pixelfed_api(args: argparse.Namespace) -> int:
107 logger.debug("args[]='%s' - CALLED!", type(args))
109 # No CSRF by default; there is no need to add network.source_headers here
111 source_domain = "pixelfed.org"
113 if sources.is_recent(source_domain):
114 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
117 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
118 sources.update(source_domain)
121 logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
122 headers = csrf.determine(source_domain, dict())
123 except network.exceptions as exception:
124 logger.warning("Exception '%s' during checking CSRF (fetch_pixelfed_api,%s) - EXIT!", type(exception), __name__)
128 logger.info("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
129 fetched = network.get_json_api(
131 "/api/v1/servers/all.json?scope=All&country=all&language=all",
133 (config.get("connection_timeout"), config.get("read_timeout"))
136 logger.debug("fetched(%d)[]='%s'", len(fetched), type(fetched))
137 if "error_message" in fetched:
138 logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
140 elif "data" not in fetched["json"]:
141 logger.warning("API did not return JSON with 'data' element - EXIT!")
144 rows = fetched["json"]["data"]
145 logger.info("Checking %d fetched rows ...", len(rows))
147 logger.debug("row[]='%s'", type(row))
148 if "domain" not in row:
149 logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
151 elif row["domain"] in [None, ""]:
152 logger.debug("row[domain]='%s' is empty - SKIPPED!", row["domain"])
155 logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
156 domain = row["domain"].encode("idna").decode("utf-8")
157 logger.debug("domain='%s' - AFTER!", domain)
159 if not domain_helper.is_wanted(domain):
160 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
162 elif instances.is_registered(domain):
163 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
165 elif instances.is_recent(domain):
166 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
169 logger.debug("Fetching instances from domain='%s' ...", domain)
170 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
172 except network.exceptions as exception:
173 logger.warning("Cannot fetch JSON from pixelfed.org API,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
176 logger.debug("Success! - EXIT!")
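# Queries the gql.api.bka.li GraphQL API for its domain list and fetches instance data for all new, wanted domains.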
179 def fetch_bkali(args: argparse.Namespace) -> int:
180 logger.debug("args[]='%s' - CALLED!", type(args))
182 logger.debug("Invoking locking.acquire() ...")
185 source_domain = "gql.api.bka.li"
186 if sources.is_recent(source_domain):
187 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
190 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
191 sources.update(source_domain)
195 logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
196 fetched = network.post_json_api(
200 "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
204 logger.debug("fetched[]='%s'", type(fetched))
205 if "error_message" in fetched:
206 logger.warning("post_json_api() for 'gql.api.bka.li' returned error message='%s' - EXIT!", fetched["error_message"])
208 elif "json" not in fetched:
209 logger.warning("post_json_api() returned fetched[]='%s' with missing 'json' element - EXIT!", type(fetched))
211 elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
212 logger.warning("post_json_api() returned error: '%s' - EXIT!", fetched["json"]["error"]["message"])
215 rows = fetched["json"]
217 logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
219 raise Exception("WARNING: Returned no records")
220 elif "data" not in rows:
221 raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
222 elif "nodeinfo" not in rows["data"]:
223 raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")
225 for entry in rows["data"]["nodeinfo"]:
226 logger.debug("entry[%s]='%s'", type(entry), entry)
227 if "domain" not in entry:
228 logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
230 elif entry["domain"] in [None, ""]:
231 logger.debug("entry[domain]='%s' is empty - SKIPPED!", entry["domain"])
233 elif not domain_helper.is_wanted(entry["domain"]):
234 logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
236 elif instances.is_registered(entry["domain"]):
237 logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
239 elif instances.is_recent(entry["domain"]):
240 logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
243 logger.debug("Adding domain='%s' ...", entry["domain"])
244 domains.append(entry["domain"])
246 except network.exceptions as exception:
247 logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
250 logger.debug("domains()=%d", len(domains))
252 logger.info("Adding %d new instances ...", len(domains))
253 for domain in domains:
254 logger.debug("domain='%s' - BEFORE!", domain)
255 domain = domain.encode("idna").decode("utf-8")
256 logger.debug("domain='%s' - AFTER!", domain)
259 logger.info("Fetching instances from domain='%s' ...", domain)
260 federation.fetch_instances(domain, "tak.teleyal.blog", None, inspect.currentframe().f_code.co_name)
261 except network.exceptions as exception:
262 logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
263 instances.set_last_error(domain, exception)
266 logger.debug("Success - EXIT!")
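# Fetches block lists from registered instances (optionally restricted to one domain or software type), deobfuscates obfuscated entries where possible and stores the resulting blocks.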
269 def fetch_blocks(args: argparse.Namespace) -> int:
270 logger.debug("args[]='%s' - CALLED!", type(args))
271 if args.domain is not None and args.domain != "":
272 logger.debug("args.domain='%s' - checking ...", args.domain)
273 if not validators.domain(args.domain):
274 logger.warning("args.domain='%s' is not valid.", args.domain)
276 elif blacklist.is_blacklisted(args.domain):
277 logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
279 elif not instances.is_registered(args.domain):
280 logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
283 logger.debug("Invoking locking.acquire() ...")
286 if args.domain is not None and args.domain != "":
287 # Re-check single domain
288 logger.debug("Querying database for args.domain='%s' ...", args.domain)
289 database.cursor.execute(
290 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ? LIMIT 1", [args.domain]
292 elif args.software is not None and args.software != "":
293 # Re-check single software
294 logger.debug("Querying database for args.software='%s' ...", args.software)
295 database.cursor.execute(
296 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC", [args.software]
299 # Check only entries with total_blocks=None
300 database.cursor.execute(
301 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND total_blocks IS NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC"
304 # Re-check after the "timeout" (i.e. the minimum interval)
305 database.cursor.execute(
306 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC"
309 rows = database.cursor.fetchall()
310 logger.info("Checking %d entries ...", len(rows))
311 for blocker, software, origin, nodeinfo_url in rows:
312 logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
314 if not domain_helper.is_wanted(blocker):
315 logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
317 elif not args.force and instances.is_recent(blocker, "last_blocked"):
318 logger.debug("blocker='%s' has been recently accessed - SKIPPED!", blocker)
321 logger.debug("Setting last_blocked,has_obfuscation=false for blocker='%s' ...", blocker)
322 instances.set_last_blocked(blocker)
323 instances.set_has_obfuscation(blocker, False)
325 # chaos.social isn't part of oliphant's "hidden" blocklists
326 if blocker == "chaos.social" or software_helper.is_relay(software) or blocklists.has(blocker):
327 logger.debug("Skipping blocker='%s', run ./fba.py fetch_cs, fetch_oliphant, fetch_csv instead!", blocker)
330 logger.debug("Invoking federation.fetch_blocks(%s) ...", blocker)
331 blocking = federation.fetch_blocks(blocker)
333 logger.debug("blocker='%s',software='%s',blocking()=%d", blocker, software, len(blocking))
334 if len(blocking) == 0:
335 logger.debug("blocker='%s',software='%s' - fetching blocklist ...", blocker, software)
336 if software == "pleroma":
337 blocking = pleroma.fetch_blocks(blocker)
338 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
339 elif software == "mastodon":
340 blocking = mastodon.fetch_blocks(blocker)
341 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
342 elif software == "lemmy":
343 blocking = lemmy.fetch_blocks(blocker)
344 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
345 elif software == "friendica":
346 blocking = friendica.fetch_blocks(blocker)
347 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
348 elif software == "misskey":
349 blocking = misskey.fetch_blocks(blocker)
350 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
352 logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)
354 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
355 instances.set_total_blocks(blocker, blocking)
358 deobfuscated = obfuscated = 0
360 logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
361 for block in blocking:
362 logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])
364 if block["block_level"] == "":
365 logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
368 logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
369 block["blocked"] = tidyup.domain(block["blocked"])
370 block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
371 logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])
373 if block["blocked"] in [None, ""]:
374 logger.warning("block[blocked]='%s' is empty, blocker='%s'", block["blocked"], blocker)
376 elif block["blocked"].endswith(".onion"):
377 logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
379 elif block["blocked"].endswith(".i2p") and not config.get("allow_i2p_domain"):
380 logger.debug("blocked='%s' is an I2P domain - SKIPPED", block["blocked"])
382 elif block["blocked"].endswith(".arpa"):
383 logger.debug("blocked='%s' is a reverse DNS (.arpa) domain - SKIPPED", block["blocked"])
385 elif block["blocked"].endswith(".tld"):
386 logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
388 elif block["blocked"].find("*") >= 0:
389 logger.debug("blocker='%s' uses obfuscated domains", blocker)
390 instances.set_has_obfuscation(blocker, True)
391 obfuscated = obfuscated + 1
393 # Some Friendica servers also obfuscate domains without a hash
394 row = instances.deobfuscate("*", block["blocked"], block["digest"] if "digest" in block else None)
396 logger.debug("row[]='%s'", type(row))
398 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
401 deobfuscated = deobfuscated + 1
402 block["blocked"] = row["domain"]
403 origin = row["origin"]
404 nodeinfo_url = row["nodeinfo_url"]
405 elif block["blocked"].find("?") >= 0:
406 logger.debug("blocker='%s' uses obfuscated domains", blocker)
407 instances.set_has_obfuscation(blocker, True)
408 obfuscated = obfuscated + 1
410 # Some servers obfuscate domains with question marks; it is unclear whether that depends on the software version
411 row = instances.deobfuscate("?", block["blocked"], block["digest"] if "digest" in block else None)
413 logger.debug("row[]='%s'", type(row))
415 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
418 deobfuscated = deobfuscated + 1
419 block["blocked"] = row["domain"]
420 origin = row["origin"]
421 nodeinfo_url = row["nodeinfo_url"]
423 logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
424 if block["blocked"] in [None, ""]:
425 logger.debug("block[blocked]='%s' is empty - SKIPPED!", block["blocked"])
428 logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
429 block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
430 logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
432 if not domain_helper.is_wanted(block["blocked"]):
433 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
435 elif block["block_level"] in ["accept", "accepted"]:
436 logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
438 elif not instances.is_registered(block["blocked"]):
439 logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
440 federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)
442 block["block_level"] = blocks.alias_block_level(block["block_level"])
444 if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
445 logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
447 "blocked": block["blocked"],
448 "reason" : block["reason"],
451 logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
452 cookies.clear(block["blocked"])
454 logger.info("blocker='%s' has %d obfuscated domain(s) and %d of them could be deobfuscated.", blocker, obfuscated, deobfuscated)
455 instances.set_obfuscated_blocks(blocker, obfuscated)
457 logger.debug("Flushing updates for blocker='%s' ...", blocker)
458 instances.update(blocker)
460 logger.debug("Invoking commit() ...")
461 database.connection.commit()
463 logger.debug("Invoking cookies.clear(%s) ...", blocker)
464 cookies.clear(blocker)
466 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
467 if config.get("bot_enabled") and len(blockdict) > 0:
468 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
469 network.send_bot_post(blocker, blockdict)
471 logger.debug("Success! - EXIT!")
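# Imports domains from fediverse.observer: scrapes the software menu (or uses --software) and queries its GraphQL API per software type.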
474 def fetch_observer(args: argparse.Namespace) -> int:
475 logger.debug("args[]='%s' - CALLED!", type(args))
477 logger.debug("Invoking locking.acquire() ...")
480 source_domain = "fediverse.observer"
481 if sources.is_recent(source_domain):
482 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
485 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
486 sources.update(source_domain)
489 if args.software is None:
490 logger.info("Fetching software list ...")
491 raw = network.fetch_url(
492 f"https://{source_domain}",
494 (config.get("connection_timeout"), config.get("read_timeout"))
496 logger.debug("raw[%s]()=%d", type(raw), len(raw))
498 doc = bs4.BeautifulSoup(raw, features="html.parser")
499 logger.debug("doc[]='%s'", type(doc))
501 navbar = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"})
502 logger.debug("navbar[]='%s'", type(navbar))
504 logger.warning("Cannot find navigation bar, cannot continue!")
507 items = navbar.findAll("a", {"class": "dropdown-item"})
508 logger.debug("items[]='%s'", type(items))
510 logger.info("Checking %d menu items ...", len(items))
512 logger.debug("item[%s]='%s'", type(item), item)
513 if item.text.lower() == "all":
514 logger.debug("Skipping 'All' menu entry ...")
517 logger.debug("Appending item.text='%s' ...", item.text)
518 types.append(tidyup.domain(item.text))
520 logger.info("Adding args.software='%s' as type ...", args.software)
521 types.append(args.software)
523 logger.info("Fetching %d different table data ...", len(types))
524 for software in types:
525 logger.debug("software='%s'", software)
527 if args.software is not None and args.software != software:
528 logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
533 logger.debug("Fetching table data for software='%s' ...", software)
534 raw = network.post_json_api(
535 f"api.{source_domain}",
538 "query": "{nodes(softwarename:\"" + software + "\"){domain}}"
542 logger.debug("raw[%s]()=%d", type(raw), len(raw))
543 if "exception" in raw:
544 logger.warning("software='%s' has caused an exception: '%s' - raising again ...", software, type(raw["exception"]))
545 raise raw["exception"]
546 elif "error_message" in raw:
547 logger.warning("software='%s' has caused error message: '%s' - SKIPPED!", software, raw["error_message"])
549 elif "data" not in raw["json"]:
550 logger.warning("Cannot find key 'data' in raw[json]()=%d", len(raw["json"]))
552 elif "nodes" not in raw["json"]["data"]:
553 logger.warning("Cannot find key 'nodes' in raw[json][data]()=%d", len(raw["json"]["data"]))
556 items = raw["json"]["data"]["nodes"]
557 logger.debug("items()=%d", len(items))
559 except network.exceptions as exception:
560 logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
563 logger.info("Checking %d items,software='%s' ...", len(items), software)
565 logger.debug("item[]='%s'", type(item))
566 if "domain" not in item:
567 logger.debug("item()=%d has no element 'domain'", len(item))
570 logger.debug("item[domain]='%s' - BEFORE!", item["domain"])
571 domain = tidyup.domain(item["domain"]) if item["domain"] not in [None, ""] else None
572 logger.debug("domain='%s' - AFTER!", domain)
574 if domain in [None, ""]:
575 logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
578 logger.debug("domain='%s' - BEFORE!", domain)
579 domain = domain.encode("idna").decode("utf-8")
580 logger.debug("domain='%s' - AFTER!", domain)
582 if not domain_helper.is_wanted(domain):
583 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
585 elif instances.is_registered(domain):
586 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
589 logger.info("Fetching instances for domain='%s',software='%s' ...", domain, software)
590 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
592 logger.debug("Success! - EXIT!")
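# Parses the silenced/limited and suspended server lists from wiki.todon.eu and records them as blocks.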
595 def fetch_todon_wiki(args: argparse.Namespace) -> int:
596 logger.debug("args[]='%s' - CALLED!", type(args))
598 logger.debug("Invoking locking.acquire() ...")
601 source_domain = "wiki.todon.eu"
602 if sources.is_recent(source_domain):
603 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
606 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
607 sources.update(source_domain)
614 logger.debug("Fetching domainblocks from source_domain='%s'", source_domain)
615 raw = network.fetch_url(
616 f"https://{source_domain}/todon/domainblocks",
618 (config.get("connection_timeout"), config.get("read_timeout"))
620 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
622 doc = bs4.BeautifulSoup(raw, "html.parser")
623 logger.debug("doc[]='%s'", type(doc))
625 silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
626 logger.info("Checking %d silenced/limited entries ...", len(silenced))
627 blocklist["silenced"] = utils.find_domains(silenced, "div")
629 suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
630 logger.info("Checking %d suspended entries ...", len(suspended))
631 blocklist["reject"] = utils.find_domains(suspended, "div")
633 blocking = blocklist["silenced"] + blocklist["reject"]
636 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
637 instances.set_last_blocked(blocker)
638 instances.set_total_blocks(blocker, blocking)
641 for block_level in blocklist:
642 blockers = blocklist[block_level]
644 logger.debug("block_level='%s',blockers()=%d'", block_level, len(blockers))
645 for blocked in blockers:
646 logger.debug("blocked='%s'", blocked)
648 if not instances.is_registered(blocked):
650 logger.info("Fetching instances from domain='%s' ...", blocked)
651 federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
652 except network.exceptions as exception:
653 logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
654 instances.set_last_error(blocked, exception)
656 if not domain_helper.is_wanted(blocked):
657 logger.warning("blocked='%s' is not wanted - SKIPPED!", blocked)
659 elif not domain_helper.is_wanted(blocker):
660 logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
662 elif blocks.is_instance_blocked(blocker, blocked, block_level):
663 logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
666 logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
667 if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
668 logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", blocked, block_level, blocker)
674 logger.debug("Invoking commit() ...")
675 database.connection.commit()
677 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
678 if config.get("bot_enabled") and len(blockdict) > 0:
679 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
680 network.send_bot_post(blocker, blockdict)
682 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
683 if instances.has_pending(blocker):
684 logger.debug("Flushing updates for blocker='%s' ...", blocker)
685 instances.update(blocker)
687 logger.debug("Success! - EXIT!")
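# Parses chaos.social's federation.md (silenced and blocked tables) fetched from raw.githubusercontent.com and records the blocks.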
690 def fetch_cs(args: argparse.Namespace):
691 logger.debug("args[]='%s' - CALLED!", type(args))
693 logger.debug("Invoking locking.acquire() ...")
721 source_domain = "raw.githubusercontent.com"
722 if sources.is_recent(source_domain):
723 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
726 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
727 sources.update(source_domain)
729 logger.info("Fetching federation.md from source_domain='%s' ...", source_domain)
730 raw = network.fetch_url(
731 f"https://{source_domain}/chaossocial/meta/master/federation.md",
733 (config.get("connection_timeout"), config.get("read_timeout"))
735 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
737 doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
738 logger.debug("doc()=%d[]='%s'", len(doc), type(doc))
740 silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
741 logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
742 blocklist["silenced"] = federation.find_domains(silenced)
744 blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
745 logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
746 blocklist["reject"] = federation.find_domains(blocked)
748 blocking = blocklist["silenced"] + blocklist["reject"]
749 blocker = "chaos.social"
751 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
752 instances.set_last_blocked(blocker)
753 instances.set_total_blocks(blocker, blocking)
755 logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
756 if len(blocking) > 0:
758 for block_level in blocklist:
759 logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))
761 for row in blocklist[block_level]:
762 logger.debug("row[%s]='%s'", type(row), row)
763 if "domain" not in row:
764 logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
766 elif not instances.is_registered(row["domain"]):
768 logger.info("Fetching instances from domain='%s' ...", row["domain"])
769 federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
770 except network.exceptions as exception:
771 logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
772 instances.set_last_error(row["domain"], exception)
774 if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
775 logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
777 "blocked": row["domain"],
778 "reason" : row["reason"],
781 logger.debug("Invoking commit() ...")
782 database.connection.commit()
784 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
785 if config.get("bot_enabled") and len(blockdict) > 0:
786 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
787 network.send_bot_post(blocker, blockdict)
789 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
790 if instances.has_pending(blocker):
791 logger.debug("Flushing updates for blocker='%s' ...", blocker)
792 instances.update(blocker)
794 logger.debug("Success! - EXIT!")
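# Reads an FBA-compatible RSS feed given via --feed and fetches instance data for all new, wanted domains found in the item links.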
797 def fetch_fba_rss(args: argparse.Namespace) -> int:
798 logger.debug("args[]='%s' - CALLED!", type(args))
802 logger.debug("Invoking locking.acquire() ...")
805 components = urlparse(args.feed)
806 domain = components.netloc.lower().split(":")[0]
808 logger.debug("domain='%s'", domain)
809 if sources.is_recent(domain):
810 logger.info("API from domain='%s' has recently been accessed - EXIT!", domain)
813 logger.debug("domain='%s' has not been recently used, marking ...", domain)
814 sources.update(domain)
816 logger.info("Fetching FBA-specific RSS args.feed='%s' ...", args.feed)
817 response = network.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
819 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
820 if response.ok and response.status_code == 200 and len(response.text) > 0:
821 logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
822 rss = atoma.parse_rss_bytes(response.content)
824 logger.debug("rss[]='%s'", type(rss))
825 for item in rss.items:
826 logger.debug("item[%s]='%s'", type(item), item)
827 domain = item.link.split("=")[1]
828 domain = tidyup.domain(domain) if domain not in [None, ""] else None
830 logger.debug("domain='%s' - AFTER!", domain)
831 if domain in [None, ""]:
832 logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
835 logger.debug("domain='%s' - BEFORE!", domain)
836 domain = domain.encode("idna").decode("utf-8")
837 logger.debug("domain='%s' - AFTER!", domain)
839 if not domain_helper.is_wanted(domain):
840 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
842 elif domain in domains:
843 logger.debug("domain='%s' is already added - SKIPPED!", domain)
845 elif instances.is_registered(domain):
846 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
848 elif instances.is_recent(domain):
849 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
852 logger.debug("Adding domain='%s'", domain)
853 domains.append(domain)
855 logger.debug("domains()=%d", len(domains))
857 logger.info("Adding %d new instances ...", len(domains))
858 for domain in domains:
859 logger.debug("domain='%s'", domain)
861 logger.info("Fetching instances from domain='%s' ...", domain)
862 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
863 except network.exceptions as exception:
864 logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
865 instances.set_last_error(domain, exception)
868 logger.debug("Success! - EXIT!")
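# Reads the FBA bot's ATOM feed (ryona.agency by default, overridable via --feed) and fetches instance data for domains linked in the entries.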
871 def fetch_fbabot_atom(args: argparse.Namespace) -> int:
872 logger.debug("args[]='%s' - CALLED!", type(args))
874 logger.debug("Invoking locking.acquire() ...")
877 source_domain = "ryona.agency"
878 feed = f"https://{source_domain}/users/fba/feed.atom"
880 logger.debug("args.feed[%s]='%s'", type(args.feed), args.feed)
881 if args.feed is not None and validators.url(args.feed):
882 logger.debug("Setting feed='%s' ...", args.feed)
883 feed = str(args.feed)
884 source_domain = urlparse(args.feed).netloc
886 if sources.is_recent(source_domain):
887 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
890 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
891 sources.update(source_domain)
895 logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
896 response = network.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
898 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
899 if response.ok and response.status_code == 200 and len(response.text) > 0:
900 logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
901 atom = atoma.parse_atom_bytes(response.content)
903 logger.debug("atom[]='%s'", type(atom))
904 for entry in atom.entries:
905 logger.debug("entry[]='%s'", type(entry))
906 doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
907 logger.debug("doc[]='%s'", type(doc))
908 elements = doc.findAll("a")
910 logger.debug("Checking %d element(s) ...", len(elements))
911 for element in elements:
912 logger.debug("element[%s]='%s'", type(element), element)
913 for href in element["href"].split(","):
914 logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
915 domain = tidyup.domain(href) if href not in [None, ""] else None
917 logger.debug("domain='%s' - AFTER!", domain)
918 if domain in [None, ""]:
919 logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
922 logger.debug("domain='%s' - BEFORE!", domain)
923 domain = domain.encode("idna").decode("utf-8")
924 logger.debug("domain='%s' - AFTER!", domain)
926 if not domain_helper.is_wanted(domain):
927 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
929 elif domain in domains:
930 logger.debug("domain='%s' is already added - SKIPPED!", domain)
932 elif instances.is_registered(domain):
933 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
935 elif instances.is_recent(domain):
936 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
939 logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
940 domains.append(domain)
942 logger.debug("domains()=%d", len(domains))
944 logger.info("Adding %d new instances ...", len(domains))
945 for domain in domains:
946 logger.debug("domain='%s'", domain)
948 logger.info("Fetching instances from domain='%s' ...", domain)
949 federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
950 except network.exceptions as exception:
951 logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
952 instances.set_last_error(domain, exception)
955 logger.debug("Success! - EXIT!")
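# Fetches peers either for a single domain or software type given on the command line, or for all supported instances whose last instance fetch is older than the configured recheck interval.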
958 def fetch_instances(args: argparse.Namespace) -> int:
959 logger.debug("args[]='%s' - CALLED!", type(args))
961 logger.debug("Invoking locking.acquire() ...")
967 # Is domain or software set?
968 if args.domain not in [None, ""]:
969 logger.debug("args.domain='%s' - checking ...", args.domain)
970 if not validators.domain(args.domain):
971 logger.warning("args.domain='%s' is not valid.", args.domain)
973 elif blacklist.is_blacklisted(args.domain):
974 logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
977 logger.debug("args.domain='%s' - BEFORE!", args.domain)
978 domain = tidyup.domain(args.domain)
979 logger.debug("domain='%s' - AFTER!", domain)
982 database.cursor.execute("SELECT domain, origin, software FROM instances WHERE domain = ? LIMIT 1", [domain])
983 rows = database.cursor.fetchall()
984 elif args.software not in [None, ""]:
985 logger.debug("args.software='%s' - BEFORE!", args.software)
986 software = software_helper.alias(args.software)
987 logger.debug("software='%s' - AFTER!", software)
990 database.cursor.execute("SELECT domain, origin, software FROM instances WHERE software = ? ORDER BY last_updated ASC", [software])
991 rows = database.cursor.fetchall()
993 logger.info("Checking %d entries ...", len(rows))
995 logger.debug("row[domain]='%s',row[origin]='%s',row[software]='%s'", row["domain"], row["origin"], row["software"])
996 if row["software"] is None and instances.is_registered(row["domain"]):
997 logger.warning("row[domain]='%s' has no software detected. You can try to run ./fba.py update_nodeinfo --domain=%s --force to get it updated - SKIPPED!", row["domain"], row["domain"])
999 elif software_helper.is_relay(row["software"]) and instances.is_registered(row["domain"]):
1000 logger.warning("row[domain]='%s' is of software type '%s' which is not supported by this command. Please invoke fetch_relays instead - SKIPPED!", row["domain"], row["software"])
1002 elif not args.force and args.software not in [None, ""] and instances.is_recent(row["domain"]):
1003 logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
1008 logger.info("Fetching instances from row[domain]='%s',row[origin]='%s',row[software]='%s' ...", row["domain"], row["origin"], row["software"])
1009 federation.fetch_instances(row["domain"], row["origin"], row["software"], inspect.currentframe().f_code.co_name)
1010 except network.exceptions as exception:
1011 logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[domain]='%s'", type(exception), row["domain"])
1012 instances.set_last_error(row["domain"], exception)
1013 instances.update(row["domain"])
1017 logger.debug("Not fetching more instances - BREAK!")
1020 # Loop through some instances
1021 database.cursor.execute(
1022 "SELECT domain, origin, software \
1024 WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus', 'neodb', 'smithereen', 'vebinet') \
1025 AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) \
1026 ORDER BY total_peers DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_instance")]
1029 rows = database.cursor.fetchall()
1030 logger.info("Checking %d entries ...", len(rows))
1032 logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
1033 domain = row["domain"].encode("idna").decode("utf-8")
1034 logger.debug("domain='%s' - AFTER!", domain)
1036 if not domain_helper.is_wanted(domain):
1037 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1041 logger.info("Fetching instances for domain='%s',origin='%s',software='%s' ...", domain, row["origin"], row["software"])
1042 federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name)
1043 except network.exceptions as exception:
1044 logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
1045 instances.set_last_error(domain, exception)
1047 logger.debug("Success - EXIT!")
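# Processes all CSV block lists configured in blocklists.csv_files, optionally restricted to a single blocker via --domain.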
1050 def fetch_csv(args: argparse.Namespace) -> int:
1051 logger.debug("args[]='%s' - CALLED!", type(args))
1053 logger.debug("Invoking locking.acquire() ...")
1056 logger.info("Checking %d CSV files ...", len(blocklists.csv_files))
1057 for block in blocklists.csv_files:
1058 logger.debug("block[blocker]='%s',block[csv_url]='%s'", block["blocker"], block["csv_url"])
1060 # Is domain given and not equal blocker?
1061 if isinstance(args.domain, str) and args.domain != block["blocker"]:
1062 logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
1065 logger.debug("Invoking processing.csv_block(%s, %s, fetch_csv) ...", block["blocker"], block["csv_url"])
1066 processing.csv_block(block["blocker"], block["csv_url"], inspect.currentframe().f_code.co_name)
1068 logger.debug("Success - EXIT!")
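# Downloads oliphant's block list CSV files from codeberg.org and processes them per blocker.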
1071 def fetch_oliphant(args: argparse.Namespace) -> int:
1072 logger.debug("args[]='%s' - CALLED!", type(args))
1074 logger.debug("Invoking locking.acquire() ...")
1077 source_domain = "codeberg.org"
1078 if sources.is_recent(source_domain):
1079 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1082 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1083 sources.update(source_domain)
1086 base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"
1088 logger.debug("Downloading %d files ...", len(blocklists.oliphant_blocklists))
1089 for block in blocklists.oliphant_blocklists:
1090 # Is domain given and not equal blocker?
1091 logger.debug("block[blocker]='%s',block[csv_url]='%s'", block["blocker"], block["csv_url"])
1092 if isinstance(args.domain, str) and args.domain != block["blocker"]:
1093 logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
1096 url = f"{base_url}/{block['csv_url']}"
1098 logger.debug("Invoking processing.csv_block(%s, %s, fetch_oliphant) ...", block["blocker"], url)
1099 processing.csv_block(block["blocker"], url, inspect.currentframe().f_code.co_name)
1101 logger.debug("Success! - EXIT!")
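# Downloads the plain-text block lists configured in blocklists.txt_files and processes each listed domain.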
1104 def fetch_txt(args: argparse.Namespace) -> int:
1105 logger.debug("args[]='%s' - CALLED!", type(args))
1107 logger.debug("Invoking locking.acquire() ...")
1110 logger.info("Checking %d text file(s) ...", len(blocklists.txt_files))
1111 for row in blocklists.txt_files:
1112 logger.debug("Fetching row[url]='%s' ...", row["url"])
1113 response = network.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
1115 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1116 if response.ok and response.status_code == 200 and response.text != "":
1117 logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
1118 domains = response.text.strip().split("\n")
1120 logger.info("Processing %d domains ...", len(domains))
1121 for domain in domains:
1122 logger.debug("domain='%s' - BEFORE!", domain)
1123 domain = tidyup.domain(domain) if domain not in [None, ""] else None
1124 logger.debug("domain='%s' - AFTER!", domain)
1126 if domain in [None, ""]:
1127 logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
1129 elif not domain_helper.is_wanted(domain):
1130 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1132 elif not args.force and instances.is_registered(domain):
1133 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1136 logger.debug("Processing domain='%s',row[blocker]='%s' ...", domain, row["blocker"])
1137 processed = processing.instance(domain, row["blocker"], inspect.currentframe().f_code.co_name, force=args.force)
1138 logger.debug("processed='%s'", processed)
1140 logger.debug("Success! - EXIT!")
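# Scrapes the participant list from fedipact.online and fetches instance data for all new, wanted domains.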
1143 def fetch_fedipact(args: argparse.Namespace) -> int:
1144 logger.debug("args[]='%s' - CALLED!", type(args))
1146 logger.debug("Invoking locking.acquire() ...")
1149 source_domain = "fedipact.online"
1150 if sources.is_recent(source_domain):
1151 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1154 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1155 sources.update(source_domain)
1157 logger.info("Fetching / from source_domain='%s' ...", source_domain)
1158 response = network.fetch_url(
1159 f"https://{source_domain}",
1160 network.web_headers,
1161 (config.get("connection_timeout"), config.get("read_timeout"))
1164 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1165 if response.ok and response.status_code == 200 and response.text != "":
1166 logger.debug("Parsing %d Bytes ...", len(response.text))
1168 doc = bs4.BeautifulSoup(response.text, "html.parser")
1169 logger.debug("doc[]='%s'", type(doc))
1171 rows = doc.findAll("li")
1172 logger.info("Checking %d row(s) ...", len(rows))
1174 logger.debug("row[]='%s'", type(row))
1175 domain = tidyup.domain(row.contents[0]) if row.contents[0] not in [None, ""] else None
1177 logger.debug("domain='%s' - AFTER!", domain)
1178 if domain in [None, ""]:
1179 logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
1182 logger.debug("domain='%s' - BEFORE!", domain)
1183 domain = domain.encode("idna").decode("utf-8")
1184 logger.debug("domain='%s' - AFTER!", domain)
1186 if not domain_helper.is_wanted(domain):
1187 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1189 elif instances.is_registered(domain):
1190 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1192 elif instances.is_recent(domain):
1193 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1196 logger.info("Fetching domain='%s' ...", domain)
1197 federation.fetch_instances(domain, "beach.city", None, inspect.currentframe().f_code.co_name)
1199 logger.debug("Success! - EXIT!")
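# Fetches the instance list from instances.joinmobilizon.org and fetches instance data for all new, wanted hosts.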
1202 def fetch_joinmobilizon(args: argparse.Namespace) -> int:
1203 logger.debug("args[]='%s' - CALLED!", type(args))
1205 logger.debug("Invoking locking.acquire() ...")
1208 source_domain = "instances.joinmobilizon.org"
1209 if sources.is_recent(source_domain):
1210 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1213 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1214 sources.update(source_domain)
1216 logger.info("Fetching instances from source_domain='%s' ...", source_domain)
1217 raw = network.fetch_url(
1218 f"https://{source_domain}/api/v1/instances",
1219 network.web_headers,
1220 (config.get("connection_timeout"), config.get("read_timeout"))
1222 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1224 parsed = json.loads(raw)
1225 logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
1227 if "data" not in parsed:
1228 logger.warning("parsed()=%d does not contain key 'data'", len(parsed))
1231 logger.info("Checking %d instances ...", len(parsed["data"]))
1232 for row in parsed["data"]:
1233 logger.debug("row[]='%s'", type(row))
1234 if "host" not in row:
1235 logger.warning("row='%s' does not contain key 'host' - SKIPPED!", row)
1237 elif not domain_helper.is_wanted(row["host"]):
1238 logger.debug("row[host]='%s' is not wanted - SKIPPED!", row["host"])
1240 elif instances.is_registered(row["host"]):
1241 logger.debug("row[host]='%s' is already registered - SKIPPED!", row["host"])
1244 logger.info("Fetching row[host]='%s' ...", row["host"])
1245 federation.fetch_instances(row["host"], "demo.mobilizon.org", None, inspect.currentframe().f_code.co_name)
1247 logger.debug("Success! - EXIT!")
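# Fetches instances.json from instanceapp.misskey.page and fetches instance data for all new, wanted Misskey instances.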
1250 def fetch_joinmisskey(args: argparse.Namespace) -> int:
1251 logger.debug("args[]='%s' - CALLED!", type(args))
1253 logger.debug("Invoking locking.acquire() ...")
1256 source_domain = "instanceapp.misskey.page"
1257 if sources.is_recent(source_domain):
1258 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1261 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1262 sources.update(source_domain)
1264 logger.info("Fetching instances.json from source_domain='%s' ...", source_domain)
1265 raw = network.fetch_url(
1266 f"https://{source_domain}/instances.json",
1267 network.web_headers,
1268 (config.get("connection_timeout"), config.get("read_timeout"))
1270 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1272 parsed = json.loads(raw)
1273 logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
1275 if "instancesInfos" not in parsed:
1276 logger.warning("parsed()=%d does not contain element 'instancesInfos'", len(parsed))
1279 logger.info("Checking %d instance(s) ...", len(parsed["instancesInfos"]))
1280 for row in parsed["instancesInfos"]:
1281 logger.debug("row[%s]='%s'", type(row), row)
1282 if "url" not in row:
1283 logger.warning("row()=%d does not have element 'url' - SKIPPED!", len(row))
1285 elif not domain_helper.is_wanted(row["url"]):
1286 logger.debug("row[url]='%s' is not wanted - SKIPPED!", row["url"])
1288 elif instances.is_registered(row["url"]):
1289 logger.debug("row[url]='%s' is already registered - SKIPPED!", row["url"])
1292 logger.info("Fetching row[url]='%s' ...", row["url"])
1293 federation.fetch_instances(row["url"], "misskey.io", None, inspect.currentframe().f_code.co_name)
1295 logger.debug("Success! - EXIT!")
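# Re-fetches block lists from instances with (possibly) obfuscated entries, tries to deobfuscate them and updates block records and obfuscation counters.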
1298 def recheck_obfuscation(args: argparse.Namespace) -> int:
1299 logger.debug("args[]='%s' - CALLED!", type(args))
1301 logger.debug("Invoking locking.acquire() ...")
1304 if isinstance(args.domain, str) and args.domain != "" and domain_helper.is_wanted(args.domain):
1305 logger.debug("Fetching record for args.domain='%s' ...", args.domain)
1306 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE (has_obfuscation = 1 OR has_obfuscation IS NULL) AND domain = ?", [args.domain])
1307 elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
1308 logger.debug("Fetching records for args.software='%s' ...", args.software)
1309 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE (has_obfuscation = 1 OR has_obfuscation IS NULL) AND software = ?", [args.software])
1311 logger.debug("Fetching records where domains have obfuscated block entries ...")
1312 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 OR has_obfuscation IS NULL")
1314 rows = database.cursor.fetchall()
1315 logger.info("Checking %d domains ...", len(rows))
1317 logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
1318 if blacklist.is_blacklisted(row["domain"]):
1319 logger.debug("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"])
1321 elif (args.force is None or not args.force) and args.domain is None and args.software is None and instances.is_recent(row["domain"], "last_blocked"):
1322 logger.debug("row[domain]='%s' has been recently checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force))
1325 logger.debug("Invoking federation.fetch_blocks(%s) ...", row["domain"])
1326 blocking = federation.fetch_blocks(row["domain"])
1328 logger.debug("blocking()=%d", len(blocking))
1329 if len(blocking) == 0:
1330 logger.debug("Empty blocking list, trying individual fetch_blocks() for row[software]='%s' ...", row["software"])
1331 if row["software"] == "pleroma":
1332 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1333 blocking = pleroma.fetch_blocks(row["domain"])
1334 elif row["software"] == "mastodon":
1335 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1336 blocking = mastodon.fetch_blocks(row["domain"])
1337 elif row["software"] == "lemmy":
1338 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1339 blocking = lemmy.fetch_blocks(row["domain"])
1340 elif row["software"] == "friendica":
1341 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1342 blocking = friendica.fetch_blocks(row["domain"])
1343 elif row["software"] == "misskey":
1344 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1345 blocking = misskey.fetch_blocks(row["domain"])
1347 logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])
1349 # chaos.social isn't part of oliphant's "hidden" blocklists
1350 logger.debug("row[domain]='%s'", row["domain"])
1351 if row["domain"] != "chaos.social" and row["software"] is not None and not software_helper.is_relay(row["software"]) and not blocklists.has(row["domain"]):
1352 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
1353 instances.set_last_blocked(row["domain"])
1354 instances.set_total_blocks(row["domain"], blocking)
1359 logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1360 for block in blocking:
1361 logger.debug("block[blocked]='%s'", block["blocked"])
1364 if block["blocked"] == "":
1365 logger.debug("block[blocked] is empty - SKIPPED!")
1367 elif block["blocked"].endswith(".onion"):
1368 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
1370 elif block["blocked"].endswith(".i2p") and not config.get("allow_i2p_domain"):
1371 logger.debug("blocked='%s' is an I2P domain name - SKIPPED!", block["blocked"])
1373 elif block["blocked"].endswith(".arpa"):
1374 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1376 elif block["blocked"].endswith(".tld"):
1377 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1379 elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
1380 logger.debug("block='%s' is obfuscated.", block["blocked"])
1381 obfuscated = obfuscated + 1
1382 blocked = utils.deobfuscate(block["blocked"], row["domain"], block["digest"] if "digest" in block else None)
1383 elif not domain_helper.is_wanted(block["blocked"]):
1384 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1386 elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1387 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
1390 logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
1391 if blocked is not None and blocked != block["blocked"]:
1392 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1393 obfuscated = obfuscated - 1
1395 if blacklist.is_blacklisted(blocked):
1396 logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
1398 elif blacklist.is_blacklisted(row["domain"]):
1399 logger.debug("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"])
1401 elif blocks.is_instance_blocked(row["domain"], blocked):
1402 logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
1405 block["block_level"] = blocks.alias_block_level(block["block_level"])
1407 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
1408 if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1409 logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
1412 "reason" : block["reason"],
1415 logger.debug("Setting obfuscated=%d for row[domain]='%s' ...", obfuscated, row["domain"])
1416 instances.set_has_obfuscation(row["domain"], (obfuscated > 0))
1417 instances.set_obfuscated_blocks(row["domain"], obfuscated)
1419 logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
1420 if instances.has_pending(row["domain"]):
1421 logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1422 instances.update(row["domain"])
1424 logger.debug("Invoking commit() ...")
1425 database.connection.commit()
1427 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1428 if config.get("bot_enabled") and len(blockdict) > 0:
1429 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", row["domain"], len(blockdict))
1430 network.send_bot_post(row["domain"], blockdict)
1432 logger.debug("Success! - EXIT!")
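# Downloads the instance CSV from demo.fedilist.com (optionally filtered by --software) and fetches instance data for all new, wanted domains.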
1435 def fetch_fedilist(args: argparse.Namespace) -> int:
1436 logger.debug("args[]='%s' - CALLED!", type(args))
1438 logger.debug("Invoking locking.acquire() ...")
1441 source_domain = "demo.fedilist.com"
1442 if sources.is_recent(source_domain):
1443 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1446 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1447 sources.update(source_domain)
1449 url = f"http://{source_domain}/instance/csv?onion=not"
1450 if args.software is not None and args.software != "":
1451 logger.debug("args.software='%s'", args.software)
1452 url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"
1454 logger.info("Fetching url='%s' ...", url)
1455 response = reqto.get(
1457 headers=network.web_headers,
1458 timeout=(config.get("connection_timeout"), config.get("read_timeout")),
1459 allow_redirects=False
1462 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1463 if not response.ok or response.status_code > 200 or len(response.content) == 0:
1464 logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.text))
1467 reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
1469 logger.debug("reader[]='%s'", type(reader))
1471 logger.warning("Failed parsing response.content()=%d as CSV content", len(response.content))
1476 logger.info("Checking %d rows ...", len(rows))
1478 logger.debug("row[]='%s'", type(row))
1479 if "hostname" not in row:
1480 logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(row))
1483 logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"])
1484 domain = tidyup.domain(row["hostname"]) if row["hostname"] not in [None, ""] else None
1485 logger.debug("domain='%s' - AFTER!", domain)
1487 if domain in [None, ""]:
1488 logger.debug("domain='%s' is empty after tidyup.domain(): row[hostname]='%s' - SKIPPED!", domain, row["hostname"])
1491 logger.debug("domain='%s' - BEFORE!", domain)
1492 domain = domain.encode("idna").decode("utf-8")
1493 logger.debug("domain='%s' - AFTER!", domain)
1495 if not domain_helper.is_wanted(domain):
1496 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1498 elif (args.force is None or not args.force) and instances.is_registered(domain):
1499 logger.debug("domain='%s' is already registered, --force not specified: args.force[]='%s'", domain, type(args.force))
1501 elif instances.is_recent(domain):
1502 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1505 logger.info("Fetching instances from domain='%s' ...", domain)
1506 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1508 logger.debug("Success! - EXIT!")
1511 def update_nodeinfo(args: argparse.Namespace) -> int:
1512 logger.debug("args[]='%s' - CALLED!", type(args))
1514 logger.debug("Invoking locking.acquire() ...")
1517 if args.domain is not None and args.domain != "":
1518 logger.debug("Fetching args.domain='%s'", args.domain)
1519 database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
1520 elif args.software is not None and args.software != "":
1521 logger.info("Fetching domains for args.software='%s'", args.software)
1522 database.cursor.execute("SELECT domain, software FROM instances WHERE software = ? ORDER BY last_updated ASC", [args.software])
1523 elif args.mode is not None and args.mode != "":
1524 logger.info("Fetching domains for args.mode='%s'", args.mode.upper())
1525 database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode = ? ORDER BY last_updated ASC", [args.mode])
1526 elif args.no_software:
1527 logger.info("Fetching domains with no software type detected ...")
1528 database.cursor.execute("SELECT domain, software FROM instances WHERE software IS NULL ORDER BY last_updated ASC")
1529 elif args.with_software:
1530 logger.info("Fetching domains with any software type detected ...")
1531 database.cursor.execute("SELECT domain, software FROM instances WHERE software IS NOT NULL ORDER BY last_updated ASC")
1533 logger.info("Fetching domains with other detection mode than AUTO_DISOVERY being set ...")
1534 database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode IS NOT NULL AND detection_mode != 'AUTO_DISCOVERY' ORDER BY last_updated ASC")
1535 elif args.no_detection:
1536 logger.info("Fetching domains with no detection mode being set ...")
1537 database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode IS NULL ORDER BY last_updated ASC")
1539 logger.info("Fetching domains with domain name and software being the same ...")
1540 database.cursor.execute("SELECT domain, software FROM instances WHERE domain=software ORDER BY last_updated ASC")
1542 logger.info("Fetching domains for recently updated ...")
1543 database.cursor.execute("SELECT domain, software FROM instances ORDER BY last_updated ASC")
1545 domains = database.cursor.fetchall()
1547 logger.info("Checking %d domain(s) ...", len(domains))
1550 logger.debug("row[]='%s'", type(row))
1551 if row["domain"].endswith(".i2p") and not config.get("allow_i2p_domain"):
1552 logger.debug("row[domain]='%s' is an I2P address - SKIPPED", row["domain"])
1554 elif row["domain"].endswith(".onion"):
1555 logger.debug("row[domain]='%s' is a TOR .onion domain - SKIPPED", row["domain"])
1557 elif row["domain"].endswith(".arpa"):
1558 logger.debug("row[domain]='%s' is a reverse IP address - SKIPPED", row["domain"])
1560 elif row["domain"].endswith(".tld"):
1561 logger.debug("row[domain]='%s' is a fake domain - SKIPPED", row["domain"])
1563 elif blacklist.is_blacklisted(row["domain"]):
1564 logger.debug("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"])
1566 elif not args.force and instances.is_recent(row["domain"], "last_nodeinfo"):
1567 logger.debug("row[domain]='%s' has been recently checked - SKIPPED!", row["domain"])
1571 logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
1572 software = federation.determine_software(row["domain"])
1574 logger.debug("Determined software='%s'", software)
1575 if (software != row["software"] and software is not None) or args.force is True:
1576 logger.debug("software='%s'", software)
1577 if software is None:
1578 logger.debug("Setting nodeinfo_url to 'None' for row[domain]='%s' ...", row["domain"])
1579 instances.set_nodeinfo_url(row["domain"], None)
1581 logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
1582 instances.set_software(row["domain"], software)
1584 if software is not None:
1585 logger.debug("Setting row[domain]='%s' as successfully determined ...", row["domain"])
1586 instances.set_success(row["domain"])
1587 except network.exceptions as exception:
1588 logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1589 instances.set_last_error(row["domain"], exception)
1591 instances.set_last_nodeinfo(row["domain"])
1592 instances.update(row["domain"])
1595 logger.debug("Success! - EXIT!")
1598 def fetch_instances_social(args: argparse.Namespace) -> int:
1599 logger.debug("args[]='%s' - CALLED!", type(args))
1601 logger.debug("Invoking locking.acquire() ...")
1604 source_domain = "instances.social"
1606 if config.get("instances_social_api_key") == "":
1607 logger.error("API key not set. Please set in your config.json file.")
1609 elif sources.is_recent(source_domain):
1610 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1613 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1614 sources.update(source_domain)
1617 "Authorization": f"Bearer {config.get('instances_social_api_key')}",
1620 logger.info("Fetching list from source_domain='%s' ...", source_domain)
1621 fetched = network.get_json_api(
1623 "/api/1.0/instances/list?count=0&sort_by=name",
1625 timeout=(config.get("connection_timeout"), config.get("read_timeout"))
1627 logger.debug("fetched(%d)[]='%s'", len(fetched), type(fetched))
1629 if "error_message" in fetched:
1630 logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1632 elif "exception" in fetched:
1633 logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1635 elif "json" not in fetched:
1636 logger.warning("fetched has no element 'json' - EXIT!")
1638 elif "instances" not in fetched["json"]:
1639 logger.warning("fetched[row] has no element 'instances' - EXIT!")
1643 rows = fetched["json"]["instances"]
1645 logger.info("Checking %d row(s) ...", len(rows))
1647 logger.debug("row[]='%s'", type(row))
1648 domain = tidyup.domain(row["name"]) if row["name"] not in [None, ""] else None
1649 logger.debug("domain='%s' - AFTER!", domain)
1651 if domain in [None, ""]:
1652 logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
1655 logger.debug("domain='%s' - BEFORE!", domain)
1656 domain = domain.encode("idna").decode("utf-8")
1657 logger.debug("domain='%s' - AFTER!", domain)
1659 if not domain_helper.is_wanted(domain):
1660 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1662 elif domain in domains:
1663 logger.debug("domain='%s' is already added - SKIPPED!", domain)
1665 elif instances.is_registered(domain):
1666 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1668 elif instances.is_recent(domain):
1669 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1672 logger.info("Fetching instances from domain='%s' ...", domain)
1673 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1675 logger.debug("Success! - EXIT!")
1678 def fetch_relaylist(args: argparse.Namespace) -> int:
1679 logger.debug("args[]='%s' - CALLED!", type(args))
1681 logger.debug("Invoking locking.acquire() ...")
1684 source_domain = "api.relaylist.com"
1686 if sources.is_recent(source_domain):
1687 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1690 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1691 sources.update(source_domain)
1693 logger.info("Fetching list from source_domain='%s' ...", source_domain)
1694 fetched = network.get_json_api(
1698 (config.get("connection_timeout"), config.get("read_timeout"))
1700 logger.debug("fetched(%d)[]='%s'", len(fetched), type(fetched))
1702 if "error_message" in fetched:
1703 logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1705 elif "exception" in fetched:
1706 logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1708 elif "json" not in fetched:
1709 logger.warning("fetched has no element 'json' - EXIT!")
1714 logger.info("Checking %d row(s) ...", len(fetched["json"]))
1715 for row in fetched["json"]:
1716 logger.debug("row[]='%s'", type(row))
1717 domain = urlparse(row["url"]).netloc.lower().split(":")[0]
1718 logger.debug("domain='%s' - AFTER!", domain)
1720 if domain in [None, ""]:
1721 logger.debug("domain='%s' is empty after parsing URL - SKIPPED!", domain)
1724 logger.debug("domain='%s' - BEFORE!", domain)
1725 domain = domain.encode("idna").decode("utf-8")
1726 logger.debug("domain='%s' - AFTER!", domain)
1728 if not domain_helper.is_wanted(domain):
1729 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1731 elif domain in domains:
1732 logger.debug("domain='%s' is already added - SKIPPED!", domain)
1734 elif instances.is_registered(domain):
1735 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1737 elif instances.is_recent(domain):
1738 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1741 logger.info("Fetching instances from domain='%s'", domain)
1742 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1744 logger.debug("Success! - EXIT!")
1747 def fetch_relays(args: argparse.Namespace) -> int:
1748 logger.debug("args[]='%s' - CALLED!", type(args))
1750 logger.debug("Invoking locking.acquire() ...")
1753 if args.domain is not None and args.domain != "":
1754 logger.debug("Fetching instances record for args.domain='%s' ...", args.domain)
1755 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND domain = ? LIMIT 1", [args.domain])
1756 elif args.software is not None and args.software != "":
1757 logger.debug("Fetching instances records for args.software='%s' ...", args.software)
1758 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND nodeinfo_url IS NOT NULL AND software = ? ORDER BY last_updated DESC", [args.software])
1760 logger.debug("Fetch all relay instances ...")
1761 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND nodeinfo_url IS NOT NULL ORDER BY last_updated DESC")
1764 rows = database.cursor.fetchall()
1766 logger.info("Checking %d relays ...", len(rows))
1768 logger.debug("row[domain]='%s',row[software]='%s'", row["domain"], row["software"])
1769 if not args.force and instances.is_recent(row["domain"]):
1770 logger.debug("row[domain]='%s' has been recently fetched - SKIPPED!", row["domain"])
1772 elif row["nodeinfo_url"] is None:
1773 logger.warning("row[domain]='%s' has empty nodeinfo_url but this is required - SKIPPED!", row["domain"])
1778 logger.debug("row[domain]='%s',row[software]='%s' - checking ....", row["domain"], row["software"])
1779 if row["software"] == "pub-relay":
1780 logger.info("Fetching row[nodeinfo_url]='%s' from relay row[domain]='%s',row[software]='%s' ...", row["nodeinfo_url"], row["domain"], row["software"])
1781 raw = network.fetch_api_url(
1782 row["nodeinfo_url"],
1783 (config.get("connection_timeout"), config.get("read_timeout"))
1786 logger.debug("raw[%s]()=%d", type(raw), len(raw))
1787 if "exception" in raw:
1788 logger.warning("row[domain]='%s' has caused an exception: '%s' - raising again ...", row["domain"], type(raw["exception"]))
1789 raise raw["exception"]
1790 elif "error_message" in raw:
1791 logger.warning("row[domain]='%s' has caused error message: '%s' - SKIPPED!", row["domain"], raw["error_message"])
1792 instances.set_last_error(row["domain"], raw)
1793 instances.set_last_instance_fetch(row["domain"])
1794 instances.update(row["domain"])
1796 elif "json" not in raw:
1797 logger.warning("raw()=%d does not contain key 'json' in response - SKIPPED!", len(raw))
1799 elif not "metadata" in raw["json"]:
1800 logger.warning("raw[json]()=%d does not contain key 'json' in response - SKIPPED!", len(raw["json"]))
1802 elif not "peers" in raw["json"]["metadata"]:
1803 logger.warning("raw[json][metadata()=%d does not contain key 'json' in response - SKIPPED!", len(raw["json"]["metadata"]))
1806 logger.info("Fetching / from relay row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1807 raw = network.fetch_url(
1808 f"https://{row['domain']}",
1809 network.web_headers,
1810 (config.get("connection_timeout"), config.get("read_timeout"))
1812 logger.debug("raw[%s]()=%d", type(raw), len(raw))
1814 doc = bs4.BeautifulSoup(raw, features="html.parser")
1815 logger.debug("doc[]='%s'", type(doc))
1817 except network.exceptions as exception:
1818 logger.warning("Exception '%s' during fetching from relay '%s': '%s'", type(exception), row["domain"], str(exception))
1819 instances.set_last_error(row["domain"], exception)
1820 instances.set_last_instance_fetch(row["domain"])
1821 instances.update(row["domain"])
1824 logger.debug("row[software]='%s'", row["software"])
1825 if row["software"] == "activityrelay":
1826 logger.debug("Checking row[domain]='%s' ...", row["domain"])
1827 tags = doc.findAll("p")
1829 logger.debug("Checking %d paragraphs ...", len(tags))
1831 logger.debug("tag[]='%s'", type(tag))
1832 if len(tag.contents) == 0:
1833 logger.debug("tag='%s' is an empty tag - SKIPPED!", tag)
1835 elif "registered instances" not in tag.contents[0]:
1836 logger.debug("Skipping paragraph, text not found.")
1839 logger.debug("Found tag.contents[0][]='%s'", tag.contents[0])
1840 for domain in tag.contents:
1841 logger.debug("domain[%s]='%s'", type(domain), domain)
1842 if not isinstance(domain, bs4.element.NavigableString) or "registered instances" in domain:
1845 domain = str(domain)
1846 logger.debug("domain='%s'", domain)
1847 if not domain_helper.is_wanted(domain):
1848 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1851 logger.debug("domain='%s' - BEFORE!", domain)
1852 domain = tidyup.domain(domain) if domain not in [None, ""] else None
1853 logger.debug("domain='%s' - AFTER!", domain)
1855 if domain in [None, ""]:
1856 logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1858 elif domain not in peers:
1859 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1860 peers.append(domain)
1862 logger.debug("domains()=%d,domain='%s'", len(domains), domain)
1863 if dict_helper.has_key(domains, "domain", domain):
1864 logger.debug("domain='%s' already added", domain)
1867 logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
1870 "origin": row["domain"],
1872 elif row["software"] in ["aoderelay", "selective-relay"]:
1873 logger.debug("Checking row[domain]='%s' ...", row["domain"])
1874 if row["software"] == "aoderelay":
1875 tags = doc.findAll("section", {"class": "instance"})
1877 tags = doc.find("div", {"id": "instances"}).findAll("li")
1879 logger.debug("Checking %d tags ...", len(tags))
1881 logger.debug("tag[]='%s'", type(tag))
1883 link = tag.find("a")
1884 logger.debug("link[%s]='%s'", type(link), link)
1885 if not isinstance(link, bs4.element.Tag):
1886 logger.warning("tag[%s]='%s' is not type of 'bs4.element.Tag' - SKIPPED!", type(tag), tag)
1889 components = urlparse(link.get("href"))
1890 logger.debug("components(%d)='%s'", len(components), components)
1891 domain = components.netloc.lower().split(":")[0]
1893 logger.debug("domain='%s' - BEFORE!", domain)
1894 domain = tidyup.domain(domain) if domain not in [None, ""] else None
1895 logger.debug("domain='%s' - AFTER!", domain)
1897 if domain in [None, ""]:
1898 logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1900 elif domain not in peers:
1901 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1902 peers.append(domain)
1904 logger.debug("domains()=%d,domain='%s'", len(domains), domain)
1905 if dict_helper.has_key(domains, "domain", domain):
1906 logger.debug("domain='%s' already added", domain)
1909 logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
1912 "origin": row["domain"],
1914 elif row["software"] == "pub-relay":
1915 logger.debug("Checking %d peer(s) row[domain]='%s' ...", len(raw["json"]["metadata"]["peers"]), row["domain"])
1916 for domain in raw["json"]["metadata"]["peers"]:
1917 logger.debug("domain='%s' - BEFORE!", domain)
1918 domain = tidyup.domain(domain) if domain not in [None, ""] else None
1919 logger.debug("domain='%s' - AFTER!", domain)
1921 if domain in [None, ""]:
1922 logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1924 elif domain not in peers:
1925 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1926 peers.append(domain)
1928 logger.debug("domains()=%d,domain='%s'", len(domains), domain)
1929 if dict_helper.has_key(domains, "domain", domain):
1930 logger.debug("domain='%s' already added", domain)
1933 logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
1936 "origin": row["domain"],
1939 logger.warning("row[domain]='%s',row[software]='%s' is not supported", row["domain"], row["software"])
1942 logger.debug("Updating last_instance_fetch for row[domain]='%s' ...", row["domain"])
1943 instances.set_last_instance_fetch(row["domain"])
1945 logger.info("Relay '%s' has %d peer(s) registered.", row["domain"], len(peers))
1946 instances.set_total_peers(row["domain"], peers)
1948 logger.debug("Flushing data for row[domain]='%s'", row["domain"])
1949 instances.update(row["domain"])
1951 logger.info("Checking %d domains ...", len(domains))
1953 logger.debug("row[domain]='%s',row[origin]='%s'", row["domain"], row["origin"])
1954 if not domain_helper.is_wanted(row["domain"]):
1955 logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
1957 elif instances.is_registered(row["domain"]):
1958 logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
1961 logger.info("Fetching row[domain]='%s',row[origin]='%s' ...", row["domain"], row["origin"])
1962 federation.fetch_instances(row["domain"], row["origin"], None, inspect.currentframe().f_code.co_name)
1964 logger.debug("Success! - EXIT!")
1967 def convert_idna(args: argparse.Namespace) -> int:
1968 logger.debug("args[]='%s' - CALLED!", type(args))
1970 database.cursor.execute("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC")
1971 rows = database.cursor.fetchall()
1973 logger.debug("rows[]='%s'", type(rows))
1974 instances.translate_idnas(rows, "domain")
1976 database.cursor.execute("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC")
1977 rows = database.cursor.fetchall()
1979 logger.debug("rows[]='%s'", type(rows))
1980 instances.translate_idnas(rows, "origin")
1982 database.cursor.execute("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC")
1983 rows = database.cursor.fetchall()
1985 logger.debug("rows[]='%s'", type(rows))
1986 blocks.translate_idnas(rows, "blocker")
1988 database.cursor.execute("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC")
1989 rows = database.cursor.fetchall()
1991 logger.debug("rows[]='%s'", type(rows))
1992 blocks.translate_idnas(rows, "blocked")
1994 logger.debug("Success! - EXIT!")
1997 def remove_invalid(args: argparse.Namespace) -> int:
1998 logger.debug("args[]='%s' - CALLED!", type(args))
2000 logger.debug("Invoking locking.acquire() ...")
2003 database.cursor.execute("SELECT domain FROM instances ORDER BY domain ASC")
2004 rows = database.cursor.fetchall()
2006 logger.info("Checking %d domains ...", len(rows))
2008 logger.debug("row[domain]='%s'", row["domain"])
2009 if not validators.domain(row["domain"].split("/")[0]):
2010 logger.info("Invalid row[domain]='%s' found, removing ...", row["domain"])
2011 database.cursor.execute("DELETE FROM blocks WHERE blocker = ? OR blocked = ?", [row["domain"], row["domain"]])
2012 database.cursor.execute("DELETE FROM instances WHERE domain = ? LIMIT 1", [row["domain"]])
2014 logger.debug("Invoking commit() ...")
2015 database.connection.commit()
2017 logger.info("Vaccum cleaning database ...")
2018 database.cursor.execute("VACUUM")
2020 logger.debug("Success! - EXIT!")