1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
23 from urllib.parse import urlparse
32 from fba import database
35 from fba.helpers import blacklist
36 from fba.helpers import blocklists
37 from fba.helpers import config
38 from fba.helpers import cookies
39 from fba.helpers import dicts as dict_helper
40 from fba.helpers import domain as domain_helper
41 from fba.helpers import locking
42 from fba.helpers import processing
43 from fba.helpers import software as software_helper
44 from fba.helpers import tidyup
46 from fba.http import csrf
47 from fba.http import federation
48 from fba.http import network
50 from fba.models import blocks
51 from fba.models import instances
52 from fba.models import sources
54 from fba.networks import friendica
55 from fba.networks import lemmy
56 from fba.networks import mastodon
57 from fba.networks import misskey
58 from fba.networks import pleroma
60 logging.basicConfig(level=logging.INFO)
61 logger = logging.getLogger(__name__)
62 #logger.setLevel(logging.DEBUG)
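# Note: each command handler below receives the parsed argparse.Namespace and returns an
# integer exit status; the handlers are dispatched from the command-line front-end
# (./fba.py, see e.g. the "./fba.py fetch_cs" hint further down).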
64 def check_instance(args: argparse.Namespace) -> int:
65 logger.debug("args.domain='%s' - CALLED!", args.domain)
68 if not validators.domain(args.domain):
69 logger.warning("args.domain='%s' is not valid", args.domain)
71 elif blacklist.is_blacklisted(args.domain):
72 logger.warning("args.domain='%s' is blacklisted", args.domain)
74 elif instances.is_registered(args.domain):
75 logger.warning("args.domain='%s' is already registered", args.domain)
78 logger.info("args.domain='%s' is not known", args.domain)
80 logger.debug("status=%d - EXIT!", status)
83 def check_nodeinfo(args: argparse.Namespace) -> int:
84 logger.debug("args[]='%s' - CALLED!", type(args))
87 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")
90 for row in database.cursor.fetchall():
91 logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
92 punycode = row["domain"].encode("idna").decode("utf-8")
94 if row["nodeinfo_url"].startswith("/"):
95 logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
97 elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
98 logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
101 logger.info("Found %d row(s)", cnt)
103 logger.debug("EXIT!")
106 def fetch_pixelfed_api(args: argparse.Namespace) -> int:
107 logger.debug("args[]='%s' - CALLED!", type(args))
109 # No CSRF token is required by default, so there is no need to add network.source_headers here

111 source_domain = "pixelfed.org"
113 if sources.is_recent(source_domain):
114 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
117 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
118 sources.update(source_domain)
121 logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
122 headers = csrf.determine(source_domain, dict())
123 except network.exceptions as exception:
124 logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
128 logger.info("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
129 fetched = network.get_json_api(
131 "/api/v1/servers/all.json?scope=All&country=all&language=all",
133 (config.get("connection_timeout"), config.get("read_timeout"))
136 logger.debug("fetched(%d)[]='%s'", len(fetched), type(fetched))
137 if "error_message" in fetched:
138 logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
140 elif "data" not in fetched["json"]:
141 logger.warning("API did not return JSON with 'data' element - EXIT!")
144 rows = fetched["json"]["data"]
145 logger.info("Checking %d fetched rows ...", len(rows))
147 logger.debug("row[]='%s'", type(row))
148 if "domain" not in row:
149 logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
151 elif row["domain"] in [None, ""]:
152 logger.debug("row[domain]='%s' is empty - SKIPPED!", row["domain"])
155 logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
156 domain = row["domain"].encode("idna").decode("utf-8")
157 logger.debug("domain='%s' - AFTER!", domain)
159 if not domain_helper.is_wanted(domain):
160 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
162 elif instances.is_registered(domain):
163 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
165 elif instances.is_recent(domain):
166 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
169 logger.debug("Fetching instances from domain='%s' ...", domain)
170 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
172 except network.exceptions as exception:
173 logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
176 logger.debug("Success! - EXIT!")
179 def fetch_bkali(args: argparse.Namespace) -> int:
180 logger.debug("args[]='%s' - CALLED!", type(args))
182 logger.debug("Invoking locking.acquire() ...")
185 source_domain = "gql.api.bka.li"
186 if sources.is_recent(source_domain):
187 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
190 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
191 sources.update(source_domain)
195 logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
196 fetched = network.post_json_api(
200 "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
204 logger.debug("fetched[]='%s'", type(fetched))
205 if "error_message" in fetched:
206 logger.warning("post_json_api() for 'gql.sources.bka.li' returned error message='%s", fetched["error_message"])
208 elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
209 logger.warning("post_json_api() returned error: '%s", fetched["error"]["message"])
212 rows = fetched["json"]
214 logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
216 raise Exception("WARNING: Returned no records")
217 elif "data" not in rows:
218 raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
219 elif "nodeinfo" not in rows["data"]:
220 raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")
222 for entry in rows["data"]["nodeinfo"]:
223 logger.debug("entry[%s]='%s'", type(entry), entry)
224 if "domain" not in entry:
225 logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
227 elif entry["domain"] in [None, ""]:
228 logger.debug("entry[domain]='%s' is empty - SKIPPED!", entry["domain"])
230 elif not domain_helper.is_wanted(entry["domain"]):
231 logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
233 elif instances.is_registered(entry["domain"]):
234 logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
236 elif instances.is_recent(entry["domain"]):
237 logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
240 logger.debug("Adding domain='%s' ...", entry["domain"])
241 domains.append(entry["domain"])
243 except network.exceptions as exception:
244 logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
247 logger.debug("domains()=%d", len(domains))
249 logger.info("Adding %d new instances ...", len(domains))
250 for domain in domains:
251 logger.debug("domain='%s' - BEFORE!", domain)
252 domain = domain.encode("idna").decode("utf-8")
253 logger.debug("domain='%s' - AFTER!", domain)
256 logger.info("Fetching instances from domain='%s' ...", domain)
257 federation.fetch_instances(domain, "tak.teleyal.blog", None, inspect.currentframe().f_code.co_name)
258 except network.exceptions as exception:
259 logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
260 instances.set_last_error(domain, exception)
263 logger.debug("Success - EXIT!")
266 def fetch_blocks(args: argparse.Namespace) -> int:
267 logger.debug("args[]='%s' - CALLED!", type(args))
268 if args.domain is not None and args.domain != "":
269 logger.debug("args.domain='%s' - checking ...", args.domain)
270 if not validators.domain(args.domain):
271 logger.warning("args.domain='%s' is not valid.", args.domain)
273 elif blacklist.is_blacklisted(args.domain):
274 logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
276 elif not instances.is_registered(args.domain):
277 logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
280 logger.debug("Invoking locking.acquire() ...")
283 if args.domain is not None and args.domain != "":
284 # Re-check single domain
285 logger.debug("Querying database for args.domain='%s' ...", args.domain)
286 database.cursor.execute(
287 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ? LIMIT 1", [args.domain]
289 elif args.software is not None and args.software != "":
290 # Re-check single software
291 logger.debug("Querying database for args.software='%s' ...", args.software)
292 database.cursor.execute(
293 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC", [args.software]
296 # Check only entries with total_blocked=None
297 database.cursor.execute(
298 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND total_blocks IS NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC"
301 # Re-check after "timeout" (aka. minimum interval)
302 database.cursor.execute(
303 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC"
306 rows = database.cursor.fetchall()
307 logger.info("Checking %d entries ...", len(rows))
308 for blocker, software, origin, nodeinfo_url in rows:
309 logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
311 if not domain_helper.is_wanted(blocker):
312 logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
314 elif not args.force and instances.is_recent(blocker, "last_blocked"):
315 logger.debug("blocker='%s' has been recently accessed - SKIPPED!", blocker)
318 logger.debug("Setting last_blocked,has_obfuscation=false for blocker='%s' ...", blocker)
319 instances.set_last_blocked(blocker)
320 instances.set_has_obfuscation(blocker, False)
322 # chaos.social (c.s) isn't part of oliphant's "hidden" blocklists; relays and instances with their own dedicated blocklist source are skipped here as well
323 if blocker == "chaos.social" or software_helper.is_relay(software) or blocklists.has(blocker):
324 logger.debug("Skipping blocker='%s', run ./fba.py fetch_cs, fetch_oliphant, fetch_csv instead!", blocker)
327 logger.debug("Invoking federation.fetch_blocks(%s) ...", blocker)
328 blocking = federation.fetch_blocks(blocker)
330 logger.debug("blocker='%s',software='%s',blocking()=%d", blocker, software, len(blocking))
331 if len(blocking) == 0:
332 logger.debug("blocker='%s',software='%s' - fetching blocklist ...", blocker, software)
333 if software == "pleroma":
334 blocking = pleroma.fetch_blocks(blocker)
335 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
336 elif software == "mastodon":
337 blocking = mastodon.fetch_blocks(blocker)
338 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
339 elif software == "lemmy":
340 blocking = lemmy.fetch_blocks(blocker)
341 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
342 elif software == "friendica":
343 blocking = friendica.fetch_blocks(blocker)
344 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
345 elif software == "misskey":
346 blocking = misskey.fetch_blocks(blocker)
347 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
349 logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)
351 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
352 instances.set_total_blocks(blocker, blocking)
355 deobfuscated = obfuscated = 0
357 logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
358 for block in blocking:
359 logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])
361 if block["block_level"] == "":
362 logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
365 logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
366 block["blocked"] = tidyup.domain(block["blocked"])
367 block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
368 logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])
370 if block["blocked"] in [None, ""]:
371 logger.warning("block[blocked]='%s' is empty, blocker='%s'", block["blocked"], blocker)
373 elif block["blocked"].endswith(".onion"):
374 logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
376 elif block["blocked"].endswith(".i2p") and not config.get("allow_i2p_domain"):
377 logger.debug("blocked='%s' is an I2P .onion domain - SKIPPED", block["blocked"])
379 elif block["blocked"].endswith(".arpa"):
380 logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
382 elif block["blocked"].endswith(".tld"):
383 logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
385 elif block["blocked"].find("*") >= 0:
386 logger.debug("blocker='%s' uses obfuscated domains", blocker)
387 instances.set_has_obfuscation(blocker, True)
388 obfuscated = obfuscated + 1
390 # Some Friendica servers also obfuscate domains without providing a hash
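# instances.deobfuscate() tries to resolve the masked entry (the wildcard pattern plus,
# if present, the digest) back to an already-known domain; it returns a matching row or
# None when nothing could be resolved.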
391 row = instances.deobfuscate("*", block["blocked"], block["digest"] if "digest" in block else None)
393 logger.debug("row[]='%s'", type(row))
395 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
398 deobfuscated = deobfuscated + 1
399 block["blocked"] = row["domain"]
400 origin = row["origin"]
401 nodeinfo_url = row["nodeinfo_url"]
402 elif block["blocked"].find("?") >= 0:
403 logger.debug("blocker='%s' uses obfuscated domains", blocker)
404 instances.set_has_obfuscation(blocker, True)
405 obfuscated = obfuscated + 1
407 # Some servers obfuscate domains with question marks; it is unclear whether this depends on the software version
408 row = instances.deobfuscate("?", block["blocked"], block["digest"] if "digest" in block else None)
410 logger.debug("row[]='%s'", type(row))
412 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
415 deobfuscated = deobfuscated + 1
416 block["blocked"] = row["domain"]
417 origin = row["origin"]
418 nodeinfo_url = row["nodeinfo_url"]
420 logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
421 if block["blocked"] in [None, ""]:
422 logger.debug("block[blocked]='%s' is empty - SKIPPED!", block["blocked"])
425 logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
426 block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
427 logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
429 if not domain_helper.is_wanted(block["blocked"]):
430 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
432 elif block["block_level"] in ["accept", "accepted"]:
433 logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
435 elif not instances.is_registered(block["blocked"]):
436 logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
437 federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)
439 block["block_level"] = blocks.alias_block_level(block["block_level"])
441 if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
442 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
444 "blocked": block["blocked"],
445 "reason" : block["reason"],
448 logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
449 cookies.clear(block["blocked"])
451 logger.info("blocker='%s' has %d obfuscated domain(s) and %d of them could be deobfuscated.", blocker, obfuscated, deobfuscated)
452 instances.set_obfuscated_blocks(blocker, obfuscated)
454 logger.debug("Flushing updates for blocker='%s' ...", blocker)
455 instances.update(blocker)
457 logger.debug("Invoking commit() ...")
458 database.connection.commit()
460 logger.debug("Invoking cookies.clear(%s) ...", blocker)
461 cookies.clear(blocker)
463 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d'", config.get("bot_enabled"), len(blockdict))
464 if config.get("bot_enabled") and len(blockdict) > 0:
465 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
466 network.send_bot_post(blocker, blockdict)
468 logger.debug("Success! - EXIT!")
471 def fetch_observer(args: argparse.Namespace) -> int:
472 logger.debug("args[]='%s' - CALLED!", type(args))
474 logger.debug("Invoking locking.acquire() ...")
477 source_domain = "fediverse.observer"
478 if sources.is_recent(source_domain):
479 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
482 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
483 sources.update(source_domain)
486 if args.software is None:
487 logger.info("Fetching software list ...")
488 raw = network.fetch_url(
489 f"https://{source_domain}",
491 (config.get("connection_timeout"), config.get("read_timeout"))
493 logger.debug("raw[%s]()=%d", type(raw), len(raw))
495 doc = bs4.BeautifulSoup(raw, features="html.parser")
496 logger.debug("doc[]='%s'", type(doc))
498 navbar = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"})
499 logger.debug("navbar[]='%s'", type(navbar))
501 logger.warning("Cannot find navigation bar, cannot continue!")
504 items = navbar.findAll("a", {"class": "dropdown-item"})
505 logger.debug("items[]='%s'", type(items))
507 logger.info("Checking %d menu items ...", len(items))
509 logger.debug("item[%s]='%s'", type(item), item)
510 if item.text.lower() == "all":
511 logger.debug("Skipping 'All' menu entry ...")
514 logger.debug("Appending item.text='%s' ...", item.text)
515 types.append(tidyup.domain(item.text))
517 logger.info("Adding args.software='%s' as type ...", args.software)
518 types.append(args.software)
520 logger.info("Fetching %d different table data ...", len(types))
521 for software in types:
522 logger.debug("software='%s'", software)
524 if args.software is not None and args.software != software:
525 logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
530 logger.debug("Fetching table data for software='%s' ...", software)
531 raw = network.post_json_api(
532 f"api.{source_domain}",
535 "query": "{nodes(softwarename:\"" + software + "\"){domain}}"
539 logger.debug("raw[%s]()=%d", type(raw), len(raw))
540 if "exception" in raw:
541 logger.warning("row[domain]='%s' has caused an exception: '%s' - raising again ...", row["domain"], type(raw["exception"]))
542 raise raw["exception"]
543 elif "error_message" in raw:
544 logger.warning("row[domain]='%s' has caused error message: '%s' - SKIPPED!", row["domain"], raw["error_message"])
546 elif not "data" in raw["json"]:
547 logger.warning("Cannot find key 'nodes' in raw[json]()=%d", len(raw["json"]))
549 elif not "nodes" in raw["json"]["data"]:
550 logger.warning("Cannot find key 'nodes' in raw[json][data]()=%d", len(raw["json"]["data"]))
553 items = raw["json"]["data"]["nodes"]
554 logger.debug("items()=%d", len(items))
556 except network.exceptions as exception:
557 logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
560 logger.info("Checking %d items,software='%s' ...", len(items), software)
562 logger.debug("item[]='%s'", type(item))
563 if not "domain" in item:
564 logger.debug("item()=%d has not element 'domain'", len(item))
567 logger.debug("item[domain]='%s' - BEFORE!", item["domain"])
568 domain = tidyup.domain(item["domain"]) if item["domain"] not in [None, ""] else None
569 logger.debug("domain='%s' - AFTER!", domain)
571 if domain in [None, ""]:
572 logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
575 logger.debug("domain='%s' - BEFORE!", domain)
576 domain = domain.encode("idna").decode("utf-8")
577 logger.debug("domain='%s' - AFTER!", domain)
579 if not domain_helper.is_wanted(domain):
580 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
582 elif instances.is_registered(domain):
583 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
586 logger.info("Fetching instances for domain='%s',software='%s' ...", domain, software)
587 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
589 logger.debug("Success! - EXIT!")
592 def fetch_todon_wiki(args: argparse.Namespace) -> int:
593 logger.debug("args[]='%s' - CALLED!", type(args))
595 logger.debug("Invoking locking.acquire() ...")
598 source_domain = "wiki.todon.eu"
599 if sources.is_recent(source_domain):
600 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
603 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
604 sources.update(source_domain)
611 logger.debug("Fetching domainblocks from source_domain='%s'", source_domain)
612 raw = network.fetch_url(
613 f"https://{source_domain}/todon/domainblocks",
615 (config.get("connection_timeout"), config.get("read_timeout"))
617 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
619 doc = bs4.BeautifulSoup(raw, "html.parser")
620 logger.debug("doc[]='%s'", type(doc))
622 silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
623 logger.info("Checking %d silenced/limited entries ...", len(silenced))
624 blocklist["silenced"] = utils.find_domains(silenced, "div")
626 suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
627 logger.info("Checking %d suspended entries ...", len(suspended))
628 blocklist["reject"] = utils.find_domains(suspended, "div")
630 blocking = blocklist["silenced"] + blocklist["reject"]
633 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
634 instances.set_last_blocked(blocker)
635 instances.set_total_blocks(blocker, blocking)
638 for block_level in blocklist:
639 blockers = blocklist[block_level]
641 logger.debug("block_level='%s',blockers()=%d'", block_level, len(blockers))
642 for blocked in blockers:
643 logger.debug("blocked='%s'", blocked)
645 if not instances.is_registered(blocked):
647 logger.info("Fetching instances from domain='%s' ...", blocked)
648 federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
649 except network.exceptions as exception:
650 logger.warning("Exception '%s' during fetching instances (fetch_cs) from blocked='%s'", type(exception), blocked)
651 instances.set_last_error(blocked, exception)
653 if not domain_helper.is_wanted(blocked):
654 logger.warning("blocked='%s' is not wanted - SKIPPED!", blocked)
656 elif not domain_helper.is_wanted(blocker):
657 logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
659 elif blocks.is_instance_blocked(blocker, blocked, block_level):
660 logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
663 logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
664 if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
665 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", blocked, block_level, blocker)
671 logger.debug("Invoking commit() ...")
672 database.connection.commit()
674 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
675 if config.get("bot_enabled") and len(blockdict) > 0:
676 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
677 network.send_bot_post(blocker, blockdict)
679 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
680 if instances.has_pending(blocker):
681 logger.debug("Flushing updates for blocker='%s' ...", blocker)
682 instances.update(blocker)
684 logger.debug("Success! - EXIT!")
687 def fetch_cs(args: argparse.Namespace):
688 logger.debug("args[]='%s' - CALLED!", type(args))
690 logger.debug("Invoking locking.acquire() ...")
718 source_domain = "raw.githubusercontent.com"
719 if sources.is_recent(source_domain):
720 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
723 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
724 sources.update(source_domain)
726 logger.info("Fetching federation.md from source_domain='%s' ...", source_domain)
727 raw = network.fetch_url(
728 f"https://{source_domain}/chaossocial/meta/master/federation.md",
730 (config.get("connection_timeout"), config.get("read_timeout"))
732 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
734 doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
735 logger.debug("doc()=%d[]='%s'", len(doc), type(doc))
737 silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
738 logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
739 blocklist["silenced"] = federation.find_domains(silenced)
741 blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
742 logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
743 blocklist["reject"] = federation.find_domains(blocked)
745 blocking = blocklist["silenced"] + blocklist["reject"]
746 blocker = "chaos.social"
748 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
749 instances.set_last_blocked(blocker)
750 instances.set_total_blocks(blocker, blocking)
752 logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
753 if len(blocking) > 0:
755 for block_level in blocklist:
756 logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))
758 for row in blocklist[block_level]:
759 logger.debug("row[%s]='%s'", type(row), row)
760 if not "domain" in row:
761 logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
763 elif not instances.is_registered(row["domain"]):
765 logger.info("Fetching instances from domain='%s' ...", row["domain"])
766 federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
767 except network.exceptions as exception:
768 logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
769 instances.set_last_error(row["domain"], exception)
771 if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
772 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
774 "blocked": row["domain"],
775 "reason" : row["reason"],
778 logger.debug("Invoking commit() ...")
779 database.connection.commit()
781 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
782 if config.get("bot_enabled") and len(blockdict) > 0:
783 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
784 network.send_bot_post(blocker, blockdict)
786 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
787 if instances.has_pending(blocker):
788 logger.debug("Flushing updates for blocker='%s' ...", blocker)
789 instances.update(blocker)
791 logger.debug("Success! - EXIT!")
794 def fetch_fba_rss(args: argparse.Namespace) -> int:
795 logger.debug("args[]='%s' - CALLED!", type(args))
799 logger.debug("Invoking locking.acquire() ...")
802 components = urlparse(args.feed)
803 domain = components.netloc.lower().split(":")[0]
805 logger.debug("domain='%s'", domain)
806 if sources.is_recent(domain):
807 logger.info("API from domain='%s' has recently being accessed - EXIT!", domain)
810 logger.debug("domain='%s' has not been recently used, marking ...", domain)
811 sources.update(domain)
813 logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
814 response = network.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
816 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
817 if response.ok and response.status_code == 200 and len(response.text) > 0:
818 logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
819 rss = atoma.parse_rss_bytes(response.content)
821 logger.debug("rss[]='%s'", type(rss))
822 for item in rss.items:
823 logger.debug("item[%s]='%s'", type(item), item)
824 domain = item.link.split("=")[1]
825 domain = tidyup.domain(domain) if domain not in [None, ""] else None
827 logger.debug("domain='%s' - AFTER!", domain)
828 if domain in [None, ""]:
829 logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
832 logger.debug("domain='%s' - BEFORE!", domain)
833 domain = domain.encode("idna").decode("utf-8")
834 logger.debug("domain='%s' - AFTER!", domain)
836 if not domain_helper.is_wanted(domain):
837 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
839 elif domain in domains:
840 logger.debug("domain='%s' is already added - SKIPPED!", domain)
842 elif instances.is_registered(domain):
843 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
845 elif instances.is_recent(domain):
846 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
849 logger.debug("Adding domain='%s'", domain)
850 domains.append(domain)
852 logger.debug("domains()=%d", len(domains))
854 logger.info("Adding %d new instances ...", len(domains))
855 for domain in domains:
856 logger.debug("domain='%s'", domain)
858 logger.info("Fetching instances from domain='%s' ...", domain)
859 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
860 except network.exceptions as exception:
861 logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
862 instances.set_last_error(domain, exception)
865 logger.debug("Success! - EXIT!")
868 def fetch_fbabot_atom(args: argparse.Namespace) -> int:
869 logger.debug("args[]='%s' - CALLED!", type(args))
871 logger.debug("Invoking locking.acquire() ...")
874 source_domain = "ryona.agency"
875 feed = f"https://{source_domain}/users/fba/feed.atom"
877 logger.debug("args.feed[%s]='%s'", type(args.feed), args.feed)
878 if args.feed is not None and validators.url(args.feed):
879 logger.debug("Setting feed='%s' ...", args.feed)
880 feed = str(args.feed)
881 source_domain = urlparse(args.feed).netloc
883 if sources.is_recent(source_domain):
884 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
887 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
888 sources.update(source_domain)
892 logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
893 response = network.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
895 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
896 if response.ok and response.status_code == 200 and len(response.text) > 0:
897 logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
898 atom = atoma.parse_atom_bytes(response.content)
900 logger.debug("atom[]='%s'", type(atom))
901 for entry in atom.entries:
902 logger.debug("entry[]='%s'", type(entry))
903 doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
904 logger.debug("doc[]='%s'", type(doc))
905 elements = doc.findAll("a")
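# Each feed entry's HTML body is scanned for <a> elements; their href values (which may
# pack several comma-separated entries) are treated as candidate domains below.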
907 logger.debug("Checking %d element(s) ...", len(elements))
908 for element in elements:
909 logger.debug("element[%s]='%s'", type(element), element)
910 for href in element["href"].split(","):
911 logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
912 domain = tidyup.domain(href) if href not in [None, ""] else None
914 logger.debug("domain='%s' - AFTER!", domain)
915 if domain in [None, ""]:
916 logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
919 logger.debug("domain='%s' - BEFORE!", domain)
920 domain = domain.encode("idna").decode("utf-8")
921 logger.debug("domain='%s' - AFTER!", domain)
923 if not domain_helper.is_wanted(domain):
924 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
926 elif domain in domains:
927 logger.debug("domain='%s' is already added - SKIPPED!", domain)
929 elif instances.is_registered(domain):
930 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
932 elif instances.is_recent(domain):
933 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
936 logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
937 domains.append(domain)
939 logger.debug("domains()=%d", len(domains))
941 logger.info("Adding %d new instances ...", len(domains))
942 for domain in domains:
943 logger.debug("domain='%s'", domain)
945 logger.info("Fetching instances from domain='%s' ...", domain)
946 federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
947 except network.exceptions as exception:
948 logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
949 instances.set_last_error(domain, exception)
952 logger.debug("Success! - EXIT!")
955 def fetch_instances(args: argparse.Namespace) -> int:
956 logger.debug("args[]='%s' - CALLED!", type(args))
958 logger.debug("Invoking locking.acquire() ...")
961 # Is domain or software set?
962 if args.domain != "":
963 logger.debug("args.domain='%s' - checking ...", args.domain)
964 if not validators.domain(args.domain):
965 logger.warning("args.domain='%s' is not valid.", args.domain)
967 elif blacklist.is_blacklisted(args.domain):
968 logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
971 logger.debug("args.domain='%s' - BEFORE!", args.domain)
972 domain = tidyup.domain(args.domain)
973 logger.debug("domain='%s' - AFTER!", domain)
976 database.cursor.execute("SELECT domain, origin, software FROM instances WHERE domain = ? LIMIT 1", [domain])
978 rows = database.cursor.fetchall()
979 logger.info("Checking %d entries ...", len(rows))
981 logger.debug("row[domain]='%s',row[origin]='%s',row[software]='%s'", row["domain"], row["origin"], row["software"])
982 if row["software"] is None:
983 logger.warning("row[domain]='%s' has no software detected. You can try to run ./fba.py update_nodeinfo --domain=%s --force to get it updated - SKIPPED!", row["domain"], row["domain"])
985 elif software_helper.is_relay(row["software"]):
986 logger.warning("row[domain]='%s' is of software type '%s' which is not supported by this command. Please invoke fetch_relays instead - SKIPPED!", row["domain"], row["software"])
991 logger.info("Fetching instances from row[domain]='%s',row[origin]='%s',row[software]='%s' ...", row["domain"], row["origin"], row["software"])
992 federation.fetch_instances(row["domain"], row["origin"], row["software"], inspect.currentframe().f_code.co_name)
993 except network.exceptions as exception:
994 logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[domain]='%s'", type(exception), row["domain"])
995 instances.set_last_error(row["domain"], exception)
996 instances.update(row["domain"])
1000 logger.debug("Not fetching more instances - BREAK!")
1003 # Loop through instances that are due for a refresh (last_instance_fetch is NULL or older than the recheck interval)
1004 database.cursor.execute(
1005 "SELECT domain, origin, software \
1007 WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus', 'neodb') \
1008 AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) \
1009 ORDER BY total_peers DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_instance")]
1012 rows = database.cursor.fetchall()
1013 logger.info("Checking %d entries ...", len(rows))
1015 logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
1016 domain = row["domain"].encode("idna").decode("utf-8")
1017 logger.debug("domain='%s' - AFTER!", domain)
1019 if not domain_helper.is_wanted(domain):
1020 logger.debug("Domain domain='%s' is not wanted - SKIPPED!", domain)
1024 logger.info("Fetching instances for domain='%s',origin='%s',software='%s' ...", domain, row["origin"], row["software"])
1025 federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name)
1026 except network.exceptions as exception:
1027 logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
1028 instances.set_last_error(domain, exception)
1030 logger.debug("Success - EXIT!")
1033 def fetch_csv(args: argparse.Namespace) -> int:
1034 logger.debug("args[]='%s' - CALLED!", type(args))
1036 logger.debug("Invoking locking.acquire() ...")
1039 logger.info("Checking %d CSV files ...", len(blocklists.csv_files))
1040 for block in blocklists.csv_files:
1041 logger.debug("block[blocker]='%s',block[csv_url]='%s'", block["blocker"], block["csv_url"])
1043 # Skip when a specific domain was given and it does not match this blocker
1044 if isinstance(args.domain, str) and args.domain != block["blocker"]:
1045 logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
1048 logger.debug("Invoking processing.csv_block(%s, %s, fetch_csv) ...", block["blocker"], block["csv_url"])
1049 processing.csv_block(block["blocker"], block["csv_url"], inspect.currentframe().f_code.co_name)
1051 logger.debug("Success - EXIT!")
1054 def fetch_oliphant(args: argparse.Namespace) -> int:
1055 logger.debug("args[]='%s' - CALLED!", type(args))
1057 logger.debug("Invoking locking.acquire() ...")
1060 source_domain = "codeberg.org"
1061 if sources.is_recent(source_domain):
1062 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1065 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1066 sources.update(source_domain)
1069 base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"
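# All of oliphant's per-instance CSV blocklists live below this path in the codeberg.org
# repository; the individual file name is taken from blocklists.oliphant_blocklists.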
1071 logger.debug("Downloading %d files ...", len(blocklists.oliphant_blocklists))
1072 for block in blocklists.oliphant_blocklists:
1074 # Skip when a specific domain was given and it does not match this blocker
1074 logger.debug("block[blocker]='%s',block[csv_url]='%s'", block["blocker"], block["csv_url"])
1075 if isinstance(args.domain, str) and args.domain != block["blocker"]:
1076 logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
1079 url = f"{base_url}/{block['csv_url']}"
1081 logger.debug("Invoking processing.csv_block(%s, %s, fetch_oliphant) ...", block["blocker"], url)
1082 processing.csv_block(block["blocker"], url, inspect.currentframe().f_code.co_name)
1084 logger.debug("Success! - EXIT!")
1087 def fetch_txt(args: argparse.Namespace) -> int:
1088 logger.debug("args[]='%s' - CALLED!", type(args))
1090 logger.debug("Invoking locking.acquire() ...")
1093 logger.info("Checking %d text file(s) ...", len(blocklists.txt_files))
1094 for row in blocklists.txt_files:
1095 logger.debug("Fetching row[url]='%s' ...", row["url"])
1096 response = network.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
1098 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1099 if response.ok and response.status_code == 200 and response.text != "":
1100 logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
1101 domains = response.text.strip().split("\n")
1103 logger.info("Processing %d domains ...", len(domains))
1104 for domain in domains:
1105 logger.debug("domain='%s' - BEFORE!", domain)
1106 domain = tidyup.domain(domain) if domain not in [None, ""] else None
1107 logger.debug("domain='%s' - AFTER!", domain)
1109 if domain in [None, ""]:
1110 logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
1112 elif not domain_helper.is_wanted(domain):
1113 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1115 elif not args.force and instances.is_registered(domain):
1116 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1119 logger.debug("Processing domain='%s',row[blocker]='%s' ...", domain, row["blocker"])
1120 processed = processing.instance(domain, row["blocker"], inspect.currentframe().f_code.co_name, force=args.force)
1121 logger.debug("processed='%s'", processed)
1123 logger.debug("Success! - EXIT!")
1126 def fetch_fedipact(args: argparse.Namespace) -> int:
1127 logger.debug("args[]='%s' - CALLED!", type(args))
1129 logger.debug("Invoking locking.acquire() ...")
1132 source_domain = "fedipact.online"
1133 if sources.is_recent(source_domain):
1134 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1137 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1138 sources.update(source_domain)
1140 logger.info("Fetching / from source_domain='%s' ...", source_domain)
1141 response = network.fetch_url(
1142 f"https://{source_domain}",
1143 network.web_headers,
1144 (config.get("connection_timeout"), config.get("read_timeout"))
1147 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1148 if response.ok and response.status_code == 200 and response.text != "":
1149 logger.debug("Parsing %d Bytes ...", len(response.text))
1151 doc = bs4.BeautifulSoup(response.text, "html.parser")
1152 logger.debug("doc[]='%s'", type(doc))
1154 rows = doc.findAll("li")
1155 logger.info("Checking %d row(s) ...", len(rows))
1157 logger.debug("row[]='%s'", type(row))
1158 domain = tidyup.domain(row.contents[0]) if row.contents[0] not in [None, ""] else None
1160 logger.debug("domain='%s' - AFTER!", domain)
1161 if domain in [None, ""]:
1162 logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
1165 logger.debug("domain='%s' - BEFORE!", domain)
1166 domain = domain.encode("idna").decode("utf-8")
1167 logger.debug("domain='%s' - AFTER!", domain)
1169 if not domain_helper.is_wanted(domain):
1170 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1172 elif instances.is_registered(domain):
1173 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1175 elif instances.is_recent(domain):
1176 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1179 logger.info("Fetching domain='%s' ...", domain)
1180 federation.fetch_instances(domain, "beach.city", None, inspect.currentframe().f_code.co_name)
1182 logger.debug("Success! - EXIT!")
1185 def fetch_joinmobilizon(args: argparse.Namespace) -> int:
1186 logger.debug("args[]='%s' - CALLED!", type(args))
1188 logger.debug("Invoking locking.acquire() ...")
1191 source_domain = "instances.joinmobilizon.org"
1192 if sources.is_recent(source_domain):
1193 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1196 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1197 sources.update(source_domain)
1199 logger.info("Fetching instances from source_domain='%s' ...", source_domain)
1200 raw = network.fetch_url(
1201 f"https://{source_domain}/api/v1/instances",
1202 network.web_headers,
1203 (config.get("connection_timeout"), config.get("read_timeout"))
1205 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1207 parsed = json.loads(raw)
1208 logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
1210 if "data" not in parsed:
1211 logger.warning("parsed()=%d does not contain key 'data'")
1214 logger.info("Checking %d instances ...", len(parsed["data"]))
1215 for row in parsed["data"]:
1216 logger.debug("row[]='%s'", type(row))
1217 if "host" not in row:
1218 logger.warning("row='%s' does not contain key 'host' - SKIPPED!", row)
1220 elif not domain_helper.is_wanted(row["host"]):
1221 logger.debug("row[host]='%s' is not wanted - SKIPPED!", row["host"])
1223 elif instances.is_registered(row["host"]):
1224 logger.debug("row[host]='%s' is already registered - SKIPPED!", row["host"])
1227 logger.info("Fetching row[host]='%s' ...", row["host"])
1228 federation.fetch_instances(row["host"], "demo.mobilizon.org", None, inspect.currentframe().f_code.co_name)
1230 logger.debug("Success! - EXIT!")
1233 def fetch_joinmisskey(args: argparse.Namespace) -> int:
1234 logger.debug("args[]='%s' - CALLED!", type(args))
1236 logger.debug("Invoking locking.acquire() ...")
1239 source_domain = "instanceapp.misskey.page"
1240 if sources.is_recent(source_domain):
1241 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1244 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1245 sources.update(source_domain)
1247 logger.info("Fetching instances.json from source_domain='%s' ...", source_domain)
1248 raw = network.fetch_url(
1249 f"https://{source_domain}/instances.json",
1250 network.web_headers,
1251 (config.get("connection_timeout"), config.get("read_timeout"))
1253 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1255 parsed = json.loads(raw)
1256 logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
1258 if "instancesInfos" not in parsed:
1259 logger.warning("parsed()=%d does not contain element 'instancesInfos'")
1262 logger.info("Checking %d instane(s) ...", len(parsed["instancesInfos"]))
1263 for row in parsed["instancesInfos"]:
1264 logger.debug("row[%s]='%s'", type(row), row)
1265 if "url" not in row:
1266 logger.warning("row()=%d does not have element 'url' - SKIPPED!", len(row))
1268 elif not domain_helper.is_wanted(row["url"]):
1269 logger.debug("row[url]='%s' is not wanted - SKIPPED!", row["url"])
1271 elif instances.is_registered(row["url"]):
1272 logger.debug("row[url]='%s' is already registered - SKIPPED!", row["url"])
1275 logger.info("Fetching row[url]='%s' ...", row["url"])
1276 federation.fetch_instances(row["url"], "misskey.io", None, inspect.currentframe().f_code.co_name)
1278 logger.debug("Success! - EXIT!")
1281 def recheck_obfuscation(args: argparse.Namespace) -> int:
1282 logger.debug("args[]='%s' - CALLED!", type(args))
1284 logger.debug("Invoking locking.acquire() ...")
1287 if isinstance(args.domain, str) and args.domain != "" and domain_helper.is_wanted(args.domain):
1288 logger.debug("Fetching record for args.domain='%s' ...", args.domain)
1289 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE (has_obfuscation = 1 OR has_obfuscation IS NULL) AND domain = ?", [args.domain])
1290 elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
1291 logger.debug("Fetching records for args.software='%s' ...", args.software)
1292 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE (has_obfuscation = 1 OR has_obfuscation IS NULL) AND software = ?", [args.software])
1294 logger.debug("Fetching records where domains have obfuscated block entries ...")
1295 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 OR has_obfuscation IS NULL")
1297 rows = database.cursor.fetchall()
1298 logger.info("Checking %d domains ...", len(rows))
1300 logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
1301 if blacklist.is_blacklisted(row["domain"]):
1302 logger.debug("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"])
1304 elif (args.force is None or not args.force) and args.domain is None and args.software is None and instances.is_recent(row["domain"], "last_blocked"):
1305 logger.debug("row[domain]='%s' has been recently checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force))
1308 logger.debug("Invoking federation.fetch_blocks(%s) ...", row["domain"])
1309 blocking = federation.fetch_blocks(row["domain"])
1311 logger.debug("blocking()=%d", len(blocking))
1312 if len(blocking) == 0:
1313 logger.debug("Empty blocking list, trying individual fetch_blocks() for row[software]='%s' ...", row["software"])
1314 if row["software"] == "pleroma":
1315 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1316 blocking = pleroma.fetch_blocks(row["domain"])
1317 elif row["software"] == "mastodon":
1318 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1319 blocking = mastodon.fetch_blocks(row["domain"])
1320 elif row["software"] == "lemmy":
1321 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1322 blocking = lemmy.fetch_blocks(row["domain"])
1323 elif row["software"] == "friendica":
1324 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1325 blocking = friendica.fetch_blocks(row["domain"])
1326 elif row["software"] == "misskey":
1327 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1328 blocking = misskey.fetch_blocks(row["domain"])
1330 logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])
1332 # c.s isn't part of oliphant's "hidden" blocklists
1333 logger.debug("row[domain]='%s'", row["domain"])
1334 if row["domain"] != "chaos.social" and row["software"] is not None and not software_helper.is_relay(row["software"]) and not blocklists.has(row["domain"]):
1335 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
1336 instances.set_last_blocked(row["domain"])
1337 instances.set_total_blocks(row["domain"], blocking)
1342 logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1343 for block in blocking:
1344 logger.debug("block[blocked]='%s'", block["blocked"])
1347 if block["blocked"] == "":
1348 logger.debug("block[blocked] is empty - SKIPPED!")
1350 elif block["blocked"].endswith(".onion"):
1351 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
1353 elif block["blocked"].endswith(".i2p") and not config.get("allow_i2p_domain"):
1354 logger.debug("blocked='%s' is an I2P onion domain name - SKIPPED!", block["blocked"])
1356 elif block["blocked"].endswith(".arpa"):
1357 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1359 elif block["blocked"].endswith(".tld"):
1360 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1362 elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
1363 logger.debug("block='%s' is obfuscated.", block["blocked"])
1364 obfuscated = obfuscated + 1
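# utils.deobfuscate() attempts to map the masked entry (wildcards and/or digest) back to a
# known domain; it may return None or the unchanged value when nothing matches.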
1365 blocked = utils.deobfuscate(block["blocked"], row["domain"], block["digest"] if "digest" in block else None)
1366 elif not domain_helper.is_wanted(block["blocked"]):
1367 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1369 elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1370 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
1373 logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
1374 if blocked is not None and blocked != block["blocked"]:
1375 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1376 obfuscated = obfuscated - 1
1378 if blacklist.is_blacklisted(blocked):
1379 logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
1381 elif blacklist.is_blacklisted(row["domain"]):
1382 logger.debug("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"])
1384 elif blocks.is_instance_blocked(row["domain"], blocked):
1385 logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
1388 block["block_level"] = blocks.alias_block_level(block["block_level"])
1390 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
1391 if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1392 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
1395 "reason" : block["reason"],
1398 logger.debug("Setting obfuscated=%d for row[domain]='%s' ...", obfuscated, row["domain"])
1399 instances.set_has_obfuscation(row["domain"], (obfuscated > 0))
1400 instances.set_obfuscated_blocks(row["domain"], obfuscated)
1402 logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
1403 if instances.has_pending(row["domain"]):
1404 logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1405 instances.update(row["domain"])
1407 logger.debug("Invoking commit() ...")
1408 database.connection.commit()
1410 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1411 if config.get("bot_enabled") and len(blockdict) > 0:
1412 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", row["domain"], len(blockdict))
1413 network.send_bot_post(row["domain"], blockdict)
1415 logger.debug("Success! - EXIT!")
1418 def fetch_fedilist(args: argparse.Namespace) -> int:
1419 logger.debug("args[]='%s' - CALLED!", type(args))
1421 logger.debug("Invoking locking.acquire() ...")
1424 source_domain = "demo.fedilist.com"
1425 if sources.is_recent(source_domain):
1426 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1429 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1430 sources.update(source_domain)
1432 url = f"http://{source_domain}/instance/csv?onion=not"
1433 if args.software is not None and args.software != "":
1434 logger.debug("args.software='%s'", args.software)
1435 url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"
1437 logger.info("Fetching url='%s' ...", url)
1438 response = reqto.get(
1440 headers=network.web_headers,
1441 timeout=(config.get("connection_timeout"), config.get("read_timeout")),
1442 allow_redirects=False
1445 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1446 if not response.ok or response.status_code > 200 or len(response.content) == 0:
1447 logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.text))
1450 reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
1452 logger.debug("reader[]='%s'", type(reader))
1454 logger.warning("Failed parsing response.content()=%d as CSV content", len(response.content))
1459 logger.info("Checking %d rows ...", len(rows))
1461 logger.debug("row[]='%s'", type(row))
1462 if "hostname" not in row:
1463 logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(row))
1466 logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"])
1467 domain = tidyup.domain(row["hostname"]) if row["hostname"] not in [None, ""] else None
1468 logger.debug("domain='%s' - AFTER!", domain)
1470 if domain in [None, ""]:
1471 logger.debug("domain='%s' is empty after tidyup.domain(): row[hostname]='%s' - SKIPPED!", domain, row["hostname"])
1474 logger.debug("domain='%s' - BEFORE!", domain)
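# Convert the hostname to its ASCII (IDNA/punycode) representation before further checks.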
1475 domain = domain.encode("idna").decode("utf-8")
1476 logger.debug("domain='%s' - AFTER!", domain)
1478 if not domain_helper.is_wanted(domain):
1479 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1481 elif (args.force is None or not args.force) and instances.is_registered(domain):
1482 logger.debug("domain='%s' is already registered, --force not specified: args.force[]='%s'", domain, type(args.force))
1484 elif instances.is_recent(domain):
1485 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1488 logger.info("Fetching instances from domain='%s' ...", domain)
1489 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1491 logger.debug("Success! - EXIT!")
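# Re-determines the software type of registered instances. Which domains are checked
# is narrowed down by the given command line arguments (domain, software, detection
# mode, ...); without any filter all instances are re-checked, least recently updated
# first.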
1494 def update_nodeinfo(args: argparse.Namespace) -> int:
1495 logger.debug("args[]='%s' - CALLED!", type(args))
1497 logger.debug("Invoking locking.acquire() ...")
1500 if args.domain is not None and args.domain != "":
1501 logger.debug("Fetching args.domain='%s'", args.domain)
1502 database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
1503 elif args.software is not None and args.software != "":
1504 logger.info("Fetching domains for args.software='%s'", args.software)
1505 database.cursor.execute("SELECT domain, software FROM instances WHERE software = ? ORDER BY last_updated ASC", [args.software])
1506 elif args.mode is not None and args.mode != "":
1507 logger.info("Fetching domains for args.mode='%s'", args.mode.upper())
1508 database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode = ? ORDER BY last_updated ASC", [args.mode])
1509 elif args.no_software:
1510 logger.info("Fetching domains with no software type detected ...")
1511 database.cursor.execute("SELECT domain, software FROM instances WHERE software IS NULL ORDER BY last_updated ASC")
1512 elif args.with_software:
1513 logger.info("Fetching domains with any software type detected ...")
1514 database.cursor.execute("SELECT domain, software FROM instances WHERE software IS NOT NULL ORDER BY last_updated ASC")
1516 logger.info("Fetching domains with a detection mode other than AUTO_DISCOVERY set ...")
1517 database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode IS NOT NULL AND detection_mode != 'AUTO_DISCOVERY' ORDER BY last_updated ASC")
1518 elif args.no_detection:
1519 logger.info("Fetching domains with no detection mode being set ...")
1520 database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode IS NULL ORDER BY last_updated ASC")
1522 logger.info("Fetching all domains, least recently updated first ...")
1523 database.cursor.execute("SELECT domain, software FROM instances ORDER BY last_updated ASC")
1525 domains = database.cursor.fetchall()
1527 logger.info("Checking %d domain(s) ...", len(domains))
1530 logger.debug("row[]='%s'", type(row))
1531 if blacklist.is_blacklisted(row["domain"]):
1532 logger.debug("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"])
1534 elif not args.force and instances.is_recent(row["domain"], "last_nodeinfo"):
1535 logger.debug("row[domain]='%s' has been recently checked - SKIPPED!", row["domain"])
1539 logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
1540 software = federation.determine_software(row["domain"])
1542 logger.debug("Determined software='%s'", software)
1543 if (software != row["software"] and software is not None) or args.force is True:
1544 logger.debug("software='%s'", software)
1545 if software is None:
1546 logger.debug("Setting nodeinfo_url to 'None' for row[domain]='%s' ...", row["domain"])
1547 instances.set_nodeinfo_url(row["domain"], None)
1549 logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
1550 instances.set_software(row["domain"], software)
1552 if software is not None:
1553 logger.debug("Setting row[domain]='%s' as successfully determined ...", row["domain"])
1554 instances.set_success(row["domain"])
1555 except network.exceptions as exception:
1556 logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1557 instances.set_last_error(row["domain"], exception)
1559 instances.set_last_nodeinfo(row["domain"])
1560 instances.update(row["domain"])
1563 logger.debug("Success! - EXIT!")
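# Fetches the instance list from the instances.social API (requires
# instances_social_api_key in config.json) and queues all wanted, unregistered and
# not recently crawled domains for crawling.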
1566 def fetch_instances_social(args: argparse.Namespace) -> int:
1567 logger.debug("args[]='%s' - CALLED!", type(args))
1569 logger.debug("Invoking locking.acquire() ...")
1572 source_domain = "instances.social"
1574 if config.get("instances_social_api_key") == "":
1575 logger.error("API key not set. Please set it in your config.json file.")
1577 elif sources.is_recent(source_domain):
1578 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1581 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1582 sources.update(source_domain)
1585 "Authorization": f"Bearer {config.get('instances_social_api_key')}",
1588 logger.info("Fetching list from source_domain='%s' ...", source_domain)
1589 fetched = network.get_json_api(
1591 "/api/1.0/instances/list?count=0&sort_by=name",
1593 timeout=(config.get("connection_timeout"), config.get("read_timeout"))
1595 logger.debug("fetched(%d)[]='%s'", len(fetched), type(fetched))
1597 if "error_message" in fetched:
1598 logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1600 elif "exception" in fetched:
1601 logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1603 elif "json" not in fetched:
1604 logger.warning("fetched has no element 'json' - EXIT!")
1606 elif "instances" not in fetched["json"]:
1607 logger.warning("fetched[json] has no element 'instances' - EXIT!")
1611 rows = fetched["json"]["instances"]
1613 logger.info("Checking %d row(s) ...", len(rows))
1615 logger.debug("row[]='%s'", type(row))
1616 domain = tidyup.domain(row["name"]) if row["name"] not in [None, ""] else None
1617 logger.debug("domain='%s' - AFTER!", domain)
1619 if domain in [None, ""]:
1620 logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
1623 logger.debug("domain='%s' - BEFORE!", domain)
1624 domain = domain.encode("idna").decode("utf-8")
1625 logger.debug("domain='%s' - AFTER!", domain)
1627 if not domain_helper.is_wanted(domain):
1628 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1630 elif domain in domains:
1631 logger.debug("domain='%s' is already added - SKIPPED!", domain)
1633 elif instances.is_registered(domain):
1634 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1636 elif instances.is_recent(domain):
1637 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1640 logger.info("Fetching instances from domain='%s' ...", domain)
1641 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1643 logger.debug("Success! - EXIT!")
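# Fetches the relay list from api.relaylist.com, takes the hostname from each relay's
# URL and queues all wanted, unregistered and not recently crawled domains for
# crawling.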
1646 def fetch_relaylist(args: argparse.Namespace) -> int:
1647 logger.debug("args[]='%s' - CALLED!", type(args))
1649 logger.debug("Invoking locking.acquire() ...")
1652 source_domain = "api.relaylist.com"
1654 if sources.is_recent(source_domain):
1655 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1658 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1659 sources.update(source_domain)
1661 logger.info("Fetching list from source_domain='%s' ...", source_domain)
1662 fetched = network.get_json_api(
1666 (config.get("connection_timeout"), config.get("read_timeout"))
1668 logger.debug("fetched(%d)[]='%s'", len(fetched), type(fetched))
1670 if "error_message" in fetched:
1671 logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1673 elif "exception" in fetched:
1674 logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1676 elif "json" not in fetched:
1677 logger.warning("fetched has no element 'json' - EXIT!")
1682 logger.info("Checking %d row(s) ...", len(fetched["json"]))
1683 for row in fetched["json"]:
1684 logger.debug("row[]='%s'", type(row))
1685 domain = urlparse(row["url"]).netloc.lower().split(":")[0]
1686 logger.debug("domain='%s' - AFTER!", domain)
1688 if domain in [None, ""]:
1689 logger.debug("domain='%s' is empty after parsing row[url] - SKIPPED!", domain)
1692 logger.debug("domain='%s' - BEFORE!", domain)
1693 domain = domain.encode("idna").decode("utf-8")
1694 logger.debug("domain='%s' - AFTER!", domain)
1696 if not domain_helper.is_wanted(domain):
1697 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1699 elif domain in domains:
1700 logger.debug("domain='%s' is already added - SKIPPED!", domain)
1702 elif instances.is_registered(domain):
1703 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1705 elif instances.is_recent(domain):
1706 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1709 logger.info("Fetching instances from domain='%s'", domain)
1710 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1712 logger.debug("Success! - EXIT!")
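# Fetches peer lists from known relay instances (activityrelay, aoderelay,
# selective-relay, pub-relay). pub-relay peers are read from nodeinfo metadata, the
# other relay types are scraped from their HTML landing page. Newly found peers are
# then crawled via federation.fetch_instances().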
1715 def fetch_relays(args: argparse.Namespace) -> int:
1716 logger.debug("args[]='%s' - CALLED!", type(args))
1718 logger.debug("Invoking locking.acquire() ...")
1721 if args.domain is not None and args.domain != "":
1722 logger.debug("Fetching instances record for args.domain='%s' ...", args.domain)
1723 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND domain = ? LIMIT 1", [args.domain])
1724 elif args.software is not None and args.software != "":
1725 logger.debug("Fetching instances records for args.software='%s' ...", args.software)
1726 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND nodeinfo_url IS NOT NULL AND software = ? ORDER BY last_updated DESC", [args.software])
1728 logger.debug("Fetch all relay instances ...")
1729 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND nodeinfo_url IS NOT NULL ORDER BY last_updated DESC")
1732 rows = database.cursor.fetchall()
1734 logger.info("Checking %d relays ...", len(rows))
1736 logger.debug("row[domain]='%s',row[software]='%s'", row["domain"], row["software"])
1737 if not args.force and instances.is_recent(row["domain"]):
1738 logger.debug("row[domain]='%s' has been recently fetched - SKIPPED!", row["domain"])
1740 elif row["nodeinfo_url"] is None:
1741 logger.warning("row[domain]='%s' has an empty nodeinfo_url but one is required - SKIPPED!", row["domain"])
1746 logger.debug("row[domain]='%s',row[software]='%s' - checking ....", row["domain"], row["software"])
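# pub-relay exposes its peers via nodeinfo metadata, so its nodeinfo URL is fetched
# here; all other supported relay types are scraped from their HTML landing page
# below.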
1747 if row["software"] == "pub-relay":
1748 logger.info("Fetching row[nodeinfo_url]='%s' from relay row[domain]='%s',row[software]='%s' ...", row["nodeinfo_url"], row["domain"], row["software"])
1749 raw = network.fetch_api_url(
1750 row["nodeinfo_url"],
1751 (config.get("connection_timeout"), config.get("read_timeout"))
1754 logger.debug("raw[%s]()=%d", type(raw), len(raw))
1755 if "exception" in raw:
1756 logger.warning("row[domain]='%s' has caused an exception: '%s' - raising again ...", row["domain"], type(raw["exception"]))
1757 raise raw["exception"]
1758 elif "error_message" in raw:
1759 logger.warning("row[domain]='%s' has caused error message: '%s' - SKIPPED!", row["domain"], raw["error_message"])
1760 instances.set_last_error(row["domain"], raw)
1761 instances.set_last_instance_fetch(row["domain"])
1762 instances.update(row["domain"])
1764 elif "json" not in raw:
1765 logger.warning("raw()=%d does not contain key 'json' in response - SKIPPED!", len(raw))
1767 elif "metadata" not in raw["json"]:
1768 logger.warning("raw[json]()=%d does not contain key 'metadata' in response - SKIPPED!", len(raw["json"]))
1770 elif "peers" not in raw["json"]["metadata"]:
1771 logger.warning("raw[json][metadata]()=%d does not contain key 'peers' in response - SKIPPED!", len(raw["json"]["metadata"]))
1774 logger.info("Fetching / from relay row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1775 raw = network.fetch_url(
1776 f"https://{row['domain']}",
1777 network.web_headers,
1778 (config.get("connection_timeout"), config.get("read_timeout"))
1780 logger.debug("raw[%s]()=%d", type(raw), len(raw))
1782 doc = bs4.BeautifulSoup(raw, features="html.parser")
1783 logger.debug("doc[]='%s'", type(doc))
1785 except network.exceptions as exception:
1786 logger.warning("Exception '%s' during fetching from relay '%s': '%s'", type(exception), row["domain"], str(exception))
1787 instances.set_last_error(row["domain"], exception)
1788 instances.set_last_instance_fetch(row["domain"])
1789 instances.update(row["domain"])
1792 logger.debug("row[software]='%s'", row["software"])
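# activityrelay has no machine-readable peer list here, so the landing page is
# scraped: the <p> tag containing "registered instances" lists the peer domains as
# plain text nodes.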
1793 if row["software"] == "activityrelay":
1794 logger.debug("Checking row[domain]='%s' ...", row["domain"])
1795 tags = doc.findAll("p")
1797 logger.debug("Checking %d paragraphs ...", len(tags))
1799 logger.debug("tag[]='%s'", type(tag))
1800 if len(tag.contents) == 0:
1801 logger.debug("tag='%s' is an empty tag - SKIPPED!", tag)
1803 elif "registered instances" not in tag.contents[0]:
1804 logger.debug("Skipping paragraph, 'registered instances' text not found.")
1807 logger.debug("Found tag.contents[0][]='%s'", tag.contents[0])
1808 for domain in tag.contents:
1809 logger.debug("domain[%s]='%s'", type(domain), domain)
1810 if not isinstance(domain, bs4.element.NavigableString) or "registered instances" in domain:
1813 domain = str(domain)
1814 logger.debug("domain='%s'", domain)
1815 if not domain_helper.is_wanted(domain):
1816 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1819 logger.debug("domain='%s' - BEFORE!", domain)
1820 domain = tidyup.domain(domain) if domain not in [None, ""] else None
1821 logger.debug("domain='%s' - AFTER!", domain)
1823 if domain in [None, ""]:
1824 logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1826 elif domain not in peers:
1827 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1828 peers.append(domain)
1830 logger.debug("domains()=%d,domain='%s'", len(domains), domain)
1831 if dict_helper.has_key(domains, "domain", domain):
1832 logger.debug("domain='%s' already added", domain)
1835 logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
1838 "origin": row["domain"],
1840 elif row["software"] in ["aoderelay", "selective-relay"]:
1841 logger.debug("Checking row[domain]='%s' ...", row["domain"])
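# aoderelay lists its peers in <section class="instance"> blocks while
# selective-relay uses <li> entries below <div id="instances">; in both cases the
# peer domain is taken from the contained <a> link.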
1842 if row["software"] == "aoderelay":
1843 tags = doc.findAll("section", {"class": "instance"})
1845 tags = doc.find("div", {"id": "instances"}).findAll("li")
1847 logger.debug("Checking %d tags ...", len(tags))
1849 logger.debug("tag[]='%s'", type(tag))
1851 link = tag.find("a")
1852 logger.debug("link[%s]='%s'", type(link), link)
1853 if not isinstance(link, bs4.element.Tag):
1854 logger.warning("tag[%s]='%s' does not contain a 'bs4.element.Tag' link - SKIPPED!", type(tag), tag)
1857 components = urlparse(link.get("href"))
1858 logger.debug("components(%d)='%s'", len(components), components)
1859 domain = components.netloc.lower().split(":")[0]
1861 logger.debug("domain='%s' - BEFORE!", domain)
1862 domain = tidyup.domain(domain) if domain not in [None, ""] else None
1863 logger.debug("domain='%s' - AFTER!", domain)
1865 if domain in [None, ""]:
1866 logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1868 elif domain not in peers:
1869 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1870 peers.append(domain)
1872 logger.debug("domains()=%d,domain='%s'", len(domains), domain)
1873 if dict_helper.has_key(domains, "domain", domain):
1874 logger.debug("domain='%s' already added", domain)
1877 logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
1880 "origin": row["domain"],
1882 elif row["software"] == "pub-relay":
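# pub-relay: the peer list was already fetched above from the nodeinfo URL and is
# found under metadata -> peers.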
1883 logger.debug("Checking %d peer(s) row[domain]='%s' ...", len(raw["json"]["metadata"]["peers"]), row["domain"])
1884 for domain in raw["json"]["metadata"]["peers"]:
1885 logger.debug("domain='%s' - BEFORE!", domain)
1886 domain = tidyup.domain(domain) if domain not in [None, ""] else None
1887 logger.debug("domain='%s' - AFTER!", domain)
1889 if domain in [None, ""]:
1890 logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1892 elif domain not in peers:
1893 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1894 peers.append(domain)
1896 logger.debug("domains()=%d,domain='%s'", len(domains), domain)
1897 if dict_helper.has_key(domains, "domain", domain):
1898 logger.debug("domain='%s' already added", domain)
1901 logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
1904 "origin": row["domain"],
1907 logger.warning("row[domain]='%s',row[software]='%s' is not supported", row["domain"], row["software"])
1910 logger.debug("Updating last_instance_fetch for row[domain]='%s' ...", row["domain"])
1911 instances.set_last_instance_fetch(row["domain"])
1913 logger.info("Relay '%s' has %d peer(s) registered.", row["domain"], len(peers))
1914 instances.set_total_peers(row["domain"], peers)
1916 logger.debug("Flushing data for row[domain]='%s'", row["domain"])
1917 instances.update(row["domain"])
1919 logger.info("Checking %d domains ...", len(domains))
1921 logger.debug("row[domain]='%s',row[origin]='%s'", row["domain"], row["origin"])
1922 if not domain_helper.is_wanted(row["domain"]):
1923 logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
1925 elif instances.is_registered(row["domain"]):
1926 logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
1929 logger.info("Fetching row[domain]='%s',row[origin]='%s' ...", row["domain"], row["origin"])
1930 federation.fetch_instances(row["domain"], row["origin"], None, inspect.currentframe().f_code.co_name)
1932 logger.debug("Success! - EXIT!")
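# Converts all domain/origin columns in 'instances' and blocker/blocked columns in
# 'blocks' that are not yet in punycode (xn--) form to their IDNA representation.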
1935 def convert_idna(args: argparse.Namespace) -> int:
1936 logger.debug("args[]='%s' - CALLED!", type(args))
1938 database.cursor.execute("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC")
1939 rows = database.cursor.fetchall()
1941 logger.debug("rows[]='%s'", type(rows))
1942 instances.translate_idnas(rows, "domain")
1944 database.cursor.execute("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC")
1945 rows = database.cursor.fetchall()
1947 logger.debug("rows[]='%s'", type(rows))
1948 instances.translate_idnas(rows, "origin")
1950 database.cursor.execute("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC")
1951 rows = database.cursor.fetchall()
1953 logger.debug("rows[]='%s'", type(rows))
1954 blocks.translate_idnas(rows, "blocker")
1956 database.cursor.execute("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC")
1957 rows = database.cursor.fetchall()
1959 logger.debug("rows[]='%s'", type(rows))
1960 blocks.translate_idnas(rows, "blocked")
1962 logger.debug("Success! - EXIT!")
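# Removes instances whose domain name no longer validates (validators.domain),
# including their block records, and vacuums the database afterwards.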
1965 def remove_invalid(args: argparse.Namespace) -> int:
1966 logger.debug("args[]='%s' - CALLED!", type(args))
1968 logger.debug("Invoking locking.acquire() ...")
1971 database.cursor.execute("SELECT domain FROM instances ORDER BY domain ASC")
1972 rows = database.cursor.fetchall()
1974 logger.info("Checking %d domains ...", len(rows))
1976 logger.debug("row[domain]='%s'", row["domain"])
1977 if not validators.domain(row["domain"].split("/")[0]):
1978 logger.info("Invalid row[domain]='%s' found, removing ...", row["domain"])
1979 database.cursor.execute("DELETE FROM blocks WHERE blocker = ? OR blocked = ?", [row["domain"], row["domain"]])
1980 database.cursor.execute("DELETE FROM instances WHERE domain = ? LIMIT 1", [row["domain"]])
1982 logger.debug("Invoking commit() ...")
1983 database.connection.commit()
1985 logger.info("Vacuum cleaning database ...")
1986 database.cursor.execute("VACUUM")
1988 logger.debug("Success! - EXIT!")