1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
# Configure root logging once at import time; every command below logs
# through this per-module logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)
def check_instance(args: argparse.Namespace) -> int:
    """Check whether args.domain is valid, not blacklisted and not yet known.

    Returns 0 when the domain could be added, or a distinct non-zero
    status code for each failure reason so callers can exit with it.
    """
    logger.debug("args.domain='%s' - CALLED!", args.domain)

    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        # Domain is fine and unknown - it can be fetched
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status
def check_nodeinfo(args: argparse.Namespace) -> int:
    """Sanity-check stored nodeinfo URLs against their instance's domain.

    Counts (and warns about) rows whose absolute nodeinfo URL contains
    neither the domain nor its IDNA/punycode form. Returns 0.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    # Fetch all rows that have a nodeinfo URL recorded
    database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")

    cnt = 0
    for row in database.cursor.fetchall():
        logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        punycode = row["domain"].encode("idna").decode("utf-8")

        if row["nodeinfo_url"].startswith("/"):
            logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
            continue
        elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
            logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
            cnt = cnt + 1

    logger.info("Found %d row(s)", cnt)

    logger.debug("EXIT!")
    return 0
def fetch_pixelfed_api(args: argparse.Namespace) -> int:
    """Fetch the server list from pixelfed.org's API and register new instances.

    Returns 0 on success, 1 when the source was accessed too recently,
    and a distinct non-zero code for each API failure.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF by default, you don't have to add network.source_headers by yourself here
    headers = tuple()
    source_domain = "pixelfed.org"

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    try:
        logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
        headers = csrf.determine(source_domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
        return 100

    try:
        logger.info("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            source_domain,
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("fetched(%d)[]='%s'", len(fetched), type(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 101
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 102

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif row["domain"] in [None, ""]:
                logger.debug("row[domain]='%s' is empty - SKIPPED!", row["domain"])
                continue

            # Normalize to punycode before any further checks
            logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
            domain = row["domain"].encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Fetching instances from domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 103

    logger.debug("Success! - EXIT!")
    return 0
def fetch_bkali(args: argparse.Namespace) -> int:
    """Fetch the domain list from the gql.api.bka.li GraphQL API.

    Collects wanted, unregistered, not-recently-crawled domains and then
    fetches instance data for each. Returns 0 on success, non-zero on
    API or fetch errors.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "gql.api.bka.li"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()
    try:
        logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
        fetched = network.post_json_api(
            source_domain,
            "/v1/graphql",
            json.dumps({
                "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
            })
        )

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for 'gql.sources.bka.li' returned error message='%s' - EXIT!", fetched["error_message"])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s' - EXIT!", fetched["json"]["error"]["message"])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif entry["domain"] in [None, ""]:
                logger.debug("entry[domain]='%s' is empty - SKIPPED!", entry["domain"])
                continue
            elif not domain_helper.is_wanted(entry["domain"]):
                logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
                continue
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))

    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, "tak.teleyal.blog", None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0
def fetch_blocks(args: argparse.Namespace) -> int:
    """Fetch and persist block lists from registered "blocker" instances.

    Scope is chosen from args: a single domain (args.domain), all
    instances of one software (args.software), only entries without a
    total_blocks count (args.only_none), or all supported instances.
    Handles obfuscated ("*"/"?") entries by attempting deobfuscation.
    Returns 0 on success, non-zero on argument errors.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ? LIMIT 1", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC", [args.software]
        )
    elif args.only_none:
        # Check only entries with total_blocked=None
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND total_blocks IS NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC"
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC"
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)

        if not domain_helper.is_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue
        elif not args.force and instances.is_recent(blocker, "last_blocked"):
            logger.debug("blocker='%s' has been recently accessed - SKIPPED!", blocker)
            continue

        logger.debug("Setting last_blocked,has_obfuscation=false for blocker='%s' ...", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        # c.s isn't part of oliphant's "hidden" blocklists
        if blocker == "chaos.social" or software_helper.is_relay(software) or blocklists.has(blocker):
            logger.debug("Skipping blocker='%s', run ./fba.py fetch_cs, fetch_oliphant, fetch_csv instead!", blocker)
            continue

        logger.debug("Invoking federation.fetch_blocks(%s) ...", blocker)
        blocking = federation.fetch_blocks(blocker)

        logger.debug("blocker='%s',software='%s',blocking()=%d", blocker, software, len(blocking))
        if len(blocking) == 0:
            # Generic fetch failed/empty, fall back to software-specific scraper
            logger.debug("blocker='%s',software='%s' - fetching blocklist ...", blocker, software)
            if software == "pleroma":
                blocking = pleroma.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "mastodon":
                blocking = mastodon.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "lemmy":
                blocking = lemmy.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "friendica":
                blocking = friendica.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "misskey":
                blocking = misskey.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            else:
                logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
        instances.set_total_blocks(blocker, blocking)

        blockdict = list()
        deobfuscated = obfuscated = 0

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] in [None, ""]:
                logger.warning("block[blocked]='%s' is empty, blocker='%s'", block["blocked"], blocker)
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".i2p") and not config.get("allow_i2p_domain"):
                logger.debug("blocked='%s' is an I2P .onion domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)
                instances.set_has_obfuscation(blocker, True)
                obfuscated = obfuscated + 1

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["digest"] if "digest" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    continue

                deobfuscated = deobfuscated + 1
                block["blocked"] = row["domain"]
                origin = row["origin"]
                nodeinfo_url = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)
                instances.set_has_obfuscation(blocker, True)
                obfuscated = obfuscated + 1

                # Some obscure them with question marks, not sure if that's dependent on version or not
                row = instances.deobfuscate("?", block["blocked"], block["digest"] if "digest" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    continue

                deobfuscated = deobfuscated + 1
                block["blocked"] = row["domain"]
                origin = row["origin"]
                nodeinfo_url = row["nodeinfo_url"]

            logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
            if block["blocked"] in [None, ""]:
                logger.debug("block[blocked]='%s' is empty - SKIPPED!", block["blocked"])
                continue

            logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
            block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
            logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])

            if not domain_helper.is_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            block["block_level"] = blocks.alias_block_level(block["block_level"])

            if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.info("blocker='%s' has %d obfuscated domain(s) and %d of them could be deobfuscated.", blocker, obfuscated, deobfuscated)
        instances.set_obfuscated_blocks(blocker, obfuscated)

        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d'", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_observer(args: argparse.Namespace) -> int:
    """Fetch instance lists per software type from fediverse.observer.

    Scrapes the software menu (or uses args.software), queries the
    observer's GraphQL API per software and registers new instances.
    Returns 0 on success, 1 when blocked by recency or missing navbar.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fediverse.observer"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    types = list()
    if args.software is None:
        logger.info("Fetching software list ...")
        raw = network.fetch_url(
            f"https://{source_domain}",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        ).text
        logger.debug("raw[%s]()=%d", type(raw), len(raw))

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        navbar = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"})
        logger.debug("navbar[]='%s'", type(navbar))
        if navbar is None:
            logger.warning("Cannot find navigation bar, cannot continue!")
            return 1

        items = navbar.findAll("a", {"class": "dropdown-item"})
        logger.debug("items[]='%s'", type(items))

        logger.info("Checking %d menu items ...", len(items))
        for item in items:
            logger.debug("item[%s]='%s'", type(item), item)
            if item.text.lower() == "all":
                logger.debug("Skipping 'All' menu entry ...")
                continue

            logger.debug("Appending item.text='%s' ...", item.text)
            types.append(tidyup.domain(item.text))
    else:
        logger.info("Adding args.software='%s' as type ...", args.software)
        types.append(args.software)

    logger.info("Fetching %d different table data ...", len(types))
    for software in types:
        logger.debug("software='%s'", software)

        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            continue

        items = list()
        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = network.post_json_api(
                f"api.{source_domain}",
                "/",
                json.dumps({
                    "query": "{nodes(softwarename:\"" + software + "\"){domain}}"
                })
            )

            logger.debug("raw[%s]()=%d", type(raw), len(raw))
            # BUGFIX: previous code logged row["domain"] here although no
            # "row" is in scope at this point (NameError); log software instead.
            if "exception" in raw:
                logger.warning("software='%s' has caused an exception: '%s' - raising again ...", software, type(raw["exception"]))
                raise raw["exception"]
            elif "error_message" in raw:
                logger.warning("software='%s' has caused error message: '%s' - SKIPPED!", software, raw["error_message"])
                continue
            elif not "data" in raw["json"]:
                logger.warning("Cannot find key 'nodes' in raw[json]()=%d", len(raw["json"]))
                continue
            elif not "nodes" in raw["json"]["data"]:
                logger.warning("Cannot find key 'nodes' in raw[json][data]()=%d", len(raw["json"]["data"]))
                continue

            items = raw["json"]["data"]["nodes"]
            logger.debug("items()=%d", len(items))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
            continue

        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            if not "domain" in item:
                logger.debug("item()=%d has not element 'domain'", len(item))
                continue

            logger.debug("item[domain]='%s' - BEFORE!", item["domain"])
            domain = tidyup.domain(item["domain"]) if item["domain"] not in [None, ""] else None
            logger.debug("domain='%s' - AFTER!", domain)

            if domain in [None, ""]:
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue

            logger.info("Fetching instances for domain='%s',software='%s' ...", domain, software)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_todon_wiki(args: argparse.Namespace) -> int:
    """Fetch silenced/suspended domain lists from wiki.todon.eu.

    Parses the wiki page, registers unknown blocked instances and
    records the blocks for blocker "todon.eu". Returns 0 on success,
    1 when the source was accessed too recently.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "wiki.todon.eu"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    blocker = "todon.eu"
    blocklist = {
        "silenced": list(),
        "reject": list(),
    }

    logger.debug("Fetching domainblocks from source_domain='%s'", source_domain)
    raw = network.fetch_url(
        f"https://{source_domain}/todon/domainblocks",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blocking = blocklist["silenced"] + blocklist["reject"]

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_last_blocked(blocker)
    instances.set_total_blocks(blocker, blocking)

    blockdict = list()
    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d'", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_cs) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if not domain_helper.is_wanted(blocked):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue
            elif not domain_helper.is_wanted(blocker):
                logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
                continue
            elif blocks.is_instance_blocked(blocker, blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", blocked, block_level, blocker)
                blockdict.append({
                    "blocked": blocked,
                    "reason" : None,
                })

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
        network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_cs(args: argparse.Namespace):
    """Fetch chaos.social's federation.md blocklist from GitHub.

    Renders the markdown, extracts silenced/blocked tables and records
    the blocks for blocker "chaos.social".
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # "extra" bundles the markdown "tables" extension which the
    # findNext("table") lookups below rely on.
    extensions = ["extra"]

    blocklist = {
        "silenced": list(),
        "reject": list(),
    }

    source_domain = "raw.githubusercontent.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching federation.md from source_domain='%s' ...", source_domain)
    raw = network.fetch_url(
        f"https://{source_domain}/chaossocial/meta/master/federation.md",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc()=%d[]='%s'", len(doc), type(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    blocklist["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    blocklist["reject"] = federation.find_domains(blocked)

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "chaos.social"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_last_blocked(blocker)
    instances.set_total_blocks(blocker, blocking)

    logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
    if len(blocking) > 0:
        blockdict = list()
        for block_level in blocklist:
            logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))

            for row in blocklist[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if not "domain" in row:
                    logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
                    continue
                elif not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
                    blockdict.append({
                        "blocked": row["domain"],
                        "reason" : row["reason"],
                    })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

    logger.debug("Success! - EXIT!")
def fetch_fba_rss(args: argparse.Namespace) -> int:
    """Fetch an FBA-specific RSS feed (args.feed) and add new instances.

    Extracts the domain from each item's link query parameter, filters
    unwanted/known/recent domains, then fetches the remainder. Returns 0
    on success, 1 when the feed's host was accessed too recently.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    domains = list()

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    components = urlparse(args.feed)
    # Host part only, lower-cased and without port
    domain = components.netloc.lower().split(":")[0]

    logger.debug("domain='%s'", domain)
    if sources.is_recent(domain):
        logger.info("API from domain='%s' has recently being accessed - EXIT!", domain)
        return 1
    else:
        logger.debug("domain='%s' has not been recently used, marking ...", domain)
        sources.update(domain)

    logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
    response = network.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item[%s]='%s'", type(item), item)
            # The link carries the domain as a query-parameter value
            domain = item.link.split("=")[1]
            domain = tidyup.domain(domain) if domain not in [None, ""] else None

            logger.debug("domain='%s' - AFTER!", domain)
            if domain in [None, ""]:
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))

    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)

            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_fbabot_atom(args: argparse.Namespace) -> int:
    """Fetch new instance domains from the FBA bot's ATOM feed.

    Downloads the feed (default host ryona.agency, overridable via a valid
    ``--feed`` URL), pulls every anchor out of each entry's HTML body,
    tidies and IDNA-encodes the linked domains, and hands each new, wanted
    domain to federation.fetch_instances(). Returns 0.

    NOTE(review): blank/control-flow lines (locking.acquire(), continue,
    return) were lost in extraction and have been reconstructed — confirm
    exit codes against upstream.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "ryona.agency"
    feed = f"https://{source_domain}/users/fba/feed.atom"

    # An alternative feed URL may be supplied on the command line.
    logger.debug("args.feed[%s]='%s'", type(args.feed), args.feed)
    if args.feed is not None and validators.url(args.feed):
        logger.debug("Setting feed='%s' ...", args.feed)
        feed = str(args.feed)
        source_domain = urlparse(args.feed).netloc

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = []

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = network.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            elements = doc.findAll("a")

            logger.debug("Checking %d element(s) ...", len(elements))
            for element in elements:
                logger.debug("element[%s]='%s'", type(element), element)
                # One anchor href may carry several comma-separated domains.
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href) if href not in [None, ""] else None

                    logger.debug("domain='%s' - AFTER!", domain)
                    if domain in [None, ""]:
                        logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                        continue

                    logger.debug("domain='%s' - BEFORE!", domain)
                    domain = domain.encode("idna").decode("utf-8")
                    logger.debug("domain='%s' - AFTER!", domain)

                    if not domain_helper.is_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    logger.info("Adding %d new instances ...", len(domains))
    for domain in domains:
        logger.debug("domain='%s'", domain)
        try:
            logger.info("Fetching instances from domain='%s' ...", domain)
            federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_instances(args: argparse.Namespace) -> int:
    """Crawl peers of known instances.

    When --domain or --software is given, only the matching rows are
    fetched first; afterwards a batch of supported-software instances whose
    peer list is stale (older than ``recheck_instance``) is crawled,
    ordered by peer count and responsiveness. Returns 0 on success,
    non-zero on invalid/blacklisted --domain.

    NOTE(review): dropped lines (locking.acquire(), continue/return/break,
    'FROM instances' in the first SQL statement, the --single short-circuit)
    were reconstructed from context — confirm against upstream.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    rows = []

    # Is domain or software set?
    if args.domain not in [None, ""]:
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101

        logger.debug("args.domain='%s' - BEFORE!", args.domain)
        domain = tidyup.domain(args.domain)
        logger.debug("domain='%s' - AFTER!", domain)

        database.cursor.execute("SELECT domain, origin, software FROM instances WHERE domain = ? LIMIT 1", [domain])
        rows = database.cursor.fetchall()
    elif args.software not in [None, ""]:
        logger.debug("args.software='%s' - BEFORE!", args.software)
        software = software_helper.alias(args.software)
        logger.debug("software='%s' - AFTER!", software)

        database.cursor.execute("SELECT domain, origin, software FROM instances WHERE software = ? ORDER BY last_updated ASC", [software])
        rows = database.cursor.fetchall()

    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s',row[origin]='%s',row[software]='%s'", row["domain"], row["origin"], row["software"])
        if row["software"] is None and instances.is_registered(row["domain"]) :
            logger.warning("row[domain]='%s' has no software detected. You can try to run ./fba.py update_nodeinfo --domain=%s --force to get it updated - SKIPPED!", row["domain"], row["domain"])
            continue
        elif software_helper.is_relay(row["software"]) and instances.is_registered(row["domain"]):
            logger.warning("row[domain]='%s' is of software type '%s' which is not supported by this command. Please invoke fetch_relays instead - SKIPPED!", row["domain"], row["software"])
            continue
        elif not args.force and args.software not in [None, ""] and instances.is_recent(row["domain"]):
            logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
            continue

        try:
            logger.info("Fetching instances from row[domain]='%s',row[origin]='%s',row[software]='%s' ...", row["domain"], row["origin"], row["software"])
            federation.fetch_instances(row["domain"], row["origin"], row["software"], inspect.currentframe().f_code.co_name)
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[domain]='%s'", type(exception), row["domain"])
            instances.set_last_error(row["domain"], exception)
            instances.update(row["domain"])

        # NOTE(review): guard reconstructed — a single explicit domain means
        # there is nothing more to loop over here.
        if args.domain not in [None, ""]:
            logger.debug("Not fetching more instances - BREAK!")
            break

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software \
FROM instances \
WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus', 'neodb', 'smithereen', 'vebinet') \
AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) \
ORDER BY total_peers DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
        domain = row["domain"].encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("Domain domain='%s' is not wanted - SKIPPED!", domain)
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s' ...", domain, row["origin"], row["software"])
            federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name)
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0
def fetch_csv(args: argparse.Namespace) -> int:
    """Process all configured CSV blocklist files.

    Iterates blocklists.csv_files; when --domain is given, only the
    matching blocker is processed. Each file is handed to
    processing.csv_block(). Returns 0.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    logger.info("Checking %d CSV files ...", len(blocklists.csv_files))
    for block in blocklists.csv_files:
        logger.debug("block[blocker]='%s',block[csv_url]='%s'", block["blocker"], block["csv_url"])

        # Is domain given and not equal blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue

        logger.debug("Invoking processing.csv_block(%s, %s, fetch_csv) ...", block["blocker"], block["csv_url"])
        processing.csv_block(block["blocker"], block["csv_url"], inspect.currentframe().f_code.co_name)

    logger.debug("Success - EXIT!")
    return 0
def fetch_oliphant(args: argparse.Namespace) -> int:
    """Process oliphant's blocklists hosted on codeberg.org.

    Skips entirely when the source was accessed recently. For each entry
    in blocklists.oliphant_blocklists (optionally filtered by --domain)
    the raw CSV URL is built and given to processing.csv_block().
    Returns 0.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "codeberg.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"

    logger.debug("Downloading %d files ...", len(blocklists.oliphant_blocklists))
    for block in blocklists.oliphant_blocklists:
        # Is domain given and not equal blocker?
        logger.debug("block[blocker]='%s',block[csv_url]='%s'", block["blocker"], block["csv_url"])
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue

        url = f"{base_url}/{block['csv_url']}"

        logger.debug("Invoking processing.csv_block(%s, %s, fetch_oliphant) ...", block["blocker"], url)
        processing.csv_block(block["blocker"], url, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_txt(args: argparse.Namespace) -> int:
    """Fetch plain-text domain lists and register each listed instance.

    Each entry in blocklists.txt_files is downloaded; the body is treated
    as one domain per line. Tidied, wanted, not-yet-registered domains
    (unless --force) are passed to processing.instance(). Returns 0.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    logger.info("Checking %d text file(s) ...", len(blocklists.txt_files))
    for row in blocklists.txt_files:
        logger.debug("Fetching row[url]='%s' ...", row["url"])
        response = network.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code == 200 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.strip().split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain) if domain not in [None, ""] else None
                logger.debug("domain='%s' - AFTER!", domain)

                if domain in [None, ""]:
                    logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                    continue
                elif not domain_helper.is_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue
                elif not args.force and instances.is_registered(domain):
                    logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                    continue

                logger.debug("Processing domain='%s',row[blocker]='%s' ...", domain, row["blocker"])
                processed = processing.instance(domain, row["blocker"], inspect.currentframe().f_code.co_name, force=args.force)
                logger.debug("processed='%s'", processed)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_fedipact(args: argparse.Namespace) -> int:
    """Scrape fedipact.online's signatory list for instance domains.

    Parses the landing page's <li> elements, tidies and IDNA-encodes the
    first text node of each as a domain, and registers new wanted domains
    via federation.fetch_instances() with origin "beach.city". Returns 0.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fedipact.online"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching / from source_domain='%s' ...", source_domain)
    response = network.fetch_url(
        f"https://{source_domain}",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and response.text != "":
        logger.debug("Parsing %d Bytes ...", len(response.text))

        doc = bs4.BeautifulSoup(response.text, "html.parser")
        logger.debug("doc[]='%s'", type(doc))

        rows = doc.findAll("li")
        logger.info("Checking %d row(s) ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            domain = tidyup.domain(row.contents[0]) if row.contents[0] not in [None, ""] else None

            logger.debug("domain='%s' - AFTER!", domain)
            if domain in [None, ""]:
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.info("Fetching domain='%s' ...", domain)
            federation.fetch_instances(domain, "beach.city", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_joinmobilizon(args: argparse.Namespace) -> int:
    """Fetch known Mobilizon instances from instances.joinmobilizon.org.

    Queries the public /api/v1/instances endpoint, then registers every
    wanted, not-yet-registered host with origin "demo.mobilizon.org".
    Returns 0 on success, 1 when the API response lacks the 'data' key.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "instances.joinmobilizon.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching instances from source_domain='%s' ...", source_domain)
    raw = network.fetch_url(
        f"https://{source_domain}/api/v1/instances",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    parsed = json.loads(raw)
    logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))

    if "data" not in parsed:
        # BUG fix: the '%d' placeholder previously had no argument, so
        # logging raised a formatting error instead of this warning.
        logger.warning("parsed()=%d does not contain key 'data'", len(parsed))
        return 1

    logger.info("Checking %d instances ...", len(parsed["data"]))
    for row in parsed["data"]:
        logger.debug("row[]='%s'", type(row))
        if "host" not in row:
            logger.warning("row='%s' does not contain key 'host' - SKIPPED!", row)
            continue
        elif not domain_helper.is_wanted(row["host"]):
            logger.debug("row[host]='%s' is not wanted - SKIPPED!", row["host"])
            continue
        elif instances.is_registered(row["host"]):
            logger.debug("row[host]='%s' is already registered - SKIPPED!", row["host"])
            continue

        logger.info("Fetching row[host]='%s' ...", row["host"])
        federation.fetch_instances(row["host"], "demo.mobilizon.org", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_joinmisskey(args: argparse.Namespace) -> int:
    """Fetch known Misskey instances from instanceapp.misskey.page.

    Downloads instances.json and registers every wanted, not-yet-registered
    'url' entry with origin "misskey.io". Returns 0 on success, 1 when the
    payload lacks the 'instancesInfos' key.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "instanceapp.misskey.page"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching instances.json from source_domain='%s' ...", source_domain)
    raw = network.fetch_url(
        f"https://{source_domain}/instances.json",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    parsed = json.loads(raw)
    logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))

    if "instancesInfos" not in parsed:
        # BUG fix: the '%d' placeholder previously had no argument, so
        # logging raised a formatting error instead of this warning.
        logger.warning("parsed()=%d does not contain element 'instancesInfos'", len(parsed))
        return 1

    # BUG fix: "instane(s)" typo corrected in the log message below.
    logger.info("Checking %d instance(s) ...", len(parsed["instancesInfos"]))
    for row in parsed["instancesInfos"]:
        logger.debug("row[%s]='%s'", type(row), row)
        if "url" not in row:
            logger.warning("row()=%d does not have element 'url' - SKIPPED!", len(row))
            continue
        elif not domain_helper.is_wanted(row["url"]):
            logger.debug("row[url]='%s' is not wanted - SKIPPED!", row["url"])
            continue
        elif instances.is_registered(row["url"]):
            logger.debug("row[url]='%s' is already registered - SKIPPED!", row["url"])
            continue

        logger.info("Fetching row[url]='%s' ...", row["url"])
        federation.fetch_instances(row["url"], "misskey.io", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
def recheck_obfuscation(args: argparse.Namespace) -> int:
    """Re-fetch block lists from instances flagged with obfuscated entries
    and try to deobfuscate wildcard/hashed blocked domains.

    Rows are selected by --domain, --software, or (default) every instance
    whose has_obfuscation flag is set or unknown. For each blocker the
    block list is fetched (generic first, then software-specific), every
    obfuscated entry (containing '*' or '?') is resolved via
    utils.deobfuscate(), newly resolved blocks are stored, and the
    obfuscation counters are updated. Returns 0.

    NOTE(review): dropped lines (locking.acquire(), continue, counters,
    blockdict bookkeeping, try/else framing) were reconstructed from the
    surrounding code — confirm against upstream before relying on exact
    control flow.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if isinstance(args.domain, str) and args.domain != "" and domain_helper.is_wanted(args.domain):
        logger.debug("Fetching record for args.domain='%s' ...", args.domain)
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE (has_obfuscation = 1 OR has_obfuscation IS NULL) AND domain = ?", [args.domain])
    elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
        logger.debug("Fetching records for args.software='%s' ...", args.software)
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE (has_obfuscation = 1 OR has_obfuscation IS NULL) AND software = ?", [args.software])
    else:
        logger.debug("Fetching records where domains have obfuscated block entries ...")
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 OR has_obfuscation IS NULL")

    rows = database.cursor.fetchall()
    logger.info("Checking %d domains ...", len(rows))
    for row in rows:
        logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        if blacklist.is_blacklisted(row["domain"]):
            logger.debug("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"])
            continue
        elif (args.force is None or not args.force) and args.domain is None and args.software is None and instances.is_recent(row["domain"], "last_blocked"):
            logger.debug("row[domain]='%s' has been recently checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force))
            continue

        logger.debug("Invoking federation.fetch_blocks(%s) ...", row["domain"])
        blocking = federation.fetch_blocks(row["domain"])

        logger.debug("blocking()=%d", len(blocking))
        if len(blocking) == 0:
            # Fall back to software-specific scrapers when the generic
            # fetcher returned nothing.
            logger.debug("Empty blocking list, trying individual fetch_blocks() for row[software]='%s' ...", row["software"])
            if row["software"] == "pleroma":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = pleroma.fetch_blocks(row["domain"])
            elif row["software"] == "mastodon":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = mastodon.fetch_blocks(row["domain"])
            elif row["software"] == "lemmy":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = lemmy.fetch_blocks(row["domain"])
            elif row["software"] == "friendica":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = friendica.fetch_blocks(row["domain"])
            elif row["software"] == "misskey":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = misskey.fetch_blocks(row["domain"])
            else:
                logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])

        # c.s isn't part of oliphant's "hidden" blocklists
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] != "chaos.social" and row["software"] is not None and not software_helper.is_relay(row["software"]) and not blocklists.has(row["domain"]):
            logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
            instances.set_last_blocked(row["domain"])
            instances.set_total_blocks(row["domain"], blocking)

        obfuscated = 0
        blockdict = []

        logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
        for block in blocking:
            logger.debug("block[blocked]='%s'", block["blocked"])
            blocked = None

            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".i2p") and not config.get("allow_i2p_domain"):
                logger.debug("blocked='%s' is an I2P onion domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
                logger.debug("block='%s' is obfuscated.", block["blocked"])
                obfuscated = obfuscated + 1
                blocked = utils.deobfuscate(block["blocked"], row["domain"], block["digest"] if "digest" in block else None)
            elif not domain_helper.is_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
                logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
                continue

            logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
            if blocked is not None and blocked != block["blocked"]:
                logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
                # The entry could be resolved, so it no longer counts as
                # obfuscated.
                obfuscated = obfuscated - 1

                if blacklist.is_blacklisted(blocked):
                    logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
                    continue
                elif blacklist.is_blacklisted(row["domain"]):
                    logger.debug("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"])
                    continue
                elif blocks.is_instance_blocked(row["domain"], blocked):
                    logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
                    continue

                block["block_level"] = blocks.alias_block_level(block["block_level"])

                logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
                if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
                    blockdict.append({
                        "blocked": blocked,
                        "reason" : block["reason"],
                    })

        logger.debug("Setting obfuscated=%d for row[domain]='%s' ...", obfuscated, row["domain"])
        instances.set_has_obfuscation(row["domain"], (obfuscated > 0))
        instances.set_obfuscated_blocks(row["domain"], obfuscated)

        logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
        if instances.has_pending(row["domain"]):
            logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
            instances.update(row["domain"])

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", row["domain"], len(blockdict))
            network.send_bot_post(row["domain"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_fedilist(args: argparse.Namespace) -> int:
    """Import instance domains from demo.fedilist.com's CSV export.

    Optionally filters by --software, parses the CSV 'hostname' column,
    tidies/IDNA-encodes each domain, and crawls new wanted domains via
    federation.fetch_instances(). Returns 0 on success, non-zero when the
    download or CSV parsing failed.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "demo.fedilist.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    url = f"http://{source_domain}/instance/csv?onion=not"
    if args.software is not None and args.software != "":
        logger.debug("args.software='%s'", args.software)
        url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"

    logger.info("Fetching url='%s' ...", url)
    response = reqto.get(
        url,
        headers=network.web_headers,
        timeout=(config.get("connection_timeout"), config.get("read_timeout")),
        allow_redirects=False
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if not response.ok or response.status_code > 200 or len(response.content) == 0:
        logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.text))
        return 1

    reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")

    logger.debug("reader[]='%s'", type(reader))
    if reader is None:
        logger.warning("Failed parsing response.content()=%d as CSV content", len(response.content))
        return 2

    rows = list(reader)

    logger.info("Checking %d rows ...", len(rows))
    for row in rows:
        logger.debug("row[]='%s'", type(row))
        if "hostname" not in row:
            logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(row))
            continue

        logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"])
        domain = tidyup.domain(row["hostname"]) if row["hostname"] not in [None, ""] else None
        logger.debug("domain='%s' - AFTER!", domain)

        if domain in [None, ""]:
            logger.debug("domain='%s' is empty after tidyup.domain(): row[hostname]='%s' - SKIPPED!", domain, row["hostname"])
            continue

        logger.debug("domain='%s' - BEFORE!", domain)
        domain = domain.encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            continue
        elif (args.force is None or not args.force) and instances.is_registered(domain):
            logger.debug("domain='%s' is already registered, --force not specified: args.force[]='%s'", domain, type(args.force))
            continue
        elif instances.is_recent(domain):
            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
            continue

        logger.info("Fetching instances from domain='%s' ...", domain)
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
def update_nodeinfo(args: argparse.Namespace) -> int:
    """Re-determine the software type of registered instances.

    Selects instances by --domain, --software, --mode or one of several
    filter flags (falling back to all instances by age), re-runs
    federation.determine_software() on each, and persists software,
    nodeinfo URL, success/error state and the last_nodeinfo timestamp.
    Returns 0.

    NOTE(review): two 'elif args.…:' header lines were lost in extraction
    (before the AUTO_DISCOVERY and domain=software branches); the flag
    names below are reconstructed — confirm against the argument parser.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        logger.debug("Fetching args.domain='%s'", args.domain)
        database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
    elif args.software is not None and args.software != "":
        logger.info("Fetching domains for args.software='%s'", args.software)
        database.cursor.execute("SELECT domain, software FROM instances WHERE software = ? ORDER BY last_updated ASC", [args.software])
    elif args.mode is not None and args.mode != "":
        logger.info("Fetching domains for args.mode='%s'", args.mode.upper())
        database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode = ? ORDER BY last_updated ASC", [args.mode])
    elif args.no_software:
        logger.info("Fetching domains with no software type detected ...")
        database.cursor.execute("SELECT domain, software FROM instances WHERE software IS NULL ORDER BY last_updated ASC")
    elif args.with_software:
        logger.info("Fetching domains with any software type detected ...")
        database.cursor.execute("SELECT domain, software FROM instances WHERE software IS NOT NULL ORDER BY last_updated ASC")
    elif args.no_auto:
        logger.info("Fetching domains with other detection mode than AUTO_DISOVERY being set ...")
        database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode IS NOT NULL AND detection_mode != 'AUTO_DISCOVERY' ORDER BY last_updated ASC")
    elif args.no_detection:
        logger.info("Fetching domains with no detection mode being set ...")
        database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode IS NULL ORDER BY last_updated ASC")
    elif args.same:
        logger.info("Fetching domains with domain name and software being the same ...")
        database.cursor.execute("SELECT domain, software FROM instances WHERE domain=software ORDER BY last_updated ASC")
    else:
        logger.info("Fetching domains for recently updated ...")
        database.cursor.execute("SELECT domain, software FROM instances ORDER BY last_updated ASC")

    domains = database.cursor.fetchall()

    logger.info("Checking %d domain(s) ...", len(domains))
    cnt = 0
    for row in domains:
        logger.debug("row[]='%s'", type(row))
        if row["domain"].endswith(".i2p") and not config.get("allow_i2p_domain"):
            logger.debug("row[domain]='%s' is an I2P address - SKIPPED", row["domain"])
            continue
        elif row["domain"].endswith(".onion"):
            logger.debug("row[domain]='%s' is a TOR .onion domain - SKIPPED", row["domain"])
            continue
        elif row["domain"].endswith(".arpa"):
            logger.debug("row[domain]='%s' is a reverse IP address - SKIPPED", row["domain"])
            continue
        elif row["domain"].endswith(".tld"):
            logger.debug("row[domain]='%s' is a fake domain - SKIPPED", row["domain"])
            continue
        elif blacklist.is_blacklisted(row["domain"]):
            logger.debug("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"])
            continue
        elif not args.force and instances.is_recent(row["domain"], "last_nodeinfo"):
            logger.debug("row[domain]='%s' has been recently checked - SKIPPED!", row["domain"])
            continue

        try:
            logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
            software = federation.determine_software(row["domain"])

            logger.debug("Determined software='%s'", software)
            if (software != row["software"] and software is not None) or args.force is True:
                logger.debug("software='%s'", software)
                if software is None:
                    logger.debug("Setting nodeinfo_url to 'None' for row[domain]='%s' ...", row["domain"])
                    instances.set_nodeinfo_url(row["domain"], None)

                logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
                instances.set_software(row["domain"], software)

            if software is not None:
                logger.debug("Setting row[domain]='%s' as successfully determined ...", row["domain"])
                instances.set_success(row["domain"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
            instances.set_last_error(row["domain"], exception)

        instances.set_last_nodeinfo(row["domain"])
        instances.update(row["domain"])
        cnt = cnt + 1

    logger.debug("Success! - EXIT!")
    return 0
def fetch_instances_social(args: argparse.Namespace) -> int:
    """Fetch the public instance list from the instances.social API and feed
    every new, wanted domain into federation.fetch_instances().

    NOTE(review): several lines of this function (lock acquisition, early
    returns, the request-headers dict, the row loop header and 'continue'
    statements) are elided in this view; '[elided: ...]' comments below mark
    where they are missing.

    @param args: Parsed CLI arguments (unused here beyond logging).
    @return: Exit code (return statements elided in this view).
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    # [elided: locking.acquire() call]

    source_domain = "instances.social"

    if config.get("instances_social_api_key") == "":
        # Hard requirement: this API needs a bearer token.
        logger.error("API key not set. Please set in your config.json file.")
        # [elided: error return]
    elif sources.is_recent(source_domain):
        # Rate-limit guard: skip if the API was queried recently.
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        # [elided: early return]

    logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
    sources.update(source_domain)

    # [elided: opening of the HTTP-headers dict this entry belongs to]
    "Authorization": f"Bearer {config.get('instances_social_api_key')}",

    logger.info("Fetching list from source_domain='%s' ...", source_domain)
    fetched = network.get_json_api(
        # [elided: source_domain / headers arguments]
        "/api/1.0/instances/list?count=0&sort_by=name",
        timeout=(config.get("connection_timeout"), config.get("read_timeout"))
    # [elided: closing parenthesis of get_json_api() call]

    logger.debug("fetched(%d)[]='%s'", len(fetched), type(fetched))

    # Defensive checks on the API envelope before touching the payload.
    if "error_message" in fetched:
        logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
    elif "exception" in fetched:
        logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
    elif "json" not in fetched:
        logger.warning("fetched has no element 'json' - EXIT!")
    elif "instances" not in fetched["json"]:
        logger.warning("fetched[row] has no element 'instances' - EXIT!")

    rows = fetched["json"]["instances"]

    logger.info("Checking %d row(s) ...", len(rows))
    # [elided: 'for row in rows:' loop header]
        logger.debug("row[]='%s'", type(row))
        domain = tidyup.domain(row["name"]) if row["name"] not in [None, ""] else None
        logger.debug("domain='%s' - AFTER!", domain)

        # FIXME(review): 'is None and == ""' can never both be true, so this
        # guard is dead — it almost certainly should be 'or'; as written a
        # None domain reaches .encode() below and raises AttributeError.
        if domain is None and domain == "":
            logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)

        logger.debug("domain='%s' - BEFORE!", domain)
        # Normalize unicode hostnames to their punycode (IDNA) form.
        domain = domain.encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
        elif domain in domains:
            logger.debug("domain='%s' is already added - SKIPPED!", domain)
        elif instances.is_registered(domain):
            logger.debug("domain='%s' is already registered - SKIPPED!", domain)
        elif instances.is_recent(domain):
            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)

        logger.info("Fetching instances from domain='%s' ...", domain)
        # co_name records this command's name as the crawl-origin marker.
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
def fetch_relaylist(args: argparse.Namespace) -> int:
    """Fetch the relay list from api.relaylist.com and feed every new,
    wanted relay domain into federation.fetch_instances().

    NOTE(review): lock acquisition, early returns, parts of the API call
    and 'continue' statements are elided in this view; see the
    '[elided: ...]' markers below.

    @param args: Parsed CLI arguments (unused here beyond logging).
    @return: Exit code (return statements elided in this view).
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    # [elided: locking.acquire() call]

    source_domain = "api.relaylist.com"

    if sources.is_recent(source_domain):
        # Rate-limit guard: skip if the API was queried recently.
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        # [elided: early return]

    logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
    sources.update(source_domain)

    logger.info("Fetching list from source_domain='%s' ...", source_domain)
    fetched = network.get_json_api(
        # [elided: source_domain, path and headers arguments]
        (config.get("connection_timeout"), config.get("read_timeout"))
    # [elided: closing parenthesis of get_json_api() call]

    logger.debug("fetched(%d)[]='%s'", len(fetched), type(fetched))

    # Defensive checks on the API envelope before touching the payload.
    if "error_message" in fetched:
        logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
    elif "exception" in fetched:
        logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
    elif "json" not in fetched:
        logger.warning("fetched has no element 'json' - EXIT!")

    logger.info("Checking %d row(s) ...", len(fetched["json"]))
    for row in fetched["json"]:
        logger.debug("row[]='%s'", type(row))
        # Strip scheme and port: keep only the lower-cased hostname.
        domain = urlparse(row["url"]).netloc.lower().split(":")[0]
        logger.debug("domain='%s' - AFTER!", domain)

        # FIXME(review): same dead guard as in fetch_instances_social —
        # 'and' should very likely be 'or'; as written it never skips.
        if domain is None and domain == "":
            logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)

        logger.debug("domain='%s' - BEFORE!", domain)
        # Normalize unicode hostnames to their punycode (IDNA) form.
        domain = domain.encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
        elif domain in domains:
            logger.debug("domain='%s' is already added - SKIPPED!", domain)
        elif instances.is_registered(domain):
            logger.debug("domain='%s' is already registered - SKIPPED!", domain)
        elif instances.is_recent(domain):
            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)

        logger.info("Fetching instances from domain='%s'", domain)
        # co_name records this command's name as the crawl-origin marker.
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
def fetch_relays(args: argparse.Namespace) -> int:
    """Crawl known relay instances (activityrelay, aoderelay,
    selective-relay, pub-relay) and harvest their registered peer domains,
    then fetch every newly discovered, wanted peer.

    pub-relay peers come from its nodeinfo JSON metadata; the other relay
    types are scraped from their HTML front page with BeautifulSoup.

    NOTE(review): loop headers, 'try:' / 'else:' openers, 'continue'
    statements and the domains.append({...}) literals are elided in this
    view; see the '[elided: ...]' markers below.

    @param args: Parsed CLI arguments (domain / software / force filters).
    @return: Exit code (return statements elided in this view).
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    # [elided: locking.acquire() call]

    # Select which relay records to crawl: one domain, one software type,
    # or every known relay with a nodeinfo URL.
    if args.domain is not None and args.domain != "":
        logger.debug("Fetching instances record for args.domain='%s' ...", args.domain)
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND domain = ? LIMIT 1", [args.domain])
    elif args.software is not None and args.software != "":
        logger.debug("Fetching instances records for args.software='%s' ...", args.software)
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND nodeinfo_url IS NOT NULL AND software = ? ORDER BY last_updated DESC", [args.software])
    # [elided: 'else:' branch header]
        logger.debug("Fetch all relay instances ...")
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND nodeinfo_url IS NOT NULL ORDER BY last_updated DESC")

    rows = database.cursor.fetchall()

    logger.info("Checking %d relays ...", len(rows))
    # [elided: 'domains'/'peers' list initialisation and 'for row in rows:' header]
        logger.debug("row[domain]='%s',row[software]='%s'", row["domain"], row["software"])
        if not args.force and instances.is_recent(row["domain"]):
            logger.debug("row[domain]='%s' has been recently fetched - SKIPPED!", row["domain"])
        elif row["nodeinfo_url"] is None:
            logger.warning("row[domain]='%s' has empty nodeinfo_url but this is required - SKIPPED!", row["domain"])

        # [elided: 'try:' opener around the network fetches below]
            logger.debug("row[domain]='%s',row[software]='%s' - checking ....", row["domain"], row["software"])
            if row["software"] == "pub-relay":
                # pub-relay publishes its peers in nodeinfo metadata, so
                # fetch the JSON document instead of scraping HTML.
                logger.info("Fetching row[nodeinfo_url]='%s' from relay row[domain]='%s',row[software]='%s' ...", row["nodeinfo_url"], row["domain"], row["software"])
                raw = network.fetch_api_url(
                    row["nodeinfo_url"],
                    (config.get("connection_timeout"), config.get("read_timeout"))
                # [elided: closing parenthesis of fetch_api_url() call]

                logger.debug("raw[%s]()=%d", type(raw), len(raw))
                if "exception" in raw:
                    # Re-raise so the surrounding except-handler records it.
                    logger.warning("row[domain]='%s' has caused an exception: '%s' - raising again ...", row["domain"], type(raw["exception"]))
                    raise raw["exception"]
                elif "error_message" in raw:
                    logger.warning("row[domain]='%s' has caused error message: '%s' - SKIPPED!", row["domain"], raw["error_message"])
                    instances.set_last_error(row["domain"], raw)
                    instances.set_last_instance_fetch(row["domain"])
                    instances.update(row["domain"])
                elif "json" not in raw:
                    logger.warning("raw()=%d does not contain key 'json' in response - SKIPPED!", len(raw))
                # NOTE(review): the next two warnings say key 'json' but the
                # checks are for 'metadata' and 'peers' — copy-pasted log
                # text (runtime strings left untouched here).
                elif not "metadata" in raw["json"]:
                    logger.warning("raw[json]()=%d does not contain key 'json' in response - SKIPPED!", len(raw["json"]))
                elif not "peers" in raw["json"]["metadata"]:
                    logger.warning("raw[json][metadata()=%d does not contain key 'json' in response - SKIPPED!", len(raw["json"]["metadata"]))
            # [elided: 'else:' branch header — HTML-scraped relay types]
                logger.info("Fetching / from relay row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
                raw = network.fetch_url(
                    f"https://{row['domain']}",
                    network.web_headers,
                    (config.get("connection_timeout"), config.get("read_timeout"))
                # [elided: closing of fetch_url() call / response unwrapping]

                logger.debug("raw[%s]()=%d", type(raw), len(raw))

                doc = bs4.BeautifulSoup(raw, features="html.parser")
                logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            # Record the failure and move on to the next relay.
            logger.warning("Exception '%s' during fetching from relay '%s': '%s'", type(exception), row["domain"], str(exception))
            instances.set_last_error(row["domain"], exception)
            instances.set_last_instance_fetch(row["domain"])
            instances.update(row["domain"])
            # [elided: 'continue']

        logger.debug("row[software]='%s'", row["software"])
        if row["software"] == "activityrelay":
            # activityrelay lists peers in a <p> paragraph starting with
            # "registered instances".
            logger.debug("Checking row[domain]='%s' ...", row["domain"])
            tags = doc.findAll("p")

            logger.debug("Checking %d paragraphs ...", len(tags))
            # [elided: 'for tag in tags:' header]
                logger.debug("tag[]='%s'", type(tag))
                if len(tag.contents) == 0:
                    logger.debug("tag='%s' is an empty tag - SKIPPED!", tag)
                elif "registered instances" not in tag.contents[0]:
                    logger.debug("Skipping paragraph, text not found.")

                logger.debug("Found tag.contents[0][]='%s'", tag.contents[0])
                for domain in tag.contents:
                    logger.debug("domain[%s]='%s'", type(domain), domain)
                    # Only bare text nodes are candidate domains; skip the
                    # heading text itself.
                    if not isinstance(domain, bs4.element.NavigableString) or "registered instances" in domain:
                        # [elided: 'continue']

                    domain = str(domain)
                    logger.debug("domain='%s'", domain)
                    if not domain_helper.is_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)

                    logger.debug("domain='%s' - BEFORE!", domain)
                    domain = tidyup.domain(domain) if domain not in[None, ""] else None
                    logger.debug("domain='%s' - AFTER!", domain)

                    if domain in [None, ""]:
                        logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
                    elif domain not in peers:
                        logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
                        peers.append(domain)

                    logger.debug("domains()=%d,domain='%s'", len(domains), domain)
                    if dict_helper.has_key(domains, "domain", domain):
                        logger.debug("domain='%s' already added", domain)
                        # [elided: 'continue']

                    logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
                    # [elided: domains.append({ "domain": domain, opener]
                        "origin": row["domain"],
                    # [elided: remaining keys and '})' closing]
        elif row["software"] in ["aoderelay", "selective-relay"]:
            logger.debug("Checking row[domain]='%s' ...", row["domain"])
            # The two software types use different HTML containers for
            # their peer listing.
            if row["software"] == "aoderelay":
                tags = doc.findAll("section", {"class": "instance"})
            # [elided: 'else:' header]
                tags = doc.find("div", {"id": "instances"}).findAll("li")

            logger.debug("Checking %d tags ...", len(tags))
            # [elided: 'for tag in tags:' header]
                logger.debug("tag[]='%s'", type(tag))

                link = tag.find("a")
                logger.debug("link[%s]='%s'", type(link), link)
                if not isinstance(link, bs4.element.Tag):
                    logger.warning("tag[%s]='%s' is not type of 'bs4.element.Tag' - SKIPPED!", type(tag), tag)
                    # [elided: 'continue']

                # Peer domain is taken from the link target's hostname.
                components = urlparse(link.get("href"))
                logger.debug("components(%d)='%s'", len(components), components)
                domain = components.netloc.lower().split(":")[0]

                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain) if domain not in[None, ""] else None
                logger.debug("domain='%s' - AFTER!", domain)

                if domain in [None, ""]:
                    logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
                elif domain not in peers:
                    logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
                    peers.append(domain)

                logger.debug("domains()=%d,domain='%s'", len(domains), domain)
                if dict_helper.has_key(domains, "domain", domain):
                    logger.debug("domain='%s' already added", domain)
                    # [elided: 'continue']

                logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
                # [elided: domains.append({ "domain": domain, opener]
                    "origin": row["domain"],
                # [elided: remaining keys and '})' closing]
        elif row["software"] == "pub-relay":
            # Peers were already fetched above as JSON metadata.
            logger.debug("Checking %d peer(s) row[domain]='%s' ...", len(raw["json"]["metadata"]["peers"]), row["domain"])
            for domain in raw["json"]["metadata"]["peers"]:
                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain) if domain not in[None, ""] else None
                logger.debug("domain='%s' - AFTER!", domain)

                if domain in [None, ""]:
                    logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
                elif domain not in peers:
                    logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
                    peers.append(domain)

                logger.debug("domains()=%d,domain='%s'", len(domains), domain)
                if dict_helper.has_key(domains, "domain", domain):
                    logger.debug("domain='%s' already added", domain)
                    # [elided: 'continue']

                logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
                # [elided: domains.append({ "domain": domain, opener]
                    "origin": row["domain"],
                # [elided: remaining keys and '})' closing]
        # [elided: 'else:' header — unsupported relay software]
            logger.warning("row[domain]='%s',row[software]='%s' is not supported", row["domain"], row["software"])
            # [elided: 'continue']

        logger.debug("Updating last_instance_fetch for row[domain]='%s' ...", row["domain"])
        instances.set_last_instance_fetch(row["domain"])

        logger.info("Relay '%s' has %d peer(s) registered.", row["domain"], len(peers))
        instances.set_total_peers(row["domain"], peers)

        logger.debug("Flushing data for row[domain]='%s'", row["domain"])
        instances.update(row["domain"])

    # Second pass: actually crawl every peer discovered above.
    logger.info("Checking %d domains ...", len(domains))
    # [elided: 'for row in domains:' header]
        logger.debug("row[domain]='%s',row[origin]='%s'", row["domain"], row["origin"])
        if not domain_helper.is_wanted(row["domain"]):
            logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
        elif instances.is_registered(row["domain"]):
            logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])

        logger.info("Fetching row[domain]='%s',row[origin]='%s' ...", row["domain"], row["origin"])
        federation.fetch_instances(row["domain"], row["origin"], None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
def convert_idna(args: argparse.Namespace) -> int:
    """Translate all stored domain-like columns that are not yet in
    punycode ("xn--") form into their IDNA representation.

    Walks four columns — instances.domain, instances.origin,
    blocks.blocker and blocks.blocked — and hands each result set to the
    matching model's translate_idnas() helper.

    @param args: Parsed CLI arguments (unused here beyond logging).
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    # (model module, table, column) triples, processed in this exact order.
    conversion_jobs = (
        (instances, "instances", "domain"),
        (instances, "instances", "origin"),
        (blocks, "blocks", "blocker"),
        (blocks, "blocks", "blocked"),
    )

    for model, table, column in conversion_jobs:
        # Only rows still holding a non-punycode value need translating.
        database.cursor.execute(f"SELECT {column} FROM {table} WHERE {column} NOT LIKE '%xn--%' ORDER BY {column} ASC")
        rows = database.cursor.fetchall()

        logger.debug("rows[]='%s'", type(rows))
        model.translate_idnas(rows, column)

    logger.debug("Success! - EXIT!")
def remove_invalid(args: argparse.Namespace) -> int:
    """Delete instance records whose domain fails syntactic validation,
    cascade-delete block rows referencing them, then VACUUM the database.

    NOTE(review): the lock acquisition, the 'for row in rows:' header and
    the final return are elided in this view; see '[elided: ...]' markers.

    @param args: Parsed CLI arguments (unused here beyond logging).
    @return: Exit code (return statement elided in this view).
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    # [elided: locking.acquire() call]

    database.cursor.execute("SELECT domain FROM instances ORDER BY domain ASC")
    rows = database.cursor.fetchall()

    logger.info("Checking %d domains ...", len(rows))
    # [elided: 'for row in rows:' loop header]
        logger.debug("row[domain]='%s'", row["domain"])
        # split("/")[0] drops any path fragment stored in the domain column
        # before syntactic validation.
        if not validators.domain(row["domain"].split("/")[0]):
            logger.info("Invalid row[domain]='%s' found, removing ...", row["domain"])
            # Manual cascade: block rows reference domains in both columns.
            database.cursor.execute("DELETE FROM blocks WHERE blocker = ? OR blocked = ?", [row["domain"], row["domain"]])
            database.cursor.execute("DELETE FROM instances WHERE domain = ? LIMIT 1", [row["domain"]])

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    # NOTE(review): "Vaccum" is a typo in a runtime log string — left
    # untouched here since only comments may change in this edit.
    logger.info("Vaccum cleaning database ...")
    database.cursor.execute("VACUUM")

    logger.debug("Success! - EXIT!")