1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
23 from urllib.parse import urlparse
32 from fba import database
35 from fba.helpers import blacklist
36 from fba.helpers import blocklists
37 from fba.helpers import config
38 from fba.helpers import cookies
39 from fba.helpers import dicts as dict_helper
40 from fba.helpers import domain as domain_helper
41 from fba.helpers import locking
42 from fba.helpers import processing
43 from fba.helpers import software as software_helper
44 from fba.helpers import tidyup
46 from fba.http import csrf
47 from fba.http import federation
48 from fba.http import network
50 from fba.models import blocks
51 from fba.models import instances
52 from fba.models import sources
54 from fba.networks import friendica
55 from fba.networks import lemmy
56 from fba.networks import mastodon
57 from fba.networks import misskey
58 from fba.networks import pleroma
60 logging.basicConfig(level=logging.INFO)
61 logger = logging.getLogger(__name__)
62 #logger.setLevel(logging.DEBUG)
64 def check_instance(args: argparse.Namespace) -> int:
65 logger.debug("args.domain='%s' - CALLED!", args.domain)
68 if not validators.domain(args.domain):
69 logger.warning("args.domain='%s' is not valid", args.domain)
71 elif blacklist.is_blacklisted(args.domain):
72 logger.warning("args.domain='%s' is blacklisted", args.domain)
74 elif instances.is_registered(args.domain):
75 logger.warning("args.domain='%s' is already registered", args.domain)
78 logger.info("args.domain='%s' is not known", args.domain)
80 logger.debug("status=%d - EXIT!", status)
83 def check_nodeinfo(args: argparse.Namespace) -> int:
84 logger.debug("args[]='%s' - CALLED!", type(args))
87 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")
90 for row in database.cursor.fetchall():
91 logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
92 punycode = row["domain"].encode("idna").decode("utf-8")
94 if row["nodeinfo_url"].startswith("/"):
95 logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
97 elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
98 logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
101 logger.info("Found %d row(s)", cnt)
103 logger.debug("EXIT!")
106 def fetch_pixelfed_api(args: argparse.Namespace) -> int:
107 logger.debug("args[]='%s' - CALLED!", type(args))
109 # No CSRF by default, you don't have to add network.source_headers by yourself here
111 source_domain = "pixelfed.org"
113 if sources.is_recent(source_domain):
114 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
117 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
118 sources.update(source_domain)
121 logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
122 headers = csrf.determine(source_domain, dict())
123 except network.exceptions as exception:
124 logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
128 logger.info("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
129 fetched = network.get_json_api(
131 "/api/v1/servers/all.json?scope=All&country=all&language=all",
133 (config.get("connection_timeout"), config.get("read_timeout"))
136 logger.debug("JSON API returned %d elements", len(fetched))
137 if "error_message" in fetched:
138 logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
140 elif "data" not in fetched["json"]:
141 logger.warning("API did not return JSON with 'data' element - EXIT!")
144 rows = fetched["json"]["data"]
145 logger.info("Checking %d fetched rows ...", len(rows))
147 logger.debug("row[]='%s'", type(row))
148 if "domain" not in row:
149 logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
151 elif row["domain"] == "":
152 logger.debug("row[domain] is empty - SKIPPED!")
155 logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
156 domain = row["domain"].encode("idna").decode("utf-8")
157 logger.debug("domain='%s' - AFTER!", domain)
159 if not domain_helper.is_wanted(domain):
160 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
162 elif instances.is_registered(domain):
163 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
165 elif instances.is_recent(domain):
166 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
169 logger.debug("Fetching instances from domain='%s' ...", domain)
170 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
172 except network.exceptions as exception:
173 logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
176 logger.debug("Success! - EXIT!")
179 def fetch_bkali(args: argparse.Namespace) -> int:
180 logger.debug("args[]='%s' - CALLED!", type(args))
182 logger.debug("Invoking locking.acquire() ...")
185 source_domain = "gql.api.bka.li"
186 if sources.is_recent(source_domain):
187 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
190 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
191 sources.update(source_domain)
195 logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
196 fetched = network.post_json_api(
200 "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
204 logger.debug("fetched[]='%s'", type(fetched))
205 if "error_message" in fetched:
206 logger.warning("post_json_api() for 'gql.sources.bka.li' returned error message='%s", fetched["error_message"])
208 elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
209 logger.warning("post_json_api() returned error: '%s", fetched["error"]["message"])
212 rows = fetched["json"]
214 logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
216 raise Exception("WARNING: Returned no records")
217 elif "data" not in rows:
218 raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
219 elif "nodeinfo" not in rows["data"]:
220 raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")
222 for entry in rows["data"]["nodeinfo"]:
223 logger.debug("entry[%s]='%s'", type(entry), entry)
224 if "domain" not in entry:
225 logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
227 elif entry["domain"] == "":
228 logger.debug("entry[domain] is empty - SKIPPED!")
230 elif not domain_helper.is_wanted(entry["domain"]):
231 logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
233 elif instances.is_registered(entry["domain"]):
234 logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
236 elif instances.is_recent(entry["domain"]):
237 logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
240 logger.debug("Adding domain='%s' ...", entry["domain"])
241 domains.append(entry["domain"])
243 except network.exceptions as exception:
244 logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
247 logger.debug("domains()=%d", len(domains))
249 logger.info("Adding %d new instances ...", len(domains))
250 for domain in domains:
251 logger.debug("domain='%s' - BEFORE!", domain)
252 domain = domain.encode("idna").decode("utf-8")
253 logger.debug("domain='%s' - AFTER!", domain)
256 logger.info("Fetching instances from domain='%s' ...", domain)
257 federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
258 except network.exceptions as exception:
259 logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
260 instances.set_last_error(domain, exception)
263 logger.debug("Success - EXIT!")
266 def fetch_blocks(args: argparse.Namespace) -> int:
267 logger.debug("args[]='%s' - CALLED!", type(args))
268 if args.domain is not None and args.domain != "":
269 logger.debug("args.domain='%s' - checking ...", args.domain)
270 if not validators.domain(args.domain):
271 logger.warning("args.domain='%s' is not valid.", args.domain)
273 elif blacklist.is_blacklisted(args.domain):
274 logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
276 elif not instances.is_registered(args.domain):
277 logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
280 logger.debug("Invoking locking.acquire() ...")
283 if args.domain is not None and args.domain != "":
284 # Re-check single domain
285 logger.debug("Querying database for args.domain='%s' ...", args.domain)
286 database.cursor.execute(
287 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ? LIMIT 1", [args.domain]
289 elif args.software is not None and args.software != "":
290 # Re-check single software
291 logger.debug("Querying database for args.software='%s' ...", args.software)
292 database.cursor.execute(
293 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC", [args.software]
297 logger.debug("Re-checking all instances ...")
298 database.cursor.execute(
299 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC"
302 # Re-check after "timeout" (aka. minimum interval)
303 database.cursor.execute(
304 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_block")]
307 rows = database.cursor.fetchall()
308 logger.info("Checking %d entries ...", len(rows))
309 for blocker, software, origin, nodeinfo_url in rows:
310 logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
312 if not domain_helper.is_wanted(blocker):
313 logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
316 logger.debug("Setting last_blocked,has_obfuscation=false for blocker='%s' ...", blocker)
317 instances.set_last_blocked(blocker)
318 instances.set_has_obfuscation(blocker, False)
320 # c.s isn't part of oliphant's "hidden" blocklists
321 if blocker == "chaos.social" or software_helper.is_relay(software) or blocklists.has(blocker):
322 logger.debug("Skipping blocker='%s', run ./fba.py fetch_cs, fetch_oliphant, fetch_csv instead!", blocker)
325 logger.debug("Invoking federation.fetch_blocks(%s) ...", blocker)
326 blocking = federation.fetch_blocks(blocker)
328 logger.debug("blocker='%s',software='%s',blocking()=%d", blocker, software, len(blocking))
329 if len(blocking) == 0:
330 logger.debug("blocker='%s',software='%s' - fetching blocklist ...", blocker, software)
331 if software == "pleroma":
332 blocking = pleroma.fetch_blocks(blocker)
333 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
334 elif software == "mastodon":
335 blocking = mastodon.fetch_blocks(blocker)
336 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
337 elif software == "lemmy":
338 blocking = lemmy.fetch_blocks(blocker)
339 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
340 elif software == "friendica":
341 blocking = friendica.fetch_blocks(blocker)
342 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
343 elif software == "misskey":
344 blocking = misskey.fetch_blocks(blocker)
345 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
347 logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)
349 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
350 instances.set_total_blocks(blocker, blocking)
353 deobfuscated = obfuscated = 0
355 logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
356 for block in blocking:
357 logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])
359 if block["block_level"] == "":
360 logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
363 logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
364 block["blocked"] = tidyup.domain(block["blocked"])
365 block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
366 logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])
368 if block["blocked"] == "":
369 logger.warning("blocked is empty, blocker='%s'", blocker)
371 elif block["blocked"].endswith(".onion"):
372 logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
374 elif block["blocked"].endswith(".arpa"):
375 logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
377 elif block["blocked"].endswith(".tld"):
378 logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
380 elif block["blocked"].find("*") >= 0:
381 logger.debug("blocker='%s' uses obfuscated domains", blocker)
382 instances.set_has_obfuscation(blocker, True)
383 obfuscated = obfuscated + 1
385 # Some friendica servers also obscure domains without hash
386 row = instances.deobfuscate("*", block["blocked"], block["digest"] if "digest" in block else None)
388 logger.debug("row[]='%s'", type(row))
390 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
393 deobfuscated = deobfuscated + 1
394 block["blocked"] = row["domain"]
395 origin = row["origin"]
396 nodeinfo_url = row["nodeinfo_url"]
397 elif block["blocked"].find("?") >= 0:
398 logger.debug("blocker='%s' uses obfuscated domains", blocker)
399 instances.set_has_obfuscation(blocker, True)
400 obfuscated = obfuscated + 1
402 # Some obscure them with question marks, not sure if that's dependent on version or not
403 row = instances.deobfuscate("?", block["blocked"], block["digest"] if "digest" in block else None)
405 logger.debug("row[]='%s'", type(row))
407 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
410 deobfuscated = deobfuscated + 1
411 block["blocked"] = row["domain"]
412 origin = row["origin"]
413 nodeinfo_url = row["nodeinfo_url"]
415 logger.debug("Looking up instance by domainm, blocked='%s'", block["blocked"])
416 if block["blocked"] == "":
417 logger.debug("block[blocked] is empty - SKIPPED!")
420 logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
421 block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
422 logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
424 if not domain_helper.is_wanted(block["blocked"]):
425 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
427 elif block["block_level"] in ["accept", "accepted"]:
428 logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
430 elif not instances.is_registered(block["blocked"]):
431 logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
432 federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)
434 block["block_level"] = blocks.alias_block_level(block["block_level"])
436 if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
437 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
439 "blocked": block["blocked"],
440 "reason" : block["reason"],
443 logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
444 cookies.clear(block["blocked"])
446 logger.info("blocker='%s' has %d obfuscated domain(s) and %d of them could be deobfuscated.", blocker, obfuscated, deobfuscated)
447 instances.set_obfuscated_blocks(blocker, obfuscated)
449 logger.debug("Flushing updates for blocker='%s' ...", blocker)
450 instances.update(blocker)
452 logger.debug("Invoking commit() ...")
453 database.connection.commit()
455 logger.debug("Invoking cookies.clear(%s) ...", blocker)
456 cookies.clear(blocker)
458 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d'", config.get("bot_enabled"), len(blockdict))
459 if config.get("bot_enabled") and len(blockdict) > 0:
460 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
461 network.send_bot_post(blocker, blockdict)
463 logger.debug("Success! - EXIT!")
466 def fetch_observer(args: argparse.Namespace) -> int:
467 logger.debug("args[]='%s' - CALLED!", type(args))
469 logger.debug("Invoking locking.acquire() ...")
472 source_domain = "fediverse.observer"
473 if sources.is_recent(source_domain):
474 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
477 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
478 sources.update(source_domain)
481 if args.software is None:
482 logger.info("Fetching software list ...")
483 raw = utils.fetch_url(
484 f"https://{source_domain}",
486 (config.get("connection_timeout"), config.get("read_timeout"))
488 logger.debug("raw[%s]()=%d", type(raw), len(raw))
490 doc = bs4.BeautifulSoup(raw, features="html.parser")
491 logger.debug("doc[]='%s'", type(doc))
493 navbar = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"})
494 logger.debug("navbar[]='%s'", type(navbar))
496 logger.warning("Cannot find navigation bar, cannot continue!")
499 items = navbar.findAll("a", {"class": "dropdown-item"})
500 logger.debug("items[]='%s'", type(items))
502 logger.info("Checking %d menu items ...", len(items))
504 logger.debug("item[%s]='%s'", type(item), item)
505 if item.text.lower() == "all":
506 logger.debug("Skipping 'All' menu entry ...")
509 logger.debug("Appending item.text='%s' ...", item.text)
510 types.append(tidyup.domain(item.text))
512 logger.info("Adding args.software='%s' as type ...", args.software)
513 types.append(args.software)
515 logger.info("Fetching %d different table data ...", len(types))
516 for software in types:
517 logger.debug("software='%s'", software)
519 if args.software is not None and args.software != software:
520 logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
525 logger.debug("Fetching table data for software='%s' ...", software)
526 raw = utils.fetch_url(
527 f"https://{source_domain}/app/views/tabledata.php?software={software}",
529 (config.get("connection_timeout"), config.get("read_timeout"))
531 logger.debug("raw[%s]()=%d", type(raw), len(raw))
533 doc = bs4.BeautifulSoup(raw, features="html.parser")
534 logger.debug("doc[]='%s'", type(doc))
535 except network.exceptions as exception:
536 logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
539 items = doc.findAll("a", {"class": "url"})
540 logger.info("Checking %d items,software='%s' ...", len(items), software)
542 logger.debug("item[]='%s'", type(item))
543 domain = item.decode_contents()
544 domain = tidyup.domain(domain) if domain not in [None, ""] else None
545 logger.debug("domain='%s' - AFTER!", domain)
547 if domain is None or domain == "":
548 logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
551 logger.debug("domain='%s' - BEFORE!", domain)
552 domain = domain.encode("idna").decode("utf-8")
553 logger.debug("domain='%s' - AFTER!", domain)
555 if not domain_helper.is_wanted(domain):
556 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
558 elif instances.is_registered(domain):
559 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
562 logger.info("Fetching instances for domain='%s'", domain)
563 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
565 logger.debug("Success! - EXIT!")
568 def fetch_todon_wiki(args: argparse.Namespace) -> int:
569 logger.debug("args[]='%s' - CALLED!", type(args))
571 logger.debug("Invoking locking.acquire() ...")
574 source_domain = "wiki.todon.eu"
575 if sources.is_recent(source_domain):
576 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
579 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
580 sources.update(source_domain)
587 logger.debug("Fetching domainblocks from source_domain='%s'", source_domain)
588 raw = utils.fetch_url(
589 f"https://{source_domain}/todon/domainblocks",
591 (config.get("connection_timeout"), config.get("read_timeout"))
593 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
595 doc = bs4.BeautifulSoup(raw, "html.parser")
596 logger.debug("doc[]='%s'", type(doc))
598 silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
599 logger.info("Checking %d silenced/limited entries ...", len(silenced))
600 blocklist["silenced"] = utils.find_domains(silenced, "div")
602 suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
603 logger.info("Checking %d suspended entries ...", len(suspended))
604 blocklist["reject"] = utils.find_domains(suspended, "div")
606 blocking = blocklist["silenced"] + blocklist["reject"]
609 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
610 instances.set_last_blocked(blocker)
611 instances.set_total_blocks(blocker, blocking)
614 for block_level in blocklist:
615 blockers = blocklist[block_level]
617 logger.debug("block_level='%s',blockers()=%d'", block_level, len(blockers))
618 for blocked in blockers:
619 logger.debug("blocked='%s'", blocked)
621 if not instances.is_registered(blocked):
623 logger.info("Fetching instances from domain='%s' ...", blocked)
624 federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
625 except network.exceptions as exception:
626 logger.warning("Exception '%s' during fetching instances (fetch_cs) from blocked='%s'", type(exception), blocked)
627 instances.set_last_error(blocked, exception)
629 if not domain_helper.is_wanted(blocked):
630 logger.warning("blocked='%s' is not wanted - SKIPPED!", blocked)
632 elif not domain_helper.is_wanted(blocker):
633 logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
635 elif blocks.is_instance_blocked(blocker, blocked, block_level):
636 logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
639 logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
640 if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
641 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", blocked, block_level, blocker)
647 logger.debug("Invoking commit() ...")
648 database.connection.commit()
650 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
651 if config.get("bot_enabled") and len(blockdict) > 0:
652 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
653 network.send_bot_post(blocker, blockdict)
655 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
656 if instances.has_pending(blocker):
657 logger.debug("Flushing updates for blocker='%s' ...", blocker)
658 instances.update(blocker)
660 logger.debug("Success! - EXIT!")
663 def fetch_cs(args: argparse.Namespace):
664 logger.debug("args[]='%s' - CALLED!", type(args))
666 logger.debug("Invoking locking.acquire() ...")
694 source_domain = "raw.githubusercontent.com"
695 if sources.is_recent(source_domain):
696 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
699 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
700 sources.update(source_domain)
702 logger.info("Fetching federation.md from source_domain='%s' ...", source_domain)
703 raw = utils.fetch_url(
704 f"https://{source_domain}/chaossocial/meta/master/federation.md",
706 (config.get("connection_timeout"), config.get("read_timeout"))
708 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
710 doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
711 logger.debug("doc()=%d[]='%s'", len(doc), type(doc))
713 silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
714 logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
715 blocklist["silenced"] = federation.find_domains(silenced)
717 blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
718 logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
719 blocklist["reject"] = federation.find_domains(blocked)
721 blocking = blocklist["silenced"] + blocklist["reject"]
722 blocker = "chaos.social"
724 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
725 instances.set_last_blocked(blocker)
726 instances.set_total_blocks(blocker, blocking)
728 logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
729 if len(blocking) > 0:
731 for block_level in blocklist:
732 logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))
734 for row in blocklist[block_level]:
735 logger.debug("row[%s]='%s'", type(row), row)
736 if not "domain" in row:
737 logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
739 elif not instances.is_registered(row["domain"]):
741 logger.info("Fetching instances from domain='%s' ...", row["domain"])
742 federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
743 except network.exceptions as exception:
744 logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
745 instances.set_last_error(row["domain"], exception)
747 if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
748 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
750 "blocked": row["domain"],
751 "reason" : row["reason"],
754 logger.debug("Invoking commit() ...")
755 database.connection.commit()
757 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
758 if config.get("bot_enabled") and len(blockdict) > 0:
759 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
760 network.send_bot_post(blocker, blockdict)
762 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
763 if instances.has_pending(blocker):
764 logger.debug("Flushing updates for blocker='%s' ...", blocker)
765 instances.update(blocker)
767 logger.debug("Success! - EXIT!")
770 def fetch_fba_rss(args: argparse.Namespace) -> int:
771 logger.debug("args[]='%s' - CALLED!", type(args))
775 logger.debug("Invoking locking.acquire() ...")
778 components = urlparse(args.feed)
779 domain = components.netloc.lower().split(":")[0]
781 logger.debug("domain='%s'", domain)
782 if sources.is_recent(domain):
783 logger.info("API from domain='%s' has recently being accessed - EXIT!", domain)
786 logger.debug("domain='%s' has not been recently used, marking ...", domain)
787 sources.update(domain)
789 logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
790 response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
792 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
793 if response.ok and response.status_code == 200 and len(response.text) > 0:
794 logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
795 rss = atoma.parse_rss_bytes(response.content)
797 logger.debug("rss[]='%s'", type(rss))
798 for item in rss.items:
799 logger.debug("item[%s]='%s'", type(item), item)
800 domain = item.link.split("=")[1]
801 domain = tidyup.domain(domain) if domain not in[None, ""] else None
803 logger.debug("domain='%s' - AFTER!", domain)
804 if domain is None or domain == "":
805 logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
808 logger.debug("domain='%s' - BEFORE!", domain)
809 domain = domain.encode("idna").decode("utf-8")
810 logger.debug("domain='%s' - AFTER!", domain)
812 if not domain_helper.is_wanted(domain):
813 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
815 elif domain in domains:
816 logger.debug("domain='%s' is already added - SKIPPED!", domain)
818 elif instances.is_registered(domain):
819 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
821 elif instances.is_recent(domain):
822 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
825 logger.debug("Adding domain='%s'", domain)
826 domains.append(domain)
828 logger.debug("domains()=%d", len(domains))
830 logger.info("Adding %d new instances ...", len(domains))
831 for domain in domains:
832 logger.debug("domain='%s'", domain)
834 logger.info("Fetching instances from domain='%s' ...", domain)
835 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
836 except network.exceptions as exception:
837 logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
838 instances.set_last_error(domain, exception)
841 logger.debug("Success! - EXIT!")
844 def fetch_fbabot_atom(args: argparse.Namespace) -> int:
845 logger.debug("args[]='%s' - CALLED!", type(args))
847 logger.debug("Invoking locking.acquire() ...")
850 source_domain = "ryona.agency"
851 feed = f"https://{source_domain}/users/fba/feed.atom"
853 logger.debug("args.feed[%s]='%s'", type(args.feed), args.feed)
854 if args.feed is not None and validators.url(args.feed):
855 logger.debug("Setting feed='%s' ...", args.feed)
856 feed = str(args.feed)
857 source_domain = urlparse(args.feed).netloc
859 if sources.is_recent(source_domain):
860 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
863 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
864 sources.update(source_domain)
868 logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
869 response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
871 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
872 if response.ok and response.status_code == 200 and len(response.text) > 0:
873 logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
874 atom = atoma.parse_atom_bytes(response.content)
876 logger.debug("atom[]='%s'", type(atom))
877 for entry in atom.entries:
878 logger.debug("entry[]='%s'", type(entry))
879 doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
880 logger.debug("doc[]='%s'", type(doc))
881 for element in doc.findAll("a"):
882 logger.debug("element[]='%s'", type(element))
883 for href in element["href"].split(","):
884 logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
885 domain = tidyup.domain(href) if href not in [None, ""] else None
887 logger.debug("domain='%s' - AFTER!", domain)
888 if domain is None or domain == "":
889 logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
892 logger.debug("domain='%s' - BEFORE!", domain)
893 domain = domain.encode("idna").decode("utf-8")
894 logger.debug("domain='%s' - AFTER!", domain)
896 if not domain_helper.is_wanted(domain):
897 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
899 elif domain in domains:
900 logger.debug("domain='%s' is already added - SKIPPED!", domain)
902 elif instances.is_registered(domain):
903 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
905 elif instances.is_recent(domain):
906 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
909 logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
910 domains.append(domain)
912 logger.debug("domains()=%d", len(domains))
914 logger.info("Adding %d new instances ...", len(domains))
915 for domain in domains:
916 logger.debug("domain='%s'", domain)
918 logger.info("Fetching instances from domain='%s' ...", domain)
919 federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
920 except network.exceptions as exception:
921 logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
922 instances.set_last_error(domain, exception)
925 logger.debug("Success! - EXIT!")
928 def fetch_instances(args: argparse.Namespace) -> int:
929 logger.debug("args[]='%s' - CALLED!", type(args))
931 logger.debug("args.domain='%s' - checking ...", args.domain)
932 if not validators.domain(args.domain):
933 logger.warning("args.domain='%s' is not valid.", args.domain)
935 elif blacklist.is_blacklisted(args.domain):
936 logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
939 logger.debug("Invoking locking.acquire() ...")
943 domain = tidyup.domain(args.domain)
944 origin = software = None
947 database.cursor.execute("SELECT origin, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
948 row = database.cursor.fetchone()
950 origin = row["origin"]
951 software = row["software"]
953 if software_helper.is_relay(software):
954 logger.warning("args.domain='%s' is of software type '%s' which is not supported by this command. Please invoke fetch_relays instead.", args.domain, software)
959 logger.info("Fetching instances from args.domain='%s',origin='%s',software='%s' ...", domain, origin, software)
960 federation.fetch_instances(domain, origin, software, inspect.currentframe().f_code.co_name)
961 except network.exceptions as exception:
962 logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
963 instances.set_last_error(args.domain, exception)
964 instances.update(args.domain)
968 logger.debug("Not fetching more instances - EXIT!")
971 # Loop through some instances
972 database.cursor.execute(
973 "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus', 'neodb') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY total_peers DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_instance")]
976 rows = database.cursor.fetchall()
977 logger.info("Checking %d entries ...", len(rows))
979 logger.debug("row[domain]='%s'", row["domain"])
980 if row["domain"] == "":
981 logger.debug("row[domain] is empty - SKIPPED!")
984 logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
985 domain = row["domain"].encode("idna").decode("utf-8")
986 logger.debug("domain='%s' - AFTER!", domain)
988 if not domain_helper.is_wanted(domain):
989 logger.debug("Domain domain='%s' is not wanted - SKIPPED!", domain)
993 logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", domain, row["origin"], row["software"], row["nodeinfo_url"])
994 federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
995 except network.exceptions as exception:
996 logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
997 instances.set_last_error(domain, exception)
999 logger.debug("Success - EXIT!")
1002 def fetch_csv(args: argparse.Namespace) -> int:
1003 logger.debug("args[]='%s' - CALLED!", type(args))
1005 logger.debug("Invoking locking.acquire() ...")
1008 logger.info("Checking %d CSV files ...", len(blocklists.csv_files))
1009 for block in blocklists.csv_files:
1010 logger.debug("block[blocker]='%s',block[csv_url]='%s'", block["blocker"], block["csv_url"])
1012 # Is domain given and not equal blocker?
1013 if isinstance(args.domain, str) and args.domain != block["blocker"]:
1014 logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
1017 logger.debug("Invoking processing.csv_block(%s, %s, fetch_csv) ...", block["blocker"], block["csv_url"])
1018 processing.csv_block(block["blocker"], block["csv_url"], inspect.currentframe().f_code.co_name)
1020 logger.debug("Success - EXIT!")
1023 def fetch_oliphant(args: argparse.Namespace) -> int:
1024 logger.debug("args[]='%s' - CALLED!", type(args))
1026 logger.debug("Invoking locking.acquire() ...")
1029 source_domain = "codeberg.org"
1030 if sources.is_recent(source_domain):
1031 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1034 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1035 sources.update(source_domain)
1038 base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"
1040 logger.debug("Downloading %d files ...", len(blocklists.oliphant_blocklists))
1041 for block in blocklists.oliphant_blocklists:
1042 # Is domain given and not equal blocker?
1043 if isinstance(args.domain, str) and args.domain != block["blocker"]:
1044 logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
1047 url = f"{base_url}/{block['csv_url']}"
1049 logger.debug("Invoking processing.csv_block(%s, %s, fetch_oliphant) ...", block["blocker"], url)
1050 processing.csv_block(block["blocker"], url, inspect.currentframe().f_code.co_name)
1052 logger.debug("Success! - EXIT!")
1055 def fetch_txt(args: argparse.Namespace) -> int:
1056 logger.debug("args[]='%s' - CALLED!", type(args))
1058 logger.debug("Invoking locking.acquire() ...")
1061 logger.info("Checking %d text file(s) ...", len(blocklists.txt_files))
1062 for row in blocklists.txt_files:
1063 logger.debug("Fetching row[url]='%s' ...", row["url"])
1064 response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
1066 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1067 if response.ok and response.status_code == 200 and response.text != "":
1068 logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
1069 domains = response.text.strip().split("\n")
1071 logger.info("Processing %d domains ...", len(domains))
1072 for domain in domains:
1073 logger.debug("domain='%s' - BEFORE!", domain)
1074 domain = tidyup.domain(domain) if domain not in[None, ""] else None
1076 logger.debug("domain='%s' - AFTER!", domain)
1077 if domain is None or domain == "":
1078 logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
1080 elif not domain_helper.is_wanted(domain):
1081 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1083 elif instances.is_recent(domain):
1084 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1087 logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
1088 processed = processing.instance(domain, row["blocker"], inspect.currentframe().f_code.co_name)
1090 logger.debug("processed='%s'", processed)
1092 logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
1095 logger.debug("Success! - EXIT!")
1098 def fetch_fedipact(args: argparse.Namespace) -> int:
1099 logger.debug("args[]='%s' - CALLED!", type(args))
1101 logger.debug("Invoking locking.acquire() ...")
1104 source_domain = "fedipact.online"
1105 if sources.is_recent(source_domain):
1106 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1109 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1110 sources.update(source_domain)
1112 logger.info("Fetching / from source_domain='%s' ...", source_domain)
1113 response = utils.fetch_url(
1114 f"https://{source_domain}",
1115 network.web_headers,
1116 (config.get("connection_timeout"), config.get("read_timeout"))
1119 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1120 if response.ok and response.status_code == 200 and response.text != "":
1121 logger.debug("Parsing %d Bytes ...", len(response.text))
1123 doc = bs4.BeautifulSoup(response.text, "html.parser")
1124 logger.debug("doc[]='%s'", type(doc))
1126 rows = doc.findAll("li")
1127 logger.info("Checking %d row(s) ...", len(rows))
1129 logger.debug("row[]='%s'", type(row))
1130 domain = tidyup.domain(row.contents[0]) if row.contents[0] not in [None, ""] else None
1132 logger.debug("domain='%s' - AFTER!", domain)
1133 if domain is None or domain == "":
1134 logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
1137 logger.debug("domain='%s' - BEFORE!", domain)
1138 domain = domain.encode("idna").decode("utf-8")
1139 logger.debug("domain='%s' - AFTER!", domain)
1141 if not domain_helper.is_wanted(domain):
1142 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1144 elif instances.is_registered(domain):
1145 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1147 elif instances.is_recent(domain):
1148 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1151 logger.info("Fetching domain='%s' ...", domain)
1152 federation.fetch_instances(domain, "beach.city", None, inspect.currentframe().f_code.co_name)
1154 logger.debug("Success! - EXIT!")
1157 def fetch_joinmobilizon(args: argparse.Namespace) -> int:
1158 logger.debug("args[]='%s' - CALLED!", type(args))
1160 logger.debug("Invoking locking.acquire() ...")
1163 source_domain = "instances.joinmobilizon.org"
1164 if sources.is_recent(source_domain):
1165 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1168 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1169 sources.update(source_domain)
1171 logger.info("Fetching instances from source_domain='%s' ...", source_domain)
1172 raw = utils.fetch_url(
1173 f"https://{source_domain}/api/v1/instances",
1174 network.web_headers,
1175 (config.get("connection_timeout"), config.get("read_timeout"))
1177 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1179 parsed = json.loads(raw)
1180 logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
1182 if "data" not in parsed:
1183 logger.warning("parsed()=%d does not contain key 'data'")
1186 logger.info("Checking %d instances ...", len(parsed["data"]))
1187 for row in parsed["data"]:
1188 logger.debug("row[]='%s'", type(row))
1189 if "host" not in row:
1190 logger.warning("row='%s' does not contain key 'host' - SKIPPED!", row)
1192 elif not domain_helper.is_wanted(row["host"]):
1193 logger.debug("row[host]='%s' is not wanted - SKIPPED!", row["host"])
1195 elif instances.is_registered(row["host"]):
1196 logger.debug("row[host]='%s' is already registered - SKIPPED!", row["host"])
1199 logger.info("Fetching row[host]='%s' ...", row["host"])
1200 federation.fetch_instances(row["host"], "demo.mobilizon.org", None, inspect.currentframe().f_code.co_name)
1202 logger.debug("Success! - EXIT!")
1205 def fetch_joinmisskey(args: argparse.Namespace) -> int:
1206 logger.debug("args[]='%s' - CALLED!", type(args))
1208 logger.debug("Invoking locking.acquire() ...")
1211 source_domain = "instanceapp.misskey.page"
1212 if sources.is_recent(source_domain):
1213 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1216 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1217 sources.update(source_domain)
1219 logger.info("Fetching instances.json from source_domain='%s' ...", source_domain)
1220 raw = utils.fetch_url(
1221 f"https://{source_domain}/instances.json",
1222 network.web_headers,
1223 (config.get("connection_timeout"), config.get("read_timeout"))
1225 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1227 parsed = json.loads(raw)
1228 logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
1230 if "instancesInfos" not in parsed:
1231 logger.warning("parsed()=%d does not contain element 'instancesInfos'")
1234 logger.info("Checking %d instane(s) ...", len(parsed["instancesInfos"]))
1235 for row in parsed["instancesInfos"]:
1236 logger.debug("row[%s]='%s'", type(row), row)
1237 if "url" not in row:
1238 logger.warning("row()=%d does not have element 'url' - SKIPPED!", len(row))
1240 elif not domain_helper.is_wanted(row["url"]):
1241 logger.debug("row[url]='%s' is not wanted - SKIPPED!", row["url"])
1243 elif instances.is_registered(row["url"]):
1244 logger.debug("row[url]='%s' is already registered - SKIPPED!", row["url"])
1247 logger.info("Fetching row[url]='%s' ...", row["url"])
1248 federation.fetch_instances(row["url"], "misskey.io", None, inspect.currentframe().f_code.co_name)
1250 logger.debug("Success! - EXIT!")
1253 def recheck_obfuscation(args: argparse.Namespace) -> int:
1254 logger.debug("args[]='%s' - CALLED!", type(args))
1256 logger.debug("Invoking locking.acquire() ...")
1259 if isinstance(args.domain, str) and args.domain != "" and domain_helper.is_wanted(args.domain):
1260 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
1261 elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
1262 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
1264 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")
1266 rows = database.cursor.fetchall()
1267 logger.info("Checking %d domains ...", len(rows))
1269 logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
1270 if (args.force is None or not args.force) and args.domain is None and args.software is None and instances.is_recent(row["domain"], "last_blocked"):
1271 logger.debug("row[domain]='%s' has been recently checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force))
1274 logger.debug("Invoking federation.fetch_blocks(%s) ...", row["domain"])
1275 blocking = federation.fetch_blocks(row["domain"])
1277 logger.debug("blocking()=%d", len(blocking))
1278 if len(blocking) == 0:
1279 if row["software"] == "pleroma":
1280 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1281 blocking = pleroma.fetch_blocks(row["domain"])
1282 elif row["software"] == "mastodon":
1283 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1284 blocking = mastodon.fetch_blocks(row["domain"])
1285 elif row["software"] == "lemmy":
1286 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1287 blocking = lemmy.fetch_blocks(row["domain"])
1288 elif row["software"] == "friendica":
1289 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1290 blocking = friendica.fetch_blocks(row["domain"])
1291 elif row["software"] == "misskey":
1292 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1293 blocking = misskey.fetch_blocks(row["domain"])
1295 logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])
1297 # c.s isn't part of oliphant's "hidden" blocklists
1298 logger.debug("row[domain]='%s'", row["domain"])
1299 if row["domain"] != "chaos.social" and not software_helper.is_relay(row["software"]) and not blocklists.has(row["domain"]):
1300 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
1301 instances.set_last_blocked(row["domain"])
1302 instances.set_total_blocks(row["domain"], blocking)
1307 logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1308 for block in blocking:
1309 logger.debug("block[blocked]='%s'", block["blocked"])
1312 if block["blocked"] == "":
1313 logger.debug("block[blocked] is empty - SKIPPED!")
1315 elif block["blocked"].endswith(".arpa"):
1316 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1318 elif block["blocked"].endswith(".tld"):
1319 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1321 elif block["blocked"].endswith(".onion"):
1322 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
1324 elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
1325 logger.debug("block='%s' is obfuscated.", block["blocked"])
1326 obfuscated = obfuscated + 1
1327 blocked = utils.deobfuscate(block["blocked"], row["domain"], block["digest"] if "digest" in block else None)
1328 elif not domain_helper.is_wanted(block["blocked"]):
1329 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1331 elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1332 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
1335 logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
1336 if blocked is not None and blocked != block["blocked"]:
1337 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1338 obfuscated = obfuscated - 1
1340 if blacklist.is_blacklisted(blocked):
1341 logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
1343 elif blacklist.is_blacklisted(row["domain"]):
1344 logger.debug("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"])
1346 elif blocks.is_instance_blocked(row["domain"], blocked):
1347 logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
1350 block["block_level"] = blocks.alias_block_level(block["block_level"])
1352 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
1353 if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1354 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
1357 "reason" : block["reason"],
1360 logger.debug("Setting obfuscated=%d for row[domain]='%s' ...", obfuscated, row["domain"])
1361 instances.set_obfuscated_blocks(row["domain"], obfuscated)
1363 logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
1364 if instances.has_pending(row["domain"]):
1365 logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1366 instances.update(row["domain"])
1368 logger.debug("Invoking commit() ...")
1369 database.connection.commit()
1371 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1372 if config.get("bot_enabled") and len(blockdict) > 0:
1373 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", row["domain"], len(blockdict))
1374 network.send_bot_post(row["domain"], blockdict)
1376 logger.debug("Success! - EXIT!")
1379 def fetch_fedilist(args: argparse.Namespace) -> int:
1380 logger.debug("args[]='%s' - CALLED!", type(args))
1382 logger.debug("Invoking locking.acquire() ...")
1385 source_domain = "demo.fedilist.com"
1386 if sources.is_recent(source_domain):
1387 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1390 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1391 sources.update(source_domain)
1393 url = f"http://{source_domain}/instance/csv?onion=not"
1394 if args.software is not None and args.software != "":
1395 logger.debug("args.software='%s'", args.software)
1396 url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"
1398 logger.info("Fetching url='%s' ...", url)
1399 response = reqto.get(
1401 headers=network.web_headers,
1402 timeout=(config.get("connection_timeout"), config.get("read_timeout")),
1403 allow_redirects=False
1406 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1407 if not response.ok or response.status_code > 200 or len(response.content) == 0:
1408 logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.text))
1411 reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
1413 logger.debug("reader[]='%s'", type(reader))
1415 logger.warning("Failed parsing response.content()=%d as CSV content", len(response.content))
1420 logger.info("Checking %d rows ...", len(rows))
1422 logger.debug("row[]='%s'", type(row))
1423 if "hostname" not in row:
1424 logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(row))
1427 logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"])
1428 domain = tidyup.domain(row["hostname"]) if row["hostname"] not in [None, ""] else None
1429 logger.debug("domain='%s' - AFTER!", domain)
1431 if domain is None or domain == "":
1432 logger.debug("domain='%s' is empty after tidyup.domain(): row[hostname]='%s' - SKIPPED!", domain, row["hostname"])
1435 logger.debug("domain='%s' - BEFORE!", domain)
1436 domain = domain.encode("idna").decode("utf-8")
1437 logger.debug("domain='%s' - AFTER!", domain)
1439 if not domain_helper.is_wanted(domain):
1440 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1442 elif (args.force is None or not args.force) and instances.is_registered(domain):
1443 logger.debug("domain='%s' is already registered, --force not specified: args.force[]='%s'", domain, type(args.force))
1445 elif instances.is_recent(domain):
1446 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1449 logger.info("Fetching instances from domain='%s' ...", domain)
1450 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1452 logger.debug("Success! - EXIT!")
1455 def update_nodeinfo(args: argparse.Namespace) -> int:
1456 logger.debug("args[]='%s' - CALLED!", type(args))
1458 logger.debug("Invoking locking.acquire() ...")
1461 if args.domain is not None and args.domain != "":
1462 logger.debug("Fetching args.domain='%s'", args.domain)
1463 database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
1464 elif args.software is not None and args.software != "":
1465 logger.info("Fetching domains for args.software='%s'", args.software)
1466 database.cursor.execute("SELECT domain, software FROM instances WHERE software = ? ORDER BY last_updated ASC", [args.software])
1467 elif args.mode is not None and args.mode != "":
1468 logger.info("Fetching domains for args.mode='%s'", args.mode.upper())
1469 database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode = ? ORDER BY last_updated ASC", [args.mode])
1470 elif args.no_software:
1471 logger.info("Fetching domains with no software type detected ...")
1472 database.cursor.execute("SELECT domain, software FROM instances WHERE software IS NULL ORDER BY last_updated ASC")
1474 logger.info("Fetching domains with other detection mode than AUTO_DISOVERY being set ...")
1475 database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode IS NOT NULL AND detection_mode != 'AUTO_DISCOVERY' ORDER BY last_updated ASC")
1476 elif args.no_detection:
1477 logger.info("Fetching domains with no detection mode being set ...")
1478 database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode IS NULL ORDER BY last_updated ASC")
1480 logger.info("Fetching domains for recently updated ...")
1481 database.cursor.execute("SELECT domain, software FROM instances ORDER BY last_updated ASC")
1483 domains = database.cursor.fetchall()
1485 logger.info("Checking %d domain(s) ...", len(domains))
1488 logger.debug("row[]='%s'", type(row))
1489 if not args.force and instances.is_recent(row["domain"], "last_nodeinfo"):
1490 logger.debug("row[domain]='%s' has been recently checked - SKIPPED!", row["domain"])
1494 logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
1495 software = federation.determine_software(row["domain"])
1497 logger.debug("Determined software='%s'", software)
1498 if (software != row["software"] and software is not None) or args.force is True:
1499 logger.debug("software='%s'", software)
1500 if software is None:
1501 logger.debug("Setting nodeinfo_url to 'None' for row[domain]='%s' ...", row["domain"])
1502 instances.set_nodeinfo_url(row["domain"], None)
1504 logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
1505 instances.set_software(row["domain"], software)
1507 if software is not None:
1508 logger.debug("Setting row[domain]='%s' as successfully determined ...", row["domain"])
1509 instances.set_success(row["domain"])
1510 except network.exceptions as exception:
1511 logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1512 instances.set_last_error(row["domain"], exception)
1514 instances.set_last_nodeinfo(row["domain"])
1515 instances.update(row["domain"])
1518 logger.debug("Success! - EXIT!")
1521 def fetch_instances_social(args: argparse.Namespace) -> int:
1522 logger.debug("args[]='%s' - CALLED!", type(args))
1524 logger.debug("Invoking locking.acquire() ...")
1527 source_domain = "instances.social"
1529 if config.get("instances_social_api_key") == "":
1530 logger.error("API key not set. Please set in your config.json file.")
1532 elif sources.is_recent(source_domain):
1533 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1536 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1537 sources.update(source_domain)
1540 "Authorization": f"Bearer {config.get('instances_social_api_key')}",
1543 logger.info("Fetching list from source_domain='%s' ...", source_domain)
1544 fetched = network.get_json_api(
1546 "/api/1.0/instances/list?count=0&sort_by=name",
1548 (config.get("connection_timeout"), config.get("read_timeout"))
1550 logger.debug("fetched[]='%s'", type(fetched))
1552 if "error_message" in fetched:
1553 logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1555 elif "exception" in fetched:
1556 logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1558 elif "json" not in fetched:
1559 logger.warning("fetched has no element 'json' - EXIT!")
1561 elif "instances" not in fetched["json"]:
1562 logger.warning("fetched[row] has no element 'instances' - EXIT!")
1566 rows = fetched["json"]["instances"]
1568 logger.info("Checking %d row(s) ...", len(rows))
1570 logger.debug("row[]='%s'", type(row))
1571 domain = tidyup.domain(row["name"]) if row["name"] not in [None, ""] else None
1572 logger.debug("domain='%s' - AFTER!", domain)
1574 if domain is None and domain == "":
1575 logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
1578 logger.debug("domain='%s' - BEFORE!", domain)
1579 domain = domain.encode("idna").decode("utf-8")
1580 logger.debug("domain='%s' - AFTER!", domain)
1582 if not domain_helper.is_wanted(domain):
1583 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1585 elif domain in domains:
1586 logger.debug("domain='%s' is already added - SKIPPED!", domain)
1588 elif instances.is_registered(domain):
1589 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1591 elif instances.is_recent(domain):
1592 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1595 logger.info("Fetching instances from domain='%s'", domain)
1596 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1598 logger.debug("Success! - EXIT!")
1601 def fetch_relays(args: argparse.Namespace) -> int:
1602 logger.debug("args[]='%s' - CALLED!", type(args))
1604 logger.debug("Invoking locking.acquire() ...")
1607 if args.domain is not None and args.domain != "":
1608 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND domain = ? LIMIT 1", [args.domain])
1609 elif args.software is not None and args.software != "":
1610 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND software = ?", [args.software])
1612 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay')")
1615 rows = database.cursor.fetchall()
1617 logger.info("Checking %d relays ...", len(rows))
1619 logger.debug("row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1621 if not args.force and instances.is_recent(row["domain"]):
1622 logger.debug("row[domain]='%s' has been recently fetched - SKIPPED!", row["domain"])
1626 if row["software"] == "pub-relay":
1627 logger.info("Fetching row[nodeinfo_url]='%s' from relay row[domain]='%s',row[software]='%s' ...", row["nodeinfo_url"], row["domain"], row["software"])
1628 raw = network.fetch_api_url(
1629 row["nodeinfo_url"],
1630 (config.get("connection_timeout"), config.get("read_timeout"))
1633 logger.debug("raw[%s]()=%d", type(raw), len(raw))
1634 if "exception" in raw:
1635 logger.warning("row[domain]='%s' has caused an exception: '%s' - raising again ...", row["domain"], type(raw["exception"]))
1636 raise raw["exception"]
1637 elif "error_message" in raw:
1638 logger.warning("row[domain]='%s' has caused error message: '%s' - SKIPPED!", row["domain"], raw["error_message"])
1639 instances.set_last_error(row["domain"], raw)
1640 instances.set_last_instance_fetch(row["domain"])
1641 instances.update(row["domain"])
1643 elif "json" not in raw:
1644 logger.warning("raw()=%d does not contain key 'json' in response - SKIPPED!", len(raw))
1646 elif not "metadata" in raw["json"]:
1647 logger.warning("raw[json]()=%d does not contain key 'json' in response - SKIPPED!", len(raw["json"]))
1649 elif not "peers" in raw["json"]["metadata"]:
1650 logger.warning("raw[json][metadata()=%d does not contain key 'json' in response - SKIPPED!", len(raw["json"]["metadata"]))
1653 logger.info("Fetching / from relay row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1654 raw = utils.fetch_url(
1655 f"https://{row['domain']}",
1656 network.web_headers,
1657 (config.get("connection_timeout"), config.get("read_timeout"))
1659 logger.debug("raw[%s]()=%d", type(raw), len(raw))
1661 doc = bs4.BeautifulSoup(raw, features="html.parser")
1662 logger.debug("doc[]='%s'", type(doc))
1664 except network.exceptions as exception:
1665 logger.warning("Exception '%s' during fetching from relay '%s': '%s'", type(exception), row["domain"], str(exception))
1666 instances.set_last_error(row["domain"], exception)
1667 instances.set_last_instance_fetch(row["domain"])
1668 instances.update(row["domain"])
1671 logger.debug("row[software]='%s'", row["software"])
1672 if row["software"] == "activityrelay":
1673 logger.debug("Checking row[domain]='%s' ...", row["domain"])
1674 tags = doc.findAll("p")
1676 logger.debug("Checking %d paragraphs ...", len(tags))
1678 logger.debug("tag[]='%s'", type(tag))
1679 if len(tag.contents) == 0:
1680 logger.debug("tag='%s' is an empty tag - SKIPPED!", tag)
1682 elif "registered instances" not in tag.contents[0]:
1683 logger.debug("Skipping paragraph, text not found.")
1686 logger.debug("Found tag.contents[0][]='%s'", tag.contents[0])
1687 for domain in tag.contents:
1688 logger.debug("domain[%s]='%s'", type(domain), domain)
1689 if not isinstance(domain, bs4.element.NavigableString) or "registered instances" in domain:
1692 domain = str(domain)
1693 logger.debug("domain='%s'", domain)
1694 if not domain_helper.is_wanted(domain):
1695 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1698 logger.debug("domain='%s' - BEFORE!", domain)
1699 domain = tidyup.domain(domain) if domain not in[None, ""] else None
1700 logger.debug("domain='%s' - AFTER!", domain)
1702 if domain is None or domain == "":
1703 logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1705 elif domain not in peers:
1706 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1707 peers.append(domain)
1709 if dict_helper.has_key(domains, "domain", domain):
1710 logger.debug("domain='%s' already added", domain)
1713 logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
1716 "origin": row["domain"],
1718 elif row["software"] in ["aoderelay", "selective-relay"]:
1719 logger.debug("Checking row[domain]='%s' ...", row["domain"])
1720 if row["software"] == "aoderelay":
1721 tags = doc.findAll("section", {"class": "instance"})
1723 tags = doc.find("div", {"id": "instances"}).findAll("li")
1725 logger.debug("Checking %d tags ...", len(tags))
1727 logger.debug("tag[]='%s'", type(tag))
1729 link = tag.find("a")
1730 logger.debug("link[%s]='%s'", type(link), link)
1731 if not isinstance(link, bs4.element.Tag):
1732 logger.warning("tag[%s]='%s' is not type of 'bs4.element.Tag' - SKIPPED!", type(tag), tag)
1735 components = urlparse(link.get("href"))
1736 logger.debug("components(%d)='%s'", len(components), components)
1737 domain = components.netloc.lower().split(":")[0]
1739 logger.debug("domain='%s' - BEFORE!", domain)
1740 domain = tidyup.domain(domain) if domain not in[None, ""] else None
1741 logger.debug("domain='%s' - AFTER!", domain)
1743 if domain is None or domain == "":
1744 logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1746 elif domain not in peers:
1747 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1748 peers.append(domain)
1750 if dict_helper.has_key(domains, "domain", domain):
1751 logger.debug("domain='%s' already added", domain)
1754 logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
1757 "origin": row["domain"],
1759 elif row["software"] == "pub-relay":
1760 logger.debug("Checking %d peer(s) row[domain]='%s' ...", len(raw["json"]["metadata"]["peers"]), row["domain"])
1761 for domain in raw["json"]["metadata"]["peers"]:
1762 logger.debug("domain='%s' - BEFORE!", domain)
1763 domain = tidyup.domain(domain) if domain not in[None, ""] else None
1764 logger.debug("domain='%s' - AFTER!", domain)
1766 if domain is None or domain == "":
1767 logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1769 elif domain not in peers:
1770 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1771 peers.append(domain)
1773 if dict_helper.has_key(domains, "domain", domain):
1774 logger.debug("domain='%s' already added", domain)
1777 logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
1780 "origin": row["domain"],
1783 logger.warning("row[domain]='%s',row[software]='%s' is not supported", row["domain"], row["software"])
1786 logger.debug("Updating last_instance_fetch for row[domain]='%s' ...", row["domain"])
1787 instances.set_last_instance_fetch(row["domain"])
1789 logger.info("Relay '%s' has %d peer(s) registered.", row["domain"], len(peers))
1790 instances.set_total_peers(row["domain"], peers)
1792 logger.debug("Flushing data for row[domain]='%s'", row["domain"])
1793 instances.update(row["domain"])
1795 logger.info("Checking %d domains ...", len(domains))
1797 logger.debug("row[domain]='%s',row[origin]='%s'", row["domain"], row["origin"])
1798 if not domain_helper.is_wanted(row["domain"]):
1799 logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
1801 elif instances.is_registered(row["domain"]):
1802 logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
1805 logger.info("Fetching row[domain]='%s',row[origin]='%s' ...", row["domain"], row["origin"])
1806 federation.fetch_instances(row["domain"], row["origin"], None, inspect.currentframe().f_code.co_name)
1808 logger.debug("Success! - EXIT!")
1811 def convert_idna(args: argparse.Namespace) -> int:
1812 logger.debug("args[]='%s' - CALLED!", type(args))
1814 database.cursor.execute("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC")
1815 rows = database.cursor.fetchall()
1817 logger.debug("rows[]='%s'", type(rows))
1818 instances.translate_idnas(rows, "domain")
1820 database.cursor.execute("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC")
1821 rows = database.cursor.fetchall()
1823 logger.debug("rows[]='%s'", type(rows))
1824 instances.translate_idnas(rows, "origin")
1826 database.cursor.execute("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC")
1827 rows = database.cursor.fetchall()
1829 logger.debug("rows[]='%s'", type(rows))
1830 blocks.translate_idnas(rows, "blocker")
1832 database.cursor.execute("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC")
1833 rows = database.cursor.fetchall()
1835 logger.debug("rows[]='%s'", type(rows))
1836 blocks.translate_idnas(rows, "blocked")
1838 logger.debug("Success! - EXIT!")
1841 def remove_invalid(args: argparse.Namespace) -> int:
1842 logger.debug("args[]='%s' - CALLED!", type(args))
1844 logger.debug("Invoking locking.acquire() ...")
1847 database.cursor.execute("SELECT domain FROM instances ORDER BY domain ASC")
1848 rows = database.cursor.fetchall()
1850 logger.info("Checking %d domains ...", len(rows))
1852 logger.debug("row[domain]='%s'", row["domain"])
1853 if not validators.domain(row["domain"].split("/")[0]):
1854 logger.info("Invalid row[domain]='%s' found, removing ...", row["domain"])
1855 database.cursor.execute("DELETE FROM blocks WHERE blocker = ? OR blocked = ?", [row["domain"], row["domain"]])
1856 database.cursor.execute("DELETE FROM instances WHERE domain = ? LIMIT 1", [row["domain"]])
1858 logger.debug("Invoking commit() ...")
1859 database.connection.commit()
1861 logger.info("Vaccum cleaning database ...")
1862 database.cursor.execute("VACUUM")
1864 logger.debug("Success! - EXIT!")