1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
23 from urllib.parse import urlparse
32 from fba import database
35 from fba.helpers import blacklist
36 from fba.helpers import blocklists
37 from fba.helpers import config
38 from fba.helpers import cookies
39 from fba.helpers import dicts as dict_helper
40 from fba.helpers import domain as domain_helper
41 from fba.helpers import locking
42 from fba.helpers import processing
43 from fba.helpers import software as software_helper
44 from fba.helpers import tidyup
46 from fba.http import csrf
47 from fba.http import federation
48 from fba.http import network
50 from fba.models import blocks
51 from fba.models import instances
52 from fba.models import sources
54 from fba.networks import friendica
55 from fba.networks import lemmy
56 from fba.networks import mastodon
57 from fba.networks import misskey
58 from fba.networks import pleroma
# Module-wide logging setup: root logger at INFO, plus a per-module logger.
# The commented-out setLevel() line is the intended knob for enabling the very
# verbose DEBUG tracing used throughout this module.
60 logging.basicConfig(level=logging.INFO)
61 logger = logging.getLogger(__name__)
62 #logger.setLevel(logging.DEBUG)
# Sanity-check a single domain from the command line before it is added:
# rejects syntactically invalid domains, blacklisted domains, and domains
# that are already registered in the instances table. Only a domain passing
# all three checks is reported as "not known".
# Returns an int exit status; the lines assigning and returning `status` are
# not visible in this excerpt - TODO confirm against the full source.
64 def check_instance(args: argparse.Namespace) -> int:
65 logger.debug("args.domain='%s' - CALLED!", args.domain)
# Guard chain: each failing check logs a warning and (presumably) sets a
# non-zero status in elided lines - verify against full source.
68 if not validators.domain(args.domain):
69 logger.warning("args.domain='%s' is not valid", args.domain)
71 elif blacklist.is_blacklisted(args.domain):
72 logger.warning("args.domain='%s' is blacklisted", args.domain)
74 elif instances.is_registered(args.domain):
75 logger.warning("args.domain='%s' is already registered", args.domain)
78 logger.info("args.domain='%s' is not known", args.domain)
80 logger.debug("status=%d - EXIT!", status)
# Consistency check over all stored instances that have a nodeinfo URL:
# flags rows whose nodeinfo_url mentions neither the stored domain nor its
# IDNA (punycode) form. Relative URLs (starting with "/") trivially belong
# to their own domain and are accepted without further checks.
# NOTE(review): `cnt` is logged at the end but its initialization/increment
# lines are elided in this excerpt - presumably it counts mismatches; confirm.
83 def check_nodeinfo(args: argparse.Namespace) -> int:
84 logger.debug("args[]='%s' - CALLED!", type(args))
# Fetch every instance that has a non-NULL nodeinfo URL, ordered for stable output.
87 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")
90 for row in database.cursor.fetchall():
91 logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
# IDNA-encode the stored domain so internationalized domains can be matched
# against URLs that carry the punycode (xn--...) form.
92 punycode = row["domain"].encode("idna").decode("utf-8")
94 if row["nodeinfo_url"].startswith("/"):
95 logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
# Mismatch: the URL contains neither the punycode nor the plain domain.
97 elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
98 logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
101 logger.info("Found %d row(s)", cnt)
103 logger.debug("EXIT!")
# Fetch the public server list from the pixelfed.org API and register every
# new, wanted domain by crawling it via federation.fetch_instances().
# Flow: rate-limit guard on the source domain -> CSRF header probe -> GET the
# JSON server list -> per-row validation (domain present, non-empty, wanted,
# not yet registered, not recently crawled) -> crawl.
# NOTE(review): try/return/continue lines are elided in this excerpt, so the
# exact control flow between the guards must be confirmed in the full source.
106 def fetch_pixelfed_api(args: argparse.Namespace) -> int:
107 logger.debug("args[]='%s' - CALLED!", type(args))
109 # No CSRF by default, you don't have to add network.source_headers by yourself here
111 source_domain = "pixelfed.org"
# Rate limiting: skip the whole run if this source was queried recently.
113 if sources.is_recent(source_domain):
114 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
117 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
118 sources.update(source_domain)
# Probe for CSRF requirements; csrf.determine() may add required headers.
121 logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
122 headers = csrf.determine(source_domain, dict())
123 except network.exceptions as exception:
124 logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
128 logger.info("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
129 fetched = network.get_json_api(
131 "/api/v1/servers/all.json?scope=All&country=all&language=all",
133 (config.get("connection_timeout"), config.get("read_timeout"))
# Abort on API-level errors or an unexpected response shape.
136 logger.debug("JSON API returned %d elements", len(fetched))
137 if "error_message" in fetched:
138 logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
140 elif "data" not in fetched["json"]:
141 logger.warning("API did not return JSON with 'data' element - EXIT!")
144 rows = fetched["json"]["data"]
145 logger.info("Checking %d fetched rows ...", len(rows))
147 logger.debug("row[]='%s'", type(row))
148 if "domain" not in row:
149 logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
151 elif row["domain"] == "":
152 logger.debug("row[domain] is empty - SKIPPED!")
# Normalize to the IDNA (punycode) form before all further checks.
155 logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
156 domain = row["domain"].encode("idna").decode("utf-8")
157 logger.debug("domain='%s' - AFTER!", domain)
159 if not domain_helper.is_wanted(domain):
160 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
162 elif instances.is_registered(domain):
163 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
165 elif instances.is_recent(domain):
166 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
# All guards passed: crawl the new instance, tagging it with this function's
# name as the discovery command.
169 logger.debug("Fetching instances from domain='%s' ...", domain)
170 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
172 except network.exceptions as exception:
173 logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
176 logger.debug("Success! - EXIT!")
# Fetch the fediverse domain list from the gql.api.bka.li GraphQL API and
# crawl every new, wanted domain. Two phases: (1) POST the GraphQL query,
# validate the response shape, and collect candidate domains; (2) IDNA-encode
# each collected domain and crawl it via federation.fetch_instances().
# NOTE(review): the `domains` list initialization, locking.acquire() call and
# return statements are elided in this excerpt - confirm in the full source.
179 def fetch_bkali(args: argparse.Namespace) -> int:
180 logger.debug("args[]='%s' - CALLED!", type(args))
182 logger.debug("Invoking locking.acquire() ...")
185 source_domain = "gql.api.bka.li"
# Rate limiting: skip the whole run if this source was queried recently.
186 if sources.is_recent(source_domain):
187 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
190 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
191 sources.update(source_domain)
# GraphQL query: all known nodes, ordered by domain, returning only domains.
195 logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
196 fetched = network.post_json_api(
200 "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
204 logger.debug("fetched[]='%s'", type(fetched))
205 if "error_message" in fetched:
206 logger.warning("post_json_api() for 'gql.sources.bka.li' returned error message='%s", fetched["error_message"])
208 elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
209 logger.warning("post_json_api() returned error: '%s", fetched["error"]["message"])
212 rows = fetched["json"]
# Shape validation: a missing 'data' or 'nodeinfo' key is treated as fatal.
214 logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
216 raise Exception("WARNING: Returned no records")
217 elif "data" not in rows:
218 raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
219 elif "nodeinfo" not in rows["data"]:
220 raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")
222 for entry in rows["data"]["nodeinfo"]:
223 logger.debug("entry[%s]='%s'", type(entry), entry)
# Per-entry guards: present, non-empty, wanted, unregistered, not recent.
224 if "domain" not in entry:
225 logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
227 elif entry["domain"] == "":
228 logger.debug("entry[domain] is empty - SKIPPED!")
230 elif not domain_helper.is_wanted(entry["domain"]):
231 logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
233 elif instances.is_registered(entry["domain"]):
234 logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
236 elif instances.is_recent(entry["domain"]):
237 logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
240 logger.debug("Adding domain='%s' ...", entry["domain"])
241 domains.append(entry["domain"])
243 except network.exceptions as exception:
244 logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
# Phase 2: crawl all collected domains; per-domain network errors are logged
# and recorded on the instance instead of aborting the whole run.
247 logger.debug("domains()=%d", len(domains))
249 logger.info("Adding %d new instances ...", len(domains))
250 for domain in domains:
251 logger.debug("domain='%s' - BEFORE!", domain)
252 domain = domain.encode("idna").decode("utf-8")
253 logger.debug("domain='%s' - AFTER!", domain)
256 logger.info("Fetching instances from domain='%s' ...", domain)
257 federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
258 except network.exceptions as exception:
259 logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
260 instances.set_last_error(domain, exception)
263 logger.debug("Success - EXIT!")
# Core crawler for block lists: selects blocker instances from the database
# (a single domain, a single software, or all supported softwares), fetches
# each one's block list - first via federation.fetch_blocks(), then falling
# back to software-specific scrapers (pleroma/mastodon/lemmy/friendica/
# misskey) - and records every block via processing.block(). Obfuscated
# entries ("*" or "?" placeholders) are resolved with instances.deobfuscate().
# Optionally posts a bot summary of new "reject" blocks.
# NOTE(review): several lines are elided in this excerpt (blockdict init,
# continue/return statements, locking.acquire()) - confirm control flow
# against the full source before modifying.
266 def fetch_blocks(args: argparse.Namespace) -> int:
267 logger.debug("args[]='%s' - CALLED!", type(args))
# Up-front validation when a single domain was requested on the command line.
268 if args.domain is not None and args.domain != "":
269 logger.debug("args.domain='%s' - checking ...", args.domain)
270 if not validators.domain(args.domain):
271 logger.warning("args.domain='%s' is not valid.", args.domain)
273 elif blacklist.is_blacklisted(args.domain):
274 logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
276 elif not instances.is_registered(args.domain):
277 logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
280 logger.debug("Invoking locking.acquire() ...")
# Build the work list: one of four SELECT variants depending on CLI args.
283 if args.domain is not None and args.domain != "":
284 # Re-check single domain
285 logger.debug("Querying database for args.domain='%s' ...", args.domain)
286 database.cursor.execute(
287 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ? LIMIT 1", [args.domain]
289 elif args.software is not None and args.software != "":
290 # Re-check single software
291 logger.debug("Querying database for args.software='%s' ...", args.software)
292 database.cursor.execute(
293 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC", [args.software]
297 logger.debug("Re-checking all instances ...")
298 database.cursor.execute(
299 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC"
302 # Re-check after "timeout" (aka. minimum interval)
303 database.cursor.execute(
304 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_block")]
307 rows = database.cursor.fetchall()
308 logger.info("Checking %d entries ...", len(rows))
309 for blocker, software, origin, nodeinfo_url in rows:
310 logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
312 if not domain_helper.is_wanted(blocker):
313 logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
# Reset per-blocker state before (re)fetching its block list.
316 logger.debug("Setting last_blocked,has_obfuscation=false for blocker='%s' ...", blocker)
317 instances.set_last_blocked(blocker)
318 instances.set_has_obfuscation(blocker, False)
320 # c.s isn't part of oliphant's "hidden" blocklists
321 if blocker == "chaos.social" or software_helper.is_relay(software) or blocklists.has(blocker):
322 logger.debug("Skipping blocker='%s', run ./fba.py fetch_cs, fetch_oliphant, fetch_csv instead!", blocker)
# Primary fetch path; software-specific scrapers only run if this is empty.
325 logger.debug("Invoking federation.fetch_blocks(%s) ...", blocker)
326 blocking = federation.fetch_blocks(blocker)
328 logger.debug("blocker='%s',software='%s',blocking()=%d", blocker, software, len(blocking))
329 if len(blocking) == 0:
330 logger.debug("blocker='%s',software='%s' - fetching blocklist ...", blocker, software)
331 if software == "pleroma":
332 blocking = pleroma.fetch_blocks(blocker)
333 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
334 elif software == "mastodon":
335 blocking = mastodon.fetch_blocks(blocker)
336 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
337 elif software == "lemmy":
338 blocking = lemmy.fetch_blocks(blocker)
339 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
340 elif software == "friendica":
341 blocking = friendica.fetch_blocks(blocker)
342 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
343 elif software == "misskey":
344 blocking = misskey.fetch_blocks(blocker)
345 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
347 logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)
349 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
350 instances.set_total_blocks(blocker, blocking)
# Counters for the obfuscation summary logged after the inner loop.
353 deobfuscated = obfuscated = 0
355 logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
356 for block in blocking:
357 logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])
359 if block["block_level"] == "":
360 logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
# Normalize the blocked domain and its reason text.
363 logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
364 block["blocked"] = tidyup.domain(block["blocked"])
365 block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
366 logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])
# Skip entries that cannot be real, reachable fediverse domains.
368 if block["blocked"] == "":
369 logger.warning("blocked is empty, blocker='%s'", blocker)
371 elif block["blocked"].endswith(".onion"):
372 logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
374 elif block["blocked"].endswith(".arpa"):
375 logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
377 elif block["blocked"].endswith(".tld"):
378 logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
# Obfuscated entry using "*" wildcards: try to resolve it back to a real
# domain (optionally using the entry's hash digest when present).
380 elif block["blocked"].find("*") >= 0:
381 logger.debug("blocker='%s' uses obfuscated domains", blocker)
382 instances.set_has_obfuscation(blocker, True)
383 obfuscated = obfuscated + 1
385 # Some friendica servers also obscure domains without hash
386 row = instances.deobfuscate("*", block["blocked"], block["digest"] if "digest" in block else None)
388 logger.debug("row[]='%s'", type(row))
390 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
393 deobfuscated = deobfuscated + 1
394 block["blocked"] = row["domain"]
395 origin = row["origin"]
396 nodeinfo_url = row["nodeinfo_url"]
# Same resolution path for "?"-style obfuscation.
397 elif block["blocked"].find("?") >= 0:
398 logger.debug("blocker='%s' uses obfuscated domains", blocker)
399 instances.set_has_obfuscation(blocker, True)
400 obfuscated = obfuscated + 1
402 # Some obscure them with question marks, not sure if that's dependent on version or not
403 row = instances.deobfuscate("?", block["blocked"], block["digest"] if "digest" in block else None)
405 logger.debug("row[]='%s'", type(row))
407 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
410 deobfuscated = deobfuscated + 1
411 block["blocked"] = row["domain"]
412 origin = row["origin"]
413 nodeinfo_url = row["nodeinfo_url"]
415 logger.debug("Looking up instance by domainm, blocked='%s'", block["blocked"])
416 if block["blocked"] == "":
417 logger.debug("block[blocked] is empty - SKIPPED!")
# Normalize to IDNA form (also stripping a leading dot) before final checks.
420 logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
421 block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
422 logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
424 if not domain_helper.is_wanted(block["blocked"]):
425 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
427 elif block["block_level"] in ["accept", "accepted"]:
428 logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
430 elif not instances.is_registered(block["blocked"]):
431 logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
432 federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)
# Map software-specific block level names onto the canonical set.
434 block["block_level"] = blocks.alias_block_level(block["block_level"])
# Record the block; collect new "reject" blocks for the bot post when enabled.
436 if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
437 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
439 "blocked": block["blocked"],
440 "reason" : block["reason"],
443 logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
444 cookies.clear(block["blocked"])
446 logger.info("blocker='%s' has %d obfuscated domain(s) and %d of them could be deobfuscated.", blocker, obfuscated, deobfuscated)
# Flush any pending instance-table updates and commit per blocker.
448 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
449 if instances.has_pending(blocker):
450 logger.debug("Flushing updates for blocker='%s' ...", blocker)
451 instances.update(blocker)
453 logger.debug("Invoking commit() ...")
454 database.connection.commit()
456 logger.debug("Invoking cookies.clear(%s) ...", blocker)
457 cookies.clear(blocker)
459 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d'", config.get("bot_enabled"), len(blockdict))
460 if config.get("bot_enabled") and len(blockdict) > 0:
461 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
462 network.send_bot_post(blocker, blockdict)
464 logger.debug("Success! - EXIT!")
# Crawl fediverse.observer: scrape the software drop-down menu from the site
# (or use args.software when given), then fetch the per-software table-data
# page and register every new, wanted domain found in its anchor elements.
# NOTE(review): the `types` list initialization, try statements, and
# continue/return lines are elided in this excerpt - confirm in full source.
467 def fetch_observer(args: argparse.Namespace) -> int:
468 logger.debug("args[]='%s' - CALLED!", type(args))
470 logger.debug("Invoking locking.acquire() ...")
473 source_domain = "fediverse.observer"
# Rate limiting: skip the whole run if this source was queried recently.
474 if sources.is_recent(source_domain):
475 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
478 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
479 sources.update(source_domain)
# No software given: scrape the list of software types from the start page's
# navigation drop-down.
482 if args.software is None:
483 logger.info("Fetching software list ...")
484 raw = utils.fetch_url(
485 f"https://{source_domain}",
487 (config.get("connection_timeout"), config.get("read_timeout"))
489 logger.debug("raw[%s]()=%d", type(raw), len(raw))
491 doc = bs4.BeautifulSoup(raw, features="html.parser")
492 logger.debug("doc[]='%s'", type(doc))
494 navbar = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"})
495 logger.debug("navbar[]='%s'", type(navbar))
497 logger.warning("Cannot find navigation bar, cannot continue!")
500 items = navbar.findAll("a", {"class": "dropdown-item"})
501 logger.debug("items[]='%s'", type(items))
503 logger.info("Checking %d menu items ...", len(items))
505 logger.debug("item[%s]='%s'", type(item), item)
# The aggregate "All" entry is not a software type; skip it.
506 if item.text.lower() == "all":
507 logger.debug("Skipping 'All' menu entry ...")
510 logger.debug("Appending item.text='%s' ...", item.text)
511 types.append(tidyup.domain(item.text))
513 logger.info("Adding args.software='%s' as type ...", args.software)
514 types.append(args.software)
# One table-data page per software type.
516 logger.info("Fetching %d different table data ...", len(types))
517 for software in types:
518 logger.debug("software='%s'", software)
520 if args.software is not None and args.software != software:
521 logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
526 logger.debug("Fetching table data for software='%s' ...", software)
527 raw = utils.fetch_url(
528 f"https://{source_domain}/app/views/tabledata.php?software={software}",
530 (config.get("connection_timeout"), config.get("read_timeout"))
532 logger.debug("raw[%s]()=%d", type(raw), len(raw))
534 doc = bs4.BeautifulSoup(raw, features="html.parser")
535 logger.debug("doc[]='%s'", type(doc))
536 except network.exceptions as exception:
537 logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
# Every anchor with class "url" holds one instance domain.
540 items = doc.findAll("a", {"class": "url"})
541 logger.info("Checking %d items,software='%s' ...", len(items), software)
543 logger.debug("item[]='%s'", type(item))
544 domain = item.decode_contents()
545 domain = tidyup.domain(domain) if domain not in [None, ""] else None
546 logger.debug("domain='%s' - AFTER!", domain)
548 if domain is None or domain == "":
549 logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
# Normalize to IDNA form, then apply the usual wanted/registered guards.
552 logger.debug("domain='%s' - BEFORE!", domain)
553 domain = domain.encode("idna").decode("utf-8")
554 logger.debug("domain='%s' - AFTER!", domain)
556 if not domain_helper.is_wanted(domain):
557 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
559 elif instances.is_registered(domain):
560 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
563 logger.info("Fetching instances for domain='%s'", domain)
564 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
566 logger.debug("Success! - EXIT!")
# Import todon.eu's published block list from wiki.todon.eu/todon/domainblocks:
# parse the "silenced/limited" and "suspended" HTML sections into a blocklist
# dict ({"silenced": [...], "reject": [...]}) and record each entry as a block
# of the todon.eu blocker, registering unknown blocked instances on the way.
# NOTE(review): the initializations of `blocker`, `blocklist` and `blockdict`
# are elided in this excerpt - presumably blocker is "todon.eu"; confirm.
569 def fetch_todon_wiki(args: argparse.Namespace) -> int:
570 logger.debug("args[]='%s' - CALLED!", type(args))
572 logger.debug("Invoking locking.acquire() ...")
575 source_domain = "wiki.todon.eu"
# Rate limiting: skip the whole run if this source was queried recently.
576 if sources.is_recent(source_domain):
577 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
580 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
581 sources.update(source_domain)
588 logger.debug("Fetching domainblocks from source_domain='%s'", source_domain)
589 raw = utils.fetch_url(
590 f"https://{source_domain}/todon/domainblocks",
592 (config.get("connection_timeout"), config.get("read_timeout"))
594 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
596 doc = bs4.BeautifulSoup(raw, "html.parser")
597 logger.debug("doc[]='%s'", type(doc))
# The wiki page lists each level under an <h3> heading followed by a <ul>.
599 silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
600 logger.info("Checking %d silenced/limited entries ...", len(silenced))
601 blocklist["silenced"] = utils.find_domains(silenced, "div")
603 suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
604 logger.info("Checking %d suspended entries ...", len(suspended))
605 blocklist["reject"] = utils.find_domains(suspended, "div")
607 blocking = blocklist["silenced"] + blocklist["reject"]
# Record totals on the blocker instance before processing single entries.
610 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
611 instances.set_last_blocked(blocker)
612 instances.set_total_blocks(blocker, blocking)
615 for block_level in blocklist:
616 blockers = blocklist[block_level]
618 logger.debug("block_level='%s',blockers()=%d'", block_level, len(blockers))
619 for blocked in blockers:
620 logger.debug("blocked='%s'", blocked)
# Unknown blocked instances are crawled first; per-domain network errors
# are logged and stored instead of aborting the run.
622 if not instances.is_registered(blocked):
624 logger.info("Fetching instances from domain='%s' ...", blocked)
625 federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
626 except network.exceptions as exception:
627 logger.warning("Exception '%s' during fetching instances (fetch_cs) from blocked='%s'", type(exception), blocked)
628 instances.set_last_error(blocked, exception)
630 if not domain_helper.is_wanted(blocked):
631 logger.warning("blocked='%s' is not wanted - SKIPPED!", blocked)
633 elif not domain_helper.is_wanted(blocker):
634 logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
635 elif blocks.is_instance_blocked(blocker, blocked, block_level):
636 logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
640 logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
# Record the block; "reject" entries feed the optional bot summary post.
641 if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
642 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", blocked, block_level, blocker)
648 logger.debug("Invoking commit() ...")
649 database.connection.commit()
651 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
652 if config.get("bot_enabled") and len(blockdict) > 0:
653 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
654 network.send_bot_post(blocker, blockdict)
656 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
657 if instances.has_pending(blocker):
658 logger.debug("Flushing updates for blocker='%s' ...", blocker)
659 instances.update(blocker)
661 logger.debug("Success! - EXIT!")
# Import chaos.social's block list from its published federation.md on
# raw.githubusercontent.com: render the markdown, parse the "silenced" and
# "blocked" tables into a blocklist dict, then record each row as a block of
# the chaos.social blocker (registering unknown blocked instances first).
# NOTE(review): the initializations of `extensions`, `blocklist` and
# `blockdict` are elided in this excerpt - confirm against the full source.
664 def fetch_cs(args: argparse.Namespace):
665 logger.debug("args[]='%s' - CALLED!", type(args))
667 logger.debug("Invoking locking.acquire() ...")
695 source_domain = "raw.githubusercontent.com"
# Rate limiting: skip the whole run if this source was queried recently.
696 if sources.is_recent(source_domain):
697 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
700 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
701 sources.update(source_domain)
703 logger.info("Fetching federation.md from source_domain='%s' ...", source_domain)
704 raw = utils.fetch_url(
705 f"https://{source_domain}/chaossocial/meta/master/federation.md",
707 (config.get("connection_timeout"), config.get("read_timeout"))
709 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
# Render the markdown to HTML first, then parse the resulting tables.
711 doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
712 logger.debug("doc()=%d[]='%s'", len(doc), type(doc))
# Each block level lives in a table following a well-known <h2> heading.
714 silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
715 logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
716 blocklist["silenced"] = federation.find_domains(silenced)
718 blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
719 logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
720 blocklist["reject"] = federation.find_domains(blocked)
722 blocking = blocklist["silenced"] + blocklist["reject"]
723 blocker = "chaos.social"
# Record totals on the blocker instance before processing single rows.
725 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
726 instances.set_last_blocked(blocker)
727 instances.set_total_blocks(blocker, blocking)
729 logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
730 if len(blocking) > 0:
732 for block_level in blocklist:
733 logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))
735 for row in blocklist[block_level]:
736 logger.debug("row[%s]='%s'", type(row), row)
737 if not "domain" in row:
738 logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
# Crawl unknown blocked instances first; per-domain network errors are
# logged and stored instead of aborting the run.
740 elif not instances.is_registered(row["domain"]):
742 logger.info("Fetching instances from domain='%s' ...", row["domain"])
743 federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
744 except network.exceptions as exception:
745 logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
746 instances.set_last_error(row["domain"], exception)
# Record the block; "reject" entries feed the optional bot summary post.
748 if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
749 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
751 "blocked": row["domain"],
752 "reason" : row["reason"],
755 logger.debug("Invoking commit() ...")
756 database.connection.commit()
758 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
759 if config.get("bot_enabled") and len(blockdict) > 0:
760 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
761 network.send_bot_post(blocker, blockdict)
763 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
764 if instances.has_pending(blocker):
765 logger.debug("Flushing updates for blocker='%s' ...", blocker)
766 instances.update(blocker)
768 logger.debug("Success! - EXIT!")
# Fetch an FBA-style RSS feed given via args.feed, extract one domain per
# item (taken from the value after "=" in the item's link), deduplicate and
# validate the domains, then crawl each new one via federation.fetch_instances().
# The feed host itself is rate-limited through the sources table.
# NOTE(review): the `domains` list initialization and return statements are
# elided in this excerpt - confirm against the full source.
771 def fetch_fba_rss(args: argparse.Namespace) -> int:
772 logger.debug("args[]='%s' - CALLED!", type(args))
776 logger.debug("Invoking locking.acquire() ...")
# Derive the bare hostname (no port) from the feed URL for rate limiting.
779 components = urlparse(args.feed)
780 domain = components.netloc.lower().split(":")[0]
782 logger.debug("domain='%s'", domain)
783 if sources.is_recent(domain):
784 logger.info("API from domain='%s' has recently being accessed - EXIT!", domain)
787 logger.debug("domain='%s' has not been recently used, marking ...", domain)
788 sources.update(domain)
790 logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
791 response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
793 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
794 if response.ok and response.status_code == 200 and len(response.text) > 0:
795 logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
796 rss = atoma.parse_rss_bytes(response.content)
798 logger.debug("rss[]='%s'", type(rss))
799 for item in rss.items:
800 logger.debug("item[%s]='%s'", type(item), item)
# The domain is encoded as a query value in the item link ("...=<domain>").
801 domain = item.link.split("=")[1]
802 domain = tidyup.domain(domain) if domain not in[None, ""] else None
804 logger.debug("domain='%s' - AFTER!", domain)
805 if domain is None or domain == "":
806 logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
# Normalize to IDNA form, then apply the usual guards before queueing.
809 logger.debug("domain='%s' - BEFORE!", domain)
810 domain = domain.encode("idna").decode("utf-8")
811 logger.debug("domain='%s' - AFTER!", domain)
813 if not domain_helper.is_wanted(domain):
814 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
816 elif domain in domains:
817 logger.debug("domain='%s' is already added - SKIPPED!", domain)
819 elif instances.is_registered(domain):
820 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
822 elif instances.is_recent(domain):
823 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
826 logger.debug("Adding domain='%s'", domain)
827 domains.append(domain)
# Phase 2: crawl all collected domains; per-domain network errors are logged
# and recorded on the instance instead of aborting the whole run.
829 logger.debug("domains()=%d", len(domains))
831 logger.info("Adding %d new instances ...", len(domains))
832 for domain in domains:
833 logger.debug("domain='%s'", domain)
835 logger.info("Fetching instances from domain='%s' ...", domain)
836 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
837 except network.exceptions as exception:
838 logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
839 instances.set_last_error(domain, exception)
842 logger.debug("Success! - EXIT!")
def fetch_fbabot_atom(args: argparse.Namespace) -> int:
    """Fetch the ATOM feed of the FBA bot account and register every fediverse
    domain linked from the feed entries.

    The default feed is https://ryona.agency/users/fba/feed.atom; it can be
    overridden with ``--feed`` (args.feed must be a valid URL, its netloc then
    becomes the rate-limited source domain).

    Returns 0 on success, 1 when the source was accessed too recently.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "ryona.agency"
    feed = f"https://{source_domain}/users/fba/feed.atom"

    logger.debug("args.feed[%s]='%s'", type(args.feed), args.feed)
    if args.feed is not None and validators.url(args.feed):
        logger.debug("Setting feed='%s' ...", args.feed)
        feed = str(args.feed)
        source_domain = urlparse(args.feed).netloc

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    # Collected, de-duplicated candidate domains from the feed
    domains = []

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            # Entry content is HTML; harvest every <a href="..."> link
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                logger.debug("element[]='%s'", type(element))
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href) if href not in [None, ""] else None

                    logger.debug("domain='%s' - AFTER!", domain)
                    if domain is None or domain == "":
                        logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                        continue

                    logger.debug("domain='%s' - BEFORE!", domain)
                    # Normalize to punycode (IDNA) representation
                    domain = domain.encode("idna").decode("utf-8")
                    logger.debug("domain='%s' - AFTER!", domain)

                    if not domain_helper.is_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))

    logger.info("Adding %d new instances ...", len(domains))
    for domain in domains:
        logger.debug("domain='%s'", domain)
        try:
            logger.info("Fetching instances from domain='%s' ...", domain)
            federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_instances(args: argparse.Namespace) -> int:
    """Fetch peer instances starting from args.domain, then re-crawl known
    instances whose last fetch is older than the configured recheck interval.

    Returns 0 on success, a non-zero code when args.domain is invalid,
    blacklisted, a relay, or the initial fetch failed.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("args.domain='%s' - checking ...", args.domain)
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid.", args.domain)
        return 1
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
        return 2

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    try:
        domain = tidyup.domain(args.domain)
        origin = software = None

        # Look up already-known origin/software for the seed domain
        database.cursor.execute("SELECT origin, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
        row = database.cursor.fetchone()
        if row is not None:
            origin = row["origin"]
            software = row["software"]

        if software_helper.is_relay(software):
            logger.warning("args.domain='%s' is of software type '%s' which is not supported by this command. Please invoke fetch_relays instead.", args.domain, software)
            return 3

        logger.info("Fetching instances from args.domain='%s',origin='%s',software='%s' ...", domain, origin, software)
        federation.fetch_instances(domain, origin, software, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update(args.domain)
        return 4

    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus', 'neodb') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY total_peers DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] == "":
            logger.debug("row[domain] is empty - SKIPPED!")
            continue

        logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
        # Normalize to punycode (IDNA) representation
        domain = row["domain"].encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("Domain domain='%s' is not wanted - SKIPPED!", domain)
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", domain, row["origin"], row["software"], row["nodeinfo_url"])
            federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0
def fetch_csv(args: argparse.Namespace) -> int:
    """Process all configured CSV blocklists (blocklists.csv_files).

    When args.domain is given, only the matching blocker is processed.
    Returns 0 on success.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    logger.info("Checking %d CSV files ...", len(blocklists.csv_files))
    for block in blocklists.csv_files:
        logger.debug("block[blocker]='%s',block[csv_url]='%s'", block["blocker"], block["csv_url"])

        # Is domain given and not equal blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue

        logger.debug("Invoking processing.csv_block(%s, %s, fetch_csv) ...", block["blocker"], block["csv_url"])
        processing.csv_block(block["blocker"], block["csv_url"], inspect.currentframe().f_code.co_name)

    logger.debug("Success - EXIT!")
    return 0
def fetch_oliphant(args: argparse.Namespace) -> int:
    """Download and process oliphant's blocklist CSVs hosted on codeberg.org.

    When args.domain is given, only the matching blocker is processed.
    Returns 0 on success, 1 when codeberg.org was accessed too recently.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "codeberg.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    # Base URL
    base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"

    logger.debug("Downloading %d files ...", len(blocklists.oliphant_blocklists))
    for block in blocklists.oliphant_blocklists:
        # Is domain given and not equal blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue

        url = f"{base_url}/{block['csv_url']}"

        logger.debug("Invoking processing.csv_block(%s, %s, fetch_oliphant) ...", block["blocker"], url)
        processing.csv_block(block["blocker"], url, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_txt(args: argparse.Namespace) -> int:
    """Fetch plain-text blocklists (one domain per line) and feed each wanted
    domain into generic instance processing.

    Returns 0 on success.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Static list of plain-text blocklist URLs
    urls = [{
        "blocker": "seirdy.one",
        "url" : "https://seirdy.one/pb/bsl.txt",
    }]

    logger.info("Checking %d text file(s) ...", len(urls))
    for row in urls:
        logger.debug("Fetching row[url]='%s' ...", row["url"])
        response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code == 200 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.strip().split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain) if domain not in [None, ""] else None

                logger.debug("domain='%s' - AFTER!", domain)
                if domain is None or domain == "":
                    logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                    continue
                elif not domain_helper.is_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue
                elif instances.is_recent(domain):
                    logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                    continue

                logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
                processed = processing.instance(domain, row["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)
                if not processed:
                    logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
                    continue

    logger.debug("Success! - EXIT!")
    return 0
def fetch_fedipact(args: argparse.Namespace) -> int:
    """Scrape the fedipact.online front page for instance domains (listed as
    <li> elements) and fetch each wanted, unknown one.

    Returns 0 on success, 1 when the source was accessed too recently.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fedipact.online"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching / from source_domain='%s' ...", source_domain)
    response = utils.fetch_url(
        f"https://{source_domain}",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and response.text != "":
        logger.debug("Parsing %d Bytes ...", len(response.text))

        doc = bs4.BeautifulSoup(response.text, "html.parser")
        logger.debug("doc[]='%s'", type(doc))

        rows = doc.findAll("li")
        logger.info("Checking %d row(s) ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            domain = tidyup.domain(row.contents[0]) if row.contents[0] not in [None, ""] else None

            logger.debug("domain='%s' - AFTER!", domain)
            if domain is None or domain == "":
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            # Normalize to punycode (IDNA) representation
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.info("Fetching domain='%s' ...", domain)
            federation.fetch_instances(domain, "beach.city", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_joinmobilizon(args: argparse.Namespace) -> int:
    """Fetch the public Mobilizon instance list from instances.joinmobilizon.org
    and register every wanted, unknown host.

    Returns 0 on success, non-zero on rate limit or malformed API response.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "instances.joinmobilizon.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching instances from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/api/v1/instances",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    parsed = json.loads(raw)
    logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))

    if "data" not in parsed:
        # BUGFIX: the %d placeholder previously had no argument and was logged literally
        logger.warning("parsed()=%d does not contain key 'data'", len(parsed))
        return 2

    logger.info("Checking %d instances ...", len(parsed["data"]))
    for row in parsed["data"]:
        logger.debug("row[]='%s'", type(row))
        if "host" not in row:
            logger.warning("row='%s' does not contain key 'host' - SKIPPED!", row)
            continue
        elif not domain_helper.is_wanted(row["host"]):
            logger.debug("row[host]='%s' is not wanted - SKIPPED!", row["host"])
            continue
        elif instances.is_registered(row["host"]):
            logger.debug("row[host]='%s' is already registered - SKIPPED!", row["host"])
            continue

        logger.info("Fetching row[host]='%s' ...", row["host"])
        federation.fetch_instances(row["host"], "demo.mobilizon.org", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_joinmisskey(args: argparse.Namespace) -> int:
    """Fetch instances.json from instanceapp.misskey.page and register every
    wanted, unknown Misskey instance.

    Returns 0 on success, non-zero on rate limit or malformed API response.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "instanceapp.misskey.page"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching instances.json from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/instances.json",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    parsed = json.loads(raw)
    logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))

    if "instancesInfos" not in parsed:
        # BUGFIX: the %d placeholder previously had no argument and was logged literally
        logger.warning("parsed()=%d does not contain element 'instancesInfos'", len(parsed))
        return 2

    logger.info("Checking %d instance(s) ...", len(parsed["instancesInfos"]))
    for row in parsed["instancesInfos"]:
        logger.debug("row[%s]='%s'", type(row), row)
        if "url" not in row:
            logger.warning("row()=%d does not have element 'url' - SKIPPED!", len(row))
            continue
        elif not domain_helper.is_wanted(row["url"]):
            logger.debug("row[url]='%s' is not wanted - SKIPPED!", row["url"])
            continue
        elif instances.is_registered(row["url"]):
            logger.debug("row[url]='%s' is already registered - SKIPPED!", row["url"])
            continue

        logger.info("Fetching row[url]='%s' ...", row["url"])
        federation.fetch_instances(row["url"], "misskey.io", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
def recheck_obfuscation(args: argparse.Namespace) -> int:
    """Re-check instances flagged with has_obfuscation=1 and try to deobfuscate
    their block entries (entries containing '*' or '?').

    Selection can be narrowed by args.domain or args.software; args.force
    bypasses the recency check. Successfully deobfuscated blocks are stored
    and, when the bot is enabled, reported via a bot POST per blocker.

    Returns 0 on success.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if isinstance(args.domain, str) and args.domain != "" and domain_helper.is_wanted(args.domain):
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
    elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
    else:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")

    rows = database.cursor.fetchall()
    logger.info("Checking %d domains ...", len(rows))
    for row in rows:
        logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        if (args.force is None or not args.force) and args.domain is None and args.software is None and instances.is_recent(row["domain"], "last_blocked"):
            logger.debug("row[domain]='%s' has been recently checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force))
            continue

        logger.debug("Invoking federation.fetch_blocks(%s) ...", row["domain"])
        blocking = federation.fetch_blocks(row["domain"])

        logger.debug("blocking()=%d", len(blocking))
        if len(blocking) == 0:
            # Generic fetch yielded nothing - fall back to software-specific fetchers
            if row["software"] == "pleroma":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = pleroma.fetch_blocks(row["domain"])
            elif row["software"] == "mastodon":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = mastodon.fetch_blocks(row["domain"])
            elif row["software"] == "lemmy":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = lemmy.fetch_blocks(row["domain"])
            elif row["software"] == "friendica":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = friendica.fetch_blocks(row["domain"])
            elif row["software"] == "misskey":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = misskey.fetch_blocks(row["domain"])
            else:
                logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])

        # c.s isn't part of oliphant's "hidden" blocklists
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] != "chaos.social" and not software_helper.is_relay(row["software"]) and not blocklists.has(row["domain"]):
            logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
            instances.set_last_blocked(row["domain"])
            instances.set_total_blocks(row["domain"], blocking)

        # Per-blocker counters/collections
        obfuscated = 0
        blockdict = []

        logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
        for block in blocking:
            logger.debug("block[blocked]='%s'", block["blocked"])
            blocked = None

            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
                # Obfuscated entry - attempt deobfuscation via optional digest
                logger.debug("block='%s' is obfuscated.", block["blocked"])
                obfuscated = obfuscated + 1
                blocked = utils.deobfuscate(block["blocked"], row["domain"], block["digest"] if "digest" in block else None)
            elif not domain_helper.is_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
                logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
                continue

            logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
            if blocked is not None and blocked != block["blocked"]:
                logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
                obfuscated = obfuscated - 1

                if blacklist.is_blacklisted(blocked):
                    logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
                    continue
                elif blacklist.is_blacklisted(row["domain"]):
                    logger.debug("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"])
                    continue
                elif blocks.is_instance_blocked(row["domain"], blocked):
                    logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
                    continue

                block["block_level"] = blocks.alias_block_level(block["block_level"])

                logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
                if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
                    blockdict.append({
                        "blocked": blocked,
                        "reason" : block["reason"],
                    })

        logger.debug("Setting obfuscated=%d for row[domain]='%s' ...", obfuscated, row["domain"])
        instances.set_obfuscated_blocks(row["domain"], obfuscated)

        logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
        if instances.has_pending(row["domain"]):
            logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
            instances.update(row["domain"])

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", row["domain"], len(blockdict))
            network.send_bot_post(row["domain"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_fedilist(args: argparse.Namespace) -> int:
    """Fetch the CSV instance list from demo.fedilist.com (optionally filtered
    by args.software) and crawl every wanted, unknown domain.

    Returns 0 on success, non-zero on rate limit or fetch/parse failure.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "demo.fedilist.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    url = f"http://{source_domain}/instance/csv?onion=not"
    if args.software is not None and args.software != "":
        logger.debug("args.software='%s'", args.software)
        url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"

    logger.info("Fetching url='%s' ...", url)
    response = reqto.get(
        url,
        headers=network.web_headers,
        timeout=(config.get("connection_timeout"), config.get("read_timeout")),
        allow_redirects=False
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if not response.ok or response.status_code > 200 or len(response.content) == 0:
        logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.text))
        return 2

    reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")

    logger.debug("reader[]='%s'", type(reader))
    if reader is None:
        logger.warning("Failed parsing response.content()=%d as CSV content", len(response.content))
        return 3

    # Materialize the reader so the row count can be logged
    rows = list(reader)

    logger.info("Checking %d rows ...", len(rows))
    for row in rows:
        logger.debug("row[]='%s'", type(row))
        if "hostname" not in row:
            logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(row))
            continue

        logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"])
        domain = tidyup.domain(row["hostname"]) if row["hostname"] not in [None, ""] else None
        logger.debug("domain='%s' - AFTER!", domain)

        if domain is None or domain == "":
            logger.debug("domain='%s' is empty after tidyup.domain(): row[hostname]='%s' - SKIPPED!", domain, row["hostname"])
            continue

        logger.debug("domain='%s' - BEFORE!", domain)
        # Normalize to punycode (IDNA) representation
        domain = domain.encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            continue
        elif (args.force is None or not args.force) and instances.is_registered(domain):
            logger.debug("domain='%s' is already registered, --force not specified: args.force[]='%s'", domain, type(args.force))
            continue
        elif instances.is_recent(domain):
            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
            continue

        logger.info("Fetching instances from domain='%s' ...", domain)
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
def update_nodeinfo(args: argparse.Namespace) -> int:
    """Re-determine the software type of known instances via their nodeinfo.

    Selection is narrowed by the first matching of: args.domain, args.software,
    args.mode (detection mode), args.no_software, args.no_auto, args.no_detection;
    otherwise all instances are checked ordered by last update. args.force
    bypasses the recency check and forces re-detection.

    Returns 0 on success.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        logger.debug("Fetching args.domain='%s'", args.domain)
        database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
    elif args.software is not None and args.software != "":
        logger.info("Fetching domains for args.software='%s'", args.software)
        database.cursor.execute("SELECT domain, software FROM instances WHERE software = ? ORDER BY last_updated ASC", [args.software])
    elif args.mode is not None and args.mode != "":
        logger.info("Fetching domains for args.mode='%s'", args.mode.upper())
        database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode = ? ORDER BY last_updated ASC", [args.mode])
    elif args.no_software:
        logger.info("Fetching domains with no software type detected ...")
        database.cursor.execute("SELECT domain, software FROM instances WHERE software IS NULL ORDER BY last_updated ASC")
    elif args.no_auto:
        # NOTE(review): flag name reconstructed from surrounding CLI options - confirm against argument parser
        # BUGFIX: message typo "AUTO_DISOVERY" corrected (SQL below already says AUTO_DISCOVERY)
        logger.info("Fetching domains with other detection mode than AUTO_DISCOVERY being set ...")
        database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode IS NOT NULL AND detection_mode != 'AUTO_DISCOVERY' ORDER BY last_updated ASC")
    elif args.no_detection:
        logger.info("Fetching domains with no detection mode being set ...")
        database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode IS NULL ORDER BY last_updated ASC")
    else:
        logger.info("Fetching domains for recently updated ...")
        database.cursor.execute("SELECT domain, software FROM instances ORDER BY last_updated ASC")

    domains = database.cursor.fetchall()

    logger.info("Checking %d domain(s) ...", len(domains))
    cnt = 0
    for row in domains:
        logger.debug("row[]='%s'", type(row))
        if not args.force and instances.is_recent(row["domain"], "last_nodeinfo"):
            logger.debug("row[domain]='%s' has been recently checked - SKIPPED!", row["domain"])
            continue

        try:
            logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
            software = federation.determine_software(row["domain"])

            logger.debug("Determined software='%s'", software)
            if (software != row["software"] and software is not None) or args.force is True:
                logger.debug("software='%s'", software)
                if software is None:
                    logger.debug("Setting nodeinfo_url to 'None' for row[domain]='%s' ...", row["domain"])
                    instances.set_nodeinfo_url(row["domain"], None)

                logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
                instances.set_software(row["domain"], software)

            if software is not None:
                logger.debug("Setting row[domain]='%s' as successfully determined ...", row["domain"])
                instances.set_success(row["domain"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
            instances.set_last_error(row["domain"], exception)

        instances.set_last_nodeinfo(row["domain"])
        instances.update(row["domain"])
        cnt = cnt + 1

    logger.debug("Success! - EXIT!")
    return 0
def fetch_instances_social(args: argparse.Namespace) -> int:
    """Fetch the instance list from the instances.social API (requires the
    'instances_social_api_key' config value) and crawl every wanted, unknown
    domain.

    Returns 0 on success, non-zero on missing API key, rate limit, or a
    malformed API response.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "instances.social"

    if config.get("instances_social_api_key") == "":
        logger.error("API key not set. Please set in your config.json file.")
        return 1
    elif sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 2
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    headers = {
        "Authorization": f"Bearer {config.get('instances_social_api_key')}",
    }

    logger.info("Fetching list from source_domain='%s' ...", source_domain)
    fetched = network.get_json_api(
        source_domain,
        "/api/1.0/instances/list?count=0&sort_by=name",
        headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )
    logger.debug("fetched[]='%s'", type(fetched))

    if "error_message" in fetched:
        logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
        return 3
    elif "exception" in fetched:
        logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
        return 4
    elif "json" not in fetched:
        logger.warning("fetched has no element 'json' - EXIT!")
        return 5
    elif "instances" not in fetched["json"]:
        logger.warning("fetched[row] has no element 'instances' - EXIT!")
        return 6

    domains = []
    rows = fetched["json"]["instances"]

    logger.info("Checking %d row(s) ...", len(rows))
    for row in rows:
        logger.debug("row[]='%s'", type(row))
        domain = tidyup.domain(row["name"]) if row["name"] not in [None, ""] else None
        logger.debug("domain='%s' - AFTER!", domain)

        # BUGFIX: was "domain is None and domain == ''" which is always False,
        # letting empty domains slip through to the IDNA encode below
        if domain is None or domain == "":
            logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
            continue

        logger.debug("domain='%s' - BEFORE!", domain)
        # Normalize to punycode (IDNA) representation
        domain = domain.encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            continue
        elif domain in domains:
            logger.debug("domain='%s' is already added - SKIPPED!", domain)
            continue
        elif instances.is_registered(domain):
            logger.debug("domain='%s' is already registered - SKIPPED!", domain)
            continue
        elif instances.is_recent(domain):
            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
            continue

        logger.info("Fetching instances from domain='%s'", domain)
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
# Crawls known fediverse relay servers (activityrelay, aoderelay,
# selective-relay, pub-relay), scrapes/fetches their registered peer lists and
# registers newly discovered instances. Returns an exit code (int).
# NOTE(review): this listing is gappy — loop headers (`for row in rows:`),
# `try:` lines, `continue` statements and the `peers`/`domains` list
# initializations are not visible here; comments below describe only what the
# visible lines establish.
1608 def fetch_relays(args: argparse.Namespace) -> int:
1609 logger.debug("args[]='%s' - CALLED!", type(args))
1611 logger.debug("Invoking locking.acquire() ...")
# Select relay rows from the instances table, optionally narrowed to a single
# --domain or a single --software value from the CLI arguments.
1614 if args.domain is not None and args.domain != "":
1615 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND domain = ? LIMIT 1", [args.domain])
1616 elif args.software is not None and args.software != "":
1617 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND software = ?", [args.software])
# Fallback: all known relays of the four supported software types.
1619 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay')")
1622 rows = database.cursor.fetchall()
1624 logger.info("Checking %d relays ...", len(rows))
# Per-relay loop body (loop header not visible in this listing).
1626 logger.debug("row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
# Skip recently crawled relays unless --force was given.
1628 if not args.force and instances.is_recent(row["domain"]):
1629 logger.debug("row[domain]='%s' has been recently fetched - SKIPPED!", row["domain"])
# pub-relay exposes its peers via its nodeinfo JSON endpoint ...
1633 if row["software"] == "pub-relay":
1634 logger.info("Fetching row[nodeinfo_url]='%s' from relay row[domain]='%s',row[software]='%s' ...", row["nodeinfo_url"], row["domain"], row["software"])
1635 raw = network.fetch_api_url(
1636 row["nodeinfo_url"],
1637 (config.get("connection_timeout"), config.get("read_timeout"))
1640 logger.debug("raw[%s]()=%d", type(raw), len(raw))
# Validate the API response envelope: re-raise transport exceptions, record
# API-level errors against the instance, then sanity-check the JSON shape.
1641 if "exception" in raw:
1642 logger.warning("row[domain]='%s' has caused an exception: '%s' - raising again ...", row["domain"], type(raw["exception"]))
1643 raise raw["exception"]
1644 elif "error_message" in raw:
1645 logger.warning("row[domain]='%s' has caused error message: '%s' - SKIPPED!", row["domain"], raw["error_message"])
1646 instances.set_last_error(row["domain"], raw)
1647 instances.set_last_instance_fetch(row["domain"])
1648 instances.update(row["domain"])
1650 elif "json" not in raw:
1651 logger.warning("raw()=%d does not contain key 'json' in response - SKIPPED!", len(raw))
# NOTE(review): the next two warnings say key 'json' is missing, but the
# checks are for 'metadata' and 'peers' — copy-paste error in the messages.
# Also `not "x" in y` should idiomatically be `"x" not in y`.
1653 elif not "metadata" in raw["json"]:
1654 logger.warning("raw[json]()=%d does not contain key 'json' in response - SKIPPED!", len(raw["json"]))
1656 elif not "peers" in raw["json"]["metadata"]:
1657 logger.warning("raw[json][metadata()=%d does not contain key 'json' in response - SKIPPED!", len(raw["json"]["metadata"]))
# ... while the other relay types publish their peer list on their HTML
# front page, which is fetched and parsed with BeautifulSoup below.
1660 logger.info("Fetching / from relay row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1661 raw = utils.fetch_url(
1662 f"https://{row['domain']}",
1663 network.web_headers,
1664 (config.get("connection_timeout"), config.get("read_timeout"))
1666 logger.debug("raw[%s]()=%d", type(raw), len(raw))
1668 doc = bs4.BeautifulSoup(raw, features="html.parser")
1669 logger.debug("doc[]='%s'", type(doc))
# Network failures are recorded on the instance record and the relay is
# skipped (the matching `try:` line is not visible in this listing).
1671 except network.exceptions as exception:
1672 logger.warning("Exception '%s' during fetching from relay '%s': '%s'", type(exception), row["domain"], str(exception))
1673 instances.set_last_error(row["domain"], exception)
1674 instances.set_last_instance_fetch(row["domain"])
1675 instances.update(row["domain"])
# Dispatch on relay software: each type lays out its peer list differently.
1678 logger.debug("row[software]='%s'", row["software"])
1679 if row["software"] == "activityrelay":
1680 logger.debug("Checking row[domain]='%s' ...", row["domain"])
# activityrelay lists peers as NavigableStrings inside the <p> paragraph
# that contains the phrase "registered instances".
1681 tags = doc.findAll("p")
1683 logger.debug("Checking %d paragraphs ...", len(tags))
1685 logger.debug("tag[]='%s'", type(tag))
1686 if len(tag.contents) == 0:
1687 logger.debug("tag='%s' is an empty tag - SKIPPED!", tag)
1689 elif "registered instances" not in tag.contents[0]:
1690 logger.debug("Skipping paragraph, text not found.")
1693 logger.debug("Found tag.contents[0][]='%s'", tag.contents[0])
# Each text node in the paragraph is one peer domain; skip markup nodes
# and the heading text itself.
1694 for domain in tag.contents:
1695 logger.debug("domain[%s]='%s'", type(domain), domain)
1696 if not isinstance(domain, bs4.element.NavigableString) or "registered instances" in domain:
1699 domain = str(domain)
1700 logger.debug("domain='%s'", domain)
1701 if not domain_helper.is_wanted(domain):
1702 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
# Normalize the scraped text to a bare domain name (or None if empty).
1705 logger.debug("domain='%s' - BEFORE!", domain)
1706 domain = tidyup.domain(domain) if domain not in[None, ""] else None
1707 logger.debug("domain='%s' - AFTER!", domain)
1709 if domain is None or domain == "":
1710 logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1712 elif domain not in peers:
1713 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1714 peers.append(domain)
# Deduplicate across relays before queuing the domain for registration.
1716 if dict_helper.has_key(domains, "domain", domain):
1717 logger.debug("domain='%s' already added", domain)
1720 logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
1723 "origin": row["domain"],
# aoderelay and selective-relay expose peers as links in the page markup.
1725 elif row["software"] in ["aoderelay", "selective-relay"]:
1726 logger.debug("Checking row[domain]='%s' ...", row["domain"])
1727 if row["software"] == "aoderelay":
1728 tags = doc.findAll("section", {"class": "instance"})
1730 tags = doc.find("div", {"id": "instances"}).findAll("li")
1732 logger.debug("Checking %d tags ...", len(tags))
1734 logger.debug("tag[]='%s'", type(tag))
1736 link = tag.find("a")
1737 logger.debug("link[%s]='%s'", type(link), link)
1738 if not isinstance(link, bs4.element.Tag):
1739 logger.warning("tag[%s]='%s' is not type of 'bs4.element.Tag' - SKIPPED!", type(tag), tag)
# Extract the host part from the link target, dropping any port suffix.
1742 components = urlparse(link.get("href"))
1743 logger.debug("components(%d)='%s'", len(components), components)
1744 domain = components.netloc.lower().split(":")[0]
1746 logger.debug("domain='%s' - BEFORE!", domain)
1747 domain = tidyup.domain(domain) if domain not in[None, ""] else None
1748 logger.debug("domain='%s' - AFTER!", domain)
1750 if domain is None or domain == "":
1751 logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1753 elif domain not in peers:
1754 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1755 peers.append(domain)
1757 if dict_helper.has_key(domains, "domain", domain):
1758 logger.debug("domain='%s' already added", domain)
1761 logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
1764 "origin": row["domain"],
# pub-relay: peers come straight from the nodeinfo JSON fetched above.
1766 elif row["software"] == "pub-relay":
1767 logger.debug("Checking %d peer(s) row[domain]='%s' ...", len(raw["json"]["metadata"]["peers"]), row["domain"])
1768 for domain in raw["json"]["metadata"]["peers"]:
1769 logger.debug("domain='%s' - BEFORE!", domain)
1770 domain = tidyup.domain(domain) if domain not in[None, ""] else None
1771 logger.debug("domain='%s' - AFTER!", domain)
1773 if domain is None or domain == "":
1774 logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1776 elif domain not in peers:
1777 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1778 peers.append(domain)
1780 if dict_helper.has_key(domains, "domain", domain):
1781 logger.debug("domain='%s' already added", domain)
1784 logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
1787 "origin": row["domain"],
# Unknown relay software: warn and fall through (guarded by the SQL IN()
# filter above, so this should normally be unreachable).
1790 logger.warning("row[domain]='%s',row[software]='%s' is not supported", row["domain"], row["software"])
# Persist crawl bookkeeping for this relay: fetch timestamp, peer count.
1793 logger.debug("Updating last_instance_fetch for row[domain]='%s' ...", row["domain"])
1794 instances.set_last_instance_fetch(row["domain"])
1796 logger.info("Relay '%s' has %d peer(s) registered.", row["domain"], len(peers))
1797 instances.set_total_peers(row["domain"], peers)
1799 logger.debug("Flushing data for row[domain]='%s'", row["domain"])
1800 instances.update(row["domain"])
# Second pass: register every newly discovered, wanted, not-yet-known domain.
1802 logger.info("Checking %d domains ...", len(domains))
1804 logger.debug("row[domain]='%s',row[origin]='%s'", row["domain"], row["origin"])
1805 if not domain_helper.is_wanted(row["domain"]):
1806 logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
1808 elif instances.is_registered(row["domain"]):
1809 logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
# Register the instance, crediting the relay as its origin; the command
# name is recorded via the current frame's code-object name.
1812 logger.info("Fetching row[domain]='%s',row[origin]='%s' ...", row["domain"], row["origin"])
1813 federation.fetch_instances(row["domain"], row["origin"], None, inspect.currentframe().f_code.co_name)
1815 logger.debug("Success! - EXIT!")
def convert_idna(args: argparse.Namespace) -> int:
    """Convert non-punycode domain columns to their IDNA (punycode) form.

    Walks the four domain-bearing columns (instances.domain,
    instances.origin, blocks.blocker, blocks.blocked), fetches every row
    whose value does not already contain the ``xn--`` punycode marker and
    hands the rows to the matching ``translate_idnas()`` helper.

    Parameters:
        args: Parsed command-line arguments (unused; kept so this command
              has the same signature as its siblings).

    Returns:
        0 on success.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    # (table, column, translator) triples — one per domain-bearing column.
    # The previous implementation copy-pasted the query/fetch/translate
    # sequence four times; a data-driven loop keeps them in sync.
    jobs = (
        ("instances", "domain", instances.translate_idnas),
        ("instances", "origin", instances.translate_idnas),
        ("blocks", "blocker", blocks.translate_idnas),
        ("blocks", "blocked", blocks.translate_idnas),
    )

    for table, column, translator in jobs:
        # Identifiers cannot be bound as SQL parameters; table/column come
        # from the hard-coded tuple above, so interpolation is safe here.
        database.cursor.execute(
            f"SELECT {column} FROM {table} WHERE {column} NOT LIKE '%xn--%' ORDER BY {column} ASC"
        )
        rows = database.cursor.fetchall()

        logger.debug("rows[]='%s'", type(rows))
        translator(rows, column)

    logger.debug("Success! - EXIT!")
    # Fix: the function is annotated `-> int` but had no visible return
    # statement (implicitly returned None); 0 signals success like the
    # other commands in this module.
    return 0
# Deletes instances whose domain name fails validation, together with all
# block records referencing them, then compacts the database.
# NOTE(review): the actual locking.acquire() call and the per-row loop header
# are not visible in this listing; only the visible lines are documented.
1848 def remove_invalid(args: argparse.Namespace) -> int:
1849 logger.debug("args[]='%s' - CALLED!", type(args))
1851 logger.debug("Invoking locking.acquire() ...")
# Fetch every known instance domain for validation.
1854 database.cursor.execute("SELECT domain FROM instances ORDER BY domain ASC")
1855 rows = database.cursor.fetchall()
1857 logger.info("Checking %d domains ...", len(rows))
# Per-row loop body (loop header not visible in this listing).
1859 logger.debug("row[domain]='%s'", row["domain"])
# Validate only the host part — anything after a "/" (stray path) is ignored.
1860 if not validators.domain(row["domain"].split("/")[0]):
1861 logger.info("Invalid row[domain]='%s' found, removing ...", row["domain"])
# Remove the invalid domain from both sides of the blocks table first,
# then drop the instance record itself.
1862 database.cursor.execute("DELETE FROM blocks WHERE blocker = ? OR blocked = ?", [row["domain"], row["domain"]])
1863 database.cursor.execute("DELETE FROM instances WHERE domain = ? LIMIT 1", [row["domain"]])
# Commit all deletions in one transaction.
1865 logger.debug("Invoking commit() ...")
1866 database.connection.commit()
# Reclaim the space freed by the deletions.
# NOTE(review): "Vaccum" in the log message is a typo for "Vacuum" — left
# untouched here since runtime strings must not change in a doc-only edit.
1868 logger.info("Vaccum cleaning database ...")
1869 database.cursor.execute("VACUUM")
1871 logger.debug("Success! - EXIT!")