# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import argparse
import csv
import inspect
import json
import logging
import time

from urllib.parse import urlparse

import atoma
import bs4
import markdown
import reqto
import validators

from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import blocklists
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import dicts as dict_helper
from fba.helpers import domain as domain_helper
from fba.helpers import locking
from fba.helpers import processing
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import csrf
from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances
from fba.models import sources

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)

def check_instance(args: argparse.Namespace) -> int:
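    """Checks whether args.domain is valid, not blacklisted and not already registered, and logs the result."""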
    logger.debug("args.domain='%s' - CALLED!", args.domain)

    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)

def check_nodeinfo(args: argparse.Namespace) -> int:
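    """Checks whether the stored nodeinfo_url of each registered instance still matches its (punycode) domain."""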
    logger.debug("args[]='%s' - CALLED!", type(args))

    database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")

    for row in database.cursor.fetchall():
        logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        punycode = row["domain"].encode("idna").decode("utf-8")

        if row["nodeinfo_url"].startswith("/"):
            logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
        elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
            logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])

    logger.info("Found %d row(s)", cnt)

    logger.debug("EXIT!")

def fetch_pixelfed_api(args: argparse.Namespace) -> int:
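    """Fetches the public server list from the pixelfed.org API and registers new, wanted domains."""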
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF by default, you don't have to add network.source_headers yourself here
    source_domain = "pixelfed.org"

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    try:
        logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
        headers = csrf.determine(source_domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_pixelfed_api,%s) - EXIT!", type(exception), __name__)

    try:
        logger.info("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            source_domain,
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
            elif row["domain"] == "":
                logger.debug("row[domain] is empty - SKIPPED!")

            logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
            domain = row["domain"].encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)

            logger.debug("Fetching instances from domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))

    logger.debug("Success! - EXIT!")

def fetch_bkali(args: argparse.Namespace) -> int:
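    """Fetches a domain list from the gql.api.bka.li GraphQL API and registers new, wanted domains."""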
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")

    source_domain = "gql.api.bka.li"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    try:
        logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
        fetched = network.post_json_api(
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        )

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for 'gql.sources.bka.li' returned error message='%s'", fetched["error_message"])
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
            elif entry["domain"] == "":
                logger.debug("entry[domain] is empty - SKIPPED!")
            elif not domain_helper.is_wanted(entry["domain"]):
                logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))

    logger.debug("domains()=%d", len(domains))
    logger.info("Adding %d new instances ...", len(domains))
    for domain in domains:
        logger.debug("domain='%s' - BEFORE!", domain)
        domain = domain.encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        try:
            logger.info("Fetching instances from domain='%s' ...", domain)
            federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")

def fetch_blocks(args: argparse.Namespace) -> int:
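    """Fetches blocklists from registered instances (optionally a single domain or software type) and stores new blocks."""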
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)

    logger.debug("Invoking locking.acquire() ...")

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ? LIMIT 1", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC", [args.software]
        )

    logger.debug("Re-checking all instances ...")
    database.cursor.execute(
        "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC"
    )

    # Re-check after "timeout" (aka. minimum interval)
    database.cursor.execute(
        "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_block")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)

        if not domain_helper.is_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)

        logger.debug("Setting last_blocked,has_obfuscation=false for blocker='%s' ...", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        # c.s isn't part of oliphant's "hidden" blocklists
        if blocker == "chaos.social" or software_helper.is_relay(software) or blocklists.has(blocker):
            logger.debug("Skipping blocker='%s', run ./fba.py fetch_cs, fetch_oliphant, fetch_csv instead!", blocker)

        logger.debug("Invoking federation.fetch_blocks(%s) ...", blocker)
        blocking = federation.fetch_blocks(blocker)

        logger.debug("blocker='%s',software='%s',blocking()=%d", blocker, software, len(blocking))
        if len(blocking) == 0:
            logger.debug("blocker='%s',software='%s' - fetching blocklist ...", blocker, software)
            if software == "pleroma":
                blocking = pleroma.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "mastodon":
                blocking = mastodon.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "lemmy":
                blocking = lemmy.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "friendica":
                blocking = friendica.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "misskey":
                blocking = misskey.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            else:
                logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
        instances.set_total_blocks(blocker, blocking)

        deobfuscated = obfuscated = 0

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)
                instances.set_has_obfuscation(blocker, True)
                obfuscated = obfuscated + 1

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["digest"] if "digest" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)

                deobfuscated = deobfuscated + 1
                block["blocked"] = row["domain"]
                origin = row["origin"]
                nodeinfo_url = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)
                instances.set_has_obfuscation(blocker, True)
                obfuscated = obfuscated + 1

                # Some obscure them with question marks, not sure if that's dependent on version or not
                row = instances.deobfuscate("?", block["blocked"], block["digest"] if "digest" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)

                deobfuscated = deobfuscated + 1
                block["blocked"] = row["domain"]
                origin = row["origin"]
                nodeinfo_url = row["nodeinfo_url"]

            logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")

            logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
            block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
            logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])

            if not domain_helper.is_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            block["block_level"] = blocks.alias_block_level(block["block_level"])

            if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.info("blocker='%s' has %d obfuscated domain(s) and %d of them could be deobfuscated.", blocker, obfuscated, deobfuscated)

        logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")

def fetch_observer(args: argparse.Namespace) -> int:
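    """Fetches instance tables per software type from fediverse.observer and registers new, wanted domains."""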
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")

    source_domain = "fediverse.observer"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    if args.software is None:
        logger.info("Fetching software list ...")
        raw = utils.fetch_url(
            f"https://{source_domain}",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )
        logger.debug("raw[%s]()=%d", type(raw), len(raw))

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        navbar = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"})
        logger.debug("navbar[]='%s'", type(navbar))
        if navbar is None:
            logger.warning("Cannot find navigation bar, cannot continue!")

        items = navbar.findAll("a", {"class": "dropdown-item"})
        logger.debug("items[]='%s'", type(items))

        logger.info("Checking %d menu items ...", len(items))
        for item in items:
            logger.debug("item[%s]='%s'", type(item), item)
            if item.text.lower() == "all":
                logger.debug("Skipping 'All' menu entry ...")

            logger.debug("Appending item.text='%s' ...", item.text)
            types.append(tidyup.domain(item.text))
    else:
        logger.info("Adding args.software='%s' as type ...", args.software)
        types.append(args.software)

    logger.info("Fetching %d different table data ...", len(types))
    for software in types:
        logger.debug("software='%s' - BEFORE!", software)
        software = software_helper.alias(software)
        logger.debug("software='%s' - AFTER!", software)

        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)

        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://{source_domain}/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            )
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()
            domain = tidyup.domain(domain) if domain not in [None, ""] else None
            logger.debug("domain='%s' - AFTER!", domain)

            if domain is None or domain == "":
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)

            logger.info("Fetching instances for domain='%s'", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")

def fetch_todon_wiki(args: argparse.Namespace) -> int:
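    """Fetches the silenced/suspended server lists from wiki.todon.eu and stores them as blocks."""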
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")

    source_domain = "wiki.todon.eu"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.debug("Fetching domainblocks from source_domain='%s'", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/todon/domainblocks",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blocking = blocklist["silenced"] + blocklist["reject"]

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_last_blocked(blocker)
    instances.set_total_blocks(blocker, blocking)

    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if not domain_helper.is_wanted(blocked):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", blocked)
            elif not domain_helper.is_wanted(blocker):
                logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            elif blocks.is_instance_blocked(blocker, blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", blocked, block_level, blocker)

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
        network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

    logger.debug("Success! - EXIT!")

def fetch_cs(args: argparse.Namespace):
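    """Fetches chaos.social's federation.md and stores its silenced/blocked instances as blocks."""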
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")

    source_domain = "raw.githubusercontent.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching federation.md from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/chaossocial/meta/master/federation.md",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc()=%d[]='%s'", len(doc), type(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    blocklist["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    blocklist["reject"] = federation.find_domains(blocked)

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "chaos.social"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_last_blocked(blocker)
    instances.set_total_blocks(blocker, blocking)

    logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
    if len(blocking) > 0:
        for block_level in blocklist:
            logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))

            for row in blocklist[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if "domain" not in row:
                    logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
                elif not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
                    blockdict.append({
                        "blocked": row["domain"],
                        "reason" : row["reason"],
                    })

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
        network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

    logger.debug("Success! - EXIT!")

def fetch_fba_rss(args: argparse.Namespace) -> int:
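    """Fetches an FBA-specific RSS feed (args.feed) and registers new, wanted domains found in it."""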
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")

    components = urlparse(args.feed)
    domain = components.netloc.lower().split(":")[0]

    logger.debug("domain='%s'", domain)
    if sources.is_recent(domain):
        logger.info("API from domain='%s' has recently been accessed - EXIT!", domain)
    else:
        logger.debug("domain='%s' has not been recently used, marking ...", domain)
        sources.update(domain)

    logger.info("Fetching FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item[%s]='%s'", type(item), item)
            domain = item.link.split("=")[1]
            domain = tidyup.domain(domain) if domain not in [None, ""] else None

            logger.debug("domain='%s' - AFTER!", domain)
            if domain is None or domain == "":
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    logger.info("Adding %d new instances ...", len(domains))
    for domain in domains:
        logger.debug("domain='%s'", domain)

        try:
            logger.info("Fetching instances from domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")

def fetch_fbabot_atom(args: argparse.Namespace) -> int:
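    """Fetches the ATOM feed of the FBA bot account (ryona.agency by default) and registers new, wanted domains linked in it."""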
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")

    source_domain = "ryona.agency"
    feed = f"https://{source_domain}/users/fba/feed.atom"

    logger.debug("args.feed[%s]='%s'", type(args.feed), args.feed)
    if args.feed is not None and validators.url(args.feed):
        logger.debug("Setting feed='%s' ...", args.feed)
        feed = str(args.feed)
        source_domain = urlparse(args.feed).netloc

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                logger.debug("element[]='%s'", type(element))
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href) if href not in [None, ""] else None

                    logger.debug("domain='%s' - AFTER!", domain)
                    if domain is None or domain == "":
                        logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)

                    logger.debug("domain='%s' - BEFORE!", domain)
                    domain = domain.encode("idna").decode("utf-8")
                    logger.debug("domain='%s' - AFTER!", domain)

                    if not domain_helper.is_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    logger.info("Adding %d new instances ...", len(domains))
    for domain in domains:
        logger.debug("domain='%s'", domain)

        try:
            logger.info("Fetching instances from domain='%s' ...", domain)
            federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")

def fetch_instances(args: argparse.Namespace) -> int:
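    """Fetches peer instances, starting from args.domain and continuing with registered instances due for a re-fetch."""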
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("args.domain='%s' - checking ...", args.domain)
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid.", args.domain)
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)

    logger.debug("Invoking locking.acquire() ...")

    domain = tidyup.domain(args.domain)
    origin = software = None

    database.cursor.execute("SELECT origin, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
    row = database.cursor.fetchone()
    if row is not None:
        origin = row["origin"]
        software = row["software"]

    if software_helper.is_relay(software):
        logger.warning("args.domain='%s' is of software type '%s' which is not supported by this command. Please invoke fetch_relays instead.", args.domain, software)

    try:
        logger.info("Fetching instances from args.domain='%s',origin='%s',software='%s' ...", domain, origin, software)
        federation.fetch_instances(domain, origin, software, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update(args.domain)

    logger.debug("Not fetching more instances - EXIT!")

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus', 'neodb') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY total_peers DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] == "":
            logger.debug("row[domain] is empty - SKIPPED!")

        logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
        domain = row["domain"].encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", domain, row["origin"], row["software"], row["nodeinfo_url"])
            federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")

def fetch_csv(args: argparse.Namespace) -> int:
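    """Processes all configured CSV blocklists (blocklists.csv_files), optionally restricted to a single blocker."""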
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")

    logger.info("Checking %d CSV files ...", len(blocklists.csv_files))
    for block in blocklists.csv_files:
        logger.debug("block[blocker]='%s',block[csv_url]='%s'", block["blocker"], block["csv_url"])

        # Is a domain given and not equal to the blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)

        logger.debug("Invoking processing.csv_block(%s, %s, fetch_csv) ...", block["blocker"], block["csv_url"])
        processing.csv_block(block["blocker"], block["csv_url"], inspect.currentframe().f_code.co_name)

    logger.debug("Success - EXIT!")

def fetch_oliphant(args: argparse.Namespace) -> int:
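    """Downloads the oliphant blocklists from codeberg.org and processes each CSV, optionally restricted to a single blocker."""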
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")

    source_domain = "codeberg.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"

    logger.debug("Downloading %d files ...", len(blocklists.oliphant_blocklists))
    for block in blocklists.oliphant_blocklists:
        # Is a domain given and not equal to the blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)

        url = f"{base_url}/{block['csv_url']}"

        logger.debug("Invoking processing.csv_block(%s, %s, fetch_oliphant) ...", block["blocker"], url)
        processing.csv_block(block["blocker"], url, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")

def fetch_txt(args: argparse.Namespace) -> int:
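    """Fetches plain-text blocklists (currently seirdy.one's bsl.txt) and processes the listed domains."""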
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")

    urls = [{
        "blocker": "seirdy.one",
        "url"    : "https://seirdy.one/pb/bsl.txt",
    }]

    logger.info("Checking %d text file(s) ...", len(urls))
    for row in urls:
        logger.debug("Fetching row[url]='%s' ...", row["url"])
        response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code == 200 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.strip().split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain) if domain not in [None, ""] else None

                logger.debug("domain='%s' - AFTER!", domain)
                if domain is None or domain == "":
                    logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                elif not domain_helper.is_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                elif instances.is_recent(domain):
                    logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)

                logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
                processed = processing.instance(domain, row["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)
                if not processed:
                    logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)

    logger.debug("Success! - EXIT!")

def fetch_fedipact(args: argparse.Namespace) -> int:
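    """Fetches the participant list from fedipact.online and registers new, wanted domains."""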
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")

    source_domain = "fedipact.online"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching / from source_domain='%s' ...", source_domain)
    response = utils.fetch_url(
        f"https://{source_domain}",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and response.text != "":
        logger.debug("Parsing %d Bytes ...", len(response.text))

        doc = bs4.BeautifulSoup(response.text, "html.parser")
        logger.debug("doc[]='%s'", type(doc))

        rows = doc.findAll("li")
        logger.info("Checking %d row(s) ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            domain = tidyup.domain(row.contents[0]) if row.contents[0] not in [None, ""] else None

            logger.debug("domain='%s' - AFTER!", domain)
            if domain is None or domain == "":
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)

            logger.info("Fetching domain='%s' ...", domain)
            federation.fetch_instances(domain, "beach.city", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")

def fetch_joinmobilizon(args: argparse.Namespace) -> int:
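    """Fetches the instance list from instances.joinmobilizon.org and registers new, wanted domains."""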
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")

    source_domain = "instances.joinmobilizon.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching instances from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/api/v1/instances",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    parsed = json.loads(raw)
    logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))

    if "data" not in parsed:
        logger.warning("parsed()=%d does not contain key 'data'", len(parsed))

    logger.info("Checking %d instances ...", len(parsed["data"]))
    for row in parsed["data"]:
        logger.debug("row[]='%s'", type(row))
        if "host" not in row:
            logger.warning("row='%s' does not contain key 'host' - SKIPPED!", row)
        elif not domain_helper.is_wanted(row["host"]):
            logger.debug("row[host]='%s' is not wanted - SKIPPED!", row["host"])
        elif instances.is_registered(row["host"]):
            logger.debug("row[host]='%s' is already registered - SKIPPED!", row["host"])

        logger.info("Fetching row[host]='%s' ...", row["host"])
        federation.fetch_instances(row["host"], "demo.mobilizon.org", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")

def fetch_joinmisskey(args: argparse.Namespace) -> int:
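    """Fetches instances.json from instanceapp.misskey.page and registers new, wanted Misskey domains."""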
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")

    source_domain = "instanceapp.misskey.page"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching instances.json from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/instances.json",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    parsed = json.loads(raw)
    logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))

    if "instancesInfos" not in parsed:
        logger.warning("parsed()=%d does not contain element 'instancesInfos'", len(parsed))

    logger.info("Checking %d instance(s) ...", len(parsed["instancesInfos"]))
    for row in parsed["instancesInfos"]:
        logger.debug("row[%s]='%s'", type(row), row)
        if "url" not in row:
            logger.warning("row()=%d does not have element 'url' - SKIPPED!", len(row))
        elif not domain_helper.is_wanted(row["url"]):
            logger.debug("row[url]='%s' is not wanted - SKIPPED!", row["url"])
        elif instances.is_registered(row["url"]):
            logger.debug("row[url]='%s' is already registered - SKIPPED!", row["url"])

        logger.info("Fetching row[url]='%s' ...", row["url"])
        federation.fetch_instances(row["url"], "misskey.io", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")

def recheck_obfuscation(args: argparse.Namespace) -> int:
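    """Re-checks instances flagged with has_obfuscation, tries to deobfuscate their blocked domains and stores the results."""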
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")

    if isinstance(args.domain, str) and args.domain != "" and domain_helper.is_wanted(args.domain):
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
    elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
    else:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")

    rows = database.cursor.fetchall()
    logger.info("Checking %d domains ...", len(rows))
    for row in rows:
        logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        if (args.force is None or not args.force) and args.domain is None and args.software is None and instances.is_recent(row["domain"], "last_blocked"):
            logger.debug("row[domain]='%s' has been recently checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force))

        logger.debug("Invoking federation.fetch_blocks(%s) ...", row["domain"])
        blocking = federation.fetch_blocks(row["domain"])

        logger.debug("blocking()=%d", len(blocking))
        if len(blocking) == 0:
            if row["software"] == "pleroma":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = pleroma.fetch_blocks(row["domain"])
            elif row["software"] == "mastodon":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = mastodon.fetch_blocks(row["domain"])
            elif row["software"] == "lemmy":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = lemmy.fetch_blocks(row["domain"])
            elif row["software"] == "friendica":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = friendica.fetch_blocks(row["domain"])
            elif row["software"] == "misskey":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = misskey.fetch_blocks(row["domain"])
            else:
                logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])

        # c.s isn't part of oliphant's "hidden" blocklists
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] != "chaos.social" and not software_helper.is_relay(row["software"]) and not blocklists.has(row["domain"]):
            logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
            instances.set_last_blocked(row["domain"])
            instances.set_total_blocks(row["domain"], blocking)

        logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
        for block in blocking:
            logger.debug("block[blocked]='%s'", block["blocked"])

            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
            elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
                logger.debug("block='%s' is obfuscated.", block["blocked"])
                obfuscated = obfuscated + 1
                blocked = utils.deobfuscate(block["blocked"], row["domain"], block["digest"] if "digest" in block else None)
            elif not domain_helper.is_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
            elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
                logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])

            logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
            if blocked is not None and blocked != block["blocked"]:
                logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
                obfuscated = obfuscated - 1

                if blacklist.is_blacklisted(blocked):
                    logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
                elif blacklist.is_blacklisted(row["domain"]):
                    logger.debug("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"])
                elif blocks.is_instance_blocked(row["domain"], blocked):
                    logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])

                block["block_level"] = blocks.alias_block_level(block["block_level"])

                logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
                if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
                    blockdict.append({
                        "blocked": blocked,
                        "reason" : block["reason"],
                    })

        logger.debug("Setting obfuscated=%d for row[domain]='%s' ...", obfuscated, row["domain"])
        instances.set_obfuscated_blocks(row["domain"], obfuscated)

        logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
        if instances.has_pending(row["domain"]):
            logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
            instances.update(row["domain"])

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", row["domain"], len(blockdict))
            network.send_bot_post(row["domain"], blockdict)

    logger.debug("Success! - EXIT!")

def fetch_fedilist(args: argparse.Namespace) -> int:
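    """Fetches the instance CSV from demo.fedilist.com (optionally filtered by software) and registers new, wanted domains."""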
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")

    source_domain = "demo.fedilist.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    url = f"http://{source_domain}/instance/csv?onion=not"
    if args.software is not None and args.software != "":
        logger.debug("args.software='%s'", args.software)
        url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"

    logger.info("Fetching url='%s' ...", url)
    response = reqto.get(
        url,
        headers=network.web_headers,
        timeout=(config.get("connection_timeout"), config.get("read_timeout")),
        allow_redirects=False
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if not response.ok or response.status_code > 200 or len(response.content) == 0:
        logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.text))

    reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")

    logger.debug("reader[]='%s'", type(reader))
    if reader is None:
        logger.warning("Failed parsing response.content()=%d as CSV content", len(response.content))

    logger.info("Checking %d rows ...", len(rows))
    for row in rows:
        logger.debug("row[]='%s'", type(row))
        if "hostname" not in row:
            logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(row))

        logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"])
        domain = tidyup.domain(row["hostname"]) if row["hostname"] not in [None, ""] else None
        logger.debug("domain='%s' - AFTER!", domain)

        if domain is None or domain == "":
            logger.debug("domain='%s' is empty after tidyup.domain(): row[hostname]='%s' - SKIPPED!", domain, row["hostname"])

        logger.debug("domain='%s' - BEFORE!", domain)
        domain = domain.encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
        elif (args.force is None or not args.force) and instances.is_registered(domain):
            logger.debug("domain='%s' is already registered, --force not specified: args.force[]='%s'", domain, type(args.force))
        elif instances.is_recent(domain):
            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)

        logger.info("Fetching instances from domain='%s' ...", domain)
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")

def update_nodeinfo(args: argparse.Namespace) -> int:
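    """Re-determines the software type for registered instances, selected by domain, software, detection mode or age."""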
1465 logger.debug("args[]='%s' - CALLED!", type(args))
1467 logger.debug("Invoking locking.acquire() ...")
1470 if args.domain is not None and args.domain != "":
1471 logger.debug("Fetching args.domain='%s'", args.domain)
1472 database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
1473 elif args.software is not None and args.software != "":
1474 logger.info("Fetching domains for args.software='%s'", args.software)
1475 database.cursor.execute("SELECT domain, software FROM instances WHERE software = ? ORDER BY last_updated ASC")
1476 elif args.mode is not None and args.mode != "":
1477 logger.info("Fetching domains for args.mode='%s'", args.mode.upper())
1478 database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode = ? ORDER BY last_updated ASC", [args.mode.upper()])
1479 elif args.no_software:
1480 logger.info("Fetching domains with no software type detected ...")
1481 database.cursor.execute("SELECT domain, software FROM instances WHERE software IS NULL ORDER BY last_updated ASC")
1483 logger.info("Fetching domains with a detection mode other than AUTO_DISCOVERY set ...")
1484 database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode IS NOT NULL AND detection_mode != 'AUTO_DISCOVERY' ORDER BY last_updated ASC")
1485 elif args.no_detection:
1486 logger.info("Fetching domains with no detection mode set ...")
1487 database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode IS NULL ORDER BY last_updated ASC")
1489 logger.info("Fetching all domains, least recently updated first ...")
1490 database.cursor.execute("SELECT domain, software FROM instances ORDER BY last_updated ASC")
1492 domains = database.cursor.fetchall()
1494 logger.info("Checking %d domain(s) ...", len(domains))
1497 logger.debug("row[]='%s'", type(row))
1498 if not args.force and instances.is_recent(row["domain"], "last_nodeinfo"):
1499 logger.debug("row[domain]='%s' has been recently checked - SKIPPED!", row["domain"])
1503 logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
1504 software = federation.determine_software(row["domain"])
1506 logger.debug("Determined software='%s'", software)
1507 if (software != row["software"] and software is not None) or args.force is True:
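# If detection failed, clear the stored nodeinfo URL; otherwise record the newly detected software type.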
1508 logger.debug("software='%s'", software)
1509 if software is None:
1510 logger.debug("Setting nodeinfo_url to 'None' for row[domain]='%s' ...", row["domain"])
1511 instances.set_nodeinfo_url(row["domain"], None)
1513 logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
1514 instances.set_software(row["domain"], software)
1516 if software is not None:
1517 logger.debug("Setting row[domain]='%s' as successfully determined ...", row["domain"])
1518 instances.set_success(row["domain"])
1519 except network.exceptions as exception:
1520 logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1521 instances.set_last_error(row["domain"], exception)
1523 instances.set_last_nodeinfo(row["domain"])
1524 instances.update(row["domain"])
1527 logger.debug("Success! - EXIT!")
1530 def fetch_instances_social(args: argparse.Namespace) -> int:
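# Fetch the instance list from the instances.social API (requires an API key in
# config.json) and queue unknown, wanted domains for crawling.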
1531 logger.debug("args[]='%s' - CALLED!", type(args))
1533 logger.debug("Invoking locking.acquire() ...")
1536 source_domain = "instances.social"
1538 if config.get("instances_social_api_key") == "":
1539 logger.error("API key not set. Please set it in your config.json file.")
1541 elif sources.is_recent(source_domain):
1542 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1545 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1546 sources.update(source_domain)
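# The API key is sent as a bearer token in the Authorization header.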
1549 "Authorization": f"Bearer {config.get('instances_social_api_key')}",
1552 logger.info("Fetching list from source_domain='%s' ...", source_domain)
1553 fetched = network.get_json_api(
1555 "/api/1.0/instances/list?count=0&sort_by=name",
1557 (config.get("connection_timeout"), config.get("read_timeout"))
1559 logger.debug("fetched[]='%s'", type(fetched))
1561 if "error_message" in fetched:
1562 logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1564 elif "exception" in fetched:
1565 logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1567 elif "json" not in fetched:
1568 logger.warning("fetched has no element 'json' - EXIT!")
1570 elif "instances" not in fetched["json"]:
1571 logger.warning("fetched[json] has no element 'instances' - EXIT!")
1575 rows = fetched["json"]["instances"]
1577 logger.info("Checking %d row(s) ...", len(rows))
1579 logger.debug("row[]='%s'", type(row))
1580 domain = tidyup.domain(row["name"]) if row["name"] not in [None, ""] else None
1581 logger.debug("domain='%s' - AFTER!", domain)
1583 if domain is None or domain == "":
1584 logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
1587 logger.debug("domain='%s' - BEFORE!", domain)
1588 domain = domain.encode("idna").decode("utf-8")
1589 logger.debug("domain='%s' - AFTER!", domain)
1591 if not domain_helper.is_wanted(domain):
1592 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1594 elif domain in domains:
1595 logger.debug("domain='%s' is already added - SKIPPED!", domain)
1597 elif instances.is_registered(domain):
1598 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1600 elif instances.is_recent(domain):
1601 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1604 logger.info("Fetching instances from domain='%s'", domain)
1605 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1607 logger.debug("Success! - EXIT!")
1610 def fetch_relays(args: argparse.Namespace) -> int:
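# Walk all known relay instances (activityrelay, aoderelay, selective-relay,
# pub-relay) and collect the peers each relay reports, registering new ones.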
1611 logger.debug("args[]='%s' - CALLED!", type(args))
1613 logger.debug("Invoking locking.acquire() ...")
1616 if args.domain is not None and args.domain != "":
1617 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND domain = ? LIMIT 1", [args.domain])
1618 elif args.software is not None and args.software != "":
1619 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND software = ?", [args.software])
1621 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay')")
1624 rows = database.cursor.fetchall()
1626 logger.info("Checking %d relays ...", len(rows))
1628 logger.debug("row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1630 if not args.force and instances.is_recent(row["domain"]):
1631 logger.debug("row[domain]='%s' has been recently fetched - SKIPPED!", row["domain"])
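# pub-relay exposes its peers through nodeinfo metadata; the other relay types
# are scraped from their HTML front page.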
1635 if row["software"] == "pub-relay":
1636 logger.info("Fetching row[nodeinfo_url]='%s' from relay row[domain]='%s',row[software]='%s' ...", row["nodeinfo_url"], row["domain"], row["software"])
1637 raw = network.fetch_api_url(
1638 row["nodeinfo_url"],
1639 (config.get("connection_timeout"), config.get("read_timeout"))
1642 logger.debug("raw[%s]()=%d", type(raw), len(raw))
1643 if "exception" in raw:
1644 logger.warning("row[domain]='%s' has caused an exception: '%s' - raising again ...", row["domain"], type(raw["exception"]))
1645 raise raw["exception"]
1646 elif "error_message" in raw:
1647 logger.warning("row[domain]='%s' has caused error message: '%s' - SKIPPED!", row["domain"], raw["error_message"])
1648 instances.set_last_error(row["domain"], raw)
1649 instances.set_last_instance_fetch(row["domain"])
1650 instances.update(row["domain"])
1652 elif "json" not in raw:
1653 logger.warning("raw()=%d does not contain key 'json' in response - SKIPPED!", len(raw))
1655 elif "metadata" not in raw["json"]:
1656 logger.warning("raw[json]()=%d does not contain key 'metadata' in response - SKIPPED!", len(raw["json"]))
1658 elif "peers" not in raw["json"]["metadata"]:
1659 logger.warning("raw[json][metadata]()=%d does not contain key 'peers' in response - SKIPPED!", len(raw["json"]["metadata"]))
1662 logger.info("Fetching / from relay row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1663 raw = utils.fetch_url(
1664 f"https://{row['domain']}",
1665 network.web_headers,
1666 (config.get("connection_timeout"), config.get("read_timeout"))
1668 logger.debug("raw[%s]()=%d", type(raw), len(raw))
1670 doc = bs4.BeautifulSoup(raw, features="html.parser")
1671 logger.debug("doc[]='%s'", type(doc))
1673 except network.exceptions as exception:
1674 logger.warning("Exception '%s' during fetching from relay '%s': '%s'", type(exception), row["domain"], str(exception))
1675 instances.set_last_error(row["domain"], exception)
1676 instances.set_last_instance_fetch(row["domain"])
1677 instances.update(row["domain"])
1680 logger.debug("row[software]='%s'", row["software"])
1681 if row["software"] == "activityrelay":
1682 logger.debug("Checking row[domain]='%s' ...", row["domain"])
1683 tags = doc.findAll("p")
1685 logger.debug("Checking %d paragraphs ...", len(tags))
1687 logger.debug("tag[]='%s'", type(tag))
1688 if len(tag.contents) == 0:
1689 logger.debug("tag='%s' is an empty tag - SKIPPED!", tag)
1691 elif "registered instances" not in tag.contents[0]:
1692 logger.debug("Skipping paragraph, text not found.")
1695 logger.debug("Found tag.contents[0][]='%s'", tag.contents[0])
1696 for domain in tag.contents:
1697 logger.debug("domain[%s]='%s'", type(domain), domain)
1698 if not isinstance(domain, bs4.element.NavigableString) or "registered instances" in domain:
1701 domain = str(domain)
1702 logger.debug("domain='%s'", domain)
1703 if not domain_helper.is_wanted(domain):
1704 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1707 logger.debug("domain='%s' - BEFORE!", domain)
1708 domain = tidyup.domain(domain) if domain not in [None, ""] else None
1709 logger.debug("domain='%s' - AFTER!", domain)
1711 if domain is None or domain == "":
1712 logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1714 elif domain not in peers:
1715 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1716 peers.append(domain)
1718 if dict_helper.has_key(domains, "domain", domain):
1719 logger.debug("domain='%s' already added", domain)
1722 logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
1725 "origin": row["domain"],
1727 elif row["software"] in ["aoderelay", "selective-relay"]:
1728 logger.debug("Checking row[domain]='%s' ...", row["domain"])
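# aoderelay lists peers in <section class="instance"> blocks, selective-relay in <li> items under the #instances element.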
1729 if row["software"] == "aoderelay":
1730 tags = doc.findAll("section", {"class": "instance"})
1732 tags = doc.find("div", {"id": "instances"}).findAll("li")
1734 logger.debug("Checking %d tags ...", len(tags))
1736 logger.debug("tag[]='%s'", type(tag))
1738 link = tag.find("a")
1739 logger.debug("link[%s]='%s'", type(link), link)
1740 if not isinstance(link, bs4.element.Tag):
1741 logger.warning("tag[%s]='%s' is not type of 'bs4.element.Tag' - SKIPPED!", type(tag), tag)
1744 components = urlparse(link.get("href"))
1745 logger.debug("components(%d)='%s'", len(components), components)
1746 domain = components.netloc.lower().split(":")[0]
1748 logger.debug("domain='%s' - BEFORE!", domain)
1749 domain = tidyup.domain(domain) if domain not in [None, ""] else None
1750 logger.debug("domain='%s' - AFTER!", domain)
1752 if domain is None or domain == "":
1753 logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1755 elif domain not in peers:
1756 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1757 peers.append(domain)
1759 if dict_helper.has_key(domains, "domain", domain):
1760 logger.debug("domain='%s' already added", domain)
1763 logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
1766 "origin": row["domain"],
1768 elif row["software"] == "pub-relay":
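# Peers were already delivered in the nodeinfo metadata fetched above.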
1769 logger.debug("Checking %d peer(s) row[domain]='%s' ...", len(raw["json"]["metadata"]["peers"]), row["domain"])
1770 for domain in raw["json"]["metadata"]["peers"]:
1771 logger.debug("domain='%s' - BEFORE!", domain)
1772 domain = tidyup.domain(domain) if domain not in [None, ""] else None
1773 logger.debug("domain='%s' - AFTER!", domain)
1775 if domain is None or domain == "":
1776 logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1778 elif domain not in peers:
1779 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1780 peers.append(domain)
1782 if dict_helper.has_key(domains, "domain", domain):
1783 logger.debug("domain='%s' already added", domain)
1786 logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
1789 "origin": row["domain"],
1792 logger.warning("row[domain]='%s',row[software]='%s' is not supported", row["domain"], row["software"])
1795 logger.debug("Updating last_instance_fetch for row[domain]='%s' ...", row["domain"])
1796 instances.set_last_instance_fetch(row["domain"])
1798 logger.info("Relay '%s' has %d peer(s) registered.", row["domain"], len(peers))
1799 instances.set_total_peers(row["domain"], peers)
1801 logger.debug("Flushing data for row[domain]='%s'", row["domain"])
1802 instances.update(row["domain"])
1804 logger.info("Checking %d domains ...", len(domains))
1806 logger.debug("row[domain]='%s',row[origin]='%s'", row["domain"], row["origin"])
1807 if not domain_helper.is_wanted(row["domain"]):
1808 logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
1810 elif instances.is_registered(row["domain"]):
1811 logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
1814 logger.info("Fetching row[domain]='%s',row[origin]='%s' ...", row["domain"], row["origin"])
1815 federation.fetch_instances(row["domain"], row["origin"], None, inspect.currentframe().f_code.co_name)
1817 logger.debug("Success! - EXIT!")
1820 def convert_idna(args: argparse.Namespace) -> int:
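# Rewrite all non-punycode domain columns (instances.domain/origin, blocks.blocker/blocked) to their IDNA representation.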
1821 logger.debug("args[]='%s' - CALLED!", type(args))
1823 database.cursor.execute("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC")
1824 rows = database.cursor.fetchall()
1826 logger.debug("rows[]='%s'", type(rows))
1827 instances.translate_idnas(rows, "domain")
1829 database.cursor.execute("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC")
1830 rows = database.cursor.fetchall()
1832 logger.debug("rows[]='%s'", type(rows))
1833 instances.translate_idnas(rows, "origin")
1835 database.cursor.execute("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC")
1836 rows = database.cursor.fetchall()
1838 logger.debug("rows[]='%s'", type(rows))
1839 blocks.translate_idnas(rows, "blocker")
1841 database.cursor.execute("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC")
1842 rows = database.cursor.fetchall()
1844 logger.debug("rows[]='%s'", type(rows))
1845 blocks.translate_idnas(rows, "blocked")
1847 logger.debug("Success! - EXIT!")
1850 def remove_invalid(args: argparse.Namespace) -> int:
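# Delete instance records whose domain no longer validates, together with any
# block entries referencing them, then compact the database.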
1851 logger.debug("args[]='%s' - CALLED!", type(args))
1853 logger.debug("Invoking locking.acquire() ...")
1856 database.cursor.execute("SELECT domain FROM instances ORDER BY domain ASC")
1857 rows = database.cursor.fetchall()
1859 logger.info("Checking %d domains ...", len(rows))
1861 logger.debug("row[domain]='%s'", row["domain"])
1862 if not validators.domain(row["domain"].split("/")[0]):
1863 logger.info("Invalid row[domain]='%s' found, removing ...", row["domain"])
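# Drop block entries referencing the invalid domain before removing the instance row itself.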
1864 database.cursor.execute("DELETE FROM blocks WHERE blocker = ? OR blocked = ?", [row["domain"], row["domain"]])
1865 database.cursor.execute("DELETE FROM instances WHERE domain = ? LIMIT 1", [row["domain"]])
1867 logger.debug("Invoking commit() ...")
1868 database.connection.commit()
1870 logger.info("Vacuum cleaning database ...")
1871 database.cursor.execute("VACUUM")
1873 logger.debug("Success! - EXIT!")