# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
import argparse
import csv
import inspect
import json
import logging
import time

from urllib.parse import urlparse

import atoma
import bs4
import markdown
import reqto
import validators

from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import blocklists
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import dicts as dict_helper
from fba.helpers import domain as domain_helper
from fba.helpers import locking
from fba.helpers import processing
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import csrf
from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances
from fba.models import sources

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)
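
# Every command in this module follows the same convention: it takes the
# parsed argparse.Namespace from the CLI front-end and returns an int that is
# used as exit code, 0 meaning success and non-zero an error or early exit.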
def check_instance(args: argparse.Namespace) -> int:
    logger.debug("args.domain='%s' - CALLED!", args.domain)

    # The exact non-zero codes are assumptions; each failed check gets its own.
    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status
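
# A minimal sketch of invoking check_instance() directly; the actual CLI
# wiring (sub-command and argument registration) lives outside this file and
# the names here are assumptions for illustration only:
#
#   args = argparse.Namespace(domain="example.com")
#   status = check_instance(args)  # 0 if the domain is valid and not yet known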
def check_nodeinfo(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    # Fetch rows
    database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")

    cnt = 0
    for row in database.cursor.fetchall():
        logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        punycode = row["domain"].encode("idna").decode("utf-8")

        if row["nodeinfo_url"].startswith("/"):
            logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
            continue
        elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
            logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
            cnt = cnt + 1

    logger.info("Found %d row(s)", cnt)

    logger.debug("EXIT!")
    return 0
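
# check_nodeinfo() compares the IDNA ("punycode") form of each domain against
# the stored nodeinfo URL, since either spelling may occur there. For example:
#
#   "bücher.example".encode("idna")  # -> b'xn--bcher-kva.example'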
def fetch_pixelfed_api(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF token by default, network.source_headers does not need to be added here manually
    headers = dict()
    source_domain = "pixelfed.org"

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    try:
        logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
        headers = csrf.determine(source_domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_pixelfed_api,%s) - EXIT!", type(exception), __name__)
        return 1

    try:
        logger.info("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            source_domain,
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 101
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 102

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif row["domain"] == "":
                logger.debug("row[domain] is empty - SKIPPED!")
                continue

            logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
            domain = row["domain"].encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Fetching instances from domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Cannot fetch JSON,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 103

    logger.debug("Success! - EXIT!")
    return 0
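
# The checks above imply that the pixelfed.org API responds with a JSON
# document of roughly this shape (inferred from the parsing code, not from
# any API documentation):
#
#   {"data": [{"domain": "pixelfed.social", ...}, ...]}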
def fetch_bkali(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "gql.api.bka.li"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()
    try:
        logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
        fetched = network.post_json_api(
            source_domain,
            "/v1/graphql",  # assumed GraphQL endpoint path
            json.dumps({
                "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
            })
        )

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for source_domain='%s' returned error message='%s'", source_domain, fetched["error_message"])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif entry["domain"] == "":
                logger.debug("entry[domain] is empty - SKIPPED!")
                continue
            elif not domain_helper.is_wanted(entry["domain"]):
                logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
                continue
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])
    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, "tak.teleyal.blog", None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0
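
# The validation chain above implies a GraphQL response of roughly this shape
# (inferred from the code, not from the API's published schema):
#
#   {"data": {"nodeinfo": [{"domain": "example.com"}, ...]}}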
def fetch_blocks(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ? LIMIT 1", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC", [args.software]
        )
    elif args.force:
        # Re-check all instances
        logger.debug("Re-checking all instances ...")
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC"
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)

        if not domain_helper.is_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("Setting last_blocked,has_obfuscation=false for blocker='%s' ...", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        # chaos.social isn't part of oliphant's "hidden" blocklists
        if blocker == "chaos.social" or software_helper.is_relay(software) or blocklists.has(blocker):
            logger.debug("Skipping blocker='%s', run ./fba.py fetch_cs, fetch_oliphant or fetch_csv instead!", blocker)
            continue

        logger.debug("Invoking federation.fetch_blocks(%s) ...", blocker)
        blocking = federation.fetch_blocks(blocker)

        logger.debug("blocker='%s',software='%s',blocking()=%d", blocker, software, len(blocking))
        if len(blocking) == 0:
            logger.debug("blocker='%s',software='%s' - fetching blocklist ...", blocker, software)
            if software == "pleroma":
                blocking = pleroma.fetch_blocks(blocker)
                logger.info("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "mastodon":
                blocking = mastodon.fetch_blocks(blocker)
                logger.info("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "lemmy":
                blocking = lemmy.fetch_blocks(blocker)
                logger.info("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "friendica":
                blocking = friendica.fetch_blocks(blocker)
                logger.info("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "misskey":
                blocking = misskey.fetch_blocks(blocker)
                logger.info("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            else:
                logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
        instances.set_total_blocks(blocker, blocking)

        blockdict = list()
        deobfuscated = obfuscated = 0

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"]  = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)
                instances.set_has_obfuscation(blocker, True)
                obfuscated = obfuscated + 1

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    continue

                deobfuscated = deobfuscated + 1
                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)
                instances.set_has_obfuscation(blocker, True)
                obfuscated = obfuscated + 1

                # Some obscure them with question marks, not sure if that's dependent on version or not
                row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    continue

                deobfuscated = deobfuscated + 1
                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]

            logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue

            logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
            block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
            logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])

            if not domain_helper.is_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            block["block_level"] = blocks.alias_block_level(block["block_level"])

            if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.info("blocker='%s' has %d obfuscated domain(s) and %d of them could be deobfuscated.", blocker, obfuscated, deobfuscated)

        logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0
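
# Obfuscation handling in fetch_blocks() in a nutshell: entries such as
# "*.example.com" or "examp??.social" mask the blocked domain. Each one bumps
# the obfuscated counter, then instances.deobfuscate() tries to resolve it via
# the pattern character and, when present, a hash of the full domain; every
# successful lookup bumps deobfuscated and rewrites block["blocked"].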
def fetch_observer(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fediverse.observer"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    types = list()
    if args.software is None:
        logger.info("Fetching software list ...")
        raw = utils.fetch_url(
            f"https://{source_domain}",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        ).text
        logger.debug("raw[%s]()=%d", type(raw), len(raw))

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        navbar = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"})
        logger.debug("navbar[]='%s'", type(navbar))
        if navbar is None:
            logger.warning("Cannot find navigation bar, cannot continue!")
            return 1

        items = navbar.findAll("a", {"class": "dropdown-item"})
        logger.debug("items[]='%s'", type(items))

        logger.info("Checking %d menu items ...", len(items))
        for item in items:
            logger.debug("item[%s]='%s'", type(item), item)
            if item.text.lower() == "all":
                logger.debug("Skipping 'All' menu entry ...")
                continue

            logger.debug("Appending item.text='%s' ...", item.text)
            types.append(tidyup.domain(item.text))
    else:
        logger.info("Adding args.software='%s' as type ...", args.software)
        types.append(args.software)

    logger.info("Fetching %d different table data ...", len(types))
    for software in types:
        logger.debug("software='%s' - BEFORE!", software)
        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            continue

        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://{source_domain}/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()
            domain = tidyup.domain(domain) if domain is not None and domain != "" else None
            logger.debug("domain='%s' - AFTER!", domain)

            if domain is None or domain == "":
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue

            software = software_helper.alias(software)
            logger.info("Fetching instances for domain='%s'", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
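
# fetch_observer() scrapes fediverse.observer in two steps: the start page's
# navigation bar yields the list of software types, and
# /app/views/tabledata.php?software=<type> apparently returns an HTML fragment
# whose <a class="url"> elements carry one instance domain each.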
def fetch_todon_wiki(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "wiki.todon.eu"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    blocklist = {
        "silenced": list(),
        "reject"  : list(),
    }

    logger.debug("Fetching domainblocks from source_domain='%s'", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/todon/domainblocks",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "todon.eu"  # the wiki documents todon.eu's own blocklist

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_last_blocked(blocker)
    instances.set_total_blocks(blocker, blocking)

    blockdict = list()
    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if not domain_helper.is_wanted(blocked):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue
            elif not domain_helper.is_wanted(blocker):
                logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
                continue
            elif blocks.is_instance_blocked(blocker, blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", blocked, block_level, blocker)
                blockdict.append({
                    "blocked": blocked,
                    "reason" : None,
                })

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
        network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

    logger.debug("Success! - EXIT!")
    return 0
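
# The blocklist dict above keeps both severities side by side, e.g.:
#
#   blocklist = {"silenced": ["foo.example"], "reject": ["bar.example"]}
#
# Only "reject" entries are forwarded to the bot POST further down.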
def fetch_cs(args: argparse.Namespace):
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Assumption: the original markdown extension list was longer; "tables" is
    # the one the table lookups below depend on.
    extensions = [
        "tables",
    ]

    blocklist = {
        "silenced": list(),
        "reject"  : list(),
    }

    source_domain = "raw.githubusercontent.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching federation.md from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/chaossocial/meta/master/federation.md",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc()=%d[]='%s'", len(doc), type(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    blocklist["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    blocklist["reject"] = federation.find_domains(blocked)

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "chaos.social"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_last_blocked(blocker)
    instances.set_total_blocks(blocker, blocking)

    logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
    if len(blocking) > 0:
        blockdict = list()
        for block_level in blocklist:
            logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))

            for row in blocklist[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if "domain" not in row:
                    logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
                    continue
                elif not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
                    blockdict.append({
                        "blocked": row["domain"],
                        "reason" : row["reason"],
                    })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

    logger.debug("Success! - EXIT!")
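
# fetch_cs() relies on federation.md rendering to HTML with an
# <h2 id="silenced-instances"> and an <h2 id="blocked-instances"> heading,
# each followed by a table whose tbody rows carry the blocked domains.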
def fetch_fba_rss(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    domains = list()

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    components = urlparse(args.feed)
    domain = components.netloc.lower().split(":")[0]

    logger.debug("domain='%s'", domain)
    if sources.is_recent(domain):
        logger.info("API from domain='%s' has recently been accessed - EXIT!", domain)
        return 0
    else:
        logger.debug("domain='%s' has not been recently used, marking ...", domain)
        sources.update(domain)

    logger.info("Fetching FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item[%s]='%s'", type(item), item)
            domain = item.link.split("=")[1]
            domain = tidyup.domain(domain) if domain is not None and domain != "" else None

            logger.debug("domain='%s' - AFTER!", domain)
            if domain is None or domain == "":
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)

            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0
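
# item.link.split("=")[1] above assumes feed item links of the form
# https://<host>/?domain=example.com, i.e. the target domain is carried as
# the value of the first (and only) query parameter.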
def fetch_fbabot_atom(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "ryona.agency"
    feed = f"https://{source_domain}/users/fba/feed.atom"

    logger.debug("args.feed[%s]='%s'", type(args.feed), args.feed)
    if args.feed is not None and validators.url(args.feed):
        logger.debug("Setting feed='%s' ...", args.feed)
        feed = str(args.feed)
        source_domain = urlparse(args.feed).netloc

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                logger.debug("element[]='%s'", type(element))
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href) if href is not None and href != "" else None

                    logger.debug("domain='%s' - AFTER!", domain)
                    if domain is None or domain == "":
                        logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                        continue

                    logger.debug("domain='%s' - BEFORE!", domain)
                    domain = domain.encode("idna").decode("utf-8")
                    logger.debug("domain='%s' - AFTER!", domain)

                    if not domain_helper.is_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)

            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_instances(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("args.domain='%s' - checking ...", args.domain)
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid.", args.domain)
        return 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
        return 101

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Initialize values
    domain = tidyup.domain(args.domain)
    origin = software = None

    # Fetch record
    database.cursor.execute("SELECT origin, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
    row = database.cursor.fetchone()
    if row is not None:
        origin = row["origin"]
        software = row["software"]

    if software_helper.is_relay(software):
        logger.warning("args.domain='%s' is of software type '%s' which is not supported by this command. Please invoke fetch_relays instead.", args.domain, software)
        return 102

    # Initial fetch
    try:
        logger.info("Fetching instances from args.domain='%s',origin='%s',software='%s' ...", domain, origin, software)
        federation.fetch_instances(domain, origin, software, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update(args.domain)
        return 100

    if args.single:  # assumption: --single limits the crawl to the given domain
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus', 'neodb') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY total_peers DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] == "":
            logger.debug("row[domain] is empty - SKIPPED!")
            continue

        logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
        domain = row["domain"].encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", domain, row["origin"], row["software"], row["nodeinfo_url"])
            federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0
def fetch_csv(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    logger.info("Checking %d CSV files ...", len(blocklists.csv_files))
    for block in blocklists.csv_files:
        logger.debug("block[blocker]='%s',block[csv_url]='%s'", block["blocker"], block["csv_url"])

        # Is a domain given and not equal to the blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue

        logger.debug("Invoking processing.csv_block(%s, %s, fetch_csv) ...", block["blocker"], block["csv_url"])
        processing.csv_block(block["blocker"], block["csv_url"], inspect.currentframe().f_code.co_name)

    logger.debug("Success - EXIT!")
    return 0
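
# blocklists.csv_files is iterated as a list of dicts of this shape (taken
# from the keys accessed above; csv_url is passed to processing.csv_block()
# unchanged, so presumably an absolute URL):
#
#   [{"blocker": "example.social", "csv_url": "https://..."}, ...]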
def fetch_oliphant(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "codeberg.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    # Base URL
    base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"

    logger.debug("Downloading %d files ...", len(blocklists.oliphant_blocklists))
    for block in blocklists.oliphant_blocklists:
        # Is a domain given and not equal to the blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue

        url = f"{base_url}/{block['csv_url']}"

        logger.debug("Invoking processing.csv_block(%s, %s, fetch_oliphant) ...", block["blocker"], url)
        processing.csv_block(block["blocker"], url, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
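
# URLs composed above take the following form, where the csv_url values
# themselves are defined in fba.helpers.blocklists:
#
#   https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists/<csv_url>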
def fetch_txt(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Static URLs
    urls = [{
        "blocker": "seirdy.one",
        "url"    : "https://seirdy.one/pb/bsl.txt",
    }]

    logger.info("Checking %d text file(s) ...", len(urls))
    for row in urls:
        logger.debug("Fetching row[url]='%s' ...", row["url"])
        response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code == 200 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.strip().split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain) if domain is not None and domain != "" else None

                logger.debug("domain='%s' - AFTER!", domain)
                if domain is None or domain == "":
                    logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                    continue
                elif not domain_helper.is_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue
                elif instances.is_recent(domain):
                    logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                    continue

                logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
                processed = processing.instance(domain, row["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)
                if not processed:
                    logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
                    continue

    logger.debug("Success! - EXIT!")
    return 0
def fetch_fedipact(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fedipact.online"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching / from source_domain='%s' ...", source_domain)
    response = utils.fetch_url(
        f"https://{source_domain}",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and response.text != "":
        logger.debug("Parsing %d Bytes ...", len(response.text))

        doc = bs4.BeautifulSoup(response.text, "html.parser")
        logger.debug("doc[]='%s'", type(doc))

        rows = doc.findAll("li")
        logger.info("Checking %d row(s) ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            domain = tidyup.domain(row.contents[0]) if row.contents[0] is not None and row.contents[0] != "" else None

            logger.debug("domain='%s' - AFTER!", domain)
            if domain is None or domain == "":
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.info("Fetching domain='%s' ...", domain)
            federation.fetch_instances(domain, "beach.city", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_joinmobilizon(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "instances.joinmobilizon.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching instances from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/api/v1/instances",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    parsed = json.loads(raw)
    logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))

    if "data" not in parsed:
        logger.warning("parsed()=%d does not contain key 'data'", len(parsed))
        return 1

    logger.info("Checking %d instances ...", len(parsed["data"]))
    for row in parsed["data"]:
        logger.debug("row[]='%s'", type(row))
        if "host" not in row:
            logger.warning("row='%s' does not contain key 'host' - SKIPPED!", row)
            continue
        elif not domain_helper.is_wanted(row["host"]):
            logger.debug("row[host]='%s' is not wanted - SKIPPED!", row["host"])
            continue
        elif instances.is_registered(row["host"]):
            logger.debug("row[host]='%s' is already registered - SKIPPED!", row["host"])
            continue

        logger.info("Fetching row[host]='%s' ...", row["host"])
        federation.fetch_instances(row["host"], "demo.mobilizon.org", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
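
# Inferred shape of the /api/v1/instances response (from the checks above):
#
#   {"data": [{"host": "mobilizon.fr", ...}, ...]}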
def fetch_joinmisskey(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "instanceapp.misskey.page"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching instances.json from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/instances.json",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    parsed = json.loads(raw)
    logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))

    if "instancesInfos" not in parsed:
        logger.warning("parsed()=%d does not contain element 'instancesInfos'", len(parsed))
        return 1

    logger.info("Checking %d instance(s) ...", len(parsed["instancesInfos"]))
    for row in parsed["instancesInfos"]:
        logger.debug("row[%s]='%s'", type(row), row)
        if "url" not in row:
            logger.warning("row()=%d does not have element 'url' - SKIPPED!", len(row))
            continue
        elif not domain_helper.is_wanted(row["url"]):
            logger.debug("row[url]='%s' is not wanted - SKIPPED!", row["url"])
            continue
        elif instances.is_registered(row["url"]):
            logger.debug("row[url]='%s' is already registered - SKIPPED!", row["url"])
            continue

        logger.info("Fetching row[url]='%s' ...", row["url"])
        federation.fetch_instances(row["url"], "misskey.io", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
def recheck_obfuscation(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if isinstance(args.domain, str) and args.domain != "" and domain_helper.is_wanted(args.domain):
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
    elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
    else:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")

    rows = database.cursor.fetchall()
    logger.info("Checking %d domains ...", len(rows))
    for row in rows:
        logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        if (args.force is None or not args.force) and args.domain is None and args.software is None and instances.is_recent(row["domain"], "last_blocked"):
            logger.debug("row[domain]='%s' has been recently checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force))
            continue

        logger.debug("Invoking federation.fetch_blocks(%s) ...", row["domain"])
        blocking = federation.fetch_blocks(row["domain"])

        logger.debug("blocking()=%d", len(blocking))
        if len(blocking) == 0:
            if row["software"] == "pleroma":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = pleroma.fetch_blocks(row["domain"])
            elif row["software"] == "mastodon":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = mastodon.fetch_blocks(row["domain"])
            elif row["software"] == "lemmy":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = lemmy.fetch_blocks(row["domain"])
            elif row["software"] == "friendica":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = friendica.fetch_blocks(row["domain"])
            elif row["software"] == "misskey":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = misskey.fetch_blocks(row["domain"])
            else:
                logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])

        # chaos.social isn't part of oliphant's "hidden" blocklists
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] != "chaos.social" and not software_helper.is_relay(row["software"]) and not blocklists.has(row["domain"]):
            logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
            instances.set_last_blocked(row["domain"])
            instances.set_total_blocks(row["domain"], blocking)

        obfuscated = 0
        blockdict = list()

        logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
        for block in blocking:
            logger.debug("block[blocked]='%s'", block["blocked"])
            blocked = None

            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
                logger.debug("block='%s' is obfuscated.", block["blocked"])
                obfuscated = obfuscated + 1
                blocked = utils.deobfuscate(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
            elif not domain_helper.is_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
                logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
                continue

            logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
            if blocked is not None and blocked != block["blocked"]:
                logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
                obfuscated = obfuscated - 1

                if blacklist.is_blacklisted(blocked):
                    logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
                    continue
                elif blacklist.is_blacklisted(row["domain"]):
                    logger.debug("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"])
                    continue
                elif blocks.is_instance_blocked(row["domain"], blocked):
                    logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
                    continue

                block["block_level"] = blocks.alias_block_level(block["block_level"])

                logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
                if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
                    blockdict.append({
                        "blocked": blocked,
                        "reason" : block["reason"],
                    })

        logger.debug("Setting obfuscated=%d for row[domain]='%s' ...", obfuscated, row["domain"])
        instances.set_obfuscated_blocks(row["domain"], obfuscated)

        logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
        if instances.has_pending(row["domain"]):
            logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
            instances.update(row["domain"])

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", row["domain"], len(blockdict))
            network.send_bot_post(row["domain"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0
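
# Accounting note for recheck_obfuscation(): obfuscated is incremented for
# every masked entry and decremented again once utils.deobfuscate() resolves
# it, so the value stored via instances.set_obfuscated_blocks() is the number
# of entries that remained unresolved.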
def fetch_fedilist(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "demo.fedilist.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    url = f"http://{source_domain}/instance/csv?onion=not"
    if args.software is not None and args.software != "":
        logger.debug("args.software='%s'", args.software)
        url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"

    logger.info("Fetching url='%s' ...", url)
    response = reqto.get(
        url,
        headers=network.web_headers,
        timeout=(config.get("connection_timeout"), config.get("read_timeout")),
        allow_redirects=False
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if not response.ok or response.status_code > 200 or len(response.content) == 0:
        logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.text))
        return 1

    reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")

    logger.debug("reader[]='%s'", type(reader))
    if reader is None:
        logger.warning("Failed parsing response.content()=%d as CSV content", len(response.content))
        return 2

    rows = list(reader)

    logger.info("Checking %d rows ...", len(rows))
    for row in rows:
        logger.debug("row[]='%s'", type(row))
        if "hostname" not in row:
            logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(row))
            continue

        logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"])
        domain = tidyup.domain(row["hostname"]) if row["hostname"] is not None and row["hostname"] != "" else None
        logger.debug("domain='%s' - AFTER!", domain)

        if domain is None or domain == "":
            logger.debug("domain='%s' is empty after tidyup.domain(): row[hostname]='%s' - SKIPPED!", domain, row["hostname"])
            continue

        logger.debug("domain='%s' - BEFORE!", domain)
        domain = domain.encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            continue
        elif (args.force is None or not args.force) and instances.is_registered(domain):
            logger.debug("domain='%s' is already registered, --force not specified: args.force[]='%s'", domain, type(args.force))
            continue
        elif instances.is_recent(domain):
            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
            continue

        logger.info("Fetching instances from domain='%s' ...", domain)
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
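
# The CSV served by demo.fedilist.com is parsed with csv.DictReader, so its
# first line must be a header row; only the "hostname" column is used here.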
def update_nodeinfo(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        logger.debug("Fetching args.domain='%s'", args.domain)
        database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
    elif args.software is not None and args.software != "":
        logger.info("Fetching domains for args.software='%s'", args.software)
        database.cursor.execute("SELECT domain, software FROM instances WHERE software = ? ORDER BY last_updated ASC", [args.software])
    elif args.mode is not None and args.mode != "":
        logger.info("Fetching domains for args.mode='%s'", args.mode.upper())
        database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode = ? ORDER BY last_updated ASC", [args.mode.upper()])
    elif args.no_software:
        logger.info("Fetching domains with no software type detected ...")
        database.cursor.execute("SELECT domain, software FROM instances WHERE software IS NULL ORDER BY last_updated ASC")
    elif args.no_auto:  # flag name assumed; the original elif header was lost
        logger.info("Fetching domains with a detection mode other than AUTO_DISCOVERY set ...")
        database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode IS NOT NULL AND detection_mode != 'AUTO_DISCOVERY' ORDER BY last_updated ASC")
    else:
        logger.info("Fetching recently updated domains ...")
        database.cursor.execute("SELECT domain, software FROM instances ORDER BY last_updated ASC")

    domains = database.cursor.fetchall()

    logger.info("Checking %d domain(s) ...", len(domains))
    cnt = 0
    for row in domains:
        logger.debug("row[]='%s'", type(row))
        if not args.force and instances.is_recent(row["domain"], "last_nodeinfo"):
            logger.debug("row[domain]='%s' has been recently checked - SKIPPED!", row["domain"])
            continue

        try:
            logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
            software = federation.determine_software(row["domain"])

            logger.debug("Determined software='%s'", software)
            if (software != row["software"] and software is not None) or args.force is True:
                logger.debug("software='%s'", software)
                if software is None:
                    logger.debug("Setting nodeinfo_url to 'None' for row[domain]='%s' ...", row["domain"])
                    instances.set_nodeinfo_url(row["domain"], None)

                logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
                instances.set_software(row["domain"], software)

            if software is not None:
                logger.debug("Setting row[domain]='%s' as successfully determined ...", row["domain"])
                instances.set_success(row["domain"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
            instances.set_last_error(row["domain"], exception)

        instances.set_last_nodeinfo(row["domain"])
        instances.update(row["domain"])
        cnt = cnt + 1

    logger.debug("Success! - EXIT!")
    # Return code assumed; the original line was lost.
    return 0

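# Illustrative sketch (hypothetical): the progress figure logged by
# update_nodeinfo() above is a plain "done / total * 100" formatted to one
# decimal place in a 5-character field. A standalone rendition of that step:
def _demo_progress_percent(cnt: int, total: int) -> str:
    # Guard against an empty result set; the loop above only runs when rows exist.
    return "{:5.1f}".format(cnt / total * 100) if total > 0 else "  0.0"
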
def fetch_instances_social(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "instances.social"

    if config.get("instances_social_api_key") == "":
        logger.error("API key not set. Please set it in your config.json file.")
        # Return codes in this function were lost in the damaged source; values assumed.
        return 1
    elif sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 2
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    headers = {
        "Authorization": f"Bearer {config.get('instances_social_api_key')}",
    }

    logger.info("Fetching list from source_domain='%s' ...", source_domain)
    fetched = network.get_json_api(
        source_domain,
        "/api/1.0/instances/list?count=0&sort_by=name",
        headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )
    logger.debug("fetched[]='%s'", type(fetched))

    if "error_message" in fetched:
        logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
        return 3
    elif "exception" in fetched:
        logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
        return 4
    elif "json" not in fetched:
        logger.warning("fetched has no element 'json' - EXIT!")
        return 5
    elif "instances" not in fetched["json"]:
        logger.warning("fetched[json] has no element 'instances' - EXIT!")
        return 6

    domains = list()
    rows = fetched["json"]["instances"]

    logger.info("Checking %d row(s) ...", len(rows))
    for row in rows:
        logger.debug("row[]='%s'", type(row))
        domain = tidyup.domain(row["name"]) if row["name"] is not None and row["name"] != "" else None
        logger.debug("domain='%s' - AFTER!", domain)

        if domain is None or domain == "":
            logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
            continue

        logger.debug("domain='%s' - BEFORE!", domain)
        domain = domain.encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            continue
        elif domain in domains:
            logger.debug("domain='%s' is already added - SKIPPED!", domain)
            continue
        elif instances.is_registered(domain):
            logger.debug("domain='%s' is already registered - SKIPPED!", domain)
            continue
        elif instances.is_recent(domain):
            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
            continue

        logger.info("Fetching instances from domain='%s'", domain)
        domains.append(domain)
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

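# Illustrative sketch (hypothetical): the instances.social API is queried with a
# Bearer token in the Authorization header, as fetch_instances_social() does
# above. This standalone version uses `requests` directly instead of
# network.get_json_api(); the timeout values are placeholders and error handling
# is reduced to a status check.
def _demo_instances_social_query(api_key: str) -> list:
    import requests

    response = requests.get(
        "https://instances.social/api/1.0/instances/list?count=0&sort_by=name",
        headers={"Authorization": f"Bearer {api_key}"},
        timeout=(5, 10)  # (connect, read) in seconds - placeholder values
    )
    response.raise_for_status()

    # The payload nests instance records under "instances", each carrying a "name"
    return [row["name"] for row in response.json().get("instances", [])]
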
def fetch_relays(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND domain = ? LIMIT 1", [args.domain])
    elif args.software is not None and args.software != "":
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND software = ?", [args.software])
    else:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay')")

    domains = list()
    rows = database.cursor.fetchall()

    logger.info("Checking %d relays ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
        peers = list()
        if not args.force and instances.is_recent(row["domain"]):
            logger.debug("row[domain]='%s' has been recently fetched - SKIPPED!", row["domain"])
            continue

        try:
            if row["software"] == "pub-relay":
                logger.info("Fetching row[nodeinfo_url]='%s' from relay row[domain]='%s',row[software]='%s' ...", row["nodeinfo_url"], row["domain"], row["software"])
                raw = network.fetch_api_url(
                    row["nodeinfo_url"],
                    (config.get("connection_timeout"), config.get("read_timeout"))
                )

                logger.debug("raw[%s]()=%d", type(raw), len(raw))
                if "exception" in raw:
                    logger.warning("row[domain]='%s' has caused an exception: '%s' - raising again ...", row["domain"], type(raw["exception"]))
                    raise raw["exception"]
                elif "error_message" in raw:
                    logger.warning("row[domain]='%s' has caused error message: '%s' - SKIPPED!", row["domain"], raw["error_message"])
                    instances.set_last_error(row["domain"], raw)
                    instances.set_last_instance_fetch(row["domain"])
                    instances.update(row["domain"])
                    continue
                elif "json" not in raw:
                    logger.warning("raw()=%d does not contain key 'json' in response - SKIPPED!", len(raw))
                    continue
                elif "metadata" not in raw["json"]:
                    logger.warning("raw[json]()=%d does not contain key 'metadata' in response - SKIPPED!", len(raw["json"]))
                    continue
                elif "peers" not in raw["json"]["metadata"]:
                    logger.warning("raw[json][metadata]()=%d does not contain key 'peers' in response - SKIPPED!", len(raw["json"]["metadata"]))
                    continue
            else:
                logger.info("Fetching / from relay row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
                raw = utils.fetch_url(
                    f"https://{row['domain']}",
                    network.web_headers,
                    (config.get("connection_timeout"), config.get("read_timeout"))
                ).text  # .text assumed: BeautifulSoup below needs the response body as str
                logger.debug("raw[%s]()=%d", type(raw), len(raw))

                doc = bs4.BeautifulSoup(raw, features="html.parser")
                logger.debug("doc[]='%s'", type(doc))

        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching from relay '%s': '%s'", type(exception), row["domain"], str(exception))
            instances.set_last_error(row["domain"], exception)
            instances.set_last_instance_fetch(row["domain"])
            instances.update(row["domain"])
            continue

        logger.debug("row[software]='%s'", row["software"])
        if row["software"] == "activityrelay":
            logger.debug("Checking row[domain]='%s' ...", row["domain"])
            tags = doc.findAll("p")

            logger.debug("Checking %d paragraphs ...", len(tags))
            for tag in tags:
                logger.debug("tag[]='%s'", type(tag))
                if len(tag.contents) == 0:
                    logger.debug("tag='%s' is an empty tag - SKIPPED!", tag)
                    continue
                elif "registered instances" not in tag.contents[0]:
                    logger.debug("Skipping paragraph, text not found.")
                    continue

                logger.debug("Found tag.contents[0][]='%s'", tag.contents[0])
                for domain in tag.contents:
                    logger.debug("domain[%s]='%s'", type(domain), domain)
                    if not isinstance(domain, bs4.element.NavigableString) or "registered instances" in domain:
                        continue

                    domain = str(domain)
                    logger.debug("domain='%s'", domain)
                    if not domain_helper.is_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                        continue

                    logger.debug("domain='%s' - BEFORE!", domain)
                    domain = tidyup.domain(domain) if domain is not None and domain != "" else None
                    logger.debug("domain='%s' - AFTER!", domain)

                    if domain is None or domain == "":
                        logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
                        continue
                    elif domain not in peers:
                        logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
                        peers.append(domain)

                    if dict_helper.has_key(domains, "domain", domain):
                        logger.debug("domain='%s' already added", domain)
                        continue

                    logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
                    domains.append({
                        "domain": domain,
                        "origin": row["domain"],
                    })
        elif row["software"] in ["aoderelay", "selective-relay"]:
            logger.debug("Checking row[domain]='%s' ...", row["domain"])
            if row["software"] == "aoderelay":
                tags = doc.findAll("section", {"class": "instance"})
            else:
                tags = doc.find("div", {"id": "instances"}).findAll("li")

            logger.debug("Checking %d tags ...", len(tags))
            for tag in tags:
                logger.debug("tag[]='%s'", type(tag))

                link = tag.find("a")
                logger.debug("link[%s]='%s'", type(link), link)
                if not isinstance(link, bs4.element.Tag):
                    logger.warning("tag[%s]='%s' is not of type 'bs4.element.Tag' - SKIPPED!", type(tag), tag)
                    continue

                components = urlparse(link.get("href"))
                logger.debug("components(%d)='%s'", len(components), components)
                domain = components.netloc.lower().split(":")[0]

                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain) if domain is not None and domain != "" else None
                logger.debug("domain='%s' - AFTER!", domain)

                if domain is None or domain == "":
                    logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
                    continue
                elif domain not in peers:
                    logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
                    peers.append(domain)

                if dict_helper.has_key(domains, "domain", domain):
                    logger.debug("domain='%s' already added", domain)
                    continue

                logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
                domains.append({
                    "domain": domain,
                    "origin": row["domain"],
                })
        elif row["software"] == "pub-relay":
            logger.debug("Checking %d peer(s) row[domain]='%s' ...", len(raw["json"]["metadata"]["peers"]), row["domain"])
            for domain in raw["json"]["metadata"]["peers"]:
                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain) if domain is not None and domain != "" else None
                logger.debug("domain='%s' - AFTER!", domain)

                if domain is None or domain == "":
                    logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
                    continue
                elif domain not in peers:
                    logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
                    peers.append(domain)

                if dict_helper.has_key(domains, "domain", domain):
                    logger.debug("domain='%s' already added", domain)
                    continue

                logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
                domains.append({
                    "domain": domain,
                    "origin": row["domain"],
                })
        else:
            logger.warning("row[domain]='%s',row[software]='%s' is not supported", row["domain"], row["software"])
            continue

        logger.debug("Updating last_instance_fetch for row[domain]='%s' ...", row["domain"])
        instances.set_last_instance_fetch(row["domain"])

        logger.info("Relay '%s' has %d peer(s) registered.", row["domain"], len(peers))
        instances.set_total_peers(row["domain"], peers)

        logger.debug("Flushing data for row[domain]='%s'", row["domain"])
        instances.update(row["domain"])

    logger.info("Checking %d domains ...", len(domains))
    for row in domains:
        logger.debug("row[domain]='%s',row[origin]='%s'", row["domain"], row["origin"])
        if not domain_helper.is_wanted(row["domain"]):
            logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
            continue
        elif instances.is_registered(row["domain"]):
            logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
            continue

        logger.info("Fetching row[domain]='%s',row[origin]='%s' ...", row["domain"], row["origin"])
        federation.fetch_instances(row["domain"], row["origin"], None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    # Return code assumed; the original line was lost.
    return 0

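# Illustrative sketch (hypothetical): how the activityrelay branch of
# fetch_relays() walks a paragraph's child nodes. An activityrelay front page
# lists peers as bare text nodes inside a <p> that starts with "registered
# instances"; the HTML below is a made-up miniature of such a page.
def _demo_activityrelay_peers() -> list:
    import bs4 as _bs4

    html = "<p>registered instances<br/>relay-peer.example<br/>other.example</p>"
    doc = _bs4.BeautifulSoup(html, features="html.parser")

    peers = list()
    for tag in doc.findAll("p"):
        for child in tag.contents:
            # Only plain text nodes are candidate domains; the heading text is skipped
            if isinstance(child, _bs4.element.NavigableString) and "registered instances" not in child:
                peers.append(str(child).strip())

    return peers
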
def convert_idna(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    database.cursor.execute("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    instances.translate_idnas(rows, "domain")

    database.cursor.execute("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    instances.translate_idnas(rows, "origin")

    database.cursor.execute("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    blocks.translate_idnas(rows, "blocker")

    database.cursor.execute("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    blocks.translate_idnas(rows, "blocked")

    logger.debug("Success! - EXIT!")
    # Return code assumed; the original line was lost.
    return 0

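# Illustrative sketch (hypothetical): the "xn--" forms that convert_idna()
# selects for are produced by Python's built-in IDNA codec, the same codec the
# fetch commands above use via encode("idna"). A round-trip on a made-up domain:
def _demo_idna_roundtrip() -> tuple:
    punycode = "übung.example".encode("idna").decode("utf-8")  # 'xn--bung-zra.example'
    unicode_form = punycode.encode("utf-8").decode("idna")     # 'übung.example'
    return (punycode, unicode_form)
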
def remove_invalid(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    database.cursor.execute("SELECT domain FROM instances ORDER BY domain ASC")
    rows = database.cursor.fetchall()

    logger.info("Checking %d domains ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if not validators.domain(row["domain"].split("/")[0]):
            logger.info("Invalid row[domain]='%s' found, removing ...", row["domain"])
            database.cursor.execute("DELETE FROM blocks WHERE blocker = ? OR blocked = ?", [row["domain"], row["domain"]])
            database.cursor.execute("DELETE FROM instances WHERE domain = ? LIMIT 1", [row["domain"]])

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.info("Vacuum cleaning database ...")
    database.cursor.execute("VACUUM")

    logger.debug("Success! - EXIT!")
    # Return code assumed; the original line was lost.
    return 0

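# Illustrative sketch (hypothetical): what the validators.domain() gate in
# remove_invalid() accepts and rejects. The entries are made up; the "/"-split
# mirrors how the command strips a stray path component before validating.
def _demo_invalid_domain_filter() -> list:
    candidates = ["good.example", "bad..example", "host.example/path"]

    # Keeps "good.example" and "host.example/path"; "bad..example" fails validation
    return [c for c in candidates if validators.domain(c.split("/")[0])]
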