# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import argparse
import csv
import inspect
import json
import logging
import time

import atoma
import bs4
import markdown
import reqto
import validators

from urllib.parse import urlparse

from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import blocklists
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import dicts as dict_helper
from fba.helpers import domain as domain_helper
from fba.helpers import locking
from fba.helpers import processing
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import csrf
from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances
from fba.models import sources

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)
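
# Checks a single domain given via --domain: it must be a valid domain name,
# not blacklisted and not already registered. Non-zero return codes signal
# which check failed (the exact code values here are illustrative).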
def check_instance(args: argparse.Namespace) -> int:
    logger.debug("args.domain='%s' - CALLED!", args.domain)

    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status
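
# Reports instances whose stored nodeinfo URL matches neither their domain
# nor its punycode form; relative nodeinfo URLs always match.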
def check_nodeinfo(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    # Fetch all instances that have a nodeinfo URL set
    database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")

    cnt = 0
    for row in database.cursor.fetchall():
        logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        punycode = row["domain"].encode("idna").decode("utf-8")

        if row["nodeinfo_url"].startswith("/"):
            logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
            continue
        elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
            logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
            cnt = cnt + 1

    logger.info("Found %d row(s)", cnt)

    logger.debug("EXIT!")
    return 0
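
# Fetches the public server list from the pixelfed.org API and registers
# all new, wanted instances found there.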
def fetch_pixelfed_api(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF by default, you don't have to add network.source_headers by yourself here
    source_domain = "pixelfed.org"

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    try:
        logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
        headers = csrf.determine(source_domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
        return 1

    try:
        logger.info("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            source_domain,
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 101
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 102

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif row["domain"] == "":
                logger.debug("row[domain] is empty - SKIPPED!")
                continue

            logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
            domain = row["domain"].encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Fetching instances from domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    except network.exceptions as exception:
        logger.warning("Cannot fetch JSON API,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 103

    logger.debug("Success! - EXIT!")
    return 0
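
# Fetches a domain list from the gql.api.bka.li GraphQL service and adds new
# instances. The "/v1/graphql" endpoint path below is an assumption.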
def fetch_bkali(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "gql.api.bka.li"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()
    try:
        logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
        fetched = network.post_json_api(
            source_domain,
            "/v1/graphql",
            json.dumps({
                "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
            })
        )

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for source_domain='%s' returned error_message='%s'", source_domain, fetched["error_message"])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif entry["domain"] == "":
                logger.debug("entry[domain] is empty - SKIPPED!")
                continue
            elif not domain_helper.is_wanted(entry["domain"]):
                logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
                continue
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0
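
# Fetches and parses blocklists from registered instances, either for a
# single --domain, a single --software or all supported server types. The
# --force branch below is inferred from the surrounding query variants.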
def fetch_blocks(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ? LIMIT 1", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC", [args.software]
        )
    elif args.force:
        # Re-check all instances
        logger.debug("Re-checking all instances ...")
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC"
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)

        if not domain_helper.is_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("Setting last_blocked,has_obfuscation=false for blocker='%s' ...", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        # c.s isn't part of oliphant's "hidden" blocklists
        if blocker == "chaos.social" or software_helper.is_relay(software) or blocklists.has(blocker):
            logger.debug("Skipping blocker='%s', run ./fba.py fetch_cs, fetch_oliphant, fetch_csv instead!", blocker)
            continue

        logger.debug("Invoking federation.fetch_blocks(%s) ...", blocker)
        blocking = federation.fetch_blocks(blocker)

        logger.debug("blocker='%s',software='%s',blocking()=%d", blocker, software, len(blocking))
        if len(blocking) == 0:
            logger.debug("blocker='%s',software='%s' - fetching blocklist ...", blocker, software)
            if software == "pleroma":
                blocking = pleroma.fetch_blocks(blocker)
                logger.info("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "mastodon":
                blocking = mastodon.fetch_blocks(blocker)
                logger.info("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "lemmy":
                blocking = lemmy.fetch_blocks(blocker)
                logger.info("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "friendica":
                blocking = friendica.fetch_blocks(blocker)
                logger.info("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "misskey":
                blocking = misskey.fetch_blocks(blocker)
                logger.info("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            else:
                logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
        instances.set_total_blocks(blocker, blocking)

        blockdict = list()

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin = row["origin"]
                nodeinfo_url = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some obscure them with question marks, not sure if that's dependent on version or not
                row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin = row["origin"]
                nodeinfo_url = row["nodeinfo_url"]

            logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue

            logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
            block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
            logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])

            if not domain_helper.is_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            block["block_level"] = blocks.alias_block_level(block["block_level"])

            if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0
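
# Crawls fediverse.observer: unless --software is given, the software type
# list is scraped from the site's navigation bar, then each per-software
# instance table is imported.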
def fetch_observer(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fediverse.observer"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    types = list()
    if args.software is None:
        logger.info("Fetching software list ...")
        raw = utils.fetch_url(
            f"https://{source_domain}",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        ).text
        logger.debug("raw[%s]()=%d", type(raw), len(raw))

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        navbar = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"})
        logger.debug("navbar[]='%s'", type(navbar))
        if navbar is None:
            logger.warning("Cannot find navigation bar, cannot continue!")
            return 1

        items = navbar.findAll("a", {"class": "dropdown-item"})
        logger.debug("items[]='%s'", type(items))

        logger.info("Checking %d menu items ...", len(items))
        for item in items:
            logger.debug("item[%s]='%s'", type(item), item)
            if item.text.lower() == "all":
                logger.debug("Skipping 'All' menu entry ...")
                continue

            logger.debug("Appending item.text='%s' ...", item.text)
            types.append(tidyup.domain(item.text))
    else:
        logger.info("Adding args.software='%s' as type ...", args.software)
        types.append(args.software)

    logger.info("Fetching %d different table data ...", len(types))
    for software in types:
        logger.debug("software='%s' - BEFORE!", software)
        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            continue

        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://{source_domain}/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()
            domain = tidyup.domain(domain) if domain is not None and domain != "" else None
            logger.debug("domain='%s' - AFTER!", domain)

            if domain is None or domain == "":
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue

            software = software_helper.alias(software)
            logger.info("Fetching instances for domain='%s'", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
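
# Imports the silenced/suspended server lists published in wiki.todon.eu;
# the blocker domain "todon.eu" is an assumption derived from the wiki.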
def fetch_todon_wiki(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "wiki.todon.eu"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    blocklist = {
        "silenced": list(),
        "reject"  : list(),
    }

    logger.debug("Fetching domainblocks from source_domain='%s'", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/todon/domainblocks",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "todon.eu"
    blockdict = list()

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_last_blocked(blocker)
    instances.set_total_blocks(blocker, blocking)

    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if not domain_helper.is_wanted(blocked):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue
            elif not domain_helper.is_wanted(blocker):
                logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
                continue
            elif blocks.is_instance_blocked(blocker, blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", blocked, block_level, blocker)
                blockdict.append({
                    "blocked": blocked,
                    "reason" : None,
                })

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
        network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

    logger.debug("Success! - EXIT!")
    return 0
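
# Imports chaos.social's federation.md blocklist from GitHub. The markdown
# extension list below is reduced to the "tables" extension actually needed
# here; upstream may enable more.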
def fetch_cs(args: argparse.Namespace):
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Markdown extensions required to render the tables in federation.md
    extensions = ["tables"]

    blocklist = {
        "silenced": list(),
        "reject"  : list(),
    }

    source_domain = "raw.githubusercontent.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching federation.md from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/chaossocial/meta/master/federation.md",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc()=%d[]='%s'", len(doc), type(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    blocklist["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    blocklist["reject"] = federation.find_domains(blocked)

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "chaos.social"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_last_blocked(blocker)
    instances.set_total_blocks(blocker, blocking)

    logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
    if len(blocking) > 0:
        blockdict = list()
        for block_level in blocklist:
            logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))

            for row in blocklist[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if "domain" not in row:
                    logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
                    continue
                elif not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
                    blockdict.append({
                        "blocked": row["domain"],
                        "reason" : row["reason"],
                    })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

    logger.debug("Success! - EXIT!")
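
# Imports new instances from an FBA-specific RSS feed given via --feed.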
def fetch_fba_rss(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    domains = list()

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    components = urlparse(args.feed)
    domain = components.netloc.lower().split(":")[0]

    logger.debug("domain='%s'", domain)
    if sources.is_recent(domain):
        logger.info("API from domain='%s' has recently been accessed - EXIT!", domain)
        return 1
    else:
        logger.debug("domain='%s' has not been recently used, marking ...", domain)
        sources.update(domain)

    logger.info("Fetching FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item[%s]='%s'", type(item), item)
            domain = item.link.split("=")[1]
            domain = tidyup.domain(domain) if domain is not None and domain != "" else None

            logger.debug("domain='%s' - AFTER!", domain)
            if domain is None or domain == "":
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0
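
# Imports instances linked from the FBA bot's ATOM feed, defaulting to
# ryona.agency and overridable via --feed.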
def fetch_fbabot_atom(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "ryona.agency"
    feed = f"https://{source_domain}/users/fba/feed.atom"

    logger.debug("args.feed[%s]='%s'", type(args.feed), args.feed)
    if args.feed is not None and validators.url(args.feed):
        logger.debug("Setting feed='%s' ...", args.feed)
        feed = str(args.feed)
        source_domain = urlparse(args.feed).netloc

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                logger.debug("element[]='%s'", type(element))
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href) if href is not None and href != "" else None

                    logger.debug("domain='%s' - AFTER!", domain)
                    if domain is None or domain == "":
                        logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                        continue

                    logger.debug("domain='%s' - BEFORE!", domain)
                    domain = domain.encode("idna").decode("utf-8")
                    logger.debug("domain='%s' - AFTER!", domain)

                    if not domain_helper.is_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0
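
# Fetches peers of a single --domain and then, unless --single is given
# (inferred flag), of all known instances that are due for a re-crawl.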
def fetch_instances(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("args.domain='%s' - checking ...", args.domain)
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid.", args.domain)
        return 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
        return 101

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Initialize values
    domain = tidyup.domain(args.domain)
    origin = software = None

    # Fetch record for given domain
    database.cursor.execute("SELECT origin, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
    row = database.cursor.fetchone()
    if row is not None:
        origin = row["origin"]
        software = row["software"]

    if software_helper.is_relay(software):
        logger.warning("args.domain='%s' is of software type '%s' which is not supported by this command. Please invoke fetch_relays instead.", args.domain, software)
        return 102

    # Initial fetch
    try:
        logger.info("Fetching instances from args.domain='%s',origin='%s',software='%s' ...", domain, origin, software)
        federation.fetch_instances(domain, origin, software, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update(args.domain)
        return 100

    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus', 'neodb') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY total_peers DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] == "":
            logger.debug("row[domain] is empty - SKIPPED!")
            continue

        logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
        domain = row["domain"].encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", domain, row["origin"], row["software"], row["nodeinfo_url"])
            federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0
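
# Processes all CSV-based blocklists registered in blocklists.csv_files.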
def fetch_csv(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    logger.info("Checking %d CSV files ...", len(blocklists.csv_files))
    for block in blocklists.csv_files:
        logger.debug("block[blocker]='%s',block[csv_url]='%s'", block["blocker"], block["csv_url"])

        # Is domain given and not equal blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue

        logger.debug("Invoking processing.csv_block(%s, %s, fetch_csv) ...", block["blocker"], block["csv_url"])
        processing.csv_block(block["blocker"], block["csv_url"], inspect.currentframe().f_code.co_name)

    logger.debug("Success - EXIT!")
    return 0
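
# Downloads and processes the oliphant blocklist CSV files hosted on
# codeberg.org.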
def fetch_oliphant(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "codeberg.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    # Base URL
    base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"

    logger.debug("Downloading %d files ...", len(blocklists.oliphant_blocklists))
    for block in blocklists.oliphant_blocklists:
        # Is domain given and not equal blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue

        url = f"{base_url}/{block['csv_url']}"

        logger.debug("Invoking processing.csv_block(%s, %s, fetch_oliphant) ...", block["blocker"], url)
        processing.csv_block(block["blocker"], url, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
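
# Processes plain-text blocklists; currently only seirdy.one's bsl.txt.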
def fetch_txt(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Static URLs
    urls = [{
        "blocker": "seirdy.one",
        "url"    : "https://seirdy.one/pb/bsl.txt",
    }]

    logger.info("Checking %d text file(s) ...", len(urls))
    for row in urls:
        logger.debug("Fetching row[url]='%s' ...", row["url"])
        response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code == 200 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.strip().split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain) if domain is not None and domain != "" else None

                logger.debug("domain='%s' - AFTER!", domain)
                if domain is None or domain == "":
                    logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                    continue
                elif not domain_helper.is_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue
                elif instances.is_recent(domain):
                    logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                    continue

                logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
                processed = processing.instance(domain, row["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)
                if not processed:
                    logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
                    continue

    logger.debug("Success! - EXIT!")
    return 0
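
# Scrapes the list of participating instances from fedipact.online.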
def fetch_fedipact(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fedipact.online"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching / from source_domain='%s' ...", source_domain)
    response = utils.fetch_url(
        f"https://{source_domain}",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and response.text != "":
        logger.debug("Parsing %d Bytes ...", len(response.text))

        doc = bs4.BeautifulSoup(response.text, "html.parser")
        logger.debug("doc[]='%s'", type(doc))

        rows = doc.findAll("li")
        logger.info("Checking %d row(s) ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            domain = tidyup.domain(row.contents[0]) if row.contents[0] is not None and row.contents[0] != "" else None

            logger.debug("domain='%s' - AFTER!", domain)
            if domain is None or domain == "":
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.info("Fetching domain='%s' ...", domain)
            federation.fetch_instances(domain, "beach.city", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
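
# Imports the Mobilizon instance list from the joinmobilizon.org API.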
def fetch_joinmobilizon(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "instances.joinmobilizon.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching instances from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/api/v1/instances",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    parsed = json.loads(raw)
    logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))

    if "data" not in parsed:
        logger.warning("parsed()=%d does not contain key 'data' - EXIT!", len(parsed))
        return 1

    logger.info("Checking %d instances ...", len(parsed["data"]))
    for row in parsed["data"]:
        logger.debug("row[]='%s'", type(row))
        if "host" not in row:
            logger.warning("row='%s' does not contain key 'host' - SKIPPED!", row)
            continue
        elif not domain_helper.is_wanted(row["host"]):
            logger.debug("row[host]='%s' is not wanted - SKIPPED!", row["host"])
            continue
        elif instances.is_registered(row["host"]):
            logger.debug("row[host]='%s' is already registered - SKIPPED!", row["host"])
            continue

        logger.info("Fetching row[host]='%s' ...", row["host"])
        federation.fetch_instances(row["host"], "demo.mobilizon.org", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
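
# Imports the Misskey instance list from instanceapp.misskey.page.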
def fetch_joinmisskey(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "instanceapp.misskey.page"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching instances.json from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/instances.json",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    parsed = json.loads(raw)
    logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))

    if "instancesInfos" not in parsed:
        logger.warning("parsed()=%d does not contain element 'instancesInfos' - EXIT!", len(parsed))
        return 1

    logger.info("Checking %d instance(s) ...", len(parsed["instancesInfos"]))
    for row in parsed["instancesInfos"]:
        logger.debug("row[%s]='%s'", type(row), row)
        if "url" not in row:
            logger.warning("row()=%d does not have element 'url' - SKIPPED!", len(row))
            continue
        elif not domain_helper.is_wanted(row["url"]):
            logger.debug("row[url]='%s' is not wanted - SKIPPED!", row["url"])
            continue
        elif instances.is_registered(row["url"]):
            logger.debug("row[url]='%s' is already registered - SKIPPED!", row["url"])
            continue

        logger.info("Fetching row[url]='%s' ...", row["url"])
        federation.fetch_instances(row["url"], "misskey.io", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
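
# Re-fetches blocklists of instances flagged with has_obfuscation=1 and
# tries to deobfuscate entries against known domains and hashes.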
def recheck_obfuscation(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if isinstance(args.domain, str) and args.domain != "" and domain_helper.is_wanted(args.domain):
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
    elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
    else:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")

    rows = database.cursor.fetchall()
    logger.info("Checking %d domains ...", len(rows))
    for row in rows:
        logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        if (args.force is None or not args.force) and args.domain is None and args.software is None and instances.is_recent(row["domain"], "last_blocked"):
            logger.debug("row[domain]='%s' has been recently checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force))
            continue

        logger.debug("Invoking federation.fetch_blocks(%s) ...", row["domain"])
        blocking = federation.fetch_blocks(row["domain"])

        logger.debug("blocking()=%d", len(blocking))
        if len(blocking) == 0:
            if row["software"] == "pleroma":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = pleroma.fetch_blocks(row["domain"])
            elif row["software"] == "mastodon":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = mastodon.fetch_blocks(row["domain"])
            elif row["software"] == "lemmy":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = lemmy.fetch_blocks(row["domain"])
            elif row["software"] == "friendica":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = friendica.fetch_blocks(row["domain"])
            elif row["software"] == "misskey":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = misskey.fetch_blocks(row["domain"])
            else:
                logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])

        # c.s isn't part of oliphant's "hidden" blocklists
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] != "chaos.social" and not software_helper.is_relay(row["software"]) and not blocklists.has(row["domain"]):
            logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
            instances.set_last_blocked(row["domain"])
            instances.set_total_blocks(row["domain"], blocking)

        obfuscated = 0
        blockdict = list()

        logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
        for block in blocking:
            logger.debug("block[blocked]='%s'", block["blocked"])

            blocked = None
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
                logger.debug("block='%s' is obfuscated.", block["blocked"])
                obfuscated = obfuscated + 1
                blocked = utils.deobfuscate(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
            elif not domain_helper.is_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
                logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
                continue

            logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
            if blocked is not None and blocked != block["blocked"]:
                logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
                obfuscated = obfuscated - 1

                if blacklist.is_blacklisted(blocked):
                    logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
                    continue
                elif blacklist.is_blacklisted(row["domain"]):
                    logger.debug("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"])
                    continue
                elif blocks.is_instance_blocked(row["domain"], blocked):
                    logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
                    continue

                block["block_level"] = blocks.alias_block_level(block["block_level"])

                logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
                if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
                    blockdict.append({
                        "blocked": blocked,
                        "reason" : block["reason"],
                    })

        logger.debug("Setting obfuscated=%d for row[domain]='%s' ...", obfuscated, row["domain"])
        instances.set_obfuscated_blocks(row["domain"], obfuscated)

        logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
        if instances.has_pending(row["domain"]):
            logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
            instances.update(row["domain"])

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", row["domain"], len(blockdict))
            network.send_bot_post(row["domain"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0
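
# Fetches the instance list as CSV from demo.fedilist.com, optionally
# filtered by --software.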
def fetch_fedilist(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "demo.fedilist.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    url = f"http://{source_domain}/instance/csv?onion=not"
    if args.software is not None and args.software != "":
        logger.debug("args.software='%s'", args.software)
        url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"

    logger.info("Fetching url='%s' ...", url)
    response = reqto.get(
        url,
        headers=network.web_headers,
        timeout=(config.get("connection_timeout"), config.get("read_timeout")),
        allow_redirects=False
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if not response.ok or response.status_code > 200 or len(response.content) == 0:
        logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.text))
        return 1

    reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")

    logger.debug("reader[]='%s'", type(reader))
    if reader is None:
        logger.warning("Failed parsing response.content()=%d as CSV content", len(response.content))
        return 2

    rows = list(reader)

    logger.info("Checking %d rows ...", len(rows))
    for row in rows:
        logger.debug("row[]='%s'", type(row))
        if "hostname" not in row:
            logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(row))
            continue

        logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"])
        domain = tidyup.domain(row["hostname"]) if row["hostname"] is not None and row["hostname"] != "" else None
        logger.debug("domain='%s' - AFTER!", domain)

        if domain is None or domain == "":
            logger.debug("domain='%s' is empty after tidyup.domain(): row[hostname]='%s' - SKIPPED!", domain, row["hostname"])
            continue

        logger.debug("domain='%s' - BEFORE!", domain)
        domain = domain.encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            continue
        elif (args.force is None or not args.force) and instances.is_registered(domain):
            logger.debug("domain='%s' is already registered, --force not specified: args.force[]='%s'", domain, type(args.force))
            continue
        elif instances.is_recent(domain):
            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
            continue

        logger.info("Fetching instances from domain='%s' ...", domain)
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
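
# Re-determines the software type of instances whose nodeinfo data is stale,
# selected via --domain, --software, --mode or --no-software.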
1455 def update_nodeinfo(args: argparse.Namespace) -> int:
1456 logger.debug("args[]='%s' - CALLED!", type(args))
1458 logger.debug("Invoking locking.acquire() ...")

    if args.domain is not None and args.domain != "":
        logger.debug("Fetching args.domain='%s'", args.domain)
        database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ?", [args.domain])
    elif args.software is not None and args.software != "":
        logger.info("Fetching domains for args.software='%s'", args.software)
        database.cursor.execute("SELECT domain, software FROM instances WHERE software = ? AND (last_nodeinfo < ? OR last_nodeinfo IS NULL)", [args.software.lower(), time.time() - config.get("recheck_nodeinfo")])
    elif args.mode is not None and args.mode != "":
        logger.info("Fetching domains for args.mode='%s'", args.mode.upper())
        database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode = ? AND (last_nodeinfo < ? OR last_nodeinfo IS NULL)", [args.mode.upper(), time.time() - config.get("recheck_nodeinfo")])
    elif args.no_software:
        logger.info("Fetching domains with no software type detected ...")
        database.cursor.execute("SELECT domain, software FROM instances WHERE software IS NULL AND (last_nodeinfo < ? OR last_nodeinfo IS NULL)", [time.time() - config.get("recheck_nodeinfo")])
    else:
        logger.info("Fetching domains not recently checked ...")
        database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL", [time.time() - config.get("recheck_nodeinfo")])

    domains = database.cursor.fetchall()

    logger.info("Checking %d domain(s) ...", len(domains))

    cnt = 0
    for row in domains:
        logger.debug("row[]='%s'", type(row))
        if not args.force and instances.is_recent(row["domain"], "last_nodeinfo"):
            logger.debug("row[domain]='%s' has been recently checked - SKIPPED!", row["domain"])
            continue

        try:
            logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
            software = federation.determine_software(row["domain"])

            logger.debug("Determined software='%s'", software)
            if (software != row["software"] and software is not None) or args.force is True:
                logger.debug("software='%s'", software)
                if software is None:
                    logger.debug("Setting nodeinfo_url to 'None' for row[domain]='%s' ...", row["domain"])
                    instances.set_nodeinfo_url(row["domain"], None)

                logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
                instances.set_software(row["domain"], software)

            if software is not None:
                logger.debug("Setting row[domain]='%s' as successfully determined ...", row["domain"])
                instances.set_success(row["domain"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
            instances.set_last_error(row["domain"], exception)

        instances.set_last_nodeinfo(row["domain"])
        instances.update(row["domain"])
        cnt = cnt + 1

    logger.debug("Success! - EXIT!")
    return 0
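
# Queries the instances.social directory API for known instances. This needs
# the "instances_social_api_key" setting from config.json, which is sent as a
# bearer token.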
def fetch_instances_social(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "instances.social"

    if config.get("instances_social_api_key") == "":
        logger.error("API key not set. Please set it in your config.json file.")
        return 1
    elif sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 2
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)
1534 "Authorization": f"Bearer {config.get('instances_social_api_key')}",
1537 logger.info("Fetching list from source_domain='%s' ...", source_domain)
1538 fetched = network.get_json_api(
1540 "/api/1.0/instances/list?count=0&sort_by=name",
1542 (config.get("connection_timeout"), config.get("read_timeout"))
1544 logger.debug("fetched[]='%s'", type(fetched))
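
    # network.get_json_api() reports failures in-band: the returned dict
    # carries "error_message" or "exception" on failure and the parsed
    # payload under "json" on success, hence the key checks below.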
1546 if "error_message" in fetched:
1547 logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1549 elif "exception" in fetched:
1550 logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1552 elif "json" not in fetched:
1553 logger.warning("fetched has no element 'json' - EXIT!")
1555 elif "instances" not in fetched["json"]:
1556 logger.warning("fetched[row] has no element 'instances' - EXIT!")
1560 rows = fetched["json"]["instances"]
1562 logger.info("Checking %d row(s) ...", len(rows))
1564 logger.debug("row[]='%s'", type(row))
1565 domain = tidyup.domain(row["name"]) if row["name"] != None and row["name"] != "" else None
1566 logger.debug("domain='%s' - AFTER!", domain)
1568 if domain is None and domain == "":
1569 logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
1572 logger.debug("domain='%s' - BEFORE!", domain)
1573 domain = domain.encode("idna").decode("utf-8")
1574 logger.debug("domain='%s' - AFTER!", domain)
1576 if not domain_helper.is_wanted(domain):
1577 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1579 elif domain in domains:
1580 logger.debug("domain='%s' is already added - SKIPPED!", domain)
1582 elif instances.is_registered(domain):
1583 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1585 elif instances.is_recent(domain):
1586 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1589 logger.info("Fetching instances from domain='%s'", domain)
1590 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1592 logger.debug("Success! - EXIT!")
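
# Fetches peer lists from known ActivityPub relays. pub-relay publishes its
# peers in nodeinfo metadata; activityrelay, aoderelay and selective-relay
# only render them on their HTML landing page, so those are scraped with
# BeautifulSoup.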
def fetch_relays(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND domain = ? LIMIT 1", [args.domain])
    elif args.software is not None and args.software != "":
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND software = ?", [args.software])
    else:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay')")

    domains = list()
    rows = database.cursor.fetchall()
1611 logger.info("Checking %d relays ...", len(rows))
1613 logger.debug("row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1615 if not args.force and instances.is_recent(row["domain"]):
1616 logger.debug("row[domain]='%s' has been recently fetched - SKIPPED!", row["domain"])
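
        # Peers are collected per relay; how they are obtained depends on the
        # relay software, see the branches below.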
1620 if row["software"] == "pub-relay":
1621 logger.info("Fetching row[nodeinfo_url]='%s' from relay row[domain]='%s',row[software]='%s' ...", row["nodeinfo_url"], row["domain"], row["software"])
1622 raw = network.fetch_api_url(
1623 row["nodeinfo_url"],
1624 (config.get("connection_timeout"), config.get("read_timeout"))
1627 logger.debug("raw[%s]()=%d", type(raw), len(raw))
1628 if "exception" in raw:
1629 logger.warning("row[domain]='%s' has caused an exception: '%s' - raising again ...", row["domain"], type(raw["exception"]))
1630 raise raw["exception"]
1631 elif "error_message" in raw:
1632 logger.warning("row[domain]='%s' has caused error message: '%s' - SKIPPED!", row["domain"], raw["error_message"])
1633 instances.set_last_error(row["domain"], raw)
1634 instances.set_last_instance_fetch(row["domain"])
1635 instances.update(row["domain"])
1637 elif not "json" in raw:
1638 logger.warning("raw()=%d does not contain key 'json' in response - SKIPPED!", len(raw))
1640 elif not "metadata" in raw["json"]:
1641 logger.warning("raw[json]()=%d does not contain key 'json' in response - SKIPPED!", len(raw["json"]))
1643 elif not "peers" in raw["json"]["metadata"]:
1644 logger.warning("raw[json][metadata()=%d does not contain key 'json' in response - SKIPPED!", len(raw["json"]["metadata"]))
1647 logger.info("Fetching / from relay row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1648 raw = utils.fetch_url(
1649 f"https://{row['domain']}",
1650 network.web_headers,
1651 (config.get("connection_timeout"), config.get("read_timeout"))
1653 logger.debug("raw[%s]()=%d", type(raw), len(raw))
1655 doc = bs4.BeautifulSoup(raw, features="html.parser")
1656 logger.debug("doc[]='%s'", type(doc))
1658 except network.exceptions as exception:
1659 logger.warning("Exception '%s' during fetching from relay '%s': '%s'", type(exception), row["domain"], str(exception))
1660 instances.set_last_error(row["domain"], exception)
1661 instances.set_last_instance_fetch(row["domain"])
1662 instances.update(row["domain"])
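
        # The relay responded; extract peer domains according to the markup
        # (or nodeinfo metadata) its software type provides.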
1665 logger.debug("row[software]='%s'", row["software"])
1666 if row["software"] == "activityrelay":
1667 logger.debug("Checking row[domain]='%s' ...", row["domain"])
1668 tags = doc.findAll("p")
1670 logger.debug("Checking %d paragraphs ...", len(tags))
1672 logger.debug("tag[]='%s'", type(tag))
1673 if len(tag.contents) == 0:
1674 logger.debug("tag='%s' is an empty tag - SKIPPED!", tag)
1676 elif "registered instances" not in tag.contents[0]:
1677 logger.debug("Skipping paragraph, text not found.")
1680 logger.debug("Found tag.contents[0][]='%s'", tag.contents[0])
1681 for domain in tag.contents:
1682 logger.debug("domain[%s]='%s'", type(domain), domain)
1683 if not isinstance(domain, bs4.element.NavigableString) or "registered instances" in domain:
1686 domain = str(domain)
1687 logger.debug("domain='%s'", domain)
1688 if not domain_helper.is_wanted(domain):
1689 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1692 logger.debug("domain='%s' - BEFORE!", domain)
1693 domain = tidyup.domain(domain) if domain != None and domain != "" else None
1694 logger.debug("domain='%s' - AFTER!", domain)
1696 if domain is None or domain == "":
1697 logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1699 elif domain not in peers:
1700 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1701 peers.append(domain)
1703 if dict_helper.has_key(domains, "domain", domain):
1704 logger.debug("domain='%s' already added", domain)
1707 logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
1710 "origin": row["domain"],
1712 elif row["software"] in ["aoderelay", "selective-relay"]:
1713 logger.debug("Checking row[domain]='%s' ...", row["domain"])
1714 if row["software"] == "aoderelay":
1715 tags = doc.findAll("section", {"class": "instance"})
1717 tags = doc.find("div", {"id": "instances"}).findAll("li")
1719 logger.debug("Checking %d tags ...", len(tags))
1721 logger.debug("tag[]='%s'", type(tag))
1723 link = tag.find("a")
1724 logger.debug("link[%s]='%s'", type(link), link)
1725 if not isinstance(link, bs4.element.Tag):
1726 logger.warning("tag[%s]='%s' is not type of 'bs4.element.Tag' - SKIPPED!", type(tag), tag)
1729 components = urlparse(link.get("href"))
1730 logger.debug("components(%d)='%s'", len(components), components)
1731 domain = components.netloc.lower().split(":")[0]
1733 logger.debug("domain='%s' - BEFORE!", domain)
1734 domain = tidyup.domain(domain) if domain != None and domain != "" else None
1735 logger.debug("domain='%s' - AFTER!", domain)
1737 if domain is None or domain == "":
1738 logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1740 elif domain not in peers:
1741 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1742 peers.append(domain)
1744 if dict_helper.has_key(domains, "domain", domain):
1745 logger.debug("domain='%s' already added", domain)
1748 logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
1751 "origin": row["domain"],
1753 elif row["software"] == "pub-relay":
1754 logger.debug("Checking %d peer(s) row[domain]='%s' ...", len(raw["json"]["metadata"]["peers"]), row["domain"])
1755 for domain in raw["json"]["metadata"]["peers"]:
1756 logger.debug("domain='%s' - BEFORE!", domain)
1757 domain = tidyup.domain(domain) if domain != None and domain != "" else None
1758 logger.debug("domain='%s' - AFTER!", domain)
1760 if domain is None or domain == "":
1761 logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1763 elif domain not in peers:
1764 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1765 peers.append(domain)
1767 if dict_helper.has_key(domains, "domain", domain):
1768 logger.debug("domain='%s' already added", domain)
1771 logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
1774 "origin": row["domain"],
1777 logger.warning("row[domain]='%s',row[software]='%s' is not supported", row["domain"], row["software"])
1780 logger.debug("Updating last_instance_fetch for row[domain]='%s' ...", row["domain"])
1781 instances.set_last_instance_fetch(row["domain"])
1783 logger.info("Relay '%s' has %d peer(s) registered.", row["domain"], len(peers))
1784 instances.set_total_peers(row["domain"], peers)
1786 logger.debug("Flushing data for row[domain]='%s'", row["domain"])
1787 instances.update(row["domain"])
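
    # Hand every newly discovered peer to the crawler unless it is unwanted
    # or already registered.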
1789 logger.info("Checking %d domains ...", len(domains))
1791 logger.debug("row[domain]='%s',row[origin]='%s'", row["domain"], row["origin"])
1792 if not domain_helper.is_wanted(row["domain"]):
1793 logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
1795 elif instances.is_registered(row["domain"]):
1796 logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
1799 logger.info("Fetching row[domain]='%s',row[origin]='%s' ...", row["domain"], row["origin"])
1800 federation.fetch_instances(row["domain"], row["origin"], None, inspect.currentframe().f_code.co_name)
1802 logger.debug("Success! - EXIT!")
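
# One-shot maintenance command: converts internationalized domain names still
# stored in unicode form to their punycode ("xn--") representation across the
# instances and blocks tables.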
def convert_idna(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    database.cursor.execute("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    instances.translate_idnas(rows, "domain")

    database.cursor.execute("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    instances.translate_idnas(rows, "origin")

    database.cursor.execute("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    blocks.translate_idnas(rows, "blocker")

    database.cursor.execute("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    blocks.translate_idnas(rows, "blocked")

    logger.debug("Success! - EXIT!")
    return 0
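
# Maintenance command: deletes instances whose stored domain no longer passes
# validation, removes their block entries, then compacts the database.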
def remove_invalid(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    database.cursor.execute("SELECT domain FROM instances ORDER BY domain ASC")
    rows = database.cursor.fetchall()

    logger.info("Checking %d domains ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if not validators.domain(row["domain"].split("/")[0]):
            logger.info("Invalid row[domain]='%s' found, removing ...", row["domain"])
            database.cursor.execute("DELETE FROM blocks WHERE blocker = ? OR blocked = ?", [row["domain"], row["domain"]])
            database.cursor.execute("DELETE FROM instances WHERE domain = ? LIMIT 1", [row["domain"]])

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.info("Vacuum cleaning database ...")
    database.cursor.execute("VACUUM")

    logger.debug("Success! - EXIT!")
    return 0