# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
import argparse
import csv
import inspect
import json
import logging
import time

from urllib.parse import urlparse

import atoma
import bs4
import markdown
import reqto
import validators

from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import blocklists
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import dicts as dict_helper
from fba.helpers import domain as domain_helper
from fba.helpers import locking
from fba.helpers import processing
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import csrf
from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances
from fba.models import sources

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)

def check_instance(args: argparse.Namespace) -> int:
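    """Checks a single domain (args.domain): it has to be syntactically valid,
    not blacklisted and not yet registered. Returns a non-zero status when a
    check fails; presumably invoked via ./fba.py check_instance --domain=..."""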
    logger.debug("args.domain='%s' - CALLED!", args.domain)

    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100  # non-zero status codes in this function are assumed values
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status

def check_nodeinfo(args: argparse.Namespace) -> int:
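    """Walks all instances with a nodeinfo URL set and warns when that URL
    contains neither the stored domain nor its punycode form; relative URLs
    always count as matching."""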
    logger.debug("args[]='%s' - CALLED!", type(args))

    # Fetch rows
    database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")

    cnt = 0
    for row in database.cursor.fetchall():
        logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        punycode = row["domain"].encode("idna").decode("utf-8")

        if row["nodeinfo_url"].startswith("/"):
            logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
            continue
        elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
            logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
            cnt = cnt + 1

    logger.info("Found %d row(s)", cnt)

    logger.debug("EXIT!")
    return 0

def fetch_pixelfed_api(args: argparse.Namespace) -> int:
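    """Fetches the public server list from pixelfed.org's API and registers
    every new, wanted domain found in its 'data' element."""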
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF by default; you don't have to add network.source_headers yourself here
    headers = dict()
    source_domain = "pixelfed.org"

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    try:
        logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
        headers = csrf.determine(source_domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_pixelfed_api,%s) - EXIT!", type(exception), __name__)
        return 1

    try:
        logger.info("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            source_domain,
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 101
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 102

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif row["domain"] == "":
                logger.debug("row[domain] is empty - SKIPPED!")
                continue

            logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
            domain = row["domain"].encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Fetching instances from domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Cannot fetch JSON,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 103

    logger.debug("Success! - EXIT!")
    return 0

def fetch_bkali(args: argparse.Namespace) -> int:
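    """Queries the GraphQL endpoint at gql.api.bka.li for a sorted domain list
    and fetches instance data for every new, wanted domain."""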
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "gql.api.bka.li"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()
    try:
        logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
        fetched = network.post_json_api(
            source_domain,
            "/v1/graphql",  # endpoint path is an assumption (standard Hasura path)
            json.dumps({
                "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
            })
        )

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for source_domain='%s' returned error_message='%s'", source_domain, fetched["error_message"])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif entry["domain"] == "":
                logger.debug("entry[domain] is empty - SKIPPED!")
                continue
            elif not domain_helper.is_wanted(entry["domain"]):
                logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
                continue
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])
    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, "tak.teleyal.blog", None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0

def fetch_blocks(args: argparse.Namespace) -> int:
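    """Fetches and records block lists. Depending on the arguments this
    re-checks a single domain (--domain), one software type (--software) or
    all supported instances that are due for a re-check; obfuscated entries
    are deobfuscated where possible."""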
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ? LIMIT 1", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC", [args.software]
        )
    elif args.force:
        # Re-check all
        logger.debug("Re-checking all instances ...")
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC"
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)

        if not domain_helper.is_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("Setting last_blocked,has_obfuscation=false for blocker='%s' ...", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        # c.s isn't part of oliphant's "hidden" blocklists
        if blocker == "chaos.social" or software_helper.is_relay(software) or blocklists.has(blocker):
            logger.debug("Skipping blocker='%s', run ./fba.py fetch_cs, fetch_oliphant, fetch_csv instead!", blocker)
            continue

        logger.debug("Invoking federation.fetch_blocks(%s) ...", blocker)
        blocking = federation.fetch_blocks(blocker)

        logger.debug("blocker='%s',software='%s',blocking()=%d", blocker, software, len(blocking))
        if len(blocking) == 0:
            logger.debug("blocker='%s',software='%s' - fetching blocklist ...", blocker, software)
            if software == "pleroma":
                blocking = pleroma.fetch_blocks(blocker)
                logger.info("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "mastodon":
                blocking = mastodon.fetch_blocks(blocker)
                logger.info("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "lemmy":
                blocking = lemmy.fetch_blocks(blocker)
                logger.info("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "friendica":
                blocking = friendica.fetch_blocks(blocker)
                logger.info("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "misskey":
                blocking = misskey.fetch_blocks(blocker)
                logger.info("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            else:
                logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
        instances.set_total_blocks(blocker, blocking)

        blockdict = list()
        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin = row["origin"]
                nodeinfo_url = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some obscure them with question marks; not sure whether that depends on the version
                row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin = row["origin"]
                nodeinfo_url = row["nodeinfo_url"]

            logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue

            logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
            block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
            logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])

            if not domain_helper.is_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            block["block_level"] = blocks.alias_block_level(block["block_level"])

            if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_observer(args: argparse.Namespace) -> int:
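    """Scrapes fediverse.observer, either for all software types found in its
    navigation bar or only args.software, and registers new domains from the
    per-software table data."""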
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fediverse.observer"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    types = list()
    if args.software is None:
        logger.info("Fetching software list ...")
        raw = utils.fetch_url(
            f"https://{source_domain}",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        ).text
        logger.debug("raw[%s]()=%d", type(raw), len(raw))

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        navbar = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"})
        logger.debug("navbar[]='%s'", type(navbar))
        if navbar is None:
            logger.warning("Cannot find navigation bar, cannot continue!")
            return 1

        items = navbar.findAll("a", {"class": "dropdown-item"})
        logger.debug("items[]='%s'", type(items))

        logger.info("Checking %d menu items ...", len(items))
        for item in items:
            logger.debug("item[%s]='%s'", type(item), item)
            if item.text.lower() == "all":
                logger.debug("Skipping 'All' menu entry ...")
                continue

            logger.debug("Appending item.text='%s' ...", item.text)
            types.append(tidyup.domain(item.text))
    else:
        logger.info("Adding args.software='%s' as type ...", args.software)
        types.append(args.software)

    logger.info("Fetching %d different table data ...", len(types))
    for software in types:
        logger.debug("software='%s' - BEFORE!", software)
        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            continue

        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://{source_domain}/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()
            domain = tidyup.domain(domain) if domain is not None and domain != "" else None
            logger.debug("domain='%s' - AFTER!", domain)

            if domain is None or domain == "":
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue

            software = software_helper.alias(software)
            logger.info("Fetching instances for domain='%s'", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_todon_wiki(args: argparse.Namespace) -> int:
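    """Parses the silenced/limited and suspended server lists from
    wiki.todon.eu and records them as blocks, presumably attributed to the
    todon.eu instance."""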
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "wiki.todon.eu"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    blocklist = {
        "silenced": list(),
        "reject"  : list(),
    }

    logger.debug("Fetching domainblocks from source_domain='%s'", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/todon/domainblocks",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "todon.eu"  # blocker attribution assumed from the wiki's domain

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_last_blocked(blocker)
    instances.set_total_blocks(blocker, blocking)

    blockdict = list()
    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if not domain_helper.is_wanted(blocked):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue
            elif not domain_helper.is_wanted(blocker):
                logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
                continue
            elif blocks.is_instance_blocked(blocker, blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", blocked, block_level, blocker)
                blockdict.append({
                    "blocked": blocked,
                    "reason" : None,
                })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

    logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
        network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_cs(args: argparse.Namespace):
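    """Renders chaos.social's federation.md from Markdown and records the
    'silenced' and 'blocked' instance tables as blocks for chaos.social."""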
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Markdown extensions for rendering federation.md (assumed minimal set,
    # the original list is longer)
    extensions = ["extra"]

    blocklist = {
        "silenced": list(),
        "reject"  : list(),
    }

    source_domain = "raw.githubusercontent.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching federation.md from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/chaossocial/meta/master/federation.md",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc()=%d[]='%s'", len(doc), type(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    blocklist["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    blocklist["reject"] = federation.find_domains(blocked)

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "chaos.social"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_last_blocked(blocker)
    instances.set_total_blocks(blocker, blocking)

    logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
    if len(blocking) > 0:
        blockdict = list()
        for block_level in blocklist:
            logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))

            for row in blocklist[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if "domain" not in row:
                    logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
                    continue
                elif not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
                    blockdict.append({
                        "blocked": row["domain"],
                        "reason" : row["reason"],
                    })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

    logger.debug("Success! - EXIT!")

def fetch_fba_rss(args: argparse.Namespace) -> int:
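    """Parses an FBA-specific RSS feed given via args.feed and registers all
    newly listed, wanted domains."""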
    logger.debug("args[]='%s' - CALLED!", type(args))

    domains = list()

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    components = urlparse(args.feed)
    domain = components.netloc.lower().split(":")[0]

    logger.debug("domain='%s'", domain)
    if sources.is_recent(domain):
        logger.info("API from domain='%s' has recently been accessed - EXIT!", domain)
        return 0
    else:
        logger.debug("domain='%s' has not been recently used, marking ...", domain)
        sources.update(domain)

    logger.info("Fetching FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item[%s]='%s'", type(item), item)
            domain = item.link.split("=")[1]
            domain = tidyup.domain(domain) if domain is not None and domain != "" else None

            logger.debug("domain='%s' - AFTER!", domain)
            if domain is None or domain == "":
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fbabot_atom(args: argparse.Namespace) -> int:
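    """Parses the FBA bot's ATOM feed (ryona.agency by default, overridable
    via args.feed) and registers domains linked from its entries."""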
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "ryona.agency"
    feed = f"https://{source_domain}/users/fba/feed.atom"

    logger.debug("args.feed[%s]='%s'", type(args.feed), args.feed)
    if args.feed is not None and validators.url(args.feed):
        logger.debug("Setting feed='%s' ...", args.feed)
        feed = str(args.feed)
        source_domain = urlparse(args.feed).netloc

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                logger.debug("element[]='%s'", type(element))
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href) if href is not None and href != "" else None

                    logger.debug("domain='%s' - AFTER!", domain)
                    if domain is None or domain == "":
                        logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                        continue

                    logger.debug("domain='%s' - BEFORE!", domain)
                    domain = domain.encode("idna").decode("utf-8")
                    logger.debug("domain='%s' - AFTER!", domain)

                    if not domain_helper.is_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_instances(args: argparse.Namespace) -> int:
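    """Fetches instance data for args.domain and, unless limited to a single
    run, loops over known instances that are due for a re-fetch."""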
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("args.domain='%s' - checking ...", args.domain)
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid.", args.domain)
        return 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
        return 101

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Initialize values
    domain = tidyup.domain(args.domain)
    origin = software = None

    # Fetch record
    database.cursor.execute("SELECT origin, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
    row = database.cursor.fetchone()
    if row is not None:
        origin = row["origin"]
        software = row["software"]

    if software_helper.is_relay(software):
        logger.warning("args.domain='%s' is of software type '%s' which is not supported by this command. Please invoke fetch_relays instead.", args.domain, software)
        return 102

    # Initial fetch
    try:
        logger.info("Fetching instances from args.domain='%s',origin='%s',software='%s' ...", domain, origin, software)
        federation.fetch_instances(domain, origin, software, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update(args.domain)
        return 100

    if args.single:  # flag name is an assumption
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus', 'neodb') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY total_peers DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] == "":
            logger.debug("row[domain] is empty - SKIPPED!")
            continue

        logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
        domain = row["domain"].encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", domain, row["origin"], row["software"], row["nodeinfo_url"])
            federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0

def fetch_csv(args: argparse.Namespace) -> int:
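    """Processes all CSV block lists configured in blocklists.csv_files,
    optionally restricted to a single blocker via args.domain; presumably
    invoked via ./fba.py fetch_csv."""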
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    logger.info("Checking %d CSV files ...", len(blocklists.csv_files))
    for block in blocklists.csv_files:
        logger.debug("block[blocker]='%s',block[csv_url]='%s'", block["blocker"], block["csv_url"])

        # Is a domain given and not equal to the blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue

        logger.debug("Invoking processing.csv_block(%s, %s, fetch_csv) ...", block["blocker"], block["csv_url"])
        processing.csv_block(block["blocker"], block["csv_url"], inspect.currentframe().f_code.co_name)

    logger.debug("Success - EXIT!")
    return 0

def fetch_oliphant(args: argparse.Namespace) -> int:
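    """Downloads oliphant's block list CSV files from codeberg.org and feeds
    each of them into processing.csv_block()."""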
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "codeberg.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    # Base URL
    base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"

    logger.debug("Downloading %d files ...", len(blocklists.oliphant_blocklists))
    for block in blocklists.oliphant_blocklists:
        # Is a domain given and not equal to the blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue

        url = f"{base_url}/{block['csv_url']}"

        logger.debug("Invoking processing.csv_block(%s, %s, fetch_oliphant) ...", block["blocker"], url)
        processing.csv_block(block["blocker"], url, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_txt(args: argparse.Namespace) -> int:
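    """Fetches static plain-text block lists (currently seirdy.one's bsl.txt)
    and processes every listed domain."""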
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Static URLs
    urls = [
        {
            "blocker": "seirdy.one",
            "url" : "https://seirdy.one/pb/bsl.txt",
        },
    ]

    logger.info("Checking %d text file(s) ...", len(urls))
    for row in urls:
        logger.debug("Fetching row[url]='%s' ...", row["url"])
        response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code == 200 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.strip().split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain) if domain is not None and domain != "" else None

                logger.debug("domain='%s' - AFTER!", domain)
                if domain is None or domain == "":
                    logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                    continue
                elif not domain_helper.is_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue
                elif instances.is_recent(domain):
                    logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                    continue

                logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
                processed = processing.instance(domain, row["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)
                if not processed:
                    logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
                    continue

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fedipact(args: argparse.Namespace) -> int:
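    """Scrapes the participant list from fedipact.online and registers every
    new, wanted instance."""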
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fedipact.online"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching / from source_domain='%s' ...", source_domain)
    response = utils.fetch_url(
        f"https://{source_domain}",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and response.text != "":
        logger.debug("Parsing %d Bytes ...", len(response.text))

        doc = bs4.BeautifulSoup(response.text, "html.parser")
        logger.debug("doc[]='%s'", type(doc))

        rows = doc.findAll("li")
        logger.info("Checking %d row(s) ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            domain = tidyup.domain(row.contents[0]) if row.contents[0] is not None and row.contents[0] != "" else None

            logger.debug("domain='%s' - AFTER!", domain)
            if domain is None or domain == "":
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.info("Fetching domain='%s' ...", domain)
            federation.fetch_instances(domain, "beach.city", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_joinmobilizon(args: argparse.Namespace) -> int:
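    """Fetches the instance list from instances.joinmobilizon.org's API and
    registers every new, wanted host."""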
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "instances.joinmobilizon.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching instances from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/api/v1/instances",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    parsed = json.loads(raw)
    logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))

    if "data" not in parsed:
        logger.warning("parsed()=%d does not contain key 'data'", len(parsed))
        return 1

    logger.info("Checking %d instances ...", len(parsed["data"]))
    for row in parsed["data"]:
        logger.debug("row[]='%s'", type(row))
        if "host" not in row:
            logger.warning("row='%s' does not contain key 'host' - SKIPPED!", row)
            continue
        elif not domain_helper.is_wanted(row["host"]):
            logger.debug("row[host]='%s' is not wanted - SKIPPED!", row["host"])
            continue
        elif instances.is_registered(row["host"]):
            logger.debug("row[host]='%s' is already registered - SKIPPED!", row["host"])
            continue

        logger.info("Fetching row[host]='%s' ...", row["host"])
        federation.fetch_instances(row["host"], "demo.mobilizon.org", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_joinmisskey(args: argparse.Namespace) -> int:
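    """Fetches instances.json from instanceapp.misskey.page and registers
    every new, wanted Misskey instance."""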
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "instanceapp.misskey.page"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching instances.json from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/instances.json",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    parsed = json.loads(raw)
    logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))

    if "instancesInfos" not in parsed:
        logger.warning("parsed()=%d does not contain element 'instancesInfos'", len(parsed))
        return 1

    logger.info("Checking %d instance(s) ...", len(parsed["instancesInfos"]))
    for row in parsed["instancesInfos"]:
        logger.debug("row[%s]='%s'", type(row), row)
        if "url" not in row:
            logger.warning("row()=%d does not have element 'url' - SKIPPED!", len(row))
            continue
        elif not domain_helper.is_wanted(row["url"]):
            logger.debug("row[url]='%s' is not wanted - SKIPPED!", row["url"])
            continue
        elif instances.is_registered(row["url"]):
            logger.debug("row[url]='%s' is already registered - SKIPPED!", row["url"])
            continue

        logger.info("Fetching row[url]='%s' ...", row["url"])
        federation.fetch_instances(row["url"], "misskey.io", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def recheck_obfuscation(args: argparse.Namespace) -> int:
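    """Re-checks instances flagged with has_obfuscation: re-fetches their
    block lists, tries to deobfuscate blocked entries and updates the
    instance's obfuscation counter."""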
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if isinstance(args.domain, str) and args.domain != "" and domain_helper.is_wanted(args.domain):
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
    elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
    else:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")

    rows = database.cursor.fetchall()
    logger.info("Checking %d domains ...", len(rows))
    for row in rows:
        logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        if (args.force is None or not args.force) and args.domain is None and args.software is None and instances.is_recent(row["domain"], "last_blocked"):
            logger.debug("row[domain]='%s' has been recently checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force))
            continue

        logger.debug("Invoking federation.fetch_blocks(%s) ...", row["domain"])
        blocking = federation.fetch_blocks(row["domain"])

        logger.debug("blocking()=%d", len(blocking))
        if len(blocking) == 0:
            if row["software"] == "pleroma":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = pleroma.fetch_blocks(row["domain"])
            elif row["software"] == "mastodon":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = mastodon.fetch_blocks(row["domain"])
            elif row["software"] == "lemmy":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = lemmy.fetch_blocks(row["domain"])
            elif row["software"] == "friendica":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = friendica.fetch_blocks(row["domain"])
            elif row["software"] == "misskey":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = misskey.fetch_blocks(row["domain"])
            else:
                logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])

        # c.s isn't part of oliphant's "hidden" blocklists
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] != "chaos.social" and not software_helper.is_relay(row["software"]) and not blocklists.has(row["domain"]):
            logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
            instances.set_last_blocked(row["domain"])
            instances.set_total_blocks(row["domain"], blocking)

        obfuscated = 0
        blockdict = list()

        logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
        for block in blocking:
            logger.debug("block[blocked]='%s'", block["blocked"])

            blocked = None
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
                logger.debug("block='%s' is obfuscated.", block["blocked"])
                obfuscated = obfuscated + 1
                blocked = utils.deobfuscate(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
            elif not domain_helper.is_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
                logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
                continue

            logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
            if blocked is not None and blocked != block["blocked"]:
                logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
                obfuscated = obfuscated - 1

                if blacklist.is_blacklisted(blocked):
                    logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
                    continue
                elif blacklist.is_blacklisted(row["domain"]):
                    logger.debug("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"])
                    continue
                elif blocks.is_instance_blocked(row["domain"], blocked):
                    logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
                    continue

                block["block_level"] = blocks.alias_block_level(block["block_level"])

                logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
                if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
                    blockdict.append({
                        "blocked": blocked,
                        "reason" : block["reason"],
                    })

        logger.debug("Setting obfuscated=%d for row[domain]='%s' ...", obfuscated, row["domain"])
        instances.set_obfuscated_blocks(row["domain"], obfuscated)

        logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
        if instances.has_pending(row["domain"]):
            logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
            instances.update(row["domain"])

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", row["domain"], len(blockdict))
            network.send_bot_post(row["domain"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fedilist(args: argparse.Namespace) -> int:
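    """Downloads the instance CSV from demo.fedilist.com, optionally filtered
    by args.software, and registers every new, wanted domain."""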
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "demo.fedilist.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    url = f"http://{source_domain}/instance/csv?onion=not"
    if args.software is not None and args.software != "":
        logger.debug("args.software='%s'", args.software)
        url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"

    logger.info("Fetching url='%s' ...", url)
    response = reqto.get(
        url,
        headers=network.web_headers,
        timeout=(config.get("connection_timeout"), config.get("read_timeout")),
        allow_redirects=False
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if not response.ok or response.status_code > 200 or len(response.content) == 0:
        logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.content))
        return 1

    reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")

    logger.debug("reader[]='%s'", type(reader))
    if reader is None:
        logger.warning("Failed parsing response.content()=%d as CSV content", len(response.content))
        return 2

    rows = list(reader)

    logger.info("Checking %d rows ...", len(rows))
    for row in rows:
        logger.debug("row[]='%s'", type(row))
        if "hostname" not in row:
            logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(row))
            continue

        logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"])
        domain = tidyup.domain(row["hostname"]) if row["hostname"] is not None and row["hostname"] != "" else None
        logger.debug("domain='%s' - AFTER!", domain)

        if domain is None or domain == "":
            logger.debug("domain='%s' is empty after tidyup.domain(): row[hostname]='%s' - SKIPPED!", domain, row["hostname"])
            continue

        logger.debug("domain='%s' - BEFORE!", domain)
        domain = domain.encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            continue
        elif (args.force is None or not args.force) and instances.is_registered(domain):
            logger.debug("domain='%s' is already registered, --force not specified: args.force[]='%s'", domain, type(args.force))
            continue
        elif instances.is_recent(domain):
            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
            continue

        logger.info("Fetching instances from domain='%s' ...", domain)
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def update_nodeinfo(args: argparse.Namespace) -> int:
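    """Re-determines the software type via nodeinfo for a selectable set of
    instances (single domain, software type, detection mode or all) and
    updates the stored records."""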
1456 logger.debug("args[]='%s' - CALLED!", type(args))
1458 logger.debug("Invoking locking.acquire() ...")
1461 if args.domain is not None and args.domain != "":
1462 logger.debug("Fetching args.domain='%s'", args.domain)
1463 database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
1464 elif args.software is not None and args.software != "":
1465 logger.info("Fetching domains for args.software='%s'", args.software)
1466 database.cursor.execute("SELECT domain, software FROM instances WHERE software = ? ORDER BY last_updated ASC")
1467 elif args.mode is not None and args.mode != "":
1468 logger.info("Fetching domains for args.mode='%s'", args.mode.upper())
1469 database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode = ? ORDER BY last_updated ASC")
1470 elif args.no_software:
1471 logger.info("Fetching domains with no software type detected ...")
1472 database.cursor.execute("SELECT domain, software FROM instances WHERE software IS NULL ORDER BY last_updated ASC")
1474 logger.info("Fetching domains with other detection mode than AUTO_DISOVERY being set ...")
1475 database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode IS NOT NULL AND detection_mode != 'AUTO_DISCOVERY' ORDER BY last_updated ASC")
1477 logger.info("Fetching domains for recently updated ...")
1478 database.cursor.execute("SELECT domain, software FROM instances ORDER BY last_updated ASC")
1480 domains = database.cursor.fetchall()
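    # Walk the selected domains and re-detect their software type; "cnt" only
    # drives the progress percentage in the log output.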
1482 logger.info("Checking %d domain(s) ...", len(domains))
1485 logger.debug("row[]='%s'", type(row))
1486 if not args.force and instances.is_recent(row["domain"], "last_nodeinfo"):
1487 logger.debug("row[domain]='%s' has been recently checked - SKIPPED!", row["domain"])
1491 logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
1492 software = federation.determine_software(row["domain"])
1494 logger.debug("Determined software='%s'", software)
1495 if (software != row["software"] and software is not None) or args.force is True:
1496 logger.debug("software='%s'", software)
1497 if software is None:
1498 logger.debug("Setting nodeinfo_url to 'None' for row[domain]='%s' ...", row["domain"])
1499 instances.set_nodeinfo_url(row["domain"], None)
1501 logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
1502 instances.set_software(row["domain"], software)
1504 if software is not None:
1505 logger.debug("Setting row[domain]='%s' as successfully determined ...", row["domain"])
1506 instances.set_success(row["domain"])
1507 except network.exceptions as exception:
1508 logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1509 instances.set_last_error(row["domain"], exception)
1511 instances.set_last_nodeinfo(row["domain"])
1512 instances.update(row["domain"])
1515 logger.debug("Success! - EXIT!")
def fetch_instances_social(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "instances.social"

    if config.get("instances_social_api_key") == "":
        logger.error("API key not set. Please set it in your config.json file.")
        return 1
    elif sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 2
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    headers = {
        "Authorization": f"Bearer {config.get('instances_social_api_key')}",
    }

    logger.info("Fetching list from source_domain='%s' ...", source_domain)
    fetched = network.get_json_api(
        source_domain,
        "/api/1.0/instances/list?count=0&sort_by=name",
        headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )
    logger.debug("fetched[]='%s'", type(fetched))
1549 if "error_message" in fetched:
1550 logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1552 elif "exception" in fetched:
1553 logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1555 elif "json" not in fetched:
1556 logger.warning("fetched has no element 'json' - EXIT!")
1558 elif "instances" not in fetched["json"]:
1559 logger.warning("fetched[row] has no element 'instances' - EXIT!")
1563 rows = fetched["json"]["instances"]
1565 logger.info("Checking %d row(s) ...", len(rows))
1567 logger.debug("row[]='%s'", type(row))
1568 domain = tidyup.domain(row["name"]) if row["name"] != None and row["name"] != "" else None
1569 logger.debug("domain='%s' - AFTER!", domain)
1571 if domain is None and domain == "":
1572 logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
1575 logger.debug("domain='%s' - BEFORE!", domain)
1576 domain = domain.encode("idna").decode("utf-8")
1577 logger.debug("domain='%s' - AFTER!", domain)
1579 if not domain_helper.is_wanted(domain):
1580 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1582 elif domain in domains:
1583 logger.debug("domain='%s' is already added - SKIPPED!", domain)
1585 elif instances.is_registered(domain):
1586 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1588 elif instances.is_recent(domain):
1589 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1592 logger.info("Fetching instances from domain='%s'", domain)
1593 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1595 logger.debug("Success! - EXIT!")
def fetch_relays(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND domain = ? LIMIT 1", [args.domain])
    elif args.software is not None and args.software != "":
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND software = ?", [args.software])
    else:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay')")

    domains = list()
    rows = database.cursor.fetchall()
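    # "domains" collects newly discovered peers as {"domain": ..., "origin": ...}
    # records across all relays; "peers" (reset per relay) tracks what each
    # single relay has registered.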
1614 logger.info("Checking %d relays ...", len(rows))
1616 logger.debug("row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1618 if not args.force and instances.is_recent(row["domain"]):
1619 logger.debug("row[domain]='%s' has been recently fetched - SKIPPED!", row["domain"])
1623 if row["software"] == "pub-relay":
1624 logger.info("Fetching row[nodeinfo_url]='%s' from relay row[domain]='%s',row[software]='%s' ...", row["nodeinfo_url"], row["domain"], row["software"])
1625 raw = network.fetch_api_url(
1626 row["nodeinfo_url"],
1627 (config.get("connection_timeout"), config.get("read_timeout"))
1630 logger.debug("raw[%s]()=%d", type(raw), len(raw))
1631 if "exception" in raw:
1632 logger.warning("row[domain]='%s' has caused an exception: '%s' - raising again ...", row["domain"], type(raw["exception"]))
1633 raise raw["exception"]
1634 elif "error_message" in raw:
1635 logger.warning("row[domain]='%s' has caused error message: '%s' - SKIPPED!", row["domain"], raw["error_message"])
1636 instances.set_last_error(row["domain"], raw)
1637 instances.set_last_instance_fetch(row["domain"])
1638 instances.update(row["domain"])
1640 elif not "json" in raw:
1641 logger.warning("raw()=%d does not contain key 'json' in response - SKIPPED!", len(raw))
1643 elif not "metadata" in raw["json"]:
1644 logger.warning("raw[json]()=%d does not contain key 'json' in response - SKIPPED!", len(raw["json"]))
1646 elif not "peers" in raw["json"]["metadata"]:
1647 logger.warning("raw[json][metadata()=%d does not contain key 'json' in response - SKIPPED!", len(raw["json"]["metadata"]))
1650 logger.info("Fetching / from relay row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1651 raw = utils.fetch_url(
1652 f"https://{row['domain']}",
1653 network.web_headers,
1654 (config.get("connection_timeout"), config.get("read_timeout"))
1656 logger.debug("raw[%s]()=%d", type(raw), len(raw))
1658 doc = bs4.BeautifulSoup(raw, features="html.parser")
1659 logger.debug("doc[]='%s'", type(doc))
1661 except network.exceptions as exception:
1662 logger.warning("Exception '%s' during fetching from relay '%s': '%s'", type(exception), row["domain"], str(exception))
1663 instances.set_last_error(row["domain"], exception)
1664 instances.set_last_instance_fetch(row["domain"])
1665 instances.update(row["domain"])
1668 logger.debug("row[software]='%s'", row["software"])
1669 if row["software"] == "activityrelay":
1670 logger.debug("Checking row[domain]='%s' ...", row["domain"])
1671 tags = doc.findAll("p")
1673 logger.debug("Checking %d paragraphs ...", len(tags))
1675 logger.debug("tag[]='%s'", type(tag))
1676 if len(tag.contents) == 0:
1677 logger.debug("tag='%s' is an empty tag - SKIPPED!", tag)
1679 elif "registered instances" not in tag.contents[0]:
1680 logger.debug("Skipping paragraph, text not found.")
1683 logger.debug("Found tag.contents[0][]='%s'", tag.contents[0])
1684 for domain in tag.contents:
1685 logger.debug("domain[%s]='%s'", type(domain), domain)
1686 if not isinstance(domain, bs4.element.NavigableString) or "registered instances" in domain:
1689 domain = str(domain)
1690 logger.debug("domain='%s'", domain)
1691 if not domain_helper.is_wanted(domain):
1692 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1695 logger.debug("domain='%s' - BEFORE!", domain)
1696 domain = tidyup.domain(domain) if domain != None and domain != "" else None
1697 logger.debug("domain='%s' - AFTER!", domain)
1699 if domain is None or domain == "":
1700 logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1702 elif domain not in peers:
1703 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1704 peers.append(domain)
1706 if dict_helper.has_key(domains, "domain", domain):
1707 logger.debug("domain='%s' already added", domain)
1710 logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
1713 "origin": row["domain"],
1715 elif row["software"] in ["aoderelay", "selective-relay"]:
1716 logger.debug("Checking row[domain]='%s' ...", row["domain"])
1717 if row["software"] == "aoderelay":
1718 tags = doc.findAll("section", {"class": "instance"})
1720 tags = doc.find("div", {"id": "instances"}).findAll("li")
1722 logger.debug("Checking %d tags ...", len(tags))
1724 logger.debug("tag[]='%s'", type(tag))
1726 link = tag.find("a")
1727 logger.debug("link[%s]='%s'", type(link), link)
1728 if not isinstance(link, bs4.element.Tag):
1729 logger.warning("tag[%s]='%s' is not type of 'bs4.element.Tag' - SKIPPED!", type(tag), tag)
1732 components = urlparse(link.get("href"))
1733 logger.debug("components(%d)='%s'", len(components), components)
1734 domain = components.netloc.lower().split(":")[0]
1736 logger.debug("domain='%s' - BEFORE!", domain)
1737 domain = tidyup.domain(domain) if domain != None and domain != "" else None
1738 logger.debug("domain='%s' - AFTER!", domain)
1740 if domain is None or domain == "":
1741 logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1743 elif domain not in peers:
1744 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1745 peers.append(domain)
1747 if dict_helper.has_key(domains, "domain", domain):
1748 logger.debug("domain='%s' already added", domain)
1751 logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
1754 "origin": row["domain"],
1756 elif row["software"] == "pub-relay":
1757 logger.debug("Checking %d peer(s) row[domain]='%s' ...", len(raw["json"]["metadata"]["peers"]), row["domain"])
1758 for domain in raw["json"]["metadata"]["peers"]:
1759 logger.debug("domain='%s' - BEFORE!", domain)
1760 domain = tidyup.domain(domain) if domain != None and domain != "" else None
1761 logger.debug("domain='%s' - AFTER!", domain)
1763 if domain is None or domain == "":
1764 logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1766 elif domain not in peers:
1767 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1768 peers.append(domain)
1770 if dict_helper.has_key(domains, "domain", domain):
1771 logger.debug("domain='%s' already added", domain)
1774 logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
1777 "origin": row["domain"],
1780 logger.warning("row[domain]='%s',row[software]='%s' is not supported", row["domain"], row["software"])
1783 logger.debug("Updating last_instance_fetch for row[domain]='%s' ...", row["domain"])
1784 instances.set_last_instance_fetch(row["domain"])
1786 logger.info("Relay '%s' has %d peer(s) registered.", row["domain"], len(peers))
1787 instances.set_total_peers(row["domain"], peers)
1789 logger.debug("Flushing data for row[domain]='%s'", row["domain"])
1790 instances.update(row["domain"])
1792 logger.info("Checking %d domains ...", len(domains))
1794 logger.debug("row[domain]='%s',row[origin]='%s'", row["domain"], row["origin"])
1795 if not domain_helper.is_wanted(row["domain"]):
1796 logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
1798 elif instances.is_registered(row["domain"]):
1799 logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
1802 logger.info("Fetching row[domain]='%s',row[origin]='%s' ...", row["domain"], row["origin"])
1803 federation.fetch_instances(row["domain"], row["origin"], None, inspect.currentframe().f_code.co_name)
1805 logger.debug("Success! - EXIT!")
def convert_idna(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    # Only domains not already in punycode ("xn--") form need translating.
    database.cursor.execute("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    instances.translate_idnas(rows, "domain")

    database.cursor.execute("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    instances.translate_idnas(rows, "origin")

    database.cursor.execute("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    blocks.translate_idnas(rows, "blocker")

    database.cursor.execute("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    blocks.translate_idnas(rows, "blocked")

    logger.debug("Success! - EXIT!")
    return 0
def remove_invalid(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    database.cursor.execute("SELECT domain FROM instances ORDER BY domain ASC")
    rows = database.cursor.fetchall()

    logger.info("Checking %d domains ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if not validators.domain(row["domain"].split("/")[0]):
            logger.info("Invalid row[domain]='%s' found, removing ...", row["domain"])
            database.cursor.execute("DELETE FROM blocks WHERE blocker = ? OR blocked = ?", [row["domain"], row["domain"]])
            database.cursor.execute("DELETE FROM instances WHERE domain = ? LIMIT 1", [row["domain"]])

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.info("Vacuum cleaning database ...")
    database.cursor.execute("VACUUM")

    logger.debug("Success! - EXIT!")
    return 0