1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
23 from urllib.parse import urlparse
32 from fba import database
35 from fba.helpers import blacklist
36 from fba.helpers import blocklists
37 from fba.helpers import config
38 from fba.helpers import cookies
39 from fba.helpers import dicts as dict_helper
40 from fba.helpers import domain as domain_helper
41 from fba.helpers import locking
42 from fba.helpers import processing
43 from fba.helpers import software as software_helper
44 from fba.helpers import tidyup
46 from fba.http import csrf
47 from fba.http import federation
48 from fba.http import network
50 from fba.models import blocks
51 from fba.models import instances
52 from fba.models import sources
54 from fba.networks import friendica
55 from fba.networks import lemmy
56 from fba.networks import mastodon
57 from fba.networks import misskey
58 from fba.networks import pleroma
60 logging.basicConfig(level=logging.INFO)
61 logger = logging.getLogger(__name__)
62 #logger.setLevel(logging.DEBUG)
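# Checks a single domain given via --domain: it must be syntactically valid, not blacklisted and not already registered.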
64 def check_instance(args: argparse.Namespace) -> int:
65 logger.debug("args.domain='%s' - CALLED!", args.domain)
68 if not validators.domain(args.domain):
69 logger.warning("args.domain='%s' is not valid", args.domain)
71 elif blacklist.is_blacklisted(args.domain):
72 logger.warning("args.domain='%s' is blacklisted", args.domain)
74 elif instances.is_registered(args.domain):
75 logger.warning("args.domain='%s' is already registered", args.domain)
78 logger.info("args.domain='%s' is not known", args.domain)
80 logger.debug("status=%d - EXIT!", status)
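# Sanity-checks stored nodeinfo_url values: warns when a URL is neither relative nor contains the instance's domain (or its punycode form).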
83 def check_nodeinfo(args: argparse.Namespace) -> int:
84 logger.debug("args[]='%s' - CALLED!", type(args))
87 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")
90 for row in database.cursor.fetchall():
91 logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
92 punycode = row["domain"].encode("idna").decode("utf-8")
94 if row["nodeinfo_url"].startswith("/"):
95 logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
97 elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
98 logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
101 logger.info("Found %d row(s)", cnt)
103 logger.debug("EXIT!")
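# Fetches the public server list from the pixelfed.org API and registers any new, wanted instances.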
106 def fetch_pixelfed_api(args: argparse.Namespace) -> int:
107 logger.debug("args[]='%s' - CALLED!", type(args))
109 # No CSRF token is required by default, so there is no need to add network.source_headers here
111 source_domain = "pixelfed.org"
113 if sources.is_recent(source_domain):
114 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
117 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
118 sources.update(source_domain)
121 logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
122 headers = csrf.determine(source_domain, dict())
123 except network.exceptions as exception:
124 logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
128 logger.info("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
129 fetched = network.get_json_api(
131 "/api/v1/servers/all.json?scope=All&country=all&language=all",
133 (config.get("connection_timeout"), config.get("read_timeout"))
136 logger.debug("JSON API returned %d elements", len(fetched))
137 if "error_message" in fetched:
138 logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
140 elif "data" not in fetched["json"]:
141 logger.warning("API did not return JSON with 'data' element - EXIT!")
144 rows = fetched["json"]["data"]
145 logger.info("Checking %d fetched rows ...", len(rows))
147 logger.debug("row[]='%s'", type(row))
148 if "domain" not in row:
149 logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
151 elif row["domain"] is None or row["domain"] == "":
152 logger.debug("row[domain]='%s' is empty - SKIPPED!", row["domain"])
155 logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
156 domain = row["domain"].encode("idna").decode("utf-8")
157 logger.debug("domain='%s' - AFTER!", domain)
159 if not domain_helper.is_wanted(domain):
160 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
162 elif instances.is_registered(domain):
163 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
165 elif instances.is_recent(domain):
166 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
169 logger.debug("Fetching instances from domain='%s' ...", domain)
170 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
172 except network.exceptions as exception:
173 logger.warning("Cannot fetch JSON API,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
176 logger.debug("Success! - EXIT!")
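# Fetches a domain list from the gql.api.bka.li GraphQL API and registers any new, wanted instances.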
179 def fetch_bkali(args: argparse.Namespace) -> int:
180 logger.debug("args[]='%s' - CALLED!", type(args))
182 logger.debug("Invoking locking.acquire() ...")
185 source_domain = "gql.api.bka.li"
186 if sources.is_recent(source_domain):
187 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
190 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
191 sources.update(source_domain)
195 logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
196 fetched = network.post_json_api(
200 "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
204 logger.debug("fetched[]='%s'", type(fetched))
205 if "error_message" in fetched:
206 logger.warning("post_json_api() for source_domain='%s' returned error_message='%s'", source_domain, fetched["error_message"])
208 elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
209 logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
212 rows = fetched["json"]
214 logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
216 raise Exception("WARNING: Returned no records")
217 elif "data" not in rows:
218 raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
219 elif "nodeinfo" not in rows["data"]:
220 raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")
222 for entry in rows["data"]["nodeinfo"]:
223 logger.debug("entry[%s]='%s'", type(entry), entry)
224 if "domain" not in entry:
225 logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
227 elif entry["domain"] is None or entry["domain"] == "":
228 logger.debug("entry[domain]='%s' is empty - SKIPPED!", entry["domain"])
230 elif not domain_helper.is_wanted(entry["domain"]):
231 logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
233 elif instances.is_registered(entry["domain"]):
234 logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
236 elif instances.is_recent(entry["domain"]):
237 logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
240 logger.debug("Adding domain='%s' ...", entry["domain"])
241 domains.append(entry["domain"])
243 except network.exceptions as exception:
244 logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
247 logger.debug("domains()=%d", len(domains))
249 logger.info("Adding %d new instances ...", len(domains))
250 for domain in domains:
251 logger.debug("domain='%s' - BEFORE!", domain)
252 domain = domain.encode("idna").decode("utf-8")
253 logger.debug("domain='%s' - AFTER!", domain)
256 logger.info("Fetching instances from domain='%s' ...", domain)
257 federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
258 except network.exceptions as exception:
259 logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
260 instances.set_last_error(domain, exception)
263 logger.debug("Success - EXIT!")
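# Fetches block lists from registered instances (optionally limited to --domain or --software), deobfuscates obfuscated entries where possible and records the blocks.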
266 def fetch_blocks(args: argparse.Namespace) -> int:
267 logger.debug("args[]='%s' - CALLED!", type(args))
268 if args.domain is not None and args.domain != "":
269 logger.debug("args.domain='%s' - checking ...", args.domain)
270 if not validators.domain(args.domain):
271 logger.warning("args.domain='%s' is not valid.", args.domain)
273 elif blacklist.is_blacklisted(args.domain):
274 logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
276 elif not instances.is_registered(args.domain):
277 logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
280 logger.debug("Invoking locking.acquire() ...")
283 if args.domain is not None and args.domain != "":
284 # Re-check single domain
285 logger.debug("Querying database for args.domain='%s' ...", args.domain)
286 database.cursor.execute(
287 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ? LIMIT 1", [args.domain]
289 elif args.software is not None and args.software != "":
290 # Re-check single software
291 logger.debug("Querying database for args.software='%s' ...", args.software)
292 database.cursor.execute(
293 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC", [args.software]
297 logger.debug("Re-checking all instances ...")
298 database.cursor.execute(
299 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC"
302 # Re-check after "timeout" (a.k.a. the minimum re-check interval)
303 database.cursor.execute(
304 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_block")]
307 rows = database.cursor.fetchall()
308 logger.info("Checking %d entries ...", len(rows))
309 for blocker, software, origin, nodeinfo_url in rows:
310 logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
312 if not domain_helper.is_wanted(blocker):
313 logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
316 logger.debug("Setting last_blocked,has_obfuscation=false for blocker='%s' ...", blocker)
317 instances.set_last_blocked(blocker)
318 instances.set_has_obfuscation(blocker, False)
320 # chaos.social isn't part of oliphant's "hidden" blocklists
321 if blocker == "chaos.social" or software_helper.is_relay(software) or blocklists.has(blocker):
322 logger.debug("Skipping blocker='%s', run ./fba.py fetch_cs, fetch_oliphant, fetch_csv instead!", blocker)
325 logger.debug("Invoking federation.fetch_blocks(%s) ...", blocker)
326 blocking = federation.fetch_blocks(blocker)
328 logger.debug("blocker='%s',software='%s',blocking()=%d", blocker, software, len(blocking))
329 if len(blocking) == 0:
330 logger.debug("blocker='%s',software='%s' - fetching blocklist ...", blocker, software)
331 if software == "pleroma":
332 blocking = pleroma.fetch_blocks(blocker)
333 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
334 elif software == "mastodon":
335 blocking = mastodon.fetch_blocks(blocker)
336 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
337 elif software == "lemmy":
338 blocking = lemmy.fetch_blocks(blocker)
339 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
340 elif software == "friendica":
341 blocking = friendica.fetch_blocks(blocker)
342 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
343 elif software == "misskey":
344 blocking = misskey.fetch_blocks(blocker)
345 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
347 logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)
349 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
350 instances.set_total_blocks(blocker, blocking)
353 deobfuscated = obfuscated = 0
355 logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
356 for block in blocking:
357 logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])
359 if block["block_level"] == "":
360 logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
363 logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
364 block["blocked"] = tidyup.domain(block["blocked"])
365 block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
366 logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])
368 if block["blocked"] is None or block["blocked"] == "":
369 logger.warning("block[blocked]='%s' is empty, blocker='%s'", block["blocked"], blocker)
371 elif block["blocked"].endswith(".onion"):
372 logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
374 elif block["blocked"].endswith(".i2p") and config.get("allow_i2p_domain") != "true":
375 logger.debug("blocked='%s' is an I2P domain - SKIPPED", block["blocked"])
377 elif block["blocked"].endswith(".arpa"):
378 logger.debug("blocked='%s' is a reverse DNS (.arpa) domain - SKIPPED", block["blocked"])
380 elif block["blocked"].endswith(".tld"):
381 logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
383 elif block["blocked"].find("*") >= 0:
384 logger.debug("blocker='%s' uses obfuscated domains", blocker)
385 instances.set_has_obfuscation(blocker, True)
386 obfuscated = obfuscated + 1
388 # Some Friendica servers also obfuscate domains without providing a hash
389 row = instances.deobfuscate("*", block["blocked"], block["digest"] if "digest" in block else None)
391 logger.debug("row[]='%s'", type(row))
393 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
396 deobfuscated = deobfuscated + 1
397 block["blocked"] = row["domain"]
398 origin = row["origin"]
399 nodeinfo_url = row["nodeinfo_url"]
400 elif block["blocked"].find("?") >= 0:
401 logger.debug("blocker='%s' uses obfuscated domains", blocker)
402 instances.set_has_obfuscation(blocker, True)
403 obfuscated = obfuscated + 1
405 # Some servers obfuscate domains with question marks; it is unclear whether this depends on the software version
406 row = instances.deobfuscate("?", block["blocked"], block["digest"] if "digest" in block else None)
408 logger.debug("row[]='%s'", type(row))
410 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
413 deobfuscated = deobfuscated + 1
414 block["blocked"] = row["domain"]
415 origin = row["origin"]
416 nodeinfo_url = row["nodeinfo_url"]
418 logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
419 if block["blocked"] is None or block["blocked"] == "":
420 logger.debug("block[blocked]='%s' is empty - SKIPPED!", block["blocked"])
423 logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
424 block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
425 logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
427 if not domain_helper.is_wanted(block["blocked"]):
428 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
430 elif block["block_level"] in ["accept", "accepted"]:
431 logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
433 elif not instances.is_registered(block["blocked"]):
434 logger.debug("blocked='%s' is not registered, adding it via blocker='%s' ...", block["blocked"], blocker)
435 federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)
437 block["block_level"] = blocks.alias_block_level(block["block_level"])
439 if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
440 logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
442 "blocked": block["blocked"],
443 "reason" : block["reason"],
446 logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
447 cookies.clear(block["blocked"])
449 logger.info("blocker='%s' has %d obfuscated domain(s) and %d of them could be deobfuscated.", blocker, obfuscated, deobfuscated)
450 instances.set_obfuscated_blocks(blocker, obfuscated)
452 logger.debug("Flushing updates for blocker='%s' ...", blocker)
453 instances.update(blocker)
455 logger.debug("Invoking commit() ...")
456 database.connection.commit()
458 logger.debug("Invoking cookies.clear(%s) ...", blocker)
459 cookies.clear(blocker)
461 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
462 if config.get("bot_enabled") and len(blockdict) > 0:
463 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
464 network.send_bot_post(blocker, blockdict)
466 logger.debug("Success! - EXIT!")
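# Crawls fediverse.observer (optionally limited to --software) and registers newly found, wanted instances.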
469 def fetch_observer(args: argparse.Namespace) -> int:
470 logger.debug("args[]='%s' - CALLED!", type(args))
472 logger.debug("Invoking locking.acquire() ...")
475 source_domain = "fediverse.observer"
476 if sources.is_recent(source_domain):
477 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
480 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
481 sources.update(source_domain)
484 if args.software is None:
485 logger.info("Fetching software list ...")
486 raw = utils.fetch_url(
487 f"https://{source_domain}",
489 (config.get("connection_timeout"), config.get("read_timeout"))
491 logger.debug("raw[%s]()=%d", type(raw), len(raw))
493 doc = bs4.BeautifulSoup(raw, features="html.parser")
494 logger.debug("doc[]='%s'", type(doc))
496 navbar = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"})
497 logger.debug("navbar[]='%s'", type(navbar))
499 logger.warning("Cannot find navigation bar, cannot continue!")
502 items = navbar.findAll("a", {"class": "dropdown-item"})
503 logger.debug("items[]='%s'", type(items))
505 logger.info("Checking %d menu items ...", len(items))
507 logger.debug("item[%s]='%s'", type(item), item)
508 if item.text.lower() == "all":
509 logger.debug("Skipping 'All' menu entry ...")
512 logger.debug("Appending item.text='%s' ...", item.text)
513 types.append(tidyup.domain(item.text))
515 logger.info("Adding args.software='%s' as type ...", args.software)
516 types.append(args.software)
518 logger.info("Fetching table data for %d software type(s) ...", len(types))
519 for software in types:
520 logger.debug("software='%s'", software)
522 if args.software is not None and args.software != software:
523 logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
528 logger.debug("Fetching table data for software='%s' ...", software)
529 raw = utils.fetch_url(
530 f"https://{source_domain}/app/views/tabledata.php?software={software}",
532 (config.get("connection_timeout"), config.get("read_timeout"))
534 logger.debug("raw[%s]()=%d", type(raw), len(raw))
536 doc = bs4.BeautifulSoup(raw, features="html.parser")
537 logger.debug("doc[]='%s'", type(doc))
538 except network.exceptions as exception:
539 logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
542 items = doc.findAll("a", {"class": "url"})
543 logger.info("Checking %d items,software='%s' ...", len(items), software)
545 logger.debug("item[]='%s'", type(item))
546 domain = item.decode_contents()
547 logger.debug("domain[%s]='%s'", type(domain), domain)
548 domain = tidyup.domain(domain) if domain not in [None, ""] else None
549 logger.debug("domain='%s' - AFTER!", domain)
551 if domain is None or domain == "":
552 logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
555 logger.debug("domain='%s' - BEFORE!", domain)
556 domain = domain.encode("idna").decode("utf-8")
557 logger.debug("domain='%s' - AFTER!", domain)
559 if not domain_helper.is_wanted(domain):
560 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
562 elif instances.is_registered(domain):
563 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
566 logger.info("Fetching instances for domain='%s'", domain)
567 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
569 logger.debug("Success! - EXIT!")
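# Fetches the silenced/limited and suspended server lists from wiki.todon.eu and records them as blocks.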
572 def fetch_todon_wiki(args: argparse.Namespace) -> int:
573 logger.debug("args[]='%s' - CALLED!", type(args))
575 logger.debug("Invoking locking.acquire() ...")
578 source_domain = "wiki.todon.eu"
579 if sources.is_recent(source_domain):
580 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
583 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
584 sources.update(source_domain)
591 logger.debug("Fetching domainblocks from source_domain='%s'", source_domain)
592 raw = utils.fetch_url(
593 f"https://{source_domain}/todon/domainblocks",
595 (config.get("connection_timeout"), config.get("read_timeout"))
597 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
599 doc = bs4.BeautifulSoup(raw, "html.parser")
600 logger.debug("doc[]='%s'", type(doc))
602 silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
603 logger.info("Checking %d silenced/limited entries ...", len(silenced))
604 blocklist["silenced"] = utils.find_domains(silenced, "div")
606 suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
607 logger.info("Checking %d suspended entries ...", len(suspended))
608 blocklist["reject"] = utils.find_domains(suspended, "div")
610 blocking = blocklist["silenced"] + blocklist["reject"]
613 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
614 instances.set_last_blocked(blocker)
615 instances.set_total_blocks(blocker, blocking)
618 for block_level in blocklist:
619 blocked_domains = blocklist[block_level]
621 logger.debug("block_level='%s',blocked_domains()=%d", block_level, len(blocked_domains))
622 for blocked in blocked_domains:
623 logger.debug("blocked='%s'", blocked)
625 if not instances.is_registered(blocked):
627 logger.info("Fetching instances from domain='%s' ...", blocked)
628 federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
629 except network.exceptions as exception:
630 logger.warning("Exception '%s' during fetching instances (fetch_cs) from blocked='%s'", type(exception), blocked)
631 instances.set_last_error(blocked, exception)
633 if not domain_helper.is_wanted(blocked):
634 logger.warning("blocked='%s' is not wanted - SKIPPED!", blocked)
636 elif not domain_helper.is_wanted(blocker):
637 logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
639 elif blocks.is_instance_blocked(blocker, blocked, block_level):
640 logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
643 logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
644 if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
645 logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", blocked, block_level, blocker)
651 logger.debug("Invoking commit() ...")
652 database.connection.commit()
654 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
655 if config.get("bot_enabled") and len(blockdict) > 0:
656 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
657 network.send_bot_post(blocker, blockdict)
659 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
660 if instances.has_pending(blocker):
661 logger.debug("Flushing updates for blocker='%s' ...", blocker)
662 instances.update(blocker)
664 logger.debug("Success! - EXIT!")
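# Fetches chaos.social's federation.md from raw.githubusercontent.com and records its silenced/blocked instances.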
667 def fetch_cs(args: argparse.Namespace):
668 logger.debug("args[]='%s' - CALLED!", type(args))
670 logger.debug("Invoking locking.acquire() ...")
698 source_domain = "raw.githubusercontent.com"
699 if sources.is_recent(source_domain):
700 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
703 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
704 sources.update(source_domain)
706 logger.info("Fetching federation.md from source_domain='%s' ...", source_domain)
707 raw = utils.fetch_url(
708 f"https://{source_domain}/chaossocial/meta/master/federation.md",
710 (config.get("connection_timeout"), config.get("read_timeout"))
712 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
714 doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
715 logger.debug("doc()=%d,doc[]='%s'", len(doc), type(doc))
717 silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
718 logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
719 blocklist["silenced"] = federation.find_domains(silenced)
721 blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
722 logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
723 blocklist["reject"] = federation.find_domains(blocked)
725 blocking = blocklist["silenced"] + blocklist["reject"]
726 blocker = "chaos.social"
728 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
729 instances.set_last_blocked(blocker)
730 instances.set_total_blocks(blocker, blocking)
732 logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
733 if len(blocking) > 0:
735 for block_level in blocklist:
736 logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))
738 for row in blocklist[block_level]:
739 logger.debug("row[%s]='%s'", type(row), row)
740 if "domain" not in row:
741 logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
743 elif not instances.is_registered(row["domain"]):
745 logger.info("Fetching instances from domain='%s' ...", row["domain"])
746 federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
747 except network.exceptions as exception:
748 logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
749 instances.set_last_error(row["domain"], exception)
751 if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
752 logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
754 "blocked": row["domain"],
755 "reason" : row["reason"],
758 logger.debug("Invoking commit() ...")
759 database.connection.commit()
761 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
762 if config.get("bot_enabled") and len(blockdict) > 0:
763 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
764 network.send_bot_post(blocker, blockdict)
766 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
767 if instances.has_pending(blocker):
768 logger.debug("Flushing updates for blocker='%s' ...", blocker)
769 instances.update(blocker)
771 logger.debug("Success! - EXIT!")
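# Parses an FBA-specific RSS feed given via --feed and registers newly found, wanted instances.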
774 def fetch_fba_rss(args: argparse.Namespace) -> int:
775 logger.debug("args[]='%s' - CALLED!", type(args))
779 logger.debug("Invoking locking.acquire() ...")
782 components = urlparse(args.feed)
783 domain = components.netloc.lower().split(":")[0]
785 logger.debug("domain='%s'", domain)
786 if sources.is_recent(domain):
787 logger.info("API from domain='%s' has recently been accessed - EXIT!", domain)
790 logger.debug("domain='%s' has not been recently used, marking ...", domain)
791 sources.update(domain)
793 logger.info("Fetching FBA-specific RSS feed args.feed='%s' ...", args.feed)
794 response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
796 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
797 if response.ok and response.status_code == 200 and len(response.text) > 0:
798 logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
799 rss = atoma.parse_rss_bytes(response.content)
801 logger.debug("rss[]='%s'", type(rss))
802 for item in rss.items:
803 logger.debug("item[%s]='%s'", type(item), item)
804 domain = item.link.split("=")[1]
805 domain = tidyup.domain(domain) if domain not in [None, ""] else None
807 logger.debug("domain='%s' - AFTER!", domain)
808 if domain is None or domain == "":
809 logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
812 logger.debug("domain='%s' - BEFORE!", domain)
813 domain = domain.encode("idna").decode("utf-8")
814 logger.debug("domain='%s' - AFTER!", domain)
816 if not domain_helper.is_wanted(domain):
817 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
819 elif domain in domains:
820 logger.debug("domain='%s' is already added - SKIPPED!", domain)
822 elif instances.is_registered(domain):
823 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
825 elif instances.is_recent(domain):
826 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
829 logger.debug("Adding domain='%s'", domain)
830 domains.append(domain)
832 logger.debug("domains()=%d", len(domains))
834 logger.info("Adding %d new instances ...", len(domains))
835 for domain in domains:
836 logger.debug("domain='%s'", domain)
838 logger.info("Fetching instances from domain='%s' ...", domain)
839 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
840 except network.exceptions as exception:
841 logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
842 instances.set_last_error(domain, exception)
845 logger.debug("Success! - EXIT!")
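# Parses the FBA bot's ATOM feed (default: ryona.agency) and registers newly found, wanted instances.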
848 def fetch_fbabot_atom(args: argparse.Namespace) -> int:
849 logger.debug("args[]='%s' - CALLED!", type(args))
851 logger.debug("Invoking locking.acquire() ...")
854 source_domain = "ryona.agency"
855 feed = f"https://{source_domain}/users/fba/feed.atom"
857 logger.debug("args.feed[%s]='%s'", type(args.feed), args.feed)
858 if args.feed is not None and validators.url(args.feed):
859 logger.debug("Setting feed='%s' ...", args.feed)
860 feed = str(args.feed)
861 source_domain = urlparse(args.feed).netloc
863 if sources.is_recent(source_domain):
864 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
867 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
868 sources.update(source_domain)
872 logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
873 response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
875 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
876 if response.ok and response.status_code == 200 and len(response.text) > 0:
877 logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
878 atom = atoma.parse_atom_bytes(response.content)
880 logger.debug("atom[]='%s'", type(atom))
881 for entry in atom.entries:
882 logger.debug("entry[]='%s'", type(entry))
883 doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
884 logger.debug("doc[]='%s'", type(doc))
886 for element in doc.findAll("a"):
887 logger.debug("element[]='%s'", type(element))
888 for href in element["href"].split(","):
889 logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
890 domain = tidyup.domain(href) if href not in [None, ""] else None
892 logger.debug("domain='%s' - AFTER!", domain)
893 if domain is None or domain == "":
894 logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
897 logger.debug("domain='%s' - BEFORE!", domain)
898 domain = domain.encode("idna").decode("utf-8")
899 logger.debug("domain='%s' - AFTER!", domain)
901 if not domain_helper.is_wanted(domain):
902 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
904 elif domain in domains:
905 logger.debug("domain='%s' is already added - SKIPPED!", domain)
907 elif instances.is_registered(domain):
908 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
910 elif instances.is_recent(domain):
911 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
914 logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
915 domains.append(domain)
917 logger.debug("domains()=%d", len(domains))
919 logger.info("Adding %d new instances ...", len(domains))
920 for domain in domains:
921 logger.debug("domain='%s'", domain)
923 logger.info("Fetching instances from domain='%s' ...", domain)
924 federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
925 except network.exceptions as exception:
926 logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
927 instances.set_last_error(domain, exception)
930 logger.debug("Success! - EXIT!")
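# Fetches peers for the domain given via --domain; unless stopped early, it then re-checks registered instances whose last instance fetch is older than the configured interval.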
933 def fetch_instances(args: argparse.Namespace) -> int:
934 logger.debug("args[]='%s' - CALLED!", type(args))
936 logger.debug("args.domain='%s' - checking ...", args.domain)
937 if not validators.domain(args.domain):
938 logger.warning("args.domain='%s' is not valid.", args.domain)
940 elif blacklist.is_blacklisted(args.domain):
941 logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
944 logger.debug("Invoking locking.acquire() ...")
948 domain = tidyup.domain(args.domain)
949 origin = software = None
952 database.cursor.execute("SELECT origin, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
953 row = database.cursor.fetchone()
955 origin = row["origin"]
956 software = row["software"]
958 if software_helper.is_relay(software):
959 logger.warning("args.domain='%s' is of software type '%s' which is not supported by this command. Please invoke fetch_relays instead.", args.domain, software)
964 logger.info("Fetching instances from args.domain='%s',origin='%s',software='%s' ...", domain, origin, software)
965 federation.fetch_instances(domain, origin, software, inspect.currentframe().f_code.co_name)
966 except network.exceptions as exception:
967 logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
968 instances.set_last_error(args.domain, exception)
969 instances.update(args.domain)
973 logger.debug("Not fetching more instances - EXIT!")
976 # Loop through instances that are due for a re-check
977 database.cursor.execute(
978 "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus', 'neodb') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY total_peers DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_instance")]
981 rows = database.cursor.fetchall()
982 logger.info("Checking %d entries ...", len(rows))
984 logger.debug("row[domain]='%s'", row["domain"])
985 if row["domain"] == "":
986 logger.debug("row[domain] is empty - SKIPPED!")
989 logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
990 domain = row["domain"].encode("idna").decode("utf-8")
991 logger.debug("domain='%s' - AFTER!", domain)
993 if not domain_helper.is_wanted(domain):
994 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
998 logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", domain, row["origin"], row["software"], row["nodeinfo_url"])
999 federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
1000 except network.exceptions as exception:
1001 logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
1002 instances.set_last_error(domain, exception)
1004 logger.debug("Success - EXIT!")
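# Processes all CSV-based block lists configured in blocklists.csv_files, optionally limited to --domain.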
1007 def fetch_csv(args: argparse.Namespace) -> int:
1008 logger.debug("args[]='%s' - CALLED!", type(args))
1010 logger.debug("Invoking locking.acquire() ...")
1013 logger.info("Checking %d CSV files ...", len(blocklists.csv_files))
1014 for block in blocklists.csv_files:
1015 logger.debug("block[blocker]='%s',block[csv_url]='%s'", block["blocker"], block["csv_url"])
1017 # Is a domain given and not equal to the blocker?
1018 if isinstance(args.domain, str) and args.domain != block["blocker"]:
1019 logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
1022 logger.debug("Invoking processing.csv_block(%s, %s, fetch_csv) ...", block["blocker"], block["csv_url"])
1023 processing.csv_block(block["blocker"], block["csv_url"], inspect.currentframe().f_code.co_name)
1025 logger.debug("Success - EXIT!")
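# Downloads oliphant's CSV block lists from codeberg.org and processes them, optionally limited to --domain.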
1028 def fetch_oliphant(args: argparse.Namespace) -> int:
1029 logger.debug("args[]='%s' - CALLED!", type(args))
1031 logger.debug("Invoking locking.acquire() ...")
1034 source_domain = "codeberg.org"
1035 if sources.is_recent(source_domain):
1036 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1039 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1040 sources.update(source_domain)
1043 base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"
1045 logger.debug("Downloading %d files ...", len(blocklists.oliphant_blocklists))
1046 for block in blocklists.oliphant_blocklists:
1047 # Is a domain given and not equal to the blocker?
1048 logger.debug("block[blocker]='%s',block[csv_url]='%s'", block["blocker"], block["csv_url"])
1049 if isinstance(args.domain, str) and args.domain != block["blocker"]:
1050 logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
1053 url = f"{base_url}/{block['csv_url']}"
1055 logger.debug("Invoking processing.csv_block(%s, %s, fetch_oliphant) ...", block["blocker"], url)
1056 processing.csv_block(block["blocker"], url, inspect.currentframe().f_code.co_name)
1058 logger.debug("Success! - EXIT!")
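# Fetches plain-text block lists configured in blocklists.txt_files and processes the listed domains.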
1061 def fetch_txt(args: argparse.Namespace) -> int:
1062 logger.debug("args[]='%s' - CALLED!", type(args))
1064 logger.debug("Invoking locking.acquire() ...")
1067 logger.info("Checking %d text file(s) ...", len(blocklists.txt_files))
1068 for row in blocklists.txt_files:
1069 logger.debug("Fetching row[url]='%s' ...", row["url"])
1070 response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
1072 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1073 if response.ok and response.status_code == 200 and response.text != "":
1074 logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
1075 domains = response.text.strip().split("\n")
1077 logger.info("Processing %d domains ...", len(domains))
1078 for domain in domains:
1079 logger.debug("domain='%s' - BEFORE!", domain)
1080 domain = tidyup.domain(domain) if domain not in [None, ""] else None
1082 logger.debug("domain='%s' - AFTER!", domain)
1083 if domain is None or domain == "":
1084 logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
1086 elif not domain_helper.is_wanted(domain):
1087 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1089 elif instances.is_recent(domain):
1090 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1093 logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
1094 processed = processing.instance(domain, row["blocker"], inspect.currentframe().f_code.co_name)
1096 logger.debug("processed='%s'", processed)
1098 logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
1101 logger.debug("Success! - EXIT!")
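# Scrapes the fedipact.online start page for participating instances and registers new, wanted ones.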
1104 def fetch_fedipact(args: argparse.Namespace) -> int:
1105 logger.debug("args[]='%s' - CALLED!", type(args))
1107 logger.debug("Invoking locking.acquire() ...")
1110 source_domain = "fedipact.online"
1111 if sources.is_recent(source_domain):
1112 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1115 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1116 sources.update(source_domain)
1118 logger.info("Fetching / from source_domain='%s' ...", source_domain)
1119 response = utils.fetch_url(
1120 f"https://{source_domain}",
1121 network.web_headers,
1122 (config.get("connection_timeout"), config.get("read_timeout"))
1125 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1126 if response.ok and response.status_code == 200 and response.text != "":
1127 logger.debug("Parsing %d Bytes ...", len(response.text))
1129 doc = bs4.BeautifulSoup(response.text, "html.parser")
1130 logger.debug("doc[]='%s'", type(doc))
1132 rows = doc.findAll("li")
1133 logger.info("Checking %d row(s) ...", len(rows))
1135 logger.debug("row[]='%s'", type(row))
1136 domain = tidyup.domain(row.contents[0]) if row.contents[0] not in [None, ""] else None
1138 logger.debug("domain='%s' - AFTER!", domain)
1139 if domain is None or domain == "":
1140 logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
1143 logger.debug("domain='%s' - BEFORE!", domain)
1144 domain = domain.encode("idna").decode("utf-8")
1145 logger.debug("domain='%s' - AFTER!", domain)
1147 if not domain_helper.is_wanted(domain):
1148 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1150 elif instances.is_registered(domain):
1151 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1153 elif instances.is_recent(domain):
1154 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1157 logger.info("Fetching domain='%s' ...", domain)
1158 federation.fetch_instances(domain, "beach.city", None, inspect.currentframe().f_code.co_name)
1160 logger.debug("Success! - EXIT!")
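# Fetches the instance list from instances.joinmobilizon.org and registers new, wanted instances.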
1163 def fetch_joinmobilizon(args: argparse.Namespace) -> int:
1164 logger.debug("args[]='%s' - CALLED!", type(args))
1166 logger.debug("Invoking locking.acquire() ...")
1169 source_domain = "instances.joinmobilizon.org"
1170 if sources.is_recent(source_domain):
1171 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1174 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1175 sources.update(source_domain)
1177 logger.info("Fetching instances from source_domain='%s' ...", source_domain)
1178 raw = utils.fetch_url(
1179 f"https://{source_domain}/api/v1/instances",
1180 network.web_headers,
1181 (config.get("connection_timeout"), config.get("read_timeout"))
1183 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1185 parsed = json.loads(raw)
1186 logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
1188 if "data" not in parsed:
1189 logger.warning("parsed()=%d does not contain key 'data'", len(parsed))
1192 logger.info("Checking %d instances ...", len(parsed["data"]))
1193 for row in parsed["data"]:
1194 logger.debug("row[]='%s'", type(row))
1195 if "host" not in row:
1196 logger.warning("row='%s' does not contain key 'host' - SKIPPED!", row)
1198 elif not domain_helper.is_wanted(row["host"]):
1199 logger.debug("row[host]='%s' is not wanted - SKIPPED!", row["host"])
1201 elif instances.is_registered(row["host"]):
1202 logger.debug("row[host]='%s' is already registered - SKIPPED!", row["host"])
1205 logger.info("Fetching row[host]='%s' ...", row["host"])
1206 federation.fetch_instances(row["host"], "demo.mobilizon.org", None, inspect.currentframe().f_code.co_name)
1208 logger.debug("Success! - EXIT!")
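# Fetches instances.json from instanceapp.misskey.page and registers new, wanted Misskey instances.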
1211 def fetch_joinmisskey(args: argparse.Namespace) -> int:
1212 logger.debug("args[]='%s' - CALLED!", type(args))
1214 logger.debug("Invoking locking.acquire() ...")
1217 source_domain = "instanceapp.misskey.page"
1218 if sources.is_recent(source_domain):
1219 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1222 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1223 sources.update(source_domain)
1225 logger.info("Fetching instances.json from source_domain='%s' ...", source_domain)
1226 raw = utils.fetch_url(
1227 f"https://{source_domain}/instances.json",
1228 network.web_headers,
1229 (config.get("connection_timeout"), config.get("read_timeout"))
1231 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1233 parsed = json.loads(raw)
1234 logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
1236 if "instancesInfos" not in parsed:
1237 logger.warning("parsed()=%d does not contain element 'instancesInfos'", len(parsed))
1240 logger.info("Checking %d instance(s) ...", len(parsed["instancesInfos"]))
1241 for row in parsed["instancesInfos"]:
1242 logger.debug("row[%s]='%s'", type(row), row)
1243 if "url" not in row:
1244 logger.warning("row()=%d does not have element 'url' - SKIPPED!", len(row))
1246 elif not domain_helper.is_wanted(row["url"]):
1247 logger.debug("row[url]='%s' is not wanted - SKIPPED!", row["url"])
1249 elif instances.is_registered(row["url"]):
1250 logger.debug("row[url]='%s' is already registered - SKIPPED!", row["url"])
1253 logger.info("Fetching row[url]='%s' ...", row["url"])
1254 federation.fetch_instances(row["url"], "misskey.io", None, inspect.currentframe().f_code.co_name)
1256 logger.debug("Success! - EXIT!")
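# Re-checks instances flagged with (or with unknown) obfuscation: re-fetches their blocks and tries to deobfuscate wildcard/question-mark entries.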
1259 def recheck_obfuscation(args: argparse.Namespace) -> int:
1260 logger.debug("args[]='%s' - CALLED!", type(args))
1262 logger.debug("Invoking locking.acquire() ...")
1265 if isinstance(args.domain, str) and args.domain != "" and domain_helper.is_wanted(args.domain):
1266 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE (has_obfuscation = 1 OR has_obfuscation IS NULL) AND domain = ?", [args.domain])
1267 elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
1268 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE (has_obfuscation = 1 OR has_obfuscation IS NULL) AND software = ?", [args.software])
1270 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 OR has_obfuscation IS NULL")
1272 rows = database.cursor.fetchall()
1273 logger.info("Checking %d domains ...", len(rows))
1275 logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
1276 if (args.force is None or not args.force) and args.domain is None and args.software is None and instances.is_recent(row["domain"], "last_blocked"):
1277 logger.debug("row[domain]='%s' has been recently checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force))
1280 logger.debug("Invoking federation.fetch_blocks(%s) ...", row["domain"])
1281 blocking = federation.fetch_blocks(row["domain"])
1283 logger.debug("blocking()=%d", len(blocking))
1284 if len(blocking) == 0:
1285 if row["software"] == "pleroma":
1286 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1287 blocking = pleroma.fetch_blocks(row["domain"])
1288 elif row["software"] == "mastodon":
1289 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1290 blocking = mastodon.fetch_blocks(row["domain"])
1291 elif row["software"] == "lemmy":
1292 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1293 blocking = lemmy.fetch_blocks(row["domain"])
1294 elif row["software"] == "friendica":
1295 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1296 blocking = friendica.fetch_blocks(row["domain"])
1297 elif row["software"] == "misskey":
1298 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1299 blocking = misskey.fetch_blocks(row["domain"])
1301 logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])
1303 # chaos.social isn't part of oliphant's "hidden" blocklists
1304 logger.debug("row[domain]='%s'", row["domain"])
1305 if row["domain"] != "chaos.social" and not software_helper.is_relay(row["software"]) and not blocklists.has(row["domain"]):
1306 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
1307 instances.set_last_blocked(row["domain"])
1308 instances.set_total_blocks(row["domain"], blocking)
1313 logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1314 for block in blocking:
1315 logger.debug("block[blocked]='%s'", block["blocked"])
1318 if block["blocked"] == "":
1319 logger.debug("block[blocked] is empty - SKIPPED!")
1321 elif block["blocked"].endswith(".onion"):
1322 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
1324 elif block["blocked"].endswith(".i2p") and config.get("allow_i2p_domain") != "true":
1325 logger.debug("blocked='%s' is an I2P domain name - SKIPPED!", block["blocked"])
1327 elif block["blocked"].endswith(".arpa"):
1328 logger.debug("blocked='%s' is a reverse DNS (.arpa) domain - SKIPPED!", block["blocked"])
1330 elif block["blocked"].endswith(".tld"):
1331 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1333 elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
1334 logger.debug("block='%s' is obfuscated.", block["blocked"])
1335 obfuscated = obfuscated + 1
1336 blocked = utils.deobfuscate(block["blocked"], row["domain"], block["digest"] if "digest" in block else None)
1337 elif not domain_helper.is_wanted(block["blocked"]):
1338 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1340 elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1341 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
1344 logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
1345 if blocked is not None and blocked != block["blocked"]:
1346 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1347 obfuscated = obfuscated - 1
1349 if blacklist.is_blacklisted(blocked):
1350 logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
1352 elif blacklist.is_blacklisted(row["domain"]):
1353 logger.debug("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"])
1355 elif blocks.is_instance_blocked(row["domain"], blocked):
1356 logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
1359 block["block_level"] = blocks.alias_block_level(block["block_level"])
1361 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
1362 if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1363 logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
1366 "reason" : block["reason"],
1369 logger.debug("Setting obfuscated=%d for row[domain]='%s' ...", obfuscated, row["domain"])
1370 instances.set_has_obfuscation(row["domain"], (obfuscated > 0))
1371 instances.set_obfuscated_blocks(row["domain"], obfuscated)
1373 logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
1374 if instances.has_pending(row["domain"]):
1375 logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1376 instances.update(row["domain"])
1378 logger.debug("Invoking commit() ...")
1379 database.connection.commit()
1381 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1382 if config.get("bot_enabled") and len(blockdict) > 0:
1383 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", row["domain"], len(blockdict))
1384 network.send_bot_post(row["domain"], blockdict)
1386 logger.debug("Success! - EXIT!")
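# Downloads the instance CSV from demo.fedilist.com (optionally filtered by --software) and registers new, wanted instances.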
1389 def fetch_fedilist(args: argparse.Namespace) -> int:
1390 logger.debug("args[]='%s' - CALLED!", type(args))
1392 logger.debug("Invoking locking.acquire() ...")
1395 source_domain = "demo.fedilist.com"
1396 if sources.is_recent(source_domain):
1397 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1400 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1401 sources.update(source_domain)
1403 url = f"http://{source_domain}/instance/csv?onion=not"
1404 if args.software is not None and args.software != "":
1405 logger.debug("args.software='%s'", args.software)
1406 url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"
1408 logger.info("Fetching url='%s' ...", url)
1409 response = reqto.get(
1411 headers=network.web_headers,
1412 timeout=(config.get("connection_timeout"), config.get("read_timeout")),
1413 allow_redirects=False
1416 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1417 if not response.ok or response.status_code > 200 or len(response.content) == 0:
1418 logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.content))
1421 reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
1423 logger.debug("reader[]='%s'", type(reader))
1425 logger.warning("Failed parsing response.content()=%d as CSV content", len(response.content))
1430 logger.info("Checking %d rows ...", len(rows))
1432 logger.debug("row[]='%s'", type(row))
1433 if "hostname" not in row:
1434 logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(row))
1437 logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"])
1438 domain = tidyup.domain(row["hostname"]) if row["hostname"] not in [None, ""] else None
1439 logger.debug("domain='%s' - AFTER!", domain)
1441 if domain is None or domain == "":
1442 logger.debug("domain='%s' is empty after tidyup.domain(): row[hostname]='%s' - SKIPPED!", domain, row["hostname"])
1445 logger.debug("domain='%s' - BEFORE!", domain)
1446 domain = domain.encode("idna").decode("utf-8")
1447 logger.debug("domain='%s' - AFTER!", domain)
1449 if not domain_helper.is_wanted(domain):
1450 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1452 elif (args.force is None or not args.force) and instances.is_registered(domain):
1453 logger.debug("domain='%s' is already registered, --force not specified: args.force[]='%s'", domain, type(args.force))
1455 elif instances.is_recent(domain):
1456 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1459 logger.info("Fetching instances from domain='%s' ...", domain)
1460 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1462 logger.debug("Success! - EXIT!")
1465 def update_nodeinfo(args: argparse.Namespace) -> int:
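"""Re-check the software type (nodeinfo) for stored instances, selected via args.domain, args.software, args.mode, args.no_software or args.no_detection, and update their records."""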
1466 logger.debug("args[]='%s' - CALLED!", type(args))
1468 logger.debug("Invoking locking.acquire() ...")
1471 if args.domain is not None and args.domain != "":
1472 logger.debug("Fetching args.domain='%s'", args.domain)
1473 database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
1474 elif args.software is not None and args.software != "":
1475 logger.info("Fetching domains for args.software='%s'", args.software)
1476 database.cursor.execute("SELECT domain, software FROM instances WHERE software = ? ORDER BY last_updated ASC", [args.software])
1477 elif args.mode is not None and args.mode != "":
1478 logger.info("Fetching domains for args.mode='%s'", args.mode.upper())
1479 database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode = ? ORDER BY last_updated ASC", [args.mode])
1480 elif args.no_software:
1481 logger.info("Fetching domains with no software type detected ...")
1482 database.cursor.execute("SELECT domain, software FROM instances WHERE software IS NULL ORDER BY last_updated ASC")
1484 logger.info("Fetching domains with other detection mode than AUTO_DISOVERY being set ...")
1485 database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode IS NOT NULL AND detection_mode != 'AUTO_DISCOVERY' ORDER BY last_updated ASC")
1486 elif args.no_detection:
1487 logger.info("Fetching domains with no detection mode being set ...")
1488 database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode IS NULL ORDER BY last_updated ASC")
1490 logger.info("Fetching domains for recently updated ...")
1491 database.cursor.execute("SELECT domain, software FROM instances ORDER BY last_updated ASC")
1493 domains = database.cursor.fetchall()
1495 logger.info("Checking %d domain(s) ...", len(domains))
1498 logger.debug("row[]='%s'", type(row))
1499 if not args.force and instances.is_recent(row["domain"], "last_nodeinfo"):
1500 logger.debug("row[domain]='%s' has been recently checked - SKIPPED!", row["domain"])
1504 logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
1505 software = federation.determine_software(row["domain"])
1507 logger.debug("Determined software='%s'", software)
1508 if (software != row["software"] and software is not None) or args.force is True:
1509 logger.debug("software='%s'", software)
1510 if software is None:
1511 logger.debug("Setting nodeinfo_url to 'None' for row[domain]='%s' ...", row["domain"])
1512 instances.set_nodeinfo_url(row["domain"], None)
1514 logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
1515 instances.set_software(row["domain"], software)
1517 if software is not None:
1518 logger.debug("Setting row[domain]='%s' as successfully determined ...", row["domain"])
1519 instances.set_success(row["domain"])
1520 except network.exceptions as exception:
1521 logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1522 instances.set_last_error(row["domain"], exception)
1524 instances.set_last_nodeinfo(row["domain"])
1525 instances.update(row["domain"])
1528 logger.debug("Success! - EXIT!")
1531 def fetch_instances_social(args: argparse.Namespace) -> int:
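"""Fetch the instance list from the instances.social API (requires 'instances_social_api_key' in config.json) and fetch instances for all new, wanted domains."""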
1532 logger.debug("args[]='%s' - CALLED!", type(args))
1534 logger.debug("Invoking locking.acquire() ...")
1537 source_domain = "instances.social"
1539 if config.get("instances_social_api_key") == "":
1540 logger.error("API key not set. Please set in your config.json file.")
1542 elif sources.is_recent(source_domain):
1543 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1546 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1547 sources.update(source_domain)
1550 "Authorization": f"Bearer {config.get('instances_social_api_key')}",
1553 logger.info("Fetching list from source_domain='%s' ...", source_domain)
1554 fetched = network.get_json_api(
1556 "/api/1.0/instances/list?count=0&sort_by=name",
1558 timeout=(config.get("connection_timeout"), config.get("read_timeout"))
1560 logger.debug("fetched[]='%s'", type(fetched))
1562 if "error_message" in fetched:
1563 logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1565 elif "exception" in fetched:
1566 logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1568 elif "json" not in fetched:
1569 logger.warning("fetched has no element 'json' - EXIT!")
1571 elif "instances" not in fetched["json"]:
1572 logger.warning("fetched[row] has no element 'instances' - EXIT!")
1576 rows = fetched["json"]["instances"]
1578 logger.info("Checking %d row(s) ...", len(rows))
1580 logger.debug("row[]='%s'", type(row))
1581 domain = tidyup.domain(row["name"]) if row["name"] not in [None, ""] else None
1582 logger.debug("domain='%s' - AFTER!", domain)
1584 if domain is None or domain == "":
1585 logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
1588 logger.debug("domain='%s' - BEFORE!", domain)
1589 domain = domain.encode("idna").decode("utf-8")
1590 logger.debug("domain='%s' - AFTER!", domain)
1592 if not domain_helper.is_wanted(domain):
1593 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1595 elif domain in domains:
1596 logger.debug("domain='%s' is already added - SKIPPED!", domain)
1598 elif instances.is_registered(domain):
1599 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1601 elif instances.is_recent(domain):
1602 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1605 logger.info("Fetching instances from domain='%s'", domain)
1606 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1608 logger.debug("Success! - EXIT!")
1611 def fetch_relaylist(args: argparse.Namespace) -> int:
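"""Fetch the relay list from api.relaylist.com and fetch instances for all new, wanted relay domains."""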
1612 logger.debug("args[]='%s' - CALLED!", type(args))
1614 logger.debug("Invoking locking.acquire() ...")
1617 source_domain = "api.relaylist.com"
1619 if sources.is_recent(source_domain):
1620 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1623 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1624 sources.update(source_domain)
1626 logger.info("Fetching list from source_domain='%s' ...", source_domain)
1627 fetched = network.get_json_api(
1631 (config.get("connection_timeout"), config.get("read_timeout"))
1633 logger.debug("fetched[]='%s'", type(fetched))
1635 if "error_message" in fetched:
1636 logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1638 elif "exception" in fetched:
1639 logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1641 elif "json" not in fetched:
1642 logger.warning("fetched has no element 'json' - EXIT!")
1647 logger.info("Checking %d row(s) ...", len(fetched["json"]))
1648 for row in fetched["json"]:
1649 logger.debug("row[]='%s'", type(row))
1650 domain = urlparse(row["url"]).netloc.lower().split(":")[0]
1651 logger.debug("domain='%s' - AFTER!", domain)
1653 if domain is None or domain == "":
1654 logger.debug("domain='%s' is empty after parsing row[url]='%s' - SKIPPED!", domain, row["url"])
1657 logger.debug("domain='%s' - BEFORE!", domain)
1658 domain = domain.encode("idna").decode("utf-8")
1659 logger.debug("domain='%s' - AFTER!", domain)
1661 if not domain_helper.is_wanted(domain):
1662 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1664 elif domain in domains:
1665 logger.debug("domain='%s' is already added - SKIPPED!", domain)
1667 elif instances.is_registered(domain):
1668 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1670 elif instances.is_recent(domain):
1671 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1674 logger.info("Fetching instances from domain='%s'", domain)
1675 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1677 logger.debug("Success! - EXIT!")
1680 def fetch_relays(args: argparse.Namespace) -> int:
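"""Fetch peer lists from known relay instances (activityrelay, aoderelay, selective-relay, pub-relay) and register newly discovered, wanted domains."""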
1681 logger.debug("args[]='%s' - CALLED!", type(args))
1683 logger.debug("Invoking locking.acquire() ...")
1686 if args.domain is not None and args.domain != "":
1687 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND domain = ? LIMIT 1", [args.domain])
1688 elif args.software is not None and args.software != "":
1689 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND software = ?", [args.software])
1691 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay')")
1694 rows = database.cursor.fetchall()
1696 logger.info("Checking %d relays ...", len(rows))
1698 logger.debug("row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1700 if not args.force and instances.is_recent(row["domain"]):
1701 logger.debug("row[domain]='%s' has been recently fetched - SKIPPED!", row["domain"])
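# pub-relay publishes its peers via nodeinfo metadata; the other relay types are scraped from their HTML index page below.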
1705 if row["software"] == "pub-relay":
1706 logger.info("Fetching row[nodeinfo_url]='%s' from relay row[domain]='%s',row[software]='%s' ...", row["nodeinfo_url"], row["domain"], row["software"])
1707 raw = network.fetch_api_url(
1708 row["nodeinfo_url"],
1709 (config.get("connection_timeout"), config.get("read_timeout"))
1712 logger.debug("raw[%s]()=%d", type(raw), len(raw))
1713 if "exception" in raw:
1714 logger.warning("row[domain]='%s' has caused an exception: '%s' - raising again ...", row["domain"], type(raw["exception"]))
1715 raise raw["exception"]
1716 elif "error_message" in raw:
1717 logger.warning("row[domain]='%s' has caused error message: '%s' - SKIPPED!", row["domain"], raw["error_message"])
1718 instances.set_last_error(row["domain"], raw)
1719 instances.set_last_instance_fetch(row["domain"])
1720 instances.update(row["domain"])
1722 elif "json" not in raw:
1723 logger.warning("raw()=%d does not contain key 'json' in response - SKIPPED!", len(raw))
1725 elif not "metadata" in raw["json"]:
1726 logger.warning("raw[json]()=%d does not contain key 'json' in response - SKIPPED!", len(raw["json"]))
1728 elif not "peers" in raw["json"]["metadata"]:
1729 logger.warning("raw[json][metadata()=%d does not contain key 'json' in response - SKIPPED!", len(raw["json"]["metadata"]))
1732 logger.info("Fetching / from relay row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1733 raw = utils.fetch_url(
1734 f"https://{row['domain']}",
1735 network.web_headers,
1736 (config.get("connection_timeout"), config.get("read_timeout"))
1738 logger.debug("raw[%s]()=%d", type(raw), len(raw))
1740 doc = bs4.BeautifulSoup(raw, features="html.parser")
1741 logger.debug("doc[]='%s'", type(doc))
1743 except network.exceptions as exception:
1744 logger.warning("Exception '%s' during fetching from relay '%s': '%s'", type(exception), row["domain"], str(exception))
1745 instances.set_last_error(row["domain"], exception)
1746 instances.set_last_instance_fetch(row["domain"])
1747 instances.update(row["domain"])
1750 logger.debug("row[software]='%s'", row["software"])
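# activityrelay lists its registered instances inside a <p> paragraph on its index page; domains are taken from that paragraph's text nodes.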
1751 if row["software"] == "activityrelay":
1752 logger.debug("Checking row[domain]='%s' ...", row["domain"])
1753 tags = doc.findAll("p")
1755 logger.debug("Checking %d paragraphs ...", len(tags))
1757 logger.debug("tag[]='%s'", type(tag))
1758 if len(tag.contents) == 0:
1759 logger.debug("tag='%s' is an empty tag - SKIPPED!", tag)
1761 elif "registered instances" not in tag.contents[0]:
1762 logger.debug("Skipping paragraph, text not found.")
1765 logger.debug("Found tag.contents[0][]='%s'", tag.contents[0])
1766 for domain in tag.contents:
1767 logger.debug("domain[%s]='%s'", type(domain), domain)
1768 if not isinstance(domain, bs4.element.NavigableString) or "registered instances" in domain:
1771 domain = str(domain)
1772 logger.debug("domain='%s'", domain)
1773 if not domain_helper.is_wanted(domain):
1774 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1777 logger.debug("domain='%s' - BEFORE!", domain)
1778 domain = tidyup.domain(domain) if domain not in [None, ""] else None
1779 logger.debug("domain='%s' - AFTER!", domain)
1781 if domain is None or domain == "":
1782 logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1784 elif domain not in peers:
1785 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1786 peers.append(domain)
1788 logger.debug("domains()=%d,domain='%s'", len(domains), domain)
1789 if dict_helper.has_key(domains, "domain", domain):
1790 logger.debug("domain='%s' already added", domain)
1793 logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
1796 "origin": row["domain"],
1798 elif row["software"] in ["aoderelay", "selective-relay"]:
1799 logger.debug("Checking row[domain]='%s' ...", row["domain"])
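# aoderelay renders each peer as a <section class="instance"> element, selective-relay as <li> entries under <div id="instances">; the domain is taken from the contained link.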
1800 if row["software"] == "aoderelay":
1801 tags = doc.findAll("section", {"class": "instance"})
1803 tags = doc.find("div", {"id": "instances"}).findAll("li")
1805 logger.debug("Checking %d tags ...", len(tags))
1807 logger.debug("tag[]='%s'", type(tag))
1809 link = tag.find("a")
1810 logger.debug("link[%s]='%s'", type(link), link)
1811 if not isinstance(link, bs4.element.Tag):
1812 logger.warning("tag[%s]='%s' is not type of 'bs4.element.Tag' - SKIPPED!", type(tag), tag)
1815 components = urlparse(link.get("href"))
1816 logger.debug("components(%d)='%s'", len(components), components)
1817 domain = components.netloc.lower().split(":")[0]
1819 logger.debug("domain='%s' - BEFORE!", domain)
1820 domain = tidyup.domain(domain) if domain not in [None, ""] else None
1821 logger.debug("domain='%s' - AFTER!", domain)
1823 if domain is None or domain == "":
1824 logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1826 elif domain not in peers:
1827 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1828 peers.append(domain)
1830 logger.debug("domains()=%d,domain='%s'", len(domains), domain)
1831 if dict_helper.has_key(domains, "domain", domain):
1832 logger.debug("domain='%s' already added", domain)
1835 logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
1838 "origin": row["domain"],
1840 elif row["software"] == "pub-relay":
1841 logger.debug("Checking %d peer(s) row[domain]='%s' ...", len(raw["json"]["metadata"]["peers"]), row["domain"])
1842 for domain in raw["json"]["metadata"]["peers"]:
1843 logger.debug("domain='%s' - BEFORE!", domain)
1844 domain = tidyup.domain(domain) if domain not in [None, ""] else None
1845 logger.debug("domain='%s' - AFTER!", domain)
1847 if domain is None or domain == "":
1848 logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1850 elif domain not in peers:
1851 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1852 peers.append(domain)
1854 logger.debug("domains()=%d,domain='%s'", len(domains), domain)
1855 if dict_helper.has_key(domains, "domain", domain):
1856 logger.debug("domain='%s' already added", domain)
1859 logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
1862 "origin": row["domain"],
1865 logger.warning("row[domain]='%s',row[software]='%s' is not supported", row["domain"], row["software"])
1868 logger.debug("Updating last_instance_fetch for row[domain]='%s' ...", row["domain"])
1869 instances.set_last_instance_fetch(row["domain"])
1871 logger.info("Relay '%s' has %d peer(s) registered.", row["domain"], len(peers))
1872 instances.set_total_peers(row["domain"], peers)
1874 logger.debug("Flushing data for row[domain]='%s'", row["domain"])
1875 instances.update(row["domain"])
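# Second pass: fetch instances for all newly discovered domains that are wanted and not yet registered.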
1877 logger.info("Checking %d domains ...", len(domains))
1879 logger.debug("row[domain]='%s',row[origin]='%s'", row["domain"], row["origin"])
1880 if not domain_helper.is_wanted(row["domain"]):
1881 logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
1883 elif instances.is_registered(row["domain"]):
1884 logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
1887 logger.info("Fetching row[domain]='%s',row[origin]='%s' ...", row["domain"], row["origin"])
1888 federation.fetch_instances(row["domain"], row["origin"], None, inspect.currentframe().f_code.co_name)
1890 logger.debug("Success! - EXIT!")
1893 def convert_idna(args: argparse.Namespace) -> int:
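"""Convert non-punycode domain names in the 'instances' and 'blocks' tables to their IDNA (punycode) representation."""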
1894 logger.debug("args[]='%s' - CALLED!", type(args))
1896 database.cursor.execute("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC")
1897 rows = database.cursor.fetchall()
1899 logger.debug("rows[]='%s'", type(rows))
1900 instances.translate_idnas(rows, "domain")
1902 database.cursor.execute("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC")
1903 rows = database.cursor.fetchall()
1905 logger.debug("rows[]='%s'", type(rows))
1906 instances.translate_idnas(rows, "origin")
1908 database.cursor.execute("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC")
1909 rows = database.cursor.fetchall()
1911 logger.debug("rows[]='%s'", type(rows))
1912 blocks.translate_idnas(rows, "blocker")
1914 database.cursor.execute("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC")
1915 rows = database.cursor.fetchall()
1917 logger.debug("rows[]='%s'", type(rows))
1918 blocks.translate_idnas(rows, "blocked")
1920 logger.debug("Success! - EXIT!")
1923 def remove_invalid(args: argparse.Namespace) -> int:
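"""Remove instances with invalid domain names, including their block records, then vacuum the database."""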
1924 logger.debug("args[]='%s' - CALLED!", type(args))
1926 logger.debug("Invoking locking.acquire() ...")
1929 database.cursor.execute("SELECT domain FROM instances ORDER BY domain ASC")
1930 rows = database.cursor.fetchall()
1932 logger.info("Checking %d domains ...", len(rows))
1934 logger.debug("row[domain]='%s'", row["domain"])
1935 if not validators.domain(row["domain"].split("/")[0]):
1936 logger.info("Invalid row[domain]='%s' found, removing ...", row["domain"])
1937 database.cursor.execute("DELETE FROM blocks WHERE blocker = ? OR blocked = ?", [row["domain"], row["domain"]])
1938 database.cursor.execute("DELETE FROM instances WHERE domain = ? LIMIT 1", [row["domain"]])
1940 logger.debug("Invoking commit() ...")
1941 database.connection.commit()
1943 logger.info("Vaccum cleaning database ...")
1944 database.cursor.execute("VACUUM")
1946 logger.debug("Success! - EXIT!")