# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import csv
import inspect
import json
import logging
import time

from urllib.parse import urlparse

import argparse
import atoma
import bs4
import markdown
import reqto
import validators

from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import blocklists
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import dicts as dict_helper
from fba.helpers import domain as domain_helper
from fba.helpers import locking
from fba.helpers import processing
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import csrf
from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances
from fba.models import sources

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)

def check_instance(args: argparse.Namespace) -> int:
    logger.debug("args.domain='%s' - CALLED!", args.domain)

    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status
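
# A minimal sketch for driving one of these command handlers directly, e.g.
# from a REPL or a test: the handlers only read attributes off the Namespace,
# so one can be built by hand (the domain value is illustrative):
#
#   ns = argparse.Namespace(domain="example.social")
#   status = check_instance(ns)  # 0 = valid, not blacklisted, not yet registered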

def check_nodeinfo(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")

    cnt = 0
    for row in database.cursor.fetchall():
        logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        punycode = row["domain"].encode("idna").decode("utf-8")

        if row["nodeinfo_url"].startswith("/"):
            logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
            continue
        elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
            logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
            cnt = cnt + 1

    logger.info("Found %d row(s)", cnt)

    logger.debug("EXIT!")
    return 0
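
# The punycode comparison above leans on Python's built-in IDNA codec. A quick
# sketch of what it produces (standard library only):
#
#   "bücher.example".encode("idna").decode("utf-8")  # -> 'xn--bcher-kva.example'
#   "plain.example".encode("idna").decode("utf-8")   # -> 'plain.example'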

def fetch_pixelfed_api(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF by default, you don't have to add network.source_headers by yourself here
    headers = tuple()
    source_domain = "pixelfed.org"

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    try:
        logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
        headers = csrf.determine(source_domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
        return 1

    try:
        logger.info("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            source_domain,
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 101
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 102

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif row["domain"] == "":
                logger.debug("row[domain] is empty - SKIPPED!")
                continue

            logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
            domain = row["domain"].encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Fetching instances from domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Cannot fetch JSON,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 103

    logger.debug("Success! - EXIT!")
    return 0
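
# The checks above assume network.get_json_api() wraps the decoded payload in
# a dict roughly of this shape (inferred from the key accesses here, not a
# documented contract):
#
#   {"json": {"data": [{"domain": "pixelfed.example"}, ...]}}
#   # or, on failure: {"error_message": "..."}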

def fetch_bkali(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "gql.api.bka.li"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()
    try:
        logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
        fetched = network.post_json_api(
            source_domain,
            "/v1/graphql",
            json.dumps({
                "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
            })
        )

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for 'gql.sources.bka.li' returned error message='%s'", fetched["error_message"])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif entry["domain"] == "":
                logger.debug("entry[domain] is empty - SKIPPED!")
                continue
            elif not domain_helper.is_wanted(entry["domain"]):
                logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
                continue
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])
    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0
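
# The validation chain above expects the GraphQL endpoint to answer the
# domainlist query with a payload like this (inferred from the key checks,
# not from the API's published schema):
#
#   {"data": {"nodeinfo": [{"domain": "one.example"}, {"domain": "two.example"}]}}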

def fetch_blocks(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ? LIMIT 1", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC", [args.software]
        )
    elif args.force:
        logger.debug("Re-checking all instances ...")
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC"
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)

        if not domain_helper.is_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("Setting last_blocked,has_obfuscation=false for blocker='%s' ...", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        # c.s isn't part of oliphant's "hidden" blocklists
        if blocker == "chaos.social" or software_helper.is_relay(software) or blocklists.has(blocker):
            logger.debug("Skipping blocker='%s', run ./fba.py fetch_cs, fetch_oliphant, fetch_csv instead!", blocker)
            continue

        logger.debug("Invoking federation.fetch_blocks(%s) ...", blocker)
        blocking = federation.fetch_blocks(blocker)

        logger.debug("blocker='%s',software='%s',blocking()=%d", blocker, software, len(blocking))
        if len(blocking) == 0:
            logger.debug("blocker='%s',software='%s' - fetching blocklist ...", blocker, software)
            if software == "pleroma":
                blocking = pleroma.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "mastodon":
                blocking = mastodon.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "lemmy":
                blocking = lemmy.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "friendica":
                blocking = friendica.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "misskey":
                blocking = misskey.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            else:
                logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
        instances.set_total_blocks(blocker, blocking)

        blockdict = list()
        deobfuscated = obfuscated = 0

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)
                instances.set_has_obfuscation(blocker, True)
                obfuscated = obfuscated + 1

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["digest"] if "digest" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    continue

                deobfuscated = deobfuscated + 1
                block["blocked"] = row["domain"]
                origin = row["origin"]
                nodeinfo_url = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)
                instances.set_has_obfuscation(blocker, True)
                obfuscated = obfuscated + 1

                # Some obscure them with question marks, not sure if that's dependent on version or not
                row = instances.deobfuscate("?", block["blocked"], block["digest"] if "digest" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    continue

                deobfuscated = deobfuscated + 1
                block["blocked"] = row["domain"]
                origin = row["origin"]
                nodeinfo_url = row["nodeinfo_url"]

            logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue

            logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
            block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
            logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])

            if not domain_helper.is_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            block["block_level"] = blocks.alias_block_level(block["block_level"])

            if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.info("blocker='%s' has %d obfuscated domain(s) and %d of them could be deobfuscated.", blocker, obfuscated, deobfuscated)
        instances.set_obfuscated_blocks(blocker, obfuscated)

        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0
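
# instances.deobfuscate() (used above) resolves masked entries like "*.example"
# or "examp?e.com" against already-known domains. A self-contained sketch of the
# same idea using only the standard library; the candidate list is made up:
#
#   import fnmatch
#   known = ["example.com", "sample.org"]
#   [d for d in known if fnmatch.fnmatch(d, "ex*le.com")]  # -> ['example.com']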

def fetch_observer(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fediverse.observer"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    types = list()
    if args.software is None:
        logger.info("Fetching software list ...")
        raw = utils.fetch_url(
            f"https://{source_domain}",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        ).text
        logger.debug("raw[%s]()=%d", type(raw), len(raw))

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        navbar = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"})
        logger.debug("navbar[]='%s'", type(navbar))
        if navbar is None:
            logger.warning("Cannot find navigation bar, cannot continue!")
            return 1

        items = navbar.findAll("a", {"class": "dropdown-item"})
        logger.debug("items[]='%s'", type(items))

        logger.info("Checking %d menu items ...", len(items))
        for item in items:
            logger.debug("item[%s]='%s'", type(item), item)
            if item.text.lower() == "all":
                logger.debug("Skipping 'All' menu entry ...")
                continue

            logger.debug("Appending item.text='%s' ...", item.text)
            types.append(tidyup.domain(item.text))
    else:
        logger.info("Adding args.software='%s' as type ...", args.software)
        types.append(args.software)

    logger.info("Fetching table data for %d software type(s) ...", len(types))
    for software in types:
        logger.debug("software='%s'", software)

        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            continue

        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://{source_domain}/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()
            domain = tidyup.domain(domain) if domain not in [None, ""] else None
            logger.debug("domain='%s' - AFTER!", domain)

            if domain is None or domain == "":
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue

            logger.info("Fetching instances for domain='%s'", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
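
# The scraping above relies on BeautifulSoup attribute filters. A minimal
# self-contained sketch of the same pattern (the HTML snippet is made up):
#
#   html = '<div><a class="url">foo.example</a><a class="url">bar.example</a></div>'
#   soup = bs4.BeautifulSoup(html, "html.parser")
#   [a.decode_contents() for a in soup.findAll("a", {"class": "url"})]
#   # -> ['foo.example', 'bar.example']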

def fetch_todon_wiki(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "wiki.todon.eu"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    blocklist = {
        "silenced": list(),
        "reject": list(),
    }

    logger.debug("Fetching domainblocks from source_domain='%s'", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/todon/domainblocks",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "todon.eu"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_last_blocked(blocker)
    instances.set_total_blocks(blocker, blocking)

    blockdict = list()
    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if not domain_helper.is_wanted(blocked):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue
            elif not domain_helper.is_wanted(blocker):
                logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
                continue
            elif blocks.is_instance_blocked(blocker, blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", blocked, block_level, blocker)
                blockdict.append({
                    "blocked": blocked,
                    "reason" : None,
                })

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
        network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_cs(args: argparse.Namespace):
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Markdown extensions needed to render federation.md; "tables" is the
    # essential one for the block tables parsed below.
    extensions = ["tables"]

    blocklist = {
        "silenced": list(),
        "reject": list(),
    }

    source_domain = "raw.githubusercontent.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching federation.md from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/chaossocial/meta/master/federation.md",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc()=%d[]='%s'", len(doc), type(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    blocklist["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    blocklist["reject"] = federation.find_domains(blocked)

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "chaos.social"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_last_blocked(blocker)
    instances.set_total_blocks(blocker, blocking)

    logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
    if len(blocking) > 0:
        blockdict = list()
        for block_level in blocklist:
            logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))

            for row in blocklist[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if "domain" not in row:
                    logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
                    continue
                elif not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
                    blockdict.append({
                        "blocked": row["domain"],
                        "reason" : row["reason"],
                    })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

    logger.debug("Success! - EXIT!")
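
# federation.md is converted to HTML first so its tables can be walked with
# BeautifulSoup. A sketch of that two-step conversion; it needs the "tables"
# markdown extension, and the markdown snippet is made up:
#
#   md = "|Instance|Reason|\n|---|---|\n|bad.example|spam|"
#   html = markdown.markdown(md, extensions=["tables"])
#   bs4.BeautifulSoup(html, "html.parser").find("table") is not None  # -> True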

def fetch_fba_rss(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    domains = list()

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    components = urlparse(args.feed)
    domain = components.netloc.lower().split(":")[0]

    logger.debug("domain='%s'", domain)
    if sources.is_recent(domain):
        logger.info("API from domain='%s' has recently been accessed - EXIT!", domain)
        return 0
    else:
        logger.debug("domain='%s' has not been recently used, marking ...", domain)
        sources.update(domain)

    logger.info("Fetching FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item[%s]='%s'", type(item), item)
            domain = item.link.split("=")[1]
            domain = tidyup.domain(domain) if domain not in [None, ""] else None

            logger.debug("domain='%s' - AFTER!", domain)
            if domain is None or domain == "":
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0
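
# The item.link.split("=")[1] above assumes feed links carry the domain as a
# single query value, e.g. (illustrative URL):
#
#   "https://fba.example/?domain=foo.example".split("=")[1]  # -> 'foo.example'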

def fetch_fbabot_atom(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "ryona.agency"
    feed = f"https://{source_domain}/users/fba/feed.atom"

    logger.debug("args.feed[%s]='%s'", type(args.feed), args.feed)
    if args.feed is not None and validators.url(args.feed):
        logger.debug("Setting feed='%s' ...", args.feed)
        feed = str(args.feed)
        source_domain = urlparse(args.feed).netloc

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                logger.debug("element[]='%s'", type(element))
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href) if href not in [None, ""] else None

                    logger.debug("domain='%s' - AFTER!", domain)
                    if domain is None or domain == "":
                        logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                        continue

                    logger.debug("domain='%s' - BEFORE!", domain)
                    domain = domain.encode("idna").decode("utf-8")
                    logger.debug("domain='%s' - AFTER!", domain)

                    if not domain_helper.is_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_instances(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("args.domain='%s' - checking ...", args.domain)
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid.", args.domain)
        return 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
        return 101

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    domain = tidyup.domain(args.domain)
    origin = software = None

    database.cursor.execute("SELECT origin, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
    row = database.cursor.fetchone()
    if row is not None:
        origin = row["origin"]
        software = row["software"]

    if software_helper.is_relay(software):
        logger.warning("args.domain='%s' is of software type '%s' which is not supported by this command. Please invoke fetch_relays instead.", args.domain, software)
        return 102

    try:
        logger.info("Fetching instances from args.domain='%s',origin='%s',software='%s' ...", domain, origin, software)
        federation.fetch_instances(domain, origin, software, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update(args.domain)
        return 103

    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus', 'neodb') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY total_peers DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] == "":
            logger.debug("row[domain] is empty - SKIPPED!")
            continue

        logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
        domain = row["domain"].encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("Domain domain='%s' is not wanted - SKIPPED!", domain)
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", domain, row["origin"], row["software"], row["nodeinfo_url"])
            federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0

def fetch_csv(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    logger.info("Checking %d CSV files ...", len(blocklists.csv_files))
    for block in blocklists.csv_files:
        logger.debug("block[blocker]='%s',block[csv_url]='%s'", block["blocker"], block["csv_url"])

        # Is domain given and not equal blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue

        logger.debug("Invoking processing.csv_block(%s, %s, fetch_csv) ...", block["blocker"], block["csv_url"])
        processing.csv_block(block["blocker"], block["csv_url"], inspect.currentframe().f_code.co_name)

    logger.debug("Success - EXIT!")
    return 0

def fetch_oliphant(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "codeberg.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"

    logger.debug("Downloading %d files ...", len(blocklists.oliphant_blocklists))
    for block in blocklists.oliphant_blocklists:
        # Is domain given and not equal blocker?
        logger.debug("block[blocker]='%s',block[csv_url]='%s'", block["blocker"], block["csv_url"])
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue

        url = f"{base_url}/{block['csv_url']}"

        logger.debug("Invoking processing.csv_block(%s, %s, fetch_oliphant) ...", block["blocker"], url)
        processing.csv_block(block["blocker"], url, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_txt(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    logger.info("Checking %d text file(s) ...", len(blocklists.txt_files))
    for row in blocklists.txt_files:
        logger.debug("Fetching row[url]='%s' ...", row["url"])
        response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code == 200 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.strip().split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain) if domain not in [None, ""] else None

                logger.debug("domain='%s' - AFTER!", domain)
                if domain is None or domain == "":
                    logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                    continue
                elif not domain_helper.is_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue
                elif instances.is_recent(domain):
                    logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                    continue

                logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
                processed = processing.instance(domain, row["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)
                if not processed:
                    logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
                    continue

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fedipact(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fedipact.online"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching / from source_domain='%s' ...", source_domain)
    response = utils.fetch_url(
        f"https://{source_domain}",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and response.text != "":
        logger.debug("Parsing %d Bytes ...", len(response.text))

        doc = bs4.BeautifulSoup(response.text, "html.parser")
        logger.debug("doc[]='%s'", type(doc))

        rows = doc.findAll("li")
        logger.info("Checking %d row(s) ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            domain = tidyup.domain(row.contents[0]) if row.contents[0] not in [None, ""] else None

            logger.debug("domain='%s' - AFTER!", domain)
            if domain is None or domain == "":
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.info("Fetching domain='%s' ...", domain)
            federation.fetch_instances(domain, "beach.city", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_joinmobilizon(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "instances.joinmobilizon.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching instances from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/api/v1/instances",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    parsed = json.loads(raw)
    logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))

    if "data" not in parsed:
        logger.warning("parsed()=%d does not contain key 'data'", len(parsed))
        return 1

    logger.info("Checking %d instances ...", len(parsed["data"]))
    for row in parsed["data"]:
        logger.debug("row[]='%s'", type(row))
        if "host" not in row:
            logger.warning("row='%s' does not contain key 'host' - SKIPPED!", row)
            continue
        elif not domain_helper.is_wanted(row["host"]):
            logger.debug("row[host]='%s' is not wanted - SKIPPED!", row["host"])
            continue
        elif instances.is_registered(row["host"]):
            logger.debug("row[host]='%s' is already registered - SKIPPED!", row["host"])
            continue

        logger.info("Fetching row[host]='%s' ...", row["host"])
        federation.fetch_instances(row["host"], "demo.mobilizon.org", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_joinmisskey(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "instanceapp.misskey.page"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching instances.json from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/instances.json",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    parsed = json.loads(raw)
    logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))

    if "instancesInfos" not in parsed:
        logger.warning("parsed()=%d does not contain element 'instancesInfos'", len(parsed))
        return 1

    logger.info("Checking %d instance(s) ...", len(parsed["instancesInfos"]))
    for row in parsed["instancesInfos"]:
        logger.debug("row[%s]='%s'", type(row), row)
        if "url" not in row:
            logger.warning("row()=%d does not have element 'url' - SKIPPED!", len(row))
            continue
        elif not domain_helper.is_wanted(row["url"]):
            logger.debug("row[url]='%s' is not wanted - SKIPPED!", row["url"])
            continue
        elif instances.is_registered(row["url"]):
            logger.debug("row[url]='%s' is already registered - SKIPPED!", row["url"])
            continue

        logger.info("Fetching row[url]='%s' ...", row["url"])
        federation.fetch_instances(row["url"], "misskey.io", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
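
# Sketch of the instances.json shape the checks above expect (inferred from
# the key accesses, not from a documented format):
#
#   {"instancesInfos": [{"url": "misskey.example", ...}, ...]}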

def recheck_obfuscation(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if isinstance(args.domain, str) and args.domain != "" and domain_helper.is_wanted(args.domain):
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
    elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
    else:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")

    rows = database.cursor.fetchall()
    logger.info("Checking %d domains ...", len(rows))
    for row in rows:
        logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        if (args.force is None or not args.force) and args.domain is None and args.software is None and instances.is_recent(row["domain"], "last_blocked"):
            logger.debug("row[domain]='%s' has been recently checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force))
            continue

        logger.debug("Invoking federation.fetch_blocks(%s) ...", row["domain"])
        blocking = federation.fetch_blocks(row["domain"])

        logger.debug("blocking()=%d", len(blocking))
        if len(blocking) == 0:
            if row["software"] == "pleroma":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = pleroma.fetch_blocks(row["domain"])
            elif row["software"] == "mastodon":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = mastodon.fetch_blocks(row["domain"])
            elif row["software"] == "lemmy":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = lemmy.fetch_blocks(row["domain"])
            elif row["software"] == "friendica":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = friendica.fetch_blocks(row["domain"])
            elif row["software"] == "misskey":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = misskey.fetch_blocks(row["domain"])
            else:
                logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])

        # c.s isn't part of oliphant's "hidden" blocklists
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] != "chaos.social" and not software_helper.is_relay(row["software"]) and not blocklists.has(row["domain"]):
            logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
            instances.set_last_blocked(row["domain"])
            instances.set_total_blocks(row["domain"], blocking)

        obfuscated = 0
        blockdict = list()

        logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
        for block in blocking:
            logger.debug("block[blocked]='%s'", block["blocked"])
            blocked = None

            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
                logger.debug("block='%s' is obfuscated.", block["blocked"])
                obfuscated = obfuscated + 1
                blocked = utils.deobfuscate(block["blocked"], row["domain"], block["digest"] if "digest" in block else None)
            elif not domain_helper.is_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
                logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
                continue

            logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
            if blocked is not None and blocked != block["blocked"]:
                logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
                obfuscated = obfuscated - 1

                if blacklist.is_blacklisted(blocked):
                    logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
                    continue
                elif blacklist.is_blacklisted(row["domain"]):
                    logger.debug("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"])
                    continue
                elif blocks.is_instance_blocked(row["domain"], blocked):
                    logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
                    continue

                block["block_level"] = blocks.alias_block_level(block["block_level"])

                logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
                if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
                    blockdict.append({
                        "blocked": blocked,
                        "reason" : block["reason"],
                    })

        logger.debug("Setting obfuscated=%d for row[domain]='%s' ...", obfuscated, row["domain"])
        instances.set_obfuscated_blocks(row["domain"], obfuscated)

        logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
        if instances.has_pending(row["domain"]):
            logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
            instances.update(row["domain"])

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", row["domain"], len(blockdict))
            network.send_bot_post(row["domain"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0
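
# utils.deobfuscate() (used above) can also match by digest: Mastodon-style
# exports ship a SHA-256 hash of the blocked domain next to the masked name.
# A self-contained sketch of digest matching; the candidate list is made up:
#
#   import hashlib
#   digest = hashlib.sha256("example.com".encode("utf-8")).hexdigest()
#   known = ["sample.org", "example.com"]
#   [d for d in known if hashlib.sha256(d.encode("utf-8")).hexdigest() == digest]
#   # -> ['example.com']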

def fetch_fedilist(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "demo.fedilist.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    url = f"http://{source_domain}/instance/csv?onion=not"
    if args.software is not None and args.software != "":
        logger.debug("args.software='%s'", args.software)
        url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"

    logger.info("Fetching url='%s' ...", url)
    response = reqto.get(
        url,
        headers=network.web_headers,
        timeout=(config.get("connection_timeout"), config.get("read_timeout")),
        allow_redirects=False
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if not response.ok or response.status_code > 200 or len(response.content) == 0:
        logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.content))
        return 1

    reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")

    logger.debug("reader[]='%s'", type(reader))
    if reader is None:
        logger.warning("Failed parsing response.content()=%d as CSV content", len(response.content))
        return 2

    rows = list(reader)

    logger.info("Checking %d rows ...", len(rows))
    for row in rows:
        logger.debug("row[]='%s'", type(row))
        if "hostname" not in row:
            logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(row))
            continue

        logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"])
        domain = tidyup.domain(row["hostname"]) if row["hostname"] not in [None, ""] else None
        logger.debug("domain='%s' - AFTER!", domain)

        if domain is None or domain == "":
            logger.debug("domain='%s' is empty after tidyup.domain(): row[hostname]='%s' - SKIPPED!", domain, row["hostname"])
            continue

        logger.debug("domain='%s' - BEFORE!", domain)
        domain = domain.encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            continue
        elif (args.force is None or not args.force) and instances.is_registered(domain):
            logger.debug("domain='%s' is already registered, --force not specified: args.force[]='%s'", domain, type(args.force))
            continue
        elif instances.is_recent(domain):
            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
            continue

        logger.info("Fetching instances from domain='%s' ...", domain)
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
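
# csv.DictReader over splitlines() yields one dict per data row, keyed by the
# header line. A minimal sketch (the CSV content is made up):
#
#   content = "hostname,software\nfoo.example,mastodon\n"
#   list(csv.DictReader(content.splitlines(), dialect="unix"))
#   # -> [{'hostname': 'foo.example', 'software': 'mastodon'}]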
def update_nodeinfo(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        logger.debug("Fetching args.domain='%s'", args.domain)
        database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
    elif args.software is not None and args.software != "":
        logger.info("Fetching domains for args.software='%s'", args.software)
        database.cursor.execute("SELECT domain, software FROM instances WHERE software = ? ORDER BY last_updated ASC", [args.software])
    elif args.mode is not None and args.mode != "":
        logger.info("Fetching domains for args.mode='%s'", args.mode.upper())
        database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode = ? ORDER BY last_updated ASC", [args.mode])
    elif args.no_software:
        logger.info("Fetching domains with no software type detected ...")
        database.cursor.execute("SELECT domain, software FROM instances WHERE software IS NULL ORDER BY last_updated ASC")
    elif args.no_auto:  # assumption: restored flag name for the AUTO_DISCOVERY filter branch
        logger.info("Fetching domains with other detection mode than AUTO_DISCOVERY being set ...")
        database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode IS NOT NULL AND detection_mode != 'AUTO_DISCOVERY' ORDER BY last_updated ASC")
    elif args.no_detection:
        logger.info("Fetching domains with no detection mode being set ...")
        database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode IS NULL ORDER BY last_updated ASC")
    else:
        logger.info("Fetching domains for recently updated ...")
        database.cursor.execute("SELECT domain, software FROM instances ORDER BY last_updated ASC")

    domains = database.cursor.fetchall()

    logger.info("Checking %d domain(s) ...", len(domains))
    cnt = 0
    for row in domains:
        logger.debug("row[]='%s'", type(row))
        if not args.force and instances.is_recent(row["domain"], "last_nodeinfo"):
            logger.debug("row[domain]='%s' has been recently checked - SKIPPED!", row["domain"])
            continue

        try:
            logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
            software = federation.determine_software(row["domain"])

            logger.debug("Determined software='%s'", software)
            if (software != row["software"] and software is not None) or args.force is True:
                logger.debug("software='%s'", software)
                if software is None:
                    logger.debug("Setting nodeinfo_url to 'None' for row[domain]='%s' ...", row["domain"])
                    instances.set_nodeinfo_url(row["domain"], None)

                logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
                instances.set_software(row["domain"], software)

            if software is not None:
                logger.debug("Setting row[domain]='%s' as successfully determined ...", row["domain"])
                instances.set_success(row["domain"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
            instances.set_last_error(row["domain"], exception)

        instances.set_last_nodeinfo(row["domain"])
        instances.update(row["domain"])
        cnt = cnt + 1

    logger.debug("Success! - EXIT!")
    return 0
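
# Usage sketch for update_nodeinfo() above. The filter flags are mutually
# exclusive by branch order: --domain wins over --software, --mode,
# --no-software, --no-auto (an assumed flag name, see the marked branch) and
# --no-detection; with no filter all instances are re-checked, oldest first.
# Example (assumption: sub-command name matches the function name):
#
#   ./fba.py update_nodeinfo --software=pleroma --force
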
def fetch_instances_social(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "instances.social"

    if config.get("instances_social_api_key") == "":
        logger.error("API key not set. Please set in your config.json file.")
        return 1
    elif sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 2
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    headers = {
        "Authorization": f"Bearer {config.get('instances_social_api_key')}",
    }

    logger.info("Fetching list from source_domain='%s' ...", source_domain)
    fetched = network.get_json_api(
        source_domain,
        "/api/1.0/instances/list?count=0&sort_by=name",
        headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )
    logger.debug("fetched[]='%s'", type(fetched))

    if "error_message" in fetched:
        logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
        return 3
    elif "exception" in fetched:
        logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
        return 4
    elif "json" not in fetched:
        logger.warning("fetched has no element 'json' - EXIT!")
        return 5
    elif "instances" not in fetched["json"]:
        logger.warning("fetched[json] has no element 'instances' - EXIT!")
        return 6

    domains = list()
    rows = fetched["json"]["instances"]

    logger.info("Checking %d row(s) ...", len(rows))
    for row in rows:
        logger.debug("row[]='%s'", type(row))
        domain = tidyup.domain(row["name"]) if row["name"] not in [None, ""] else None
        logger.debug("domain='%s' - AFTER!", domain)

        if domain is None or domain == "":
            logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
            continue

        logger.debug("domain='%s' - BEFORE!", domain)
        domain = domain.encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            continue
        elif domain in domains:
            logger.debug("domain='%s' is already added - SKIPPED!", domain)
            continue
        elif instances.is_registered(domain):
            logger.debug("domain='%s' is already registered - SKIPPED!", domain)
            continue
        elif instances.is_recent(domain):
            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
            continue

        logger.info("Fetching instances from domain='%s'", domain)
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
        domains.append(domain)  # remember handled domains so the 'already added' check above can fire

    logger.debug("Success! - EXIT!")
    return 0
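
# Configuration sketch for fetch_instances_social() above: the Bearer token
# is read via config.get("instances_social_api_key"), so config.json needs an
# entry along these lines (key name taken from the code, value is yours):
#
#   { "instances_social_api_key": "<token from instances.social>" }
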
def fetch_relaylist(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "api.relaylist.com"

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching list from source_domain='%s' ...", source_domain)
    fetched = network.get_json_api(
        source_domain,
        "/relays",  # assumption: endpoint path, restored from elided lines
        {},
        (config.get("connection_timeout"), config.get("read_timeout"))
    )
    logger.debug("fetched[]='%s'", type(fetched))

    if "error_message" in fetched:
        logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
        return 2
    elif "exception" in fetched:
        logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
        return 3
    elif "json" not in fetched:
        logger.warning("fetched has no element 'json' - EXIT!")
        return 4

    domains = list()

    logger.info("Checking %d row(s) ...", len(fetched["json"]))
    for row in fetched["json"]:
        logger.debug("row[]='%s'", type(row))
        domain = urlparse(row["url"]).netloc.lower().split(":")[0]
        logger.debug("domain='%s' - AFTER!", domain)

        if domain is None or domain == "":
            logger.debug("domain='%s' is empty after parsing row[url] - SKIPPED!", domain)
            continue

        logger.debug("domain='%s' - BEFORE!", domain)
        domain = domain.encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            continue
        elif domain in domains:
            logger.debug("domain='%s' is already added - SKIPPED!", domain)
            continue
        elif instances.is_registered(domain):
            logger.debug("domain='%s' is already registered - SKIPPED!", domain)
            continue
        elif instances.is_recent(domain):
            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
            continue

        logger.info("Fetching instances from domain='%s'", domain)
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
        domains.append(domain)  # remember handled domains so the 'already added' check above can fire

    logger.debug("Success! - EXIT!")
    return 0
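
# Note on the parsing step in fetch_relaylist() above: rows carry full relay
# URLs, so the domain is derived with urlparse() rather than tidyup.domain().
# For a hypothetical input the extraction behaves like this:
#
#   >>> urlparse("https://relay.example.com:443/inbox").netloc.lower().split(":")[0]
#   'relay.example.com'
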
def fetch_relays(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND domain = ? LIMIT 1", [args.domain])
    elif args.software is not None and args.software != "":
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND software = ?", [args.software])
    else:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay')")

    domains = list()
    rows = database.cursor.fetchall()

    logger.info("Checking %d relays ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
        peers = list()
        if not args.force and instances.is_recent(row["domain"]):
            logger.debug("row[domain]='%s' has been recently fetched - SKIPPED!", row["domain"])
            continue

        try:
            if row["software"] == "pub-relay":
                logger.info("Fetching row[nodeinfo_url]='%s' from relay row[domain]='%s',row[software]='%s' ...", row["nodeinfo_url"], row["domain"], row["software"])
                raw = network.fetch_api_url(
                    row["nodeinfo_url"],
                    (config.get("connection_timeout"), config.get("read_timeout"))
                )

                logger.debug("raw[%s]()=%d", type(raw), len(raw))
                if "exception" in raw:
                    logger.warning("row[domain]='%s' has caused an exception: '%s' - raising again ...", row["domain"], type(raw["exception"]))
                    raise raw["exception"]
                elif "error_message" in raw:
                    logger.warning("row[domain]='%s' has caused error message: '%s' - SKIPPED!", row["domain"], raw["error_message"])
                    instances.set_last_error(row["domain"], raw)
                    instances.set_last_instance_fetch(row["domain"])
                    instances.update(row["domain"])
                    continue
                elif "json" not in raw:
                    logger.warning("raw()=%d does not contain key 'json' in response - SKIPPED!", len(raw))
                    continue
                elif "metadata" not in raw["json"]:
                    logger.warning("raw[json]()=%d does not contain key 'metadata' in response - SKIPPED!", len(raw["json"]))
                    continue
                elif "peers" not in raw["json"]["metadata"]:
                    logger.warning("raw[json][metadata]()=%d does not contain key 'peers' in response - SKIPPED!", len(raw["json"]["metadata"]))
                    continue
            else:
                logger.info("Fetching / from relay row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
                raw = utils.fetch_url(
                    f"https://{row['domain']}",
                    network.web_headers,
                    (config.get("connection_timeout"), config.get("read_timeout"))
                ).text
                logger.debug("raw[%s]()=%d", type(raw), len(raw))

                doc = bs4.BeautifulSoup(raw, features="html.parser")
                logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching from relay '%s': '%s'", type(exception), row["domain"], str(exception))
            instances.set_last_error(row["domain"], exception)
            instances.set_last_instance_fetch(row["domain"])
            instances.update(row["domain"])
            continue

        logger.debug("row[software]='%s'", row["software"])
        if row["software"] == "activityrelay":
            logger.debug("Checking row[domain]='%s' ...", row["domain"])
            tags = doc.findAll("p")

            logger.debug("Checking %d paragraphs ...", len(tags))
            for tag in tags:
                logger.debug("tag[]='%s'", type(tag))
                if len(tag.contents) == 0:
                    logger.debug("tag='%s' is an empty tag - SKIPPED!", tag)
                    continue
                elif "registered instances" not in tag.contents[0]:
                    logger.debug("Skipping paragraph, text not found.")
                    continue

                logger.debug("Found tag.contents[0][]='%s'", tag.contents[0])
                for domain in tag.contents:
                    logger.debug("domain[%s]='%s'", type(domain), domain)
                    if not isinstance(domain, bs4.element.NavigableString) or "registered instances" in domain:
                        logger.debug("domain[]='%s' is not a plain text node - SKIPPED!", type(domain))
                        continue

                    domain = str(domain)
                    logger.debug("domain='%s'", domain)
                    if not domain_helper.is_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                        continue

                    logger.debug("domain='%s' - BEFORE!", domain)
                    domain = tidyup.domain(domain) if domain not in [None, ""] else None
                    logger.debug("domain='%s' - AFTER!", domain)

                    if domain is None or domain == "":
                        logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
                        continue
                    elif domain not in peers:
                        logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
                        peers.append(domain)

                    if dict_helper.has_key(domains, "domain", domain):
                        logger.debug("domain='%s' already added", domain)
                        continue

                    logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
                    domains.append({
                        "domain": domain,
                        "origin": row["domain"],
                    })
        elif row["software"] in ["aoderelay", "selective-relay"]:
            logger.debug("Checking row[domain]='%s' ...", row["domain"])
            if row["software"] == "aoderelay":
                tags = doc.findAll("section", {"class": "instance"})
            else:
                tags = doc.find("div", {"id": "instances"}).findAll("li")

            logger.debug("Checking %d tags ...", len(tags))
            for tag in tags:
                logger.debug("tag[]='%s'", type(tag))

                link = tag.find("a")
                logger.debug("link[%s]='%s'", type(link), link)
                if not isinstance(link, bs4.element.Tag):
                    logger.warning("tag[%s]='%s' is not type of 'bs4.element.Tag' - SKIPPED!", type(tag), tag)
                    continue

                components = urlparse(link.get("href"))
                logger.debug("components(%d)='%s'", len(components), components)
                domain = components.netloc.lower().split(":")[0]

                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain) if domain not in [None, ""] else None
                logger.debug("domain='%s' - AFTER!", domain)

                if domain is None or domain == "":
                    logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
                    continue
                elif domain not in peers:
                    logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
                    peers.append(domain)

                if dict_helper.has_key(domains, "domain", domain):
                    logger.debug("domain='%s' already added", domain)
                    continue

                logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
                domains.append({
                    "domain": domain,
                    "origin": row["domain"],
                })
        elif row["software"] == "pub-relay":
            logger.debug("Checking %d peer(s) row[domain]='%s' ...", len(raw["json"]["metadata"]["peers"]), row["domain"])
            for domain in raw["json"]["metadata"]["peers"]:
                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain) if domain not in [None, ""] else None
                logger.debug("domain='%s' - AFTER!", domain)

                if domain is None or domain == "":
                    logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
                    continue
                elif domain not in peers:
                    logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
                    peers.append(domain)

                if dict_helper.has_key(domains, "domain", domain):
                    logger.debug("domain='%s' already added", domain)
                    continue

                logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
                domains.append({
                    "domain": domain,
                    "origin": row["domain"],
                })
        else:
            logger.warning("row[domain]='%s',row[software]='%s' is not supported", row["domain"], row["software"])
            continue

        logger.debug("Updating last_instance_fetch for row[domain]='%s' ...", row["domain"])
        instances.set_last_instance_fetch(row["domain"])

        logger.info("Relay '%s' has %d peer(s) registered.", row["domain"], len(peers))
        instances.set_total_peers(row["domain"], peers)

        logger.debug("Flushing data for row[domain]='%s'", row["domain"])
        instances.update(row["domain"])

    logger.info("Checking %d domains ...", len(domains))
    for row in domains:
        logger.debug("row[domain]='%s',row[origin]='%s'", row["domain"], row["origin"])
        if not domain_helper.is_wanted(row["domain"]):
            logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
            continue
        elif instances.is_registered(row["domain"]):
            logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
            continue

        logger.info("Fetching row[domain]='%s',row[origin]='%s' ...", row["domain"], row["origin"])
        federation.fetch_instances(row["domain"], row["origin"], None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
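
# Design note on fetch_relays() above: it runs in two passes. Pass one
# scrapes the landing page (activityrelay, aoderelay, selective-relay) or
# queries nodeinfo metadata (pub-relay) for each relay and stores its peer
# list via instances.set_total_peers(). Pass two walks the collected
# 'domains' list, where each entry is a dict {"domain": ..., "origin": ...},
# and registers unknown peers through federation.fetch_instances() with the
# relay kept as origin.
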
def convert_idna(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    database.cursor.execute("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    instances.translate_idnas(rows, "domain")

    database.cursor.execute("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    instances.translate_idnas(rows, "origin")

    database.cursor.execute("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    blocks.translate_idnas(rows, "blocker")

    database.cursor.execute("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    blocks.translate_idnas(rows, "blocked")

    logger.debug("Success! - EXIT!")
    return 0
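
# Example of the IDNA translation that translate_idnas() applies to rows
# still stored in Unicode form (standard-library behaviour, shown here only
# for illustration):
#
#   >>> "müller.social".encode("idna").decode("utf-8")
#   'xn--mller-kva.social'
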
def remove_invalid(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    database.cursor.execute("SELECT domain FROM instances ORDER BY domain ASC")
    rows = database.cursor.fetchall()

    logger.info("Checking %d domains ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if not validators.domain(row["domain"].split("/")[0]):
            logger.info("Invalid row[domain]='%s' found, removing ...", row["domain"])
            database.cursor.execute("DELETE FROM blocks WHERE blocker = ? OR blocked = ?", [row["domain"], row["domain"]])
            database.cursor.execute("DELETE FROM instances WHERE domain = ? LIMIT 1", [row["domain"]])

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.info("Vacuum cleaning database ...")
    database.cursor.execute("VACUUM")

    logger.debug("Success! - EXIT!")
    return 0
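
# Maintenance note on remove_invalid() above: VACUUM rebuilds the SQLite
# database file so the space freed by the DELETE statements is actually
# returned to the filesystem; it is safe to run but may take a while on a
# large blocks table.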