# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import argparse
import csv
import inspect
import json
import logging
import time

from urllib.parse import urlparse

import atoma
import bs4
import markdown
import reqto
import validators

from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import blocklists
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import dicts as dict_helper
from fba.helpers import domain as domain_helper
from fba.helpers import locking
from fba.helpers import processing
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import csrf
from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances
from fba.models import sources

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)

def check_instance(args: argparse.Namespace) -> int:
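    """Checks whether the domain given via --domain is valid, blacklisted or
    already registered and returns a non-zero status if it cannot be added."""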
    logger.debug("args.domain='%s' - CALLED!", args.domain)

    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100  # error code assumed
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101  # error code assumed
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102  # error code assumed
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status

def check_nodeinfo(args: argparse.Namespace) -> int:
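    """Sanity-checks that each stored nodeinfo URL actually points at its own
    domain (or its punycode form) and reports mismatches."""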
    logger.debug("args[]='%s' - CALLED!", type(args))

    database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")

    cnt = 0
    for row in database.cursor.fetchall():
        logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        punycode = row["domain"].encode("idna").decode("utf-8")

        if row["nodeinfo_url"].startswith("/"):
            logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
        elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
            logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
            cnt = cnt + 1

    logger.info("Found %d row(s)", cnt)

    logger.debug("EXIT!")
    return 0

def fetch_pixelfed_api(args: argparse.Namespace) -> int:
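    """Fetches the server list from the pixelfed.org API and registers any
    new, wanted instances."""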
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF by default, you don't have to add network.source_headers by yourself here
    headers = tuple()
    source_domain = "pixelfed.org"

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    try:
        logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
        headers = csrf.determine(source_domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
        return 1  # assumed

    try:
        logger.info("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            source_domain,
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 1  # assumed
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 2  # assumed

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif row["domain"] is None or row["domain"] == "":
                logger.debug("row[domain]='%s' is empty - SKIPPED!", row["domain"])
                continue

            logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
            domain = row["domain"].encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Fetching instances from domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Cannot fetch JSON,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 1  # assumed

    logger.debug("Success! - EXIT!")
    return 0

def fetch_bkali(args: argparse.Namespace) -> int:
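    """Queries the gql.api.bka.li GraphQL API for a domain list and registers
    new, wanted instances."""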
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "gql.api.bka.li"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()
    try:
        logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
        fetched = network.post_json_api(
            source_domain,
            "/v1/graphql",  # endpoint path assumed
            json.dumps({
                "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
            })
        )

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for 'gql.api.bka.li' returned error_message='%s'", fetched["error_message"])
            return 1  # assumed
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
            return 2  # assumed

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif entry["domain"] is None or entry["domain"] == "":
                logger.debug("entry[domain]='%s' is empty - SKIPPED!", entry["domain"])
                continue
            elif not domain_helper.is_wanted(entry["domain"]):
                logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
                continue
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])
    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 1  # assumed

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0

def fetch_blocks(args: argparse.Namespace) -> int:
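    """Fetches block lists from registered instances (optionally limited to
    --domain or --software), records their blocks and tries to deobfuscate
    wildcarded entries where possible."""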
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100  # error code assumed
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101  # error code assumed
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102  # error code assumed

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ? LIMIT 1", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC", [args.software]
        )
    elif args.force:  # flag name assumed
        logger.debug("Re-checking all instances ...")
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC"
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)

        if not domain_helper.is_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("Setting last_blocked,has_obfuscation=false for blocker='%s' ...", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        # c.s isn't part of oliphant's "hidden" blocklists
        if blocker == "chaos.social" or software_helper.is_relay(software) or blocklists.has(blocker):
            logger.debug("Skipping blocker='%s', run ./fba.py fetch_cs, fetch_oliphant, fetch_csv instead!", blocker)
            continue

        logger.debug("Invoking federation.fetch_blocks(%s) ...", blocker)
        blocking = federation.fetch_blocks(blocker)

        logger.debug("blocker='%s',software='%s',blocking()=%d", blocker, software, len(blocking))
        if len(blocking) == 0:
            logger.debug("blocker='%s',software='%s' - fetching blocklist ...", blocker, software)
            if software == "pleroma":
                blocking = pleroma.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "mastodon":
                blocking = mastodon.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "lemmy":
                blocking = lemmy.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "friendica":
                blocking = friendica.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "misskey":
                blocking = misskey.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            else:
                logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
        instances.set_total_blocks(blocker, blocking)

        blockdict = list()
        deobfuscated = obfuscated = 0

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] is None or block["blocked"] == "":
                logger.warning("block[blocked]='%s' is empty, blocker='%s'", block["blocked"], blocker)
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)
                instances.set_has_obfuscation(blocker, True)
                obfuscated = obfuscated + 1

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["digest"] if "digest" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    continue

                deobfuscated = deobfuscated + 1
                block["blocked"] = row["domain"]
                origin = row["origin"]
                nodeinfo_url = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)
                instances.set_has_obfuscation(blocker, True)
                obfuscated = obfuscated + 1

                # Some obscure them with question marks, not sure if that's dependent on version or not
                row = instances.deobfuscate("?", block["blocked"], block["digest"] if "digest" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    continue

                deobfuscated = deobfuscated + 1
                block["blocked"] = row["domain"]
                origin = row["origin"]
                nodeinfo_url = row["nodeinfo_url"]

            logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
            if block["blocked"] is None or block["blocked"] == "":
                logger.debug("block[blocked]='%s' is empty - SKIPPED!", block["blocked"])
                continue

            logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
            block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
            logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])

            if not domain_helper.is_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            block["block_level"] = blocks.alias_block_level(block["block_level"])

            if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.info("blocker='%s' has %d obfuscated domain(s) and %d of them could be deobfuscated.", blocker, obfuscated, deobfuscated)
        instances.set_obfuscated_blocks(blocker, obfuscated)

        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_observer(args: argparse.Namespace) -> int:
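    """Crawls fediverse.observer's per-software tables and registers newly
    seen instances."""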
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fediverse.observer"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    types = list()
    if args.software is None:
        logger.info("Fetching software list ...")
        raw = utils.fetch_url(
            f"https://{source_domain}",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        ).text
        logger.debug("raw[%s]()=%d", type(raw), len(raw))

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        navbar = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"})
        logger.debug("navbar[]='%s'", type(navbar))
        if navbar is None:
            logger.warning("Cannot find navigation bar, cannot continue!")
            return 1  # assumed

        items = navbar.findAll("a", {"class": "dropdown-item"})
        logger.debug("items[]='%s'", type(items))

        logger.info("Checking %d menu items ...", len(items))
        for item in items:
            logger.debug("item[%s]='%s'", type(item), item)
            if item.text.lower() == "all":
                logger.debug("Skipping 'All' menu entry ...")
                continue

            logger.debug("Appending item.text='%s' ...", item.text)
            types.append(tidyup.domain(item.text))
    else:
        logger.info("Adding args.software='%s' as type ...", args.software)
        types.append(args.software)

    logger.info("Fetching %d different table data ...", len(types))
    for software in types:
        logger.debug("software='%s'", software)

        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            continue

        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://{source_domain}/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()
            logger.debug("domain[%s]='%s'", type(domain), domain)
            domain = tidyup.domain(domain) if domain not in [None, ""] else None
            logger.debug("domain='%s' - AFTER!", domain)

            if domain is None or domain == "":
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue

            logger.info("Fetching instances for domain='%s'", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_todon_wiki(args: argparse.Namespace) -> int:
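    """Imports the silenced/suspended server lists from wiki.todon.eu and
    records them as blocks."""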
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "wiki.todon.eu"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    blocklist = dict()
    blockdict = list()

    logger.debug("Fetching domainblocks from source_domain='%s'", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/todon/domainblocks",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "todon.eu"  # instance whose wiki this is (assumed)

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_last_blocked(blocker)
    instances.set_total_blocks(blocker, blocking)

    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if not domain_helper.is_wanted(blocked):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue
            elif not domain_helper.is_wanted(blocker):
                logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
                continue
            elif blocks.is_instance_blocked(blocker, blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", blocked, block_level, blocker)
                blockdict.append({
                    "blocked": blocked,
                    "reason" : None,
                })

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
        network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_cs(args: argparse.Namespace):
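    """Imports chaos.social's federation.md (silenced/blocked tables) and
    records the entries as blocks."""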
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Markdown extensions needed to render the block tables (exact list assumed)
    extensions = ["extra"]

    blocklist = dict()
    blockdict = list()

    source_domain = "raw.githubusercontent.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching federation.md from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/chaossocial/meta/master/federation.md",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc()=%d[]='%s'", len(doc), type(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    blocklist["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    blocklist["reject"] = federation.find_domains(blocked)

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "chaos.social"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_last_blocked(blocker)
    instances.set_total_blocks(blocker, blocking)

    logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
    if len(blocking) > 0:
        for block_level in blocklist:
            logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))

            for row in blocklist[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if "domain" not in row:
                    logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
                    continue
                elif not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
                    blockdict.append({
                        "blocked": row["domain"],
                        "reason" : row["reason"],
                    })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

    logger.debug("Success! - EXIT!")

def fetch_fba_rss(args: argparse.Namespace) -> int:
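    """Reads an FBA-compatible RSS feed given via --feed and registers newly
    seen domains."""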
    logger.debug("args[]='%s' - CALLED!", type(args))

    domains = list()

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    components = urlparse(args.feed)
    domain = components.netloc.lower().split(":")[0]

    logger.debug("domain='%s'", domain)
    if sources.is_recent(domain):
        logger.info("API from domain='%s' has recently been accessed - EXIT!", domain)
        return 0
    else:
        logger.debug("domain='%s' has not been recently used, marking ...", domain)
        sources.update(domain)

    logger.info("Fetching FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item[%s]='%s'", type(item), item)
            domain = item.link.split("=")[1]
            domain = tidyup.domain(domain) if domain not in [None, ""] else None

            logger.debug("domain='%s' - AFTER!", domain)
            if domain is None or domain == "":
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fbabot_atom(args: argparse.Namespace) -> int:
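    """Reads the FBA bot's ATOM feed (default: ryona.agency) and registers
    domains linked from its entries."""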
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "ryona.agency"
    feed = f"https://{source_domain}/users/fba/feed.atom"

    logger.debug("args.feed[%s]='%s'", type(args.feed), args.feed)
    if args.feed is not None and validators.url(args.feed):
        logger.debug("Setting feed='%s' ...", args.feed)
        feed = str(args.feed)
        source_domain = urlparse(args.feed).netloc

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))

            for element in doc.findAll("a"):
                logger.debug("element[]='%s'", type(element))
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href) if href not in [None, ""] else None

                    logger.debug("domain='%s' - AFTER!", domain)
                    if domain is None or domain == "":
                        logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                        continue

                    logger.debug("domain='%s' - BEFORE!", domain)
                    domain = domain.encode("idna").decode("utf-8")
                    logger.debug("domain='%s' - AFTER!", domain)

                    if not domain_helper.is_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_instances(args: argparse.Namespace) -> int:
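    """Fetches peers for --domain, then iterates over known instances that are
    due for a re-fetch."""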
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("args.domain='%s' - checking ...", args.domain)
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid.", args.domain)
        return 100  # error code assumed
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
        return 101  # error code assumed

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    try:
        domain = tidyup.domain(args.domain)
        origin = software = None

        database.cursor.execute("SELECT origin, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
        row = database.cursor.fetchone()
        if row is not None:
            origin = row["origin"]
            software = row["software"]

        if software_helper.is_relay(software):
            logger.warning("args.domain='%s' is of software type '%s' which is not supported by this command. Please invoke fetch_relays instead.", args.domain, software)
            return 102  # error code assumed

        logger.info("Fetching instances from args.domain='%s',origin='%s',software='%s' ...", domain, origin, software)
        federation.fetch_instances(domain, origin, software, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update(args.domain)
        return 100  # error code assumed

    if args.single:  # flag name assumed
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus', 'neodb') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY total_peers DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] == "":
            logger.debug("row[domain] is empty - SKIPPED!")
            continue

        logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
        domain = row["domain"].encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", domain, row["origin"], row["software"], row["nodeinfo_url"])
            federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0

def fetch_csv(args: argparse.Namespace) -> int:
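    """Processes all configured CSV block lists, optionally limited to the
    blocker given via --domain."""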
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    logger.info("Checking %d CSV files ...", len(blocklists.csv_files))
    for block in blocklists.csv_files:
        logger.debug("block[blocker]='%s',block[csv_url]='%s'", block["blocker"], block["csv_url"])

        # Is domain given and not equal blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue

        logger.debug("Invoking processing.csv_block(%s, %s, fetch_csv) ...", block["blocker"], block["csv_url"])
        processing.csv_block(block["blocker"], block["csv_url"], inspect.currentframe().f_code.co_name)

    logger.debug("Success - EXIT!")
    return 0

def fetch_oliphant(args: argparse.Namespace) -> int:
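    """Downloads oliphant's CSV block lists from codeberg.org and processes
    them."""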
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "codeberg.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"

    logger.debug("Downloading %d files ...", len(blocklists.oliphant_blocklists))
    for block in blocklists.oliphant_blocklists:
        # Is domain given and not equal blocker?
        logger.debug("block[blocker]='%s',block[csv_url]='%s'", block["blocker"], block["csv_url"])
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue

        url = f"{base_url}/{block['csv_url']}"

        logger.debug("Invoking processing.csv_block(%s, %s, fetch_oliphant) ...", block["blocker"], url)
        processing.csv_block(block["blocker"], url, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_txt(args: argparse.Namespace) -> int:
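    """Processes configured plain-text domain block lists."""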
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    logger.info("Checking %d text file(s) ...", len(blocklists.txt_files))
    for row in blocklists.txt_files:
        logger.debug("Fetching row[url]='%s' ...", row["url"])
        response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code == 200 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.strip().split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain) if domain not in [None, ""] else None

                logger.debug("domain='%s' - AFTER!", domain)
                if domain is None or domain == "":
                    logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                    continue
                elif not domain_helper.is_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue
                elif instances.is_recent(domain):
                    logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                    continue

                logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
                processed = processing.instance(domain, row["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)
                if not processed:
                    logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
                    continue

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fedipact(args: argparse.Namespace) -> int:
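    """Scrapes fedipact.online and registers the instances listed there."""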
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fedipact.online"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching / from source_domain='%s' ...", source_domain)
    response = utils.fetch_url(
        f"https://{source_domain}",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and response.text != "":
        logger.debug("Parsing %d Bytes ...", len(response.text))

        doc = bs4.BeautifulSoup(response.text, "html.parser")
        logger.debug("doc[]='%s'", type(doc))

        rows = doc.findAll("li")
        logger.info("Checking %d row(s) ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            domain = tidyup.domain(row.contents[0]) if row.contents[0] not in [None, ""] else None

            logger.debug("domain='%s' - AFTER!", domain)
            if domain is None or domain == "":
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.info("Fetching domain='%s' ...", domain)
            federation.fetch_instances(domain, "beach.city", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_joinmobilizon(args: argparse.Namespace) -> int:
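    """Fetches the Mobilizon instance list from instances.joinmobilizon.org
    and registers new, wanted hosts."""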
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "instances.joinmobilizon.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching instances from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/api/v1/instances",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    parsed = json.loads(raw)
    logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))

    if "data" not in parsed:
        logger.warning("parsed()=%d does not contain key 'data'", len(parsed))
        return 1  # assumed

    logger.info("Checking %d instances ...", len(parsed["data"]))
    for row in parsed["data"]:
        logger.debug("row[]='%s'", type(row))
        if "host" not in row:
            logger.warning("row='%s' does not contain key 'host' - SKIPPED!", row)
            continue
        elif not domain_helper.is_wanted(row["host"]):
            logger.debug("row[host]='%s' is not wanted - SKIPPED!", row["host"])
            continue
        elif instances.is_registered(row["host"]):
            logger.debug("row[host]='%s' is already registered - SKIPPED!", row["host"])
            continue

        logger.info("Fetching row[host]='%s' ...", row["host"])
        federation.fetch_instances(row["host"], "demo.mobilizon.org", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_joinmisskey(args: argparse.Namespace) -> int:
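    """Fetches the Misskey instance list from instanceapp.misskey.page and
    registers new, wanted hosts."""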
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "instanceapp.misskey.page"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching instances.json from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/instances.json",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    parsed = json.loads(raw)
    logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))

    if "instancesInfos" not in parsed:
        logger.warning("parsed()=%d does not contain element 'instancesInfos'", len(parsed))
        return 1  # assumed

    logger.info("Checking %d instance(s) ...", len(parsed["instancesInfos"]))
    for row in parsed["instancesInfos"]:
        logger.debug("row[%s]='%s'", type(row), row)
        if "url" not in row:
            logger.warning("row()=%d does not have element 'url' - SKIPPED!", len(row))
            continue
        elif not domain_helper.is_wanted(row["url"]):
            logger.debug("row[url]='%s' is not wanted - SKIPPED!", row["url"])
            continue
        elif instances.is_registered(row["url"]):
            logger.debug("row[url]='%s' is already registered - SKIPPED!", row["url"])
            continue

        logger.info("Fetching row[url]='%s' ...", row["url"])
        federation.fetch_instances(row["url"], "misskey.io", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def recheck_obfuscation(args: argparse.Namespace) -> int:
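    """Re-checks instances flagged for obfuscated block lists and tries to
    deobfuscate their entries."""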
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if isinstance(args.domain, str) and args.domain != "" and domain_helper.is_wanted(args.domain):
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE (has_obfuscation = 1 OR has_obfuscation IS NULL) AND domain = ?", [args.domain])
    elif isinstance(args.software, str) and args.software != "":
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE (has_obfuscation = 1 OR has_obfuscation IS NULL) AND software = ?", [args.software])
    else:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 OR has_obfuscation IS NULL")

    rows = database.cursor.fetchall()
    logger.info("Checking %d domains ...", len(rows))
    for row in rows:
        logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        if (args.force is None or not args.force) and args.domain is None and args.software is None and instances.is_recent(row["domain"], "last_blocked"):
            logger.debug("row[domain]='%s' has been recently checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force))
            continue

        logger.debug("Invoking federation.fetch_blocks(%s) ...", row["domain"])
        blocking = federation.fetch_blocks(row["domain"])

        logger.debug("blocking()=%d", len(blocking))
        if len(blocking) == 0:
            if row["software"] == "pleroma":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = pleroma.fetch_blocks(row["domain"])
            elif row["software"] == "mastodon":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = mastodon.fetch_blocks(row["domain"])
            elif row["software"] == "lemmy":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = lemmy.fetch_blocks(row["domain"])
            elif row["software"] == "friendica":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = friendica.fetch_blocks(row["domain"])
            elif row["software"] == "misskey":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = misskey.fetch_blocks(row["domain"])
            else:
                logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])

        # c.s isn't part of oliphant's "hidden" blocklists
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] != "chaos.social" and not software_helper.is_relay(row["software"]) and not blocklists.has(row["domain"]):
            logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
            instances.set_last_blocked(row["domain"])
            instances.set_total_blocks(row["domain"], blocking)

        obfuscated = 0
        blockdict = list()

        logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
        for block in blocking:
            logger.debug("block[blocked]='%s'", block["blocked"])
            blocked = None

            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
                logger.debug("block='%s' is obfuscated.", block["blocked"])
                obfuscated = obfuscated + 1
                blocked = utils.deobfuscate(block["blocked"], row["domain"], block["digest"] if "digest" in block else None)
            elif not domain_helper.is_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
                logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
                continue

            logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
            if blocked is not None and blocked != block["blocked"]:
                logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
                obfuscated = obfuscated - 1

                if blacklist.is_blacklisted(blocked):
                    logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
                    continue
                elif blacklist.is_blacklisted(row["domain"]):
                    logger.debug("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"])
                    continue
                elif blocks.is_instance_blocked(row["domain"], blocked):
                    logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
                    continue

                block["block_level"] = blocks.alias_block_level(block["block_level"])

                logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
                if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
                    blockdict.append({
                        "blocked": blocked,
                        "reason" : block["reason"],
                    })

        logger.debug("Setting obfuscated=%d for row[domain]='%s' ...", obfuscated, row["domain"])
        instances.set_has_obfuscation(row["domain"], (obfuscated > 0))
        instances.set_obfuscated_blocks(row["domain"], obfuscated)

        logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
        if instances.has_pending(row["domain"]):
            logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
            instances.update(row["domain"])

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", row["domain"], len(blockdict))
            network.send_bot_post(row["domain"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fedilist(args: argparse.Namespace) -> int:
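    """Fetches the instance CSV from demo.fedilist.com (optionally filtered by
    --software) and registers new domains."""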
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "demo.fedilist.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    url = f"http://{source_domain}/instance/csv?onion=not"
    if args.software is not None and args.software != "":
        logger.debug("args.software='%s'", args.software)
        url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"

    logger.info("Fetching url='%s' ...", url)
    response = reqto.get(
        url,
        headers=network.web_headers,
        timeout=(config.get("connection_timeout"), config.get("read_timeout")),
        allow_redirects=False
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if not response.ok or response.status_code > 200 or len(response.content) == 0:
        logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.text))
        return 1  # assumed

    reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")

    logger.debug("reader[]='%s'", type(reader))
    if reader is None:
        logger.warning("Failed parsing response.content()=%d as CSV content", len(response.content))
        return 2  # assumed

    rows = list(reader)

    logger.info("Checking %d rows ...", len(rows))
    for row in rows:
        logger.debug("row[]='%s'", type(row))
        if "hostname" not in row:
            logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(row))
            continue

        logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"])
        domain = tidyup.domain(row["hostname"]) if row["hostname"] not in [None, ""] else None
        logger.debug("domain='%s' - AFTER!", domain)

        if domain is None or domain == "":
            logger.debug("domain='%s' is empty after tidyup.domain(): row[hostname]='%s' - SKIPPED!", domain, row["hostname"])
            continue

        logger.debug("domain='%s' - BEFORE!", domain)
        domain = domain.encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            continue
        elif (args.force is None or not args.force) and instances.is_registered(domain):
            logger.debug("domain='%s' is already registered, --force not specified: args.force[]='%s'", domain, type(args.force))
            continue
        elif instances.is_recent(domain):
            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
            continue

        logger.info("Fetching instances from domain='%s' ...", domain)
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def update_nodeinfo(args: argparse.Namespace) -> int:
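    """Re-determines the software type of stored instances via their nodeinfo,
    optionally filtered by --domain, --software or detection mode."""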
1460 logger.debug("args[]='%s' - CALLED!", type(args))
1462 logger.debug("Invoking locking.acquire() ...")
1465 if args.domain is not None and args.domain != "":
1466 logger.debug("Fetching args.domain='%s'", args.domain)
1467 database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
1468 elif args.software is not None and args.software != "":
1469 logger.info("Fetching domains for args.software='%s'", args.software)
1470 database.cursor.execute("SELECT domain, software FROM instances WHERE software = ? ORDER BY last_updated ASC", [args.software])
1471 elif args.mode is not None and args.mode != "":
1472 logger.info("Fetching domains for args.mode='%s'", args.mode.upper())
1473 database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode = ? ORDER BY last_updated ASC", [args.mode])
1474 elif args.no_software:
1475 logger.info("Fetching domains with no software type detected ...")
1476 database.cursor.execute("SELECT domain, software FROM instances WHERE software IS NULL ORDER BY last_updated ASC")
1478 logger.info("Fetching domains with other detection mode than AUTO_DISOVERY being set ...")
1479 database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode IS NOT NULL AND detection_mode != 'AUTO_DISCOVERY' ORDER BY last_updated ASC")
1480 elif args.no_detection:
1481 logger.info("Fetching domains with no detection mode being set ...")
1482 database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode IS NULL ORDER BY last_updated ASC")
1484 logger.info("Fetching domains for recently updated ...")
1485 database.cursor.execute("SELECT domain, software FROM instances ORDER BY last_updated ASC")
1487 domains = database.cursor.fetchall()
1489 logger.info("Checking %d domain(s) ...", len(domains))
1492 logger.debug("row[]='%s'", type(row))
1493 if not args.force and instances.is_recent(row["domain"], "last_nodeinfo"):
1494 logger.debug("row[domain]='%s' has been recently checked - SKIPPED!", row["domain"])
1498 logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
1499 software = federation.determine_software(row["domain"])
1501 logger.debug("Determined software='%s'", software)
1502 if (software != row["software"] and software is not None) or args.force is True:
1503 logger.debug("software='%s'", software)
1504 if software is None:
1505 logger.debug("Setting nodeinfo_url to 'None' for row[domain]='%s' ...", row["domain"])
1506 instances.set_nodeinfo_url(row["domain"], None)
1508 logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
1509 instances.set_software(row["domain"], software)
1511 if software is not None:
1512 logger.debug("Setting row[domain]='%s' as successfully determined ...", row["domain"])
1513 instances.set_success(row["domain"])
1514 except network.exceptions as exception:
1515 logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1516 instances.set_last_error(row["domain"], exception)
1518 instances.set_last_nodeinfo(row["domain"])
1519 instances.update(row["domain"])
1522 logger.debug("Success! - EXIT!")
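# Fetches the instance list from the instances.social API. Requires an API key
# to be set as 'instances_social_api_key' in config.json; the key is sent as a
# bearer token. Non-zero error codes below are assumed for this reconstruction.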
def fetch_instances_social(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "instances.social"

    if config.get("instances_social_api_key") == "":
        logger.error("API key not set. Please set in your config.json file.")
        return 1
    elif sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 2
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    headers = {
        "Authorization": f"Bearer {config.get('instances_social_api_key')}",
    }

    logger.info("Fetching list from source_domain='%s' ...", source_domain)
    fetched = network.get_json_api(
        source_domain,
        "/api/1.0/instances/list?count=0&sort_by=name",
        headers,
        timeout=(config.get("connection_timeout"), config.get("read_timeout"))
    )
    logger.debug("fetched[]='%s'", type(fetched))

    if "error_message" in fetched:
        logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
        return 3
    elif "exception" in fetched:
        logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
        return 4
    elif "json" not in fetched:
        logger.warning("fetched has no element 'json' - EXIT!")
        return 5
    elif "instances" not in fetched["json"]:
        logger.warning("fetched[json] has no element 'instances' - EXIT!")
        return 6

    domains = list()
    rows = fetched["json"]["instances"]

    logger.info("Checking %d row(s) ...", len(rows))
    for row in rows:
        logger.debug("row[]='%s'", type(row))
        domain = tidyup.domain(row["name"]) if row["name"] not in [None, ""] else None
        logger.debug("domain='%s' - AFTER!", domain)

        if domain is None or domain == "":
            logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
            continue

        logger.debug("domain='%s' - BEFORE!", domain)
        domain = domain.encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            continue
        elif domain in domains:
            logger.debug("domain='%s' is already added - SKIPPED!", domain)
            continue
        elif instances.is_registered(domain):
            logger.debug("domain='%s' is already registered - SKIPPED!", domain)
            continue
        elif instances.is_recent(domain):
            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
            continue

        domains.append(domain)  # remember it so duplicate rows in the API response are skipped

        logger.info("Fetching instances from domain='%s'", domain)
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
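# Fetches the relay list from api.relaylist.com and queues each relay's host
# name for a regular instance fetch. Only the source domain is certain in this
# excerpt; the endpoint path passed below is an assumption.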
def fetch_relaylist(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "api.relaylist.com"

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1  # error code assumed
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching list from source_domain='%s' ...", source_domain)
    fetched = network.get_json_api(
        source_domain,
        "/relays",  # NOTE: endpoint path assumed, it is not part of this excerpt
        {},
        (config.get("connection_timeout"), config.get("read_timeout"))
    )
    logger.debug("fetched[]='%s'", type(fetched))

    if "error_message" in fetched:
        logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
        return 2
    elif "exception" in fetched:
        logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
        return 3
    elif "json" not in fetched:
        logger.warning("fetched has no element 'json' - EXIT!")
        return 4

    domains = list()

    logger.info("Checking %d row(s) ...", len(fetched["json"]))
    for row in fetched["json"]:
        logger.debug("row[]='%s'", type(row))
        domain = urlparse(row["url"]).netloc.lower().split(":")[0]
        logger.debug("domain='%s' - AFTER!", domain)

        if domain is None or domain == "":
            logger.debug("domain='%s' is empty - SKIPPED!", domain)
            continue

        logger.debug("domain='%s' - BEFORE!", domain)
        domain = domain.encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            continue
        elif domain in domains:
            logger.debug("domain='%s' is already added - SKIPPED!", domain)
            continue
        elif instances.is_registered(domain):
            logger.debug("domain='%s' is already registered - SKIPPED!", domain)
            continue
        elif instances.is_recent(domain):
            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
            continue

        domains.append(domain)  # remember it so duplicates in the list are skipped

        logger.info("Fetching instances from domain='%s'", domain)
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
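# Walks over all registered relay instances (activityrelay, aoderelay,
# selective-relay and pub-relay) and collects their peer lists: pub-relay
# publishes peers through its nodeinfo metadata, the other three are scraped
# from their HTML landing pages. Newly discovered peers are queued afterwards.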
def fetch_relays(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND domain = ? LIMIT 1", [args.domain])
    elif args.software is not None and args.software != "":
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND software = ?", [args.software])
    else:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay')")

    domains = list()
    rows = database.cursor.fetchall()

    logger.info("Checking %d relays ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
        peers = list()
        if not args.force and instances.is_recent(row["domain"]):
            logger.debug("row[domain]='%s' has been recently fetched - SKIPPED!", row["domain"])
            continue

        try:
            if row["software"] == "pub-relay":
                logger.info("Fetching row[nodeinfo_url]='%s' from relay row[domain]='%s',row[software]='%s' ...", row["nodeinfo_url"], row["domain"], row["software"])
                raw = network.fetch_api_url(
                    row["nodeinfo_url"],
                    (config.get("connection_timeout"), config.get("read_timeout"))
                )

                logger.debug("raw[%s]()=%d", type(raw), len(raw))
                if "exception" in raw:
                    logger.warning("row[domain]='%s' has caused an exception: '%s' - raising again ...", row["domain"], type(raw["exception"]))
                    raise raw["exception"]
                elif "error_message" in raw:
                    logger.warning("row[domain]='%s' has caused error message: '%s' - SKIPPED!", row["domain"], raw["error_message"])
                    instances.set_last_error(row["domain"], raw)
                    instances.set_last_instance_fetch(row["domain"])
                    instances.update(row["domain"])
                    continue
                elif "json" not in raw:
                    logger.warning("raw()=%d does not contain key 'json' in response - SKIPPED!", len(raw))
                    continue
                elif "metadata" not in raw["json"]:
                    logger.warning("raw[json]()=%d does not contain key 'metadata' in response - SKIPPED!", len(raw["json"]))
                    continue
                elif "peers" not in raw["json"]["metadata"]:
                    logger.warning("raw[json][metadata]()=%d does not contain key 'peers' in response - SKIPPED!", len(raw["json"]["metadata"]))
                    continue
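            # The remaining relay types expose no JSON API for their peer
            # list, so the landing page is fetched and parsed as HTML instead.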
1726 logger.info("Fetching / from relay row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1727 raw = utils.fetch_url(
1728 f"https://{row['domain']}",
1729 network.web_headers,
1730 (config.get("connection_timeout"), config.get("read_timeout"))
1732 logger.debug("raw[%s]()=%d", type(raw), len(raw))
1734 doc = bs4.BeautifulSoup(raw, features="html.parser")
1735 logger.debug("doc[]='%s'", type(doc))
1737 except network.exceptions as exception:
1738 logger.warning("Exception '%s' during fetching from relay '%s': '%s'", type(exception), row["domain"], str(exception))
1739 instances.set_last_error(row["domain"], exception)
1740 instances.set_last_instance_fetch(row["domain"])
1741 instances.update(row["domain"])
1744 logger.debug("row[software]='%s'", row["software"])
1745 if row["software"] == "activityrelay":
1746 logger.debug("Checking row[domain]='%s' ...", row["domain"])
1747 tags = doc.findAll("p")
1749 logger.debug("Checking %d paragraphs ...", len(tags))
1751 logger.debug("tag[]='%s'", type(tag))
1752 if len(tag.contents) == 0:
1753 logger.debug("tag='%s' is an empty tag - SKIPPED!", tag)
1755 elif "registered instances" not in tag.contents[0]:
1756 logger.debug("Skipping paragraph, text not found.")
1759 logger.debug("Found tag.contents[0][]='%s'", tag.contents[0])
1760 for domain in tag.contents:
1761 logger.debug("domain[%s]='%s'", type(domain), domain)
1762 if not isinstance(domain, bs4.element.NavigableString) or "registered instances" in domain:
1765 domain = str(domain)
1766 logger.debug("domain='%s'", domain)
1767 if not domain_helper.is_wanted(domain):
1768 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1771 logger.debug("domain='%s' - BEFORE!", domain)
1772 domain = tidyup.domain(domain) if domain not in[None, ""] else None
1773 logger.debug("domain='%s' - AFTER!", domain)
1775 if domain is None or domain == "":
1776 logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1778 elif domain not in peers:
1779 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1780 peers.append(domain)
1782 logger.debug("domains()=%d,domain='%s'", len(domains), domain)
1783 if dict_helper.has_key(domains, "domain", domain):
1784 logger.debug("domain='%s' already added", domain)
1787 logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
1790 "origin": row["domain"],
1792 elif row["software"] in ["aoderelay", "selective-relay"]:
1793 logger.debug("Checking row[domain]='%s' ...", row["domain"])
1794 if row["software"] == "aoderelay":
1795 tags = doc.findAll("section", {"class": "instance"})
1797 tags = doc.find("div", {"id": "instances"}).findAll("li")
1799 logger.debug("Checking %d tags ...", len(tags))
1801 logger.debug("tag[]='%s'", type(tag))
1803 link = tag.find("a")
1804 logger.debug("link[%s]='%s'", type(link), link)
1805 if not isinstance(link, bs4.element.Tag):
1806 logger.warning("tag[%s]='%s' is not type of 'bs4.element.Tag' - SKIPPED!", type(tag), tag)
1809 components = urlparse(link.get("href"))
1810 logger.debug("components(%d)='%s'", len(components), components)
1811 domain = components.netloc.lower().split(":")[0]
1813 logger.debug("domain='%s' - BEFORE!", domain)
1814 domain = tidyup.domain(domain) if domain not in[None, ""] else None
1815 logger.debug("domain='%s' - AFTER!", domain)
1817 if domain is None or domain == "":
1818 logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1820 elif domain not in peers:
1821 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1822 peers.append(domain)
1824 logger.debug("domains()=%d,domain='%s'", len(domains), domain)
1825 if dict_helper.has_key(domains, "domain", domain):
1826 logger.debug("domain='%s' already added", domain)
1829 logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
1832 "origin": row["domain"],
1834 elif row["software"] == "pub-relay":
1835 logger.debug("Checking %d peer(s) row[domain]='%s' ...", len(raw["json"]["metadata"]["peers"]), row["domain"])
1836 for domain in raw["json"]["metadata"]["peers"]:
1837 logger.debug("domain='%s' - BEFORE!", domain)
1838 domain = tidyup.domain(domain) if domain not in[None, ""] else None
1839 logger.debug("domain='%s' - AFTER!", domain)
1841 if domain is None or domain == "":
1842 logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1844 elif domain not in peers:
1845 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1846 peers.append(domain)
1848 logger.debug("domains()=%d,domain='%s'", len(domains), domain)
1849 if dict_helper.has_key(domains, "domain", domain):
1850 logger.debug("domain='%s' already added", domain)
1853 logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
1856 "origin": row["domain"],
1859 logger.warning("row[domain]='%s',row[software]='%s' is not supported", row["domain"], row["software"])
1862 logger.debug("Updating last_instance_fetch for row[domain]='%s' ...", row["domain"])
1863 instances.set_last_instance_fetch(row["domain"])
1865 logger.info("Relay '%s' has %d peer(s) registered.", row["domain"], len(peers))
1866 instances.set_total_peers(row["domain"], peers)
1868 logger.debug("Flushing data for row[domain]='%s'", row["domain"])
1869 instances.update(row["domain"])
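    # Second pass: queue every newly discovered peer that survived the filters
    # above for a regular instance fetch, with its relay recorded as origin.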
1871 logger.info("Checking %d domains ...", len(domains))
1873 logger.debug("row[domain]='%s',row[origin]='%s'", row["domain"], row["origin"])
1874 if not domain_helper.is_wanted(row["domain"]):
1875 logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
1877 elif instances.is_registered(row["domain"]):
1878 logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
1881 logger.info("Fetching row[domain]='%s',row[origin]='%s' ...", row["domain"], row["origin"])
1882 federation.fetch_instances(row["domain"], row["origin"], None, inspect.currentframe().f_code.co_name)
1884 logger.debug("Success! - EXIT!")
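# Converts all non-punycode values in the domain, origin, blocker and blocked
# columns to their IDNA ("xn--") representation, using Python's idna codec:
#
#   >>> "münchen.example".encode("idna").decode("utf-8")
#   'xn--mnchen-3ya.example'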
def convert_idna(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    database.cursor.execute("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    instances.translate_idnas(rows, "domain")

    database.cursor.execute("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    instances.translate_idnas(rows, "origin")

    database.cursor.execute("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    blocks.translate_idnas(rows, "blocker")

    database.cursor.execute("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    blocks.translate_idnas(rows, "blocked")

    logger.debug("Success! - EXIT!")
    return 0
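# Removes rows whose domain name does not validate (e.g. leftovers containing
# paths or other garbage) from both the instances and blocks tables, then runs
# VACUUM to reclaim the freed space.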
def remove_invalid(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    database.cursor.execute("SELECT domain FROM instances ORDER BY domain ASC")
    rows = database.cursor.fetchall()

    logger.info("Checking %d domains ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if not validators.domain(row["domain"].split("/")[0]):
            logger.info("Invalid row[domain]='%s' found, removing ...", row["domain"])
            database.cursor.execute("DELETE FROM blocks WHERE blocker = ? OR blocked = ?", [row["domain"], row["domain"]])
            database.cursor.execute("DELETE FROM instances WHERE domain = ? LIMIT 1", [row["domain"]])

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.info("Vacuum cleaning database ...")
    database.cursor.execute("VACUUM")

    logger.debug("Success! - EXIT!")