# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import argparse
import csv
import inspect
import json
import logging
import time

import atoma
import bs4
import markdown
import reqto
import validators

from urllib.parse import urlparse

from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import blocklists
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import dicts as dict_helper
from fba.helpers import domain as domain_helper
from fba.helpers import locking
from fba.helpers import processing
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import csrf
from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances
from fba.models import sources

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)
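
# Checks whether args.domain is valid, not blacklisted and not yet registered.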
def check_instance(args: argparse.Namespace) -> int:
    logger.debug("args.domain='%s' - CALLED!", args.domain)

    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 1
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 1
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 1
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status
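
# Verifies that each stored nodeinfo_url still points at its own instance's domain.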
def check_nodeinfo(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")

    cnt = 0
    for row in database.cursor.fetchall():
        logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        punycode = row["domain"].encode("idna").decode("utf-8")

        if row["nodeinfo_url"].startswith("/"):
            logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
            continue
        elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
            logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
            cnt = cnt + 1

    logger.info("Found %d row(s)", cnt)

    logger.debug("EXIT!")
    return 0
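
# Fetches the public server list from pixelfed.org's API and registers new instances.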
def fetch_pixelfed_api(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF by default, you don't have to add network.source_headers by yourself here
    headers = tuple()
    source_domain = "pixelfed.org"

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0

    logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
    sources.update(source_domain)

    try:
        logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
        headers = csrf.determine(source_domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
        return 1

    try:
        logger.info("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            source_domain,
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("fetched(%d)[]='%s'", len(fetched), type(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 1
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 1

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif row["domain"] in [None, ""]:
                logger.debug("row[domain]='%s' is empty - SKIPPED!", row["domain"])
                continue

            logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
            domain = row["domain"].encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Fetching instances from domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 1

    logger.debug("Success! - EXIT!")
    return 0
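
# Fetches a domain list from the gql.api.bka.li GraphQL API and registers new instances.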
def fetch_bkali(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "gql.api.bka.li"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0

    logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
    sources.update(source_domain)

    domains = list()
    try:
        logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
        fetched = network.post_json_api(
            source_domain,
            "/v1/graphql",
            json.dumps({
                "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
            })
        )

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for source_domain='%s' returned error message='%s'", source_domain, fetched["error_message"])
            return 1
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
            return 1

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif entry["domain"] in [None, ""]:
                logger.debug("entry[domain]='%s' is empty - SKIPPED!", entry["domain"])
                continue
            elif not domain_helper.is_wanted(entry["domain"]):
                logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
                continue
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])
    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 1

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, "tak.teleyal.blog", None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0
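
# Fetches blocklists from all registered instances of supported software, or from a single domain/software when given.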
def fetch_blocks(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 1
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 1
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 1

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ? LIMIT 1", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC", [args.software]
        )
    elif args.only_none:
        # Check only entries with total_blocked=None
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND total_blocks IS NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC"
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC"
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)

        if not domain_helper.is_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue
        elif not args.force and instances.is_recent(blocker, "last_blocked"):
            logger.debug("blocker='%s' has been recently accessed - SKIPPED!", blocker)
            continue

        logger.debug("Setting last_blocked,has_obfuscation=false for blocker='%s' ...", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        # c.s isn't part of oliphant's "hidden" blocklists
        if blocker == "chaos.social" or software_helper.is_relay(software) or blocklists.has(blocker):
            logger.debug("Skipping blocker='%s', run ./fba.py fetch_cs, fetch_oliphant, fetch_csv instead!", blocker)
            continue

        logger.debug("Invoking federation.fetch_blocks(%s) ...", blocker)
        blocking = federation.fetch_blocks(blocker)

        logger.debug("blocker='%s',software='%s',blocking()=%d", blocker, software, len(blocking))
        if len(blocking) == 0:
            logger.debug("blocker='%s',software='%s' - fetching blocklist ...", blocker, software)
            if software == "pleroma":
                blocking = pleroma.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "mastodon":
                blocking = mastodon.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "lemmy":
                blocking = lemmy.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "friendica":
                blocking = friendica.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "misskey":
                blocking = misskey.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            else:
                logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
        instances.set_total_blocks(blocker, blocking)

        blockdict = list()
        deobfuscated = obfuscated = 0

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] in [None, ""]:
                logger.warning("block[blocked]='%s' is empty, blocker='%s'", block["blocked"], blocker)
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".i2p") and config.get("allow_i2p_domain") != "true":
                logger.debug("blocked='%s' is an I2P domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)
                instances.set_has_obfuscation(blocker, True)
                obfuscated = obfuscated + 1

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["digest"] if "digest" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    continue

                deobfuscated = deobfuscated + 1
                block["blocked"] = row["domain"]
                origin = row["origin"]
                nodeinfo_url = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)
                instances.set_has_obfuscation(blocker, True)
                obfuscated = obfuscated + 1

                # Some obscure them with question marks, not sure if that's dependent on version or not
                row = instances.deobfuscate("?", block["blocked"], block["digest"] if "digest" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    continue

                deobfuscated = deobfuscated + 1
                block["blocked"] = row["domain"]
                origin = row["origin"]
                nodeinfo_url = row["nodeinfo_url"]

            logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
            if block["blocked"] in [None, ""]:
                logger.debug("block[blocked]='%s' is empty - SKIPPED!", block["blocked"])
                continue

            logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
            block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
            logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])

            if not domain_helper.is_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            block["block_level"] = blocks.alias_block_level(block["block_level"])

            if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.info("blocker='%s' has %d obfuscated domain(s) and %d of them could be deobfuscated.", blocker, obfuscated, deobfuscated)
        instances.set_obfuscated_blocks(blocker, obfuscated)

        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0
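
# Crawls fediverse.observer's per-software table pages for instance domains.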
def fetch_observer(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fediverse.observer"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0

    logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
    sources.update(source_domain)

    types = list()
    if args.software is None:
        logger.info("Fetching software list ...")
        raw = utils.fetch_url(
            f"https://{source_domain}",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        ).text
        logger.debug("raw[%s]()=%d", type(raw), len(raw))

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        navbar = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"})
        logger.debug("navbar[]='%s'", type(navbar))
        if navbar is None:
            logger.warning("Cannot find navigation bar, cannot continue!")
            return 1

        items = navbar.findAll("a", {"class": "dropdown-item"})
        logger.debug("items[]='%s'", type(items))

        logger.info("Checking %d menu items ...", len(items))
        for item in items:
            logger.debug("item[%s]='%s'", type(item), item)
            if item.text.lower() == "all":
                logger.debug("Skipping 'All' menu entry ...")
                continue

            logger.debug("Appending item.text='%s' ...", item.text)
            types.append(tidyup.domain(item.text))
    else:
        logger.info("Adding args.software='%s' as type ...", args.software)
        types.append(args.software)

    logger.info("Fetching %d different table data ...", len(types))
    for software in types:
        logger.debug("software='%s'", software)

        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            continue

        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://{source_domain}/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()
            logger.debug("domain[%s]='%s'", type(domain), domain)
            domain = tidyup.domain(domain) if domain not in [None, ""] else None
            logger.debug("domain='%s' - AFTER!", domain)

            if domain in [None, ""]:
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue

            logger.info("Fetching instances for domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
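
# Parses the silenced/suspended server lists from wiki.todon.eu and records them as blocks.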
def fetch_todon_wiki(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "wiki.todon.eu"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0

    logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
    sources.update(source_domain)

    blockdict = list()
    blocklist = {
        "silenced": list(),
        "reject": list(),
    }

    logger.debug("Fetching domainblocks from source_domain='%s'", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/todon/domainblocks",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blocking = blocklist["silenced"] + blocklist["reject"]
    # The wiki documents todon.eu's own blocklist
    blocker = "todon.eu"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_last_blocked(blocker)
    instances.set_total_blocks(blocker, blocking)

    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if not domain_helper.is_wanted(blocked):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue
            elif not domain_helper.is_wanted(blocker):
                logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
                continue
            elif blocks.is_instance_blocked(blocker, blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", blocked, block_level, blocker)
                blockdict.append({
                    "blocked": blocked,
                    "reason" : None,
                })

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
        network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

    logger.debug("Success! - EXIT!")
    return 0
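
# Parses chaos.social's federation.md and records its silenced/blocked instances.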
def fetch_cs(args: argparse.Namespace):
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # The "tables" extension is needed so markdown.markdown() renders the
    # instance tables that are looked up below.
    extensions = ["tables"]

    blockdict = list()
    blocklist = {
        "silenced": list(),
        "reject": list(),
    }

    source_domain = "raw.githubusercontent.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return

    logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
    sources.update(source_domain)

    logger.info("Fetching federation.md from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/chaossocial/meta/master/federation.md",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc()=%d[]='%s'", len(doc), type(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    blocklist["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    blocklist["reject"] = federation.find_domains(blocked)

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "chaos.social"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_last_blocked(blocker)
    instances.set_total_blocks(blocker, blocking)

    logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
    if len(blocking) > 0:
        for block_level in blocklist:
            logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))

            for row in blocklist[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if "domain" not in row:
                    logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
                    continue
                elif not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
                    blockdict.append({
                        "blocked": row["domain"],
                        "reason" : row["reason"],
                    })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

    logger.debug("Success! - EXIT!")
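
# Fetches an FBA-specific RSS feed (--feed) and registers the domains it lists.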
def fetch_fba_rss(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    domains = list()

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    components = urlparse(args.feed)
    domain = components.netloc.lower().split(":")[0]

    logger.debug("domain='%s'", domain)
    if sources.is_recent(domain):
        logger.info("API from domain='%s' has recently been accessed - EXIT!", domain)
        return 0

    logger.debug("domain='%s' has not been recently used, marking ...", domain)
    sources.update(domain)

    logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item[%s]='%s'", type(item), item)
            domain = item.link.split("=")[1]
            domain = tidyup.domain(domain) if domain not in [None, ""] else None

            logger.debug("domain='%s' - AFTER!", domain)
            if domain in [None, ""]:
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0
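
# Fetches the FBA bot's ATOM feed and registers the domains linked from its entries.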
def fetch_fbabot_atom(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "ryona.agency"
    feed = f"https://{source_domain}/users/fba/feed.atom"

    logger.debug("args.feed[%s]='%s'", type(args.feed), args.feed)
    if args.feed is not None and validators.url(args.feed):
        logger.debug("Setting feed='%s' ...", args.feed)
        feed = str(args.feed)
        source_domain = urlparse(args.feed).netloc

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0

    logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
    sources.update(source_domain)

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            elements = doc.findAll("a")

            logger.debug("Checking %d element(s) ...", len(elements))
            for element in elements:
                logger.debug("element[%s]='%s'", type(element), element)
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href) if href not in [None, ""] else None

                    logger.debug("domain='%s' - AFTER!", domain)
                    if domain in [None, ""]:
                        logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                        continue

                    logger.debug("domain='%s' - BEFORE!", domain)
                    domain = domain.encode("idna").decode("utf-8")
                    logger.debug("domain='%s' - AFTER!", domain)

                    if not domain_helper.is_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0
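
# Fetches peers for a single --domain, then optionally loops over known instances to fetch more.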
def fetch_instances(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("args.domain='%s' - checking ...", args.domain)
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid.", args.domain)
        return 1
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
        return 1

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    domain = tidyup.domain(args.domain)
    origin = software = None

    # Fetch record of given domain
    database.cursor.execute("SELECT origin, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
    row = database.cursor.fetchone()
    if row is not None:
        origin = row["origin"]
        software = row["software"]

    if software is None:
        logger.warning("args.domain='%s' has no software detected. You can try to run ./fba.py update_nodeinfo --domain=%s --force to get it updated.", args.domain, args.domain)
        return 1
    elif software_helper.is_relay(software):
        logger.warning("args.domain='%s' is of software type '%s' which is not supported by this command. Please invoke fetch_relays instead.", args.domain, software)
        return 1

    try:
        logger.info("Fetching instances from args.domain='%s',origin='%s',software='%s' ...", domain, origin, software)
        federation.fetch_instances(domain, origin, software, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update(args.domain)
        return 1

    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus', 'neodb') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY total_peers DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
        domain = row["domain"].encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s' ...", domain, row["origin"], row["software"])
            federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name)
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0
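
# Processes all configured CSV blocklists (blocklists.csv_files).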
def fetch_csv(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    logger.info("Checking %d CSV files ...", len(blocklists.csv_files))
    for block in blocklists.csv_files:
        logger.debug("block[blocker]='%s',block[csv_url]='%s'", block["blocker"], block["csv_url"])

        # Is domain given and not equal blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue

        logger.debug("Invoking processing.csv_block(%s, %s, fetch_csv) ...", block["blocker"], block["csv_url"])
        processing.csv_block(block["blocker"], block["csv_url"], inspect.currentframe().f_code.co_name)

    logger.debug("Success - EXIT!")
    return 0
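
# Downloads oliphant's blocklist CSV files from codeberg.org and processes them.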
def fetch_oliphant(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "codeberg.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0

    logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
    sources.update(source_domain)

    base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"

    logger.debug("Downloading %d files ...", len(blocklists.oliphant_blocklists))
    for block in blocklists.oliphant_blocklists:
        # Is domain given and not equal blocker?
        logger.debug("block[blocker]='%s',block[csv_url]='%s'", block["blocker"], block["csv_url"])
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue

        url = f"{base_url}/{block['csv_url']}"

        logger.debug("Invoking processing.csv_block(%s, %s, fetch_oliphant) ...", block["blocker"], url)
        processing.csv_block(block["blocker"], url, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
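
# Fetches plain-text blocklists (blocklists.txt_files) and registers the listed domains.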
def fetch_txt(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    logger.info("Checking %d text file(s) ...", len(blocklists.txt_files))
    for row in blocklists.txt_files:
        logger.debug("Fetching row[url]='%s' ...", row["url"])
        response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code == 200 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.strip().split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain) if domain not in [None, ""] else None
                logger.debug("domain='%s' - AFTER!", domain)

                if domain in [None, ""]:
                    logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                    continue
                elif not domain_helper.is_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue
                elif not args.force and instances.is_registered(domain):
                    logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                    continue

                logger.debug("Processing domain='%s',row[blocker]='%s' ...", domain, row["blocker"])
                processed = processing.instance(domain, row["blocker"], inspect.currentframe().f_code.co_name, force=args.force)
                logger.debug("processed='%s'", processed)

    logger.debug("Success! - EXIT!")
    return 0
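
# Scrapes the fedipact.online page and registers the domains of pact signatories.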
def fetch_fedipact(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fedipact.online"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0

    logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
    sources.update(source_domain)

    logger.info("Fetching / from source_domain='%s' ...", source_domain)
    response = utils.fetch_url(
        f"https://{source_domain}",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and response.text != "":
        logger.debug("Parsing %d Bytes ...", len(response.text))

        doc = bs4.BeautifulSoup(response.text, "html.parser")
        logger.debug("doc[]='%s'", type(doc))

        rows = doc.findAll("li")
        logger.info("Checking %d row(s) ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            domain = tidyup.domain(row.contents[0]) if row.contents[0] not in [None, ""] else None

            logger.debug("domain='%s' - AFTER!", domain)
            if domain in [None, ""]:
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.info("Fetching domain='%s' ...", domain)
            federation.fetch_instances(domain, "beach.city", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
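
# Fetches the instance list from instances.joinmobilizon.org and registers new instances.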
def fetch_joinmobilizon(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "instances.joinmobilizon.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0

    logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
    sources.update(source_domain)

    logger.info("Fetching instances from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/api/v1/instances",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    parsed = json.loads(raw)
    logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))

    if "data" not in parsed:
        logger.warning("parsed()=%d does not contain key 'data'", len(parsed))
        return 1

    logger.info("Checking %d instances ...", len(parsed["data"]))
    for row in parsed["data"]:
        logger.debug("row[]='%s'", type(row))
        if "host" not in row:
            logger.warning("row='%s' does not contain key 'host' - SKIPPED!", row)
            continue
        elif not domain_helper.is_wanted(row["host"]):
            logger.debug("row[host]='%s' is not wanted - SKIPPED!", row["host"])
            continue
        elif instances.is_registered(row["host"]):
            logger.debug("row[host]='%s' is already registered - SKIPPED!", row["host"])
            continue

        logger.info("Fetching row[host]='%s' ...", row["host"])
        federation.fetch_instances(row["host"], "demo.mobilizon.org", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
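
# Fetches instances.json from instanceapp.misskey.page and registers new Misskey instances.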
def fetch_joinmisskey(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "instanceapp.misskey.page"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0

    logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
    sources.update(source_domain)

    logger.info("Fetching instances.json from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/instances.json",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    parsed = json.loads(raw)
    logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))

    if "instancesInfos" not in parsed:
        logger.warning("parsed()=%d does not contain element 'instancesInfos'", len(parsed))
        return 1

    logger.info("Checking %d instance(s) ...", len(parsed["instancesInfos"]))
    for row in parsed["instancesInfos"]:
        logger.debug("row[%s]='%s'", type(row), row)
        if "url" not in row:
            logger.warning("row()=%d does not have element 'url' - SKIPPED!", len(row))
            continue
        elif not domain_helper.is_wanted(row["url"]):
            logger.debug("row[url]='%s' is not wanted - SKIPPED!", row["url"])
            continue
        elif instances.is_registered(row["url"]):
            logger.debug("row[url]='%s' is already registered - SKIPPED!", row["url"])
            continue

        logger.info("Fetching row[url]='%s' ...", row["url"])
        federation.fetch_instances(row["url"], "misskey.io", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
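
# Re-checks instances with (possibly) obfuscated blocklists and tries to deobfuscate their entries.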
def recheck_obfuscation(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if isinstance(args.domain, str) and args.domain != "" and domain_helper.is_wanted(args.domain):
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE (has_obfuscation = 1 OR has_obfuscation IS NULL) AND domain = ?", [args.domain])
    elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE (has_obfuscation = 1 OR has_obfuscation IS NULL) AND software = ?", [args.software])
    else:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 OR has_obfuscation IS NULL")

    rows = database.cursor.fetchall()
    logger.info("Checking %d domains ...", len(rows))
    for row in rows:
        logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        if (args.force is None or not args.force) and args.domain is None and args.software is None and instances.is_recent(row["domain"], "last_blocked"):
            logger.debug("row[domain]='%s' has been recently checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force))
            continue
        elif blacklist.is_blacklisted(row["domain"]):
            logger.warning("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"])
            continue

        logger.debug("Invoking federation.fetch_blocks(%s) ...", row["domain"])
        blocking = federation.fetch_blocks(row["domain"])

        logger.debug("blocking()=%d", len(blocking))
        if len(blocking) == 0:
            logger.debug("Empty blocking list, trying individual fetch_blocks() for row[software]='%s' ...", row["software"])
            if row["software"] == "pleroma":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = pleroma.fetch_blocks(row["domain"])
            elif row["software"] == "mastodon":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = mastodon.fetch_blocks(row["domain"])
            elif row["software"] == "lemmy":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = lemmy.fetch_blocks(row["domain"])
            elif row["software"] == "friendica":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = friendica.fetch_blocks(row["domain"])
            elif row["software"] == "misskey":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = misskey.fetch_blocks(row["domain"])
            else:
                logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])

        # c.s isn't part of oliphant's "hidden" blocklists
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] != "chaos.social" and row["software"] is not None and not software_helper.is_relay(row["software"]) and not blocklists.has(row["domain"]):
            logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
            instances.set_last_blocked(row["domain"])
            instances.set_total_blocks(row["domain"], blocking)

        obfuscated = 0
        blockdict = list()

        logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
        for block in blocking:
            logger.debug("block[blocked]='%s'", block["blocked"])

            blocked = None
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".i2p") and config.get("allow_i2p_domain") != "true":
                logger.debug("blocked='%s' is an I2P domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
                logger.debug("block='%s' is obfuscated.", block["blocked"])
                obfuscated = obfuscated + 1
                blocked = utils.deobfuscate(block["blocked"], row["domain"], block["digest"] if "digest" in block else None)
            elif not domain_helper.is_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
                logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
                continue

            logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
            if blocked is not None and blocked != block["blocked"]:
                logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
                obfuscated = obfuscated - 1

                if blacklist.is_blacklisted(blocked):
                    logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
                    continue
                elif blacklist.is_blacklisted(row["domain"]):
                    logger.debug("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"])
                    continue
                elif blocks.is_instance_blocked(row["domain"], blocked):
                    logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
                    continue

                block["block_level"] = blocks.alias_block_level(block["block_level"])

                logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
                if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
                    blockdict.append({
                        "blocked": blocked,
                        "reason" : block["reason"],
                    })

        logger.debug("Setting obfuscated=%d for row[domain]='%s' ...", obfuscated, row["domain"])
        instances.set_has_obfuscation(row["domain"], (obfuscated > 0))
        instances.set_obfuscated_blocks(row["domain"], obfuscated)

        logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
        if instances.has_pending(row["domain"]):
            logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
            instances.update(row["domain"])

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", row["domain"], len(blockdict))
            network.send_bot_post(row["domain"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0
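
# Fetches the instance CSV from demo.fedilist.com, optionally filtered by --software.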
def fetch_fedilist(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "demo.fedilist.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    url = f"http://{source_domain}/instance/csv?onion=not"
    if args.software is not None and args.software != "":
        logger.debug("args.software='%s'", args.software)
        url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"

    logger.info("Fetching url='%s' ...", url)
    response = reqto.get(
        url,
        headers=network.web_headers,
        timeout=(config.get("connection_timeout"), config.get("read_timeout")),
        allow_redirects=False
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if not response.ok or response.status_code > 200 or len(response.content) == 0:
        logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.content))
        return 2

    reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")

    logger.debug("reader[]='%s'", type(reader))
    if reader is None:
        logger.warning("Failed parsing response.content()=%d as CSV content", len(response.content))
        return 3

    rows = list(reader)

    logger.info("Checking %d rows ...", len(rows))
    for row in rows:
        logger.debug("row[]='%s'", type(row))
        if "hostname" not in row:
            logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(row))
            continue

        logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"])
        domain = tidyup.domain(row["hostname"]) if row["hostname"] not in [None, ""] else None
        logger.debug("domain='%s' - AFTER!", domain)

        if domain in [None, ""]:
            logger.debug("domain='%s' is empty after tidyup.domain(): row[hostname]='%s' - SKIPPED!", domain, row["hostname"])
            continue

        logger.debug("domain='%s' - BEFORE!", domain)
        domain = domain.encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            continue
        elif (args.force is None or not args.force) and instances.is_registered(domain):
            logger.debug("domain='%s' is already registered, --force not specified: args.force[]='%s'", domain, type(args.force))
            continue
        elif instances.is_recent(domain):
            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
            continue

        logger.info("Fetching instances from domain='%s' ...", domain)
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

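# Re-runs software detection (nodeinfo) for instances selected by --domain,
# --software, --mode or related filters, updating the stored software type
# where it has changed.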
def update_nodeinfo(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        logger.debug("Fetching args.domain='%s'", args.domain)
        database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
    elif args.software is not None and args.software != "":
        logger.info("Fetching domains for args.software='%s'", args.software)
        database.cursor.execute("SELECT domain, software FROM instances WHERE software = ? ORDER BY last_updated ASC", [args.software])
    elif args.mode is not None and args.mode != "":
        logger.info("Fetching domains for args.mode='%s'", args.mode.upper())
        database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode = ? ORDER BY last_updated ASC", [args.mode])
    elif args.no_software:
        logger.info("Fetching domains with no software type detected ...")
        database.cursor.execute("SELECT domain, software FROM instances WHERE software IS NULL ORDER BY last_updated ASC")
    elif args.with_software:
        logger.info("Fetching domains with any software type detected ...")
        database.cursor.execute("SELECT domain, software FROM instances WHERE software IS NOT NULL ORDER BY last_updated ASC")
    elif args.no_auto:
        # The flag name "no_auto" is an assumption; the original condition line was lost.
        logger.info("Fetching domains with other detection mode than AUTO_DISCOVERY being set ...")
        database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode IS NOT NULL AND detection_mode != 'AUTO_DISCOVERY' ORDER BY last_updated ASC")
    elif args.no_detection:
        logger.info("Fetching domains with no detection mode being set ...")
        database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode IS NULL ORDER BY last_updated ASC")
    else:
        logger.info("Fetching domains for recently updated ...")
        database.cursor.execute("SELECT domain, software FROM instances ORDER BY last_updated ASC")

    domains = database.cursor.fetchall()

    logger.info("Checking %d domain(s) ...", len(domains))
    cnt = 0
    for row in domains:
        logger.debug("row[]='%s'", type(row))
        if not args.force and instances.is_recent(row["domain"], "last_nodeinfo"):
            logger.debug("row[domain]='%s' has been recently checked - SKIPPED!", row["domain"])
            continue
        elif blacklist.is_blacklisted(row["domain"]):
            logger.debug("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"])
            continue

        try:
            logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
            software = federation.determine_software(row["domain"])

            logger.debug("Determined software='%s'", software)
            if (software != row["software"] and software is not None) or args.force is True:
                logger.debug("software='%s'", software)
                if software is None:
                    logger.debug("Setting nodeinfo_url to 'None' for row[domain]='%s' ...", row["domain"])
                    instances.set_nodeinfo_url(row["domain"], None)

                logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
                instances.set_software(row["domain"], software)

            if software is not None:
                logger.debug("Setting row[domain]='%s' as successfully determined ...", row["domain"])
                instances.set_success(row["domain"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
            instances.set_last_error(row["domain"], exception)

        instances.set_last_nodeinfo(row["domain"])
        instances.update(row["domain"])
        cnt = cnt + 1

    logger.debug("Success! - EXIT!")
    return 0

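# Queries the instances.social API (requires instances_social_api_key in
# config.json) for its list of known instances and queues new, wanted
# domains for crawling.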
def fetch_instances_social(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "instances.social"

    if config.get("instances_social_api_key") == "":
        logger.error("API key not set. Please set in your config.json file.")
        return 1
    elif sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 2
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    headers = {
        "Authorization": f"Bearer {config.get('instances_social_api_key')}",
    }

    logger.info("Fetching list from source_domain='%s' ...", source_domain)
    fetched = network.get_json_api(
        source_domain,
        "/api/1.0/instances/list?count=0&sort_by=name",
        headers=headers,
        timeout=(config.get("connection_timeout"), config.get("read_timeout"))
    )
    logger.debug("fetched(%d)[]='%s'", len(fetched), type(fetched))

    if "error_message" in fetched:
        logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
        return 3
    elif "exception" in fetched:
        logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
        return 4
    elif "json" not in fetched:
        logger.warning("fetched has no element 'json' - EXIT!")
        return 5
    elif "instances" not in fetched["json"]:
        logger.warning("fetched[json] has no element 'instances' - EXIT!")
        return 6

    domains = []
    rows = fetched["json"]["instances"]

    logger.info("Checking %d row(s) ...", len(rows))
    for row in rows:
        logger.debug("row[]='%s'", type(row))
        domain = tidyup.domain(row["name"]) if row["name"] not in [None, ""] else None
        logger.debug("domain='%s' - AFTER!", domain)

        if domain in [None, ""]:
            logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
            continue

        logger.debug("domain='%s' - BEFORE!", domain)
        domain = domain.encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            continue
        elif domain in domains:
            logger.debug("domain='%s' is already added - SKIPPED!", domain)
            continue
        elif instances.is_registered(domain):
            logger.debug("domain='%s' is already registered - SKIPPED!", domain)
            continue
        elif instances.is_recent(domain):
            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
            continue

        logger.info("Fetching instances from domain='%s'", domain)
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

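# Fetches known relay servers from api.relaylist.com and queues their
# domains for crawling.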
def fetch_relaylist(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "api.relaylist.com"

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching list from source_domain='%s' ...", source_domain)
    fetched = network.get_json_api(
        source_domain,
        "/relays",  # assumed endpoint path; the original line was elided
        {},
        (config.get("connection_timeout"), config.get("read_timeout"))
    )
    logger.debug("fetched(%d)[]='%s'", len(fetched), type(fetched))

    if "error_message" in fetched:
        logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
        return 2
    elif "exception" in fetched:
        logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
        return 3
    elif "json" not in fetched:
        logger.warning("fetched has no element 'json' - EXIT!")
        return 4

    domains = []

    logger.info("Checking %d row(s) ...", len(fetched["json"]))
    for row in fetched["json"]:
        logger.debug("row[]='%s'", type(row))
        domain = urlparse(row["url"]).netloc.lower().split(":")[0]
        logger.debug("domain='%s' - AFTER!", domain)

        if domain in [None, ""]:
            logger.debug("domain='%s' is empty after parsing row[url] - SKIPPED!", domain)
            continue

        logger.debug("domain='%s' - BEFORE!", domain)
        domain = domain.encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            continue
        elif domain in domains:
            logger.debug("domain='%s' is already added - SKIPPED!", domain)
            continue
        elif instances.is_registered(domain):
            logger.debug("domain='%s' is already registered - SKIPPED!", domain)
            continue
        elif instances.is_recent(domain):
            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
            continue

        logger.info("Fetching instances from domain='%s'", domain)
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

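# Crawls registered relay instances (activityrelay, aoderelay,
# selective-relay, pub-relay) for their peer lists: pub-relay peers come
# from nodeinfo metadata, the others are scraped from the relay's HTML
# front page.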
def fetch_relays(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        logger.debug("Fetching instances record for args.domain='%s' ...", args.domain)
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND domain = ? LIMIT 1", [args.domain])
    elif args.software is not None and args.software != "":
        logger.debug("Fetching instances records for args.software='%s' ...", args.software)
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND nodeinfo_url IS NOT NULL AND software = ? ORDER BY last_updated DESC", [args.software])
    else:
        logger.debug("Fetching all relay instances ...")
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND nodeinfo_url IS NOT NULL ORDER BY last_updated DESC")

    domains = []
    rows = database.cursor.fetchall()

    logger.info("Checking %d relays ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s',row[software]='%s'", row["domain"], row["software"])
        if not args.force and instances.is_recent(row["domain"]):
            logger.debug("row[domain]='%s' has been recently fetched - SKIPPED!", row["domain"])
            continue
        elif row["nodeinfo_url"] is None:
            logger.warning("row[domain]='%s' has empty nodeinfo_url but this is required - SKIPPED!", row["domain"])
            continue

        peers = []
        try:
            logger.debug("row[domain]='%s',row[software]='%s' - checking ....", row["domain"], row["software"])
            if row["software"] == "pub-relay":
                logger.info("Fetching row[nodeinfo_url]='%s' from relay row[domain]='%s',row[software]='%s' ...", row["nodeinfo_url"], row["domain"], row["software"])
                raw = network.fetch_api_url(
                    row["nodeinfo_url"],
                    (config.get("connection_timeout"), config.get("read_timeout"))
                )

                logger.debug("raw[%s]()=%d", type(raw), len(raw))
                if "exception" in raw:
                    logger.warning("row[domain]='%s' has caused an exception: '%s' - raising again ...", row["domain"], type(raw["exception"]))
                    raise raw["exception"]
                elif "error_message" in raw:
                    logger.warning("row[domain]='%s' has caused error message: '%s' - SKIPPED!", row["domain"], raw["error_message"])
                    instances.set_last_error(row["domain"], raw)
                    instances.set_last_instance_fetch(row["domain"])
                    instances.update(row["domain"])
                    continue
                elif "json" not in raw:
                    logger.warning("raw()=%d does not contain key 'json' in response - SKIPPED!", len(raw))
                    continue
                elif "metadata" not in raw["json"]:
                    logger.warning("raw[json]()=%d does not contain key 'metadata' in response - SKIPPED!", len(raw["json"]))
                    continue
                elif "peers" not in raw["json"]["metadata"]:
                    logger.warning("raw[json][metadata]()=%d does not contain key 'peers' in response - SKIPPED!", len(raw["json"]["metadata"]))
                    continue
            else:
                logger.info("Fetching / from relay row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
                raw = utils.fetch_url(
                    f"https://{row['domain']}",
                    network.web_headers,
                    (config.get("connection_timeout"), config.get("read_timeout"))
                ).text
                logger.debug("raw[%s]()=%d", type(raw), len(raw))

                doc = bs4.BeautifulSoup(raw, features="html.parser")
                logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching from relay '%s': '%s'", type(exception), row["domain"], str(exception))
            instances.set_last_error(row["domain"], exception)
            instances.set_last_instance_fetch(row["domain"])
            instances.update(row["domain"])
            continue

        logger.debug("row[software]='%s'", row["software"])
        if row["software"] == "activityrelay":
            logger.debug("Checking row[domain]='%s' ...", row["domain"])
            tags = doc.findAll("p")

            logger.debug("Checking %d paragraphs ...", len(tags))
            for tag in tags:
                logger.debug("tag[]='%s'", type(tag))
                if len(tag.contents) == 0:
                    logger.debug("tag='%s' is an empty tag - SKIPPED!", tag)
                    continue
                elif "registered instances" not in tag.contents[0]:
                    logger.debug("Skipping paragraph, text not found.")
                    continue

                logger.debug("Found tag.contents[0][]='%s'", tag.contents[0])
                for domain in tag.contents:
                    logger.debug("domain[%s]='%s'", type(domain), domain)
                    if not isinstance(domain, bs4.element.NavigableString) or "registered instances" in domain:
                        continue

                    domain = str(domain)
                    logger.debug("domain='%s'", domain)
                    if not domain_helper.is_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                        continue

                    logger.debug("domain='%s' - BEFORE!", domain)
                    domain = tidyup.domain(domain) if domain not in [None, ""] else None
                    logger.debug("domain='%s' - AFTER!", domain)

                    if domain in [None, ""]:
                        logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
                        continue
                    elif domain not in peers:
                        logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
                        peers.append(domain)

                    logger.debug("domains()=%d,domain='%s'", len(domains), domain)
                    if dict_helper.has_key(domains, "domain", domain):
                        logger.debug("domain='%s' already added", domain)
                        continue

                    logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
                    domains.append({
                        "domain": domain,
                        "origin": row["domain"],
                    })
        elif row["software"] in ["aoderelay", "selective-relay"]:
            logger.debug("Checking row[domain]='%s' ...", row["domain"])
            if row["software"] == "aoderelay":
                tags = doc.findAll("section", {"class": "instance"})
            else:
                tags = doc.find("div", {"id": "instances"}).findAll("li")

            logger.debug("Checking %d tags ...", len(tags))
            for tag in tags:
                logger.debug("tag[]='%s'", type(tag))

                link = tag.find("a")
                logger.debug("link[%s]='%s'", type(link), link)
                if not isinstance(link, bs4.element.Tag):
                    logger.warning("tag[%s]='%s' is not type of 'bs4.element.Tag' - SKIPPED!", type(tag), tag)
                    continue

                components = urlparse(link.get("href"))
                logger.debug("components(%d)='%s'", len(components), components)
                domain = components.netloc.lower().split(":")[0]

                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain) if domain not in [None, ""] else None
                logger.debug("domain='%s' - AFTER!", domain)

                if domain in [None, ""]:
                    logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
                    continue
                elif domain not in peers:
                    logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
                    peers.append(domain)

                logger.debug("domains()=%d,domain='%s'", len(domains), domain)
                if dict_helper.has_key(domains, "domain", domain):
                    logger.debug("domain='%s' already added", domain)
                    continue

                logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
                domains.append({
                    "domain": domain,
                    "origin": row["domain"],
                })
        elif row["software"] == "pub-relay":
            logger.debug("Checking %d peer(s) row[domain]='%s' ...", len(raw["json"]["metadata"]["peers"]), row["domain"])
            for domain in raw["json"]["metadata"]["peers"]:
                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain) if domain not in [None, ""] else None
                logger.debug("domain='%s' - AFTER!", domain)

                if domain in [None, ""]:
                    logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
                    continue
                elif domain not in peers:
                    logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
                    peers.append(domain)

                logger.debug("domains()=%d,domain='%s'", len(domains), domain)
                if dict_helper.has_key(domains, "domain", domain):
                    logger.debug("domain='%s' already added", domain)
                    continue

                logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
                domains.append({
                    "domain": domain,
                    "origin": row["domain"],
                })
        else:
            logger.warning("row[domain]='%s',row[software]='%s' is not supported", row["domain"], row["software"])
            continue

        logger.debug("Updating last_instance_fetch for row[domain]='%s' ...", row["domain"])
        instances.set_last_instance_fetch(row["domain"])

        logger.info("Relay '%s' has %d peer(s) registered.", row["domain"], len(peers))
        instances.set_total_peers(row["domain"], peers)

        logger.debug("Flushing data for row[domain]='%s'", row["domain"])
        instances.update(row["domain"])

    logger.info("Checking %d domains ...", len(domains))
    for row in domains:
        logger.debug("row[domain]='%s',row[origin]='%s'", row["domain"], row["origin"])
        if not domain_helper.is_wanted(row["domain"]):
            logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
            continue
        elif instances.is_registered(row["domain"]):
            logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
            continue

        logger.info("Fetching row[domain]='%s',row[origin]='%s' ...", row["domain"], row["origin"])
        federation.fetch_instances(row["domain"], row["origin"], None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

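# Converts non-punycode (IDNA) domain names in the instances and blocks
# tables to their punycode representation.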
def convert_idna(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    database.cursor.execute("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    instances.translate_idnas(rows, "domain")

    database.cursor.execute("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    instances.translate_idnas(rows, "origin")

    database.cursor.execute("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    blocks.translate_idnas(rows, "blocker")

    database.cursor.execute("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    blocks.translate_idnas(rows, "blocked")

    logger.debug("Success! - EXIT!")
    return 0

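# Removes records whose domain name fails validation, then compacts the
# database with VACUUM.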
def remove_invalid(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    database.cursor.execute("SELECT domain FROM instances ORDER BY domain ASC")
    rows = database.cursor.fetchall()

    logger.info("Checking %d domains ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if not validators.domain(row["domain"].split("/")[0]):
            logger.info("Invalid row[domain]='%s' found, removing ...", row["domain"])
            database.cursor.execute("DELETE FROM blocks WHERE blocker = ? OR blocked = ?", [row["domain"], row["domain"]])
            database.cursor.execute("DELETE FROM instances WHERE domain = ? LIMIT 1", [row["domain"]])

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.info("Vacuum cleaning database ...")
    database.cursor.execute("VACUUM")

    logger.debug("Success! - EXIT!")
    return 0