1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
23 from urllib.parse import urlparse
32 from fba import database
35 from fba.helpers import blacklist
36 from fba.helpers import blocklists
37 from fba.helpers import config
38 from fba.helpers import cookies
39 from fba.helpers import dicts as dict_helper
40 from fba.helpers import domain as domain_helper
41 from fba.helpers import locking
42 from fba.helpers import processing
43 from fba.helpers import software as software_helper
44 from fba.helpers import tidyup
46 from fba.http import csrf
47 from fba.http import federation
48 from fba.http import network
50 from fba.models import blocks
51 from fba.models import instances
52 from fba.models import sources
54 from fba.networks import friendica
55 from fba.networks import lemmy
56 from fba.networks import mastodon
57 from fba.networks import misskey
58 from fba.networks import pleroma
60 logging.basicConfig(level=logging.INFO)
61 logger = logging.getLogger(__name__)
62 #logger.setLevel(logging.DEBUG)
64 def check_instance(args: argparse.Namespace) -> int:
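# Checks whether args.domain is a syntactically valid, non-blacklisted domain that is
# not yet registered; logs the outcome and returns a status code.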
65 logger.debug("args.domain='%s' - CALLED!", args.domain)
68 if not validators.domain(args.domain):
69 logger.warning("args.domain='%s' is not valid", args.domain)
71 elif blacklist.is_blacklisted(args.domain):
72 logger.warning("args.domain='%s' is blacklisted", args.domain)
74 elif instances.is_registered(args.domain):
75 logger.warning("args.domain='%s' is already registered", args.domain)
78 logger.info("args.domain='%s' is not known", args.domain)
80 logger.debug("status=%d - EXIT!", status)
83 def check_nodeinfo(args: argparse.Namespace) -> int:
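# Consistency check: every stored nodeinfo_url must either be a relative URL or contain
# the instance's domain (or its punycode form); mismatches are logged.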
84 logger.debug("args[]='%s' - CALLED!", type(args))
87 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")
90 for row in database.cursor.fetchall():
91 logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
92 punycode = row["domain"].encode("idna").decode("utf-8")
94 if row["nodeinfo_url"].startswith("/"):
95 logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
97 elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
98 logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
101 logger.info("Found %d row(s)", cnt)
103 logger.debug("EXIT!")
106 def fetch_pixelfed_api(args: argparse.Namespace) -> int:
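# Fetches the public server list from the pixelfed.org API and registers every wanted,
# not yet known domain via federation.fetch_instances().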
107 logger.debug("args[]='%s' - CALLED!", type(args))
109 # No CSRF token by default; there is no need to add network.source_headers here
111 source_domain = "pixelfed.org"
113 if sources.is_recent(source_domain):
114 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
117 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
118 sources.update(source_domain)
121 logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
122 headers = csrf.determine(source_domain, dict())
123 except network.exceptions as exception:
124 logger.warning("Exception '%s' while checking CSRF (fetch_pixelfed_api,%s) - EXIT!", type(exception), __name__)
128 logger.info("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
129 fetched = network.get_json_api(
131 "/api/v1/servers/all.json?scope=All&country=all&language=all",
133 (config.get("connection_timeout"), config.get("read_timeout"))
136 logger.debug("fetched(%d)[]='%s'", len(fetched), type(fetched))
137 if "error_message" in fetched:
138 logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
140 elif "data" not in fetched["json"]:
141 logger.warning("API did not return JSON with 'data' element - EXIT!")
144 rows = fetched["json"]["data"]
145 logger.info("Checking %d fetched rows ...", len(rows))
147 logger.debug("row[]='%s'", type(row))
148 if "domain" not in row:
149 logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
151 elif row["domain"] in [None, ""]:
152 logger.debug("row[domain]='%s' is empty - SKIPPED!", row["domain"])
155 logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
156 domain = row["domain"].encode("idna").decode("utf-8")
157 logger.debug("domain='%s' - AFTER!", domain)
159 if not domain_helper.is_wanted(domain):
160 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
162 elif instances.is_registered(domain):
163 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
165 elif instances.is_recent(domain):
166 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
169 logger.debug("Fetching instances from domain='%s' ...", domain)
170 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
172 except network.exceptions as exception:
173 logger.warning("Cannot fetch JSON API,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
176 logger.debug("Success! - EXIT!")
179 def fetch_bkali(args: argparse.Namespace) -> int:
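# Queries the GraphQL API at gql.api.bka.li for a domain list and adds every wanted,
# not yet registered domain as a new instance.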
180 logger.debug("args[]='%s' - CALLED!", type(args))
182 logger.debug("Invoking locking.acquire() ...")
185 source_domain = "gql.api.bka.li"
186 if sources.is_recent(source_domain):
187 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
190 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
191 sources.update(source_domain)
195 logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
196 fetched = network.post_json_api(
200 "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
204 logger.debug("fetched[]='%s'", type(fetched))
205 if "error_message" in fetched:
206 logger.warning("post_json_api() for source_domain='%s' returned error message='%s'", source_domain, fetched["error_message"])
208 elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
209 logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
212 rows = fetched["json"]
214 logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
216 raise Exception("WARNING: Returned no records")
217 elif "data" not in rows:
218 raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
219 elif "nodeinfo" not in rows["data"]:
220 raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")
222 for entry in rows["data"]["nodeinfo"]:
223 logger.debug("entry[%s]='%s'", type(entry), entry)
224 if "domain" not in entry:
225 logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
227 elif entry["domain"] in [None, ""]:
228 logger.debug("entry[domain]='%s' is empty - SKIPPED!", entry["domain"])
230 elif not domain_helper.is_wanted(entry["domain"]):
231 logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
233 elif instances.is_registered(entry["domain"]):
234 logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
236 elif instances.is_recent(entry["domain"]):
237 logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
240 logger.debug("Adding domain='%s' ...", entry["domain"])
241 domains.append(entry["domain"])
243 except network.exceptions as exception:
244 logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
247 logger.debug("domains()=%d", len(domains))
249 logger.info("Adding %d new instances ...", len(domains))
250 for domain in domains:
251 logger.debug("domain='%s' - BEFORE!", domain)
252 domain = domain.encode("idna").decode("utf-8")
253 logger.debug("domain='%s' - AFTER!", domain)
256 logger.info("Fetching instances from domain='%s' ...", domain)
257 federation.fetch_instances(domain, "tak.teleyal.blog", None, inspect.currentframe().f_code.co_name)
258 except network.exceptions as exception:
259 logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
260 instances.set_last_error(domain, exception)
263 logger.debug("Success - EXIT!")
266 def fetch_blocks(args: argparse.Namespace) -> int:
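# Fetches block lists from registered instances (optionally limited to a single domain
# or software), deobfuscates wildcarded entries where possible and stores the resulting
# blocks in the database.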
267 logger.debug("args[]='%s' - CALLED!", type(args))
268 if args.domain is not None and args.domain != "":
269 logger.debug("args.domain='%s' - checking ...", args.domain)
270 if not validators.domain(args.domain):
271 logger.warning("args.domain='%s' is not valid.", args.domain)
273 elif blacklist.is_blacklisted(args.domain):
274 logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
276 elif not instances.is_registered(args.domain):
277 logger.warning("args.domain='%s' is not registered, please run ./fba.py fetch_instances '%s' first.", args.domain, args.domain)
280 logger.debug("Invoking locking.acquire() ...")
283 if args.domain is not None and args.domain != "":
284 # Re-check single domain
285 logger.debug("Querying database for args.domain='%s' ...", args.domain)
286 database.cursor.execute(
287 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ? LIMIT 1", [args.domain]
289 elif args.software is not None and args.software != "":
290 # Re-check single software
291 logger.debug("Querying database for args.software='%s' ...", args.software)
292 database.cursor.execute(
293 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC", [args.software]
296 # Check only entries with total_blocks=None
297 database.cursor.execute(
298 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND total_blocks IS NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC"
301 # Re-check after "timeout" (aka. minimum interval)
302 database.cursor.execute(
303 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC"
306 rows = database.cursor.fetchall()
307 logger.info("Checking %d entries ...", len(rows))
308 for blocker, software, origin, nodeinfo_url in rows:
309 logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
311 if not domain_helper.is_wanted(blocker):
312 logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
314 elif not args.force and instances.is_recent(blocker, "last_blocked"):
315 logger.debug("blocker='%s' has been recently accessed - SKIPPED!", blocker)
318 logger.debug("Setting last_blocked,has_obfuscation=false for blocker='%s' ...", blocker)
319 instances.set_last_blocked(blocker)
320 instances.set_has_obfuscation(blocker, False)
322 # chaos.social isn't part of oliphant's "hidden" blocklists
323 if blocker == "chaos.social" or software_helper.is_relay(software) or blocklists.has(blocker):
324 logger.debug("Skipping blocker='%s', run ./fba.py fetch_cs, fetch_oliphant, fetch_csv instead!", blocker)
327 logger.debug("Invoking federation.fetch_blocks(%s) ...", blocker)
328 blocking = federation.fetch_blocks(blocker)
330 logger.debug("blocker='%s',software='%s',blocking()=%d", blocker, software, len(blocking))
331 if len(blocking) == 0:
332 logger.debug("blocker='%s',software='%s' - fetching blocklist ...", blocker, software)
333 if software == "pleroma":
334 blocking = pleroma.fetch_blocks(blocker)
335 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
336 elif software == "mastodon":
337 blocking = mastodon.fetch_blocks(blocker)
338 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
339 elif software == "lemmy":
340 blocking = lemmy.fetch_blocks(blocker)
341 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
342 elif software == "friendica":
343 blocking = friendica.fetch_blocks(blocker)
344 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
345 elif software == "misskey":
346 blocking = misskey.fetch_blocks(blocker)
347 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
349 logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)
351 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
352 instances.set_total_blocks(blocker, blocking)
355 deobfuscated = obfuscated = 0
357 logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
358 for block in blocking:
359 logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])
361 if block["block_level"] == "":
362 logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
365 logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
366 block["blocked"] = tidyup.domain(block["blocked"])
367 block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
368 logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])
370 if block["blocked"] in [None, ""]:
371 logger.warning("block[blocked]='%s' is empty, blocker='%s'", block["blocked"], blocker)
373 elif block["blocked"].endswith(".onion"):
374 logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
376 elif block["blocked"].endswith(".i2p") and config.get("allow_i2p_domain") != "true":
377 logger.debug("blocked='%s' is an I2P domain - SKIPPED", block["blocked"])
379 elif block["blocked"].endswith(".arpa"):
380 logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
382 elif block["blocked"].endswith(".tld"):
383 logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
385 elif block["blocked"].find("*") >= 0:
386 logger.debug("blocker='%s' uses obfuscated domains", blocker)
387 instances.set_has_obfuscation(blocker, True)
388 obfuscated = obfuscated + 1
390 # Some Friendica servers also obfuscate domains without a hash
391 row = instances.deobfuscate("*", block["blocked"], block["digest"] if "digest" in block else None)
393 logger.debug("row[]='%s'", type(row))
395 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
398 deobfuscated = deobfuscated + 1
399 block["blocked"] = row["domain"]
400 origin = row["origin"]
401 nodeinfo_url = row["nodeinfo_url"]
402 elif block["blocked"].find("?") >= 0:
403 logger.debug("blocker='%s' uses obfuscated domains", blocker)
404 instances.set_has_obfuscation(blocker, True)
405 obfuscated = obfuscated + 1
407 # Some servers obfuscate domains with question marks; unclear whether this depends on the version
408 row = instances.deobfuscate("?", block["blocked"], block["digest"] if "digest" in block else None)
410 logger.debug("row[]='%s'", type(row))
412 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
415 deobfuscated = deobfuscated + 1
416 block["blocked"] = row["domain"]
417 origin = row["origin"]
418 nodeinfo_url = row["nodeinfo_url"]
420 logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
421 if block["blocked"] in [None, ""]:
422 logger.debug("block[blocked]='%s' is empty - SKIPPED!", block["blocked"])
425 logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
426 block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
427 logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
429 if not domain_helper.is_wanted(block["blocked"]):
430 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
432 elif block["block_level"] in ["accept", "accepted"]:
433 logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
435 elif not instances.is_registered(block["blocked"]):
436 logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
437 federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)
439 block["block_level"] = blocks.alias_block_level(block["block_level"])
441 if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
442 logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
444 "blocked": block["blocked"],
445 "reason" : block["reason"],
448 logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
449 cookies.clear(block["blocked"])
451 logger.info("blocker='%s' has %d obfuscated domain(s) and %d of them could be deobfuscated.", blocker, obfuscated, deobfuscated)
452 instances.set_obfuscated_blocks(blocker, obfuscated)
454 logger.debug("Flushing updates for blocker='%s' ...", blocker)
455 instances.update(blocker)
457 logger.debug("Invoking commit() ...")
458 database.connection.commit()
460 logger.debug("Invoking cookies.clear(%s) ...", blocker)
461 cookies.clear(blocker)
463 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
464 if config.get("bot_enabled") and len(blockdict) > 0:
465 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
466 network.send_bot_post(blocker, blockdict)
468 logger.debug("Success! - EXIT!")
471 def fetch_observer(args: argparse.Namespace) -> int:
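# Crawls fediverse.observer: determines the software types from the navigation bar
# (or uses args.software) and registers every listed, wanted domain.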
472 logger.debug("args[]='%s' - CALLED!", type(args))
474 logger.debug("Invoking locking.acquire() ...")
477 source_domain = "fediverse.observer"
478 if sources.is_recent(source_domain):
479 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
482 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
483 sources.update(source_domain)
486 if args.software is None:
487 logger.info("Fetching software list ...")
488 raw = utils.fetch_url(
489 f"https://{source_domain}",
491 (config.get("connection_timeout"), config.get("read_timeout"))
493 logger.debug("raw[%s]()=%d", type(raw), len(raw))
495 doc = bs4.BeautifulSoup(raw, features="html.parser")
496 logger.debug("doc[]='%s'", type(doc))
498 navbar = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"})
499 logger.debug("navbar[]='%s'", type(navbar))
501 logger.warning("Cannot find navigation bar, cannot continue!")
504 items = navbar.findAll("a", {"class": "dropdown-item"})
505 logger.debug("items[]='%s'", type(items))
507 logger.info("Checking %d menu items ...", len(items))
509 logger.debug("item[%s]='%s'", type(item), item)
510 if item.text.lower() == "all":
511 logger.debug("Skipping 'All' menu entry ...")
514 logger.debug("Appending item.text='%s' ...", item.text)
515 types.append(tidyup.domain(item.text))
517 logger.info("Adding args.software='%s' as type ...", args.software)
518 types.append(args.software)
520 logger.info("Fetching %d different table data ...", len(types))
521 for software in types:
522 logger.debug("software='%s'", software)
524 if args.software is not None and args.software != software:
525 logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
530 logger.debug("Fetching table data for software='%s' ...", software)
531 raw = utils.fetch_url(
532 f"https://{source_domain}/app/views/tabledata.php?software={software}",
534 (config.get("connection_timeout"), config.get("read_timeout"))
536 logger.debug("raw[%s]()=%d", type(raw), len(raw))
538 doc = bs4.BeautifulSoup(raw, features="html.parser")
539 logger.debug("doc[]='%s'", type(doc))
540 except network.exceptions as exception:
541 logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
544 items = doc.findAll("a", {"class": "url"})
545 logger.info("Checking %d items,software='%s' ...", len(items), software)
547 logger.debug("item[]='%s'", type(item))
548 domain = item.decode_contents()
549 logger.debug("domain[%s]='%s'", type(domain), domain)
550 domain = tidyup.domain(domain) if domain not in [None, ""] else None
551 logger.debug("domain='%s' - AFTER!", domain)
553 if domain in [None, ""]:
554 logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
557 logger.debug("domain='%s' - BEFORE!", domain)
558 domain = domain.encode("idna").decode("utf-8")
559 logger.debug("domain='%s' - AFTER!", domain)
561 if not domain_helper.is_wanted(domain):
562 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
564 elif instances.is_registered(domain):
565 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
568 logger.info("Fetching instances for domain='%s' ...", domain)
569 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
571 logger.debug("Success! - EXIT!")
574 def fetch_todon_wiki(args: argparse.Namespace) -> int:
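# Parses the silenced/limited and suspended server lists from wiki.todon.eu and stores
# the resulting blocks.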
575 logger.debug("args[]='%s' - CALLED!", type(args))
577 logger.debug("Invoking locking.acquire() ...")
580 source_domain = "wiki.todon.eu"
581 if sources.is_recent(source_domain):
582 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
585 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
586 sources.update(source_domain)
593 logger.debug("Fetching domainblocks from source_domain='%s'", source_domain)
594 raw = utils.fetch_url(
595 f"https://{source_domain}/todon/domainblocks",
597 (config.get("connection_timeout"), config.get("read_timeout"))
599 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
601 doc = bs4.BeautifulSoup(raw, "html.parser")
602 logger.debug("doc[]='%s'", type(doc))
604 silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
605 logger.info("Checking %d silenced/limited entries ...", len(silenced))
606 blocklist["silenced"] = utils.find_domains(silenced, "div")
608 suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
609 logger.info("Checking %d suspended entries ...", len(suspended))
610 blocklist["reject"] = utils.find_domains(suspended, "div")
612 blocking = blocklist["silenced"] + blocklist["reject"]
615 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
616 instances.set_last_blocked(blocker)
617 instances.set_total_blocks(blocker, blocking)
620 for block_level in blocklist:
621 blockers = blocklist[block_level]
623 logger.debug("block_level='%s',blockers()=%d'", block_level, len(blockers))
624 for blocked in blockers:
625 logger.debug("blocked='%s'", blocked)
627 if not instances.is_registered(blocked):
629 logger.info("Fetching instances from domain='%s' ...", blocked)
630 federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
631 except network.exceptions as exception:
632 logger.warning("Exception '%s' during fetching instances (fetch_cs) from blocked='%s'", type(exception), blocked)
633 instances.set_last_error(blocked, exception)
635 if not domain_helper.is_wanted(blocked):
636 logger.warning("blocked='%s' is not wanted - SKIPPED!", blocked)
638 elif not domain_helper.is_wanted(blocker):
639 logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
641 elif blocks.is_instance_blocked(blocker, blocked, block_level):
642 logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
645 logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
646 if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
647 logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", blocked, block_level, blocker)
653 logger.debug("Invoking commit() ...")
654 database.connection.commit()
656 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
657 if config.get("bot_enabled") and len(blockdict) > 0:
658 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
659 network.send_bot_post(blocker, blockdict)
661 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
662 if instances.has_pending(blocker):
663 logger.debug("Flushing updates for blocker='%s' ...", blocker)
664 instances.update(blocker)
666 logger.debug("Success! - EXIT!")
669 def fetch_cs(args: argparse.Namespace):
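# Fetches chaos.social's federation.md from raw.githubusercontent.com, extracts the
# silenced and blocked instance tables and stores them as blocks for chaos.social.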
670 logger.debug("args[]='%s' - CALLED!", type(args))
672 logger.debug("Invoking locking.acquire() ...")
700 source_domain = "raw.githubusercontent.com"
701 if sources.is_recent(source_domain):
702 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
705 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
706 sources.update(source_domain)
708 logger.info("Fetching federation.md from source_domain='%s' ...", source_domain)
709 raw = utils.fetch_url(
710 f"https://{source_domain}/chaossocial/meta/master/federation.md",
712 (config.get("connection_timeout"), config.get("read_timeout"))
714 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
716 doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
717 logger.debug("doc()=%d[]='%s'", len(doc), type(doc))
719 silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
720 logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
721 blocklist["silenced"] = federation.find_domains(silenced)
723 blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
724 logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
725 blocklist["reject"] = federation.find_domains(blocked)
727 blocking = blocklist["silenced"] + blocklist["reject"]
728 blocker = "chaos.social"
730 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
731 instances.set_last_blocked(blocker)
732 instances.set_total_blocks(blocker, blocking)
734 logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
735 if len(blocking) > 0:
737 for block_level in blocklist:
738 logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))
740 for row in blocklist[block_level]:
741 logger.debug("row[%s]='%s'", type(row), row)
742 if "domain" not in row:
743 logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
745 elif not instances.is_registered(row["domain"]):
747 logger.info("Fetching instances from domain='%s' ...", row["domain"])
748 federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
749 except network.exceptions as exception:
750 logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
751 instances.set_last_error(row["domain"], exception)
753 if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
754 logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
756 "blocked": row["domain"],
757 "reason" : row["reason"],
760 logger.debug("Invoking commit() ...")
761 database.connection.commit()
763 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
764 if config.get("bot_enabled") and len(blockdict) > 0:
765 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
766 network.send_bot_post(blocker, blockdict)
768 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
769 if instances.has_pending(blocker):
770 logger.debug("Flushing updates for blocker='%s' ...", blocker)
771 instances.update(blocker)
773 logger.debug("Success! - EXIT!")
776 def fetch_fba_rss(args: argparse.Namespace) -> int:
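# Reads an FBA-specific RSS feed (args.feed) and registers every new, wanted domain
# found in its items.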
777 logger.debug("args[]='%s' - CALLED!", type(args))
781 logger.debug("Invoking locking.acquire() ...")
784 components = urlparse(args.feed)
785 domain = components.netloc.lower().split(":")[0]
787 logger.debug("domain='%s'", domain)
788 if sources.is_recent(domain):
789 logger.info("API from domain='%s' has recently been accessed - EXIT!", domain)
792 logger.debug("domain='%s' has not been recently used, marking ...", domain)
793 sources.update(domain)
795 logger.info("Fetching FBA-specific RSS feed args.feed='%s' ...", args.feed)
796 response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
798 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
799 if response.ok and response.status_code == 200 and len(response.text) > 0:
800 logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
801 rss = atoma.parse_rss_bytes(response.content)
803 logger.debug("rss[]='%s'", type(rss))
804 for item in rss.items:
805 logger.debug("item[%s]='%s'", type(item), item)
806 domain = item.link.split("=")[1]
807 domain = tidyup.domain(domain) if domain not in [None, ""] else None
809 logger.debug("domain='%s' - AFTER!", domain)
810 if domain in [None, ""]:
811 logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
814 logger.debug("domain='%s' - BEFORE!", domain)
815 domain = domain.encode("idna").decode("utf-8")
816 logger.debug("domain='%s' - AFTER!", domain)
818 if not domain_helper.is_wanted(domain):
819 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
821 elif domain in domains:
822 logger.debug("domain='%s' is already added - SKIPPED!", domain)
824 elif instances.is_registered(domain):
825 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
827 elif instances.is_recent(domain):
828 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
831 logger.debug("Adding domain='%s'", domain)
832 domains.append(domain)
834 logger.debug("domains()=%d", len(domains))
836 logger.info("Adding %d new instances ...", len(domains))
837 for domain in domains:
838 logger.debug("domain='%s'", domain)
840 logger.info("Fetching instances from domain='%s' ...", domain)
841 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
842 except network.exceptions as exception:
843 logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
844 instances.set_last_error(domain, exception)
847 logger.debug("Success! - EXIT!")
850 def fetch_fbabot_atom(args: argparse.Namespace) -> int:
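# Parses the ATOM feed of the FBA bot account (ryona.agency by default, overridable via
# args.feed) and registers every new, wanted domain linked in its entries.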
851 logger.debug("args[]='%s' - CALLED!", type(args))
853 logger.debug("Invoking locking.acquire() ...")
856 source_domain = "ryona.agency"
857 feed = f"https://{source_domain}/users/fba/feed.atom"
859 logger.debug("args.feed[%s]='%s'", type(args.feed), args.feed)
860 if args.feed is not None and validators.url(args.feed):
861 logger.debug("Setting feed='%s' ...", args.feed)
862 feed = str(args.feed)
863 source_domain = urlparse(args.feed).netloc
865 if sources.is_recent(source_domain):
866 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
869 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
870 sources.update(source_domain)
874 logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
875 response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
877 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
878 if response.ok and response.status_code == 200 and len(response.text) > 0:
879 logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
880 atom = atoma.parse_atom_bytes(response.content)
882 logger.debug("atom[]='%s'", type(atom))
883 for entry in atom.entries:
884 logger.debug("entry[]='%s'", type(entry))
885 doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
886 logger.debug("doc[]='%s'", type(doc))
887 elements = doc.findAll("a")
889 logger.debug("Checking %d element(s) ...", len(elements))
890 for element in elements:
891 logger.debug("element[%s]='%s'", type(element), element)
892 for href in element["href"].split(","):
893 logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
894 domain = tidyup.domain(href) if href not in [None, ""] else None
896 logger.debug("domain='%s' - AFTER!", domain)
897 if domain in [None, ""]:
898 logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
901 logger.debug("domain='%s' - BEFORE!", domain)
902 domain = domain.encode("idna").decode("utf-8")
903 logger.debug("domain='%s' - AFTER!", domain)
905 if not domain_helper.is_wanted(domain):
906 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
908 elif domain in domains:
909 logger.debug("domain='%s' is already added - SKIPPED!", domain)
911 elif instances.is_registered(domain):
912 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
914 elif instances.is_recent(domain):
915 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
918 logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
919 domains.append(domain)
921 logger.debug("domains()=%d", len(domains))
923 logger.info("Adding %d new instances ...", len(domains))
924 for domain in domains:
925 logger.debug("domain='%s'", domain)
927 logger.info("Fetching instances from domain='%s' ...", domain)
928 federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
929 except network.exceptions as exception:
930 logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
931 instances.set_last_error(domain, exception)
934 logger.debug("Success! - EXIT!")
937 def fetch_instances(args: argparse.Namespace) -> int:
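# Fetches peer instances for args.domain first; unless stopped early, it then iterates
# over other known instances whose peer lists have not been fetched recently.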
938 logger.debug("args[]='%s' - CALLED!", type(args))
940 logger.debug("args.domain='%s' - checking ...", args.domain)
941 if not validators.domain(args.domain):
942 logger.warning("args.domain='%s' is not valid.", args.domain)
944 elif blacklist.is_blacklisted(args.domain):
945 logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
948 logger.debug("Invoking locking.acquire() ...")
952 domain = tidyup.domain(args.domain)
953 origin = software = None
956 database.cursor.execute("SELECT origin, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
957 row = database.cursor.fetchone()
959 origin = row["origin"]
960 software = row["software"]
962 logger.debug("software='%s'", software)
964 logger.warning("args.domain='%s' has no software detected. You can try to run ./fba.py update_nodeinfo --domain=%s --force to get it updated.", args.domain, args.domain)
966 elif software_helper.is_relay(software):
967 logger.warning("args.domain='%s' is of software type '%s' which is not supported by this command. Please invoke fetch_relays instead.", args.domain, software)
972 logger.info("Fetching instances from args.domain='%s',origin='%s',software='%s' ...", domain, origin, software)
973 federation.fetch_instances(domain, origin, software, inspect.currentframe().f_code.co_name)
974 except network.exceptions as exception:
975 logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
976 instances.set_last_error(args.domain, exception)
977 instances.update(args.domain)
981 logger.debug("Not fetching more instances - EXIT!")
984 # Loop through some instances
985 database.cursor.execute(
986 "SELECT domain, origin, software FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus', 'neodb') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY total_peers DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_instance")]
989 rows = database.cursor.fetchall()
990 logger.info("Checking %d entries ...", len(rows))
992 logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
993 domain = row["domain"].encode("idna").decode("utf-8")
994 logger.debug("domain='%s' - AFTER!", domain)
996 if not domain_helper.is_wanted(domain):
997 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1001 logger.info("Fetching instances for domain='%s',origin='%s',software='%s' ...", domain, row["origin"], row["software"])
1002 federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name)
1003 except network.exceptions as exception:
1004 logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
1005 instances.set_last_error(domain, exception)
1007 logger.debug("Success - EXIT!")
1010 def fetch_csv(args: argparse.Namespace) -> int:
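# Processes all CSV block lists configured in blocklists.csv_files, optionally
# restricted to a single blocker via args.domain.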
1011 logger.debug("args[]='%s' - CALLED!", type(args))
1013 logger.debug("Invoking locking.acquire() ...")
1016 logger.info("Checking %d CSV files ...", len(blocklists.csv_files))
1017 for block in blocklists.csv_files:
1018 logger.debug("block[blocker]='%s',block[csv_url]='%s'", block["blocker"], block["csv_url"])
1020 # Is a domain given and does it differ from this blocker?
1021 if isinstance(args.domain, str) and args.domain != block["blocker"]:
1022 logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
1025 logger.debug("Invoking processing.csv_block(%s, %s, fetch_csv) ...", block["blocker"], block["csv_url"])
1026 processing.csv_block(block["blocker"], block["csv_url"], inspect.currentframe().f_code.co_name)
1028 logger.debug("Success - EXIT!")
1031 def fetch_oliphant(args: argparse.Namespace) -> int:
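# Downloads oliphant's block list CSV files from codeberg.org and feeds them into
# processing.csv_block(), optionally restricted to a single blocker.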
1032 logger.debug("args[]='%s' - CALLED!", type(args))
1034 logger.debug("Invoking locking.acquire() ...")
1037 source_domain = "codeberg.org"
1038 if sources.is_recent(source_domain):
1039 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1042 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1043 sources.update(source_domain)
1046 base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"
1048 logger.debug("Downloading %d files ...", len(blocklists.oliphant_blocklists))
1049 for block in blocklists.oliphant_blocklists:
1050 # Is a domain given and does it differ from this blocker?
1051 logger.debug("block[blocker]='%s',block[csv_url]='%s'", block["blocker"], block["csv_url"])
1052 if isinstance(args.domain, str) and args.domain != block["blocker"]:
1053 logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
1056 url = f"{base_url}/{block['csv_url']}"
1058 logger.debug("Invoking processing.csv_block(%s, %s, fetch_oliphant) ...", block["blocker"], url)
1059 processing.csv_block(block["blocker"], url, inspect.currentframe().f_code.co_name)
1061 logger.debug("Success! - EXIT!")
1064 def fetch_txt(args: argparse.Namespace) -> int:
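# Downloads the plain-text block lists configured in blocklists.txt_files and registers
# each listed domain for the corresponding blocker.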
1065 logger.debug("args[]='%s' - CALLED!", type(args))
1067 logger.debug("Invoking locking.acquire() ...")
1070 logger.info("Checking %d text file(s) ...", len(blocklists.txt_files))
1071 for row in blocklists.txt_files:
1072 logger.debug("Fetching row[url]='%s' ...", row["url"])
1073 response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
1075 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1076 if response.ok and response.status_code == 200 and response.text != "":
1077 logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
1078 domains = response.text.strip().split("\n")
1080 logger.info("Processing %d domains ...", len(domains))
1081 for domain in domains:
1082 logger.debug("domain='%s' - BEFORE!", domain)
1083 domain = tidyup.domain(domain) if domain not in [None, ""] else None
1084 logger.debug("domain='%s' - AFTER!", domain)
1086 if domain in [None, ""]:
1087 logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
1089 elif not domain_helper.is_wanted(domain):
1090 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1092 elif not args.force and instances.is_registered(domain):
1093 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1096 logger.debug("Processing domain='%s',row[blocker]='%s' ...", domain, row["blocker"])
1097 processed = processing.instance(domain, row["blocker"], inspect.currentframe().f_code.co_name, force=args.force)
1098 logger.debug("processed='%s'", processed)
1100 logger.debug("Success! - EXIT!")
1103 def fetch_fedipact(args: argparse.Namespace) -> int:
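# Scrapes the participant list from fedipact.online and registers every wanted,
# not yet known domain.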
1104 logger.debug("args[]='%s' - CALLED!", type(args))
1106 logger.debug("Invoking locking.acquire() ...")
1109 source_domain = "fedipact.online"
1110 if sources.is_recent(source_domain):
1111 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1114 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1115 sources.update(source_domain)
1117 logger.info("Fetching / from source_domain='%s' ...", source_domain)
1118 response = utils.fetch_url(
1119 f"https://{source_domain}",
1120 network.web_headers,
1121 (config.get("connection_timeout"), config.get("read_timeout"))
1124 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1125 if response.ok and response.status_code == 200 and response.text != "":
1126 logger.debug("Parsing %d Bytes ...", len(response.text))
1128 doc = bs4.BeautifulSoup(response.text, "html.parser")
1129 logger.debug("doc[]='%s'", type(doc))
1131 rows = doc.findAll("li")
1132 logger.info("Checking %d row(s) ...", len(rows))
1134 logger.debug("row[]='%s'", type(row))
1135 domain = tidyup.domain(row.contents[0]) if row.contents[0] not in [None, ""] else None
1137 logger.debug("domain='%s' - AFTER!", domain)
1138 if domain in [None, ""]:
1139 logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
1142 logger.debug("domain='%s' - BEFORE!", domain)
1143 domain = domain.encode("idna").decode("utf-8")
1144 logger.debug("domain='%s' - AFTER!", domain)
1146 if not domain_helper.is_wanted(domain):
1147 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1149 elif instances.is_registered(domain):
1150 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1152 elif instances.is_recent(domain):
1153 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1156 logger.info("Fetching domain='%s' ...", domain)
1157 federation.fetch_instances(domain, "beach.city", None, inspect.currentframe().f_code.co_name)
1159 logger.debug("Success! - EXIT!")
1162 def fetch_joinmobilizon(args: argparse.Namespace) -> int:
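# Fetches the instance list from instances.joinmobilizon.org and registers every wanted,
# not yet known host.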
1163 logger.debug("args[]='%s' - CALLED!", type(args))
1165 logger.debug("Invoking locking.acquire() ...")
1168 source_domain = "instances.joinmobilizon.org"
1169 if sources.is_recent(source_domain):
1170 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1173 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1174 sources.update(source_domain)
1176 logger.info("Fetching instances from source_domain='%s' ...", source_domain)
1177 raw = utils.fetch_url(
1178 f"https://{source_domain}/api/v1/instances",
1179 network.web_headers,
1180 (config.get("connection_timeout"), config.get("read_timeout"))
1182 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1184 parsed = json.loads(raw)
1185 logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
1187 if "data" not in parsed:
1188 logger.warning("parsed()=%d does not contain key 'data'", len(parsed))
1191 logger.info("Checking %d instances ...", len(parsed["data"]))
1192 for row in parsed["data"]:
1193 logger.debug("row[]='%s'", type(row))
1194 if "host" not in row:
1195 logger.warning("row='%s' does not contain key 'host' - SKIPPED!", row)
1197 elif not domain_helper.is_wanted(row["host"]):
1198 logger.debug("row[host]='%s' is not wanted - SKIPPED!", row["host"])
1200 elif instances.is_registered(row["host"]):
1201 logger.debug("row[host]='%s' is already registered - SKIPPED!", row["host"])
1204 logger.info("Fetching row[host]='%s' ...", row["host"])
1205 federation.fetch_instances(row["host"], "demo.mobilizon.org", None, inspect.currentframe().f_code.co_name)
1207 logger.debug("Success! - EXIT!")
1210 def fetch_joinmisskey(args: argparse.Namespace) -> int:
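# Fetches instances.json from instanceapp.misskey.page and registers every wanted,
# not yet known Misskey instance.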
1211 logger.debug("args[]='%s' - CALLED!", type(args))
1213 logger.debug("Invoking locking.acquire() ...")
1216 source_domain = "instanceapp.misskey.page"
1217 if sources.is_recent(source_domain):
1218 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1221 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1222 sources.update(source_domain)
1224 logger.info("Fetching instances.json from source_domain='%s' ...", source_domain)
1225 raw = utils.fetch_url(
1226 f"https://{source_domain}/instances.json",
1227 network.web_headers,
1228 (config.get("connection_timeout"), config.get("read_timeout"))
1230 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1232 parsed = json.loads(raw)
1233 logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
1235 if "instancesInfos" not in parsed:
1236 logger.warning("parsed()=%d does not contain element 'instancesInfos'", len(parsed))
1239 logger.info("Checking %d instance(s) ...", len(parsed["instancesInfos"]))
1240 for row in parsed["instancesInfos"]:
1241 logger.debug("row[%s]='%s'", type(row), row)
1242 if "url" not in row:
1243 logger.warning("row()=%d does not have element 'url' - SKIPPED!", len(row))
1245 elif not domain_helper.is_wanted(row["url"]):
1246 logger.debug("row[url]='%s' is not wanted - SKIPPED!", row["url"])
1248 elif instances.is_registered(row["url"]):
1249 logger.debug("row[url]='%s' is already registered - SKIPPED!", row["url"])
1252 logger.info("Fetching row[url]='%s' ...", row["url"])
1253 federation.fetch_instances(row["url"], "misskey.io", None, inspect.currentframe().f_code.co_name)
1255 logger.debug("Success! - EXIT!")
1258 def recheck_obfuscation(args: argparse.Namespace) -> int:
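# Re-fetches block lists from instances flagged as (possibly) obfuscating, tries to
# deobfuscate each entry and updates the stored blocks and obfuscation counters.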
1259 logger.debug("args[]='%s' - CALLED!", type(args))
1261 logger.debug("Invoking locking.acquire() ...")
1264 if isinstance(args.domain, str) and args.domain != "" and domain_helper.is_wanted(args.domain):
1265 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE (has_obfuscation = 1 OR has_obfuscation IS NULL) AND domain = ?", [args.domain])
1266 elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
1267 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE (has_obfuscation = 1 OR has_obfuscation IS NULL) AND software = ?", [args.software])
1269 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 OR has_obfuscation IS NULL")
1271 rows = database.cursor.fetchall()
1272 logger.info("Checking %d domains ...", len(rows))
1274 logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
1275 if blacklist.is_blacklisted(row["domain"]):
1276 logger.debug("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"])
1278 elif (args.force is None or not args.force) and args.domain is None and args.software is None and instances.is_recent(row["domain"], "last_blocked"):
1279 logger.debug("row[domain]='%s' has been recently checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force))
1282 logger.debug("Invoking federation.fetch_blocks(%s) ...", row["domain"])
1283 blocking = federation.fetch_blocks(row["domain"])
1285 logger.debug("blocking()=%d", len(blocking))
1286 if len(blocking) == 0:
1287 logger.debug("Empty blocking list, trying individual fetch_blocks() for row[software]='%s' ...", row["software"])
1288 if row["software"] == "pleroma":
1289 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1290 blocking = pleroma.fetch_blocks(row["domain"])
1291 elif row["software"] == "mastodon":
1292 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1293 blocking = mastodon.fetch_blocks(row["domain"])
1294 elif row["software"] == "lemmy":
1295 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1296 blocking = lemmy.fetch_blocks(row["domain"])
1297 elif row["software"] == "friendica":
1298 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1299 blocking = friendica.fetch_blocks(row["domain"])
1300 elif row["software"] == "misskey":
1301 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1302 blocking = misskey.fetch_blocks(row["domain"])
1304 logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])
1306 # chaos.social isn't part of oliphant's "hidden" blocklists
1307 logger.debug("row[domain]='%s'", row["domain"])
1308 if row["domain"] != "chaos.social" and row["software"] is not None and not software_helper.is_relay(row["software"]) and not blocklists.has(row["domain"]):
1309 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
1310 instances.set_last_blocked(row["domain"])
1311 instances.set_total_blocks(row["domain"], blocking)
1316 logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1317 for block in blocking:
1318 logger.debug("block[blocked]='%s'", block["blocked"])
1321 if block["blocked"] == "":
1322 logger.debug("block[blocked] is empty - SKIPPED!")
1324 elif block["blocked"].endswith(".onion"):
1325 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
1327 elif block["blocked"].endswith(".i2p") and config.get("allow_i2p_domain") != "true":
1328 logger.debug("blocked='%s' is an I2P domain name - SKIPPED!", block["blocked"])
1330 elif block["blocked"].endswith(".arpa"):
1331 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1333 elif block["blocked"].endswith(".tld"):
1334 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1336 elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
1337 logger.debug("block='%s' is obfuscated.", block["blocked"])
1338 obfuscated = obfuscated + 1
1339 blocked = utils.deobfuscate(block["blocked"], row["domain"], block["digest"] if "digest" in block else None)
1340 elif not domain_helper.is_wanted(block["blocked"]):
1341 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1343 elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1344 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
1347 logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
1348 if blocked is not None and blocked != block["blocked"]:
1349 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1350 obfuscated = obfuscated - 1
1352 if blacklist.is_blacklisted(blocked):
1353 logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
1355 elif blacklist.is_blacklisted(row["domain"]):
1356 logger.debug("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"])
1358 elif blocks.is_instance_blocked(row["domain"], blocked):
1359 logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
1362 block["block_level"] = blocks.alias_block_level(block["block_level"])
1364 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
1365 if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1366 logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
1369 "reason" : block["reason"],
1372 logger.debug("Setting obfuscated=%d for row[domain]='%s' ...", obfuscated, row["domain"])
1373 instances.set_has_obfuscation(row["domain"], (obfuscated > 0))
1374 instances.set_obfuscated_blocks(row["domain"], obfuscated)
1376 logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
1377 if instances.has_pending(row["domain"]):
1378 logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1379 instances.update(row["domain"])
1381 logger.debug("Invoking commit() ...")
1382 database.connection.commit()
1384 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1385 if config.get("bot_enabled") and len(blockdict) > 0:
1386 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", row["domain"], len(blockdict))
1387 network.send_bot_post(row["domain"], blockdict)
1389 logger.debug("Success! - EXIT!")
1392 def fetch_fedilist(args: argparse.Namespace) -> int:
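# Downloads the instance CSV from demo.fedilist.com (optionally filtered by
# args.software) and registers every wanted, not recently crawled domain.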
1393 logger.debug("args[]='%s' - CALLED!", type(args))
1395 logger.debug("Invoking locking.acquire() ...")
1398 source_domain = "demo.fedilist.com"
1399 if sources.is_recent(source_domain):
1400 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1403 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1404 sources.update(source_domain)
1406 url = f"http://{source_domain}/instance/csv?onion=not"
1407 if args.software is not None and args.software != "":
1408 logger.debug("args.software='%s'", args.software)
1409 url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"
1411 logger.info("Fetching url='%s' ...", url)
1412 response = reqto.get(
1414 headers=network.web_headers,
1415 timeout=(config.get("connection_timeout"), config.get("read_timeout")),
1416 allow_redirects=False
1419 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1420 if not response.ok or response.status_code > 200 or len(response.content) == 0:
1421 logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.content))
1424 reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
1426 logger.debug("reader[]='%s'", type(reader))
1428 logger.warning("Failed parsing response.content()=%d as CSV content", len(response.content))
1433 logger.info("Checking %d rows ...", len(rows))
1435 logger.debug("row[]='%s'", type(row))
1436 if "hostname" not in row:
1437 logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(row))
1440 logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"])
1441 domain = tidyup.domain(row["hostname"]) if row["hostname"] not in [None, ""] else None
1442 logger.debug("domain='%s' - AFTER!", domain)
1444 if domain in [None, ""]:
1445 logger.debug("domain='%s' is empty after tidyup.domain(): row[hostname]='%s' - SKIPPED!", domain, row["hostname"])
1448 logger.debug("domain='%s' - BEFORE!", domain)
1449 domain = domain.encode("idna").decode("utf-8")
1450 logger.debug("domain='%s' - AFTER!", domain)
1452 if not domain_helper.is_wanted(domain):
1453 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1455 elif (args.force is None or not args.force) and instances.is_registered(domain):
1456 logger.debug("domain='%s' is already registered, --force not specified: args.force[]='%s'", domain, type(args.force))
1458 elif instances.is_recent(domain):
1459 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1462 logger.info("Fetching instances from domain='%s' ...", domain)
1463 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1465 logger.debug("Success! - EXIT!")
1468 def update_nodeinfo(args: argparse.Namespace) -> int:
1469 logger.debug("args[]='%s' - CALLED!", type(args))
1471 logger.debug("Invoking locking.acquire() ...")
1474 if args.domain is not None and args.domain != "":
1475 logger.debug("Fetching args.domain='%s'", args.domain)
1476 database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
1477 elif args.software is not None and args.software != "":
1478 logger.info("Fetching domains for args.software='%s'", args.software)
1479 database.cursor.execute("SELECT domain, software FROM instances WHERE software = ? ORDER BY last_updated ASC", [args.software])
1480 elif args.mode is not None and args.mode != "":
1481 logger.info("Fetching domains for args.mode='%s'", args.mode.upper())
1482 database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode = ? ORDER BY last_updated ASC", [args.mode])
1483 elif args.no_software:
1484 logger.info("Fetching domains with no software type detected ...")
1485 database.cursor.execute("SELECT domain, software FROM instances WHERE software IS NULL ORDER BY last_updated ASC")
1486 elif args.with_software:
1487 logger.info("Fetching domains with any software type detected ...")
1488 database.cursor.execute("SELECT domain, software FROM instances WHERE software IS NOT NULL ORDER BY last_updated ASC")
1490 logger.info("Fetching domains with other detection mode than AUTO_DISOVERY being set ...")
1491 database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode IS NOT NULL AND detection_mode != 'AUTO_DISCOVERY' ORDER BY last_updated ASC")
1492 elif args.no_detection:
1493 logger.info("Fetching domains with no detection mode being set ...")
1494 database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode IS NULL ORDER BY last_updated ASC")
1496 logger.info("Fetching domains for recently updated ...")
1497 database.cursor.execute("SELECT domain, software FROM instances ORDER BY last_updated ASC")
1499 domains = database.cursor.fetchall()
1501 logger.info("Checking %d domain(s) ...", len(domains))
1504 logger.debug("row[]='%s'", type(row))
1505 if blacklist.is_blacklisted(row["domain"]):
1506 logger.debug("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"])
1508 elif not args.force and instances.is_recent(row["domain"], "last_nodeinfo"):
1509 logger.debug("row[domain]='%s' has been recently checked - SKIPPED!", row["domain"])
1513 logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
1514 software = federation.determine_software(row["domain"])
1516 logger.debug("Determined software='%s'", software)
1517 if (software != row["software"] and software is not None) or args.force is True:
1518 logger.debug("software='%s'", software)
1519 if software is None:
1520 logger.debug("Setting nodeinfo_url to 'None' for row[domain]='%s' ...", row["domain"])
1521 instances.set_nodeinfo_url(row["domain"], None)
1523 logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
1524 instances.set_software(row["domain"], software)
1526 if software is not None:
1527 logger.debug("Setting row[domain]='%s' as successfully determined ...", row["domain"])
1528 instances.set_success(row["domain"])
1529 except network.exceptions as exception:
1530 logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1531 instances.set_last_error(row["domain"], exception)
1533 instances.set_last_nodeinfo(row["domain"])
1534 instances.update(row["domain"])
1535 cnt = cnt + 1
1537 logger.debug("Success! - EXIT!")
1540 def fetch_instances_social(args: argparse.Namespace) -> int:
1541 logger.debug("args[]='%s' - CALLED!", type(args))
1543 logger.debug("Invoking locking.acquire() ...")
1546 source_domain = "instances.social"
1548 if config.get("instances_social_api_key") == "":
1549 logger.error("API key not set. Please set in your config.json file.")
1551 elif sources.is_recent(source_domain):
1552 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1555 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1556 sources.update(source_domain)
1559 "Authorization": f"Bearer {config.get('instances_social_api_key')}",
1562 logger.info("Fetching list from source_domain='%s' ...", source_domain)
1563 fetched = network.get_json_api(
1565 "/api/1.0/instances/list?count=0&sort_by=name",
1567 timeout=(config.get("connection_timeout"), config.get("read_timeout"))
1569 logger.debug("fetched(%d)[]='%s'", len(fetched), type(fetched))
1571 if "error_message" in fetched:
1572 logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1574 elif "exception" in fetched:
1575 logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1577 elif "json" not in fetched:
1578 logger.warning("fetched has no element 'json' - EXIT!")
1580 elif "instances" not in fetched["json"]:
1581 logger.warning("fetched[row] has no element 'instances' - EXIT!")
1585 rows = fetched["json"]["instances"]
1587 logger.info("Checking %d row(s) ...", len(rows))
1589 logger.debug("row[]='%s'", type(row))
1590 domain = tidyup.domain(row["name"]) if row["name"] not in [None, ""] else None
1591 logger.debug("domain='%s' - AFTER!", domain)
1593 if domain in [None, ""]:
1594 logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
1597 logger.debug("domain='%s' - BEFORE!", domain)
1598 domain = domain.encode("idna").decode("utf-8")
1599 logger.debug("domain='%s' - AFTER!", domain)
1601 if not domain_helper.is_wanted(domain):
1602 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1604 elif domain in domains:
1605 logger.debug("domain='%s' is already added - SKIPPED!", domain)
1607 elif instances.is_registered(domain):
1608 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1610 elif instances.is_recent(domain):
1611 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1614 logger.info("Fetching instances from domain='%s'", domain)
1615 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1617 logger.debug("Success! - EXIT!")
1620 def fetch_relaylist(args: argparse.Namespace) -> int:
1621 logger.debug("args[]='%s' - CALLED!", type(args))
1623 logger.debug("Invoking locking.acquire() ...")
1626 source_domain = "api.relaylist.com"
1628 if sources.is_recent(source_domain):
1629 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1632 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1633 sources.update(source_domain)
1635 logger.info("Fetching list from source_domain='%s' ...", source_domain)
1636 fetched = network.get_json_api(
1640 (config.get("connection_timeout"), config.get("read_timeout"))
1642 logger.debug("fetched(%d)[]='%s'", len(fetched), type(fetched))
1644 if "error_message" in fetched:
1645 logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1647 elif "exception" in fetched:
1648 logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1650 elif "json" not in fetched:
1651 logger.warning("fetched has no element 'json' - EXIT!")
1656 logger.info("Checking %d row(s) ...", len(fetched["json"]))
1657 for row in fetched["json"]:
1658 logger.debug("row[]='%s'", type(row))
1659 domain = urlparse(row["url"]).netloc.lower().split(":")[0]
1660 logger.debug("domain='%s' - AFTER!", domain)
1662 if domain in [None, ""]:
1663 logger.debug("domain='%s' is empty after parsing row[url]='%s' - SKIPPED!", domain, row["url"])
1666 logger.debug("domain='%s' - BEFORE!", domain)
1667 domain = domain.encode("idna").decode("utf-8")
1668 logger.debug("domain='%s' - AFTER!", domain)
1670 if not domain_helper.is_wanted(domain):
1671 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1673 elif domain in domains:
1674 logger.debug("domain='%s' is already added - SKIPPED!", domain)
1676 elif instances.is_registered(domain):
1677 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1679 elif instances.is_recent(domain):
1680 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1683 logger.info("Fetching instances from domain='%s'", domain)
1684 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1686 logger.debug("Success! - EXIT!")
1689 def fetch_relays(args: argparse.Namespace) -> int:
1690 logger.debug("args[]='%s' - CALLED!", type(args))
1692 logger.debug("Invoking locking.acquire() ...")
1695 if args.domain is not None and args.domain != "":
1696 logger.debug("Fetching instances record for args.domain='%s' ...", args.domain)
1697 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND domain = ? LIMIT 1", [args.domain])
1698 elif args.software is not None and args.software != "":
1699 logger.debug("Fetching instances records for args.software='%s' ...", args.software)
1700 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND nodeinfo_url IS NOT NULL AND software = ? ORDER BY last_updated DESC", [args.software])
1702 logger.debug("Fetch all relay instances ...")
1703 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND nodeinfo_url IS NOT NULL ORDER BY last_updated DESC")
1705 domains = list()
1706 rows = database.cursor.fetchall()
1708 logger.info("Checking %d relays ...", len(rows))
1710 logger.debug("row[domain]='%s',row[software]='%s'", row["domain"], row["software"])
1711 if not args.force and instances.is_recent(row["domain"]):
1712 logger.debug("row[domain]='%s' has been recently fetched - SKIPPED!", row["domain"])
1714 elif row["nodeinfo_url"] is None:
1715 logger.warning("row[domain]='%s' has empty nodeinfo_url but this is required - SKIPPED!", row["domain"])
1720 logger.debug("row[domain]='%s',row[software]='%s' - checking ....", row["domain"], row["software"])
1721 if row["software"] == "pub-relay":
1722 logger.info("Fetching row[nodeinfo_url]='%s' from relay row[domain]='%s',row[software]='%s' ...", row["nodeinfo_url"], row["domain"], row["software"])
1723 raw = network.fetch_api_url(
1724 row["nodeinfo_url"],
1725 (config.get("connection_timeout"), config.get("read_timeout"))
1728 logger.debug("raw[%s]()=%d", type(raw), len(raw))
1729 if "exception" in raw:
1730 logger.warning("row[domain]='%s' has caused an exception: '%s' - raising again ...", row["domain"], type(raw["exception"]))
1731 raise raw["exception"]
1732 elif "error_message" in raw:
1733 logger.warning("row[domain]='%s' has caused error message: '%s' - SKIPPED!", row["domain"], raw["error_message"])
1734 instances.set_last_error(row["domain"], raw)
1735 instances.set_last_instance_fetch(row["domain"])
1736 instances.update(row["domain"])
1738 elif "json" not in raw:
1739 logger.warning("raw()=%d does not contain key 'json' in response - SKIPPED!", len(raw))
1741 elif not "metadata" in raw["json"]:
1742 logger.warning("raw[json]()=%d does not contain key 'json' in response - SKIPPED!", len(raw["json"]))
1744 elif not "peers" in raw["json"]["metadata"]:
1745 logger.warning("raw[json][metadata()=%d does not contain key 'json' in response - SKIPPED!", len(raw["json"]["metadata"]))
1748 logger.info("Fetching / from relay row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1749 raw = utils.fetch_url(
1750 f"https://{row['domain']}",
1751 network.web_headers,
1752 (config.get("connection_timeout"), config.get("read_timeout"))
1754 logger.debug("raw[%s]()=%d", type(raw), len(raw))
1756 doc = bs4.BeautifulSoup(raw, features="html.parser")
1757 logger.debug("doc[]='%s'", type(doc))
1759 except network.exceptions as exception:
1760 logger.warning("Exception '%s' during fetching from relay '%s': '%s'", type(exception), row["domain"], str(exception))
1761 instances.set_last_error(row["domain"], exception)
1762 instances.set_last_instance_fetch(row["domain"])
1763 instances.update(row["domain"])
1766 logger.debug("row[software]='%s'", row["software"])
1767 if row["software"] == "activityrelay":
1768 logger.debug("Checking row[domain]='%s' ...", row["domain"])
1769 tags = doc.findAll("p")
1771 logger.debug("Checking %d paragraphs ...", len(tags))
1773 logger.debug("tag[]='%s'", type(tag))
1774 if len(tag.contents) == 0:
1775 logger.debug("tag='%s' is an empty tag - SKIPPED!", tag)
1777 elif "registered instances" not in tag.contents[0]:
1778 logger.debug("Skipping paragraph, text not found.")
1781 logger.debug("Found tag.contents[0][]='%s'", tag.contents[0])
1782 for domain in tag.contents:
1783 logger.debug("domain[%s]='%s'", type(domain), domain)
1784 if not isinstance(domain, bs4.element.NavigableString) or "registered instances" in domain:
1787 domain = str(domain)
1788 logger.debug("domain='%s'", domain)
1789 if not domain_helper.is_wanted(domain):
1790 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1793 logger.debug("domain='%s' - BEFORE!", domain)
1794 domain = tidyup.domain(domain) if domain not in [None, ""] else None
1795 logger.debug("domain='%s' - AFTER!", domain)
1797 if domain in [None, ""]:
1798 logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1800 elif domain not in peers:
1801 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1802 peers.append(domain)
1804 logger.debug("domains()=%d,domain='%s'", len(domains), domain)
1805 if dict_helper.has_key(domains, "domain", domain):
1806 logger.debug("domain='%s' already added", domain)
1809 logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
1812 "origin": row["domain"],
1814 elif row["software"] in ["aoderelay", "selective-relay"]:
1815 logger.debug("Checking row[domain]='%s' ...", row["domain"])
1816 if row["software"] == "aoderelay":
1817 tags = doc.findAll("section", {"class": "instance"})
1819 tags = doc.find("div", {"id": "instances"}).findAll("li")
1821 logger.debug("Checking %d tags ...", len(tags))
1823 logger.debug("tag[]='%s'", type(tag))
1825 link = tag.find("a")
1826 logger.debug("link[%s]='%s'", type(link), link)
1827 if not isinstance(link, bs4.element.Tag):
1828 logger.warning("tag[%s]='%s' is not type of 'bs4.element.Tag' - SKIPPED!", type(tag), tag)
1831 components = urlparse(link.get("href"))
1832 logger.debug("components(%d)='%s'", len(components), components)
1833 domain = components.netloc.lower().split(":")[0]
1835 logger.debug("domain='%s' - BEFORE!", domain)
1836 domain = tidyup.domain(domain) if domain not in [None, ""] else None
1837 logger.debug("domain='%s' - AFTER!", domain)
1839 if domain in [None, ""]:
1840 logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1842 elif domain not in peers:
1843 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1844 peers.append(domain)
1846 logger.debug("domains()=%d,domain='%s'", len(domains), domain)
1847 if dict_helper.has_key(domains, "domain", domain):
1848 logger.debug("domain='%s' already added", domain)
1851 logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
1854 "origin": row["domain"],
1856 elif row["software"] == "pub-relay":
1857 logger.debug("Checking %d peer(s) row[domain]='%s' ...", len(raw["json"]["metadata"]["peers"]), row["domain"])
1858 for domain in raw["json"]["metadata"]["peers"]:
1859 logger.debug("domain='%s' - BEFORE!", domain)
1860 domain = tidyup.domain(domain) if domain not in [None, ""] else None
1861 logger.debug("domain='%s' - AFTER!", domain)
1863 if domain in [None, ""]:
1864 logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1866 elif domain not in peers:
1867 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1868 peers.append(domain)
1870 logger.debug("domains()=%d,domain='%s'", len(domains), domain)
1871 if dict_helper.has_key(domains, "domain", domain):
1872 logger.debug("domain='%s' already added", domain)
1875 logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
1878 "origin": row["domain"],
1881 logger.warning("row[domain]='%s',row[software]='%s' is not supported", row["domain"], row["software"])
1884 logger.debug("Updating last_instance_fetch for row[domain]='%s' ...", row["domain"])
1885 instances.set_last_instance_fetch(row["domain"])
1887 logger.info("Relay '%s' has %d peer(s) registered.", row["domain"], len(peers))
1888 instances.set_total_peers(row["domain"], peers)
1890 logger.debug("Flushing data for row[domain]='%s'", row["domain"])
1891 instances.update(row["domain"])
1893 logger.info("Checking %d domains ...", len(domains))
1895 logger.debug("row[domain]='%s',row[origin]='%s'", row["domain"], row["origin"])
1896 if not domain_helper.is_wanted(row["domain"]):
1897 logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
1899 elif instances.is_registered(row["domain"]):
1900 logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
1903 logger.info("Fetching row[domain]='%s',row[origin]='%s' ...", row["domain"], row["origin"])
1904 federation.fetch_instances(row["domain"], row["origin"], None, inspect.currentframe().f_code.co_name)
1906 logger.debug("Success! - EXIT!")
1909 def convert_idna(args: argparse.Namespace) -> int:
1910 logger.debug("args[]='%s' - CALLED!", type(args))
1912 database.cursor.execute("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC")
1913 rows = database.cursor.fetchall()
1915 logger.debug("rows[]='%s'", type(rows))
1916 instances.translate_idnas(rows, "domain")
1918 database.cursor.execute("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC")
1919 rows = database.cursor.fetchall()
1921 logger.debug("rows[]='%s'", type(rows))
1922 instances.translate_idnas(rows, "origin")
1924 database.cursor.execute("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC")
1925 rows = database.cursor.fetchall()
1927 logger.debug("rows[]='%s'", type(rows))
1928 blocks.translate_idnas(rows, "blocker")
1930 database.cursor.execute("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC")
1931 rows = database.cursor.fetchall()
1933 logger.debug("rows[]='%s'", type(rows))
1934 blocks.translate_idnas(rows, "blocked")
1936 logger.debug("Success! - EXIT!")
1939 def remove_invalid(args: argparse.Namespace) -> int:
1940 logger.debug("args[]='%s' - CALLED!", type(args))
1942 logger.debug("Invoking locking.acquire() ...")
1945 database.cursor.execute("SELECT domain FROM instances ORDER BY domain ASC")
1946 rows = database.cursor.fetchall()
1948 logger.info("Checking %d domains ...", len(rows))
1950 logger.debug("row[domain]='%s'", row["domain"])
1951 if not validators.domain(row["domain"].split("/")[0]):
1952 logger.info("Invalid row[domain]='%s' found, removing ...", row["domain"])
1953 database.cursor.execute("DELETE FROM blocks WHERE blocker = ? OR blocked = ?", [row["domain"], row["domain"]])
1954 database.cursor.execute("DELETE FROM instances WHERE domain = ? LIMIT 1", [row["domain"]])
1956 logger.debug("Invoking commit() ...")
1957 database.connection.commit()
1959 logger.info("Vaccum cleaning database ...")
1960 database.cursor.execute("VACUUM")
1962 logger.debug("Success! - EXIT!")