# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
import argparse
import csv
import inspect
import json
import logging
import time

from urllib.parse import urlparse

import atoma
import bs4
import markdown
import reqto
import validators

from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import blocklists
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import dicts as dict_helper
from fba.helpers import domain as domain_helper
from fba.helpers import locking
from fba.helpers import processing
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import csrf
from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances
from fba.models import sources

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)

def check_instance(args: argparse.Namespace) -> int:
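    """Checks whether args.domain is valid, not blacklisted and not already registered; returns a non-zero status if any check fails."""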
    logger.debug("args.domain='%s' - CALLED!", args.domain)

    # Non-zero status values below are restored from context; the exact codes are assumed
    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status

def check_nodeinfo(args: argparse.Namespace) -> int:
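    """Cross-checks each instance's nodeinfo_url against its (punycode) domain and reports mismatches."""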
    logger.debug("args[]='%s' - CALLED!", type(args))

    database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")

    cnt = 0
    for row in database.cursor.fetchall():
        logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        punycode = row["domain"].encode("idna").decode("utf-8")

        if row["nodeinfo_url"].startswith("/"):
            logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
            continue
        elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
            logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
            cnt = cnt + 1

    logger.info("Found %d row(s)", cnt)

    logger.debug("EXIT!")
    return 0

def fetch_pixelfed_api(args: argparse.Namespace) -> int:
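    """Fetches the server list from the pixelfed.org API and registers newly seen instances."""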
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF by default, you don't have to add network.source_headers by yourself here
    headers = dict()
    source_domain = "pixelfed.org"

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    try:
        logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
        headers = csrf.determine(source_domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
        return 1

    try:
        logger.info("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            source_domain,
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 101
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 102

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif row["domain"] is None:
                logger.debug("row[domain] is None - SKIPPED!")
                continue
            elif row["domain"] == "":
                logger.debug("row[domain] is empty - SKIPPED!")
                continue

            logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
            domain = row["domain"].encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Fetching instances from domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 103

    logger.debug("Success! - EXIT!")
    return 0

def fetch_bkali(args: argparse.Namespace) -> int:
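    """Fetches a domain list from the gql.api.bka.li GraphQL API and registers newly seen instances."""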
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "gql.api.bka.li"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()
    try:
        logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
        fetched = network.post_json_api(
            source_domain,
            "/v1/graphql",  # endpoint path is not shown in this excerpt, value assumed
            json.dumps({
                "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
            })
        )

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for 'gql.sources.bka.li' returned error message='%s'", fetched["error_message"])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif entry["domain"] == "":
                logger.debug("entry[domain] is empty - SKIPPED!")
                continue
            elif not domain_helper.is_wanted(entry["domain"]):
                logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
                continue
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])
    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0

def fetch_blocks(args: argparse.Namespace) -> int:
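    """Fetches and parses blocklists from all due (or the given) blockers, deobfuscates entries where possible and stores the blocks."""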
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ? LIMIT 1", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC", [args.software]
        )
    elif args.force:  # flag name assumed, the original condition is elided here
        # Re-check all
        logger.debug("Re-checking all instances ...")
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC"
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)

        if not domain_helper.is_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("Setting last_blocked,has_obfuscation=false for blocker='%s' ...", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        # c.s isn't part of oliphant's "hidden" blocklists
        if blocker == "chaos.social" or software_helper.is_relay(software) or blocklists.has(blocker):
            logger.debug("Skipping blocker='%s', run ./fba.py fetch_cs, fetch_oliphant, fetch_csv instead!", blocker)
            continue

        logger.debug("Invoking federation.fetch_blocks(%s) ...", blocker)
        blocking = federation.fetch_blocks(blocker)

        logger.debug("blocker='%s',software='%s',blocking()=%d", blocker, software, len(blocking))
        if len(blocking) == 0:
            logger.debug("blocker='%s',software='%s' - fetching blocklist ...", blocker, software)
            if software == "pleroma":
                blocking = pleroma.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "mastodon":
                blocking = mastodon.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "lemmy":
                blocking = lemmy.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "friendica":
                blocking = friendica.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "misskey":
                blocking = misskey.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            else:
                logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
        instances.set_total_blocks(blocker, blocking)
        blockdict = list()
        deobfuscated = obfuscated = 0

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)
                instances.set_has_obfuscation(blocker, True)
                obfuscated = obfuscated + 1

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["digest"] if "digest" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    continue

                deobfuscated = deobfuscated + 1
                block["blocked"] = row["domain"]
                origin = row["origin"]
                nodeinfo_url = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)
                instances.set_has_obfuscation(blocker, True)
                obfuscated = obfuscated + 1

                # Some obscure them with question marks, not sure if that's dependent on version or not
                row = instances.deobfuscate("?", block["blocked"], block["digest"] if "digest" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    continue

                deobfuscated = deobfuscated + 1
                block["blocked"] = row["domain"]
                origin = row["origin"]
                nodeinfo_url = row["nodeinfo_url"]
            logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue

            logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
            block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
            logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])

            if not domain_helper.is_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            block["block_level"] = blocks.alias_block_level(block["block_level"])

            if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.info("blocker='%s' has %d obfuscated domain(s) and %d of them could be deobfuscated.", blocker, obfuscated, deobfuscated)
        instances.set_obfuscated_blocks(blocker, obfuscated)

        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_observer(args: argparse.Namespace) -> int:
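    """Crawls the per-software tables on fediverse.observer and registers newly seen instances."""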
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fediverse.observer"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    types = list()
    if args.software is None:
        logger.info("Fetching software list ...")
        raw = utils.fetch_url(
            f"https://{source_domain}",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        ).text
        logger.debug("raw[%s]()=%d", type(raw), len(raw))

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        navbar = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"})
        logger.debug("navbar[]='%s'", type(navbar))
        if navbar is None:
            logger.warning("Cannot find navigation bar, cannot continue!")
            return 1

        items = navbar.findAll("a", {"class": "dropdown-item"})
        logger.debug("items[]='%s'", type(items))

        logger.info("Checking %d menu items ...", len(items))
        for item in items:
            logger.debug("item[%s]='%s'", type(item), item)
            if item.text.lower() == "all":
                logger.debug("Skipping 'All' menu entry ...")
                continue

            logger.debug("Appending item.text='%s' ...", item.text)
            types.append(tidyup.domain(item.text))
    else:
        logger.info("Adding args.software='%s' as type ...", args.software)
        types.append(args.software)

    logger.info("Fetching %d different table data ...", len(types))
    for software in types:
        logger.debug("software='%s'", software)

        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            continue

        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://{source_domain}/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()
            domain = tidyup.domain(domain) if domain not in [None, ""] else None
            logger.debug("domain='%s' - AFTER!", domain)

            if domain is None or domain == "":
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue

            logger.info("Fetching instances for domain='%s'", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_todon_wiki(args: argparse.Namespace) -> int:
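    """Parses the silenced/limited and suspended server lists on wiki.todon.eu and records them as blocks."""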
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "wiki.todon.eu"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    blocklist = {
        "silenced": list(),
        "reject"  : list(),
    }

    logger.debug("Fetching domainblocks from source_domain='%s'", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/todon/domainblocks",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "todon.eu"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_last_blocked(blocker)
    instances.set_total_blocks(blocker, blocking)

    blockdict = list()
    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_cs) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if not domain_helper.is_wanted(blocked):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue
            elif not domain_helper.is_wanted(blocker):
                logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
                continue
            elif blocks.is_instance_blocked(blocker, blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", blocked, block_level, blocker)
                blockdict.append({
                    "blocked": blocked,
                    "reason" : None,
                })

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
        network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_cs(args: argparse.Namespace) -> int:
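    """Parses chaos.social's federation.md and records silenced/blocked instances as blocks for chaos.social."""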
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # The original extension list is elided in this excerpt; "tables" is the one
    # required to render the block tables parsed below, further entries assumed.
    extensions = ["tables"]

    blocklist = {
        "silenced": list(),
        "reject"  : list(),
    }

    source_domain = "raw.githubusercontent.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching federation.md from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/chaossocial/meta/master/federation.md",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc()=%d[]='%s'", len(doc), type(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    blocklist["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    blocklist["reject"] = federation.find_domains(blocked)

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "chaos.social"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_last_blocked(blocker)
    instances.set_total_blocks(blocker, blocking)

    logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
    if len(blocking) > 0:
        blockdict = list()
        for block_level in blocklist:
            logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))

            for row in blocklist[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if "domain" not in row:
                    logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
                    continue
                elif not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
                    blockdict.append({
                        "blocked": row["domain"],
                        "reason" : row["reason"],
                    })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fba_rss(args: argparse.Namespace) -> int:
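    """Parses an FBA-specific RSS feed (args.feed) and registers newly seen instances."""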
    logger.debug("args[]='%s' - CALLED!", type(args))

    domains = list()

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    components = urlparse(args.feed)
    domain = components.netloc.lower().split(":")[0]

    logger.debug("domain='%s'", domain)
    if sources.is_recent(domain):
        logger.info("API from domain='%s' has recently been accessed - EXIT!", domain)
        return 1
    else:
        logger.debug("domain='%s' has not been recently used, marking ...", domain)
        sources.update(domain)

    logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item[%s]='%s'", type(item), item)
            domain = item.link.split("=")[1]
            domain = tidyup.domain(domain) if domain not in [None, ""] else None

            logger.debug("domain='%s' - AFTER!", domain)
            if domain is None or domain == "":
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)

            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fbabot_atom(args: argparse.Namespace) -> int:
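    """Parses the FBA bot account's ATOM feed (defaulting to ryona.agency) and registers domains linked from its entries."""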
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "ryona.agency"
    feed = f"https://{source_domain}/users/fba/feed.atom"

    logger.debug("args.feed[%s]='%s'", type(args.feed), args.feed)
    if args.feed is not None and validators.url(args.feed):
        logger.debug("Setting feed='%s' ...", args.feed)
        feed = str(args.feed)
        source_domain = urlparse(args.feed).netloc

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                logger.debug("element[]='%s'", type(element))
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href) if href not in [None, ""] else None

                    logger.debug("domain='%s' - AFTER!", domain)
                    if domain is None or domain == "":
                        logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                        continue

                    logger.debug("domain='%s' - BEFORE!", domain)
                    domain = domain.encode("idna").decode("utf-8")
                    logger.debug("domain='%s' - AFTER!", domain)

                    if not domain_helper.is_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)

            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_instances(args: argparse.Namespace) -> int:
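    """Fetches peers for args.domain, then loops over known instances that are due for a re-fetch."""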
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("args.domain='%s' - checking ...", args.domain)
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid.", args.domain)
        return 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
        return 101

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Initialize values
    domain = tidyup.domain(args.domain)
    origin = software = None

    # Fetch record
    database.cursor.execute("SELECT origin, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
    row = database.cursor.fetchone()
    if row is not None:
        origin = row["origin"]
        software = row["software"]

    if software_helper.is_relay(software):
        logger.warning("args.domain='%s' is of software type '%s' which is not supported by this command. Please invoke fetch_relays instead.", args.domain, software)
        return 102

    # Initial fetch
    try:
        logger.info("Fetching instances from args.domain='%s',origin='%s',software='%s' ...", domain, origin, software)
        federation.fetch_instances(domain, origin, software, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update(args.domain)
        return 100

    if args.single:  # flag name assumed, the original condition is elided here
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus', 'neodb') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY total_peers DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] == "":
            logger.debug("row[domain] is empty - SKIPPED!")
            continue

        logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
        domain = row["domain"].encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("Domain domain='%s' is not wanted - SKIPPED!", domain)
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", domain, row["origin"], row["software"], row["nodeinfo_url"])
            federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0

def fetch_csv(args: argparse.Namespace) -> int:
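    """Processes all configured CSV blocklists, optionally restricted to args.domain as blocker."""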
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    logger.info("Checking %d CSV files ...", len(blocklists.csv_files))
    for block in blocklists.csv_files:
        logger.debug("block[blocker]='%s',block[csv_url]='%s'", block["blocker"], block["csv_url"])

        # Is a domain given and not equal to the blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue

        logger.debug("Invoking processing.csv_block(%s, %s, fetch_csv) ...", block["blocker"], block["csv_url"])
        processing.csv_block(block["blocker"], block["csv_url"], inspect.currentframe().f_code.co_name)

    logger.debug("Success - EXIT!")
    return 0

def fetch_oliphant(args: argparse.Namespace) -> int:
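    """Downloads oliphant's CSV blocklists from codeberg.org and processes them per blocker."""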
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "codeberg.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"

    logger.debug("Downloading %d files ...", len(blocklists.oliphant_blocklists))
    for block in blocklists.oliphant_blocklists:
        # Is a domain given and not equal to the blocker?
        logger.debug("block[blocker]='%s',block[csv_url]='%s'", block["blocker"], block["csv_url"])
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue

        url = f"{base_url}/{block['csv_url']}"

        logger.debug("Invoking processing.csv_block(%s, %s, fetch_oliphant) ...", block["blocker"], url)
        processing.csv_block(block["blocker"], url, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_txt(args: argparse.Namespace) -> int:
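    """Fetches plain-text blocklists and processes each listed domain generically."""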
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    logger.info("Checking %d text file(s) ...", len(blocklists.txt_files))
    for row in blocklists.txt_files:
        logger.debug("Fetching row[url]='%s' ...", row["url"])
        response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code == 200 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.strip().split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain) if domain not in [None, ""] else None

                logger.debug("domain='%s' - AFTER!", domain)
                if domain is None or domain == "":
                    logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                    continue
                elif not domain_helper.is_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue
                elif instances.is_recent(domain):
                    logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                    continue

                logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
                processed = processing.instance(domain, row["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)
                if not processed:
                    logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
                    continue

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fedipact(args: argparse.Namespace) -> int:
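    """Scrapes the signee list on fedipact.online and registers newly seen instances."""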
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fedipact.online"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching / from source_domain='%s' ...", source_domain)
    response = utils.fetch_url(
        f"https://{source_domain}",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and response.text != "":
        logger.debug("Parsing %d Bytes ...", len(response.text))

        doc = bs4.BeautifulSoup(response.text, "html.parser")
        logger.debug("doc[]='%s'", type(doc))

        rows = doc.findAll("li")
        logger.info("Checking %d row(s) ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            domain = tidyup.domain(row.contents[0]) if row.contents[0] not in [None, ""] else None

            logger.debug("domain='%s' - AFTER!", domain)
            if domain is None or domain == "":
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.info("Fetching domain='%s' ...", domain)
            federation.fetch_instances(domain, "beach.city", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_joinmobilizon(args: argparse.Namespace) -> int:
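    """Fetches the Mobilizon instance list from instances.joinmobilizon.org and registers new hosts."""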
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "instances.joinmobilizon.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching instances from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/api/v1/instances",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    parsed = json.loads(raw)
    logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))

    if "data" not in parsed:
        logger.warning("parsed()=%d does not contain key 'data'", len(parsed))
        return 1

    logger.info("Checking %d instances ...", len(parsed["data"]))
    for row in parsed["data"]:
        logger.debug("row[]='%s'", type(row))
        if "host" not in row:
            logger.warning("row='%s' does not contain key 'host' - SKIPPED!", row)
            continue
        elif not domain_helper.is_wanted(row["host"]):
            logger.debug("row[host]='%s' is not wanted - SKIPPED!", row["host"])
            continue
        elif instances.is_registered(row["host"]):
            logger.debug("row[host]='%s' is already registered - SKIPPED!", row["host"])
            continue

        logger.info("Fetching row[host]='%s' ...", row["host"])
        federation.fetch_instances(row["host"], "demo.mobilizon.org", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_joinmisskey(args: argparse.Namespace) -> int:
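    """Fetches instances.json from instanceapp.misskey.page and registers new Misskey instances."""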
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "instanceapp.misskey.page"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching instances.json from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/instances.json",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    parsed = json.loads(raw)
    logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))

    if "instancesInfos" not in parsed:
        logger.warning("parsed()=%d does not contain element 'instancesInfos'", len(parsed))
        return 1

    logger.info("Checking %d instance(s) ...", len(parsed["instancesInfos"]))
    for row in parsed["instancesInfos"]:
        logger.debug("row[%s]='%s'", type(row), row)
        if "url" not in row:
            logger.warning("row()=%d does not have element 'url' - SKIPPED!", len(row))
            continue
        elif not domain_helper.is_wanted(row["url"]):
            logger.debug("row[url]='%s' is not wanted - SKIPPED!", row["url"])
            continue
        elif instances.is_registered(row["url"]):
            logger.debug("row[url]='%s' is already registered - SKIPPED!", row["url"])
            continue

        logger.info("Fetching row[url]='%s' ...", row["url"])
        federation.fetch_instances(row["url"], "misskey.io", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def recheck_obfuscation(args: argparse.Namespace) -> int:
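    """Re-fetches blocklists from instances flagged with obfuscated entries and tries to deobfuscate them."""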
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if isinstance(args.domain, str) and args.domain != "" and domain_helper.is_wanted(args.domain):
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
    elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
    else:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")

    rows = database.cursor.fetchall()
    logger.info("Checking %d domains ...", len(rows))
    for row in rows:
        logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        if (args.force is None or not args.force) and args.domain is None and args.software is None and instances.is_recent(row["domain"], "last_blocked"):
            logger.debug("row[domain]='%s' has been recently checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force))
            continue

        logger.debug("Invoking federation.fetch_blocks(%s) ...", row["domain"])
        blocking = federation.fetch_blocks(row["domain"])

        logger.debug("blocking()=%d", len(blocking))
        if len(blocking) == 0:
            if row["software"] == "pleroma":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = pleroma.fetch_blocks(row["domain"])
            elif row["software"] == "mastodon":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = mastodon.fetch_blocks(row["domain"])
            elif row["software"] == "lemmy":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = lemmy.fetch_blocks(row["domain"])
            elif row["software"] == "friendica":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = friendica.fetch_blocks(row["domain"])
            elif row["software"] == "misskey":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = misskey.fetch_blocks(row["domain"])
            else:
                logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])

        # c.s isn't part of oliphant's "hidden" blocklists
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] != "chaos.social" and not software_helper.is_relay(row["software"]) and not blocklists.has(row["domain"]):
            logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
            instances.set_last_blocked(row["domain"])
            instances.set_total_blocks(row["domain"], blocking)
        obfuscated = 0
        blockdict = list()

        logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
        for block in blocking:
            logger.debug("block[blocked]='%s'", block["blocked"])
            blocked = None

            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
                logger.debug("block='%s' is obfuscated.", block["blocked"])
                obfuscated = obfuscated + 1
                blocked = utils.deobfuscate(block["blocked"], row["domain"], block["digest"] if "digest" in block else None)
            elif not domain_helper.is_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
                logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
                continue

            logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
            if blocked is not None and blocked != block["blocked"]:
                logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
                obfuscated = obfuscated - 1

                if blacklist.is_blacklisted(blocked):
                    logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
                    continue
                elif blacklist.is_blacklisted(row["domain"]):
                    logger.debug("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"])
                    continue
                elif blocks.is_instance_blocked(row["domain"], blocked):
                    logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
                    continue

                block["block_level"] = blocks.alias_block_level(block["block_level"])

                logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
                if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
                    blockdict.append({
                        "blocked": blocked,
                        "reason" : block["reason"],
                    })

        logger.debug("Setting obfuscated=%d for row[domain]='%s' ...", obfuscated, row["domain"])
        instances.set_obfuscated_blocks(row["domain"], obfuscated)

        logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
        if instances.has_pending(row["domain"]):
            logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
            instances.update(row["domain"])

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", row["domain"], len(blockdict))
            network.send_bot_post(row["domain"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fedilist(args: argparse.Namespace) -> int:
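    """Fetches a CSV instance list from demo.fedilist.com and registers newly seen domains."""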
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "demo.fedilist.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    url = f"http://{source_domain}/instance/csv?onion=not"
    if args.software is not None and args.software != "":
        logger.debug("args.software='%s'", args.software)
        url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"

    logger.info("Fetching url='%s' ...", url)
    response = reqto.get(
        url,
        headers=network.web_headers,
        timeout=(config.get("connection_timeout"), config.get("read_timeout")),
        allow_redirects=False
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if not response.ok or response.status_code > 200 or len(response.content) == 0:
        logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.text))
        return 1

    reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")

    logger.debug("reader[]='%s'", type(reader))
    if reader is None:
        logger.warning("Failed parsing response.content()=%d as CSV content", len(response.content))
        return 2

    rows = list(reader)

    logger.info("Checking %d rows ...", len(rows))
    for row in rows:
        logger.debug("row[]='%s'", type(row))
        if "hostname" not in row:
            logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(row))
            continue

        logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"])
        domain = tidyup.domain(row["hostname"]) if row["hostname"] not in [None, ""] else None
        logger.debug("domain='%s' - AFTER!", domain)

        if domain is None or domain == "":
            logger.debug("domain='%s' is empty after tidyup.domain(): row[hostname]='%s' - SKIPPED!", domain, row["hostname"])
            continue

        logger.debug("domain='%s' - BEFORE!", domain)
        domain = domain.encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            continue
        elif (args.force is None or not args.force) and instances.is_registered(domain):
            logger.debug("domain='%s' is already registered, --force not specified: args.force[]='%s'", domain, type(args.force))
            continue
        elif instances.is_recent(domain):
            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
            continue

        logger.info("Fetching instances from domain='%s' ...", domain)
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def update_nodeinfo(args: argparse.Namespace) -> int:
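    """Re-determines the software type (nodeinfo) for the selected instances and updates their records."""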
1460 logger.debug("args[]='%s' - CALLED!", type(args))
1462 logger.debug("Invoking locking.acquire() ...")
1465 if args.domain is not None and args.domain != "":
1466 logger.debug("Fetching args.domain='%s'", args.domain)
1467 database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
1468 elif args.software is not None and args.software != "":
1469 logger.info("Fetching domains for args.software='%s'", args.software)
1470 database.cursor.execute("SELECT domain, software FROM instances WHERE software = ? ORDER BY last_updated ASC", [args.software])
1471 elif args.mode is not None and args.mode != "":
1472 logger.info("Fetching domains for args.mode='%s'", args.mode.upper())
1473 database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode = ? ORDER BY last_updated ASC", [args.mode])
1474 elif args.no_software:
1475 logger.info("Fetching domains with no software type detected ...")
1476 database.cursor.execute("SELECT domain, software FROM instances WHERE software IS NULL ORDER BY last_updated ASC")
1478 logger.info("Fetching domains with other detection mode than AUTO_DISOVERY being set ...")
1479 database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode IS NOT NULL AND detection_mode != 'AUTO_DISCOVERY' ORDER BY last_updated ASC")
1480 elif args.no_detection:
1481 logger.info("Fetching domains with no detection mode being set ...")
1482 database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode IS NULL ORDER BY last_updated ASC")
1484 logger.info("Fetching domains for recently updated ...")
1485 database.cursor.execute("SELECT domain, software FROM instances ORDER BY last_updated ASC")
1487 domains = database.cursor.fetchall()
1489 logger.info("Checking %d domain(s) ...", len(domains))
1492 logger.debug("row[]='%s'", type(row))
1493 if not args.force and instances.is_recent(row["domain"], "last_nodeinfo"):
1494 logger.debug("row[domain]='%s' has been recently checked - SKIPPED!", row["domain"])
1498 logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
1499 software = federation.determine_software(row["domain"])
1501 logger.debug("Determined software='%s'", software)
1502 if (software != row["software"] and software is not None) or args.force is True:
1503 logger.debug("software='%s'", software)
1504 if software is None:
1505 logger.debug("Setting nodeinfo_url to 'None' for row[domain]='%s' ...", row["domain"])
1506 instances.set_nodeinfo_url(row["domain"], None)
1508 logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
1509 instances.set_software(row["domain"], software)
1511 if software is not None:
1512 logger.debug("Setting row[domain]='%s' as successfully determined ...", row["domain"])
1513 instances.set_success(row["domain"])
1514 except network.exceptions as exception:
1515 logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1516 instances.set_last_error(row["domain"], exception)
1518 instances.set_last_nodeinfo(row["domain"])
1519 instances.update(row["domain"])
1522 logger.debug("Success! - EXIT!")
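
# fetch_instances_social() pulls the instance list from the instances.social
# API. It requires "instances_social_api_key" in config.json, sent as a
# Bearer token. Each returned row is expected to carry a "name" element
# holding the bare domain, e.g. {"name": "example.social", ...}.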
def fetch_instances_social(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "instances.social"

    if config.get("instances_social_api_key") == "":
        logger.error("API key not set. Please set in your config.json file.")
        return 1
    elif sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 2

    logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
    sources.update(source_domain)

    headers = {
        "Authorization": f"Bearer {config.get('instances_social_api_key')}",
    }

    logger.info("Fetching list from source_domain='%s' ...", source_domain)
    fetched = network.get_json_api(
        source_domain,
        "/api/1.0/instances/list?count=0&sort_by=name",
        headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )
    logger.debug("fetched[]='%s'", type(fetched))

    if "error_message" in fetched:
        logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
        return 3
    elif "exception" in fetched:
        logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
        return 4
    elif "json" not in fetched:
        logger.warning("fetched has no element 'json' - EXIT!")
        return 5
    elif "instances" not in fetched["json"]:
        logger.warning("fetched[json] has no element 'instances' - EXIT!")
        return 6

    domains = list()
    rows = fetched["json"]["instances"]

    logger.info("Checking %d row(s) ...", len(rows))
    for row in rows:
        logger.debug("row[]='%s'", type(row))
        domain = tidyup.domain(row["name"]) if row["name"] not in [None, ""] else None
        logger.debug("domain='%s' - AFTER!", domain)

        if domain is None or domain == "":
            logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
            continue

        logger.debug("domain='%s' - BEFORE!", domain)
        domain = domain.encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            continue
        elif domain in domains:
            logger.debug("domain='%s' is already added - SKIPPED!", domain)
            continue
        elif instances.is_registered(domain):
            logger.debug("domain='%s' is already registered - SKIPPED!", domain)
            continue
        elif instances.is_recent(domain):
            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
            continue

        logger.info("Fetching instances from domain='%s'", domain)
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
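
# fetch_relaylist() fetches known ActivityPub relays from api.relaylist.com.
# Each JSON row is expected to carry a "url" element, e.g.
# {"url": "https://relay.example/inbox"}; only the hostname part is kept.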
def fetch_relaylist(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "api.relaylist.com"

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1

    logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
    sources.update(source_domain)

    logger.info("Fetching list from source_domain='%s' ...", source_domain)
    fetched = network.get_json_api(
        source_domain,
        "/relays",  # NOTE: path and empty headers below are assumptions; both are elided in this excerpt
        {},
        (config.get("connection_timeout"), config.get("read_timeout"))
    )
    logger.debug("fetched[]='%s'", type(fetched))

    if "error_message" in fetched:
        logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
        return 2
    elif "exception" in fetched:
        logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
        return 3
    elif "json" not in fetched:
        logger.warning("fetched has no element 'json' - EXIT!")
        return 4

    domains = list()

    logger.info("Checking %d row(s) ...", len(fetched["json"]))
    for row in fetched["json"]:
        logger.debug("row[]='%s'", type(row))
        domain = urlparse(row["url"]).netloc.lower().split(":")[0]
        logger.debug("domain='%s' - AFTER!", domain)

        if domain is None or domain == "":
            logger.debug("domain='%s' is empty after parsing row[url]='%s' - SKIPPED!", domain, row["url"])
            continue

        logger.debug("domain='%s' - BEFORE!", domain)
        domain = domain.encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            continue
        elif domain in domains:
            logger.debug("domain='%s' is already added - SKIPPED!", domain)
            continue
        elif instances.is_registered(domain):
            logger.debug("domain='%s' is already registered - SKIPPED!", domain)
            continue
        elif instances.is_recent(domain):
            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
            continue

        logger.info("Fetching instances from domain='%s'", domain)
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
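
# fetch_relays() crawls peers from already-registered relay instances.
# pub-relay exposes its peers machine-readably via nodeinfo
# ("metadata" -> "peers"); activityrelay, aoderelay and selective-relay only
# list peers on their HTML landing page, which is scraped with BeautifulSoup.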
def fetch_relays(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND domain = ? LIMIT 1", [args.domain])
    elif args.software is not None and args.software != "":
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND software = ?", [args.software])
    else:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay')")

    domains = list()
    rows = database.cursor.fetchall()

    logger.info("Checking %d relays ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
        if not args.force and instances.is_recent(row["domain"]):
            logger.debug("row[domain]='%s' has been recently fetched - SKIPPED!", row["domain"])
            continue

        peers = list()
        try:
            if row["software"] == "pub-relay":
                logger.info("Fetching row[nodeinfo_url]='%s' from relay row[domain]='%s',row[software]='%s' ...", row["nodeinfo_url"], row["domain"], row["software"])
                raw = network.fetch_api_url(
                    row["nodeinfo_url"],
                    (config.get("connection_timeout"), config.get("read_timeout"))
                )

                logger.debug("raw[%s]()=%d", type(raw), len(raw))
                if "exception" in raw:
                    logger.warning("row[domain]='%s' has caused an exception: '%s' - raising again ...", row["domain"], type(raw["exception"]))
                    raise raw["exception"]
                elif "error_message" in raw:
                    logger.warning("row[domain]='%s' has caused error message: '%s' - SKIPPED!", row["domain"], raw["error_message"])
                    instances.set_last_error(row["domain"], raw)
                    instances.set_last_instance_fetch(row["domain"])
                    instances.update(row["domain"])
                    continue
                elif "json" not in raw:
                    logger.warning("raw()=%d does not contain key 'json' in response - SKIPPED!", len(raw))
                    continue
                elif "metadata" not in raw["json"]:
                    logger.warning("raw[json]()=%d does not contain key 'metadata' in response - SKIPPED!", len(raw["json"]))
                    continue
                elif "peers" not in raw["json"]["metadata"]:
                    logger.warning("raw[json][metadata]()=%d does not contain key 'peers' in response - SKIPPED!", len(raw["json"]["metadata"]))
                    continue
            else:
                logger.info("Fetching / from relay row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
                raw = utils.fetch_url(
                    f"https://{row['domain']}",
                    network.web_headers,
                    (config.get("connection_timeout"), config.get("read_timeout"))
                ).text  # .text: BeautifulSoup below needs the HTML body, not the response object
                logger.debug("raw[%s]()=%d", type(raw), len(raw))

                doc = bs4.BeautifulSoup(raw, features="html.parser")
                logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching from relay '%s': '%s'", type(exception), row["domain"], str(exception))
            instances.set_last_error(row["domain"], exception)
            instances.set_last_instance_fetch(row["domain"])
            instances.update(row["domain"])
            continue

        logger.debug("row[software]='%s'", row["software"])
        if row["software"] == "activityrelay":
            logger.debug("Checking row[domain]='%s' ...", row["domain"])
            tags = doc.findAll("p")

            logger.debug("Checking %d paragraphs ...", len(tags))
            for tag in tags:
                logger.debug("tag[]='%s'", type(tag))
                if len(tag.contents) == 0:
                    logger.debug("tag='%s' is an empty tag - SKIPPED!", tag)
                    continue
                elif "registered instances" not in tag.contents[0]:
                    logger.debug("Skipping paragraph, text not found.")
                    continue

                logger.debug("Found tag.contents[0][]='%s'", tag.contents[0])
                for domain in tag.contents:
                    logger.debug("domain[%s]='%s'", type(domain), domain)
                    if not isinstance(domain, bs4.element.NavigableString) or "registered instances" in domain:
                        continue

                    domain = str(domain)
                    logger.debug("domain='%s'", domain)
                    if not domain_helper.is_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                        continue

                    logger.debug("domain='%s' - BEFORE!", domain)
                    domain = tidyup.domain(domain) if domain not in [None, ""] else None
                    logger.debug("domain='%s' - AFTER!", domain)

                    if domain is None or domain == "":
                        logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
                        continue
                    elif domain not in peers:
                        logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
                        peers.append(domain)

                    logger.debug("domains()=%d,domain='%s'", len(domains), domain)
                    if dict_helper.has_key(domains, "domain", domain):
                        logger.debug("domain='%s' already added", domain)
                        continue

                    logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
                    domains.append({
                        "domain": domain,
                        "origin": row["domain"],
                    })
        elif row["software"] in ["aoderelay", "selective-relay"]:
            logger.debug("Checking row[domain]='%s' ...", row["domain"])
            if row["software"] == "aoderelay":
                tags = doc.findAll("section", {"class": "instance"})
            else:
                tags = doc.find("div", {"id": "instances"}).findAll("li")

            logger.debug("Checking %d tags ...", len(tags))
            for tag in tags:
                logger.debug("tag[]='%s'", type(tag))

                link = tag.find("a")
                logger.debug("link[%s]='%s'", type(link), link)
                if not isinstance(link, bs4.element.Tag):
                    logger.warning("tag[%s]='%s' is not type of 'bs4.element.Tag' - SKIPPED!", type(tag), tag)
                    continue

                components = urlparse(link.get("href"))
                logger.debug("components(%d)='%s'", len(components), components)
                domain = components.netloc.lower().split(":")[0]

                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain) if domain not in [None, ""] else None
                logger.debug("domain='%s' - AFTER!", domain)

                if domain is None or domain == "":
                    logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
                    continue
                elif domain not in peers:
                    logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
                    peers.append(domain)

                logger.debug("domains()=%d,domain='%s'", len(domains), domain)
                if dict_helper.has_key(domains, "domain", domain):
                    logger.debug("domain='%s' already added", domain)
                    continue

                logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
                domains.append({
                    "domain": domain,
                    "origin": row["domain"],
                })
        elif row["software"] == "pub-relay":
            logger.debug("Checking %d peer(s) row[domain]='%s' ...", len(raw["json"]["metadata"]["peers"]), row["domain"])
            for domain in raw["json"]["metadata"]["peers"]:
                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain) if domain not in [None, ""] else None
                logger.debug("domain='%s' - AFTER!", domain)

                if domain is None or domain == "":
                    logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
                    continue
                elif domain not in peers:
                    logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
                    peers.append(domain)

                logger.debug("domains()=%d,domain='%s'", len(domains), domain)
                if dict_helper.has_key(domains, "domain", domain):
                    logger.debug("domain='%s' already added", domain)
                    continue

                logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
                domains.append({
                    "domain": domain,
                    "origin": row["domain"],
                })
        else:
            logger.warning("row[domain]='%s',row[software]='%s' is not supported", row["domain"], row["software"])
            continue

        logger.debug("Updating last_instance_fetch for row[domain]='%s' ...", row["domain"])
        instances.set_last_instance_fetch(row["domain"])

        logger.info("Relay '%s' has %d peer(s) registered.", row["domain"], len(peers))
        instances.set_total_peers(row["domain"], peers)

        logger.debug("Flushing data for row[domain]='%s'", row["domain"])
        instances.update(row["domain"])

    logger.info("Checking %d domains ...", len(domains))
    for row in domains:
        logger.debug("row[domain]='%s',row[origin]='%s'", row["domain"], row["origin"])
        if not domain_helper.is_wanted(row["domain"]):
            logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
            continue
        elif instances.is_registered(row["domain"]):
            logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
            continue

        logger.info("Fetching row[domain]='%s',row[origin]='%s' ...", row["domain"], row["origin"])
        federation.fetch_instances(row["domain"], row["origin"], None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
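
# convert_idna() is a one-shot maintenance command: it converts all
# non-punycode domain columns (instances.domain/origin and
# blocks.blocker/blocked) to their IDNA-encoded ("xn--") form.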
def convert_idna(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    database.cursor.execute("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    instances.translate_idnas(rows, "domain")

    database.cursor.execute("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    instances.translate_idnas(rows, "origin")

    database.cursor.execute("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    blocks.translate_idnas(rows, "blocker")

    database.cursor.execute("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    blocks.translate_idnas(rows, "blocked")

    logger.debug("Success! - EXIT!")
    return 0
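
# remove_invalid() deletes instances whose domain no longer passes
# validators.domain(), including their block records, then VACUUMs the
# database to reclaim space.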
def remove_invalid(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    database.cursor.execute("SELECT domain FROM instances ORDER BY domain ASC")
    rows = database.cursor.fetchall()

    logger.info("Checking %d domains ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if not validators.domain(row["domain"].split("/")[0]):
            logger.info("Invalid row[domain]='%s' found, removing ...", row["domain"])
            database.cursor.execute("DELETE FROM blocks WHERE blocker = ? OR blocked = ?", [row["domain"], row["domain"]])
            database.cursor.execute("DELETE FROM instances WHERE domain = ? LIMIT 1", [row["domain"]])

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.info("Vacuum cleaning database ...")
    database.cursor.execute("VACUUM")

    logger.debug("Success! - EXIT!")
    return 0