# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import argparse
import csv
import inspect
import json
import logging
import time

from urllib.parse import urlparse

import atoma
import bs4
import markdown
import validators

from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import blocklists
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import dicts as dict_helper
from fba.helpers import domain as domain_helper
from fba.helpers import locking
from fba.helpers import processing
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import csrf
from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances
from fba.models import sources

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)

def check_instance(args: argparse.Namespace) -> int:
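    """Checks whether args.domain is a valid, non-blacklisted and not yet registered domain and logs the result."""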
    logger.debug("args.domain='%s' - CALLED!", args.domain)

    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status

def check_nodeinfo(args: argparse.Namespace) -> int:
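    """Reports instances whose recorded nodeinfo_url matches neither their domain nor its punycode form."""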
    logger.debug("args[]='%s' - CALLED!", type(args))

    database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")

    cnt = 0
    for row in database.cursor.fetchall():
        logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        punycode = row["domain"].encode("idna").decode("utf-8")

        if row["nodeinfo_url"].startswith("/"):
            logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
            continue
        elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
            logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
            cnt = cnt + 1

    logger.info("Found %d row(s)", cnt)

    logger.debug("EXIT!")
    return 0

def fetch_pixelfed_api(args: argparse.Namespace) -> int:
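    """Fetches the server list from the pixelfed.org API and registers all new, wanted instances."""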
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF by default, so there is no need to add network.source_headers yourself here
    source_domain = "pixelfed.org"

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    try:
        logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
        headers = csrf.determine(source_domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_pixelfed_api,%s) - EXIT!", type(exception), __name__)
        return 1

    try:
        logger.info("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            source_domain,
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 1
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 1

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif row["domain"] == "":
                logger.debug("row[domain] is empty - SKIPPED!")
                continue

            logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
            domain = row["domain"].encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Fetching instances from domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Cannot fetch JSON API,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 1

    logger.debug("Success! - EXIT!")
    return 0

def fetch_bkali(args: argparse.Namespace) -> int:
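    """Fetches a domain list from the gql.api.bka.li GraphQL API and registers all new, wanted instances."""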
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "gql.api.bka.li"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()
    try:
        logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
        fetched = network.post_json_api(
            source_domain,
            "/v1/graphql",
            json.dumps({
                "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
            })
        )

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for 'gql.api.bka.li' returned error message='%s'", fetched["error_message"])
            return 1
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
            return 1

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif entry["domain"] == "":
                logger.debug("entry[domain] is empty - SKIPPED!")
                continue
            elif not domain_helper.is_wanted(entry["domain"]):
                logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
                continue
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])
    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 1

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0

def fetch_blocks(args: argparse.Namespace) -> int:
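    """Fetches and records blocklists, either from a single domain/software or from all known instances."""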
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ? LIMIT 1", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC", [args.software]
        )
    elif args.force:
        # Re-check all instances
        logger.debug("Re-checking all instances ...")
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC"
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)

        if not domain_helper.is_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("Setting last_blocked,has_obfuscation=false for blocker='%s' ...", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        # c.s isn't part of oliphant's "hidden" blocklists
        if blocker == "chaos.social" or software_helper.is_relay(software) or blocklists.has(blocker):
            logger.debug("Skipping blocker='%s', run ./fba.py fetch_cs or fetch_oliphant instead!", blocker)
            continue

        logger.debug("Invoking federation.fetch_blocks(%s) ...", blocker)
        blocking = federation.fetch_blocks(blocker)

        logger.debug("blocker='%s',software='%s',blocking()=%d", blocker, software, len(blocking))
        if len(blocking) == 0:
            logger.debug("blocker='%s',software='%s' - fetching blocklist ...", blocker, software)
            if software == "pleroma":
                blocking = pleroma.fetch_blocks(blocker)
                logger.info("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "mastodon":
                blocking = mastodon.fetch_blocks(blocker)
                logger.info("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "lemmy":
                blocking = lemmy.fetch_blocks(blocker)
                logger.info("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "friendica":
                blocking = friendica.fetch_blocks(blocker)
                logger.info("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "misskey":
                blocking = misskey.fetch_blocks(blocker)
                logger.info("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            else:
                logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
        instances.set_total_blocks(blocker, blocking)

        blockdict = list()
        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"]  = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some friendica servers also obscure domains without a hash
                row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin = row["origin"]
                nodeinfo_url = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some obscure them with question marks, not sure if that's dependent on version or not
                row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin = row["origin"]
                nodeinfo_url = row["nodeinfo_url"]

            logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue

            logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
            block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
            logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])

            if not domain_helper.is_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            block["block_level"] = blocks.alias_block_level(block["block_level"])

            if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_observer(args: argparse.Namespace) -> int:
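    """Scrapes fediverse.observer's per-software tables and registers all newly found instances."""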
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fediverse.observer"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    types = list()
    if args.software is None:
        logger.info("Fetching software list ...")
        raw = utils.fetch_url(
            f"https://{source_domain}",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        ).text
        logger.debug("raw[%s]()=%d", type(raw), len(raw))

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        navbar = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"})
        logger.debug("navbar[]='%s'", type(navbar))
        if navbar is None:
            logger.warning("Cannot find navigation bar, cannot continue!")
            return 1

        items = navbar.findAll("a", {"class": "dropdown-item"})
        logger.debug("items[]='%s'", type(items))

        logger.info("Checking %d menu items ...", len(items))
        for item in items:
            logger.debug("item[%s]='%s'", type(item), item)
            if item.text.lower() == "all":
                logger.debug("Skipping 'All' menu entry ...")
                continue

            logger.debug("Appending item.text='%s' ...", item.text)
            types.append(tidyup.domain(item.text))
    else:
        logger.info("Adding args.software='%s' as type ...", args.software)
        types.append(args.software)

    logger.info("Fetching %d different table data ...", len(types))
    for software in types:
        logger.debug("software='%s' - BEFORE!", software)
        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            continue

        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://{source_domain}/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()
            domain = tidyup.domain(domain) if domain is not None and domain != "" else None
            logger.debug("domain='%s' - AFTER!", domain)

            if domain is None or domain == "":
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue

            software = software_helper.alias(software)
            logger.info("Fetching instances for domain='%s'", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_todon_wiki(args: argparse.Namespace) -> int:
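    """Fetches the domain block list from wiki.todon.eu and records its silenced and suspended entries."""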
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "wiki.todon.eu"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    blocklist = {
        "silenced": list(),
        "reject"  : list(),
    }

    logger.debug("Fetching domainblocks from source_domain='%s'", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/todon/domainblocks",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "todon.eu"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_last_blocked(blocker)
    instances.set_total_blocks(blocker, blocking)

    blockdict = list()
    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if not domain_helper.is_wanted(blocked):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue
            elif not domain_helper.is_wanted(blocker):
                logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
                continue
            elif blocks.is_instance_blocked(blocker, blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", blocked, block_level, blocker)
                blockdict.append({
                    "blocked": blocked,
                    "reason" : None,
                })

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
        network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_cs(args: argparse.Namespace):
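    """Fetches chaos.social's federation.md and records its silenced and blocked instances."""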
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Markdown extensions needed to render the tables in federation.md
    extensions = [
        "extra",
        "tables",
    ]

    blocklist = {
        "silenced": list(),
        "reject"  : list(),
    }

    source_domain = "raw.githubusercontent.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching federation.md from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/chaossocial/meta/master/federation.md",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc()=%d[]='%s'", len(doc), type(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    blocklist["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    blocklist["reject"] = federation.find_domains(blocked)

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "chaos.social"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_last_blocked(blocker)
    instances.set_total_blocks(blocker, blocking)

    logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
    if len(blocking) > 0:
        blockdict = list()
        for block_level in blocklist:
            logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))

            for row in blocklist[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if "domain" not in row:
                    logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
                    continue
                elif not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", row["domain"], row["reason"], blocker)
                    blockdict.append({
                        "blocked": row["domain"],
                        "reason" : row["reason"],
                    })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

    logger.debug("Success! - EXIT!")

def fetch_fba_rss(args: argparse.Namespace) -> int:
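    """Parses the FBA-specific RSS feed given as args.feed and registers all new, wanted domains."""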
    logger.debug("args[]='%s' - CALLED!", type(args))

    domains = list()

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    components = urlparse(args.feed)
    domain = components.netloc.lower().split(":")[0]

    logger.debug("domain='%s'", domain)
    if sources.is_recent(domain):
        logger.info("API from domain='%s' has recently been accessed - EXIT!", domain)
        return 0
    else:
        logger.debug("domain='%s' has not been recently used, marking ...", domain)
        sources.update(domain)

    logger.info("Fetching FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item[%s]='%s'", type(item), item)
            domain = item.link.split("=")[1]
            domain = tidyup.domain(domain) if domain is not None and domain != "" else None

            logger.debug("domain='%s' - AFTER!", domain)
            if domain is None or domain == "":
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fbabot_atom(args: argparse.Namespace) -> int:
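    """Parses the FBA bot's ATOM feed and registers all new, wanted domains linked from its entries."""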
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "ryona.agency"
    feed = f"https://{source_domain}/users/fba/feed.atom"

    logger.debug("args.feed[%s]='%s'", type(args.feed), args.feed)
    if args.feed is not None and validators.url(args.feed):
        logger.debug("Setting feed='%s' ...", args.feed)
        feed = str(args.feed)
        source_domain = urlparse(args.feed).netloc

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                logger.debug("element[]='%s'", type(element))
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href) if href is not None and href != "" else None

                    logger.debug("domain='%s' - AFTER!", domain)
                    if domain is None or domain == "":
                        logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                        continue

                    logger.debug("domain='%s' - BEFORE!", domain)
                    domain = domain.encode("idna").decode("utf-8")
                    logger.debug("domain='%s' - AFTER!", domain)

                    if not domain_helper.is_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_instances(args: argparse.Namespace) -> int:
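    """Fetches instances (peers) from args.domain and then from other known instances."""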
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("args.domain='%s' - checking ...", args.domain)
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid.", args.domain)
        return 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
        return 101

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Initialize values
    domain = tidyup.domain(args.domain)
    origin = software = None

    # Fetch record
    database.cursor.execute("SELECT origin, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
    row = database.cursor.fetchone()
    if row is not None:
        origin = row["origin"]
        software = row["software"]

    if software_helper.is_relay(software):
        logger.warning("args.domain='%s' is of software type '%s' which is not supported by this command. Please invoke fetch_relays instead.", args.domain, software)
        return 102

    try:
        logger.info("Fetching instances from args.domain='%s',origin='%s',software='%s' ...", domain, origin, software)
        federation.fetch_instances(domain, origin, software, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update(args.domain)
        return 100

    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus', 'neodb') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY total_peers DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] == "":
            logger.debug("row[domain] is empty - SKIPPED!")
            continue

        logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
        domain = row["domain"].encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", domain, row["origin"], row["software"], row["nodeinfo_url"])
            federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0

def fetch_oliphant(args: argparse.Namespace) -> int:
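    """Downloads oliphant's CSV blocklists from codeberg.org and records their blocks."""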
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "codeberg.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    # Base URL
    base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"

    domains = list()
    blockdict = list()

    logger.debug("Downloading %d files ...", len(blocklists.oliphant_blocklists))
    for block in blocklists.oliphant_blocklists:
        # Is a domain given and not equal to the blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue
        elif args.domain in domains:
            logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
            continue

        instances.set_last_blocked(block["blocker"])

        # Fetch this URL
        logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
        response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.content()=%d", response.ok, response.status_code, len(response.content))
        if not response.ok or response.status_code > 200 or response.content == "":
            logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
            continue

        logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
        reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")

        logger.debug("reader[]='%s'", type(reader))
        for row in reader:
            logger.debug("row[%s]='%s'", type(row), row)
            domain = severity = None
            reject_media = reject_reports = False

            if "#domain" in row:
                domain = row["#domain"]
            elif "domain" in row:
                domain = row["domain"]
            else:
                logger.debug("row='%s' does not contain domain column", row)
                continue

            if "#severity" in row:
                severity = blocks.alias_block_level(row["#severity"])
            elif "severity" in row:
                severity = blocks.alias_block_level(row["severity"])
            else:
                logger.debug("row='%s' does not contain severity column", row)
                continue

            if "#reject_media" in row and row["#reject_media"].lower() == "true":
                reject_media = True
            elif "reject_media" in row and row["reject_media"].lower() == "true":
                reject_media = True

            if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
                reject_reports = True
            elif "reject_reports" in row and row["reject_reports"].lower() == "true":
                reject_reports = True

            logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
            if domain is None or domain == "":
                logger.debug("domain='%s' is empty - SKIPPED!", domain)
                continue
            elif domain.endswith(".onion"):
                logger.debug("domain='%s' is a TOR .onion domain - SKIPPED", domain)
                continue
            elif domain.endswith(".arpa"):
                logger.debug("domain='%s' is a reverse IP address - SKIPPED", domain)
                continue
            elif domain.endswith(".tld"):
                logger.debug("domain='%s' is a fake domain - SKIPPED", domain)
                continue
            elif domain.find("*") >= 0 or domain.find("?") >= 0:
                logger.debug("domain='%s' is obfuscated - Invoking utils.deobfuscate(%s, %s) ...", domain, domain, block["blocker"])
                domain = utils.deobfuscate(domain, block["blocker"])
                logger.debug("domain='%s' - AFTER!", domain)

            if not validators.domain(domain):
                logger.debug("domain='%s' is not a valid domain - SKIPPED!", domain)
                continue
            elif blacklist.is_blacklisted(domain):
                logger.warning("domain='%s' is blacklisted - SKIPPED!", domain)
                continue
            elif blocks.is_instance_blocked(block["blocker"], domain, severity):
                logger.debug("block[blocker]='%s' has already blocked domain='%s' with severity='%s' - SKIPPED!", block["blocker"], domain, severity)
                continue

            logger.debug("Marking domain='%s' as handled", domain)
            domains.append(domain)

            logger.debug("Processing domain='%s' ...", domain)
            processed = processing.instance(domain, block["blocker"], inspect.currentframe().f_code.co_name)
            logger.debug("processed='%s'", processed)

            if processing.block(block["blocker"], domain, None, severity) and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',severity='%s' for blocker='%s' ...", domain, severity, block["blocker"])
                blockdict.append({
                    "blocked": domain,
                    "reason" : None,
                })

            if reject_media:
                processing.block(block["blocker"], domain, None, "reject_media")
            if reject_reports:
                processing.block(block["blocker"], domain, None, "reject_reports")

        logger.debug("block[blocker]='%s'", block["blocker"])
        if not blocklists.has(block["blocker"]):
            logger.debug("Invoking instances.set_total_blocks(%s, domains()=%d) ...", block["blocker"], len(domains))
            instances.set_total_blocks(block["blocker"], domains)

        logger.debug("Checking if blocker='%s' has pending updates ...", block["blocker"])
        if instances.has_pending(block["blocker"]):
            logger.debug("Flushing updates for block[blocker]='%s' ...", block["blocker"])
            instances.update(block["blocker"])

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
            network.send_bot_post(block["blocker"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_txt(args: argparse.Namespace) -> int:
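    """Downloads static text blocklists (currently seirdy.one's bsl.txt) and processes the listed domains."""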
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Static URLs
    urls = ({
        "blocker": "seirdy.one",
        "url"    : "https://seirdy.one/pb/bsl.txt",
    },)

    logger.info("Checking %d text file(s) ...", len(urls))
    for row in urls:
        logger.debug("Fetching row[url]='%s' ...", row["url"])
        response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code == 200 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.strip().split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain) if domain is not None and domain != "" else None

                logger.debug("domain='%s' - AFTER!", domain)
                if domain is None or domain == "":
                    logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                    continue
                elif not domain_helper.is_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue
                elif instances.is_recent(domain):
                    logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                    continue

                logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
                processed = processing.instance(domain, row["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)
                if not processed:
                    logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
                    continue

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fedipact(args: argparse.Namespace) -> int:
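    """Scrapes fedipact.online and registers all participating instances."""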
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fedipact.online"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching / from source_domain='%s' ...", source_domain)
    response = utils.fetch_url(
        f"https://{source_domain}",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and response.text != "":
        logger.debug("Parsing %d Bytes ...", len(response.text))

        doc = bs4.BeautifulSoup(response.text, "html.parser")
        logger.debug("doc[]='%s'", type(doc))

        rows = doc.findAll("li")
        logger.info("Checking %d row(s) ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            domain = tidyup.domain(row.contents[0]) if row.contents[0] is not None and row.contents[0] != "" else None

            logger.debug("domain='%s' - AFTER!", domain)
            if domain is None or domain == "":
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.info("Fetching domain='%s' ...", domain)
            federation.fetch_instances(domain, "beach.city", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_joinmobilizon(args: argparse.Namespace) -> int:
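    """Fetches the instance list from instances.joinmobilizon.org and registers all new, wanted hosts."""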
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "instances.joinmobilizon.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching instances from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/api/v1/instances",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    parsed = json.loads(raw)
    logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))

    if "data" not in parsed:
        logger.warning("parsed()=%d does not contain key 'data'", len(parsed))
        return 1

    logger.info("Checking %d instances ...", len(parsed["data"]))
    for row in parsed["data"]:
        logger.debug("row[]='%s'", type(row))
        if "host" not in row:
            logger.warning("row='%s' does not contain key 'host' - SKIPPED!", row)
            continue
        elif not domain_helper.is_wanted(row["host"]):
            logger.debug("row[host]='%s' is not wanted - SKIPPED!", row["host"])
            continue
        elif instances.is_registered(row["host"]):
            logger.debug("row[host]='%s' is already registered - SKIPPED!", row["host"])
            continue

        logger.info("Fetching row[host]='%s' ...", row["host"])
        federation.fetch_instances(row["host"], "demo.mobilizon.org", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_joinmisskey(args: argparse.Namespace) -> int:
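    """Fetches instances.json from instanceapp.misskey.page and registers all new, wanted instances."""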
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "instanceapp.misskey.page"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching instances.json from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/instances.json",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    parsed = json.loads(raw)
    logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))

    if "instancesInfos" not in parsed:
        logger.warning("parsed()=%d does not contain element 'instancesInfos'", len(parsed))
        return 1

    logger.info("Checking %d instance(s) ...", len(parsed["instancesInfos"]))
    for row in parsed["instancesInfos"]:
        logger.debug("row[%s]='%s'", type(row), row)
        if "url" not in row:
            logger.warning("row()=%d does not have element 'url' - SKIPPED!", len(row))
            continue
        elif not domain_helper.is_wanted(row["url"]):
            logger.debug("row[url]='%s' is not wanted - SKIPPED!", row["url"])
            continue
        elif instances.is_registered(row["url"]):
            logger.debug("row[url]='%s' is already registered - SKIPPED!", row["url"])
            continue

        logger.info("Fetching row[url]='%s' ...", row["url"])
        federation.fetch_instances(row["url"], "misskey.io", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_joinfediverse(args: argparse.Namespace) -> int:
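    """Scrapes the FediBlock page on joinfediverse.wiki and records the blocks found in its tables."""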
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "joinfediverse.wiki"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching /FediBlock wiki page from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/FediBlock",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    tables = doc.findAll("table", {"class": "wikitable"})

    logger.info("Analyzing %d table(s) ...", len(tables))
    blocklist = list()
    for table in tables:
        logger.debug("table[]='%s'", type(table))

        rows = table.findAll("tr")
        logger.info("Checking %d row(s) ...", len(rows))
        block_headers = dict()
        for row in rows:
            logger.debug("row[%s]='%s'", type(row), row)

            headers = row.findAll("th")
            logger.debug("Found headers()=%d header(s)", len(headers))
            if len(headers) > 1:
                block_headers = dict()
                cnt = 0
                for header in headers:
                    cnt = cnt + 1
                    logger.debug("header[]='%s',cnt=%d", type(header), cnt)
                    text = header.contents[0]

                    logger.debug("text[]='%s'", type(text))
                    if not isinstance(text, str):
                        logger.debug("text[]='%s' is not of type 'str' - SKIPPED!", type(text))
                        continue
                    elif validators.domain(text.strip()):
                        logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
                        continue

                    text = tidyup.domain(text.strip())
                    logger.debug("text='%s' - AFTER!", text)
                    if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
                        logger.debug("Found header: '%s'=%d", text, cnt)
                        block_headers[cnt] = text
            elif len(block_headers) == 0:
                logger.debug("row is not scrapable - SKIPPED!")
                continue
            elif len(block_headers) > 0:
                logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
                cnt = 0
                block = dict()
                for element in row.find_all(["th", "td"]):
                    cnt = cnt + 1
                    logger.debug("element[]='%s',cnt=%d", type(element), cnt)
                    if cnt in block_headers:
                        logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])

                        text = element.text.strip()
                        key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"

                        logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
                        if key in ["domain", "instance"]:
                            block[key] = text
                        elif key == "reason":
                            block[key] = tidyup.reason(text)
                        elif key == "subdomain(s)":
                            block[key] = text.split("/")
                        else:
                            logger.debug("key='%s'", key)
                            block[key] = text

                logger.debug("block()=%d ...", len(block))
                if len(block) > 0:
                    logger.debug("Appending block()=%d ...", len(block))
                    blocklist.append(block)

    logger.debug("blocklist()=%d", len(blocklist))

    database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
    domains = database.cursor.fetchall()

    logger.debug("domains(%d)[]='%s'", len(domains), type(domains))

    blocking = list()
    for block in blocklist:
        logger.debug("block='%s'", block)
        if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
            origin = block["blocked"]
            logger.debug("origin='%s'", origin)
            for subdomain in block["subdomain(s)"]:
                block["blocked"] = subdomain + "." + origin
                logger.debug("block[blocked]='%s'", block["blocked"])
                blocking.append(block)
        else:
            blocking.append(block)

    logger.debug("blocking()=%d", len(blocking))
    for block in blocking:
        logger.debug("block[]='%s'", type(block))
        if "blocked" not in block:
            raise KeyError(f"block()={len(block)} does not have element 'blocked'")

        block["blocked"] = tidyup.domain(block["blocked"]).encode("idna").decode("utf-8")
        logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])

        if block["blocked"] == "":
            logger.debug("block[blocked] is empty - SKIPPED!")
            continue
        elif not domain_helper.is_wanted(block["blocked"]):
            logger.debug("block[blocked]='%s' is not wanted - SKIPPED!", block["blocked"])
            continue
        elif instances.is_recent(block["blocked"]):
            logger.debug("block[blocked]='%s' has been recently checked - SKIPPED!", block["blocked"])
            continue

        logger.debug("Processing blocked='%s' ...", block["blocked"])
        processing.instance(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)

    blockdict = list()
    for blocker in domains:
        blocker = blocker[0]
        logger.debug("blocker[%s]='%s'", type(blocker), blocker)
        instances.set_last_blocked(blocker)

        for block in blocking:
            logger.debug("block[blocked]='%s',block[block reason(s)]='%s' - BEFORE!", block["blocked"], block["block reason(s)"] if "block reason(s)" in block else None)
            block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None

            logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif not domain_helper.is_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
            if processing.block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update(blocker)

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
        network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def recheck_obfuscation(args: argparse.Namespace) -> int:
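    """Re-checks instances flagged with has_obfuscation and tries to deobfuscate their block entries."""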
1526 logger.debug("args[]='%s' - CALLED!", type(args))
1528 logger.debug("Invoking locking.acquire() ...")
1531 if isinstance(args.domain, str) and args.domain != "" and domain_helper.is_wanted(args.domain):
1532 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
1533 elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
1534 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
1536 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")
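    # Note: all three queries select only instances still flagged with
    # has_obfuscation = 1; --domain and --software merely narrow that set.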

    rows = database.cursor.fetchall()
    logger.info("Checking %d domains ...", len(rows))
    for row in rows:
        logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        if (args.force is None or not args.force) and args.domain is None and args.software is None and instances.is_recent(row["domain"], "last_blocked"):
            logger.debug("row[domain]='%s' has been recently checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force))
            continue

        logger.debug("Invoking federation.fetch_blocks(%s) ...", row["domain"])
        blocking = federation.fetch_blocks(row["domain"])

        logger.debug("blocking()=%d", len(blocking))
        if len(blocking) == 0:
            if row["software"] == "pleroma":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = pleroma.fetch_blocks(row["domain"])
            elif row["software"] == "mastodon":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = mastodon.fetch_blocks(row["domain"])
            elif row["software"] == "lemmy":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = lemmy.fetch_blocks(row["domain"])
            elif row["software"] == "friendica":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = friendica.fetch_blocks(row["domain"])
            elif row["software"] == "misskey":
                logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
                blocking = misskey.fetch_blocks(row["domain"])
            else:
                logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])

        # chaos.social isn't part of oliphant's "hidden" blocklists
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] != "chaos.social" and not software_helper.is_relay(row["software"]) and not blocklists.has(row["domain"]):
            logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
            instances.set_last_blocked(row["domain"])
            instances.set_total_blocks(row["domain"], blocking)

        obfuscated = 0
        blockdict = list()

        logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
        for block in blocking:
            logger.debug("block[blocked]='%s'", block["blocked"])
            blocked = None

            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
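                # Wildcard patterns like "*.example.com" or "examp?e.com" hide
                # the real blocked domain. utils.deobfuscate() tries to resolve
                # the pattern, optionally aided by a hash of the clear-text
                # name, back to a concretely known domain. A sketch of the
                # idea, not upstream's exact algorithm:
                #   deobfuscate("*.example.com", blocker, hash) -> "example.com"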
                logger.debug("block='%s' is obfuscated.", block["blocked"])
                obfuscated = obfuscated + 1
                blocked = utils.deobfuscate(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
            elif not domain_helper.is_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
                logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
                continue

            logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
            if blocked is not None and blocked != block["blocked"]:
                logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
                obfuscated = obfuscated - 1

                if blacklist.is_blacklisted(blocked):
                    logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
                    continue
                elif blacklist.is_blacklisted(row["domain"]):
                    logger.debug("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"])
                    continue
                elif blocks.is_instance_blocked(row["domain"], blocked):
                    logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
                    continue

                block["block_level"] = blocks.alias_block_level(block["block_level"])
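                # blocks.alias_block_level() presumably maps vendor-specific
                # level names onto the canonical ones fba stores (e.g. a
                # hypothetical "suspend" -> "reject"); the example mapping is
                # an assumption, not upstream's actual table.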

                logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
                if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], row["domain"])
                    blockdict.append({
                        "blocked": blocked,
                        "reason" : block["reason"],
                    })

        logger.debug("Setting obfuscated=%d for row[domain]='%s' ...", obfuscated, row["domain"])
        instances.set_obfuscated_blocks(row["domain"], obfuscated)

        logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
        if obfuscated == 0 and len(blocking) > 0:
            logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
            instances.set_has_obfuscation(row["domain"], False)

        if instances.has_pending(row["domain"]):
            logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
            instances.update(row["domain"])

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", row["domain"], len(blockdict))
            network.send_bot_post(row["domain"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fedilist(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "demo.fedilist.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    url = f"http://{source_domain}/instance/csv?onion=not"
    if args.software is not None and args.software != "":
        logger.debug("args.software='%s'", args.software)
        url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"
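    # Resulting URL, e.g. when invoked with --software=mastodon:
    #   http://demo.fedilist.com/instance/csv?software=mastodon&onion=not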

    logger.info("Fetching url='%s' ...", url)
    response = reqto.get(
        url,
        headers=network.web_headers,
        timeout=(config.get("connection_timeout"), config.get("read_timeout")),
        allow_redirects=False
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if not response.ok or response.status_code > 200 or len(response.content) == 0:
        logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.content))
        return 1

    reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
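    # Assumption: fedilist's CSV exposes at least a "hostname" column, e.g.:
    #   hostname,software,...
    #   mastodon.example,mastodon,...
    # Only "hostname" is consumed below; other columns are ignored.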

    logger.debug("reader[]='%s'", type(reader))
    if reader is None:
        logger.warning("Failed parsing response.content()=%d as CSV content", len(response.content))
        return 2

    rows = list(reader)

    logger.info("Checking %d rows ...", len(rows))
    for row in rows:
        logger.debug("row[]='%s'", type(row))
        if "hostname" not in row:
            logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(row))
            continue

        logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"])
        domain = tidyup.domain(row["hostname"]) if row["hostname"] is not None and row["hostname"] != "" else None
        logger.debug("domain='%s' - AFTER!", domain)

        if domain is None or domain == "":
            logger.debug("domain='%s' is empty after tidyup.domain(): row[hostname]='%s' - SKIPPED!", domain, row["hostname"])
            continue

        logger.debug("domain='%s' - BEFORE!", domain)
        domain = domain.encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            continue
        elif (args.force is None or not args.force) and instances.is_registered(domain):
            logger.debug("domain='%s' is already registered, --force not specified: args.force[]='%s'", domain, type(args.force))
            continue
        elif instances.is_recent(domain):
            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
            continue

        logger.info("Fetching instances from domain='%s' ...", domain)
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def update_nodeinfo(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        logger.debug("Fetching args.domain='%s'", args.domain)
        database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ?", [args.domain])
    elif args.software is not None and args.software != "":
        logger.info("Fetching domains for args.software='%s'", args.software)
        database.cursor.execute("SELECT domain, software FROM instances WHERE software = ? AND (last_nodeinfo < ? OR last_nodeinfo IS NULL)", [args.software.lower(), time.time() - config.get("recheck_nodeinfo")])
    elif args.mode is not None and args.mode != "":
        logger.info("Fetching domains for args.mode='%s'", args.mode.upper())
        database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode = ? AND (last_nodeinfo < ? OR last_nodeinfo IS NULL)", [args.mode.upper(), time.time() - config.get("recheck_nodeinfo")])
    elif args.no_software:
        logger.info("Fetching domains with no software type detected ...")
        database.cursor.execute("SELECT domain, software FROM instances WHERE software IS NULL AND (last_nodeinfo < ? OR last_nodeinfo IS NULL)", [time.time() - config.get("recheck_nodeinfo")])
    else:
        logger.info("Fetching domains not updated recently ...")
        database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL", [time.time() - config.get("recheck_nodeinfo")])
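    # Selection summary: exactly one of --domain, --software, --mode or
    # --no-software narrows the set; otherwise every instance whose nodeinfo
    # is older than config "recheck_nodeinfo" seconds is rechecked.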

    domains = database.cursor.fetchall()

    logger.info("Checking %d domain(s) ...", len(domains))
    cnt = 0
    for row in domains:
        logger.debug("row[]='%s'", type(row))
        if not args.force and instances.is_recent(row["domain"], "last_nodeinfo"):
            logger.debug("row[domain]='%s' has been recently checked - SKIPPED!", row["domain"])
            continue

        try:
            logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
            software = federation.determine_software(row["domain"])

            logger.debug("Determined software='%s'", software)
            if (software != row["software"] and software is not None) or args.force is True:
                logger.debug("software='%s'", software)
                if software is None:
                    logger.debug("Setting nodeinfo_url to 'None' for row[domain]='%s' ...", row["domain"])
                    instances.set_nodeinfo_url(row["domain"], None)
                else:
                    logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
                    instances.set_software(row["domain"], software)

                if software is not None:
                    logger.debug("Setting row[domain]='%s' as successfully determined ...", row["domain"])
                    instances.set_success(row["domain"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
            instances.set_last_error(row["domain"], exception)
        finally:
            instances.set_last_nodeinfo(row["domain"])
            instances.update(row["domain"])

        cnt = cnt + 1

    logger.debug("Success! - EXIT!")
    return 0

def fetch_instances_social(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "instances.social"

    if config.get("instances_social_api_key") == "":
        logger.error("API key not set. Please set it in your config.json file.")
        return 1
    elif sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 2
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    headers = {
        "Authorization": f"Bearer {config.get('instances_social_api_key')}",
    }

    logger.info("Fetching list from source_domain='%s' ...", source_domain)
    fetched = network.get_json_api(
        source_domain,
        "/api/1.0/instances/list?count=0&sort_by=name",
        headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )

    logger.debug("fetched[]='%s'", type(fetched))
1822 if "error_message" in fetched:
1823 logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1825 elif "exception" in fetched:
1826 logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1828 elif "json" not in fetched:
1829 logger.warning("fetched has no element 'json' - EXIT!")
1831 elif "instances" not in fetched["json"]:
1832 logger.warning("fetched[row] has no element 'instances' - EXIT!")

    domains = list()
    rows = fetched["json"]["instances"]
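    # Assumed response shape, based on the keys accessed below:
    #   {"instances": [{"name": "mastodon.example", ...}, ...]}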

    logger.info("Checking %d row(s) ...", len(rows))
    for row in rows:
        logger.debug("row[]='%s'", type(row))
        domain = tidyup.domain(row["name"]) if row["name"] is not None and row["name"] != "" else None
        logger.debug("domain='%s' - AFTER!", domain)

        if domain is None or domain == "":
            logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
            continue

        logger.debug("domain='%s' - BEFORE!", domain)
        domain = domain.encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            continue
        elif domain in domains:
            logger.debug("domain='%s' is already added - SKIPPED!", domain)
            continue
        elif instances.is_registered(domain):
            logger.debug("domain='%s' is already registered - SKIPPED!", domain)
            continue
        elif instances.is_recent(domain):
            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
            continue

        logger.info("Fetching instances from domain='%s'", domain)
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
        # Remember handled domains so duplicates in the API response are
        # caught by the "already added" check above.
        domains.append(domain)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_relays(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND domain = ? LIMIT 1", [args.domain])
    elif args.software is not None and args.software != "":
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND software = ?", [args.software])
    else:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay')")

    domains = list()
    rows = database.cursor.fetchall()

    logger.info("Checking %d relays ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
        peers = list()
        if not args.force and instances.is_recent(row["domain"]):
            logger.debug("row[domain]='%s' has been recently fetched - SKIPPED!", row["domain"])
            continue

        try:
            if row["software"] == "pub-relay":
                logger.info("Fetching row[nodeinfo_url]='%s' from relay row[domain]='%s',row[software]='%s' ...", row["nodeinfo_url"], row["domain"], row["software"])
                raw = network.fetch_api_url(
                    row["nodeinfo_url"],
                    (config.get("connection_timeout"), config.get("read_timeout"))
                )

                logger.debug("raw[%s]()=%d", type(raw), len(raw))
                if "exception" in raw:
                    logger.warning("row[domain]='%s' has caused an exception: '%s' - raising again ...", row["domain"], type(raw["exception"]))
                    raise raw["exception"]
                elif "error_message" in raw:
                    logger.warning("row[domain]='%s' has caused error message: '%s' - SKIPPED!", row["domain"], raw["error_message"])
                    instances.set_last_error(row["domain"], raw)
                    instances.set_last_instance_fetch(row["domain"])
                    instances.update(row["domain"])
                    continue
                elif "json" not in raw:
                    logger.warning("raw()=%d does not contain key 'json' in response - SKIPPED!", len(raw))
                    continue
                elif "metadata" not in raw["json"]:
                    logger.warning("raw[json]()=%d does not contain key 'metadata' in response - SKIPPED!", len(raw["json"]))
                    continue
                elif "peers" not in raw["json"]["metadata"]:
                    logger.warning("raw[json][metadata]()=%d does not contain key 'peers' in response - SKIPPED!", len(raw["json"]["metadata"]))
                    continue
            else:
                logger.info("Fetching / from relay row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
                raw = utils.fetch_url(
                    f"https://{row['domain']}",
                    network.web_headers,
                    (config.get("connection_timeout"), config.get("read_timeout"))
                ).text
                logger.debug("raw[%s]()=%d", type(raw), len(raw))

                doc = bs4.BeautifulSoup(raw, features="html.parser")
                logger.debug("doc[]='%s'", type(doc))

        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching from relay '%s': '%s'", type(exception), row["domain"], str(exception))
            instances.set_last_error(row["domain"], exception)
            instances.set_last_instance_fetch(row["domain"])
            instances.update(row["domain"])
            continue

        logger.debug("row[software]='%s'", row["software"])
        if row["software"] == "activityrelay":
            logger.debug("Checking row[domain]='%s' ...", row["domain"])
            tags = doc.findAll("p")
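            # Assumption behind the scraping below: activityrelay's landing
            # page lists its subscribers inside a <p> block that starts with
            # the text "registered instances", one domain per line.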

            logger.debug("Checking %d paragraphs ...", len(tags))
            for tag in tags:
                logger.debug("tag[]='%s'", type(tag))
                if len(tag.contents) == 0:
                    logger.debug("tag='%s' is an empty tag - SKIPPED!", tag)
                    continue
                elif "registered instances" not in tag.contents[0]:
                    logger.debug("Skipping paragraph, text not found.")
                    continue

                logger.debug("Found tag.contents[0][]='%s'", tag.contents[0])
                for domain in tag.contents:
                    logger.debug("domain[%s]='%s'", type(domain), domain)
                    if not isinstance(domain, bs4.element.NavigableString) or "registered instances" in domain:
                        continue

                    domain = str(domain)
                    logger.debug("domain='%s'", domain)
                    if not domain_helper.is_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                        continue

                    logger.debug("domain='%s' - BEFORE!", domain)
                    domain = tidyup.domain(domain) if domain is not None and domain != "" else None
                    logger.debug("domain='%s' - AFTER!", domain)

                    if domain is None or domain == "":
                        logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
                        continue
                    elif domain not in peers:
                        logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
                        peers.append(domain)

                    if dict_helper.has_key(domains, "domain", domain):
                        logger.debug("domain='%s' already added", domain)
                        continue

                    logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
                    domains.append({
                        "domain": domain,
                        "origin": row["domain"],
                    })
        elif row["software"] in ["aoderelay", "selective-relay"]:
            logger.debug("Checking row[domain]='%s' ...", row["domain"])
            if row["software"] == "aoderelay":
                tags = doc.findAll("section", {"class": "instance"})
            else:
                tags = doc.find("div", {"id": "instances"}).findAll("li")

            logger.debug("Checking %d tags ...", len(tags))
            for tag in tags:
                logger.debug("tag[]='%s'", type(tag))

                link = tag.find("a")
                logger.debug("link[%s]='%s'", type(link), link)
                if link is None:
                    logger.warning("tag='%s' has no a-tag - SKIPPED!", tag)
                    continue
                elif not link.has_attr("href"):
                    logger.warning("link()=%d has no attribute 'href' - SKIPPED!", len(link))
                    continue

                components = urlparse(link["href"])
                domain = components.netloc.lower().split(":")[0]
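                # urlparse() extraction example:
                #   urlparse("https://relay.example:443/inbox").netloc  # -> "relay.example:443"
                # lower().split(":")[0] then strips an optional port, leaving
                # "relay.example".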

                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain) if domain is not None and domain != "" else None
                logger.debug("domain='%s' - AFTER!", domain)

                if domain is None or domain == "":
                    logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
                    continue
                elif domain not in peers:
                    logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
                    peers.append(domain)

                if dict_helper.has_key(domains, "domain", domain):
                    logger.debug("domain='%s' already added", domain)
                    continue

                logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
                domains.append({
                    "domain": domain,
                    "origin": row["domain"],
                })
        elif row["software"] == "pub-relay":
            logger.debug("Checking %d peer(s) row[domain]='%s' ...", len(raw["json"]["metadata"]["peers"]), row["domain"])
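            # Assumed pub-relay nodeinfo shape (the keys were verified above):
            #   {"json": {"metadata": {"peers": ["peer1.example", ...]}}}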
            for domain in raw["json"]["metadata"]["peers"]:
                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain) if domain is not None and domain != "" else None
                logger.debug("domain='%s' - AFTER!", domain)

                if domain is None or domain == "":
                    logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
                    continue
                elif domain not in peers:
                    logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
                    peers.append(domain)

                if dict_helper.has_key(domains, "domain", domain):
                    logger.debug("domain='%s' already added", domain)
                    continue

                logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
                domains.append({
                    "domain": domain,
                    "origin": row["domain"],
                })
        else:
            logger.warning("row[domain]='%s',row[software]='%s' is not supported", row["domain"], row["software"])
            continue

        logger.debug("Updating last_instance_fetch for row[domain]='%s' ...", row["domain"])
        instances.set_last_instance_fetch(row["domain"])

        logger.info("Relay '%s' has %d peer(s) registered.", row["domain"], len(peers))
        instances.set_total_peers(row["domain"], peers)

        logger.debug("Flushing data for row[domain]='%s'", row["domain"])
        instances.update(row["domain"])

    logger.info("Checking %d domains ...", len(domains))
    for row in domains:
        logger.debug("row[domain]='%s',row[origin]='%s'", row["domain"], row["origin"])
        if not domain_helper.is_wanted(row["domain"]):
            logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
            continue
        elif instances.is_registered(row["domain"]):
            logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
            continue

        logger.info("Fetching row[domain]='%s',row[origin]='%s' ...", row["domain"], row["origin"])
        federation.fetch_instances(row["domain"], row["origin"], None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def convert_idna(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    database.cursor.execute("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    instances.translate_idnas(rows, "domain")
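    # instances.translate_idnas() presumably rewrites each matching row into
    # its punycode ("xn--") form so that both spellings of an IDN domain
    # collapse into one canonical value; the same pass is repeated below for
    # the other domain-bearing columns.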

    database.cursor.execute("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    instances.translate_idnas(rows, "origin")

    database.cursor.execute("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    blocks.translate_idnas(rows, "blocker")

    database.cursor.execute("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    blocks.translate_idnas(rows, "blocked")

    logger.debug("Success! - EXIT!")
    return 0

def remove_invalid(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    database.cursor.execute("SELECT domain FROM instances ORDER BY domain ASC")
    rows = database.cursor.fetchall()

    logger.info("Checking %d domains ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if not validators.domain(row["domain"].split("/")[0]):
            logger.info("Invalid row[domain]='%s' found, removing ...", row["domain"])
            database.cursor.execute("DELETE FROM blocks WHERE blocker = ? OR blocked = ?", [row["domain"], row["domain"]])
            database.cursor.execute("DELETE FROM instances WHERE domain = ? LIMIT 1", [row["domain"]])
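            # validators.domain() rejects anything that is not a bare
            # hostname, e.g. "host/path" or "foo bar"; split("/")[0] keeps the
            # check meaningful for rows that (presumably) still carry a path
            # suffix from earlier imports.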

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.info("Vacuum cleaning database ...")
    database.cursor.execute("VACUUM")

    logger.debug("Success! - EXIT!")
    return 0