# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
import csv
import inspect
import json
import logging
import time

from urllib.parse import urlparse

import argparse
import atoma
import bs4
import markdown
import validators

from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import blocklists
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import dicts as dict_helper
from fba.helpers import domain as domain_helper
from fba.helpers import locking
from fba.helpers import processing
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import csrf
from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances
from fba.models import sources

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)

def check_instance(args: argparse.Namespace) -> int:
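    """
    Checks whether args.domain is a valid, non-blacklisted and not yet
    registered domain. Returns 0 when the instance is still unknown,
    otherwise a non-zero status code (exact codes are assumed here).
    """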
    logger.debug("args.domain='%s' - CALLED!", args.domain)

    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status

def check_nodeinfo(args: argparse.Namespace) -> int:
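    """
    Cross-checks every stored nodeinfo URL against the instance's own
    (punycode) domain and reports entries that point somewhere else.
    """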
    logger.debug("args[]='%s' - CALLED!", type(args))

    database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")

    cnt = 0
    for row in database.cursor.fetchall():
        logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        punycode = row["domain"].encode("idna").decode("utf-8")

        if row["nodeinfo_url"].startswith("/"):
            logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
            continue
        elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
            logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
            cnt = cnt + 1

    logger.info("Found %d row(s)", cnt)

    logger.debug("EXIT!")
    return 0

def fetch_pixelfed_api(args: argparse.Namespace) -> int:
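    """
    Fetches the public server list from the pixelfed.org API and registers
    any new, wanted instances.
    """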
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF by default, you don't have to add network.source_headers by yourself here
    headers = tuple()
    source_domain = "pixelfed.org"

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    try:
        logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
        headers = csrf.determine(source_domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
        return 1

    try:
        logger.info("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            source_domain,
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 101
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 102

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif row["domain"] == "":
                logger.debug("row[domain] is empty - SKIPPED!")
                continue

            logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
            domain = row["domain"].encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Fetching instances from domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 103

    logger.debug("Success! - EXIT!")
    return 0

def fetch_bkali(args: argparse.Namespace) -> int:
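    """
    Fetches a domain list from the gql.api.bka.li GraphQL API and registers
    any new, wanted instances.
    """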
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "gql.api.bka.li"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()
    try:
        logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
        fetched = network.post_json_api(
            source_domain,
            "/v1/graphql",  # endpoint path assumed (Hasura-style GraphQL API)
            json.dumps({
                "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
            })
        )

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for source_domain='%s' returned error message='%s'", source_domain, fetched["error_message"])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif entry["domain"] == "":
                logger.debug("entry[domain] is empty - SKIPPED!")
                continue
            elif not domain_helper.is_wanted(entry["domain"]):
                logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
                continue
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0

def fetch_blocks(args: argparse.Namespace) -> int:
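    """
    Fetches blocklists from a single domain, all instances of one software,
    or all supported instances, records each block and collects new 'reject'
    entries for the bot POST.
    """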
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ? LIMIT 1", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_updated ASC", [args.software]
        )
    elif args.force:
        # Re-check all
        logger.debug("Re-checking all instances ...")
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_updated ASC"
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_updated ASC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)

        if not domain_helper.is_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("Setting last_blocked,has_obfuscation=false for blocker='%s' ...", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        # c.s isn't part of oliphant's "hidden" blocklists
        if blocker == "chaos.social" or blocklists.has(blocker):
            logger.debug("Skipping blocker='%s', run ./fba.py fetch_cs or fetch_oliphant instead!", blocker)
            continue

        logger.debug("Invoking federation.fetch_blocks(%s) ...", blocker)
        blocking = federation.fetch_blocks(blocker)

        logger.debug("blocker='%s',software='%s',blocking()=%d", blocker, software, len(blocking))
        if len(blocking) == 0:
            logger.debug("blocker='%s',software='%s' - fetching blocklist ...", blocker, software)
            if software == "pleroma":
                blocking = pleroma.fetch_blocks(blocker)
                logger.info("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "mastodon":
                blocking = mastodon.fetch_blocks(blocker)
                logger.info("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "lemmy":
                blocking = lemmy.fetch_blocks(blocker)
                logger.info("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "friendica":
                blocking = friendica.fetch_blocks(blocker)
                logger.info("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "misskey":
                blocking = misskey.fetch_blocks(blocker)
                logger.info("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            else:
                logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
        instances.set_total_blocks(blocker, blocking)

        blockdict = list()

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin = row["origin"]
                nodeinfo_url = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some obscure them with question marks, not sure if that's dependent on version or not
                row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin = row["origin"]
                nodeinfo_url = row["nodeinfo_url"]

            logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue

            logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
            block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
            logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])

            if not domain_helper.is_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            block["block_level"] = blocks.alias_block_level(block["block_level"])

            if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_observer(args: argparse.Namespace) -> int:
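    """
    Scrapes fediverse.observer per software type (or only args.software) and
    registers any newly discovered instances.
    """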
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fediverse.observer"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    types = list()
    if args.software is None:
        logger.info("Fetching software list ...")
        raw = utils.fetch_url(
            f"https://{source_domain}",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        ).text
        logger.debug("raw[%s]()=%d", type(raw), len(raw))

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        navbar = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"})
        logger.debug("navbar[]='%s'", type(navbar))
        if navbar is None:
            logger.warning("Cannot find navigation bar, cannot continue!")
            return 1

        items = navbar.findAll("a", {"class": "dropdown-item"})
        logger.debug("items[]='%s'", type(items))

        logger.info("Checking %d menu items ...", len(items))
        for item in items:
            logger.debug("item[%s]='%s'", type(item), item)
            if item.text.lower() == "all":
                logger.debug("Skipping 'All' menu entry ...")
                continue

            logger.debug("Appending item.text='%s' ...", item.text)
            types.append(tidyup.domain(item.text))
    else:
        logger.info("Adding args.software='%s' as type ...", args.software)
        types.append(args.software)

    logger.info("Fetching %d different table data ...", len(types))
    for software in types:
        logger.debug("software='%s' - BEFORE!", software)
        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            continue

        doc = None
        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://{source_domain}/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()
            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue

            software = software_helper.alias(software)
            logger.info("Fetching instances for domain='%s'", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_todon_wiki(args: argparse.Namespace) -> int:
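    """
    Scrapes the domainblocks page on wiki.todon.eu and records the
    silenced/rejected domains as blocks by todon.eu.
    """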
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "wiki.todon.eu"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    blocklist = {
        "silenced": list(),
        "reject"  : list(),
    }

    logger.debug("Fetching domainblocks from source_domain='%s'", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/todon/domainblocks",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "todon.eu"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_last_blocked(blocker)
    instances.set_total_blocks(blocker, blocking)

    blockdict = list()
    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if not domain_helper.is_wanted(blocked):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue
            elif not domain_helper.is_wanted(blocker):
                logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
                continue
            elif blocks.is_instance_blocked(blocker, blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", blocked, block_level, blocker)
                blockdict.append({
                    "blocked": blocked,
                    "reason" : None,
                })

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
        network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_cs(args: argparse.Namespace) -> int:
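    """
    Parses chaos.social's federation.md (rendered from markdown) and records
    the silenced/blocked instances it lists.
    """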
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Markdown extensions for rendering federation.md; the exact list is
    # assumed here, "extra" enables the table syntax parsed below.
    extensions = ["extra"]

    blocklist = {
        "silenced": list(),
        "reject"  : list(),
    }

    source_domain = "raw.githubusercontent.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching federation.md from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/chaossocial/meta/master/federation.md",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc()=%d[]='%s'", len(doc), type(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    blocklist["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    blocklist["reject"] = federation.find_domains(blocked)

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "chaos.social"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_last_blocked(blocker)
    instances.set_total_blocks(blocker, blocking)

    logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
    if len(blocking) > 0:
        blockdict = list()
        for block_level in blocklist:
            logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))

            for row in blocklist[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if "domain" not in row:
                    logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
                    continue
                elif not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", row["domain"], row["reason"], blocker)
                    blockdict.append({
                        "blocked": row["domain"],
                        "reason" : row["reason"],
                    })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fba_rss(args: argparse.Namespace) -> int:
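    """
    Fetches an FBA-specific RSS feed (args.feed) and registers any new,
    wanted domains found in the item links.
    """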
    logger.debug("args[]='%s' - CALLED!", type(args))

    domains = list()

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    components = urlparse(args.feed)

    if sources.is_recent(components.netloc):
        logger.info("API from components.netloc='%s' has recently been accessed - EXIT!", components.netloc)
        return 1
    else:
        logger.debug("components.netloc='%s' has not been recently used, marking ...", components.netloc)
        sources.update(components.netloc)

    logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item[%s]='%s'", type(item), item)
            domain = tidyup.domain(item.link.split("=")[1])

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)

            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fbabot_atom(args: argparse.Namespace) -> int:
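    """
    Fetches the FBA bot's ATOM feed (ryona.agency by default, or args.feed)
    and registers any new, wanted domains found in its posts.
    """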
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "ryona.agency"
    feed = f"https://{source_domain}/users/fba/feed.atom"

    logger.debug("args.feed[%s]='%s'", type(args.feed), args.feed)
    if args.feed is not None and validators.url(args.feed):
        logger.debug("Setting feed='%s' ...", args.feed)
        feed = str(args.feed)
        source_domain = urlparse(args.feed).netloc

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                logger.debug("element[]='%s'", type(element))
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href)

                    logger.debug("domain='%s' - AFTER!", domain)
                    if domain == "":
                        logger.debug("domain is empty - SKIPPED!")
                        continue

                    logger.debug("domain='%s' - BEFORE!", domain)
                    domain = domain.encode("idna").decode("utf-8")
                    logger.debug("domain='%s' - AFTER!", domain)

                    if not domain_helper.is_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)

            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_instances(args: argparse.Namespace) -> int:
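    """
    Fetches peers for args.domain and then, unless the (assumed) --single
    switch is given, re-crawls instances whose last fetch is older than the
    configured "recheck_instance" interval.
    """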
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("args.domain='%s' - checking ...", args.domain)
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid.", args.domain)
        return 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
        return 101

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Initialize values
    domain = tidyup.domain(args.domain)
    origin = software = None

    # Fetch record for this domain, if any
    database.cursor.execute("SELECT origin, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
    row = database.cursor.fetchone()
    if row is not None:
        origin = row["origin"]
        software = row["software"]

    try:
        logger.info("Fetching instances from args.domain='%s',origin='%s',software='%s' ...", domain, origin, software)
        federation.fetch_instances(domain, origin, software, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update(args.domain)
        return 100

    if args.single:  # "--single" CLI switch assumed: only fetch the given domain
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus', 'neodb') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY total_peers DESC, last_updated ASC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] == "":
            logger.debug("row[domain] is empty - SKIPPED!")
            continue

        logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
        domain = row["domain"].encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("Domain domain='%s' is not wanted - SKIPPED!", domain)
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", domain, row["origin"], row["software"], row["nodeinfo_url"])
            federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0

def fetch_oliphant(args: argparse.Namespace) -> int:
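    """
    Downloads oliphant's CSV blocklists from codeberg.org and records each
    parsed block (severity, reject_media, reject_reports).
    """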
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "codeberg.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    # Base URL
    base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"

    domains = list()

    logger.debug("Downloading %d files ...", len(blocklists.oliphant_blocklists))
    for block in blocklists.oliphant_blocklists:
        # Is domain given and not equal blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue
        elif args.domain in domains:
            logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
            continue

        instances.set_last_blocked(block["blocker"])

        logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
        response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.content()=%d", response.ok, response.status_code, len(response.content))
        if not response.ok or response.status_code > 200 or response.content == "":
            logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
            continue

        logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
        reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")

        blockdict = list()

        for row in reader:
            logger.debug("row[%s]='%s'", type(row), row)
            domain = severity = None
            reject_media = reject_reports = False

            if "#domain" in row:
                domain = row["#domain"]
            elif "domain" in row:
                domain = row["domain"]
            else:
                logger.debug("row='%s' does not contain domain column", row)
                continue

            if "#severity" in row:
                severity = blocks.alias_block_level(row["#severity"])
            elif "severity" in row:
                severity = blocks.alias_block_level(row["severity"])
            else:
                logger.debug("row='%s' does not contain severity column", row)
                continue

            if "#reject_media" in row and row["#reject_media"].lower() == "true":
                reject_media = True
            elif "reject_media" in row and row["reject_media"].lower() == "true":
                reject_media = True

            if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
                reject_reports = True
            elif "reject_reports" in row and row["reject_reports"].lower() == "true":
                reject_reports = True

            logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif domain.endswith(".onion"):
                logger.debug("domain='%s' is a TOR .onion domain - SKIPPED", domain)
                continue
            elif domain.endswith(".arpa"):
                logger.debug("domain='%s' is a reverse IP address - SKIPPED", domain)
                continue
            elif domain.endswith(".tld"):
                logger.debug("domain='%s' is a fake domain - SKIPPED", domain)
                continue
            elif domain.find("*") >= 0 or domain.find("?") >= 0:
                logger.debug("domain='%s' is obfuscated - Invoking utils.deobfuscate(%s, %s) ...", domain, domain, block["blocker"])
                domain = utils.deobfuscate(domain, block["blocker"])
                logger.debug("domain='%s' - AFTER!", domain)

            if not validators.domain(domain):
                logger.debug("domain='%s' is not a valid domain - SKIPPED!", domain)
                continue
            elif blacklist.is_blacklisted(domain):
                logger.warning("domain='%s' is blacklisted - SKIPPED!", domain)
                continue
            elif blocks.is_instance_blocked(block["blocker"], domain, severity):
                logger.debug("block[blocker]='%s' has already blocked domain='%s' with severity='%s' - SKIPPED!", block["blocker"], domain, severity)
                continue

            logger.debug("Marking domain='%s' as handled", domain)
            domains.append(domain)

            logger.debug("Processing domain='%s' ...", domain)
            processed = processing.instance(domain, block["blocker"], inspect.currentframe().f_code.co_name)
            logger.debug("processed='%s'", processed)

            if processing.block(block["blocker"], domain, None, severity) and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',severity='%s' for blocker='%s' ...", domain, severity, block["blocker"])
                blockdict.append({
                    "blocked": domain,
                    "reason" : None,
                })

            if reject_media:
                processing.block(block["blocker"], domain, None, "reject_media")
            if reject_reports:
                processing.block(block["blocker"], domain, None, "reject_reports")

        logger.debug("block[blocker]='%s'", block["blocker"])
        if not blocklists.has(block["blocker"]):
            logger.debug("Invoking instances.set_total_blocks(%s, domains()=%d) ...", block["blocker"], len(domains))
            instances.set_total_blocks(block["blocker"], domains)

        logger.debug("Checking if blocker='%s' has pending updates ...", block["blocker"])
        if instances.has_pending(block["blocker"]):
            logger.debug("Flushing updates for block[blocker]='%s' ...", block["blocker"])
            instances.update(block["blocker"])

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
            network.send_bot_post(block["blocker"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_txt(args: argparse.Namespace) -> int:
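    """
    Fetches plain-text blocklists (currently seirdy.one's bsl.txt) and
    processes each listed domain.
    """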
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Static URLs
    urls = ({
        "blocker": "seirdy.one",
        "url"    : "https://seirdy.one/pb/bsl.txt",
    },)

    logger.info("Checking %d text file(s) ...", len(urls))
    for row in urls:
        logger.debug("Fetching row[url]='%s' ...", row["url"])
        response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code == 200 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.strip().split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain) if domain is not None and domain != "" else None

                logger.debug("domain='%s' - AFTER!", domain)
                if domain is None or domain == "":
                    logger.debug("domain='%s' is empty - SKIPPED!", domain)
                    continue
                elif not domain_helper.is_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue
                elif instances.is_recent(domain):
                    logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                    continue

                logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
                processed = processing.instance(domain, row["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)
                if not processed:
                    logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
                    continue

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fedipact(args: argparse.Namespace) -> int:
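    """
    Scrapes the signatory list on fedipact.online and registers any new,
    wanted instances.
    """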
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fedipact.online"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching / from source_domain='%s' ...", source_domain)
    response = utils.fetch_url(
        f"https://{source_domain}",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and response.text != "":
        logger.debug("Parsing %d Bytes ...", len(response.text))

        doc = bs4.BeautifulSoup(response.text, "html.parser")
        logger.debug("doc[]='%s'", type(doc))

        rows = doc.findAll("li")
        logger.info("Checking %d row(s) ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            domain = tidyup.domain(row.contents[0])

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.info("Fetching domain='%s' ...", domain)
            federation.fetch_instances(domain, "beach.city", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_joinmobilizon(args: argparse.Namespace) -> int:
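    """
    Fetches the instance list from instances.joinmobilizon.org and registers
    any new, wanted Mobilizon hosts.
    """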
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "instances.joinmobilizon.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching instances from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/api/v1/instances",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    parsed = json.loads(raw)
    logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))

    if "data" not in parsed:
        logger.warning("parsed()=%d does not contain key 'data'", len(parsed))
        return 1

    logger.info("Checking %d instances ...", len(parsed["data"]))
    for row in parsed["data"]:
        logger.debug("row[]='%s'", type(row))
        if "host" not in row:
            logger.warning("row='%s' does not contain key 'host' - SKIPPED!", row)
            continue
        elif not domain_helper.is_wanted(row["host"]):
            logger.debug("row[host]='%s' is not wanted - SKIPPED!", row["host"])
            continue
        elif instances.is_registered(row["host"]):
            logger.debug("row[host]='%s' is already registered - SKIPPED!", row["host"])
            continue

        logger.info("Fetching row[host]='%s' ...", row["host"])
        federation.fetch_instances(row["host"], "demo.mobilizon.org", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_joinmisskey(args: argparse.Namespace) -> int:
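    """
    Fetches instances.json from instanceapp.misskey.page and registers any
    new, wanted Misskey instances.
    """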
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "instanceapp.misskey.page"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching instances.json from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/instances.json",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    parsed = json.loads(raw)
    logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))

    if "instancesInfos" not in parsed:
        logger.warning("parsed()=%d does not contain element 'instancesInfos'", len(parsed))
        return 1

    logger.info("Checking %d instance(s) ...", len(parsed["instancesInfos"]))
    for row in parsed["instancesInfos"]:
        logger.debug("row[%s]='%s'", type(row), row)
        if "url" not in row:
            logger.warning("row()=%d does not have element 'url' - SKIPPED!", len(row))
            continue
        elif not domain_helper.is_wanted(row["url"]):
            logger.debug("row[url]='%s' is not wanted - SKIPPED!", row["url"])
            continue
        elif instances.is_registered(row["url"]):
            logger.debug("row[url]='%s' is already registered - SKIPPED!", row["url"])
            continue

        logger.info("Fetching row[url]='%s' ...", row["url"])
        federation.fetch_instances(row["url"], "misskey.io", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_joinfediverse(args: argparse.Namespace) -> int:
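    """
    Scrapes the FediBlock wiki page on joinfediverse.wiki, expands
    subdomain entries and records the blocks for climatejustice.* instances.
    """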
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "joinfediverse.wiki"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching /FediBlock wiki page from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/FediBlock",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    tables = doc.findAll("table", {"class": "wikitable"})

    logger.info("Analyzing %d table(s) ...", len(tables))
    blocklist = list()
    for table in tables:
        logger.debug("table[]='%s'", type(table))

        rows = table.findAll("tr")
        logger.info("Checking %d row(s) ...", len(rows))
        block_headers = dict()
        for row in rows:
            logger.debug("row[%s]='%s'", type(row), row)

            headers = row.findAll("th")
            logger.debug("Found headers()=%d header(s)", len(headers))
            if len(headers) > 1:
                block_headers = dict()
                cnt = 0
                for header in headers:
                    cnt = cnt + 1
                    logger.debug("header[]='%s',cnt=%d", type(header), cnt)
                    text = header.contents[0]

                    logger.debug("text[]='%s'", type(text))
                    if not isinstance(text, str):
                        logger.debug("text[]='%s' is not of type 'str' - SKIPPED!", type(text))
                        continue
                    elif validators.domain(text.strip()):
                        logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
                        continue

                    text = tidyup.domain(text.strip())
                    logger.debug("text='%s' - AFTER!", text)
                    if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
                        logger.debug("Found header: '%s'=%d", text, cnt)
                        block_headers[cnt] = text

            elif len(block_headers) == 0:
                logger.debug("row is not scrapable - SKIPPED!")
                continue
            elif len(block_headers) > 0:
                logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
                cnt = 0
                block = dict()

                for element in row.find_all(["th", "td"]):
                    cnt = cnt + 1
                    logger.debug("element[]='%s',cnt=%d", type(element), cnt)
                    if cnt in block_headers:
                        logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])

                        text = element.text.strip()
                        key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"

                        logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
                        if key == "blocked":
                            block[key] = text
                        elif key == "reason":
                            block[key] = tidyup.reason(text)
                        elif key == "subdomain(s)":
                            block[key] = list()
                            if text != "":
                                block[key] = text.split("/")
                        else:
                            logger.debug("key='%s'", key)
                            block[key] = text

                logger.debug("block()=%d ...", len(block))
                if len(block) > 0:
                    logger.debug("Appending block()=%d ...", len(block))
                    blocklist.append(block)

    logger.debug("blocklist()=%d", len(blocklist))

    database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
    domains = database.cursor.fetchall()

    logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
    blocking = list()
    for block in blocklist:
        logger.debug("block='%s'", block)
        if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
            origin = block["blocked"]
            logger.debug("origin='%s'", origin)
            for subdomain in block["subdomain(s)"]:
                block["blocked"] = subdomain + "." + origin
                logger.debug("block[blocked]='%s'", block["blocked"])
                blocking.append(block)
        else:
            blocking.append(block)

    logger.debug("blocking()=%d", len(blocking))
    for block in blocking:
        logger.debug("block[]='%s'", type(block))
        if "blocked" not in block:
            raise KeyError(f"block()={len(block)} does not have element 'blocked'")

        block["blocked"] = tidyup.domain(block["blocked"]).encode("idna").decode("utf-8")
        logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])

        if block["blocked"] == "":
            logger.debug("block[blocked] is empty - SKIPPED!")
            continue
        elif not domain_helper.is_wanted(block["blocked"]):
            logger.debug("block[blocked]='%s' is not wanted - SKIPPED!", block["blocked"])
            continue
        elif instances.is_recent(block["blocked"]):
            logger.debug("block[blocked]='%s' has been recently checked - SKIPPED!", block["blocked"])
            continue

        logger.debug("Processing blocked='%s' ...", block["blocked"])
        processing.instance(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)

    blockdict = list()
    for blocker in domains:
        blocker = blocker[0]
        logger.debug("blocker[%s]='%s'", type(blocker), blocker)
        instances.set_last_blocked(blocker)

        for block in blocking:
            logger.debug("block[blocked]='%s',block[block reason(s)]='%s' - BEFORE!", block["blocked"], block["block reason(s)"] if "block reason(s)" in block else None)
            block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None

            logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif not domain_helper.is_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
            if processing.block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def recheck_obfuscation(args: argparse.Namespace) -> int:
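    """
    Re-fetches blocklists from instances flagged with has_obfuscation and
    tries to deobfuscate wildcard/question-mark entries.
    """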
1518 logger.debug("args[]='%s' - CALLED!", type(args))
1520 logger.debug("Invoking locking.acquire() ...")
1523 if isinstance(args.domain, str) and args.domain != "" and domain_helper.is_wanted(args.domain):
1524 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
1525 elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
1526 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
1528 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")
1530 rows = database.cursor.fetchall()
1531 logger.info("Checking %d domains ...", len(rows))
1533 logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
1534 if (args.force is None or not args.force) and args.domain is None and args.software is None and instances.is_recent(row["domain"], "last_blocked"):
1535 logger.debug("row[domain]='%s' has been recently checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force))
1538 logger.debug("Invoking federation.fetch_blocks(%s) ...", row["domain"])
1539 blocking = federation.fetch_blocks(row["domain"])
1541 logger.debug("blocking()=%d", len(blocking))
1542 if len(blocking) == 0:
1543 if row["software"] == "pleroma":
1544 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1545 blocking = pleroma.fetch_blocks(row["domain"])
1546 elif row["software"] == "mastodon":
1547 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1548 blocking = mastodon.fetch_blocks(row["domain"])
1549 elif row["software"] == "lemmy":
1550 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1551 blocking = lemmy.fetch_blocks(row["domain"])
1552 elif row["software"] == "friendica":
1553 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1554 blocking = friendica.fetch_blocks(row["domain"])
1555 elif row["software"] == "misskey":
1556 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1557 blocking = misskey.fetch_blocks(row["domain"])
1558 else:
1559 logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])
1561 # chaos.social ("c.s") isn't part of oliphant's "hidden" blocklists
1562 logger.debug("row[domain]='%s'", row["domain"])
1563 if row["domain"] != "chaos.social" and not blocklists.has(row["domain"]):
1564 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
1565 instances.set_last_blocked(row["domain"])
1566 instances.set_total_blocks(row["domain"], blocking)
1568 obfuscated = 0
1569 blockdict = list()
1571 logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1572 for block in blocking:
1573 logger.debug("block[blocked]='%s'", block["blocked"])
1574 blocked = None
1576 if block["blocked"] == "":
1577 logger.debug("block[blocked] is empty - SKIPPED!")
1579 elif block["blocked"].endswith(".arpa"):
1580 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1582 elif block["blocked"].endswith(".tld"):
1583 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1585 elif block["blocked"].endswith(".onion"):
1586 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
1588 elif "*" in block["blocked"] or "?" in block["blocked"]:
1589 logger.debug("block='%s' is obfuscated.", block["blocked"])
1590 obfuscated = obfuscated + 1
1591 blocked = utils.deobfuscate(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
1592 elif not domain_helper.is_wanted(block["blocked"]):
1593 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1595 elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1596 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
1599 logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
1600 if blocked is not None and blocked != block["blocked"]:
1601 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1602 obfuscated = obfuscated - 1
1604 if blacklist.is_blacklisted(blocked):
1605 logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
1607 elif blacklist.is_blacklisted(row["domain"]):
1608 logger.debug("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"])
1610 elif blocks.is_instance_blocked(row["domain"], blocked):
1611 logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
1614 block["block_level"] = blocks.alias_block_level(block["block_level"])
1616 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
1617 if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1618 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], row["domain"])
1619 blockdict.append({
1620 "blocked": blocked,
1621 "reason" : block["reason"],
1624 logger.debug("Setting obfuscated=%d for row[domain]='%s' ...", obfuscated, row["domain"])
1625 instances.set_obfuscated_blocks(row["domain"], obfuscated)
1627 logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
1628 if obfuscated == 0 and len(blocking) > 0:
1629 logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
1630 instances.set_has_obfuscation(row["domain"], False)
1632 if instances.has_pending(row["domain"]):
1633 logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1634 instances.update(row["domain"])
1636 logger.debug("Invoking commit() ...")
1637 database.connection.commit()
1639 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1640 if config.get("bot_enabled") and len(blockdict) > 0:
1641 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", row["domain"], len(blockdict))
1642 network.send_bot_post(row["domain"], blockdict)
1644 logger.debug("Success! - EXIT!")
1645 return 0
1647 def fetch_fedilist(args: argparse.Namespace) -> int:
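"""
Downloads the instance list from demo.fedilist.com as CSV, e.g.
http://demo.fedilist.com/instance/csv?software=mastodon&onion=not when
--software=mastodon is given, and runs federation.fetch_instances() on
every domain that is wanted, not yet registered (unless --force is set)
and not recently crawled.
"""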
1648 logger.debug("args[]='%s' - CALLED!", type(args))
1650 logger.debug("Invoking locking.acquire() ...")
1651 locking.acquire()
1653 source_domain = "demo.fedilist.com"
1654 if sources.is_recent(source_domain):
1655 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1658 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1659 sources.update(source_domain)
1661 url = f"http://{source_domain}/instance/csv?onion=not"
1662 if args.software is not None and args.software != "":
1663 logger.debug("args.software='%s'", args.software)
1664 url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"
1666 logger.info("Fetching url='%s' ...", url)
1667 response = reqto.get(
1668 url,
1669 headers=network.web_headers,
1670 timeout=(config.get("connection_timeout"), config.get("read_timeout")),
1671 allow_redirects=False
1672 )
1674 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1675 if not response.ok or response.status_code > 200 or len(response.content) == 0:
1676 logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.content))
1679 reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
1681 logger.debug("reader[]='%s'", type(reader))
1682 if reader is None:
1683 logger.warning("Failed parsing response.content()=%d as CSV content", len(response.content))
1686 rows = list(reader)
1688 logger.info("Checking %d rows ...", len(rows))
1689 for row in rows:
1690 logger.debug("row[]='%s'", type(row))
1691 if "hostname" not in row:
1692 logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(row))
1695 logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"])
1696 domain = tidyup.domain(row["hostname"])
1697 logger.debug("domain='%s' - AFTER!", domain)
1699 if domain == "":
1700 logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"])
1703 logger.debug("domain='%s' - BEFORE!", domain)
1704 domain = domain.encode("idna").decode("utf-8")
1705 logger.debug("domain='%s' - AFTER!", domain)
1707 if not domain_helper.is_wanted(domain):
1708 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1710 elif (args.force is None or not args.force) and instances.is_registered(domain):
1711 logger.debug("domain='%s' is already registered, --force not specified: args.force[]='%s'", domain, type(args.force))
1713 elif instances.is_recent(domain):
1714 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1717 logger.info("Fetching instances from domain='%s' ...", domain)
1718 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1720 logger.debug("Success! - EXIT!")
1721 return 0
1723 def update_nodeinfo(args: argparse.Namespace) -> int:
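"""
Re-determines the software type of known instances from their nodeinfo.
Candidates are selected via --domain, --software, --mode or --no-software;
without these options, every instance whose last_nodeinfo timestamp is
older than the configured recheck_nodeinfo interval is rechecked.
"""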
1724 logger.debug("args[]='%s' - CALLED!", type(args))
1726 logger.debug("Invoking locking.acquire() ...")
1727 locking.acquire()
1729 if args.domain is not None and args.domain != "":
1730 logger.debug("Fetching args.domain='%s'", args.domain)
1731 database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ?", [args.domain])
1732 elif args.software is not None and args.software != "":
1733 logger.info("Fetching domains for args.software='%s'", args.software)
1734 database.cursor.execute("SELECT domain, software FROM instances WHERE software = ? AND (last_nodeinfo < ? OR last_nodeinfo IS NULL)", [args.software.lower(), time.time() - config.get("recheck_nodeinfo")])
1735 elif args.mode is not None and args.mode != "":
1736 logger.info("Fetching domains for args.mode='%s'", args.mode.upper())
1737 database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode = ? AND (last_nodeinfo < ? OR last_nodeinfo IS NULL)", [args.mode.upper(), time.time() - config.get("recheck_nodeinfo")])
1738 elif args.no_software:
1739 logger.info("Fetching domains with no software type detected ...")
1740 database.cursor.execute("SELECT domain, software FROM instances WHERE software IS NULL AND (last_nodeinfo < ? OR last_nodeinfo IS NULL)", [time.time() - config.get("recheck_nodeinfo")])
1742 logger.info("Fetching domains for recently updated ...")
1743 database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL", [time.time() - config.get("recheck_nodeinfo")])
1745 domains = database.cursor.fetchall()
1747 logger.info("Checking %d domain(s) ...", len(domains))
1748 cnt = 0
1749 for row in domains:
1750 logger.debug("row[]='%s'", type(row))
1751 if not args.force and instances.is_recent(row["domain"], "last_nodeinfo"):
1752 logger.debug("row[domain]='%s' has been recently checked - SKIPPED!", row["domain"])
1753 continue
1755 try:
1756 logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
1757 software = federation.determine_software(row["domain"])
1759 logger.debug("Determined software='%s'", software)
1760 if (software != row["software"] and software is not None) or args.force is True:
1761 logger.debug("software='%s'", software)
1762 if software is None:
1763 logger.debug("Setting nodeinfo_url to 'None' for row[domain]='%s' ...", row["domain"])
1764 instances.set_nodeinfo_url(row["domain"], None)
1765 else:
1766 logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
1767 instances.set_software(row["domain"], software)
1769 if software is not None:
1770 logger.debug("Setting row[domain]='%s' as successfully determined ...", row["domain"])
1771 instances.set_success(row["domain"])
1772 except network.exceptions as exception:
1773 logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1774 instances.set_last_error(row["domain"], exception)
1776 instances.set_last_nodeinfo(row["domain"])
1777 instances.update(row["domain"])
1780 logger.debug("Success! - EXIT!")
1781 return 0
1783 def fetch_instances_social(args: argparse.Namespace) -> int:
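"""
Queries the instances.social list API, authenticated with the
instances_social_api_key from config.json sent as a Bearer token, and runs
federation.fetch_instances() on every wanted, not yet known domain.
"""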
1784 logger.debug("args[]='%s' - CALLED!", type(args))
1786 logger.debug("Invoking locking.acquire() ...")
1787 locking.acquire()
1789 source_domain = "instances.social"
1790 domains = list()
1791 if config.get("instances_social_api_key") == "":
1792 logger.error("API key not set. Please set in your config.json file.")
1794 elif sources.is_recent(source_domain):
1795 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1798 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1799 sources.update(source_domain)
1801 headers = {
1802 "Authorization": f"Bearer {config.get('instances_social_api_key')}",
1805 logger.info("Fetching list from source_domain='%s' ...", source_domain)
1806 fetched = network.get_json_api(
1807 source_domain,
1808 "/api/1.0/instances/list?count=0&sort_by=name",
1810 (config.get("connection_timeout"), config.get("read_timeout"))
1811 )
1812 logger.debug("fetched[]='%s'", type(fetched))
1814 if "error_message" in fetched:
1815 logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1817 elif "exception" in fetched:
1818 logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1820 elif "json" not in fetched:
1821 logger.warning("fetched has no element 'json' - EXIT!")
1823 elif "instances" not in fetched["json"]:
1824 logger.warning("fetched[json] has no element 'instances' - EXIT!")
1828 rows = fetched["json"]["instances"]
1830 logger.info("Checking %d row(s) ...", len(rows))
1831 for row in rows:
1832 logger.debug("row[]='%s'", type(row))
1833 domain = tidyup.domain(row["name"])
1834 logger.debug("domain='%s' - AFTER!", domain)
1836 if domain == "":
1837 logger.debug("domain is empty - SKIPPED!")
1840 logger.debug("domain='%s' - BEFORE!", domain)
1841 domain = domain.encode("idna").decode("utf-8")
1842 logger.debug("domain='%s' - AFTER!", domain)
1844 if not domain_helper.is_wanted(domain):
1845 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1847 elif domain in domains:
1848 logger.debug("domain='%s' is already added - SKIPPED!", domain)
1850 elif instances.is_registered(domain):
1851 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1853 elif instances.is_recent(domain):
1854 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1857 logger.info("Fetching instances from domain='%s'", domain)
1858 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1860 logger.debug("Success! - EXIT!")
1861 return 0
1863 def fetch_relays(args: argparse.Namespace) -> int:
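"""
Scrapes the landing pages of known ActivityPub relays (activityrelay,
aoderelay and selective-relay) for their registered peer domains, stores
the peer list per relay and finally registers all newly found domains.
"""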
1864 logger.debug("args[]='%s' - CALLED!", type(args))
1866 logger.debug("Invoking locking.acquire() ...")
1867 locking.acquire()
1869 if args.domain is not None and args.domain != "":
1870 database.cursor.execute("SELECT domain, software FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay') AND domain = ? LIMIT 1", [args.domain])
1872 database.cursor.execute("SELECT domain, software FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay')")
1874 domains = list()
1875 rows = database.cursor.fetchall()
1877 logger.info("Checking %d relays ...", len(rows))
1879 logger.debug("row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1880 peers = list()
1881 if not args.force and instances.is_recent(row["domain"]):
1882 logger.debug("row[domain]='%s' has been recently fetched - SKIPPED!", row["domain"])
1883 continue
1885 try:
1886 logger.info("Fetching / from relay row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1887 raw = utils.fetch_url(
1888 f"https://{row['domain']}",
1889 network.web_headers,
1890 (config.get("connection_timeout"), config.get("read_timeout"))
1891 ).text
1892 logger.debug("raw[%s]()=%d", type(raw), len(raw))
1893 except network.exceptions as exception:
1894 logger.warning("Exception '%s' during fetching from relay '%s': '%s'", type(exception), row["domain"], str(exception))
1895 instances.set_last_error(row["domain"], exception)
1896 instances.set_last_instance_fetch(row["domain"])
1897 instances.update(row["domain"])
1898 continue
1900 doc = bs4.BeautifulSoup(raw, features="html.parser")
1901 logger.debug("doc[]='%s'", type(doc))
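# The relay software dictates the page layout: activityrelay lists its peers
# as plain text inside <p> tags, while aoderelay and selective-relay render
# them as links, so each flavour needs its own parsing branch below.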
1903 logger.debug("row[software]='%s'", row["software"])
1904 if row["software"] == "activityrelay":
1905 logger.debug("Checking row[domain]='%s' ...", row["domain"])
1906 tags = doc.findAll("p")
1908 logger.debug("Checking %d paragraphs ...", len(tags))
1909 for tag in tags:
1910 logger.debug("tag[]='%s'", type(tag))
1911 if len(tag.contents) == 0:
1912 logger.debug("tag='%s' is an empty tag - SKIPPED!", tag)
1914 elif "registered instances" not in tag.contents[0]:
1915 logger.debug("Skipping paragraph, text not found.")
1918 logger.debug("Found tag.contents[0][]='%s'", tag.contents[0])
1919 for domain in tag.contents:
1920 logger.debug("domain[%s]='%s'", type(domain), domain)
1921 if not isinstance(domain, bs4.element.NavigableString) or "registered instances" in domain:
1924 domain = str(domain)
1925 logger.debug("domain='%s'", domain)
1926 if not domain_helper.is_wanted(domain):
1927 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1930 logger.debug("domain='%s' - BEFORE!", domain)
1931 domain = tidyup.domain(domain)
1932 logger.debug("domain='%s' - AFTER!", domain)
1934 if domain == "":
1935 logger.debug("Empty domain after tidyup.domain() from origin='%s' - SKIPPED!", row["domain"])
1937 elif domain not in peers:
1938 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1939 peers.append(domain)
1941 if dict_helper.has_key(domains, "domain", domain):
1942 logger.debug("domain='%s' already added", domain)
1945 logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
1946 domains.append({
1947 "domain": domain,
1948 "origin": row["domain"],
1950 elif row["software"] in ["aoderelay", "selective-relay"]:
1951 logger.debug("Checking row[domain]='%s' ...", row["domain"])
1952 if row["software"] == "aoderelay":
1953 tags = doc.findAll("section", {"class": "instance"})
1954 else:
1955 tags = doc.find("div", {"id": "instances"}).findAll("li")
1957 logger.debug("Checking %d tags ...", len(tags))
1958 for tag in tags:
1959 logger.debug("tag[]='%s'", type(tag))
1961 link = tag.find("a")
1962 logger.debug("link[%s]='%s'", type(link), link)
1963 if link is None:
1964 logger.warning("tag='%s' has no a-tag ...", tag)
1965 continue
1967 components = urlparse(link["href"])
1968 domain = components.netloc.lower()
1970 if not domain_helper.is_wanted(domain):
1971 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1974 logger.debug("domain='%s' - BEFORE!", domain)
1975 domain = tidyup.domain(domain)
1976 logger.debug("domain='%s' - AFTER!", domain)
1978 if domain == "":
1979 logger.debug("Empty domain after tidyup.domain() from origin='%s' - SKIPPED!", row["domain"])
1981 elif domain not in peers:
1982 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1983 peers.append(domain)
1985 if dict_helper.has_key(domains, "domain", domain):
1986 logger.debug("domain='%s' already added", domain)
1989 logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
1990 domains.append({
1991 "domain": domain,
1992 "origin": row["domain"],
1995 logger.warning("row[domain]='%s',row[software]='%s' is not supported", row["domain"], row["software"])
1997 logger.debug("Updating last_instance_fetch for row[domain]='%s' ...", row["domain"])
1998 instances.set_last_instance_fetch(row["domain"])
2000 logger.info("Relay '%s' has %d peer(s) registered.", row["domain"], len(peers))
2001 instances.set_total_peers(row["domain"], peers)
2003 logger.debug("Flushing data for row[domain]='%s'", row["domain"])
2004 instances.update(row["domain"])
2006 logger.info("Checking %d domains ...", len(domains))
2007 for row in domains:
2008 logger.debug("row[domain]='%s',row[origin]='%s'", row["domain"], row["origin"])
2009 if instances.is_registered(row["domain"]):
2010 logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
2013 logger.info("Fetching row[domain]='%s',row[origin]='%s' ...", row["domain"], row["origin"])
2014 federation.fetch_instances(row["domain"], row["origin"], None, inspect.currentframe().f_code.co_name)
2016 logger.debug("Success! - EXIT!")
2017 return 0
2019 def convert_idna(args: argparse.Namespace) -> int:
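"""
One-shot migration that rewrites all non-punycode values in
instances.domain, instances.origin, blocks.blocker and blocks.blocked to
their IDNA form. Illustrative example using Python's "idna" codec:
"bücher.example".encode("idna") yields b"xn--bcher-kva.example".
"""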
2020 logger.debug("args[]='%s' - CALLED!", type(args))
2022 database.cursor.execute("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC")
2023 rows = database.cursor.fetchall()
2025 logger.debug("rows[]='%s'", type(rows))
2026 instances.translate_idnas(rows, "domain")
2028 database.cursor.execute("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC")
2029 rows = database.cursor.fetchall()
2031 logger.debug("rows[]='%s'", type(rows))
2032 instances.translate_idnas(rows, "origin")
2034 database.cursor.execute("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC")
2035 rows = database.cursor.fetchall()
2037 logger.debug("rows[]='%s'", type(rows))
2038 blocks.translate_idnas(rows, "blocker")
2040 database.cursor.execute("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC")
2041 rows = database.cursor.fetchall()
2043 logger.debug("rows[]='%s'", type(rows))
2044 blocks.translate_idnas(rows, "blocked")
2046 logger.debug("Success! - EXIT!")
2047 return 0
2049 def remove_invalid(args: argparse.Namespace) -> int:
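"""
Deletes all instances whose stored domain does not validate as a domain
name, removes any block records referencing them and compacts the
database with VACUUM afterwards.
"""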
2050 logger.debug("args[]='%s' - CALLED!", type(args))
2052 logger.debug("Invoking locking.acquire() ...")
2053 locking.acquire()
2055 database.cursor.execute("SELECT domain FROM instances ORDER BY domain ASC")
2056 rows = database.cursor.fetchall()
2058 logger.info("Checking %d domains ...", len(rows))
2059 for row in rows:
2060 logger.debug("row[domain]='%s'", row["domain"])
2061 if not validators.domain(row["domain"].split("/")[0]):
2062 logger.info("Invalid row[domain]='%s' found, removing ...", row["domain"])
2063 database.cursor.execute("DELETE FROM blocks WHERE blocker = ? OR blocked = ?", [row["domain"], row["domain"]])
2064 database.cursor.execute("DELETE FROM instances WHERE domain = ? LIMIT 1", [row["domain"]])
2066 logger.debug("Invoking commit() ...")
2067 database.connection.commit()
2069 logger.info("Vacuum cleaning database ...")
2070 database.cursor.execute("VACUUM")
2072 logger.debug("Success! - EXIT!")
2073 return 0