1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
import argparse
import inspect
import json
import logging
import time

from urllib.parse import urlparse

from fba import database

from fba.helpers import blacklist
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import dicts as dict_helper
from fba.helpers import domain as domain_helper
from fba.helpers import locking
from fba.helpers import processing
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances
from fba.models import sources

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma
# Module-level logger setup; basicConfig() is a no-op when the root logger
# is already configured by the embedding application.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)
def check_instance(args: argparse.Namespace) -> int:
    """Check whether args.domain is valid, not blacklisted and not yet registered.

    Returns 0 when the domain is unknown (and therefore usable),
    a non-zero status code otherwise.
    """
    logger.debug("args.domain='%s' - CALLED!", args.domain)

    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status
def check_nodeinfo(args: argparse.Namespace) -> int:
    """Report instances whose stored nodeinfo URL does not point at their own domain.

    Relative nodeinfo URLs always match; absolute URLs must contain either the
    domain or its punycode form. Returns 0.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    # Fetch rows
    database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")

    cnt = 0
    for row in database.cursor.fetchall():
        logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        # Punycode (IDNA) form of the domain; absolute nodeinfo URLs may use either spelling
        punycode = row["domain"].encode("idna").decode("utf-8")

        if row["nodeinfo_url"].startswith("/"):
            logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
            continue
        elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
            logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
            cnt = cnt + 1

    logger.info("Found %d row(s)", cnt)

    logger.debug("EXIT!")
    return 0
def fetch_pixelfed_api(args: argparse.Namespace) -> int:
    """Fetch the server list from the pixelfed.org API and register new, wanted instances.

    Returns 0 on success, a non-zero status code on errors or when the source
    was accessed too recently.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF by default, you don't have to add network.source_headers by yourself here
    headers = tuple()
    source_domain = "pixelfed.org"

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    try:
        logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
        headers = csrf.determine(source_domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
        return 2

    try:
        logger.info("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            source_domain,
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 101
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 102

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif row["domain"] == "":
                logger.debug("row[domain] is empty - SKIPPED!")
                continue

            logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
            # Normalize to IDNA/punycode form before any further checks
            domain = row["domain"].encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Fetching instances from domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 103

    logger.debug("Success! - EXIT!")
    return 0
def fetch_bkali(args: argparse.Namespace) -> int:
    """Fetch a domain list from the gql.api.bka.li GraphQL API and register new instances.

    Returns 0 on success, a non-zero status code on errors or when the source
    was accessed too recently.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "gql.api.bka.li"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()
    try:
        logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
        fetched = network.post_json_api(
            source_domain,
            "/v1/graphql",
            json.dumps({
                "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
            })
        )

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for 'gql.sources.bka.li' returned error message='%s", fetched["error_message"])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s", fetched["error"]["message"])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif entry["domain"] == "":
                logger.debug("entry[domain] is empty - SKIPPED!")
                continue
            elif not domain_helper.is_wanted(entry["domain"]):
                logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
                continue
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success - EXIT!")
    return 0
def fetch_blocks(args: argparse.Namespace) -> int:
    """Fetch block lists from registered instances and store new blocks.

    Selects blockers from the database (single domain, single software, or
    all/recheck-due), fetches their block lists with the software-specific
    fetcher, deobfuscates obscured entries where possible and records each
    block via processing.block(). Returns 0 on success, non-zero on argument
    errors.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
        )
    elif args.force:
        # Re-check all
        logger.debug("Re-checking all instances ...")
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC"
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
        blocker = tidyup.domain(blocker)
        logger.debug("blocker='%s' - AFTER!", blocker)

        if blocker == "":
            logger.warning("blocker is now empty!")
            continue
        elif nodeinfo_url is None or nodeinfo_url == "":
            logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
            continue
        elif not domain_helper.is_wanted(blocker):
            logger.debug("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("blocker='%s'", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        # Per-blocker accumulators: fetched block list and bot-notification queue
        blocking = list()
        blockdict = list()

        if software == "pleroma":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
            logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
        elif software == "mastodon":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
            logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
        elif software == "lemmy":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
            logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
        elif software == "friendica":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = friendica.fetch_blocks(blocker)
            logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
        elif software == "misskey":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = misskey.fetch_blocks(blocker)
            logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
        else:
            logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.debug("blocker='%s'", blocker)
        if blocker != "chaos.social":
            # chaos.social's total is maintained by fetch_cs() instead
            logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
            instances.set_total_blocks(blocker, blocking)

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin = row["origin"]
                nodeinfo_url = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some obscure them with question marks, not sure if that's dependent on version or not
                row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin = row["origin"]
                nodeinfo_url = row["nodeinfo_url"]

            logger.debug("Looking up instance by domainm, blocked='%s'", block["blocked"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue

            logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
            block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
            logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])

            if not domain_helper.is_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            block["block_level"] = blocks.alias_block_level(block["block_level"])

            if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d'", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_observer(args: argparse.Namespace) -> int:
    """Scrape fediverse.observer table data and register new, wanted instances.

    Without args.software, the list of software types is scraped from the site
    navigation bar; otherwise only the given software's table is fetched.
    Returns 0 on success, 1 on errors or recent access.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fediverse.observer"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    types = list()
    if args.software is None:
        logger.info("Fetching software list ...")
        raw = utils.fetch_url(
            f"https://{source_domain}",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        ).text
        logger.debug("raw[%s]()=%d", type(raw), len(raw))

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        navbar = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"})
        logger.debug("navbar[]='%s'", type(navbar))
        if navbar is None:
            logger.warning("Cannot find navigation bar, cannot continue!")
            return 1

        items = navbar.findAll("a", {"class": "dropdown-item"})
        logger.debug("items[]='%s'", type(items))

        logger.info("Checking %d menu items ...", len(items))
        for item in items:
            logger.debug("item[%s]='%s'", type(item), item)
            if item.text.lower() == "all":
                logger.debug("Skipping 'All' menu entry ...")
                continue

            logger.debug("Appending item.text='%s' ...", item.text)
            types.append(tidyup.domain(item.text))
    else:
        logger.info("Adding args.software='%s' as type ...", args.software)
        types.append(args.software)

    logger.info("Fetching %d different table data ...", len(types))
    for software in types:
        logger.debug("software='%s' - BEFORE!", software)
        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            continue

        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://{source_domain}/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()
            logger.debug("domain='%s' - AFTER!", domain)

            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' is recently being handled - SKIPPED!", domain)
                continue

            software = software_helper.alias(software)
            logger.info("Fetching instances for domain='%s'", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_todon_wiki(args: argparse.Namespace) -> int:
    """Fetch todon.eu's block lists from wiki.todon.eu and record them.

    Scrapes the silenced/limited and suspended server lists, registers unknown
    blocked domains and stores each block via processing.block(). Returns 0 on
    success, 1 on recent access.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "wiki.todon.eu"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    # The wiki documents todon.eu's own blocks
    blocker = "todon.eu"
    blockdict = list()
    blocklist = {
        "silenced": list(),
        "reject"  : list(),
    }

    logger.debug("Fetching domainblocks from source_domain='%s'", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/todon/domainblocks",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blocking = blocklist["silenced"] + blocklist["reject"]

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_total_blocks(blocker, blocking)

    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d'", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_cs) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if blocks.is_instance_blocked(blocker, blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", blocked, block_level, blocker)
                blockdict.append({
                    "blocked": blocked,
                    "reason" : None,
                })

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
        network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update_data(blocker)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_cs(args: argparse.Namespace):
    """Fetch chaos.social's federation.md block list from GitHub and record it.

    The markdown file contains "Silenced instances" and "Blocked instances"
    tables; both are parsed and each entry stored via processing.block().
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # markdown extension needed so the block tables render as HTML <table>
    # elements - NOTE(review): original extension list was lost; confirm
    # against upstream whether more extensions were enabled.
    extensions = ["tables"]

    blocklist = {
        "silenced": list(),
        "reject"  : list(),
    }

    source_domain = "raw.githubusercontent.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching federation.md from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/chaossocial/meta/master/federation.md",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc()=%d[]='%s'", len(doc), type(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    blocklist["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    blocklist["reject"] = federation.find_domains(blocked)

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "chaos.social"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_total_blocks(blocker, blocking)

    logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
    if len(blocking) > 0:
        blockdict = list()
        for block_level in blocklist:
            logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))

            for row in blocklist[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if not "domain" in row:
                    logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
                    continue
                elif not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
                    blockdict.append({
                        "blocked": row["domain"],
                        "reason" : row["reason"],
                    })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update_data(blocker)

    logger.debug("Success! - EXIT!")
def fetch_fba_rss(args: argparse.Namespace) -> int:
    """Fetch an FBA-specific RSS feed (args.feed) and register linked domains.

    Each RSS item link carries the domain as its query-string value
    (split on '='). Returns 0 on success, non-zero on errors.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    domains = list()

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    components = urlparse(args.feed)

    if sources.is_recent(components.netloc):
        logger.info("API from components.netloc='%s' has recently being accessed - EXIT!", components.netloc)
        return 0
    else:
        logger.debug("components.netloc='%s' has not been recently used, marking ...", components.netloc)
        sources.update(components.netloc)

    logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item[%s]='%s'", type(item), item)
            # Domain is encoded as the query-string value of the item link
            domain = tidyup.domain(item.link.split("=")[1])

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success! - EXIT!")
    return 0
def fetch_fbabot_atom(args: argparse.Namespace) -> int:
    """Fetch the FBA bot account's ATOM feed from ryona.agency and register
    every domain linked in the entries. Returns 0 on success, non-zero on errors.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "ryona.agency"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    feed = f"https://{source_domain}/users/fba/feed.atom"

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                logger.debug("element[]='%s'", type(element))
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href)

                    logger.debug("domain='%s' - AFTER!", domain)
                    if domain == "":
                        logger.debug("domain is empty - SKIPPED!")
                        continue

                    logger.debug("domain='%s' - BEFORE!", domain)
                    domain = domain.encode("idna").decode("utf-8")
                    logger.debug("domain='%s' - AFTER!", domain)

                    if not domain_helper.is_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success! - EXIT!")
    return 0
# Crawl the single instance named by args.domain, then re-crawl all known
# instances of supported software whose last fetch is older than the
# configured 'recheck_instance' interval.
#
# Parameters: args - needs args.domain (validated and blacklist-checked).
# Returns: int exit code.
#
# NOTE(review): numbered listing with gaps - dropped lines (e.g. 'return',
# 'try:', 'continue', 'else:', the closing ')' of the execute() call and the
# 'for row in rows:' loop header before L568) must be restored from VCS.
909 def fetch_instances(args: argparse.Namespace) -> int:
910 logger.debug("args[]='%s' - CALLED!", type(args))
# Guard clauses: refuse syntactically invalid or blacklisted domains.
912 logger.debug("args.domain='%s' - checking ...", args.domain)
913 if not validators.domain(args.domain):
914 logger.warning("args.domain='%s' is not valid.", args.domain)
916 elif blacklist.is_blacklisted(args.domain):
917 logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
920 logger.debug("Invoking locking.acquire() ...")
# Initial crawl of the requested domain; errors are stored per-instance.
925 logger.info("Fetching instances from args.domain='%s' ...", args.domain)
926 federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
927 except network.exceptions as exception:
928 logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
929 instances.set_last_error(args.domain, exception)
930 instances.update_data(args.domain)
934 logger.debug("Not fetching more instances - EXIT!")
# Select stale instances (supported software only) for re-crawling.
937 # Loop through some instances
938 database.cursor.execute(
939 "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
942 rows = database.cursor.fetchall()
943 logger.info("Checking %d entries ...", len(rows))
945 logger.debug("row[domain]='%s'", row["domain"])
946 if row["domain"] == "":
947 logger.debug("row[domain] is empty - SKIPPED!")
# IDNA-encode before the wanted-check, matching the rest of this module.
950 logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
951 domain = row["domain"].encode("idna").decode("utf-8")
952 logger.debug("domain='%s' - AFTER!", domain)
954 if not domain_helper.is_wanted(domain):
955 logger.debug("Domain domain='%s' is not wanted - SKIPPED!", domain)
959 logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", domain, row["origin"], row["software"], row["nodeinfo_url"])
960 federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
961 except network.exceptions as exception:
962 logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
963 instances.set_last_error(domain, exception)
965 logger.debug("Success - EXIT!")
# Download the Oliphant blocklist CSVs from codeberg.org, parse each row
# (columns may be prefixed with '#'), and record the blocks per blocker
# instance; optionally notify the bot about new 'reject' blocks.
#
# Parameters: args - optional args.domain restricts the run to one blocker.
# Returns: int exit code.
#
# NOTE(review): numbered listing with gaps - the 'blocklists = [...]' list
# header and the per-entry '{' / '},' braces (lines 984-1033), 'try:',
# 'continue', 'blockdict = list()' / 'domains = list()' initializers and
# 'return' lines were dropped; restore from VCS.
968 def fetch_oliphant(args: argparse.Namespace) -> int:
969 logger.debug("args[]='%s' - CALLED!", type(args))
971 logger.debug("Invoking locking.acquire() ...")
# Rate-limit the source; mark it as used otherwise.
974 source_domain = "codeberg.org"
975 if sources.is_recent(source_domain):
976 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
979 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
980 sources.update(source_domain)
983 base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"
# Static list of blocker -> CSV path pairs (entries of the 'blocklists' list;
# each entry has exactly the keys "blocker" and "csv_url").
988 "blocker": "artisan.chat",
989 "csv_url": "mastodon/artisan.chat.csv",
991 "blocker": "mastodon.art",
992 "csv_url": "mastodon/mastodon.art.csv",
994 "blocker": "pleroma.envs.net",
995 "csv_url": "mastodon/pleroma.envs.net.csv",
997 "blocker": "oliphant.social",
998 "csv_url": "mastodon/_unified_tier3_blocklist.csv",
1000 "blocker": "mastodon.online",
1001 "csv_url": "mastodon/mastodon.online.csv",
1003 "blocker": "mastodon.social",
1004 "csv_url": "mastodon/mastodon.social.csv",
1006 "blocker": "mastodon.social",
1007 "csv_url": "other/missing-tier0-mastodon.social.csv",
1009 "blocker": "rage.love",
1010 "csv_url": "mastodon/rage.love.csv",
1012 "blocker": "sunny.garden",
1013 "csv_url": "mastodon/sunny.garden.csv",
1015 "blocker": "sunny.garden",
1016 "csv_url": "mastodon/gardenfence.csv",
1018 "blocker": "solarpunk.moe",
1019 "csv_url": "mastodon/solarpunk.moe.csv",
1021 "blocker": "toot.wales",
1022 "csv_url": "mastodon/toot.wales.csv",
1024 "blocker": "union.place",
1025 "csv_url": "mastodon/union.place.csv",
1027 "blocker": "oliphant.social",
1028 "csv_url": "mastodon/birdsite.csv",
1034 logger.debug("Downloading %d files ...", len(blocklists))
1035 for block in blocklists:
1036 # Is domain given and not equal blocker?
1037 if isinstance(args.domain, str) and args.domain != block["blocker"]:
1038 logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
1040 elif args.domain in domains:
1041 logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
1045 logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
1046 response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
1048 logger.debug("response.ok='%s',response.status_code=%d,response.content()=%d", response.ok, response.status_code, len(response.content))
# NOTE(review): response.content is bytes; comparing it to "" is always
# False, so the empty-body check never fires - verify intended type.
1049 if not response.ok or response.status_code >= 300 or response.content == "":
1050 logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
1053 logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
1054 reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
# Per-row parsing: column names may or may not carry a leading '#'.
1060 logger.debug("row[%s]='%s'", type(row), row)
1061 domain = severity = None
1062 reject_media = reject_reports = False
1064 if "#domain" in row:
1065 domain = row["#domain"]
1066 elif "domain" in row:
1067 domain = row["domain"]
1069 logger.debug("row='%s' does not contain domain column", row)
1072 if "#severity" in row:
1073 severity = blocks.alias_block_level(row["#severity"])
1074 elif "severity" in row:
1075 severity = blocks.alias_block_level(row["severity"])
1077 logger.debug("row='%s' does not contain severity column", row)
1080 if "#reject_media" in row and row["#reject_media"].lower() == "true":
1082 elif "reject_media" in row and row["reject_media"].lower() == "true":
1085 if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
1086 reject_reports = True
1087 elif "reject_reports" in row and row["reject_reports"].lower() == "true":
1088 reject_reports = True
1091 logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
1093 logger.debug("domain is empty - SKIPPED!")
# Skip unreachable/fake TLDs; deobfuscate wildcard patterns.
1095 elif domain.endswith(".onion"):
1096 logger.debug("domain='%s' is a TOR .onion domain - SKIPPED", domain)
1098 elif domain.endswith(".arpa"):
1099 logger.debug("domain='%s' is a reverse IP address - SKIPPED", domain)
1101 elif domain.endswith(".tld"):
1102 logger.debug("domain='%s' is a fake domain - SKIPPED", domain)
1104 elif domain.find("*") >= 0 or domain.find("?") >= 0:
1105 logger.debug("domain='%s' is obfuscated - Invoking utils.deobfuscate(%s, %s) ...", domain, domain, block["blocker"])
1106 domain = utils.deobfuscate(domain, block["blocker"])
1107 logger.debug("domain='%s' - AFTER!", domain)
# BUG(review): format string below has a %s placeholder but no argument -
# the logging call itself raises a formatting error; add 'domain'.
1109 if not validators.domain(domain):
1110 logger.debug("domain='%s' is not a valid domain - SKIPPED!")
1112 elif blacklist.is_blacklisted(domain):
1113 logger.warning("domain='%s' is blacklisted - SKIPPED!", domain)
1115 elif blocks.is_instance_blocked(block["blocker"], domain, severity):
1116 logger.debug("block[blocker]='%s' has already blocked domain='%s' with severity='%s' - SKIPPED!", block["blocker"], domain, severity)
1119 logger.debug("Marking domain='%s' as handled", domain)
1120 domains.append(domain)
1122 logger.debug("Processing domain='%s' ...", domain)
1123 processed = processing.domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
1124 logger.debug("processed='%s'", processed)
# BUG(review): 'block' here is a blocklists entry with only 'blocker' and
# 'csv_url' keys, so block["block_level"] (log below) and block["reason"]
# (blockdict append) look like KeyErrors - verify against VCS.
1126 if processing.block(block["blocker"], domain, None, severity) and config.get("bot_enabled"):
1127 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", domain, block["block_level"], block["blocker"])
1130 "reason" : block["reason"],
# Record additional partial-block levels found in the CSV row.
1134 processing.block(block["blocker"], domain, None, "reject_media")
1136 processing.block(block["blocker"], domain, None, "reject_reports")
# chaos.social's counts are handled specially elsewhere; skip totals for it.
1138 logger.debug("block[blocker]='%s'", block["blocker"])
1139 if block["blocker"] != "chaos.social":
1140 logger.debug("Invoking instances.set_total_blocks(%s, domains()=%d) ...", block["blocker"], len(domains))
1141 instances.set_total_blocks(block["blocker"], domains)
1143 logger.debug("Checking if blocker='%s' has pending updates ...", block["blocker"])
1144 if instances.has_pending(block["blocker"]):
1145 logger.debug("Flushing updates for block[blocker]='%s' ...", block["blocker"])
1146 instances.update_data(block["blocker"])
1148 logger.debug("Invoking commit() ...")
1149 database.connection.commit()
1151 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1152 if config.get("bot_enabled") and len(blockdict) > 0:
1153 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
1154 network.send_bot_post(block["blocker"], blockdict)
1156 logger.debug("Success! - EXIT!")
# Fetch plain-text blocklists (one domain per line) from a static list of
# URLs (currently only seirdy.one's bsl.txt) and run each wanted domain
# through the generic processing pipeline.
#
# Parameters: args - parsed command-line namespace (only logged here).
# Returns: int exit code.
#
# NOTE(review): numbered listing with gaps - the 'urls = [...]' list header
# and entry braces (1163-1170), 'for row in urls:' loop header, 'continue'
# and 'return' lines were dropped; restore from VCS.
1159 def fetch_txt(args: argparse.Namespace) -> int:
1160 logger.debug("args[]='%s' - CALLED!", type(args))
1162 logger.debug("Invoking locking.acquire() ...")
# Static source list entries: blocker instance plus text-file URL.
1167 "blocker": "seirdy.one",
1168 "url" : "https://seirdy.one/pb/bsl.txt",
1171 logger.info("Checking %d text file(s) ...", len(urls))
1173 logger.debug("Fetching row[url]='%s' ...", row["url"])
1174 response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
1176 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
# Only process non-empty 2xx responses; one candidate domain per line.
1177 if response.ok and response.status_code < 300 and response.text != "":
1178 logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
1179 domains = response.text.split("\n")
1181 logger.info("Processing %d domains ...", len(domains))
1182 for domain in domains:
1183 logger.debug("domain='%s' - BEFORE!", domain)
1184 domain = tidyup.domain(domain)
1186 logger.debug("domain='%s' - AFTER!", domain)
1188 logger.debug("domain is empty - SKIPPED!")
1190 elif not domain_helper.is_wanted(domain):
1191 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1193 elif instances.is_recent(domain):
1194 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1197 logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
1198 processed = processing.domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)
1200 logger.debug("processed='%s'", processed)
1202 logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
1205 logger.debug("Success! - EXIT!")
# Scrape the fedipact.online landing page, read one domain per <li> element,
# and crawl every new, wanted instance (attributed to origin "beach.city").
#
# Parameters: args - parsed command-line namespace (only logged here).
# Returns: int exit code.
#
# NOTE(review): numbered listing with gaps - dropped lines include the
# closing ')' of fetch_url (1227), 'for row in rows:' (1238), 'continue' and
# 'return' statements; restore from VCS.
1208 def fetch_fedipact(args: argparse.Namespace) -> int:
1209 logger.debug("args[]='%s' - CALLED!", type(args))
1211 logger.debug("Invoking locking.acquire() ...")
# Rate-limit the source; mark it as used otherwise.
1214 source_domain = "fedipact.online"
1215 if sources.is_recent(source_domain):
1216 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1219 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1220 sources.update(source_domain)
1222 logger.info("Fetching / from source_domain='%s' ...", source_domain)
1223 response = utils.fetch_url(
1224 f"https://{source_domain}",
1225 network.web_headers,
1226 (config.get("connection_timeout"), config.get("read_timeout"))
1229 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1230 if response.ok and response.status_code < 300 and response.text != "":
1231 logger.debug("Parsing %d Bytes ...", len(response.text))
1233 doc = bs4.BeautifulSoup(response.text, "html.parser")
1234 logger.debug("doc[]='%s'", type(doc))
# Each <li> holds exactly one instance domain as its first text node.
1236 rows = doc.findAll("li")
1237 logger.info("Checking %d row(s) ...", len(rows))
1239 logger.debug("row[]='%s'", type(row))
1240 domain = tidyup.domain(row.contents[0])
1242 logger.debug("domain='%s' - AFTER!", domain)
1244 logger.debug("domain is empty - SKIPPED!")
# IDNA-normalize before database checks.
1247 logger.debug("domain='%s' - BEFORE!", domain)
1248 domain = domain.encode("idna").decode("utf-8")
1249 logger.debug("domain='%s' - AFTER!", domain)
1251 if not domain_helper.is_wanted(domain):
1252 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1254 elif instances.is_registered(domain):
1255 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1257 elif instances.is_recent(domain):
1258 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1261 logger.info("Fetching domain='%s' ...", domain)
1262 federation.fetch_instances(domain, "beach.city", None, inspect.currentframe().f_code.co_name)
1264 logger.debug("Success! - EXIT!")
def fetch_joinmobilizon(args: argparse.Namespace) -> int:
    """Fetch the public instance list from instances.joinmobilizon.org and
    crawl every new, wanted Mobilizon host.

    Parameters:
        args: Parsed command-line namespace (only logged here).

    Returns:
        0 on success or when the source was queried too recently,
        1 when the API response lacks the expected 'data' key.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Rate-limit this source: skip entirely if polled recently, else mark it.
    source_domain = "instances.joinmobilizon.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching instances from source_domain='%s' ...", source_domain)
    # NOTE(review): fetch_url returns a response object; the trailing '.text'
    # was restored from a dropped listing line - confirm against VCS.
    raw = utils.fetch_url(
        f"https://{source_domain}/api/v1/instances",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    parsed = json.loads(raw)
    logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))

    if "data" not in parsed:
        # BUGFIX: the original format string used %d without supplying an
        # argument, which made this logging call itself fail.
        logger.warning("parsed()=%d does not contain key 'data'", len(parsed))
        return 1

    logger.info("Checking %d instances ...", len(parsed["data"]))
    for row in parsed["data"]:
        logger.debug("row[]='%s'", type(row))
        if "host" not in row:
            logger.warning("row='%s' does not contain key 'host' - SKIPPED!", row)
            continue
        elif not domain_helper.is_wanted(row["host"]):
            logger.debug("row[host]='%s' is not wanted - SKIPPED!", row["host"])
            continue
        elif instances.is_registered(row["host"]):
            logger.debug("row[host]='%s' is already registered - SKIPPED!", row["host"])
            continue

        logger.info("Fetching row[host]='%s' ...", row["host"])
        federation.fetch_instances(row["host"], "demo.mobilizon.org", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_joinmisskey(args: argparse.Namespace) -> int:
    """Fetch instances.json from instanceapp.misskey.page and crawl every
    new, wanted Misskey host listed under 'instancesInfos'.

    Parameters:
        args: Parsed command-line namespace (only logged here).

    Returns:
        0 on success or when the source was queried too recently,
        1 when the JSON lacks the expected 'instancesInfos' element.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Rate-limit this source: skip entirely if polled recently, else mark it.
    source_domain = "instanceapp.misskey.page"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching instances.json from source_domain='%s' ...", source_domain)
    # NOTE(review): fetch_url returns a response object; the trailing '.text'
    # was restored from a dropped listing line - confirm against VCS.
    raw = utils.fetch_url(
        f"https://{source_domain}/instances.json",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    parsed = json.loads(raw)
    logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))

    if "instancesInfos" not in parsed:
        # BUGFIX: the original format string used %d without supplying an
        # argument, which made this logging call itself fail.
        logger.warning("parsed()=%d does not contain element 'instancesInfos'", len(parsed))
        return 1

    # BUGFIX: "instane(s)" typo corrected in the log message.
    logger.info("Checking %d instance(s) ...", len(parsed["instancesInfos"]))
    for row in parsed["instancesInfos"]:
        logger.debug("row[%s]='%s'", type(row), row)
        if "url" not in row:
            logger.warning("row()=%d does not have element 'url' - SKIPPED!", len(row))
            continue
        elif not domain_helper.is_wanted(row["url"]):
            logger.debug("row[url]='%s' is not wanted - SKIPPED!", row["url"])
            continue
        elif instances.is_registered(row["url"]):
            logger.debug("row[url]='%s' is already registered - SKIPPED!", row["url"])
            continue

        logger.info("Fetching row[url]='%s' ...", row["url"])
        federation.fetch_instances(row["url"], "misskey.io", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
# Scrape the FediBlock wiki page on joinfediverse.wiki: parse its wikitable
# headers to find the domain/subdomain/reason columns, build a blocklist,
# expand subdomain rows, then record the blocks for all locally-known
# 'climatejustice.*' instances acting as blockers.
#
# Parameters: args - parsed command-line namespace (only logged here).
# Returns: int exit code.
#
# NOTE(review): numbered listing with gaps - dropped lines include the
# closing ')' of fetch_url, 'blocklist = list()' / 'blocking = list()' /
# 'blockdict = list()' initializers, 'for row in rows:' headers, per-row
# 'block = dict()' setup, 'cnt = cnt + 1' counters, 'try:'/'continue'/
# 'else:'/'return' statements; restore all from VCS.
1363 def fetch_joinfediverse(args: argparse.Namespace) -> int:
1364 logger.debug("args[]='%s' - CALLED!", type(args))
1366 logger.debug("Invoking locking.acquire() ...")
# Rate-limit the source; mark it as used otherwise.
1369 source_domain = "joinfediverse.wiki"
1370 if sources.is_recent(source_domain):
1371 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1374 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1375 sources.update(source_domain)
1377 logger.info("Fetching /FediBlock wiki page from source_domain='%s' ...", source_domain)
1378 raw = utils.fetch_url(
1379 f"https://{source_domain}/FediBlock",
1380 network.web_headers,
1381 (config.get("connection_timeout"), config.get("read_timeout"))
1383 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1385 doc = bs4.BeautifulSoup(raw, "html.parser")
1386 logger.debug("doc[]='%s'", type(doc))
# Phase 1: scan every wikitable, learning which column index (cnt) maps to
# which interesting header name, then scrape data rows using that mapping.
1388 tables = doc.findAll("table", {"class": "wikitable"})
1390 logger.info("Analyzing %d table(s) ...", len(tables))
1392 for table in tables:
1393 logger.debug("table[]='%s'", type(table))
1395 rows = table.findAll("tr")
1396 logger.info("Checking %d row(s) ...", len(rows))
1397 block_headers = dict()
1399 logger.debug("row[%s]='%s'", type(row), row)
1401 headers = row.findAll("th")
1402 logger.debug("Found headers()=%d header(s)", len(headers))
# A row with multiple <th> cells (re)defines the column mapping.
1403 if len(headers) > 1:
1404 block_headers = dict()
1406 for header in headers:
1408 logger.debug("header[]='%s',cnt=%d", type(header), cnt)
1409 text = header.contents[0]
1411 logger.debug("text[]='%s'", type(text))
1412 if not isinstance(text, str):
1413 logger.debug("text[]='%s' is not of type 'str' - SKIPPED!", type(text))
1415 elif validators.domain(text.strip()):
1416 logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
1419 text = tidyup.domain(text.strip())
1420 logger.debug("text='%s' - AFTER!", text)
1421 if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
1422 logger.debug("Found header: '%s'=%d", text, cnt)
1423 block_headers[cnt] = text
1425 elif len(block_headers) == 0:
1426 logger.debug("row is not scrapable - SKIPPED!")
# Data row: pick out cells whose column index is in the learned mapping.
1428 elif len(block_headers) > 0:
1429 logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
1433 for element in row.find_all(["th", "td"]):
1435 logger.debug("element[]='%s',cnt=%d", type(element), cnt)
1436 if cnt in block_headers:
1437 logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])
1439 text = element.text.strip()
# Both 'domain' and 'instance' columns are stored under the key 'blocked'.
1440 key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"
1442 logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
1443 if key in ["domain", "instance"]:
1445 elif key == "reason":
1446 block[key] = tidyup.reason(text)
1447 elif key == "subdomain(s)":
# Subdomains are slash-separated in the wiki cell.
1450 block[key] = text.split("/")
1452 logger.debug("key='%s'", key)
1455 logger.debug("block()=%d ...", len(block))
1457 logger.debug("Appending block()=%d ...", len(block))
1458 blocklist.append(block)
1460 logger.debug("blocklist()=%d", len(blocklist))
# The climatejustice.* instances act as the blockers for this wiki list.
1462 database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
1463 domains = database.cursor.fetchall()
1465 logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
# Phase 2: expand rows with subdomains into one block entry per subdomain.
1467 for block in blocklist:
1468 logger.debug("block='%s'", block)
1469 if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
1470 origin = block["blocked"]
1471 logger.debug("origin='%s'", origin)
1472 for subdomain in block["subdomain(s)"]:
1473 block["blocked"] = subdomain + "." + origin
1474 logger.debug("block[blocked]='%s'", block["blocked"])
# NOTE(review): the same mutable dict is appended repeatedly while its
# 'blocked' key is overwritten - every entry ends up with the LAST
# subdomain; appending a copy per subdomain looks intended. Verify.
1475 blocking.append(block)
1477 blocking.append(block)
# BUG(review): %d placeholder is given the list itself; should be
# len(blocking) - the logging call errors as written.
1479 logger.debug("blocking()=%d", blocking)
# Phase 3: register each blocked domain itself.
1480 for block in blocking:
1481 logger.debug("block[]='%s'", type(block))
1482 if "blocked" not in block:
1483 raise KeyError(f"block()={len(block)} does not have element 'blocked'")
1485 block["blocked"] = tidyup.domain(block["blocked"]).encode("idna").decode("utf-8")
1486 logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
1488 if block["blocked"] == "":
1489 logger.debug("block[blocked] is empty - SKIPPED!")
1491 elif not domain_helper.is_wanted(block["blocked"]):
1492 logger.debug("block[blocked]='%s' is not wanted - SKIPPED!", block["blocked"])
1494 elif instances.is_recent(block["blocked"]):
1495 logger.debug("block[blocked]='%s' has been recently checked - SKIPPED!", block["blocked"])
1498 logger.debug("Proccessing blocked='%s' ...", block["blocked"])
1499 processing.domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
# Phase 4: record the block relation for every climatejustice.* blocker.
1502 for blocker in domains:
1503 blocker = blocker[0]
1504 logger.debug("blocker[%s]='%s'", type(blocker), blocker)
1506 for block in blocking:
1507 logger.debug("block[blocked]='%s',block[block reason(s)]='%s' - BEFORE!", block["blocked"], block["block reason(s)"] if "block reason(s)" in block else None)
1508 block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None
1510 logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
1511 if block["blocked"] == "":
1512 logger.debug("block[blocked] is empty - SKIPPED!")
1514 elif not domain_helper.is_wanted(block["blocked"]):
1515 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1518 logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
# NOTE(review): block["block_level"] in the debug line below is never set
# on these dicts - looks like a KeyError; verify.
1519 if processing.block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
1520 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
1522 "blocked": block["blocked"],
1523 "reason" : block["reason"],
1526 if instances.has_pending(blocker):
1527 logger.debug("Flushing updates for blocker='%s' ...", blocker)
1528 instances.update_data(blocker)
1530 logger.debug("Invoking commit() ...")
1531 database.connection.commit()
1533 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
# NOTE(review): unbalanced quote in the message below ("blocker='%s,") -
# cosmetic, but worth fixing alongside other changes.
1534 if config.get("bot_enabled") and len(blockdict) > 0:
1535 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", blocker, len(blockdict))
1536 network.send_bot_post(blocker, blockdict)
1538 logger.debug("Success! - EXIT!")
# Re-fetch block lists from instances previously flagged with obfuscated
# entries (wildcards like '*'/'?'), try to deobfuscate each entry, and record
# any successfully resolved blocks; update the per-instance obfuscation count.
#
# Parameters: args - optional args.domain / args.software narrow the
#             selection; args.force bypasses the recent-check skip.
# Returns: int exit code.
#
# NOTE(review): numbered listing with gaps - dropped lines include
# 'for row in rows:', 'continue', 'blocking = list()' / 'blockdict = list()' /
# 'obfuscated = 0' initializers, 'blocked = None', 'else:' branches and the
# blockdict append braces; restore from VCS.
1541 def recheck_obfuscation(args: argparse.Namespace) -> int:
1542 logger.debug("args[]='%s' - CALLED!", type(args))
1544 logger.debug("Invoking locking.acquire() ...")
# Select candidate rows: by explicit domain, by software, or all flagged.
# NOTE(review): 'validators.domain(args.software) == args.software' looks
# odd for a software name (it validates a domain) - verify intent.
1547 if isinstance(args.domain, str) and args.domain != "" and domain_helper.is_wanted(args.domain):
1548 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
1549 elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
1550 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
1552 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")
1554 rows = database.cursor.fetchall()
1555 logger.info("Checking %d domains ...", len(rows))
1557 logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
# Skip recently-checked instances unless --force or a narrowing arg is set.
1558 if (args.force is None or not args.force) and args.domain is None and args.software is None and instances.is_recent(row["domain"], "last_blocked"):
1559 logger.debug("row[domain]='%s' has been recently checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force))
# Dispatch to the software-specific block-list fetcher.
1563 if row["software"] == "pleroma":
1564 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1565 blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
1566 elif row["software"] == "mastodon":
1567 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1568 blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
1569 elif row["software"] == "lemmy":
1570 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1571 blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
1572 elif row["software"] == "friendica":
1573 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1574 blocking = friendica.fetch_blocks(row["domain"])
1575 elif row["software"] == "misskey":
1576 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1577 blocking = misskey.fetch_blocks(row["domain"])
1579 logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])
1581 logger.debug("row[domain]='%s'", row["domain"])
1582 # chaos.social requires special care ...
1583 if row["domain"] != "chaos.social":
1584 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
1585 instances.set_total_blocks(row["domain"], blocking)
# Walk the fetched block entries, counting and resolving obfuscated ones.
1590 logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1591 for block in blocking:
1592 logger.debug("block[blocked]='%s'", block["blocked"])
1595 if block["blocked"] == "":
1596 logger.debug("block[blocked] is empty - SKIPPED!")
1598 elif block["blocked"].endswith(".arpa"):
1599 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1601 elif block["blocked"].endswith(".tld"):
1602 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1604 elif block["blocked"].endswith(".onion"):
1605 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
# Wildcard entry: count it and attempt deobfuscation (optionally hash-based).
1607 elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
1608 logger.debug("block='%s' is obfuscated.", block["blocked"])
1609 obfuscated = obfuscated + 1
1610 blocked = utils.deobfuscate(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
1611 elif not domain_helper.is_wanted(block["blocked"]):
1612 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1614 elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1615 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
# A successful deobfuscation yields a different, concrete domain name.
1618 logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
1619 if blocked is not None and blocked != block["blocked"]:
1620 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1621 obfuscated = obfuscated - 1
1622 if blocks.is_instance_blocked(row["domain"], blocked):
1623 logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
1626 block["block_level"] = blocks.alias_block_level(block["block_level"])
1628 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
1629 if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1630 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
1633 "reason" : block["reason"],
# Persist the remaining obfuscation count; clear the flag when fully resolved.
1636 logger.debug("Settings obfuscated=%d for row[domain]='%s' ...", obfuscated, row["domain"])
1637 instances.set_obfuscated_blocks(row["domain"], obfuscated)
1639 logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
1640 if obfuscated == 0 and len(blocking) > 0:
1641 logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
1642 instances.set_has_obfuscation(row["domain"], False)
1644 if instances.has_pending(row["domain"]):
1645 logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1646 instances.update_data(row["domain"])
1648 logger.debug("Invoking commit() ...")
1649 database.connection.commit()
1651 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
# NOTE(review): unbalanced quote in the message below ("blocker='%s,").
1652 if config.get("bot_enabled") and len(blockdict) > 0:
1653 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", row["domain"], len(blockdict))
1654 network.send_bot_post(row["domain"], blockdict)
1656 logger.debug("Success! - EXIT!")
# Download the instance CSV from demo.fedilist.com (optionally filtered by
# args.software), and crawl every new, wanted domain from its 'hostname'
# column. Onion hosts are excluded via the 'onion=not' query parameter.
#
# Parameters: args - optional args.software filter; args.force re-crawls
#             already-registered domains.
# Returns: int exit code.
#
# NOTE(review): numbered listing with gaps - dropped lines include the
# 'url,' argument and closing ')' of reqto.get, 'return' statements, the
# 'rows = list(reader)' materialization and 'for row in rows:' header,
# 'try:'/'except:'/'continue' lines; restore from VCS.
1659 def fetch_fedilist(args: argparse.Namespace) -> int:
1660 logger.debug("args[]='%s' - CALLED!", type(args))
1662 logger.debug("Invoking locking.acquire() ...")
# Rate-limit the source; mark it as used otherwise.
1665 source_domain = "demo.fedilist.com"
1666 if sources.is_recent(source_domain):
1667 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1670 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1671 sources.update(source_domain)
# Note: this source is plain http, not https.
1673 url = f"http://{source_domain}/instance/csv?onion=not"
1674 if args.software is not None and args.software != "":
1675 logger.debug("args.software='%s'", args.software)
1676 url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"
1678 logger.info("Fetching url='%s' ...", url)
1679 response = reqto.get(
1681 headers=network.web_headers,
1682 timeout=(config.get("connection_timeout"), config.get("read_timeout")),
1683 allow_redirects=False
1686 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
# NOTE(review): the message below says response.content()=%d but the value
# logged is len(response.text) - align message and argument.
1687 if not response.ok or response.status_code >= 300 or len(response.content) == 0:
1688 logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.text))
1691 reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
1693 logger.debug("reader[]='%s'", type(reader))
1695 logger.warning("Failed parsing response.content()=%d as CSV content", len(response.content))
1700 logger.info("Checking %d rows ...", len(rows))
1702 logger.debug("row[]='%s'", type(row))
1703 if "hostname" not in row:
1704 logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(row))
1707 logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"])
1708 domain = tidyup.domain(row["hostname"])
1709 logger.debug("domain='%s' - AFTER!", domain)
1712 logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"])
# IDNA-normalize before database checks.
1715 logger.debug("domain='%s' - BEFORE!", domain)
1716 domain = domain.encode("idna").decode("utf-8")
1717 logger.debug("domain='%s' - AFTER!", domain)
1719 if not domain_helper.is_wanted(domain):
1720 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1722 elif (args.force is None or not args.force) and instances.is_registered(domain):
1723 logger.debug("domain='%s' is already registered, --force not specified: args.force[]='%s'", domain, type(args.force))
1725 elif instances.is_recent(domain):
1726 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1729 logger.info("Fetching instances from domain='%s' ...", domain)
1730 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1732 logger.debug("Success! - EXIT!")
# Re-determines the software type ("nodeinfo") of registered instances.
# Row selection: a single --domain, all rows matching --software, or (default)
# every instance whose last_nodeinfo is stale or NULL. Returns an int exit
# code (the return statement itself is outside this excerpt; interior lines
# such as the row loop and try: are elided per the embedded line numbers).
1735 def update_nodeinfo(args: argparse.Namespace) -> int:
1736 logger.debug("args[]='%s' - CALLED!", type(args))
1738 logger.debug("Invoking locking.acquire() ...")
# Pick the SQL query matching the CLI arguments given.
1741 if args.domain is not None and args.domain != "":
1742 logger.debug("Fetching args.domain='%s'", args.domain)
1743 database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ?", [args.domain])
1744 elif args.software is not None and args.software != "":
1745 logger.info("Fetching domains for args.software='%s'", args.software)
1746 database.cursor.execute("SELECT domain, software FROM instances WHERE software = ?", [args.software])
# Default: anything not re-checked within the "recheck_nodeinfo" window.
1748 logger.info("Fetching domains for recently updated ...")
1749 database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL", [time.time() - config.get("recheck_nodeinfo")])
1751 domains = database.cursor.fetchall()
1753 logger.info("Checking %d domain(s) ...", len(domains))
# Per-row: skip recently checked domains unless --force was given.
1756 logger.debug("row[]='%s'", type(row))
1757 if not args.force and instances.is_recent(row["domain"], "last_nodeinfo"):
1758 logger.debug("row[domain]='%s' has been recently checked - SKIPPED!", row["domain"])
# Progress log includes percentage of rows processed so far (cnt is elided).
1762 logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
1763 software = federation.determine_software(row["domain"])
1765 logger.debug("Determined software='%s'", software)
# Persist only a real change of software type (or always when --force).
1766 if (software != row["software"] and software is not None) or args.force is True:
1767 logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
1768 instances.set_software(row["domain"], software)
1770 if software is not None:
1771 logger.debug("Setting row[domain]='%s' as successfully determined ...", row["domain"])
1772 instances.set_success(row["domain"])
# Network-level failures are recorded on the instance, not raised.
1773 except network.exceptions as exception:
1774 logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1775 instances.set_last_error(row["domain"], exception)
# Always stamp last_nodeinfo and flush instance data, success or failure.
1777 instances.set_last_nodeinfo(row["domain"])
1778 instances.update_data(row["domain"])
1781 logger.debug("Success! - EXIT!")
# Fetches the full instance list from the instances.social JSON API and crawls
# each new, wanted domain. Requires the "instances_social_api_key" config value.
# Returns an int exit code (return statements are elided in this excerpt, as
# are the row loop and some guard lines — see the embedded line-number gaps).
1784 def fetch_instances_social(args: argparse.Namespace) -> int:
1785 logger.debug("args[]='%s' - CALLED!", type(args))
1787 logger.debug("Invoking locking.acquire() ...")
1790 source_domain = "instances.social"
# Preconditions: API key must be configured and the source not rate-limited.
1792 if config.get("instances_social_api_key") == "":
1793 logger.error("API key not set. Please set in your config.json file.")
1795 elif sources.is_recent(source_domain):
1796 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1799 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1800 sources.update(source_domain)
# Bearer-token header for the instances.social API (dict literal partly elided).
1803 "Authorization": f"Bearer {config.get('instances_social_api_key')}",
# count=0 means "no limit" on this API; sorted by name.
1806 logger.info("Fetching list from source_domain='%s' ...", source_domain)
1807 fetched = network.get_json_api(
1809 "/api/1.0/instances/list?count=0&sort_by=name",
1811 (config.get("connection_timeout"), config.get("read_timeout"))
1813 logger.debug("fetched[]='%s'", type(fetched))
# Validate the API envelope before touching fetched["json"]["instances"].
1815 if "error_message" in fetched:
1816 logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1818 elif "exception" in fetched:
1819 logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1821 elif "json" not in fetched:
1822 logger.warning("fetched has no element 'json' - EXIT!")
1824 elif "instances" not in fetched["json"]:
1825 logger.warning("fetched[row] has no element 'instances' - EXIT!")
1829 rows = fetched["json"]["instances"]
1831 logger.info("Checking %d row(s) ...", len(rows))
# Per-row: normalize the instance name into a domain, IDNA-encode it, then
# filter out unwanted, duplicate, registered or recently crawled domains.
1833 logger.debug("row[]='%s'", type(row))
1834 domain = tidyup.domain(row["name"])
1835 logger.debug("domain='%s' - AFTER!", domain)
1838 logger.debug("domain is empty - SKIPPED!")
1841 logger.debug("domain='%s' - BEFORE!", domain)
1842 domain = domain.encode("idna").decode("utf-8")
1843 logger.debug("domain='%s' - AFTER!", domain)
1845 if not domain_helper.is_wanted(domain):
1846 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1848 elif domain in domains:
1849 logger.debug("domain='%s' is already added - SKIPPED!", domain)
1851 elif instances.is_registered(domain):
1852 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1854 elif instances.is_recent(domain):
1855 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
# Crawl the surviving domain; command tag is this function's own name.
1858 logger.info("Fetching instances from domain='%s'", domain)
1859 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1861 logger.debug("Success! - EXIT!")
# Scrapes known relay instances (activityrelay, aoderelay, selective-relay)
# for the peer domains they list on their landing page, then crawls each newly
# found domain. Returns an int exit code (return/continue lines and the outer
# loops are elided in this excerpt — see the embedded line-number gaps).
1864 def fetch_relays(args: argparse.Namespace) -> int:
1865 logger.debug("args[]='%s' - CALLED!", type(args))
1867 logger.debug("Invoking locking.acquire() ...")
# Either a single relay given via --domain, or every known relay.
1870 if args.domain is not None and args.domain != "":
1871 database.cursor.execute("SELECT domain, software FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay') AND domain = ? LIMIT 1", [args.domain])
1873 database.cursor.execute("SELECT domain, software FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay')")
1876 rows = database.cursor.fetchall()
1878 logger.info("Checking %d relays ...", len(rows))
# Per-relay loop: skip recently fetched relays unless --force is given.
1880 logger.debug("row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1881 if not args.force and instances.is_recent(row["domain"]):
1882 logger.debug("row[domain]='%s' has been recently fetched - SKIPPED!", row["domain"])
# Fetch the relay's landing page ("/") as raw HTML.
1886 logger.info("Fetching / from relay row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1887 raw = utils.fetch_url(
1888 f"https://{row['domain']}",
1889 network.web_headers,
1890 (config.get("connection_timeout"), config.get("read_timeout"))
1892 logger.debug("raw[%s]()=%d", type(raw), len(raw))
# On network failure: record the error and still stamp fetch time/data.
1893 except network.exceptions as exception:
1894 logger.warning("Exception '%s' during fetching from relay '%s': '%s'", type(exception), row["domain"], str(exception))
1895 instances.set_last_error(row["domain"], exception)
1896 instances.set_last_instance_fetch(row["domain"])
1897 instances.update_data(row["domain"])
1900 doc = bs4.BeautifulSoup(raw, features="html.parser")
1901 logger.debug("doc[]='%s'", type(doc))
# Parsing differs per relay software:
1903 logger.debug("row[software]='%s'", row["software"])
# activityrelay: peers are plain text inside a <p> tag that contains the
# phrase "registered instances".
1904 if row["software"] == "activityrelay":
1905 logger.debug("Checking row[domain]='%s' ...", row["domain"])
1906 tags = doc.findAll("p")
1908 logger.debug("Checking %d paragraphs ...", len(tags))
1910 logger.debug("tag[]='%s'", type(tag))
1911 if len(tag.contents) == 0:
1912 logger.debug("tag='%s' is an empty tag - SKIPPED!", tag)
1914 elif "registered instances" not in tag.contents[0]:
1915 logger.debug("Skipping paragraph, text not found.")
# Each NavigableString child of the paragraph is one candidate domain.
1918 logger.debug("Found tag.contents[0][]='%s'", tag.contents[0])
1919 for domain in tag.contents:
1920 logger.debug("domain[%s]='%s'", type(domain), domain)
1921 if not isinstance(domain, bs4.element.NavigableString) or "registered instances" in domain:
1924 domain = str(domain)
1925 if not domain_helper.is_wanted(domain):
1926 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1929 logger.debug("domain='%s' - BEFORE!", domain)
1930 domain = tidyup.domain(domain)
1931 logger.debug("domain='%s' - AFTER!", domain)
1934 logger.debug("Empty domain after tidyup.domain() from origin='%s' - SKIPPED!", row["domain"])
1936 elif instances.is_registered(domain):
1937 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1939 elif dict_helper.has_key(domains, "domain", domain):
1940 logger.debug("domain='%s' already added", domain)
# Queue the new domain together with its relay as origin (dict partly elided).
1943 logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
1946 "origin": row["domain"],
# aoderelay / selective-relay: peers are links inside per-instance sections
# (aoderelay) or an <li> list under div#instances (selective-relay).
1948 elif row["software"] in ["aoderelay", "selective-relay"]:
1949 logger.debug("Checking row[domain]='%s' ...", row["domain"])
1950 if row["software"] == "aoderelay":
1951 tags = doc.findAll("section", {"class": "instance"})
1953 tags = doc.find("div", {"id": "instances"}).findAll("li")
1955 logger.debug("Checking %d tags ...", len(tags))
1957 logger.debug("tag[]='%s'", type(tag))
1959 link = tag.find("a")
1960 logger.debug("link[%s]='%s'", type(link), link)
1962 logger.warning("tag='%s' has no a-tag ...", tag)
# Take the host part of the link's href as the candidate domain.
1965 components = urlparse(link["href"])
1966 domain = components.netloc.lower()
1968 if not domain_helper.is_wanted(domain):
1969 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1972 logger.debug("domain='%s' - BEFORE!", domain)
1973 domain = tidyup.domain(domain)
1974 logger.debug("domain='%s' - AFTER!", domain)
1977 logger.debug("Empty domain after tidyup.domain() from origin='%s' - SKIPPED!", row["domain"])
1979 elif instances.is_registered(domain):
1980 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1982 elif dict_helper.has_key(domains, "domain", domain):
1983 logger.debug("domain='%s' already added", domain)
1986 logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
1989 "origin": row["domain"],
# Unknown relay software: log and move on.
1992 logger.warning("row[domain]='%s',row[software]='%s' is not supported", row["domain"], row["software"])
# Stamp the relay's fetch time and flush its data after parsing it.
1994 logger.debug("Updating last_instance_fetch for row[domain]='%s' ...", row["domain"])
1995 instances.set_last_instance_fetch(row["domain"])
1996 instances.update_data(row["domain"])
# Second pass: crawl all newly collected domains with their relay as origin.
1998 logger.info("Found %d domains to add ...", len(domains))
2000 logger.info("Fetching row[domain]='%s',row[origin]='%s' ...", row["domain"], row["origin"])
2001 federation.fetch_instances(row["domain"], row["origin"], None, inspect.currentframe().f_code.co_name)
2003 logger.debug("Success! - EXIT!")
# One-shot maintenance command: converts all non-punycode domain values
# (rows whose value does not contain 'xn--') into IDNA form, across four
# columns — instances.domain, instances.origin, blocks.blocker, blocks.blocked.
# Returns an int exit code (the return statement lies past this excerpt).
2006 def convert_idna(args: argparse.Namespace) -> int:
2007 logger.debug("args[]='%s' - CALLED!", type(args))
# instances.domain
2009 database.cursor.execute("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC")
2010 rows = database.cursor.fetchall()
2012 logger.debug("rows[]='%s'", type(rows))
2013 instances.translate_idnas(rows, "domain")
# instances.origin
2015 database.cursor.execute("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC")
2016 rows = database.cursor.fetchall()
2018 logger.debug("rows[]='%s'", type(rows))
2019 instances.translate_idnas(rows, "origin")
# blocks.blocker
2021 database.cursor.execute("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC")
2022 rows = database.cursor.fetchall()
2024 logger.debug("rows[]='%s'", type(rows))
2025 blocks.translate_idnas(rows, "blocker")
# blocks.blocked
2027 database.cursor.execute("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC")
2028 rows = database.cursor.fetchall()
2030 logger.debug("rows[]='%s'", type(rows))
2031 blocks.translate_idnas(rows, "blocked")
2033 logger.debug("Success! - EXIT!")