1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
import argparse
import csv
import inspect
import json
import logging
import time
import atoma
import bs4
import markdown
import validators
23 from urllib.parse import urlparse
# csrf and utils are referenced below (csrf.determine, utils.fetch_url); assumed to live in the fba package like database
from fba import csrf
33 from fba import database
from fba import utils
36 from fba.helpers import blacklist
37 from fba.helpers import config
38 from fba.helpers import cookies
39 from fba.helpers import dicts as dict_helper
40 from fba.helpers import domain as domain_helper
41 from fba.helpers import locking
42 from fba.helpers import processing
43 from fba.helpers import software as software_helper
44 from fba.helpers import tidyup
46 from fba.http import federation
47 from fba.http import network
49 from fba.models import blocks
50 from fba.models import instances
51 from fba.models import sources
53 from fba.networks import friendica
54 from fba.networks import lemmy
55 from fba.networks import mastodon
56 from fba.networks import misskey
57 from fba.networks import pleroma
59 logging.basicConfig(level=logging.INFO)
60 logger = logging.getLogger(__name__)
61 #logger.setLevel(logging.DEBUG)
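# check_instance(): validate a single domain from the command line and report whether it is blacklisted, already registered or still unknown.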
63 def check_instance(args: argparse.Namespace) -> int:
64 logger.debug("args.domain='%s' - CALLED!", args.domain)
66 if not validators.domain(args.domain):
67 logger.warning("args.domain='%s' is not valid", args.domain)
69 elif blacklist.is_blacklisted(args.domain):
70 logger.warning("args.domain='%s' is blacklisted", args.domain)
72 elif instances.is_registered(args.domain):
73 logger.warning("args.domain='%s' is already registered", args.domain)
76 logger.info("args.domain='%s' is not known", args.domain)
78 logger.debug("status=%d - EXIT!", status)
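# check_nodeinfo(): scan all stored nodeinfo URLs and warn when a URL contains neither the instance's domain nor its punycode form.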
81 def check_nodeinfo(args: argparse.Namespace) -> int:
82 logger.debug("args[]='%s' - CALLED!", type(args))
85 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")
88 for row in database.cursor.fetchall():
89 logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
90 punycode = row["domain"].encode("idna").decode("utf-8")
92 if row["nodeinfo_url"].startswith("/"):
93 logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
95 elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
96 logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
99 logger.info("Found %d row(s)", cnt)
101 logger.debug("EXIT!")
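# fetch_pixelfed_api(): fetch the public server list from the pixelfed.org API and register any new, wanted instances.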
104 def fetch_pixelfed_api(args: argparse.Namespace) -> int:
105 logger.debug("args[]='%s' - CALLED!", type(args))
107 # No CSRF token is sent by default, so there is no need to add network.source_headers here
109 source_domain = "pixelfed.org"
111 if sources.is_recent(source_domain):
112 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
115 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
116 sources.update(source_domain)
119 logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
120 headers = csrf.determine(source_domain, dict())
121 except network.exceptions as exception:
122 logger.warning("Exception '%s' during checking CSRF (fetch_pixelfed_api,%s) - EXIT!", type(exception), __name__)
126 logger.info("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
127 fetched = network.get_json_api(
129 "/api/v1/servers/all.json?scope=All&country=all&language=all",
131 (config.get("connection_timeout"), config.get("read_timeout"))
134 logger.debug("JSON API returned %d elements", len(fetched))
135 if "error_message" in fetched:
136 logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
138 elif "data" not in fetched["json"]:
139 logger.warning("API did not return JSON with 'data' element - EXIT!")
142 rows = fetched["json"]["data"]
143 logger.info("Checking %d fetched rows ...", len(rows))
145 logger.debug("row[]='%s'", type(row))
146 if "domain" not in row:
147 logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
149 elif row["domain"] == "":
150 logger.debug("row[domain] is empty - SKIPPED!")
153 logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
154 domain = row["domain"].encode("idna").decode("utf-8")
155 logger.debug("domain='%s' - AFTER!", domain)
157 if not domain_helper.is_wanted(domain):
158 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
160 elif instances.is_registered(domain):
161 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
163 elif instances.is_recent(domain):
164 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
167 logger.debug("Fetching instances from domain='%s' ...", domain)
168 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
170 except network.exceptions as exception:
171 logger.warning("Cannot fetch JSON from pixelfed.org API,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
174 logger.debug("Success! - EXIT!")
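# fetch_bkali(): query the gql.api.bka.li GraphQL API for a domain list and add instances that are not yet known.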
177 def fetch_bkali(args: argparse.Namespace) -> int:
178 logger.debug("args[]='%s' - CALLED!", type(args))
180 logger.debug("Invoking locking.acquire() ...")
183 source_domain = "gql.api.bka.li"
184 if sources.is_recent(source_domain):
185 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
188 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
189 sources.update(source_domain)
193 logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
194 fetched = network.post_json_api(
198 "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
202 logger.debug("fetched[]='%s'", type(fetched))
203 if "error_message" in fetched:
204 logger.warning("post_json_api() for source_domain='%s' returned error message='%s'", source_domain, fetched["error_message"])
206 elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
207 logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
210 rows = fetched["json"]
212 logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
214 raise Exception("WARNING: Returned no records")
215 elif "data" not in rows:
216 raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
217 elif "nodeinfo" not in rows["data"]:
218 raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")
220 for entry in rows["data"]["nodeinfo"]:
221 logger.debug("entry[%s]='%s'", type(entry), entry)
222 if "domain" not in entry:
223 logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
225 elif entry["domain"] == "":
226 logger.debug("entry[domain] is empty - SKIPPED!")
228 elif not domain_helper.is_wanted(entry["domain"]):
229 logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
231 elif instances.is_registered(entry["domain"]):
232 logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
234 elif instances.is_recent(entry["domain"]):
235 logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
238 logger.debug("Adding domain='%s' ...", entry["domain"])
239 domains.append(entry["domain"])
241 except network.exceptions as exception:
242 logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
245 logger.debug("domains()=%d", len(domains))
247 logger.info("Adding %d new instances ...", len(domains))
248 for domain in domains:
249 logger.debug("domain='%s' - BEFORE!", domain)
250 domain = domain.encode("idna").decode("utf-8")
251 logger.debug("domain='%s' - AFTER!", domain)
254 logger.info("Fetching instances from domain='%s' ...", domain)
255 federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
256 except network.exceptions as exception:
257 logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
258 instances.set_last_error(domain, exception)
261 logger.debug("Success - EXIT!")
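# fetch_blocks(): fetch block lists from registered instances (optionally limited to one domain or one software) and record newly found blocks.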
264 def fetch_blocks(args: argparse.Namespace) -> int:
265 logger.debug("args[]='%s' - CALLED!", type(args))
266 if args.domain is not None and args.domain != "":
267 logger.debug("args.domain='%s' - checking ...", args.domain)
268 if not validators.domain(args.domain):
269 logger.warning("args.domain='%s' is not valid.", args.domain)
271 elif blacklist.is_blacklisted(args.domain):
272 logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
274 elif not instances.is_registered(args.domain):
275 logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
278 logger.debug("Invoking locking.acquire() ...")
281 if args.domain is not None and args.domain != "":
282 # Re-check single domain
283 logger.debug("Querying database for args.domain='%s' ...", args.domain)
284 database.cursor.execute(
285 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
287 elif args.software is not None and args.software != "":
288 # Re-check single software
289 logger.debug("Querying database for args.software='%s' ...", args.software)
290 database.cursor.execute(
291 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
295 logger.debug("Re-checking all instances ...")
296 database.cursor.execute(
297 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC"
300 # Re-check after "timeout" (aka. minimum interval)
301 database.cursor.execute(
302 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
305 rows = database.cursor.fetchall()
306 logger.info("Checking %d entries ...", len(rows))
307 for blocker, software, origin, nodeinfo_url in rows:
308 logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
309 blocker = tidyup.domain(blocker)
310 logger.debug("blocker='%s' - AFTER!", blocker)
313 logger.warning("blocker is now empty!")
315 elif nodeinfo_url is None or nodeinfo_url == "":
316 logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
318 elif not domain_helper.is_wanted(blocker):
319 logger.debug("blocker='%s' is not wanted - SKIPPED!", blocker)
322 logger.debug("blocker='%s'", blocker)
323 instances.set_last_blocked(blocker)
324 instances.set_has_obfuscation(blocker, False)
327 if software == "pleroma":
328 logger.info("blocker='%s',software='%s'", blocker, software)
329 blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
330 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
331 elif software == "mastodon":
332 logger.info("blocker='%s',software='%s'", blocker, software)
333 blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
334 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
335 elif software == "lemmy":
336 logger.info("blocker='%s',software='%s'", blocker, software)
337 blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
338 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
339 elif software == "friendica":
340 logger.info("blocker='%s',software='%s'", blocker, software)
341 blocking = friendica.fetch_blocks(blocker)
342 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
343 elif software == "misskey":
344 logger.info("blocker='%s',software='%s'", blocker, software)
345 blocking = misskey.fetch_blocks(blocker)
346 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
348 logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)
350 logger.debug("blocker='%s'", blocker)
351 if blocker != "chaos.social":
352 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
353 instances.set_total_blocks(blocker, blocking)
355 logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
357 for block in blocking:
358 logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])
360 if block["block_level"] == "":
361 logger.warning("block_level is empty, blocker='%s',blocked='%s'", blocker, block["blocked"])
364 logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
365 block["blocked"] = tidyup.domain(block["blocked"])
366 block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
367 logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])
369 if block["blocked"] == "":
370 logger.warning("blocked is empty, blocker='%s'", blocker)
372 elif block["blocked"].endswith(".onion"):
373 logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
375 elif block["blocked"].endswith(".arpa"):
376 logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
378 elif block["blocked"].endswith(".tld"):
379 logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
381 elif block["blocked"].find("*") >= 0:
382 logger.debug("blocker='%s' uses obfuscated domains", blocker)
384 # Some Friendica servers also obfuscate domains without a hash
385 row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)
387 logger.debug("row[]='%s'", type(row))
389 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
390 instances.set_has_obfuscation(blocker, True)
393 block["blocked"] = row["domain"]
394 origin = row["origin"]
395 nodeinfo_url = row["nodeinfo_url"]
396 elif block["blocked"].find("?") >= 0:
397 logger.debug("blocker='%s' uses obfuscated domains", blocker)
399 # Some servers obfuscate domains with question marks; it is unclear whether this depends on the software version
400 row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)
402 logger.debug("row[]='%s'", type(row))
404 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
405 instances.set_has_obfuscation(blocker, True)
408 block["blocked"] = row["domain"]
409 origin = row["origin"]
410 nodeinfo_url = row["nodeinfo_url"]
412 logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
413 if block["blocked"] == "":
414 logger.debug("block[blocked] is empty - SKIPPED!")
417 logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
418 block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
419 logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
421 if not domain_helper.is_wanted(block["blocked"]):
422 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
424 elif block["block_level"] in ["accept", "accepted"]:
425 logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
427 elif not instances.is_registered(block["blocked"]):
428 logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
429 federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)
431 block["block_level"] = blocks.alias_block_level(block["block_level"])
433 if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
434 logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
436 "blocked": block["blocked"],
437 "reason" : block["reason"],
440 logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
441 cookies.clear(block["blocked"])
443 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
444 if instances.has_pending(blocker):
445 logger.debug("Flushing updates for blocker='%s' ...", blocker)
446 instances.update_data(blocker)
448 logger.debug("Invoking commit() ...")
449 database.connection.commit()
451 logger.debug("Invoking cookies.clear(%s) ...", blocker)
452 cookies.clear(blocker)
454 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
455 if config.get("bot_enabled") and len(blockdict) > 0:
456 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
457 network.send_bot_post(blocker, blockdict)
459 logger.debug("Success! - EXIT!")
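# fetch_observer(): scrape fediverse.observer per software type and register newly found instances.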
462 def fetch_observer(args: argparse.Namespace) -> int:
463 logger.debug("args[]='%s' - CALLED!", type(args))
465 logger.debug("Invoking locking.acquire() ...")
468 source_domain = "fediverse.observer"
469 if sources.is_recent(source_domain):
470 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
473 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
474 sources.update(source_domain)
477 if args.software is None:
478 logger.info("Fetching software list ...")
479 raw = utils.fetch_url(
480 f"https://{source_domain}",
482 (config.get("connection_timeout"), config.get("read_timeout"))
484 logger.debug("raw[%s]()=%d", type(raw), len(raw))
486 doc = bs4.BeautifulSoup(raw, features="html.parser")
487 logger.debug("doc[]='%s'", type(doc))
489 navbar = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"})
490 logger.debug("navbar[]='%s'", type(navbar))
492 logger.warning("Cannot find navigation bar, cannot continue!")
495 items = navbar.findAll("a", {"class": "dropdown-item"})
496 logger.debug("items[]='%s'", type(items))
498 logger.info("Checking %d menu items ...", len(items))
500 logger.debug("item[%s]='%s'", type(item), item)
501 if item.text.lower() == "all":
502 logger.debug("Skipping 'All' menu entry ...")
505 logger.debug("Appending item.text='%s' ...", item.text)
506 types.append(tidyup.domain(item.text))
508 logger.info("Adding args.software='%s' as type ...", args.software)
509 types.append(args.software)
511 logger.info("Fetching %d different table data ...", len(types))
512 for software in types:
513 logger.debug("software='%s' - BEFORE!", software)
514 if args.software is not None and args.software != software:
515 logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
520 logger.debug("Fetching table data for software='%s' ...", software)
521 raw = utils.fetch_url(
522 f"https://{source_domain}/app/views/tabledata.php?software={software}",
524 (config.get("connection_timeout"), config.get("read_timeout"))
526 logger.debug("raw[%s]()=%d", type(raw), len(raw))
528 doc = bs4.BeautifulSoup(raw, features="html.parser")
529 logger.debug("doc[]='%s'", type(doc))
530 except network.exceptions as exception:
531 logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
534 items = doc.findAll("a", {"class": "url"})
535 logger.info("Checking %d items,software='%s' ...", len(items), software)
537 logger.debug("item[]='%s'", type(item))
538 domain = item.decode_contents()
539 logger.debug("domain='%s' - AFTER!", domain)
542 logger.debug("domain is empty - SKIPPED!")
545 logger.debug("domain='%s' - BEFORE!", domain)
546 domain = domain.encode("idna").decode("utf-8")
547 logger.debug("domain='%s' - AFTER!", domain)
549 if not domain_helper.is_wanted(domain):
550 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
552 elif instances.is_registered(domain):
553 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
555 elif instances.is_recent(domain):
556 logger.debug("domain='%s' has recently been handled - SKIPPED!", domain)
559 software = software_helper.alias(software)
560 logger.info("Fetching instances for domain='%s'", domain)
561 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
563 logger.debug("Success! - EXIT!")
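# fetch_todon_wiki(): parse the silenced/suspended server lists from wiki.todon.eu and record them as blocks.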
566 def fetch_todon_wiki(args: argparse.Namespace) -> int:
567 logger.debug("args[]='%s' - CALLED!", type(args))
569 logger.debug("Invoking locking.acquire() ...")
572 source_domain = "wiki.todon.eu"
573 if sources.is_recent(source_domain):
574 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
577 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
578 sources.update(source_domain)
585 logger.debug("Fetching domainblocks from source_domain='%s'", source_domain)
586 raw = utils.fetch_url(
587 f"https://{source_domain}/todon/domainblocks",
589 (config.get("connection_timeout"), config.get("read_timeout"))
591 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
593 doc = bs4.BeautifulSoup(raw, "html.parser")
594 logger.debug("doc[]='%s'", type(doc))
596 silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
597 logger.info("Checking %d silenced/limited entries ...", len(silenced))
598 blocklist["silenced"] = utils.find_domains(silenced, "div")
600 suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
601 logger.info("Checking %d suspended entries ...", len(suspended))
602 blocklist["reject"] = utils.find_domains(suspended, "div")
604 blocking = blocklist["silenced"] + blocklist["reject"]
607 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
608 instances.set_last_blocked(blocker)
609 instances.set_total_blocks(blocker, blocking)
612 for block_level in blocklist:
613 blockers = blocklist[block_level]
615 logger.debug("block_level='%s',blockers()=%d", block_level, len(blockers))
616 for blocked in blockers:
617 logger.debug("blocked='%s'", blocked)
619 if not instances.is_registered(blocked):
621 logger.info("Fetching instances from domain='%s' ...", blocked)
622 federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
623 except network.exceptions as exception:
624 logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
625 instances.set_last_error(blocked, exception)
627 if blocks.is_instance_blocked(blocker, blocked, block_level):
628 logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
631 logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
632 if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
633 logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", blocked, block_level, blocker)
639 logger.debug("Invoking commit() ...")
640 database.connection.commit()
642 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
643 if config.get("bot_enabled") and len(blockdict) > 0:
644 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
645 network.send_bot_post(blocker, blockdict)
647 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
648 if instances.has_pending(blocker):
649 logger.debug("Flushing updates for blocker='%s' ...", blocker)
650 instances.update_data(blocker)
652 logger.debug("Success! - EXIT!")
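# fetch_cs(): parse chaos.social's federation.md (silenced and blocked tables) from raw.githubusercontent.com and record the blocks.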
655 def fetch_cs(args: argparse.Namespace):
656 logger.debug("args[]='%s' - CALLED!", type(args))
658 logger.debug("Invoking locking.acquire() ...")
686 source_domain = "raw.githubusercontent.com"
687 if sources.is_recent(source_domain):
688 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
691 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
692 sources.update(source_domain)
694 logger.info("Fetching federation.md from source_domain='%s' ...", source_domain)
695 raw = utils.fetch_url(
696 f"https://{source_domain}/chaossocial/meta/master/federation.md",
698 (config.get("connection_timeout"), config.get("read_timeout"))
700 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
702 doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
703 logger.debug("doc()=%d[]='%s'", len(doc), type(doc))
705 silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
706 logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
707 blocklist["silenced"] = federation.find_domains(silenced)
709 blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
710 logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
711 blocklist["reject"] = federation.find_domains(blocked)
713 blocking = blocklist["silenced"] + blocklist["reject"]
714 blocker = "chaos.social"
716 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
717 instances.set_last_blocked(blocker)
718 instances.set_total_blocks(blocker, blocking)
720 logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
721 if len(blocking) > 0:
723 for block_level in blocklist:
724 logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))
726 for row in blocklist[block_level]:
727 logger.debug("row[%s]='%s'", type(row), row)
728 if "domain" not in row:
729 logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
731 elif not instances.is_registered(row["domain"]):
733 logger.info("Fetching instances from domain='%s' ...", row["domain"])
734 federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
735 except network.exceptions as exception:
736 logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
737 instances.set_last_error(row["domain"], exception)
739 if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
740 logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
742 "blocked": row["domain"],
743 "reason" : row["reason"],
746 logger.debug("Invoking commit() ...")
747 database.connection.commit()
749 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
750 if config.get("bot_enabled") and len(blockdict) > 0:
751 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
752 network.send_bot_post(blocker, blockdict)
754 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
755 if instances.has_pending(blocker):
756 logger.debug("Flushing updates for blocker='%s' ...", blocker)
757 instances.update_data(blocker)
759 logger.debug("Success! - EXIT!")
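# fetch_fba_rss(): parse an FBA-specific RSS feed given via args.feed and add the domains it lists.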
762 def fetch_fba_rss(args: argparse.Namespace) -> int:
763 logger.debug("args[]='%s' - CALLED!", type(args))
767 logger.debug("Invoking locking.acquire() ...")
770 components = urlparse(args.feed)
772 if sources.is_recent(components.netloc):
773 logger.info("API from components.netloc='%s' has recently been accessed - EXIT!", components.netloc)
776 logger.debug("components.netloc='%s' has not been recently used, marking ...", components.netloc)
777 sources.update(components.netloc)
779 logger.info("Fetching FBA-specific RSS feed args.feed='%s' ...", args.feed)
780 response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
782 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
783 if response.ok and response.status_code < 300 and len(response.text) > 0:
784 logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
785 rss = atoma.parse_rss_bytes(response.content)
787 logger.debug("rss[]='%s'", type(rss))
788 for item in rss.items:
789 logger.debug("item[%s]='%s'", type(item), item)
790 domain = tidyup.domain(item.link.split("=")[1])
792 logger.debug("domain='%s' - AFTER!", domain)
794 logger.debug("domain is empty - SKIPPED!")
797 logger.debug("domain='%s' - BEFORE!", domain)
798 domain = domain.encode("idna").decode("utf-8")
799 logger.debug("domain='%s' - AFTER!", domain)
801 if not domain_helper.is_wanted(domain):
802 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
804 elif domain in domains:
805 logger.debug("domain='%s' is already added - SKIPPED!", domain)
807 elif instances.is_registered(domain):
808 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
810 elif instances.is_recent(domain):
811 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
814 logger.debug("Adding domain='%s'", domain)
815 domains.append(domain)
817 logger.debug("domains()=%d", len(domains))
819 logger.info("Adding %d new instances ...", len(domains))
820 for domain in domains:
821 logger.debug("domain='%s'", domain)
823 logger.info("Fetching instances from domain='%s' ...", domain)
824 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
825 except network.exceptions as exception:
826 logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
827 instances.set_last_error(domain, exception)
830 logger.debug("Success! - EXIT!")
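# fetch_fbabot_atom(): parse the ATOM feed of the FBA bot account on ryona.agency and add the domains linked in its entries.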
833 def fetch_fbabot_atom(args: argparse.Namespace) -> int:
834 logger.debug("args[]='%s' - CALLED!", type(args))
836 logger.debug("Invoking locking.acquire() ...")
839 source_domain = "ryona.agency"
840 if sources.is_recent(source_domain):
841 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
844 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
845 sources.update(source_domain)
847 feed = f"https://{source_domain}/users/fba/feed.atom"
851 logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
852 response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
854 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
855 if response.ok and response.status_code < 300 and len(response.text) > 0:
856 logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
857 atom = atoma.parse_atom_bytes(response.content)
859 logger.debug("atom[]='%s'", type(atom))
860 for entry in atom.entries:
861 logger.debug("entry[]='%s'", type(entry))
862 doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
863 logger.debug("doc[]='%s'", type(doc))
864 for element in doc.findAll("a"):
865 logger.debug("element[]='%s'", type(element))
866 for href in element["href"].split(","):
867 logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
868 domain = tidyup.domain(href)
870 logger.debug("domain='%s' - AFTER!", domain)
872 logger.debug("domain is empty - SKIPPED!")
875 logger.debug("domain='%s' - BEFORE!", domain)
876 domain = domain.encode("idna").decode("utf-8")
877 logger.debug("domain='%s' - AFTER!", domain)
879 if not domain_helper.is_wanted(domain):
880 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
882 elif domain in domains:
883 logger.debug("domain='%s' is already added - SKIPPED!", domain)
885 elif instances.is_registered(domain):
886 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
888 elif instances.is_recent(domain):
889 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
892 logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
893 domains.append(domain)
895 logger.debug("domains()=%d", len(domains))
897 logger.info("Adding %d new instances ...", len(domains))
898 for domain in domains:
899 logger.debug("domain='%s'", domain)
901 logger.info("Fetching instances from domain='%s' ...", domain)
902 federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
903 except network.exceptions as exception:
904 logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
905 instances.set_last_error(domain, exception)
908 logger.debug("Success! - EXIT!")
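# fetch_instances(): fetch peers for args.domain and then re-crawl known instances whose last fetch is older than the configured interval.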
911 def fetch_instances(args: argparse.Namespace) -> int:
912 logger.debug("args[]='%s' - CALLED!", type(args))
914 logger.debug("args.domain='%s' - checking ...", args.domain)
915 if not validators.domain(args.domain):
916 logger.warning("args.domain='%s' is not valid.", args.domain)
918 elif blacklist.is_blacklisted(args.domain):
919 logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
922 logger.debug("Invoking locking.acquire() ...")
927 logger.info("Fetching instances from args.domain='%s' ...", args.domain)
928 federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
929 except network.exceptions as exception:
930 logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
931 instances.set_last_error(args.domain, exception)
932 instances.update_data(args.domain)
936 logger.debug("Not fetching more instances - EXIT!")
939 # Loop through some instances
940 database.cursor.execute(
941 "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
944 rows = database.cursor.fetchall()
945 logger.info("Checking %d entries ...", len(rows))
947 logger.debug("row[domain]='%s'", row["domain"])
948 if row["domain"] == "":
949 logger.debug("row[domain] is empty - SKIPPED!")
952 logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
953 domain = row["domain"].encode("idna").decode("utf-8")
954 logger.debug("domain='%s' - AFTER!", domain)
956 if not domain_helper.is_wanted(domain):
957 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
961 logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", domain, row["origin"], row["software"], row["nodeinfo_url"])
962 federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
963 except network.exceptions as exception:
964 logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
965 instances.set_last_error(domain, exception)
967 logger.debug("Success - EXIT!")
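# fetch_oliphant(): download oliphant's CSV block lists from codeberg.org and import the blocks per listed blocker.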
970 def fetch_oliphant(args: argparse.Namespace) -> int:
971 logger.debug("args[]='%s' - CALLED!", type(args))
973 logger.debug("Invoking locking.acquire() ...")
976 source_domain = "codeberg.org"
977 if sources.is_recent(source_domain):
978 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
981 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
982 sources.update(source_domain)
985 base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"
990 "blocker": "artisan.chat",
991 "csv_url": "mastodon/artisan.chat.csv",
993 "blocker": "mastodon.art",
994 "csv_url": "mastodon/mastodon.art.csv",
996 "blocker": "pleroma.envs.net",
997 "csv_url": "mastodon/pleroma.envs.net.csv",
999 "blocker": "oliphant.social",
1000 "csv_url": "mastodon/_unified_tier3_blocklist.csv",
1002 "blocker": "mastodon.online",
1003 "csv_url": "mastodon/mastodon.online.csv",
1005 "blocker": "mastodon.social",
1006 "csv_url": "mastodon/mastodon.social.csv",
1008 "blocker": "mastodon.social",
1009 "csv_url": "other/missing-tier0-mastodon.social.csv",
1011 "blocker": "rage.love",
1012 "csv_url": "mastodon/rage.love.csv",
1014 "blocker": "sunny.garden",
1015 "csv_url": "mastodon/sunny.garden.csv",
1017 "blocker": "sunny.garden",
1018 "csv_url": "mastodon/gardenfence.csv",
1020 "blocker": "solarpunk.moe",
1021 "csv_url": "mastodon/solarpunk.moe.csv",
1023 "blocker": "toot.wales",
1024 "csv_url": "mastodon/toot.wales.csv",
1026 "blocker": "union.place",
1027 "csv_url": "mastodon/union.place.csv",
1029 "blocker": "oliphant.social",
1030 "csv_url": "mastodon/birdsite.csv",
1036 logger.debug("Downloading %d files ...", len(blocklists))
1037 for block in blocklists:
1038 # Is a specific domain given that does not match this blocker?
1039 if isinstance(args.domain, str) and args.domain != block["blocker"]:
1040 logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
1042 elif args.domain in domains:
1043 logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
1046 instances.set_last_blocked(block["blocker"])
1049 logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
1050 response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
1052 logger.debug("response.ok='%s',response.status_code=%d,response.content()=%d", response.ok, response.status_code, len(response.content))
1053 if not response.ok or response.status_code >= 300 or response.content == "":
1054 logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
1057 logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
1058 reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
1064 logger.debug("row[%s]='%s'", type(row), row)
1065 domain = severity = None
1066 reject_media = reject_reports = False
1068 if "#domain" in row:
1069 domain = row["#domain"]
1070 elif "domain" in row:
1071 domain = row["domain"]
1073 logger.debug("row='%s' does not contain domain column", row)
1076 if "#severity" in row:
1077 severity = blocks.alias_block_level(row["#severity"])
1078 elif "severity" in row:
1079 severity = blocks.alias_block_level(row["severity"])
1081 logger.debug("row='%s' does not contain severity column", row)
1084 if "#reject_media" in row and row["#reject_media"].lower() == "true":
1086 elif "reject_media" in row and row["reject_media"].lower() == "true":
1089 if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
1090 reject_reports = True
1091 elif "reject_reports" in row and row["reject_reports"].lower() == "true":
1092 reject_reports = True
1095 logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
1097 logger.debug("domain is empty - SKIPPED!")
1099 elif domain.endswith(".onion"):
1100 logger.debug("domain='%s' is a TOR .onion domain - SKIPPED", domain)
1102 elif domain.endswith(".arpa"):
1103 logger.debug("domain='%s' is a reverse IP address - SKIPPED", domain)
1105 elif domain.endswith(".tld"):
1106 logger.debug("domain='%s' is a fake domain - SKIPPED", domain)
1108 elif domain.find("*") >= 0 or domain.find("?") >= 0:
1109 logger.debug("domain='%s' is obfuscated - Invoking utils.deobfuscate(%s, %s) ...", domain, domain, block["blocker"])
1110 domain = utils.deobfuscate(domain, block["blocker"])
1111 logger.debug("domain='%s' - AFTER!", domain)
1113 if not validators.domain(domain):
1114 logger.debug("domain='%s' is not a valid domain - SKIPPED!", domain)
1116 elif blacklist.is_blacklisted(domain):
1117 logger.warning("domain='%s' is blacklisted - SKIPPED!", domain)
1119 elif blocks.is_instance_blocked(block["blocker"], domain, severity):
1120 logger.debug("block[blocker]='%s' has already blocked domain='%s' with severity='%s' - SKIPPED!", block["blocker"], domain, severity)
1123 logger.debug("Marking domain='%s' as handled", domain)
1124 domains.append(domain)
1126 logger.debug("Processing domain='%s' ...", domain)
1127 processed = processing.domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
1128 logger.debug("processed='%s'", processed)
1130 if processing.block(block["blocker"], domain, None, severity) and config.get("bot_enabled"):
1131 logger.debug("Appending blocked='%s',severity='%s' for blocker='%s' ...", domain, severity, block["blocker"])
1134 "reason" : None, # the CSV rows parsed above provide no block reason
1138 processing.block(block["blocker"], domain, None, "reject_media")
1140 processing.block(block["blocker"], domain, None, "reject_reports")
1142 logger.debug("block[blocker]='%s'", block["blocker"])
1143 if block["blocker"] != "chaos.social":
1144 logger.debug("Invoking instances.set_total_blocks(%s, domains()=%d) ...", block["blocker"], len(domains))
1145 instances.set_total_blocks(block["blocker"], domains)
1147 logger.debug("Checking if blocker='%s' has pending updates ...", block["blocker"])
1148 if instances.has_pending(block["blocker"]):
1149 logger.debug("Flushing updates for block[blocker]='%s' ...", block["blocker"])
1150 instances.update_data(block["blocker"])
1152 logger.debug("Invoking commit() ...")
1153 database.connection.commit()
1155 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1156 if config.get("bot_enabled") and len(blockdict) > 0:
1157 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
1158 network.send_bot_post(block["blocker"], blockdict)
1160 logger.debug("Success! - EXIT!")
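# fetch_txt(): download plain-text block lists (currently seirdy.one's bsl.txt) and process the listed domains.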
1163 def fetch_txt(args: argparse.Namespace) -> int:
1164 logger.debug("args[]='%s' - CALLED!", type(args))
1166 logger.debug("Invoking locking.acquire() ...")
1171 "blocker": "seirdy.one",
1172 "url" : "https://seirdy.one/pb/bsl.txt",
1175 logger.info("Checking %d text file(s) ...", len(urls))
1177 logger.debug("Fetching row[url]='%s' ...", row["url"])
1178 response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
1180 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1181 if response.ok and response.status_code < 300 and response.text != "":
1182 logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
1183 domains = response.text.split("\n")
1185 logger.info("Processing %d domains ...", len(domains))
1186 for domain in domains:
1187 logger.debug("domain='%s' - BEFORE!", domain)
1188 domain = tidyup.domain(domain)
1190 logger.debug("domain='%s' - AFTER!", domain)
1192 logger.debug("domain is empty - SKIPPED!")
1194 elif not domain_helper.is_wanted(domain):
1195 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1197 elif instances.is_recent(domain):
1198 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1201 logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
1202 processed = processing.domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)
1204 logger.debug("processed='%s'", processed)
1206 logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
1209 logger.debug("Success! - EXIT!")
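# fetch_fedipact(): scrape the participant list from fedipact.online and register new instances.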
1212 def fetch_fedipact(args: argparse.Namespace) -> int:
1213 logger.debug("args[]='%s' - CALLED!", type(args))
1215 logger.debug("Invoking locking.acquire() ...")
1218 source_domain = "fedipact.online"
1219 if sources.is_recent(source_domain):
1220 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1223 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1224 sources.update(source_domain)
1226 logger.info("Fetching / from source_domain='%s' ...", source_domain)
1227 response = utils.fetch_url(
1228 f"https://{source_domain}",
1229 network.web_headers,
1230 (config.get("connection_timeout"), config.get("read_timeout"))
1233 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1234 if response.ok and response.status_code < 300 and response.text != "":
1235 logger.debug("Parsing %d Bytes ...", len(response.text))
1237 doc = bs4.BeautifulSoup(response.text, "html.parser")
1238 logger.debug("doc[]='%s'", type(doc))
1240 rows = doc.findAll("li")
1241 logger.info("Checking %d row(s) ...", len(rows))
1243 logger.debug("row[]='%s'", type(row))
1244 domain = tidyup.domain(row.contents[0])
1246 logger.debug("domain='%s' - AFTER!", domain)
1248 logger.debug("domain is empty - SKIPPED!")
1251 logger.debug("domain='%s' - BEFORE!", domain)
1252 domain = domain.encode("idna").decode("utf-8")
1253 logger.debug("domain='%s' - AFTER!", domain)
1255 if not domain_helper.is_wanted(domain):
1256 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1258 elif instances.is_registered(domain):
1259 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1261 elif instances.is_recent(domain):
1262 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1265 logger.info("Fetching domain='%s' ...", domain)
1266 federation.fetch_instances(domain, "beach.city", None, inspect.currentframe().f_code.co_name)
1268 logger.debug("Success! - EXIT!")
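# fetch_joinmobilizon(): fetch the instance list from instances.joinmobilizon.org and register new hosts.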
1271 def fetch_joinmobilizon(args: argparse.Namespace) -> int:
1272 logger.debug("args[]='%s' - CALLED!", type(args))
1274 logger.debug("Invoking locking.acquire() ...")
1277 source_domain = "instances.joinmobilizon.org"
1278 if sources.is_recent(source_domain):
1279 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1282 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1283 sources.update(source_domain)
1285 logger.info("Fetching instances from source_domain='%s' ...", source_domain)
1286 raw = utils.fetch_url(
1287 f"https://{source_domain}/api/v1/instances",
1288 network.web_headers,
1289 (config.get("connection_timeout"), config.get("read_timeout"))
1291 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1293 parsed = json.loads(raw)
1294 logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
1296 if "data" not in parsed:
1297 logger.warning("parsed()=%d does not contain key 'data'", len(parsed))
1300 logger.info("Checking %d instances ...", len(parsed["data"]))
1301 for row in parsed["data"]:
1302 logger.debug("row[]='%s'", type(row))
1303 if "host" not in row:
1304 logger.warning("row='%s' does not contain key 'host' - SKIPPED!", row)
1306 elif not domain_helper.is_wanted(row["host"]):
1307 logger.debug("row[host]='%s' is not wanted - SKIPPED!", row["host"])
1309 elif instances.is_registered(row["host"]):
1310 logger.debug("row[host]='%s' is already registered - SKIPPED!", row["host"])
1313 logger.info("Fetching row[host]='%s' ...", row["host"])
1314 federation.fetch_instances(row["host"], "demo.mobilizon.org", None, inspect.currentframe().f_code.co_name)
1316 logger.debug("Success! - EXIT!")
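# fetch_joinmisskey(): fetch instances.json from instanceapp.misskey.page and register new Misskey instances.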
1319 def fetch_joinmisskey(args: argparse.Namespace) -> int:
1320 logger.debug("args[]='%s' - CALLED!", type(args))
1322 logger.debug("Invoking locking.acquire() ...")
1325 source_domain = "instanceapp.misskey.page"
1326 if sources.is_recent(source_domain):
1327 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1330 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1331 sources.update(source_domain)
1333 logger.info("Fetching instances.json from source_domain='%s' ...", source_domain)
1334 raw = utils.fetch_url(
1335 f"https://{source_domain}/instances.json",
1336 network.web_headers,
1337 (config.get("connection_timeout"), config.get("read_timeout"))
1339 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1341 parsed = json.loads(raw)
1342 logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
1344 if "instancesInfos" not in parsed:
1345 logger.warning("parsed()=%d does not contain element 'instancesInfos'", len(parsed))
1348 logger.info("Checking %d instance(s) ...", len(parsed["instancesInfos"]))
1349 for row in parsed["instancesInfos"]:
1350 logger.debug("row[%s]='%s'", type(row), row)
1351 if "url" not in row:
1352 logger.warning("row()=%d does not have element 'url' - SKIPPED!", len(row))
1354 elif not domain_helper.is_wanted(row["url"]):
1355 logger.debug("row[url]='%s' is not wanted - SKIPPED!", row["url"])
1357 elif instances.is_registered(row["url"]):
1358 logger.debug("row[url]='%s' is already registered - SKIPPED!", row["url"])
1361 logger.info("Fetching row[url]='%s' ...", row["url"])
1362 federation.fetch_instances(row["url"], "misskey.io", None, inspect.currentframe().f_code.co_name)
1364 logger.debug("Success! - EXIT!")
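# fetch_joinfediverse(): scrape the FediBlock tables on joinfediverse.wiki and record the listed blocks.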
1367 def fetch_joinfediverse(args: argparse.Namespace) -> int:
1368 logger.debug("args[]='%s' - CALLED!", type(args))
1370 logger.debug("Invoking locking.acquire() ...")
1373 source_domain = "joinfediverse.wiki"
1374 if sources.is_recent(source_domain):
1375 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1378 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1379 sources.update(source_domain)
1381 logger.info("Fetching /FediBlock wiki page from source_domain='%s' ...", source_domain)
1382 raw = utils.fetch_url(
1383 f"https://{source_domain}/FediBlock",
1384 network.web_headers,
1385 (config.get("connection_timeout"), config.get("read_timeout"))
1387 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1389 doc = bs4.BeautifulSoup(raw, "html.parser")
1390 logger.debug("doc[]='%s'", type(doc))
1392 tables = doc.findAll("table", {"class": "wikitable"})
1394 logger.info("Analyzing %d table(s) ...", len(tables))
1396 for table in tables:
1397 logger.debug("table[]='%s'", type(table))
1399 rows = table.findAll("tr")
1400 logger.info("Checking %d row(s) ...", len(rows))
1401 block_headers = dict()
1403 logger.debug("row[%s]='%s'", type(row), row)
1405 headers = row.findAll("th")
1406 logger.debug("Found headers()=%d header(s)", len(headers))
1407 if len(headers) > 1:
1408 block_headers = dict()
1410 for header in headers:
1412 logger.debug("header[]='%s',cnt=%d", type(header), cnt)
1413 text = header.contents[0]
1415 logger.debug("text[]='%s'", type(text))
1416 if not isinstance(text, str):
1417 logger.debug("text[]='%s' is not of type 'str' - SKIPPED!", type(text))
1419 elif validators.domain(text.strip()):
1420 logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
1423 text = tidyup.domain(text.strip())
1424 logger.debug("text='%s' - AFTER!", text)
1425 if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
1426 logger.debug("Found header: '%s'=%d", text, cnt)
1427 block_headers[cnt] = text
1429 elif len(block_headers) == 0:
1430 logger.debug("row is not scrapable - SKIPPED!")
1432 elif len(block_headers) > 0:
1433 logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
1437 for element in row.find_all(["th", "td"]):
1439 logger.debug("element[]='%s',cnt=%d", type(element), cnt)
1440 if cnt in block_headers:
1441 logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])
1443 text = element.text.strip()
1444 key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"
1446 logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
1447 if key in ["domain", "instance"]:
1449 elif key == "reason":
1450 block[key] = tidyup.reason(text)
1451 elif key == "subdomain(s)":
1454 block[key] = text.split("/")
1456 logger.debug("key='%s'", key)
1459 logger.debug("block()=%d ...", len(block))
1461 logger.debug("Appending block()=%d ...", len(block))
1462 blocklist.append(block)
1464 logger.debug("blocklist()=%d", len(blocklist))
1466 database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
1467 domains = database.cursor.fetchall()
1469 logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
1471 for block in blocklist:
1472 logger.debug("block='%s'", block)
1473 if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
1474 origin = block["blocked"]
1475 logger.debug("origin='%s'", origin)
1476 for subdomain in block["subdomain(s)"]:
1477 block["blocked"] = subdomain + "." + origin
1478 logger.debug("block[blocked]='%s'", block["blocked"])
1479 blocking.append(block)
1481 blocking.append(block)
1483 logger.debug("blocking()=%d", len(blocking))
1484 for block in blocking:
1485 logger.debug("block[]='%s'", type(block))
1486 if "blocked" not in block:
1487 raise KeyError(f"block()={len(block)} does not have element 'blocked'")
1489 block["blocked"] = tidyup.domain(block["blocked"]).encode("idna").decode("utf-8")
1490 logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
1492 if block["blocked"] == "":
1493 logger.debug("block[blocked] is empty - SKIPPED!")
1495 elif not domain_helper.is_wanted(block["blocked"]):
1496 logger.debug("block[blocked]='%s' is not wanted - SKIPPED!", block["blocked"])
1498 elif instances.is_recent(block["blocked"]):
1499 logger.debug("block[blocked]='%s' has been recently checked - SKIPPED!", block["blocked"])
1502 logger.debug("Processing blocked='%s' ...", block["blocked"])
1503 processing.domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
1506 for blocker in domains:
1507 blocker = blocker[0]
1508 logger.debug("blocker[%s]='%s'", type(blocker), blocker)
1509 instances.set_last_blocked(blocker)
1511 for block in blocking:
1512 logger.debug("block[blocked]='%s',block[block reason(s)]='%s' - BEFORE!", block["blocked"], block["block reason(s)"] if "block reason(s)" in block else None)
1513 block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None
1515 logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
1516 if block["blocked"] == "":
1517 logger.debug("block[blocked] is empty - SKIPPED!")
1519 elif not domain_helper.is_wanted(block["blocked"]):
1520 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1523 logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
1524 if processing.block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
1525 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
1527 "blocked": block["blocked"],
1528 "reason" : block["reason"],
1531 if instances.has_pending(blocker):
1532 logger.debug("Flushing updates for blocker='%s' ...", blocker)
1533 instances.update_data(blocker)
1535 logger.debug("Invoking commit() ...")
1536 database.connection.commit()
1538 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1539 if config.get("bot_enabled") and len(blockdict) > 0:
1540 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
1541 network.send_bot_post(blocker, blockdict)
1543 logger.debug("Success! - EXIT!")
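# recheck_obfuscation(): re-fetch block lists for instances flagged with has_obfuscation and try to deobfuscate their blocked entries.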

def recheck_obfuscation(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if isinstance(args.domain, str) and args.domain != "" and domain_helper.is_wanted(args.domain):
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
    elif isinstance(args.software, str) and args.software != "":
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
    else:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")

    rows = database.cursor.fetchall()
    logger.info("Checking %d domains ...", len(rows))
    for row in rows:
        logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        if (args.force is None or not args.force) and args.domain is None and args.software is None and instances.is_recent(row["domain"], "last_blocked"):
            logger.debug("row[domain]='%s' has been recently checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force))
            continue

        blocking = list()
        if row["software"] == "pleroma":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
        elif row["software"] == "mastodon":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
        elif row["software"] == "lemmy":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
        elif row["software"] == "friendica":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = friendica.fetch_blocks(row["domain"])
        elif row["software"] == "misskey":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = misskey.fetch_blocks(row["domain"])
        else:
            logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])

        logger.debug("row[domain]='%s'", row["domain"])
        # chaos.social requires special care ...
        if row["domain"] != "chaos.social":
            logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
            instances.set_last_blocked(row["domain"])
            instances.set_total_blocks(row["domain"], blocking)

        obfuscated = 0
        blockdict = list()

        logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
        for block in blocking:
            logger.debug("block[blocked]='%s'", block["blocked"])
            blocked = None

            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
                logger.debug("block='%s' is obfuscated.", block["blocked"])
                obfuscated = obfuscated + 1
                blocked = utils.deobfuscate(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
            elif not domain_helper.is_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
                logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
                continue

            logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
            if blocked is not None and blocked != block["blocked"]:
                logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
                obfuscated = obfuscated - 1

                if blocks.is_instance_blocked(row["domain"], blocked):
                    logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
                    continue
                elif blacklist.is_blacklisted(blocked):
                    logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
                    continue

                block["block_level"] = blocks.alias_block_level(block["block_level"])

                logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
                if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], row["domain"])
                    blockdict.append({
                        "blocked": blocked,
                        "reason" : block["reason"],
                    })

        logger.debug("Setting obfuscated=%d for row[domain]='%s' ...", obfuscated, row["domain"])
        instances.set_obfuscated_blocks(row["domain"], obfuscated)

        logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
        if obfuscated == 0 and len(blocking) > 0:
            logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
            instances.set_has_obfuscation(row["domain"], False)

        if instances.has_pending(row["domain"]):
            logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
            instances.update_data(row["domain"])

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", row["domain"], len(blockdict))
            network.send_bot_post(row["domain"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0
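
# fetch_fedilist() pulls a CSV export from demo.fedilist.com; only the "hostname"
# column is required by the loop below.  Illustrative shape of such an export (not
# an excerpt from the real service):
#
#   hostname,software
#   pleroma.example,pleroma
#   mastodon.example,mastodon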

def fetch_fedilist(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "demo.fedilist.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    url = f"http://{source_domain}/instance/csv?onion=not"
    if args.software is not None and args.software != "":
        logger.debug("args.software='%s'", args.software)
        url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"

    logger.info("Fetching url='%s' ...", url)
    response = reqto.get(
        url,
        headers=network.web_headers,
        timeout=(config.get("connection_timeout"), config.get("read_timeout")),
        allow_redirects=False
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if not response.ok or response.status_code >= 300 or len(response.content) == 0:
        logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.content))
        return 1

    reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")

    logger.debug("reader[]='%s'", type(reader))
    if reader is None:
        logger.warning("Failed parsing response.content()=%d as CSV content", len(response.content))
        return 1

    rows = list(reader)

    logger.info("Checking %d rows ...", len(rows))
    for row in rows:
        logger.debug("row[]='%s'", type(row))
        if "hostname" not in row:
            logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(row))
            continue

        logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"])
        domain = tidyup.domain(row["hostname"])
        logger.debug("domain='%s' - AFTER!", domain)

        if domain == "":
            logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"])
            continue

        logger.debug("domain='%s' - BEFORE!", domain)
        domain = domain.encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            continue
        elif (args.force is None or not args.force) and instances.is_registered(domain):
            logger.debug("domain='%s' is already registered, --force not specified: args.force[]='%s'", domain, type(args.force))
            continue
        elif instances.is_recent(domain):
            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
            continue

        logger.info("Fetching instances from domain='%s' ...", domain)
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
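
# update_nodeinfo() re-runs federation.determine_software() for the selected
# instances (all stale ones by default, or those matching args.domain / args.software)
# and records a changed software type plus the last_nodeinfo timestamp.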

def update_nodeinfo(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        logger.debug("Fetching args.domain='%s'", args.domain)
        database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ?", [args.domain])
    elif args.software is not None and args.software != "":
        logger.info("Fetching domains for args.software='%s'", args.software)
        database.cursor.execute("SELECT domain, software FROM instances WHERE software = ?", [args.software])
    else:
        logger.info("Fetching domains for recently updated ...")
        database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL", [time.time() - config.get("recheck_nodeinfo")])

    domains = database.cursor.fetchall()

    logger.info("Checking %d domain(s) ...", len(domains))
    cnt = 0
    for row in domains:
        logger.debug("row[]='%s'", type(row))
        if not args.force and instances.is_recent(row["domain"], "last_nodeinfo"):
            logger.debug("row[domain]='%s' has been recently checked - SKIPPED!", row["domain"])
            continue

        try:
            logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
            software = federation.determine_software(row["domain"])

            logger.debug("Determined software='%s'", software)
            if (software != row["software"] and software is not None) or args.force is True:
                logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
                instances.set_software(row["domain"], software)

            if software is not None:
                logger.debug("Setting row[domain]='%s' as successfully determined ...", row["domain"])
                instances.set_success(row["domain"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
            instances.set_last_error(row["domain"], exception)

        instances.set_last_nodeinfo(row["domain"])
        instances.update_data(row["domain"])
        cnt = cnt + 1

    logger.debug("Success! - EXIT!")
    return 0
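
# fetch_instances_social() talks to the instances.social API and therefore needs a
# bearer token in the "instances_social_api_key" configuration key (key name taken
# from the check below; the surrounding config.json layout shown here is illustrative):
#
#   {
#     "instances_social_api_key": "<your token>"
#   }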

def fetch_instances_social(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "instances.social"

    if config.get("instances_social_api_key") == "":
        logger.error("API key not set. Please set in your config.json file.")
        return 1
    elif sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    headers = {
        "Authorization": f"Bearer {config.get('instances_social_api_key')}",
    }

    logger.info("Fetching list from source_domain='%s' ...", source_domain)
    fetched = network.get_json_api(
        source_domain,
        "/api/1.0/instances/list?count=0&sort_by=name",
        headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )
    logger.debug("fetched[]='%s'", type(fetched))

    if "error_message" in fetched:
        logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
        return 1
    elif "exception" in fetched:
        logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
        return 1
    elif "json" not in fetched:
        logger.warning("fetched has no element 'json' - EXIT!")
        return 1
    elif "instances" not in fetched["json"]:
        logger.warning("fetched[json] has no element 'instances' - EXIT!")
        return 1

    domains = list()
    rows = fetched["json"]["instances"]

    logger.info("Checking %d row(s) ...", len(rows))
    for row in rows:
        logger.debug("row[]='%s'", type(row))
        domain = tidyup.domain(row["name"])
        logger.debug("domain='%s' - AFTER!", domain)

        if domain == "":
            logger.debug("domain is empty - SKIPPED!")
            continue

        logger.debug("domain='%s' - BEFORE!", domain)
        domain = domain.encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            continue
        elif domain in domains:
            logger.debug("domain='%s' is already added - SKIPPED!", domain)
            continue
        elif instances.is_registered(domain):
            logger.debug("domain='%s' is already registered - SKIPPED!", domain)
            continue
        elif instances.is_recent(domain):
            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
            continue

        logger.info("Fetching instances from domain='%s'", domain)
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
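
# fetch_relays() scrapes the landing page of each known relay.  For "activityrelay"
# the member domains are plain text nodes inside a <p> that starts with
# "registered instances"; for "aoderelay" and "selective-relay" they are links inside
# <section class="instance"> or <div id="instances"><li> elements.  Illustrative
# activityrelay markup (not taken from a real relay):
#
#   <p>registered instances:<br/>relay-member.example<br/>other.example</p>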

def fetch_relays(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        database.cursor.execute("SELECT domain, software FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay') AND domain = ? LIMIT 1", [args.domain])
    else:
        database.cursor.execute("SELECT domain, software FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay')")

    domains = list()
    rows = database.cursor.fetchall()

    logger.info("Checking %d relays ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
        if not args.force and instances.is_recent(row["domain"]):
            logger.debug("row[domain]='%s' has been recently fetched - SKIPPED!", row["domain"])
            continue

        try:
            logger.info("Fetching / from relay row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
            raw = utils.fetch_url(
                f"https://{row['domain']}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text  # utils.fetch_url() is assumed to return a requests.Response here
            logger.debug("raw[%s]()=%d", type(raw), len(raw))
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching from relay '%s': '%s'", type(exception), row["domain"], str(exception))
            instances.set_last_error(row["domain"], exception)
            instances.set_last_instance_fetch(row["domain"])
            instances.update_data(row["domain"])
            continue

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        logger.debug("row[software]='%s'", row["software"])
        if row["software"] == "activityrelay":
            logger.debug("Checking row[domain]='%s' ...", row["domain"])
            tags = doc.findAll("p")

            logger.debug("Checking %d paragraphs ...", len(tags))
            for tag in tags:
                logger.debug("tag[]='%s'", type(tag))
                if len(tag.contents) == 0:
                    logger.debug("tag='%s' is an empty tag - SKIPPED!", tag)
                    continue
                elif "registered instances" not in tag.contents[0]:
                    logger.debug("Skipping paragraph, text not found.")
                    continue

                logger.debug("Found tag.contents[0][]='%s'", tag.contents[0])
                for domain in tag.contents:
                    logger.debug("domain[%s]='%s'", type(domain), domain)
                    if not isinstance(domain, bs4.element.NavigableString) or "registered instances" in domain:
                        continue

                    domain = str(domain)
                    if not domain_helper.is_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                        continue

                    logger.debug("domain='%s' - BEFORE!", domain)
                    domain = tidyup.domain(domain)
                    logger.debug("domain='%s' - AFTER!", domain)

                    if domain == "":
                        logger.debug("Empty domain after tidyup.domain() from origin='%s' - SKIPPED!", row["domain"])
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif dict_helper.has_key(domains, "domain", domain):
                        logger.debug("domain='%s' already added", domain)
                        continue

                    logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
                    domains.append({
                        "domain": domain,
                        "origin": row["domain"],
                    })
        elif row["software"] in ["aoderelay", "selective-relay"]:
            logger.debug("Checking row[domain]='%s' ...", row["domain"])
            if row["software"] == "aoderelay":
                tags = doc.findAll("section", {"class": "instance"})
            else:
                tags = doc.find("div", {"id": "instances"}).findAll("li")

            logger.debug("Checking %d tags ...", len(tags))
            for tag in tags:
                logger.debug("tag[]='%s'", type(tag))

                link = tag.find("a")
                logger.debug("link[%s]='%s'", type(link), link)
                if link is None:
                    logger.warning("tag='%s' has no a-tag ...", tag)
                    continue

                components = urlparse(link["href"])
                domain = components.netloc.lower()

                if not domain_helper.is_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue

                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain)
                logger.debug("domain='%s' - AFTER!", domain)

                if domain == "":
                    logger.debug("Empty domain after tidyup.domain() from origin='%s' - SKIPPED!", row["domain"])
                    continue
                elif instances.is_registered(domain):
                    logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                    continue
                elif dict_helper.has_key(domains, "domain", domain):
                    logger.debug("domain='%s' already added", domain)
                    continue

                logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
                domains.append({
                    "domain": domain,
                    "origin": row["domain"],
                })
        else:
            logger.warning("row[domain]='%s',row[software]='%s' is not supported", row["domain"], row["software"])

        logger.debug("Updating last_instance_fetch for row[domain]='%s' ...", row["domain"])
        instances.set_last_instance_fetch(row["domain"])
        instances.update_data(row["domain"])

    logger.info("Found %d domains to add ...", len(domains))
    for row in domains:
        logger.info("Fetching row[domain]='%s',row[origin]='%s' ...", row["domain"], row["origin"])
        federation.fetch_instances(row["domain"], row["origin"], None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
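
# convert_idna() rewrites domain names that are still stored in their unicode form
# into punycode (IDNA), so that lookups and joins use one consistent representation, e.g.:
#
#   >>> "münchen.example".encode("idna").decode("utf-8")
#   'xn--mnchen-3ya.example'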

def convert_idna(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    database.cursor.execute("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    instances.translate_idnas(rows, "domain")

    database.cursor.execute("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    instances.translate_idnas(rows, "origin")

    database.cursor.execute("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    blocks.translate_idnas(rows, "blocker")

    database.cursor.execute("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    blocks.translate_idnas(rows, "blocked")

    logger.debug("Success! - EXIT!")
    return 0