1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
23 from urllib.parse import urlparse
33 from fba import database
36 from fba.helpers import blacklist
37 from fba.helpers import blocklists
38 from fba.helpers import config
39 from fba.helpers import cookies
40 from fba.helpers import dicts as dict_helper
41 from fba.helpers import domain as domain_helper
42 from fba.helpers import locking
43 from fba.helpers import processing
44 from fba.helpers import software as software_helper
45 from fba.helpers import tidyup
47 from fba.http import federation
48 from fba.http import network
50 from fba.models import blocks
51 from fba.models import instances
52 from fba.models import sources
54 from fba.networks import friendica
55 from fba.networks import lemmy
56 from fba.networks import mastodon
57 from fba.networks import misskey
58 from fba.networks import pleroma
# Module-wide logging setup: INFO level by default. Uncomment the
# setLevel() line below for verbose DEBUG output from this module only.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)
def check_instance(args: argparse.Namespace) -> int:
    """Check whether args.domain is syntactically valid, not blacklisted and
    not already registered; logs the outcome and returns a status code.

    NOTE(review): this chunk appears to have lines elided (the 'status = ...'
    assignments, the enclosing 'else:' branch and the final 'return status'
    are not visible here) - verify against the upstream file.
    """
    logger.debug("args.domain='%s' - CALLED!", args.domain)

    if not validators.domain(args.domain):
        # Not a syntactically valid domain name
        logger.warning("args.domain='%s' is not valid", args.domain)
    elif blacklist.is_blacklisted(args.domain):
        # Explicitly blacklisted - never processed
        logger.warning("args.domain='%s' is blacklisted", args.domain)
    elif instances.is_registered(args.domain):
        # Already present in the instances table
        logger.warning("args.domain='%s' is already registered", args.domain)

    # Reached when none of the above checks matched (enclosing 'else:' header
    # not visible in this chunk)
    logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
def check_nodeinfo(args: argparse.Namespace) -> int:
    """Sanity-check all stored nodeinfo URLs: warn when an absolute
    nodeinfo_url does not contain its own domain (raw or punycode form).

    NOTE(review): the counter 'cnt' (init/increment) and the 'return'
    statement are not visible in this chunk - verify against the upstream
    file.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    # All instances that have a nodeinfo URL recorded
    database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")

    for row in database.cursor.fetchall():
        logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        # IDNA-encode so internationalized domains can be matched against the
        # (ASCII) URL string
        punycode = row["domain"].encode("idna").decode("utf-8")

        if row["nodeinfo_url"].startswith("/"):
            # Relative URLs carry no host part, nothing to cross-check
            logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
        elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
            # Neither punycode nor raw domain appears in the URL - nodeinfo is
            # hosted somewhere unexpected
            logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])

    logger.info("Found %d row(s)", cnt)

    logger.debug("EXIT!")
def fetch_pixelfed_api(args: argparse.Namespace) -> int:
    """Fetch the public server list from the pixelfed.org API and queue every
    new, wanted domain for instance fetching.

    NOTE(review): several structural lines ('try:', 'return' statements,
    'continue' statements and the 'for row in rows:' header) are not visible
    in this chunk - verify against the upstream file.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF by default, you don't have to add network.source_headers by yourself here

    source_domain = "pixelfed.org"

    if sources.is_recent(source_domain):
        # Rate-limit: this source was queried recently, skip this run
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)

    logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
    sources.update(source_domain)

    # Determine CSRF headers first; some instances require a token
    # NOTE(review): the matching 'try:' line is not visible in this chunk
    logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
    headers = csrf.determine(source_domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)

    logger.info("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
    # NOTE(review): further arguments (host, headers) and the closing
    # parenthesis of this call are not visible in this chunk
    fetched = network.get_json_api(
        "/api/v1/servers/all.json?scope=All&country=all&language=all",
        (config.get("connection_timeout"), config.get("read_timeout"))

    logger.debug("JSON API returned %d elements", len(fetched))
    if "error_message" in fetched:
        logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
    elif "data" not in fetched["json"]:
        logger.warning("API did not return JSON with 'data' element - EXIT!")

    rows = fetched["json"]["data"]
    logger.info("Checking %d fetched rows ...", len(rows))
    # NOTE(review): the loop header over 'rows' is not visible in this chunk
    logger.debug("row[]='%s'", type(row))
    if "domain" not in row:
        logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
    elif row["domain"] == "":
        logger.debug("row[domain] is empty - SKIPPED!")

    logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
    # IDNA-encode for consistent ASCII domain handling
    domain = row["domain"].encode("idna").decode("utf-8")
    logger.debug("domain='%s' - AFTER!", domain)

    if not domain_helper.is_wanted(domain):
        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
    elif instances.is_registered(domain):
        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
    elif instances.is_recent(domain):
        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)

    logger.debug("Fetching instances from domain='%s' ...", domain)
    federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))

    logger.debug("Success! - EXIT!")
def fetch_bkali(args: argparse.Namespace) -> int:
    """Fetch a domain list from the gql.api.bka.li GraphQL API and register
    all new, wanted instances.

    NOTE(review): several structural lines ('try:', 'domains = list()',
    guards like 'if len(rows) == 0:' and 'return' statements) are not
    visible in this chunk - verify against the upstream file.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")

    source_domain = "gql.api.bka.li"
    if sources.is_recent(source_domain):
        # Rate-limit: this source was queried recently, skip this run
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)

    logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
    sources.update(source_domain)

    logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
    # GraphQL query: all nodeinfo records, ordered by domain ascending
    # NOTE(review): further arguments (URL, headers) and the closing
    # parenthesis of this call are not visible in this chunk
    fetched = network.post_json_api(
        "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"

    logger.debug("fetched[]='%s'", type(fetched))
    if "error_message" in fetched:
        logger.warning("post_json_api() for 'gql.sources.bka.li' returned error message='%s", fetched["error_message"])
    elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
        logger.warning("post_json_api() returned error: '%s", fetched["error"]["message"])

    rows = fetched["json"]

    logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
    # NOTE(review): the 'if len(rows) == 0:' guard line is not visible here
    raise Exception("WARNING: Returned no records")
    elif "data" not in rows:
        raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
    elif "nodeinfo" not in rows["data"]:
        raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

    for entry in rows["data"]["nodeinfo"]:
        logger.debug("entry[%s]='%s'", type(entry), entry)
        if "domain" not in entry:
            logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
        elif entry["domain"] == "":
            logger.debug("entry[domain] is empty - SKIPPED!")
        elif not domain_helper.is_wanted(entry["domain"]):
            logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
        elif instances.is_registered(entry["domain"]):
            logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
        elif instances.is_recent(entry["domain"]):
            logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])

        logger.debug("Adding domain='%s' ...", entry["domain"])
        domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))

    logger.debug("domains()=%d", len(domains))

    logger.info("Adding %d new instances ...", len(domains))
    for domain in domains:
        logger.debug("domain='%s' - BEFORE!", domain)
        # IDNA-encode for consistent ASCII domain handling
        domain = domain.encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        # NOTE(review): 'try:' line not visible here
        logger.info("Fetching instances from domain='%s' ...", domain)
        federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
def fetch_blocks(args: argparse.Namespace) -> int:
    """Fetch block lists from all (or one selected) registered instance(s)
    of supported software and store new blocks in the database.

    NOTE(review): several structural lines ('return' statements, 'continue'
    statements, 'try:' lines, some 'else:'/branch headers and the
    'blockdict = list()' initialisation) are not visible in this chunk -
    verify against the upstream file.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)

    logger.debug("Invoking locking.acquire() ...")

    # Select which instances to (re-)check, depending on CLI arguments
    # NOTE(review): the closing parentheses of the execute() calls below and
    # at least one branch header between the query variants are not visible
    # in this chunk
    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]

        logger.debug("Re-checking all instances ...")
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC"

        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)

        if nodeinfo_url is None:
            logger.debug("blocker='%s',software='%s' has no nodeinfo_url set - SKIPPED!", blocker, software)
        elif not domain_helper.is_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)

        logger.debug("Setting last_blocked,has_obfuscation=false for blocker='%s' ...", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        # chaos.social and excluded blocklists are handled by other commands
        # (see the 'Skipping' message below)
        if blocker != "chaos.social" and not blocklists.is_excluded(blocker):
            logger.debug("blocker='%s',software='%s'", blocker, software)
            # Dispatch on detected software to the matching network module
            if software == "pleroma":
                logger.info("blocker='%s',software='%s'", blocker, software)
                blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "mastodon":
                logger.info("blocker='%s',software='%s'", blocker, software)
                blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "lemmy":
                logger.info("blocker='%s',software='%s'", blocker, software)
                blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "friendica":
                logger.info("blocker='%s',software='%s'", blocker, software)
                blocking = friendica.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "misskey":
                logger.info("blocker='%s',software='%s'", blocker, software)
                blocking = misskey.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            # NOTE(review): 'else:' header not visible here
            logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

            logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
            instances.set_total_blocks(blocker, blocking)
        # NOTE(review): 'else:' branch header not visible here
        logger.debug("Skipping blocker='%s', run ./fba.py fetch_cs or fetch_oliphant instead!", blocker)

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)

        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])

            # Normalise domain and reason before further processing
            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                # NOTE(review): 'if row is None:'-style guard not visible here
                logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                instances.set_has_obfuscation(blocker, True)

                block["blocked"] = row["domain"]
                origin = row["origin"]
                nodeinfo_url = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some obscure them with question marks, not sure if that's dependent on version or not
                row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                # NOTE(review): 'if row is None:'-style guard not visible here
                logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                instances.set_has_obfuscation(blocker, True)

                block["blocked"] = row["domain"]
                origin = row["origin"]
                nodeinfo_url = row["nodeinfo_url"]

            logger.debug("Looking up instance by domainm, blocked='%s'", block["blocked"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")

            # Strip leading dots and IDNA-encode before registration checks
            logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
            block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
            logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])

            if not domain_helper.is_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            # Map software-specific block level names onto canonical ones
            block["block_level"] = blocks.alias_block_level(block["block_level"])

            if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
                # NOTE(review): the 'blockdict.append({' header and closing
                # '})' are not visible in this chunk
                    "blocked": block["blocked"],
                    "reason" : block["reason"],

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d'", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
def fetch_observer(args: argparse.Namespace) -> int:
    """Fetch instance tables per software type from fediverse.observer and
    register all new, wanted domains.

    NOTE(review): several structural lines ('types = list()', loop headers
    over 'items', 'continue' statements, guards, 'try:'/'else:' headers and
    'return') are not visible in this chunk - verify against the upstream
    file.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")

    source_domain = "fediverse.observer"
    if sources.is_recent(source_domain):
        # Rate-limit: this source was queried recently, skip this run
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)

    logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
    sources.update(source_domain)

    # Build the list of software types either from the site's navigation bar
    # or from the --software argument
    if args.software is None:
        logger.info("Fetching software list ...")
        # NOTE(review): further arguments and the closing parenthesis of this
        # call are not visible in this chunk
        raw = utils.fetch_url(
            f"https://{source_domain}",
            (config.get("connection_timeout"), config.get("read_timeout"))

        logger.debug("raw[%s]()=%d", type(raw), len(raw))
        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        navbar = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"})
        logger.debug("navbar[]='%s'", type(navbar))
        # NOTE(review): 'if navbar is None:'-style guard not visible here
        logger.warning("Cannot find navigation bar, cannot continue!")

        items = navbar.findAll("a", {"class": "dropdown-item"})
        logger.debug("items[]='%s'", type(items))

        logger.info("Checking %d menu items ...", len(items))
        # NOTE(review): 'for item in items:' header not visible here
        logger.debug("item[%s]='%s'", type(item), item)
        if item.text.lower() == "all":
            logger.debug("Skipping 'All' menu entry ...")

        logger.debug("Appending item.text='%s' ...", item.text)
        types.append(tidyup.domain(item.text))
    # NOTE(review): 'else:' branch header not visible here
    logger.info("Adding args.software='%s' as type ...", args.software)
    types.append(args.software)

    logger.info("Fetching %d different table data ...", len(types))
    for software in types:
        logger.debug("software='%s' - BEFORE!", software)
        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)

        # NOTE(review): 'try:' line not visible here
        logger.debug("Fetching table data for software='%s' ...", software)
        raw = utils.fetch_url(
            f"https://{source_domain}/app/views/tabledata.php?software={software}",
            (config.get("connection_timeout"), config.get("read_timeout"))

        logger.debug("raw[%s]()=%d", type(raw), len(raw))
        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        # NOTE(review): 'for item in items:' header not visible here
        logger.debug("item[]='%s'", type(item))
        domain = item.decode_contents()
        logger.debug("domain='%s' - AFTER!", domain)

        # NOTE(review): emptiness check header not visible here
        logger.debug("domain is empty - SKIPPED!")

        logger.debug("domain='%s' - BEFORE!", domain)
        # IDNA-encode for consistent ASCII domain handling
        domain = domain.encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
        elif instances.is_registered(domain):
            logger.debug("domain='%s' is already registered - SKIPPED!", domain)

        # Map the menu label onto the canonical software name
        software = software_helper.alias(software)
        logger.info("Fetching instances for domain='%s'", domain)
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
def fetch_todon_wiki(args: argparse.Namespace) -> int:
    """Fetch the silenced/limited and suspended server lists from
    wiki.todon.eu and store them as blocks.

    NOTE(review): initialisation of 'blocker', 'blocklist' and 'blockdict',
    plus several structural lines ('try:', 'continue', 'return',
    'blockdict.append(...)' body) are not visible in this chunk - verify
    against the upstream file.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")

    source_domain = "wiki.todon.eu"
    if sources.is_recent(source_domain):
        # Rate-limit: this source was queried recently, skip this run
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)

    logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
    sources.update(source_domain)

    logger.debug("Fetching domainblocks from source_domain='%s'", source_domain)
    # NOTE(review): further arguments and the closing parenthesis of this
    # call are not visible in this chunk
    raw = utils.fetch_url(
        f"https://{source_domain}/todon/domainblocks",
        (config.get("connection_timeout"), config.get("read_timeout"))

    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    # Silenced/limited servers section of the wiki page
    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    # Suspended servers section of the wiki page
    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blocking = blocklist["silenced"] + blocklist["reject"]

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_last_blocked(blocker)
    instances.set_total_blocks(blocker, blocking)

    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d'", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                # NOTE(review): 'try:' line not visible here
                logger.info("Fetching instances from domain='%s' ...", blocked)
                federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_cs) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if blocks.is_instance_blocked(blocker, blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", blocked, block_level, blocker)
                # NOTE(review): the 'blockdict.append({...})' body is not
                # visible in this chunk

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
        network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update_data(blocker)

    logger.debug("Success! - EXIT!")
def fetch_cs(args: argparse.Namespace):
    """Fetch chaos.social's federation.md from GitHub, parse the silenced and
    blocked instance tables and store them as blocks for blocker
    'chaos.social'.

    NOTE(review): initialisation of 'extensions', 'blocklist' and
    'blockdict', plus several structural lines ('try:', 'return',
    'blockdict.append({' header) are not visible in this chunk - verify
    against the upstream file.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")

    source_domain = "raw.githubusercontent.com"
    if sources.is_recent(source_domain):
        # Rate-limit: this source was queried recently, skip this run
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)

    logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
    sources.update(source_domain)

    logger.info("Fetching federation.md from source_domain='%s' ...", source_domain)
    # NOTE(review): further arguments and the closing parenthesis of this
    # call are not visible in this chunk
    raw = utils.fetch_url(
        f"https://{source_domain}/chaossocial/meta/master/federation.md",
        (config.get("connection_timeout"), config.get("read_timeout"))

    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
    # Render the markdown to HTML so its tables can be parsed with bs4
    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc()=%d[]='%s'", len(doc), type(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    blocklist["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    blocklist["reject"] = federation.find_domains(blocked)

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "chaos.social"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_last_blocked(blocker)
    instances.set_total_blocks(blocker, blocking)

    logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
    if len(blocking) > 0:
        for block_level in blocklist:
            logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))

            for row in blocklist[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if not "domain" in row:
                    logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
                elif not instances.is_registered(row["domain"]):
                    # NOTE(review): 'try:' line not visible here
                    logger.info("Fetching instances from domain='%s' ...", row["domain"])
                    federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
                    # NOTE(review): the 'blockdict.append({' header and the
                    # closing '})' are not visible in this chunk
                        "blocked": row["domain"],
                        "reason" : row["reason"],

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update_data(blocker)

    logger.debug("Success! - EXIT!")
def fetch_fba_rss(args: argparse.Namespace) -> int:
    """Fetch an FBA-specific RSS feed (args.feed) and register every new,
    wanted domain found in the item links.

    NOTE(review): initialisation of 'domains', some guards/'continue'
    statements, 'try:' lines and 'return' are not visible in this chunk -
    verify against the upstream file.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")

    components = urlparse(args.feed)

    if sources.is_recent(components.netloc):
        # Rate-limit: this feed host was queried recently, skip this run
        logger.info("API from components.netloc='%s' has recently being accessed - EXIT!", components.netloc)

    logger.debug("components.netloc='%s' has not been recently used, marking ...", components.netloc)
    sources.update(components.netloc)

    logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item[%s]='%s'", type(item), item)
            # The domain is encoded as a query parameter in the item link
            domain = tidyup.domain(item.link.split("=")[1])

            logger.debug("domain='%s' - AFTER!", domain)
            # NOTE(review): emptiness check header not visible here
            logger.debug("domain is empty - SKIPPED!")

            logger.debug("domain='%s' - BEFORE!", domain)
            # IDNA-encode for consistent ASCII domain handling
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))

    logger.info("Adding %d new instances ...", len(domains))
    for domain in domains:
        logger.debug("domain='%s'", domain)
        # NOTE(review): 'try:' line not visible here
        logger.info("Fetching instances from domain='%s' ...", domain)
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
def fetch_fbabot_atom(args: argparse.Namespace) -> int:
    """Fetch the FBA bot account's ATOM feed (default host: ryona.agency, or
    args.feed when it is a valid URL) and register every new, wanted domain
    linked from the feed entries.

    NOTE(review): initialisation of 'domains', some guards/'continue'
    statements, 'try:' lines and 'return' are not visible in this chunk -
    verify against the upstream file.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")

    source_domain = "ryona.agency"
    feed = f"https://{source_domain}/users/fba/feed.atom"

    logger.debug("args.feed[%s]='%s'", type(args.feed), args.feed)
    if args.feed is not None and validators.url(args.feed):
        # Caller supplied an alternative feed URL - use its host as source
        logger.debug("Setting feed='%s' ...", args.feed)
        feed = str(args.feed)
        source_domain = urlparse(args.feed).netloc

    if sources.is_recent(source_domain):
        # Rate-limit: this source was queried recently, skip this run
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)

    logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
    sources.update(source_domain)

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            # Entry content is HTML; extract all anchor hrefs from it
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                logger.debug("element[]='%s'", type(element))
                # A single href may contain a comma-separated domain list
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href)

                    logger.debug("domain='%s' - AFTER!", domain)
                    # NOTE(review): emptiness check header not visible here
                    logger.debug("domain is empty - SKIPPED!")

                    logger.debug("domain='%s' - BEFORE!", domain)
                    # IDNA-encode for consistent ASCII domain handling
                    domain = domain.encode("idna").decode("utf-8")
                    logger.debug("domain='%s' - AFTER!", domain)

                    if not domain_helper.is_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))

    logger.info("Adding %d new instances ...", len(domains))
    for domain in domains:
        logger.debug("domain='%s'", domain)
        # NOTE(review): 'try:' line not visible here
        logger.info("Fetching instances from domain='%s' ...", domain)
        federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
913 def fetch_instances(args: argparse.Namespace) -> int:
# Crawl peers of a single instance (args.domain), then - unless the elided
# control flow exits early - re-crawl all known instances of supported
# software whose last_instance_fetch is stale.  Returns an int exit status.
# NOTE(review): listing has elided lines ('try:'/'return'/loop headers per
# the numbering gaps); e.g. the 'for row in rows:' header before line 960
# is not visible here - confirm against the full file.
914 logger.debug("args[]='%s' - CALLED!", type(args))
916 logger.debug("args.domain='%s' - checking ...", args.domain)
917 if not validators.domain(args.domain):
918 logger.warning("args.domain='%s' is not valid.", args.domain)
920 elif blacklist.is_blacklisted(args.domain):
921 logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
924 logger.debug("Invoking locking.acquire() ...")
928 domain = tidyup.domain(args.domain)
929 origin = software = None
# Reuse previously-recorded origin/software for the domain if present, so
# the crawler does not have to re-detect them.
932 database.cursor.execute("SELECT origin, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
933 row = database.cursor.fetchone()
935 origin = row["origin"]
936 software = row["software"]
940 logger.info("Fetching instances from args.domain='%s',origin='%s',software='%s' ...", domain, origin, software)
941 federation.fetch_instances(domain, origin, software, inspect.currentframe().f_code.co_name)
942 except network.exceptions as exception:
943 logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
944 instances.set_last_error(args.domain, exception)
945 instances.update_data(args.domain)
949 logger.debug("Not fetching more instances - EXIT!")
952 # Loop through some instances
# Only instances of these known fediverse server types are re-crawled, and
# only when their last fetch is older than the 'recheck_instance' interval.
953 database.cursor.execute(
954 "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
957 rows = database.cursor.fetchall()
958 logger.info("Checking %d entries ...", len(rows))
960 logger.debug("row[domain]='%s'", row["domain"])
961 if row["domain"] == "":
962 logger.debug("row[domain] is empty - SKIPPED!")
965 logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
966 domain = row["domain"].encode("idna").decode("utf-8")
967 logger.debug("domain='%s' - AFTER!", domain)
969 if not domain_helper.is_wanted(domain):
970 logger.debug("Domain domain='%s' is not wanted - SKIPPED!", domain)
974 logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", domain, row["origin"], row["software"], row["nodeinfo_url"])
975 federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
976 except network.exceptions as exception:
977 logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
978 instances.set_last_error(domain, exception)
980 logger.debug("Success - EXIT!")
983 def fetch_oliphant(args: argparse.Namespace) -> int:
# Download the oliphant CSV block lists hosted on codeberg.org and import
# each row (blocked domain + severity + reject_media/reject_reports flags)
# as block records for the respective blocker instance.  Returns an int
# exit status.
# NOTE(review): listing has elided lines (numbering gaps), e.g. the
# 'try:'/'continue' statements, the 'domains = list()' / 'blockdict = []'
# initializations, and the reject_media assignments after lines 1050/1052.
984 logger.debug("args[]='%s' - CALLED!", type(args))
986 logger.debug("Invoking locking.acquire() ...")
989 source_domain = "codeberg.org"
990 if sources.is_recent(source_domain):
991 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
994 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
995 sources.update(source_domain)
998 base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"
1002 logger.debug("Downloading %d files ...", len(blocklists.oliphant_blocklists))
1003 for block in blocklists.oliphant_blocklists:
1004 # Is domain given and not equal blocker?
1005 if isinstance(args.domain, str) and args.domain != block["blocker"]:
1006 logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
1008 elif args.domain in domains:
1009 logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
1012 instances.set_last_blocked(block["blocker"])
1015 logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
1016 response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
1018 logger.debug("response.ok='%s',response.status_code=%d,response.content()=%d", response.ok, response.status_code, len(response.content))
# NOTE(review): response.content is bytes (it is .decode()d below), so the
# comparison with the str "" can never be True - likely meant b"" or
# len(response.content) == 0.
1019 if not response.ok or response.status_code >= 300 or response.content == "":
1020 logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
1023 logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
1024 reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
1030 logger.debug("row[%s]='%s'", type(row), row)
1031 domain = severity = None
1032 reject_media = reject_reports = False
# The oliphant CSVs use either commented ('#domain') or plain column names;
# both variants are accepted for every column below.
1034 if "#domain" in row:
1035 domain = row["#domain"]
1036 elif "domain" in row:
1037 domain = row["domain"]
1039 logger.debug("row='%s' does not contain domain column", row)
1042 if "#severity" in row:
1043 severity = blocks.alias_block_level(row["#severity"])
1044 elif "severity" in row:
1045 severity = blocks.alias_block_level(row["severity"])
1047 logger.debug("row='%s' does not contain severity column", row)
1050 if "#reject_media" in row and row["#reject_media"].lower() == "true":
1052 elif "reject_media" in row and row["reject_media"].lower() == "true":
1055 if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
1056 reject_reports = True
1057 elif "reject_reports" in row and row["reject_reports"].lower() == "true":
1058 reject_reports = True
1061 logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
1063 logger.debug("domain is empty - SKIPPED!")
# TOR onion services, reverse-DNS zones and placeholder '.tld' entries are
# never crawled.
1065 elif domain.endswith(".onion"):
1066 logger.debug("domain='%s' is a TOR .onion domain - SKIPPED", domain)
1068 elif domain.endswith(".arpa"):
1069 logger.debug("domain='%s' is a reverse IP address - SKIPPED", domain)
1071 elif domain.endswith(".tld"):
1072 logger.debug("domain='%s' is a fake domain - SKIPPED", domain)
1074 elif domain.find("*") >= 0 or domain.find("?") >= 0:
# Wildcard/obfuscated entries are resolved against the blocker's peer list.
1075 logger.debug("domain='%s' is obfuscated - Invoking utils.deobfuscate(%s, %s) ...", domain, domain, block["blocker"])
1076 domain = utils.deobfuscate(domain, block["blocker"])
1077 logger.debug("domain='%s' - AFTER!", domain)
1079 if not validators.domain(domain):
# NOTE(review): this debug call has a '%s' placeholder but no argument -
# 'domain' is missing from the argument list.
1080 logger.debug("domain='%s' is not a valid domain - SKIPPED!")
1082 elif blacklist.is_blacklisted(domain):
1083 logger.warning("domain='%s' is blacklisted - SKIPPED!", domain)
1085 elif blocks.is_instance_blocked(block["blocker"], domain, severity):
1086 logger.debug("block[blocker]='%s' has already blocked domain='%s' with severity='%s' - SKIPPED!", block["blocker"], domain, severity)
1089 logger.debug("Marking domain='%s' as handled", domain)
1090 domains.append(domain)
1092 logger.debug("Processing domain='%s' ...", domain)
1093 processed = processing.domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
1094 logger.debug("processed='%s'", processed)
1096 if processing.block(block["blocker"], domain, None, severity) and config.get("bot_enabled"):
1097 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", domain, block["block_level"], block["blocker"])
1100 "reason" : block["reason"],
# Record the auxiliary reject_media / reject_reports block levels as
# separate block rows (conditions presumably on elided lines 1103/1105).
1104 processing.block(block["blocker"], domain, None, "reject_media")
1106 processing.block(block["blocker"], domain, None, "reject_reports")
1108 logger.debug("block[blocker]='%s'", block["blocker"])
1109 if not blocklists.is_excluded(block["blocker"]):
1110 logger.debug("Invoking instances.set_total_blocks(%s, domains()=%d) ...", block["blocker"], len(domains))
1111 instances.set_total_blocks(block["blocker"], domains)
1113 logger.debug("Checking if blocker='%s' has pending updates ...", block["blocker"])
1114 if instances.has_pending(block["blocker"]):
1115 logger.debug("Flushing updates for block[blocker]='%s' ...", block["blocker"])
1116 instances.update_data(block["blocker"])
1118 logger.debug("Invoking commit() ...")
1119 database.connection.commit()
1121 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1122 if config.get("bot_enabled") and len(blockdict) > 0:
1123 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
1124 network.send_bot_post(block["blocker"], blockdict)
1126 logger.debug("Success! - EXIT!")
1129 def fetch_txt(args: argparse.Namespace) -> int:
# Fetch plain-text block lists (one domain per line; currently only
# seirdy.one's bsl.txt, per the dict literal below) and feed each wanted,
# not-recently-crawled domain into processing.domain().  Returns an int
# exit status.
# NOTE(review): listing has elided lines (numbering gaps) - e.g. the
# 'urls = [...]' list header and the 'for row in urls:' loop header.
1130 logger.debug("args[]='%s' - CALLED!", type(args))
1132 logger.debug("Invoking locking.acquire() ...")
1137 "blocker": "seirdy.one",
1138 "url" : "https://seirdy.one/pb/bsl.txt",
1141 logger.info("Checking %d text file(s) ...", len(urls))
1143 logger.debug("Fetching row[url]='%s' ...", row["url"])
1144 response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
1146 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1147 if response.ok and response.status_code < 300 and response.text != "":
1148 logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
# One domain per line in the fetched text file.
1149 domains = response.text.split("\n")
1151 logger.info("Processing %d domains ...", len(domains))
1152 for domain in domains:
1153 logger.debug("domain='%s' - BEFORE!", domain)
1154 domain = tidyup.domain(domain)
1156 logger.debug("domain='%s' - AFTER!", domain)
1158 logger.debug("domain is empty - SKIPPED!")
1160 elif not domain_helper.is_wanted(domain):
1161 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1163 elif instances.is_recent(domain):
1164 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1167 logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
1168 processed = processing.domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)
1170 logger.debug("processed='%s'", processed)
1172 logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
1175 logger.debug("Success! - EXIT!")
1178 def fetch_fedipact(args: argparse.Namespace) -> int:
# Scrape the fedipact.online landing page, read one instance domain from
# each <li> element and register unknown ones via
# federation.fetch_instances() with "beach.city" as origin.  Returns an
# int exit status.
# NOTE(review): listing has elided lines (numbering gaps), e.g. the
# 'for row in rows:' loop header before line 1209 and the early 'return's.
1179 logger.debug("args[]='%s' - CALLED!", type(args))
1181 logger.debug("Invoking locking.acquire() ...")
1184 source_domain = "fedipact.online"
1185 if sources.is_recent(source_domain):
1186 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1189 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1190 sources.update(source_domain)
1192 logger.info("Fetching / from source_domain='%s' ...", source_domain)
1193 response = utils.fetch_url(
1194 f"https://{source_domain}",
1195 network.web_headers,
1196 (config.get("connection_timeout"), config.get("read_timeout"))
1199 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1200 if response.ok and response.status_code < 300 and response.text != "":
1201 logger.debug("Parsing %d Bytes ...", len(response.text))
1203 doc = bs4.BeautifulSoup(response.text, "html.parser")
1204 logger.debug("doc[]='%s'", type(doc))
# Each participating instance is listed as an <li> whose first child text
# node is the domain.
1206 rows = doc.findAll("li")
1207 logger.info("Checking %d row(s) ...", len(rows))
1209 logger.debug("row[]='%s'", type(row))
1210 domain = tidyup.domain(row.contents[0])
1212 logger.debug("domain='%s' - AFTER!", domain)
1214 logger.debug("domain is empty - SKIPPED!")
1217 logger.debug("domain='%s' - BEFORE!", domain)
# Normalize to punycode (IDNA) before registry checks.
1218 domain = domain.encode("idna").decode("utf-8")
1219 logger.debug("domain='%s' - AFTER!", domain)
1221 if not domain_helper.is_wanted(domain):
1222 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1224 elif instances.is_registered(domain):
1225 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1227 elif instances.is_recent(domain):
1228 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1231 logger.info("Fetching domain='%s' ...", domain)
1232 federation.fetch_instances(domain, "beach.city", None, inspect.currentframe().f_code.co_name)
1234 logger.debug("Success! - EXIT!")
1237 def fetch_joinmobilizon(args: argparse.Namespace) -> int:
# Query the instances.joinmobilizon.org JSON API and register every wanted,
# not-yet-registered Mobilizon instance ("host" key per row) via
# federation.fetch_instances() with "demo.mobilizon.org" as origin.
# Returns an int exit status.
# NOTE(review): listing has elided lines (numbering gaps), e.g. early
# 'return' statements after the is_recent and missing-'data' branches.
1238 logger.debug("args[]='%s' - CALLED!", type(args))
1240 logger.debug("Invoking locking.acquire() ...")
1243 source_domain = "instances.joinmobilizon.org"
1244 if sources.is_recent(source_domain):
1245 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1248 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1249 sources.update(source_domain)
1251 logger.info("Fetching instances from source_domain='%s' ...", source_domain)
1252 raw = utils.fetch_url(
1253 f"https://{source_domain}/api/v1/instances",
1254 network.web_headers,
1255 (config.get("connection_timeout"), config.get("read_timeout"))
1257 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1259 parsed = json.loads(raw)
1260 logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
1262 if "data" not in parsed:
# NOTE(review): the format string has a '%d' placeholder but no argument is
# passed - 'len(parsed)' is missing here.
1263 logger.warning("parsed()=%d does not contain key 'data'")
1266 logger.info("Checking %d instances ...", len(parsed["data"]))
1267 for row in parsed["data"]:
1268 logger.debug("row[]='%s'", type(row))
1269 if "host" not in row:
1270 logger.warning("row='%s' does not contain key 'host' - SKIPPED!", row)
1272 elif not domain_helper.is_wanted(row["host"]):
1273 logger.debug("row[host]='%s' is not wanted - SKIPPED!", row["host"])
1275 elif instances.is_registered(row["host"]):
1276 logger.debug("row[host]='%s' is already registered - SKIPPED!", row["host"])
1279 logger.info("Fetching row[host]='%s' ...", row["host"])
1280 federation.fetch_instances(row["host"], "demo.mobilizon.org", None, inspect.currentframe().f_code.co_name)
1282 logger.debug("Success! - EXIT!")
1285 def fetch_joinmisskey(args: argparse.Namespace) -> int:
# Fetch instances.json from instanceapp.misskey.page and register every
# wanted, not-yet-registered Misskey instance ("url" key per row) via
# federation.fetch_instances() with "misskey.io" as origin.  Returns an
# int exit status.
# NOTE(review): listing has elided lines (numbering gaps), e.g. early
# 'return' statements after the is_recent / missing-key branches.
1286 logger.debug("args[]='%s' - CALLED!", type(args))
1288 logger.debug("Invoking locking.acquire() ...")
1291 source_domain = "instanceapp.misskey.page"
1292 if sources.is_recent(source_domain):
1293 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1296 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1297 sources.update(source_domain)
1299 logger.info("Fetching instances.json from source_domain='%s' ...", source_domain)
1300 raw = utils.fetch_url(
1301 f"https://{source_domain}/instances.json",
1302 network.web_headers,
1303 (config.get("connection_timeout"), config.get("read_timeout"))
1305 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1307 parsed = json.loads(raw)
1308 logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
1310 if "instancesInfos" not in parsed:
# NOTE(review): '%d' placeholder without an argument - 'len(parsed)' is
# missing.  Also "instane(s)" below is a typo in the log text.
1311 logger.warning("parsed()=%d does not contain element 'instancesInfos'")
1314 logger.info("Checking %d instane(s) ...", len(parsed["instancesInfos"]))
1315 for row in parsed["instancesInfos"]:
1316 logger.debug("row[%s]='%s'", type(row), row)
1317 if "url" not in row:
1318 logger.warning("row()=%d does not have element 'url' - SKIPPED!", len(row))
1320 elif not domain_helper.is_wanted(row["url"]):
1321 logger.debug("row[url]='%s' is not wanted - SKIPPED!", row["url"])
1323 elif instances.is_registered(row["url"]):
1324 logger.debug("row[url]='%s' is already registered - SKIPPED!", row["url"])
1327 logger.info("Fetching row[url]='%s' ...", row["url"])
1328 federation.fetch_instances(row["url"], "misskey.io", None, inspect.currentframe().f_code.co_name)
1330 logger.debug("Success! - EXIT!")
1333 def fetch_joinfediverse(args: argparse.Namespace) -> int:
# Scrape the joinfediverse.wiki /FediBlock page: parse its "wikitable"
# tables into block records (blocked domain, reason, subdomains), expand
# subdomain rows, then register the blocks for all local
# 'climatejustice.%' blocker instances.  Returns an int exit status.
# NOTE(review): listing has elided lines (numbering gaps) - e.g. the
# 'for row in rows:' header, 'cnt = cnt + 1' counters, 'block = dict()',
# 'blocklist = []' / 'blocking = []' / 'blockdict = []' initializations,
# and several 'continue'/'return' statements.  Confirm in the full file.
1334 logger.debug("args[]='%s' - CALLED!", type(args))
1336 logger.debug("Invoking locking.acquire() ...")
1339 source_domain = "joinfediverse.wiki"
1340 if sources.is_recent(source_domain):
1341 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1344 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1345 sources.update(source_domain)
1347 logger.info("Fetching /FediBlock wiki page from source_domain='%s' ...", source_domain)
1348 raw = utils.fetch_url(
1349 f"https://{source_domain}/FediBlock",
1350 network.web_headers,
1351 (config.get("connection_timeout"), config.get("read_timeout"))
1353 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1355 doc = bs4.BeautifulSoup(raw, "html.parser")
1356 logger.debug("doc[]='%s'", type(doc))
1358 tables = doc.findAll("table", {"class": "wikitable"})
1360 logger.info("Analyzing %d table(s) ...", len(tables))
1362 for table in tables:
1363 logger.debug("table[]='%s'", type(table))
1365 rows = table.findAll("tr")
1366 logger.info("Checking %d row(s) ...", len(rows))
# Maps column index -> recognized header name; rebuilt whenever a header
# row (more than one <th>) is encountered.
1367 block_headers = dict()
1369 logger.debug("row[%s]='%s'", type(row), row)
1371 headers = row.findAll("th")
1372 logger.debug("Found headers()=%d header(s)", len(headers))
1373 if len(headers) > 1:
1374 block_headers = dict()
1376 for header in headers:
1378 logger.debug("header[]='%s',cnt=%d", type(header), cnt)
1379 text = header.contents[0]
1381 logger.debug("text[]='%s'", type(text))
1382 if not isinstance(text, str):
1383 logger.debug("text[]='%s' is not of type 'str' - SKIPPED!", type(text))
1385 elif validators.domain(text.strip()):
1386 logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
1389 text = tidyup.domain(text.strip())
1390 logger.debug("text='%s' - AFTER!", text)
# Only these wiki column headers are scraped; everything else is ignored.
1391 if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
1392 logger.debug("Found header: '%s'=%d", text, cnt)
1393 block_headers[cnt] = text
1395 elif len(block_headers) == 0:
1396 logger.debug("row is not scrapable - SKIPPED!")
1398 elif len(block_headers) > 0:
1399 logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
1403 for element in row.find_all(["th", "td"]):
1405 logger.debug("element[]='%s',cnt=%d", type(element), cnt)
1406 if cnt in block_headers:
1407 logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])
1409 text = element.text.strip()
# "domain"/"instance" columns are normalized under the single key
# "blocked"; other recognized columns keep their header name.
1410 key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"
1412 logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
1413 if key in ["domain", "instance"]:
1415 elif key == "reason":
1416 block[key] = tidyup.reason(text)
1417 elif key == "subdomain(s)":
1420 block[key] = text.split("/")
1422 logger.debug("key='%s'", key)
1425 logger.debug("block()=%d ...", len(block))
1427 logger.debug("Appending block()=%d ...", len(block))
1428 blocklist.append(block)
1430 logger.debug("blocklist()=%d", len(blocklist))
# The scraped blocks are applied on behalf of the local climatejustice.*
# instances found in the database.
1432 database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
1433 domains = database.cursor.fetchall()
1435 logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
1437 for block in blocklist:
1438 logger.debug("block='%s'", block)
1439 if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
1440 origin = block["blocked"]
1441 logger.debug("origin='%s'", origin)
1442 for subdomain in block["subdomain(s)"]:
1443 block["blocked"] = subdomain + "." + origin
1444 logger.debug("block[blocked]='%s'", block["blocked"])
# NOTE(review): the same dict object is appended once per subdomain, so
# all appended entries alias each other and end up with the last
# subdomain's "blocked" value - presumably a copy per subdomain was
# intended; confirm against the full file.
1445 blocking.append(block)
1447 blocking.append(block)
# NOTE(review): '%d' is given a list here - should be len(blocking).
1449 logger.debug("blocking()=%d", blocking)
1450 for block in blocking:
1451 logger.debug("block[]='%s'", type(block))
1452 if "blocked" not in block:
1453 raise KeyError(f"block()={len(block)} does not have element 'blocked'")
1455 block["blocked"] = tidyup.domain(block["blocked"]).encode("idna").decode("utf-8")
1456 logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
1458 if block["blocked"] == "":
1459 logger.debug("block[blocked] is empty - SKIPPED!")
1461 elif not domain_helper.is_wanted(block["blocked"]):
1462 logger.debug("block[blocked]='%s' is not wanted - SKIPPED!", block["blocked"])
1464 elif instances.is_recent(block["blocked"]):
1465 logger.debug("block[blocked]='%s' has been recently checked - SKIPPED!", block["blocked"])
1468 logger.debug("Proccessing blocked='%s' ...", block["blocked"])
1469 processing.domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
1472 for blocker in domains:
# fetchall() rows are sequences; element 0 is the selected 'domain' column.
1473 blocker = blocker[0]
1474 logger.debug("blocker[%s]='%s'", type(blocker), blocker)
1475 instances.set_last_blocked(blocker)
1477 for block in blocking:
1478 logger.debug("block[blocked]='%s',block[block reason(s)]='%s' - BEFORE!", block["blocked"], block["block reason(s)"] if "block reason(s)" in block else None)
1479 block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None
1481 logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
1482 if block["blocked"] == "":
1483 logger.debug("block[blocked] is empty - SKIPPED!")
1485 elif not domain_helper.is_wanted(block["blocked"]):
1486 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1489 logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
1490 if processing.block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
1491 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
1493 "blocked": block["blocked"],
1494 "reason" : block["reason"],
1497 if instances.has_pending(blocker):
1498 logger.debug("Flushing updates for blocker='%s' ...", blocker)
1499 instances.update_data(blocker)
1501 logger.debug("Invoking commit() ...")
1502 database.connection.commit()
1504 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1505 if config.get("bot_enabled") and len(blockdict) > 0:
1506 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", blocker, len(blockdict))
1507 network.send_bot_post(blocker, blockdict)
1509 logger.debug("Success! - EXIT!")
1512 def recheck_obfuscation(args: argparse.Namespace) -> int:
# Re-fetch block lists from instances previously flagged with
# has_obfuscation = 1 (optionally restricted to --domain / --software),
# try to deobfuscate wildcard/hashed entries against each blocker's peer
# list, record newly resolved blocks and update the per-instance
# obfuscation counters.  Returns an int exit status.
# NOTE(review): listing has elided lines (numbering gaps) - e.g. the
# 'for row in rows:' header, 'blocking = []' / 'blockdict = []' /
# 'obfuscated = 0' / 'blocked = None' initializations and several
# 'continue' statements.  Confirm control flow in the full file.
1513 logger.debug("args[]='%s' - CALLED!", type(args))
1515 logger.debug("Invoking locking.acquire() ...")
1518 if isinstance(args.domain, str) and args.domain != "" and domain_helper.is_wanted(args.domain):
1519 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
# NOTE(review): 'validators.domain(args.software) == args.software' looks
# suspicious - validators.domain() returns a truthiness result, not the
# input string, so this equality presumably never holds; verify intent.
1520 elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
1521 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
1523 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")
1525 rows = database.cursor.fetchall()
1526 logger.info("Checking %d domains ...", len(rows))
1528 logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
# Skip recently-checked rows unless the user forced the run or explicitly
# selected a domain/software.
1529 if (args.force is None or not args.force) and args.domain is None and args.software is None and instances.is_recent(row["domain"], "last_blocked"):
1530 logger.debug("row[domain]='%s' has been recently checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force))
# Dispatch to the software-specific block-list fetcher.
1534 if row["software"] == "pleroma":
1535 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1536 blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
1537 elif row["software"] == "mastodon":
1538 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1539 blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
1540 elif row["software"] == "lemmy":
1541 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1542 blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
1543 elif row["software"] == "friendica":
1544 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1545 blocking = friendica.fetch_blocks(row["domain"])
1546 elif row["software"] == "misskey":
1547 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1548 blocking = misskey.fetch_blocks(row["domain"])
1550 logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])
1552 logger.debug("row[domain]='%s'", row["domain"])
1554 # chaos.social requires special care ...
1555 if row["domain"] != "chaos.social" and not blocklists.is_excluded(row["domain"]):
1556 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
1557 instances.set_last_blocked(row["domain"])
1558 instances.set_total_blocks(row["domain"], blocking)
1563 logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1564 for block in blocking:
1565 logger.debug("block[blocked]='%s'", block["blocked"])
1568 if block["blocked"] == "":
1569 logger.debug("block[blocked] is empty - SKIPPED!")
1571 elif block["blocked"].endswith(".arpa"):
1572 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1574 elif block["blocked"].endswith(".tld"):
1575 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1577 elif block["blocked"].endswith(".onion"):
1578 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
1580 elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
# Count the obfuscated entry first; the counter is decremented again below
# if deobfuscation succeeds.
1581 logger.debug("block='%s' is obfuscated.", block["blocked"])
1582 obfuscated = obfuscated + 1
1583 blocked = utils.deobfuscate(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
1584 elif not domain_helper.is_wanted(block["blocked"]):
1585 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1587 elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1588 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
1591 logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
1592 if blocked is not None and blocked != block["blocked"]:
1593 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1594 obfuscated = obfuscated - 1
1596 if blocks.is_instance_blocked(row["domain"], blocked):
1597 logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
1599 elif blacklist.is_blacklisted(blocked):
1600 logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
1603 block["block_level"] = blocks.alias_block_level(block["block_level"])
1605 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
1606 if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1607 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
1610 "reason" : block["reason"],
1613 logger.debug("Settings obfuscated=%d for row[domain]='%s' ...", obfuscated, row["domain"])
1614 instances.set_obfuscated_blocks(row["domain"], obfuscated)
1616 logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
# Everything resolved: clear the has_obfuscation flag for this instance.
1617 if obfuscated == 0 and len(blocking) > 0:
1618 logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
1619 instances.set_has_obfuscation(row["domain"], False)
1621 if instances.has_pending(row["domain"]):
1622 logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1623 instances.update_data(row["domain"])
1625 logger.debug("Invoking commit() ...")
1626 database.connection.commit()
1628 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1629 if config.get("bot_enabled") and len(blockdict) > 0:
1630 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", row["domain"], len(blockdict))
1631 network.send_bot_post(row["domain"], blockdict)
1633 logger.debug("Success! - EXIT!")
1636 def fetch_fedilist(args: argparse.Namespace) -> int:
# Download the instance CSV from demo.fedilist.com (optionally filtered by
# --software) and crawl every wanted, not-recently-seen domain from the
# 'hostname' column.  Returns an int exit status.
# NOTE(review): listing has elided lines (numbering gaps) - e.g. the
# 'try:' around csv parsing, 'rows = list(reader)', and the
# 'for row in rows:' loop header.  Note the URLs use plain http.
1637 logger.debug("args[]='%s' - CALLED!", type(args))
1639 logger.debug("Invoking locking.acquire() ...")
1642 source_domain = "demo.fedilist.com"
1643 if sources.is_recent(source_domain):
1644 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1647 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1648 sources.update(source_domain)
1650 url = f"http://{source_domain}/instance/csv?onion=not"
1651 if args.software is not None and args.software != "":
1652 logger.debug("args.software='%s'", args.software)
1653 url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"
1655 logger.info("Fetching url='%s' ...", url)
1656 response = reqto.get(
1658 headers=network.web_headers,
1659 timeout=(config.get("connection_timeout"), config.get("read_timeout")),
1660 allow_redirects=False
1663 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1664 if not response.ok or response.status_code >= 300 or len(response.content) == 0:
1665 logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.text))
1668 reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
1670 logger.debug("reader[]='%s'", type(reader))
1672 logger.warning("Failed parsing response.content()=%d as CSV content", len(response.content))
1677 logger.info("Checking %d rows ...", len(rows))
1679 logger.debug("row[]='%s'", type(row))
1680 if "hostname" not in row:
1681 logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(row))
1684 logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"])
1685 domain = tidyup.domain(row["hostname"])
1686 logger.debug("domain='%s' - AFTER!", domain)
1689 logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"])
1692 logger.debug("domain='%s' - BEFORE!", domain)
# Normalize to punycode (IDNA) before registry checks.
1693 domain = domain.encode("idna").decode("utf-8")
1694 logger.debug("domain='%s' - AFTER!", domain)
1696 if not domain_helper.is_wanted(domain):
1697 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
# --force re-crawls domains that are already registered.
1699 elif (args.force is None or not args.force) and instances.is_registered(domain):
1700 logger.debug("domain='%s' is already registered, --force not specified: args.force[]='%s'", domain, type(args.force))
1702 elif instances.is_recent(domain):
1703 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1706 logger.info("Fetching instances from domain='%s' ...", domain)
1707 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1709 logger.debug("Success! - EXIT!")
1712 def update_nodeinfo(args: argparse.Namespace) -> int:
# Re-determine the software type of registered instances and refresh their
# nodeinfo bookkeeping. Scope: a single --domain, all rows matching
# --software, or (default) every instance whose last_nodeinfo is older than
# the configured "recheck_nodeinfo" interval or was never set.
# Returns an int exit code (project convention for command handlers).
# NOTE(review): this extract drops blank/structural lines (the
# locking.acquire() call, the row loop header, the try: opener, a progress
# counter and the final return) - comments below describe the visible flow
# only; confirm the full body against the original file.
1713 logger.debug("args[]='%s' - CALLED!", type(args))
1715 logger.debug("Invoking locking.acquire() ...")
# Select the working set of (domain, software) rows according to CLI scope.
1718 if args.domain is not None and args.domain != "":
1719 logger.debug("Fetching args.domain='%s'", args.domain)
1720 database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ?", [args.domain])
1721 elif args.software is not None and args.software != "":
1722 logger.info("Fetching domains for args.software='%s'", args.software)
1723 database.cursor.execute("SELECT domain, software FROM instances WHERE software = ?", [args.software])
1725 logger.info("Fetching domains for recently updated ...")
# Stale rows: last_nodeinfo older than now minus recheck_nodeinfo, or NULL.
1726 database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL", [time.time() - config.get("recheck_nodeinfo")])
1728 domains = database.cursor.fetchall()
1730 logger.info("Checking %d domain(s) ...", len(domains))
1733 logger.debug("row[]='%s'", type(row))
# Skip rows rechecked recently unless --force was given.
1734 if not args.force and instances.is_recent(row["domain"], "last_nodeinfo"):
1735 logger.debug("row[domain]='%s' has been recently checked - SKIPPED!", row["domain"])
1739 logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
# Probe the instance to (re-)detect which fediverse software it runs.
1740 software = federation.determine_software(row["domain"])
1742 logger.debug("Determined software='%s'", software)
# Persist a software change (or always rewrite under --force).
1743 if (software != row["software"] and software is not None) or args.force is True:
1744 logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
1745 instances.set_software(row["domain"], software)
1747 if software is not None:
1748 logger.debug("Setting row[domain]='%s' as successfully determined ...", row["domain"])
1749 instances.set_success(row["domain"])
# Network failures are recorded on the instance row rather than aborting
# the whole run.
1750 except network.exceptions as exception:
1751 logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1752 instances.set_last_error(row["domain"], exception)
# Always stamp the check time and flush pending data for this row.
1754 instances.set_last_nodeinfo(row["domain"])
1755 instances.update_data(row["domain"])
1758 logger.debug("Success! - EXIT!")
1761 def fetch_instances_social(args: argparse.Namespace) -> int:
# Import the instance list from the instances.social API and crawl any
# domain not yet known. Requires the "instances_social_api_key" config
# value; the source is rate-limited through sources.is_recent()/update().
# Returns an int exit code (project convention for command handlers).
# NOTE(review): this extract drops blank/structural lines (locking.acquire(),
# the headers dict opener, the row loop header, continue/return statements) -
# comments describe the visible flow only; confirm against the original file.
1762 logger.debug("args[]='%s' - CALLED!", type(args))
1764 logger.debug("Invoking locking.acquire() ...")
1767 source_domain = "instances.social"
# Guard clauses: refuse to run without an API key or when the source was
# queried too recently.
1769 if config.get("instances_social_api_key") == "":
1770 logger.error("API key not set. Please set in your config.json file.")
1772 elif sources.is_recent(source_domain):
1773 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1776 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1777 sources.update(source_domain)
# Bearer-token header for the instances.social REST API.
1780 "Authorization": f"Bearer {config.get('instances_social_api_key')}",
1783 logger.info("Fetching list from source_domain='%s' ...", source_domain)
# count=0 requests the unbounded list, sorted by name.
1784 fetched = network.get_json_api(
1786 "/api/1.0/instances/list?count=0&sort_by=name",
1788 (config.get("connection_timeout"), config.get("read_timeout"))
1790 logger.debug("fetched[]='%s'", type(fetched))
# Bail out on any API-level failure or unexpected response shape.
1792 if "error_message" in fetched:
1793 logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1795 elif "exception" in fetched:
1796 logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1798 elif "json" not in fetched:
1799 logger.warning("fetched has no element 'json' - EXIT!")
1801 elif "instances" not in fetched["json"]:
1802 logger.warning("fetched[row] has no element 'instances' - EXIT!")
1806 rows = fetched["json"]["instances"]
1808 logger.info("Checking %d row(s) ...", len(rows))
1810 logger.debug("row[]='%s'", type(row))
# Normalize the advertised hostname, then punycode-encode it so all
# comparisons/storage use the IDNA form.
1811 domain = tidyup.domain(row["name"])
1812 logger.debug("domain='%s' - AFTER!", domain)
1815 logger.debug("domain is empty - SKIPPED!")
1818 logger.debug("domain='%s' - BEFORE!", domain)
1819 domain = domain.encode("idna").decode("utf-8")
1820 logger.debug("domain='%s' - AFTER!", domain)
# Skip unwanted, duplicate, already-registered or recently-crawled domains.
1822 if not domain_helper.is_wanted(domain):
1823 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1825 elif domain in domains:
1826 logger.debug("domain='%s' is already added - SKIPPED!", domain)
1828 elif instances.is_registered(domain):
1829 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1831 elif instances.is_recent(domain):
1832 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
# New domain: hand it to the generic instance crawler; the command name is
# recorded as the discovery origin via the current frame's code name.
1835 logger.info("Fetching instances from domain='%s'", domain)
1836 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1838 logger.debug("Success! - EXIT!")
1841 def fetch_relays(args: argparse.Namespace) -> int:
# Scrape ActivityPub relay homepages (software 'activityrelay', 'aoderelay',
# 'selective-relay') for their lists of registered instances, record each
# relay's peer list, and crawl any instance domain not yet registered.
# Returns an int exit code (project convention for command handlers).
# NOTE(review): this extract drops blank/structural lines (locking.acquire(),
# loop headers, try:/continue statements, the peers/domains initializers and
# dict-append bodies) - comments describe the visible flow only; confirm
# against the original file.
1842 logger.debug("args[]='%s' - CALLED!", type(args))
1844 logger.debug("Invoking locking.acquire() ...")
# Either a single relay given via --domain, or all known relays.
1847 if args.domain is not None and args.domain != "":
1848 database.cursor.execute("SELECT domain, software FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay') AND domain = ? LIMIT 1", [args.domain])
1850 database.cursor.execute("SELECT domain, software FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay')")
1853 rows = database.cursor.fetchall()
1855 logger.info("Checking %d relays ...", len(rows))
1857 logger.debug("row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
# Honour the recency cache unless --force was given.
1859 if not args.force and instances.is_recent(row["domain"]):
1860 logger.debug("row[domain]='%s' has been recently fetched - SKIPPED!", row["domain"])
# Fetch the relay's landing page; the peer list is only published as HTML.
1864 logger.info("Fetching / from relay row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1865 raw = utils.fetch_url(
1866 f"https://{row['domain']}",
1867 network.web_headers,
1868 (config.get("connection_timeout"), config.get("read_timeout"))
1870 logger.debug("raw[%s]()=%d", type(raw), len(raw))
# On network failure: record the error, stamp the fetch time, flush, and
# move on to the next relay.
1871 except network.exceptions as exception:
1872 logger.warning("Exception '%s' during fetching from relay '%s': '%s'", type(exception), row["domain"], str(exception))
1873 instances.set_last_error(row["domain"], exception)
1874 instances.set_last_instance_fetch(row["domain"])
1875 instances.update_data(row["domain"])
1878 doc = bs4.BeautifulSoup(raw, features="html.parser")
1879 logger.debug("doc[]='%s'", type(doc))
1881 logger.debug("row[software]='%s'", row["software"])
# --- activityrelay: peers are plain text inside a <p> that contains the
# marker phrase "registered instances". ---
1882 if row["software"] == "activityrelay":
1883 logger.debug("Checking row[domain]='%s' ...", row["domain"])
1884 tags = doc.findAll("p")
1886 logger.debug("Checking %d paragraphs ...", len(tags))
1888 logger.debug("tag[]='%s'", type(tag))
1889 if len(tag.contents) == 0:
1890 logger.debug("tag='%s' is an empty tag - SKIPPED!", tag)
1892 elif "registered instances" not in tag.contents[0]:
1893 logger.debug("Skipping paragraph, text not found.")
1896 logger.debug("Found tag.contents[0][]='%s'", tag.contents[0])
# Each NavigableString child of the paragraph (other than the marker
# line itself) is one peer domain.
1897 for domain in tag.contents:
1898 logger.debug("domain[%s]='%s'", type(domain), domain)
1899 if not isinstance(domain, bs4.element.NavigableString) or "registered instances" in domain:
1902 domain = str(domain)
1903 logger.debug("domain='%s'", domain)
1904 if not domain_helper.is_wanted(domain):
1905 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1908 logger.debug("domain='%s' - BEFORE!", domain)
1909 domain = tidyup.domain(domain)
1910 logger.debug("domain='%s' - AFTER!", domain)
1913 logger.debug("Empty domain after tidyup.domain() from origin='%s' - SKIPPED!", row["domain"])
1915 elif domain not in peers:
1916 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1917 peers.append(domain)
# Queue the domain for later registration unless already queued.
1919 if dict_helper.has_key(domains, "domain", domain):
1920 logger.debug("domain='%s' already added", domain)
1923 logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
1926 "origin": row["domain"],
# --- aoderelay / selective-relay: peers are anchor links, either in
# <section class="instance"> elements or in <li> items under
# <div id="instances">. ---
1928 elif row["software"] in ["aoderelay", "selective-relay"]:
1929 logger.debug("Checking row[domain]='%s' ...", row["domain"])
1930 if row["software"] == "aoderelay":
1931 tags = doc.findAll("section", {"class": "instance"})
1933 tags = doc.find("div", {"id": "instances"}).findAll("li")
1935 logger.debug("Checking %d tags ...", len(tags))
1937 logger.debug("tag[]='%s'", type(tag))
1939 link = tag.find("a")
1940 logger.debug("link[%s]='%s'", type(link), link)
1942 logger.warning("tag='%s' has no a-tag ...", tag)
# The peer domain is the lower-cased netloc of the link target.
1945 components = urlparse(link["href"])
1946 domain = components.netloc.lower()
1948 if not domain_helper.is_wanted(domain):
1949 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1952 logger.debug("domain='%s' - BEFORE!", domain)
1953 domain = tidyup.domain(domain)
1954 logger.debug("domain='%s' - AFTER!", domain)
1957 logger.debug("Empty domain after tidyup.domain() from origin='%s' - SKIPPED!", row["domain"])
1959 elif domain not in peers:
1960 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1961 peers.append(domain)
1963 if dict_helper.has_key(domains, "domain", domain):
1964 logger.debug("domain='%s' already added", domain)
1967 logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
1970 "origin": row["domain"],
# Unknown relay software: log and fall through to the bookkeeping below.
1973 logger.warning("row[domain]='%s',row[software]='%s' is not supported", row["domain"], row["software"])
# Per-relay bookkeeping: fetch timestamp, peer count, flush pending data.
1975 logger.debug("Updating last_instance_fetch for row[domain]='%s' ...", row["domain"])
1976 instances.set_last_instance_fetch(row["domain"])
1978 logger.info("Relay '%s' has %d peer(s) registered.", row["domain"], len(peers))
1979 instances.set_total_peers(row["domain"], peers)
1981 logger.debug("Flushing data for row[domain]='%s'", row["domain"])
1982 instances.update_data(row["domain"])
# Second pass: crawl every collected domain that is not yet registered,
# recording the relay it came from as origin.
1984 logger.info("Checking %d domains ...", len(domains))
1986 logger.debug("row[domain]='%s',row[origin]='%s'", row["domain"], row["origin"])
1987 if instances.is_registered(row["domain"]):
1988 logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
1991 logger.info("Fetching row[domain]='%s',row[origin]='%s' ...", row["domain"], row["origin"])
1992 federation.fetch_instances(row["domain"], row["origin"], None, inspect.currentframe().f_code.co_name)
1994 logger.debug("Success! - EXIT!")
def convert_idna(args: argparse.Namespace) -> int:
    """Convert stored unicode domain names to their IDNA ("xn--") form.

    Walks four columns - instances.domain, instances.origin, blocks.blocker
    and blocks.blocked - selecting every row whose value does not already
    contain "xn--", and hands each result set to the matching
    translate_idnas() helper, which rewrites the rows in punycode.

    Parameters:
        args: Parsed command-line arguments (only logged here; the command
              takes no options of its own).

    Returns:
        0 on success (project convention for command handlers).
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    # Instance domains still stored in unicode form
    database.cursor.execute("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    instances.translate_idnas(rows, "domain")

    # Origin (discovered-via) domains of instances
    database.cursor.execute("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    instances.translate_idnas(rows, "origin")

    # Blocking side of recorded blocks
    database.cursor.execute("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    blocks.translate_idnas(rows, "blocker")

    # Blocked side of recorded blocks
    database.cursor.execute("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC")
    rows = database.cursor.fetchall()

    logger.debug("rows[]='%s'", type(rows))
    blocks.translate_idnas(rows, "blocked")

    logger.debug("Success! - EXIT!")
    # Restored terminating return: the function is annotated '-> int' and the
    # extract lost the trailing statement after the success log.
    return 0
2027 def remove_invalid(args: argparse.Namespace) -> int:
# Purge rows whose domain fails syntactic validation: for every instance
# domain that validators.domain() rejects, delete its block records (as
# blocker or blocked) and the instance row itself, then commit and VACUUM.
# Returns an int exit code (project convention for command handlers).
# NOTE(review): this extract drops blank/structural lines (locking.acquire(),
# the row loop header, the final return) - confirm against the original file.
2028 logger.debug("args[]='%s' - CALLED!", type(args))
2030 logger.debug("Invoking locking.acquire() ...")
2033 database.cursor.execute("SELECT domain FROM instances ORDER BY domain ASC")
2034 rows = database.cursor.fetchall()
2036 logger.info("Checking %d domains ...", len(rows))
2038 logger.debug("row[domain]='%s'", row["domain"])
# Validate only the host part: anything after a '/' is stripped first.
2039 if not validators.domain(row["domain"].split("/")[0]):
2040 logger.info("Invalid row[domain]='%s' found, removing ...", row["domain"])
# Remove dependent block rows before the instance row itself.
2041 database.cursor.execute("DELETE FROM blocks WHERE blocker = ? OR blocked = ?", [row["domain"], row["domain"]])
2042 database.cursor.execute("DELETE FROM instances WHERE domain = ? LIMIT 1", [row["domain"]])
2044 logger.debug("Invoking commit() ...")
2045 database.connection.commit()
# NOTE(review): "Vaccum" below is a typo for "Vacuum" in the log message;
# left untouched here as it is runtime output, not a comment.
2047 logger.info("Vaccum cleaning database ...")
2048 database.cursor.execute("VACUUM")
2050 logger.debug("Success! - EXIT!")