1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
import argparse
import csv
import inspect
import json
import logging
import time

from urllib.parse import urlparse

import atoma
import bs4
import markdown
import validators

from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import blocklists
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import dicts as dict_helper
from fba.helpers import domain as domain_helper
from fba.helpers import locking
from fba.helpers import processing
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import csrf
from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances
from fba.models import sources

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma
60 logging.basicConfig(level=logging.INFO)
61 logger = logging.getLogger(__name__)
62 #logger.setLevel(logging.DEBUG)
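# Checks whether the domain given on the command line is valid, not blacklisted
# and not yet registered, and returns a status code accordingly.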
64 def check_instance(args: argparse.Namespace) -> int:
65 logger.debug("args.domain='%s' - CALLED!", args.domain)
67 if not validators.domain(args.domain):
68 logger.warning("args.domain='%s' is not valid", args.domain)
70 elif blacklist.is_blacklisted(args.domain):
71 logger.warning("args.domain='%s' is blacklisted", args.domain)
73 elif instances.is_registered(args.domain):
74 logger.warning("args.domain='%s' is already registered", args.domain)
77 logger.info("args.domain='%s' is not known", args.domain)
79 logger.debug("status=%d - EXIT!", status)
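# Walks over all instances with a stored nodeinfo_url and warns when that URL
# matches neither the domain nor its punycode form (relative URLs always match).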
82 def check_nodeinfo(args: argparse.Namespace) -> int:
83 logger.debug("args[]='%s' - CALLED!", type(args))
86 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")
89 for row in database.cursor.fetchall():
90 logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
91 punycode = row["domain"].encode("idna").decode("utf-8")
93 if row["nodeinfo_url"].startswith("/"):
94 logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
96 elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
97 logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
100 logger.info("Found %d row(s)", cnt)
102 logger.debug("EXIT!")
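# Fetches the public server list from the pixelfed.org API and registers every
# new, wanted and not recently crawled domain via federation.fetch_instances().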
105 def fetch_pixelfed_api(args: argparse.Namespace) -> int:
106 logger.debug("args[]='%s' - CALLED!", type(args))
# No CSRF token is required by default, so network.source_headers does not need to be added here
110 source_domain = "pixelfed.org"
112 if sources.is_recent(source_domain):
logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
116 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
117 sources.update(source_domain)
120 logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
121 headers = csrf.determine(source_domain, dict())
122 except network.exceptions as exception:
logger.warning("Exception '%s' during checking CSRF (fetch_pixelfed_api,%s) - EXIT!", type(exception), __name__)
127 logger.info("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
128 fetched = network.get_json_api(
130 "/api/v1/servers/all.json?scope=All&country=all&language=all",
132 (config.get("connection_timeout"), config.get("read_timeout"))
135 logger.debug("JSON API returned %d elements", len(fetched))
136 if "error_message" in fetched:
137 logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
139 elif "data" not in fetched["json"]:
140 logger.warning("API did not return JSON with 'data' element - EXIT!")
143 rows = fetched["json"]["data"]
144 logger.info("Checking %d fetched rows ...", len(rows))
146 logger.debug("row[]='%s'", type(row))
147 if "domain" not in row:
148 logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
150 elif row["domain"] == "":
151 logger.debug("row[domain] is empty - SKIPPED!")
154 logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
155 domain = row["domain"].encode("idna").decode("utf-8")
156 logger.debug("domain='%s' - AFTER!", domain)
158 if not domain_helper.is_wanted(domain):
159 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
161 elif instances.is_registered(domain):
162 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
164 elif instances.is_recent(domain):
165 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
168 logger.debug("Fetching instances from domain='%s' ...", domain)
169 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
171 except network.exceptions as exception:
logger.warning("Cannot fetch JSON API,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
175 logger.debug("Success! - EXIT!")
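# Queries the GraphQL API at gql.api.bka.li for a domain list and fetches
# instance data for every new, wanted domain found there.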
178 def fetch_bkali(args: argparse.Namespace) -> int:
179 logger.debug("args[]='%s' - CALLED!", type(args))
181 logger.debug("Invoking locking.acquire() ...")
184 source_domain = "gql.api.bka.li"
185 if sources.is_recent(source_domain):
logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
189 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
190 sources.update(source_domain)
194 logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
195 fetched = network.post_json_api(
199 "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
203 logger.debug("fetched[]='%s'", type(fetched))
204 if "error_message" in fetched:
logger.warning("post_json_api() for 'gql.api.bka.li' returned error_message='%s'", fetched["error_message"])
207 elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
211 rows = fetched["json"]
213 logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
215 raise Exception("WARNING: Returned no records")
216 elif "data" not in rows:
217 raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
218 elif "nodeinfo" not in rows["data"]:
219 raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")
221 for entry in rows["data"]["nodeinfo"]:
222 logger.debug("entry[%s]='%s'", type(entry), entry)
223 if "domain" not in entry:
224 logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
226 elif entry["domain"] == "":
227 logger.debug("entry[domain] is empty - SKIPPED!")
229 elif not domain_helper.is_wanted(entry["domain"]):
230 logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
232 elif instances.is_registered(entry["domain"]):
233 logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
235 elif instances.is_recent(entry["domain"]):
236 logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
239 logger.debug("Adding domain='%s' ...", entry["domain"])
240 domains.append(entry["domain"])
242 except network.exceptions as exception:
243 logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
246 logger.debug("domains()=%d", len(domains))
248 logger.info("Adding %d new instances ...", len(domains))
249 for domain in domains:
250 logger.debug("domain='%s' - BEFORE!", domain)
251 domain = domain.encode("idna").decode("utf-8")
252 logger.debug("domain='%s' - AFTER!", domain)
255 logger.info("Fetching instances from domain='%s' ...", domain)
256 federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
257 except network.exceptions as exception:
258 logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
259 instances.set_last_error(domain, exception)
262 logger.debug("Success - EXIT!")
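# Fetches block lists from registered instances (optionally limited to a single
# domain or software), deobfuscates obfuscated entries where possible, records
# the blocks and optionally notifies the bot account about new "reject" blocks.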
265 def fetch_blocks(args: argparse.Namespace) -> int:
266 logger.debug("args[]='%s' - CALLED!", type(args))
267 if args.domain is not None and args.domain != "":
268 logger.debug("args.domain='%s' - checking ...", args.domain)
269 if not validators.domain(args.domain):
270 logger.warning("args.domain='%s' is not valid.", args.domain)
272 elif blacklist.is_blacklisted(args.domain):
273 logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
275 elif not instances.is_registered(args.domain):
276 logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
279 logger.debug("Invoking locking.acquire() ...")
282 if args.domain is not None and args.domain != "":
283 # Re-check single domain
284 logger.debug("Querying database for args.domain='%s' ...", args.domain)
285 database.cursor.execute(
286 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ? LIMIT 1", [args.domain]
288 elif args.software is not None and args.software != "":
289 # Re-check single software
290 logger.debug("Querying database for args.software='%s' ...", args.software)
291 database.cursor.execute(
292 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_updated ASC", [args.software]
296 logger.debug("Re-checking all instances ...")
297 database.cursor.execute(
298 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_updated ASC"
# Re-check after "timeout" (i.e. the minimum re-check interval)
302 database.cursor.execute(
303 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_updated ASC", [time.time() - config.get("recheck_block")]
306 rows = database.cursor.fetchall()
307 logger.info("Checking %d entries ...", len(rows))
308 for blocker, software, origin, nodeinfo_url in rows:
309 logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
311 if not domain_helper.is_wanted(blocker):
312 logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
315 logger.debug("Setting last_blocked,has_obfuscation=false for blocker='%s' ...", blocker)
316 instances.set_last_blocked(blocker)
317 instances.set_has_obfuscation(blocker, False)
# chaos.social isn't part of oliphant's "hidden" blocklists
320 if blocker == "chaos.social" or blocklists.has(blocker):
321 logger.debug("Skipping blocker='%s', run ./fba.py fetch_cs or fetch_oliphant instead!", blocker)
324 logger.debug("Invoking federation.fetch_blocks(%s) ...", blocker)
325 blocking = federation.fetch_blocks(blocker)
327 logger.debug("blocking()=%d,nodeinfo_url='%s'", len(blocking), nodeinfo_url)
328 if len(blocking) == 0:
329 logger.debug("blocker='%s',software='%s'", blocker, software)
330 if software == "pleroma":
331 logger.info("blocker='%s',software='%s'", blocker, software)
332 blocking = pleroma.fetch_blocks(blocker)
333 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
334 elif software == "mastodon":
335 logger.info("blocker='%s',software='%s'", blocker, software)
336 blocking = mastodon.fetch_blocks(blocker)
337 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
338 elif software == "lemmy":
339 logger.info("blocker='%s',software='%s'", blocker, software)
340 blocking = lemmy.fetch_blocks(blocker)
341 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
342 elif software == "friendica":
343 logger.info("blocker='%s',software='%s'", blocker, software)
344 blocking = friendica.fetch_blocks(blocker)
345 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
346 elif software == "misskey":
347 logger.info("blocker='%s',software='%s'", blocker, software)
348 blocking = misskey.fetch_blocks(blocker)
349 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
351 logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)
353 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
354 instances.set_total_blocks(blocker, blocking)
358 logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
359 for block in blocking:
360 logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])
362 if block["block_level"] == "":
363 logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
366 logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
367 block["blocked"] = tidyup.domain(block["blocked"])
368 block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
369 logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])
371 if block["blocked"] == "":
372 logger.warning("blocked is empty, blocker='%s'", blocker)
374 elif block["blocked"].endswith(".onion"):
375 logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
377 elif block["blocked"].endswith(".arpa"):
378 logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
380 elif block["blocked"].endswith(".tld"):
381 logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
383 elif block["blocked"].find("*") >= 0:
384 logger.debug("blocker='%s' uses obfuscated domains", blocker)
# Some Friendica servers also obfuscate domains without a hash
387 row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)
389 logger.debug("row[]='%s'", type(row))
391 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
392 instances.set_has_obfuscation(blocker, True)
395 block["blocked"] = row["domain"]
396 origin = row["origin"]
397 nodeinfo_url = row["nodeinfo_url"]
398 elif block["blocked"].find("?") >= 0:
399 logger.debug("blocker='%s' uses obfuscated domains", blocker)
# Some instances obfuscate blocked domains with question marks; it is unclear whether this depends on the version
402 row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)
404 logger.debug("row[]='%s'", type(row))
406 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
407 instances.set_has_obfuscation(blocker, True)
410 block["blocked"] = row["domain"]
411 origin = row["origin"]
412 nodeinfo_url = row["nodeinfo_url"]
logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
415 if block["blocked"] == "":
416 logger.debug("block[blocked] is empty - SKIPPED!")
419 logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
420 block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
421 logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
423 if not domain_helper.is_wanted(block["blocked"]):
424 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
426 elif block["block_level"] in ["accept", "accepted"]:
427 logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
429 elif not instances.is_registered(block["blocked"]):
430 logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
431 federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)
433 block["block_level"] = blocks.alias_block_level(block["block_level"])
435 if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
438 "blocked": block["blocked"],
439 "reason" : block["reason"],
442 logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
443 cookies.clear(block["blocked"])
445 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
446 if instances.has_pending(blocker):
447 logger.debug("Flushing updates for blocker='%s' ...", blocker)
448 instances.update(blocker)
450 logger.debug("Invoking commit() ...")
451 database.connection.commit()
453 logger.debug("Invoking cookies.clear(%s) ...", blocker)
454 cookies.clear(blocker)
logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
457 if config.get("bot_enabled") and len(blockdict) > 0:
458 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
459 network.send_bot_post(blocker, blockdict)
461 logger.debug("Success! - EXIT!")
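# Scrapes fediverse.observer per software type and registers every listed,
# wanted domain that is not yet known.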
464 def fetch_observer(args: argparse.Namespace) -> int:
465 logger.debug("args[]='%s' - CALLED!", type(args))
467 logger.debug("Invoking locking.acquire() ...")
470 source_domain = "fediverse.observer"
471 if sources.is_recent(source_domain):
logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
475 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
476 sources.update(source_domain)
479 if args.software is None:
480 logger.info("Fetching software list ...")
481 raw = utils.fetch_url(
482 f"https://{source_domain}",
484 (config.get("connection_timeout"), config.get("read_timeout"))
486 logger.debug("raw[%s]()=%d", type(raw), len(raw))
488 doc = bs4.BeautifulSoup(raw, features="html.parser")
489 logger.debug("doc[]='%s'", type(doc))
491 navbar = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"})
492 logger.debug("navbar[]='%s'", type(navbar))
494 logger.warning("Cannot find navigation bar, cannot continue!")
497 items = navbar.findAll("a", {"class": "dropdown-item"})
498 logger.debug("items[]='%s'", type(items))
500 logger.info("Checking %d menu items ...", len(items))
502 logger.debug("item[%s]='%s'", type(item), item)
503 if item.text.lower() == "all":
504 logger.debug("Skipping 'All' menu entry ...")
507 logger.debug("Appending item.text='%s' ...", item.text)
508 types.append(tidyup.domain(item.text))
510 logger.info("Adding args.software='%s' as type ...", args.software)
511 types.append(args.software)
logger.info("Fetching table data for %d software type(s) ...", len(types))
514 for software in types:
515 logger.debug("software='%s' - BEFORE!", software)
516 if args.software is not None and args.software != software:
517 logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
522 logger.debug("Fetching table data for software='%s' ...", software)
523 raw = utils.fetch_url(
524 f"https://{source_domain}/app/views/tabledata.php?software={software}",
526 (config.get("connection_timeout"), config.get("read_timeout"))
528 logger.debug("raw[%s]()=%d", type(raw), len(raw))
530 doc = bs4.BeautifulSoup(raw, features="html.parser")
531 logger.debug("doc[]='%s'", type(doc))
532 except network.exceptions as exception:
533 logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
536 items = doc.findAll("a", {"class": "url"})
537 logger.info("Checking %d items,software='%s' ...", len(items), software)
539 logger.debug("item[]='%s'", type(item))
540 domain = item.decode_contents()
541 logger.debug("domain='%s' - AFTER!", domain)
544 logger.debug("domain is empty - SKIPPED!")
547 logger.debug("domain='%s' - BEFORE!", domain)
548 domain = domain.encode("idna").decode("utf-8")
549 logger.debug("domain='%s' - AFTER!", domain)
551 if not domain_helper.is_wanted(domain):
552 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
554 elif instances.is_registered(domain):
555 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
558 software = software_helper.alias(software)
559 logger.info("Fetching instances for domain='%s'", domain)
560 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
562 logger.debug("Success! - EXIT!")
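# Parses the silenced/suspended server lists from wiki.todon.eu/todon/domainblocks
# and records them as blocks.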
565 def fetch_todon_wiki(args: argparse.Namespace) -> int:
566 logger.debug("args[]='%s' - CALLED!", type(args))
568 logger.debug("Invoking locking.acquire() ...")
571 source_domain = "wiki.todon.eu"
572 if sources.is_recent(source_domain):
logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
576 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
577 sources.update(source_domain)
584 logger.debug("Fetching domainblocks from source_domain='%s'", source_domain)
585 raw = utils.fetch_url(
586 f"https://{source_domain}/todon/domainblocks",
588 (config.get("connection_timeout"), config.get("read_timeout"))
590 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
592 doc = bs4.BeautifulSoup(raw, "html.parser")
593 logger.debug("doc[]='%s'", type(doc))
595 silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
596 logger.info("Checking %d silenced/limited entries ...", len(silenced))
597 blocklist["silenced"] = utils.find_domains(silenced, "div")
599 suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
600 logger.info("Checking %d suspended entries ...", len(suspended))
601 blocklist["reject"] = utils.find_domains(suspended, "div")
603 blocking = blocklist["silenced"] + blocklist["reject"]
606 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
607 instances.set_last_blocked(blocker)
608 instances.set_total_blocks(blocker, blocking)
611 for block_level in blocklist:
612 blockers = blocklist[block_level]
logger.debug("block_level='%s',blockers()=%d", block_level, len(blockers))
615 for blocked in blockers:
616 logger.debug("blocked='%s'", blocked)
618 if not instances.is_registered(blocked):
620 logger.info("Fetching instances from domain='%s' ...", blocked)
621 federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
622 except network.exceptions as exception:
logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
624 instances.set_last_error(blocked, exception)
626 if blocks.is_instance_blocked(blocker, blocked, block_level):
627 logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
630 logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
631 if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", blocked, block_level, blocker)
638 logger.debug("Invoking commit() ...")
639 database.connection.commit()
641 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
642 if config.get("bot_enabled") and len(blockdict) > 0:
643 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
644 network.send_bot_post(blocker, blockdict)
646 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
647 if instances.has_pending(blocker):
648 logger.debug("Flushing updates for blocker='%s' ...", blocker)
649 instances.update(blocker)
651 logger.debug("Success! - EXIT!")
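# Parses chaos.social's federation.md (rendered from Markdown) and records the
# silenced and blocked instances listed there.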
654 def fetch_cs(args: argparse.Namespace):
655 logger.debug("args[]='%s' - CALLED!", type(args))
657 logger.debug("Invoking locking.acquire() ...")
685 source_domain = "raw.githubusercontent.com"
686 if sources.is_recent(source_domain):
logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
690 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
691 sources.update(source_domain)
693 logger.info("Fetching federation.md from source_domain='%s' ...", source_domain)
694 raw = utils.fetch_url(
695 f"https://{source_domain}/chaossocial/meta/master/federation.md",
697 (config.get("connection_timeout"), config.get("read_timeout"))
699 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
701 doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
702 logger.debug("doc()=%d[]='%s'", len(doc), type(doc))
704 silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
705 logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
706 blocklist["silenced"] = federation.find_domains(silenced)
708 blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
709 logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
710 blocklist["reject"] = federation.find_domains(blocked)
712 blocking = blocklist["silenced"] + blocklist["reject"]
713 blocker = "chaos.social"
715 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
716 instances.set_last_blocked(blocker)
717 instances.set_total_blocks(blocker, blocking)
719 logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
720 if len(blocking) > 0:
722 for block_level in blocklist:
723 logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))
725 for row in blocklist[block_level]:
726 logger.debug("row[%s]='%s'", type(row), row)
if "domain" not in row:
728 logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
730 elif not instances.is_registered(row["domain"]):
732 logger.info("Fetching instances from domain='%s' ...", row["domain"])
733 federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
734 except network.exceptions as exception:
735 logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
736 instances.set_last_error(row["domain"], exception)
738 if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
741 "blocked": row["domain"],
742 "reason" : row["reason"],
745 logger.debug("Invoking commit() ...")
746 database.connection.commit()
748 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
749 if config.get("bot_enabled") and len(blockdict) > 0:
750 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
751 network.send_bot_post(blocker, blockdict)
753 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
754 if instances.has_pending(blocker):
755 logger.debug("Flushing updates for blocker='%s' ...", blocker)
756 instances.update(blocker)
758 logger.debug("Success! - EXIT!")
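# Reads an FBA-specific RSS feed given as --feed and registers the domains
# found in the item links.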
761 def fetch_fba_rss(args: argparse.Namespace) -> int:
762 logger.debug("args[]='%s' - CALLED!", type(args))
766 logger.debug("Invoking locking.acquire() ...")
769 components = urlparse(args.feed)
771 if sources.is_recent(components.netloc):
logger.info("API from components.netloc='%s' has recently been accessed - EXIT!", components.netloc)
775 logger.debug("components.netloc='%s' has not been recently used, marking ...", components.netloc)
776 sources.update(components.netloc)
logger.info("Fetching FBA-specific RSS feed args.feed='%s' ...", args.feed)
779 response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
781 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
782 if response.ok and response.status_code == 200 and len(response.text) > 0:
783 logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
784 rss = atoma.parse_rss_bytes(response.content)
786 logger.debug("rss[]='%s'", type(rss))
787 for item in rss.items:
788 logger.debug("item[%s]='%s'", type(item), item)
789 domain = tidyup.domain(item.link.split("=")[1])
791 logger.debug("domain='%s' - AFTER!", domain)
793 logger.debug("domain is empty - SKIPPED!")
796 logger.debug("domain='%s' - BEFORE!", domain)
797 domain = domain.encode("idna").decode("utf-8")
798 logger.debug("domain='%s' - AFTER!", domain)
800 if not domain_helper.is_wanted(domain):
801 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
803 elif domain in domains:
804 logger.debug("domain='%s' is already added - SKIPPED!", domain)
806 elif instances.is_registered(domain):
807 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
809 elif instances.is_recent(domain):
810 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
813 logger.debug("Adding domain='%s'", domain)
814 domains.append(domain)
816 logger.debug("domains()=%d", len(domains))
818 logger.info("Adding %d new instances ...", len(domains))
819 for domain in domains:
820 logger.debug("domain='%s'", domain)
822 logger.info("Fetching instances from domain='%s' ...", domain)
823 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
824 except network.exceptions as exception:
825 logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
826 instances.set_last_error(domain, exception)
829 logger.debug("Success! - EXIT!")
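# Reads the FBA bot's ATOM feed (ryona.agency by default, overridable via --feed)
# and registers the domains linked in the entries.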
832 def fetch_fbabot_atom(args: argparse.Namespace) -> int:
833 logger.debug("args[]='%s' - CALLED!", type(args))
835 logger.debug("Invoking locking.acquire() ...")
838 source_domain = "ryona.agency"
839 feed = f"https://{source_domain}/users/fba/feed.atom"
841 logger.debug("args.feed[%s]='%s'", type(args.feed), args.feed)
842 if args.feed is not None and validators.url(args.feed):
843 logger.debug("Setting feed='%s' ...", args.feed)
844 feed = str(args.feed)
845 source_domain = urlparse(args.feed).netloc
847 if sources.is_recent(source_domain):
logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
851 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
852 sources.update(source_domain)
856 logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
857 response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
859 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
860 if response.ok and response.status_code == 200 and len(response.text) > 0:
861 logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
862 atom = atoma.parse_atom_bytes(response.content)
864 logger.debug("atom[]='%s'", type(atom))
865 for entry in atom.entries:
866 logger.debug("entry[]='%s'", type(entry))
867 doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
868 logger.debug("doc[]='%s'", type(doc))
869 for element in doc.findAll("a"):
870 logger.debug("element[]='%s'", type(element))
871 for href in element["href"].split(","):
872 logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
873 domain = tidyup.domain(href)
875 logger.debug("domain='%s' - AFTER!", domain)
877 logger.debug("domain is empty - SKIPPED!")
880 logger.debug("domain='%s' - BEFORE!", domain)
881 domain = domain.encode("idna").decode("utf-8")
882 logger.debug("domain='%s' - AFTER!", domain)
884 if not domain_helper.is_wanted(domain):
885 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
887 elif domain in domains:
888 logger.debug("domain='%s' is already added - SKIPPED!", domain)
890 elif instances.is_registered(domain):
891 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
893 elif instances.is_recent(domain):
894 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
897 logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
898 domains.append(domain)
900 logger.debug("domains()=%d", len(domains))
902 logger.info("Adding %d new instances ...", len(domains))
903 for domain in domains:
904 logger.debug("domain='%s'", domain)
906 logger.info("Fetching instances from domain='%s' ...", domain)
907 federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
908 except network.exceptions as exception:
909 logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
910 instances.set_last_error(domain, exception)
913 logger.debug("Success! - EXIT!")
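# Fetches peer/instance data for the domain given on the command line and then
# re-crawls known instances whose last fetch is older than the configured interval.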
916 def fetch_instances(args: argparse.Namespace) -> int:
917 logger.debug("args[]='%s' - CALLED!", type(args))
919 logger.debug("args.domain='%s' - checking ...", args.domain)
920 if not validators.domain(args.domain):
921 logger.warning("args.domain='%s' is not valid.", args.domain)
923 elif blacklist.is_blacklisted(args.domain):
924 logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
927 logger.debug("Invoking locking.acquire() ...")
931 domain = tidyup.domain(args.domain)
932 origin = software = None
935 database.cursor.execute("SELECT origin, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
936 row = database.cursor.fetchone()
938 origin = row["origin"]
939 software = row["software"]
943 logger.info("Fetching instances from args.domain='%s',origin='%s',software='%s' ...", domain, origin, software)
944 federation.fetch_instances(domain, origin, software, inspect.currentframe().f_code.co_name)
945 except network.exceptions as exception:
946 logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
947 instances.set_last_error(args.domain, exception)
948 instances.update(args.domain)
952 logger.debug("Not fetching more instances - EXIT!")
955 # Loop through some instances
956 database.cursor.execute(
957 "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus', 'neodb') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY total_peers DESC, last_updated ASC", [time.time() - config.get("recheck_instance")]
960 rows = database.cursor.fetchall()
961 logger.info("Checking %d entries ...", len(rows))
963 logger.debug("row[domain]='%s'", row["domain"])
964 if row["domain"] == "":
965 logger.debug("row[domain] is empty - SKIPPED!")
968 logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
969 domain = row["domain"].encode("idna").decode("utf-8")
970 logger.debug("domain='%s' - AFTER!", domain)
972 if not domain_helper.is_wanted(domain):
logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
977 logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", domain, row["origin"], row["software"], row["nodeinfo_url"])
978 federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
979 except network.exceptions as exception:
980 logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
981 instances.set_last_error(domain, exception)
983 logger.debug("Success - EXIT!")
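# Downloads oliphant's blocklist CSV files from codeberg.org and records the
# listed blocks per blocker.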
986 def fetch_oliphant(args: argparse.Namespace) -> int:
987 logger.debug("args[]='%s' - CALLED!", type(args))
989 logger.debug("Invoking locking.acquire() ...")
992 source_domain = "codeberg.org"
993 if sources.is_recent(source_domain):
logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
997 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
998 sources.update(source_domain)
1001 base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"
1005 logger.debug("Downloading %d files ...", len(blocklists.oliphant_blocklists))
1006 for block in blocklists.oliphant_blocklists:
1007 # Is domain given and not equal blocker?
1008 if isinstance(args.domain, str) and args.domain != block["blocker"]:
1009 logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
1011 elif args.domain in domains:
1012 logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
1015 instances.set_last_blocked(block["blocker"])
1018 logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
1019 response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
1021 logger.debug("response.ok='%s',response.status_code=%d,response.content()=%d", response.ok, response.status_code, len(response.content))
if not response.ok or response.status_code > 200 or response.content == b"":
1023 logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
1026 logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
1027 reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
1033 logger.debug("row[%s]='%s'", type(row), row)
1034 domain = severity = None
1035 reject_media = reject_reports = False
1037 if "#domain" in row:
1038 domain = row["#domain"]
1039 elif "domain" in row:
1040 domain = row["domain"]
1042 logger.debug("row='%s' does not contain domain column", row)
1045 if "#severity" in row:
1046 severity = blocks.alias_block_level(row["#severity"])
1047 elif "severity" in row:
1048 severity = blocks.alias_block_level(row["severity"])
1050 logger.debug("row='%s' does not contain severity column", row)
1053 if "#reject_media" in row and row["#reject_media"].lower() == "true":
1055 elif "reject_media" in row and row["reject_media"].lower() == "true":
1058 if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
1059 reject_reports = True
1060 elif "reject_reports" in row and row["reject_reports"].lower() == "true":
1061 reject_reports = True
1064 logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
1066 logger.debug("domain is empty - SKIPPED!")
1068 elif domain.endswith(".onion"):
1069 logger.debug("domain='%s' is a TOR .onion domain - SKIPPED", domain)
1071 elif domain.endswith(".arpa"):
1072 logger.debug("domain='%s' is a reverse IP address - SKIPPED", domain)
1074 elif domain.endswith(".tld"):
1075 logger.debug("domain='%s' is a fake domain - SKIPPED", domain)
1077 elif domain.find("*") >= 0 or domain.find("?") >= 0:
1078 logger.debug("domain='%s' is obfuscated - Invoking utils.deobfuscate(%s, %s) ...", domain, domain, block["blocker"])
1079 domain = utils.deobfuscate(domain, block["blocker"])
1080 logger.debug("domain='%s' - AFTER!", domain)
1082 if not validators.domain(domain):
logger.debug("domain='%s' is not a valid domain - SKIPPED!", domain)
1085 elif blacklist.is_blacklisted(domain):
1086 logger.warning("domain='%s' is blacklisted - SKIPPED!", domain)
1088 elif blocks.is_instance_blocked(block["blocker"], domain, severity):
1089 logger.debug("block[blocker]='%s' has already blocked domain='%s' with severity='%s' - SKIPPED!", block["blocker"], domain, severity)
1092 logger.debug("Marking domain='%s' as handled", domain)
1093 domains.append(domain)
1095 logger.debug("Processing domain='%s' ...", domain)
1096 processed = processing.domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
1097 logger.debug("processed='%s'", processed)
1099 if processing.block(block["blocker"], domain, None, severity) and config.get("bot_enabled"):
logger.debug("Appending blocked='%s',severity='%s' for blocker='%s' ...", domain, severity, block["blocker"])
1103 "reason" : block["reason"],
1107 processing.block(block["blocker"], domain, None, "reject_media")
1109 processing.block(block["blocker"], domain, None, "reject_reports")
1111 logger.debug("block[blocker]='%s'", block["blocker"])
1112 if not blocklists.has(block["blocker"]):
1113 logger.debug("Invoking instances.set_total_blocks(%s, domains()=%d) ...", block["blocker"], len(domains))
1114 instances.set_total_blocks(block["blocker"], domains)
1116 logger.debug("Checking if blocker='%s' has pending updates ...", block["blocker"])
1117 if instances.has_pending(block["blocker"]):
1118 logger.debug("Flushing updates for block[blocker]='%s' ...", block["blocker"])
1119 instances.update(block["blocker"])
1121 logger.debug("Invoking commit() ...")
1122 database.connection.commit()
1124 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1125 if config.get("bot_enabled") and len(blockdict) > 0:
1126 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
1127 network.send_bot_post(block["blocker"], blockdict)
1129 logger.debug("Success! - EXIT!")
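# Downloads plain-text blocklists (currently seirdy.one's bsl.txt) and processes
# each listed domain.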
1132 def fetch_txt(args: argparse.Namespace) -> int:
1133 logger.debug("args[]='%s' - CALLED!", type(args))
1135 logger.debug("Invoking locking.acquire() ...")
1140 "blocker": "seirdy.one",
1141 "url" : "https://seirdy.one/pb/bsl.txt",
1144 logger.info("Checking %d text file(s) ...", len(urls))
1146 logger.debug("Fetching row[url]='%s' ...", row["url"])
1147 response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
1149 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1150 if response.ok and response.status_code == 200 and response.text != "":
1151 logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
1152 domains = response.text.split("\n")
1154 logger.info("Processing %d domains ...", len(domains))
1155 for domain in domains:
1156 logger.debug("domain='%s' - BEFORE!", domain)
1157 domain = tidyup.domain(domain)
1159 logger.debug("domain='%s' - AFTER!", domain)
1161 logger.debug("domain is empty - SKIPPED!")
1163 elif not domain_helper.is_wanted(domain):
1164 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1166 elif instances.is_recent(domain):
1167 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1170 logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
1171 processed = processing.domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)
1173 logger.debug("processed='%s'", processed)
1175 logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
1178 logger.debug("Success! - EXIT!")
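# Scrapes the list of instances that signed fedipact.online and registers every
# new, wanted domain.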
1181 def fetch_fedipact(args: argparse.Namespace) -> int:
1182 logger.debug("args[]='%s' - CALLED!", type(args))
1184 logger.debug("Invoking locking.acquire() ...")
1187 source_domain = "fedipact.online"
1188 if sources.is_recent(source_domain):
logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1192 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1193 sources.update(source_domain)
1195 logger.info("Fetching / from source_domain='%s' ...", source_domain)
1196 response = utils.fetch_url(
1197 f"https://{source_domain}",
1198 network.web_headers,
1199 (config.get("connection_timeout"), config.get("read_timeout"))
1202 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1203 if response.ok and response.status_code == 200 and response.text != "":
1204 logger.debug("Parsing %d Bytes ...", len(response.text))
1206 doc = bs4.BeautifulSoup(response.text, "html.parser")
1207 logger.debug("doc[]='%s'", type(doc))
1209 rows = doc.findAll("li")
1210 logger.info("Checking %d row(s) ...", len(rows))
1212 logger.debug("row[]='%s'", type(row))
1213 domain = tidyup.domain(row.contents[0])
1215 logger.debug("domain='%s' - AFTER!", domain)
1217 logger.debug("domain is empty - SKIPPED!")
1220 logger.debug("domain='%s' - BEFORE!", domain)
1221 domain = domain.encode("idna").decode("utf-8")
1222 logger.debug("domain='%s' - AFTER!", domain)
1224 if not domain_helper.is_wanted(domain):
1225 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1227 elif instances.is_registered(domain):
1228 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1230 elif instances.is_recent(domain):
1231 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1234 logger.info("Fetching domain='%s' ...", domain)
1235 federation.fetch_instances(domain, "beach.city", None, inspect.currentframe().f_code.co_name)
1237 logger.debug("Success! - EXIT!")
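# Fetches the instance list from the instances.joinmobilizon.org API and
# registers every new, wanted host.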
1240 def fetch_joinmobilizon(args: argparse.Namespace) -> int:
1241 logger.debug("args[]='%s' - CALLED!", type(args))
1243 logger.debug("Invoking locking.acquire() ...")
1246 source_domain = "instances.joinmobilizon.org"
1247 if sources.is_recent(source_domain):
logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1251 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1252 sources.update(source_domain)
1254 logger.info("Fetching instances from source_domain='%s' ...", source_domain)
1255 raw = utils.fetch_url(
1256 f"https://{source_domain}/api/v1/instances",
1257 network.web_headers,
1258 (config.get("connection_timeout"), config.get("read_timeout"))
1260 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1262 parsed = json.loads(raw)
1263 logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
1265 if "data" not in parsed:
logger.warning("parsed()=%d does not contain key 'data'", len(parsed))
1269 logger.info("Checking %d instances ...", len(parsed["data"]))
1270 for row in parsed["data"]:
1271 logger.debug("row[]='%s'", type(row))
1272 if "host" not in row:
1273 logger.warning("row='%s' does not contain key 'host' - SKIPPED!", row)
1275 elif not domain_helper.is_wanted(row["host"]):
1276 logger.debug("row[host]='%s' is not wanted - SKIPPED!", row["host"])
1278 elif instances.is_registered(row["host"]):
1279 logger.debug("row[host]='%s' is already registered - SKIPPED!", row["host"])
1282 logger.info("Fetching row[host]='%s' ...", row["host"])
1283 federation.fetch_instances(row["host"], "demo.mobilizon.org", None, inspect.currentframe().f_code.co_name)
1285 logger.debug("Success! - EXIT!")
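# Fetches instances.json from instanceapp.misskey.page and registers every new,
# wanted Misskey instance.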
1288 def fetch_joinmisskey(args: argparse.Namespace) -> int:
1289 logger.debug("args[]='%s' - CALLED!", type(args))
1291 logger.debug("Invoking locking.acquire() ...")
1294 source_domain = "instanceapp.misskey.page"
1295 if sources.is_recent(source_domain):
logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1299 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1300 sources.update(source_domain)
1302 logger.info("Fetching instances.json from source_domain='%s' ...", source_domain)
1303 raw = utils.fetch_url(
1304 f"https://{source_domain}/instances.json",
1305 network.web_headers,
1306 (config.get("connection_timeout"), config.get("read_timeout"))
1308 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1310 parsed = json.loads(raw)
1311 logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
1313 if "instancesInfos" not in parsed:
logger.warning("parsed()=%d does not contain element 'instancesInfos'", len(parsed))
logger.info("Checking %d instance(s) ...", len(parsed["instancesInfos"]))
1318 for row in parsed["instancesInfos"]:
1319 logger.debug("row[%s]='%s'", type(row), row)
1320 if "url" not in row:
1321 logger.warning("row()=%d does not have element 'url' - SKIPPED!", len(row))
1323 elif not domain_helper.is_wanted(row["url"]):
1324 logger.debug("row[url]='%s' is not wanted - SKIPPED!", row["url"])
1326 elif instances.is_registered(row["url"]):
1327 logger.debug("row[url]='%s' is already registered - SKIPPED!", row["url"])
1330 logger.info("Fetching row[url]='%s' ...", row["url"])
1331 federation.fetch_instances(row["url"], "misskey.io", None, inspect.currentframe().f_code.co_name)
1333 logger.debug("Success! - EXIT!")
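# Scrapes the FediBlock tables on joinfediverse.wiki and records the listed
# blocks for the climatejustice.* instances found in the database.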
1336 def fetch_joinfediverse(args: argparse.Namespace) -> int:
1337 logger.debug("args[]='%s' - CALLED!", type(args))
1339 logger.debug("Invoking locking.acquire() ...")
1342 source_domain = "joinfediverse.wiki"
1343 if sources.is_recent(source_domain):
logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1347 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1348 sources.update(source_domain)
1350 logger.info("Fetching /FediBlock wiki page from source_domain='%s' ...", source_domain)
1351 raw = utils.fetch_url(
1352 f"https://{source_domain}/FediBlock",
1353 network.web_headers,
1354 (config.get("connection_timeout"), config.get("read_timeout"))
1356 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1358 doc = bs4.BeautifulSoup(raw, "html.parser")
1359 logger.debug("doc[]='%s'", type(doc))
1361 tables = doc.findAll("table", {"class": "wikitable"})
1363 logger.info("Analyzing %d table(s) ...", len(tables))
1365 for table in tables:
1366 logger.debug("table[]='%s'", type(table))
1368 rows = table.findAll("tr")
1369 logger.info("Checking %d row(s) ...", len(rows))
1370 block_headers = dict()
1372 logger.debug("row[%s]='%s'", type(row), row)
1374 headers = row.findAll("th")
1375 logger.debug("Found headers()=%d header(s)", len(headers))
1376 if len(headers) > 1:
1377 block_headers = dict()
1379 for header in headers:
1381 logger.debug("header[]='%s',cnt=%d", type(header), cnt)
1382 text = header.contents[0]
1384 logger.debug("text[]='%s'", type(text))
1385 if not isinstance(text, str):
1386 logger.debug("text[]='%s' is not of type 'str' - SKIPPED!", type(text))
1388 elif validators.domain(text.strip()):
1389 logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
1392 text = tidyup.domain(text.strip())
1393 logger.debug("text='%s' - AFTER!", text)
1394 if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
1395 logger.debug("Found header: '%s'=%d", text, cnt)
1396 block_headers[cnt] = text
1398 elif len(block_headers) == 0:
1399 logger.debug("row is not scrapable - SKIPPED!")
1401 elif len(block_headers) > 0:
1402 logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
1406 for element in row.find_all(["th", "td"]):
1408 logger.debug("element[]='%s',cnt=%d", type(element), cnt)
1409 if cnt in block_headers:
1410 logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])
1412 text = element.text.strip()
1413 key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"
1415 logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
1416 if key in ["domain", "instance"]:
1418 elif key == "reason":
1419 block[key] = tidyup.reason(text)
1420 elif key == "subdomain(s)":
1423 block[key] = text.split("/")
1425 logger.debug("key='%s'", key)
1428 logger.debug("block()=%d ...", len(block))
1430 logger.debug("Appending block()=%d ...", len(block))
1431 blocklist.append(block)
1433 logger.debug("blocklist()=%d", len(blocklist))
1435 database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
1436 domains = database.cursor.fetchall()
1438 logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
1440 for block in blocklist:
1441 logger.debug("block='%s'", block)
1442 if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
1443 origin = block["blocked"]
1444 logger.debug("origin='%s'", origin)
1445 for subdomain in block["subdomain(s)"]:
1446 block["blocked"] = subdomain + "." + origin
1447 logger.debug("block[blocked]='%s'", block["blocked"])
1448 blocking.append(block)
1450 blocking.append(block)
logger.debug("blocking()=%d", len(blocking))
1453 for block in blocking:
1454 logger.debug("block[]='%s'", type(block))
1455 if "blocked" not in block:
1456 raise KeyError(f"block()={len(block)} does not have element 'blocked'")
1458 block["blocked"] = tidyup.domain(block["blocked"]).encode("idna").decode("utf-8")
1459 logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
1461 if block["blocked"] == "":
1462 logger.debug("block[blocked] is empty - SKIPPED!")
1464 elif not domain_helper.is_wanted(block["blocked"]):
1465 logger.debug("block[blocked]='%s' is not wanted - SKIPPED!", block["blocked"])
1467 elif instances.is_recent(block["blocked"]):
1468 logger.debug("block[blocked]='%s' has been recently checked - SKIPPED!", block["blocked"])
logger.debug("Processing blocked='%s' ...", block["blocked"])
1472 processing.domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
1475 for blocker in domains:
1476 blocker = blocker[0]
1477 logger.debug("blocker[%s]='%s'", type(blocker), blocker)
1478 instances.set_last_blocked(blocker)
1480 for block in blocking:
1481 logger.debug("block[blocked]='%s',block[block reason(s)]='%s' - BEFORE!", block["blocked"], block["block reason(s)"] if "block reason(s)" in block else None)
1482 block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None
1484 logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
1485 if block["blocked"] == "":
1486 logger.debug("block[blocked] is empty - SKIPPED!")
1488 elif not domain_helper.is_wanted(block["blocked"]):
1489 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1492 logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
1493 if processing.block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
1496 "blocked": block["blocked"],
1497 "reason" : block["reason"],
1500 if instances.has_pending(blocker):
1501 logger.debug("Flushing updates for blocker='%s' ...", blocker)
1502 instances.update(blocker)
1504 logger.debug("Invoking commit() ...")
1505 database.connection.commit()
1507 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1508 if config.get("bot_enabled") and len(blockdict) > 0:
logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
1510 network.send_bot_post(blocker, blockdict)
1512 logger.debug("Success! - EXIT!")
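# Re-fetches block lists from instances flagged with has_obfuscation and tries
# to deobfuscate wildcard/question-mark entries against known domains.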
1515 def recheck_obfuscation(args: argparse.Namespace) -> int:
1516 logger.debug("args[]='%s' - CALLED!", type(args))
1518 logger.debug("Invoking locking.acquire() ...")
1521 if isinstance(args.domain, str) and args.domain != "" and domain_helper.is_wanted(args.domain):
1522 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
1523 elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
1524 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
1526 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")
1528 rows = database.cursor.fetchall()
1529 logger.info("Checking %d domains ...", len(rows))
1531 logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
1532 if (args.force is None or not args.force) and args.domain is None and args.software is None and instances.is_recent(row["domain"], "last_blocked"):
1533 logger.debug("row[domain]='%s' has been recently checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force))
1536 logger.debug("Invoking federation.fetch_blocks(%s) ...", row["domain"])
1537 blocking = federation.fetch_blocks(row["domain"])
1539 logger.debug("blocking()=%d", len(blocking))
1540 if len(blocking) == 0:
1541 if row["software"] == "pleroma":
1542 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1543 blocking = pleroma.fetch_blocks(row["domain"])
1544 elif row["software"] == "mastodon":
1545 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1546 blocking = mastodon.fetch_blocks(row["domain"])
1547 elif row["software"] == "lemmy":
1548 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1549 blocking = lemmy.fetch_blocks(row["domain"])
1550 elif row["software"] == "friendica":
1551 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1552 blocking = friendica.fetch_blocks(row["domain"])
1553 elif row["software"] == "misskey":
1554 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1555 blocking = misskey.fetch_blocks(row["domain"])
1557 logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])
1559 # c.s isn't part of oliphant's "hidden" blocklists
1560 logger.debug("row[domain]='%s'", row["domain"])
1561 if row["domain"] != "chaos.social" and not blocklists.has(row["domain"]):
1562 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
1563 instances.set_last_blocked(row["domain"])
1564 instances.set_total_blocks(row["domain"], blocking)
1569 logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1570 for block in blocking:
1571 logger.debug("block[blocked]='%s'", block["blocked"])
1574 if block["blocked"] == "":
1575 logger.debug("block[blocked] is empty - SKIPPED!")
1577 elif block["blocked"].endswith(".arpa"):
1578 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1580 elif block["blocked"].endswith(".tld"):
1581 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1583 elif block["blocked"].endswith(".onion"):
1584 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
1586 elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
1587 logger.debug("block='%s' is obfuscated.", block["blocked"])
1588 obfuscated = obfuscated + 1
1589 blocked = utils.deobfuscate(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
1590 elif not domain_helper.is_wanted(block["blocked"]):
1591 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1593 elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1594 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
1597 logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
1598 if blocked is not None and blocked != block["blocked"]:
1599 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1600 obfuscated = obfuscated - 1
1602 if blocks.is_instance_blocked(row["domain"], blocked):
1603 logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
1605 elif blacklist.is_blacklisted(blocked):
1606 logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
1609 block["block_level"] = blocks.alias_block_level(block["block_level"])
1611 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
1612 if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1613 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
1616 "reason" : block["reason"],
1619 logger.debug("Settings obfuscated=%d for row[domain]='%s' ...", obfuscated, row["domain"])
1620 instances.set_obfuscated_blocks(row["domain"], obfuscated)
1622 logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
1623 if obfuscated == 0 and len(blocking) > 0:
1624 logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
1625 instances.set_has_obfuscation(row["domain"], False)
1627 if instances.has_pending(row["domain"]):
1628 logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1629 instances.update(row["domain"])
1631 logger.debug("Invoking commit() ...")
1632 database.connection.commit()
1634 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1635 if config.get("bot_enabled") and len(blockdict) > 0:
1636 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", row["domain"], len(blockdict))
1637 network.send_bot_post(row["domain"], blockdict)
1639 logger.debug("Success! - EXIT!")
1642 def fetch_fedilist(args: argparse.Namespace) -> int:
1643 logger.debug("args[]='%s' - CALLED!", type(args))
1645 logger.debug("Invoking locking.acquire() ...")
1648 source_domain = "demo.fedilist.com"
1649 if sources.is_recent(source_domain):
1650 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1653 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1654 sources.update(source_domain)
1656 url = f"http://{source_domain}/instance/csv?onion=not"
1657 if args.software is not None and args.software != "":
1658 logger.debug("args.software='%s'", args.software)
1659 url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"
1661 logger.info("Fetching url='%s' ...", url)
1662 response = reqto.get(
1663 url,
1664 headers=network.web_headers,
1665 timeout=(config.get("connection_timeout"), config.get("read_timeout")),
1666 allow_redirects=False
1667 )
1669 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1670 if not response.ok or response.status_code > 200 or len(response.content) == 0:
1671 logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.text))
1674 reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
1676 logger.debug("reader[]='%s'", type(reader))
1678 logger.warning("Failed parsing response.content()=%d as CSV content", len(response.content))
1683 logger.info("Checking %d rows ...", len(rows))
1685 logger.debug("row[]='%s'", type(row))
1686 if "hostname" not in row:
1687 logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(row))
1690 logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"])
1691 domain = tidyup.domain(row["hostname"])
1692 logger.debug("domain='%s' - AFTER!", domain)
1695 logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"])
1698 logger.debug("domain='%s' - BEFORE!", domain)
1699 domain = domain.encode("idna").decode("utf-8")
1700 logger.debug("domain='%s' - AFTER!", domain)
1702 if not domain_helper.is_wanted(domain):
1703 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1705 elif (args.force is None or not args.force) and instances.is_registered(domain):
1706 logger.debug("domain='%s' is already registered, --force not specified: args.force[]='%s'", domain, type(args.force))
1708 elif instances.is_recent(domain):
1709 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1712 logger.info("Fetching instances from domain='%s' ...", domain)
1713 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1715 logger.debug("Success! - EXIT!")
1718 def update_nodeinfo(args: argparse.Namespace) -> int:
1719 logger.debug("args[]='%s' - CALLED!", type(args))
1721 logger.debug("Invoking locking.acquire() ...")
1724 if args.domain is not None and args.domain != "":
1725 logger.debug("Fetching args.domain='%s'", args.domain)
1726 database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ?", [args.domain])
1727 elif args.software is not None and args.software != "":
1728 logger.info("Fetching domains for args.software='%s'", args.software)
1729 database.cursor.execute("SELECT domain, software FROM instances WHERE software = ? AND (last_nodeinfo < ? OR last_nodeinfo IS NULL)", [args.software.lower(), time.time() - config.get("recheck_nodeinfo")])
1730 elif args.mode is not None and args.mode != "":
1731 logger.info("Fetching domains for args.mode='%s'", args.mode.upper())
1732 database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode = ? AND (last_nodeinfo < ? OR last_nodeinfo IS NULL)", [args.mode.upper(), time.time() - config.get("recheck_nodeinfo")])
1733 elif args.no_software:
1734 logger.info("Fetching domains with no software type detected ...")
1735 database.cursor.execute("SELECT domain, software FROM instances WHERE software IS NULL AND (last_nodeinfo < ? OR last_nodeinfo IS NULL)", [time.time() - config.get("recheck_nodeinfo")])
1737 logger.info("Fetching domains for recently updated ...")
1738 database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL", [time.time() - config.get("recheck_nodeinfo")])
1740 domains = database.cursor.fetchall()
1742 logger.info("Checking %d domain(s) ...", len(domains))
1745 logger.debug("row[]='%s'", type(row))
1746 if not args.force and instances.is_recent(row["domain"], "last_nodeinfo"):
1747 logger.debug("row[domain]='%s' has been recently checked - SKIPPED!", row["domain"])
1751 logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
1752 software = federation.determine_software(row["domain"])
1754 logger.debug("Determined software='%s'", software)
1755 if (software != row["software"] and software is not None) or args.force is True:
1756 logger.debug("software='%s'", software)
1757 if software is None:
1758 logger.debug("Setting nodeinfo_url to 'None' for row[domain]='%s' ...", row["domain"])
1759 instances.set_nodeinfo_url(row["domain"], None)
1761 logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
1762 instances.set_software(row["domain"], software)
1764 if software is not None:
1765 logger.debug("Setting row[domain]='%s' as successfully determined ...", row["domain"])
1766 instances.set_success(row["domain"])
1767 except network.exceptions as exception:
1768 logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1769 instances.set_last_error(row["domain"], exception)
1771 instances.set_last_nodeinfo(row["domain"])
1772 instances.update(row["domain"])
1775 logger.debug("Success! - EXIT!")
1778 def fetch_instances_social(args: argparse.Namespace) -> int:
1779 logger.debug("args[]='%s' - CALLED!", type(args))
1781 logger.debug("Invoking locking.acquire() ...")
1784 source_domain = "instances.social"
1786 if config.get("instances_social_api_key") == "":
1787 logger.error("API key not set. Please set in your config.json file.")
1789 elif sources.is_recent(source_domain):
1790 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1793 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1794 sources.update(source_domain)
1797 "Authorization": f"Bearer {config.get('instances_social_api_key')}",
1800 logger.info("Fetching list from source_domain='%s' ...", source_domain)
1801 fetched = network.get_json_api(
1802 source_domain,
1803 "/api/1.0/instances/list?count=0&sort_by=name",
1804 headers,
1805 (config.get("connection_timeout"), config.get("read_timeout"))
1806 )
1807 logger.debug("fetched[]='%s'", type(fetched))
1809 if "error_message" in fetched:
1810 logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1812 elif "exception" in fetched:
1813 logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1815 elif "json" not in fetched:
1816 logger.warning("fetched has no element 'json' - EXIT!")
1818 elif "instances" not in fetched["json"]:
1819 logger.warning("fetched[row] has no element 'instances' - EXIT!")
1823 rows = fetched["json"]["instances"]
1825 logger.info("Checking %d row(s) ...", len(rows))
1827 logger.debug("row[]='%s'", type(row))
1828 domain = tidyup.domain(row["name"])
1829 logger.debug("domain='%s' - AFTER!", domain)
1832 logger.debug("domain is empty - SKIPPED!")
1835 logger.debug("domain='%s' - BEFORE!", domain)
1836 domain = domain.encode("idna").decode("utf-8")
1837 logger.debug("domain='%s' - AFTER!", domain)
1839 if not domain_helper.is_wanted(domain):
1840 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1842 elif domain in domains:
1843 logger.debug("domain='%s' is already added - SKIPPED!", domain)
1845 elif instances.is_registered(domain):
1846 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1848 elif instances.is_recent(domain):
1849 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1852 logger.info("Fetching instances from domain='%s'", domain)
1853 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1855 logger.debug("Success! - EXIT!")
1858 def fetch_relays(args: argparse.Namespace) -> int:
1859 logger.debug("args[]='%s' - CALLED!", type(args))
1861 logger.debug("Invoking locking.acquire() ...")
1864 if args.domain is not None and args.domain != "":
1865 database.cursor.execute("SELECT domain, software FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay') AND domain = ? LIMIT 1", [args.domain])
1867 database.cursor.execute("SELECT domain, software FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay')")
1870 rows = database.cursor.fetchall()
1872 logger.info("Checking %d relays ...", len(rows))
1874 logger.debug("row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1876 if not args.force and instances.is_recent(row["domain"]):
1877 logger.debug("row[domain]='%s' has been recently fetched - SKIPPED!", row["domain"])
1881 logger.info("Fetching / from relay row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1882 raw = utils.fetch_url(
1883 f"https://{row['domain']}",
1884 network.web_headers,
1885 (config.get("connection_timeout"), config.get("read_timeout"))
1887 logger.debug("raw[%s]()=%d", type(raw), len(raw))
1888 except network.exceptions as exception:
1889 logger.warning("Exception '%s' during fetching from relay '%s': '%s'", type(exception), row["domain"], str(exception))
1890 instances.set_last_error(row["domain"], exception)
1891 instances.set_last_instance_fetch(row["domain"])
1892 instances.update(row["domain"])
1895 doc = bs4.BeautifulSoup(raw, features="html.parser")
1896 logger.debug("doc[]='%s'", type(doc))
1898 logger.debug("row[software]='%s'", row["software"])
1899 if row["software"] == "activityrelay":
1900 logger.debug("Checking row[domain]='%s' ...", row["domain"])
1901 tags = doc.findAll("p")
1903 logger.debug("Checking %d paragraphs ...", len(tags))
1905 logger.debug("tag[]='%s'", type(tag))
1906 if len(tag.contents) == 0:
1907 logger.debug("tag='%s' is an empty tag - SKIPPED!", tag)
1909 elif "registered instances" not in tag.contents[0]:
1910 logger.debug("Skipping paragraph, text not found.")
1913 logger.debug("Found tag.contents[0][]='%s'", tag.contents[0])
1914 for domain in tag.contents:
1915 logger.debug("domain[%s]='%s'", type(domain), domain)
1916 if not isinstance(domain, bs4.element.NavigableString) or "registered instances" in domain:
1919 domain = str(domain)
1920 logger.debug("domain='%s'", domain)
1921 if not domain_helper.is_wanted(domain):
1922 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1925 logger.debug("domain='%s' - BEFORE!", domain)
1926 domain = tidyup.domain(domain)
1927 logger.debug("domain='%s' - AFTER!", domain)
1930 logger.debug("Empty domain after tidyup.domain() from origin='%s' - SKIPPED!", row["domain"])
1932 elif domain not in peers:
1933 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1934 peers.append(domain)
1936 if dict_helper.has_key(domains, "domain", domain):
1937 logger.debug("domain='%s' already added", domain)
1940 logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
1943 "origin": row["domain"],
1945 elif row["software"] in ["aoderelay", "selective-relay"]:
1946 logger.debug("Checking row[domain]='%s' ...", row["domain"])
1947 if row["software"] == "aoderelay":
1948 tags = doc.findAll("section", {"class": "instance"})
1950 tags = doc.find("div", {"id": "instances"}).findAll("li")
1952 logger.debug("Checking %d tags ...", len(tags))
1954 logger.debug("tag[]='%s'", type(tag))
1956 link = tag.find("a")
1957 logger.debug("link[%s]='%s'", type(link), link)
1959 logger.warning("tag='%s' has no a-tag ...", tag)
1962 components = urlparse(link["href"])
1963 domain = components.netloc.lower()
1965 if not domain_helper.is_wanted(domain):
1966 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1969 logger.debug("domain='%s' - BEFORE!", domain)
1970 domain = tidyup.domain(domain)
1971 logger.debug("domain='%s' - AFTER!", domain)
1974 logger.debug("Empty domain after tidyup.domain() from origin='%s' - SKIPPED!", row["domain"])
1976 elif domain not in peers:
1977 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1978 peers.append(domain)
1980 if dict_helper.has_key(domains, "domain", domain):
1981 logger.debug("domain='%s' already added", domain)
1984 logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
1987 "origin": row["domain"],
1990 logger.warning("row[domain]='%s',row[software]='%s' is not supported", row["domain"], row["software"])
1992 logger.debug("Updating last_instance_fetch for row[domain]='%s' ...", row["domain"])
1993 instances.set_last_instance_fetch(row["domain"])
1995 logger.info("Relay '%s' has %d peer(s) registered.", row["domain"], len(peers))
1996 instances.set_total_peers(row["domain"], peers)
1998 logger.debug("Flushing data for row[domain]='%s'", row["domain"])
1999 instances.update(row["domain"])
2001 logger.info("Checking %d domains ...", len(domains))
2003 logger.debug("row[domain]='%s',row[origin]='%s'", row["domain"], row["origin"])
2004 if instances.is_registered(row["domain"]):
2005 logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
2008 logger.info("Fetching row[domain]='%s',row[origin]='%s' ...", row["domain"], row["origin"])
2009 federation.fetch_instances(row["domain"], row["origin"], None, inspect.currentframe().f_code.co_name)
2011 logger.debug("Success! - EXIT!")
2014 def convert_idna(args: argparse.Namespace) -> int:
2015 logger.debug("args[]='%s' - CALLED!", type(args))
2017 database.cursor.execute("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC")
2018 rows = database.cursor.fetchall()
2020 logger.debug("rows[]='%s'", type(rows))
2021 instances.translate_idnas(rows, "domain")
2023 database.cursor.execute("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC")
2024 rows = database.cursor.fetchall()
2026 logger.debug("rows[]='%s'", type(rows))
2027 instances.translate_idnas(rows, "origin")
2029 database.cursor.execute("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC")
2030 rows = database.cursor.fetchall()
2032 logger.debug("rows[]='%s'", type(rows))
2033 blocks.translate_idnas(rows, "blocker")
2035 database.cursor.execute("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC")
2036 rows = database.cursor.fetchall()
2038 logger.debug("rows[]='%s'", type(rows))
2039 blocks.translate_idnas(rows, "blocked")
2041 logger.debug("Success! - EXIT!")
2044 def remove_invalid(args: argparse.Namespace) -> int:
2045 logger.debug("args[]='%s' - CALLED!", type(args))
2047 logger.debug("Invoking locking.acquire() ...")
2050 database.cursor.execute("SELECT domain FROM instances ORDER BY domain ASC")
2051 rows = database.cursor.fetchall()
2053 logger.info("Checking %d domains ...", len(rows))
2055 logger.debug("row[domain]='%s'", row["domain"])
2056 if not validators.domain(row["domain"].split("/")[0]):
2057 logger.info("Invalid row[domain]='%s' found, removing ...", row["domain"])
2058 database.cursor.execute("DELETE FROM blocks WHERE blocker = ? OR blocked = ?", [row["domain"], row["domain"]])
2059 database.cursor.execute("DELETE FROM instances WHERE domain = ? LIMIT 1", [row["domain"]])
2061 logger.debug("Invoking commit() ...")
2062 database.connection.commit()
2064 logger.info("Vaccum cleaning database ...")
2065 database.cursor.execute("VACUUM")
2067 logger.debug("Success! - EXIT!")