1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
23 from urllib.parse import urlparse
32 from fba import database
35 from fba.helpers import blacklist
36 from fba.helpers import blocklists
37 from fba.helpers import config
38 from fba.helpers import cookies
39 from fba.helpers import dicts as dict_helper
40 from fba.helpers import domain as domain_helper
41 from fba.helpers import locking
42 from fba.helpers import processing
43 from fba.helpers import software as software_helper
44 from fba.helpers import tidyup
46 from fba.http import csrf
47 from fba.http import federation
48 from fba.http import network
50 from fba.models import blocks
51 from fba.models import instances
52 from fba.models import sources
54 from fba.networks import friendica
55 from fba.networks import lemmy
56 from fba.networks import mastodon
57 from fba.networks import misskey
58 from fba.networks import pleroma
# Module-level logging setup: root config at INFO, plus a module-scoped
# logger (PEP 282 convention).  The commented-out line is the usual switch
# for enabling per-module DEBUG output during development.
60 logging.basicConfig(level=logging.INFO)
61 logger = logging.getLogger(__name__)
62 #logger.setLevel(logging.DEBUG)
64 def check_instance(args: argparse.Namespace) -> int:
# Command handler: sanity-check a single domain given via --domain.
# The domain must be syntactically valid, not blacklisted and not yet
# registered; each failing condition is logged as a warning, otherwise
# the domain is reported as "not known".
# NOTE(review): this listing is elided - the statements assigning the
# `status` value logged below and the final `return status` are not
# visible here; confirm against the full file before editing.
65 logger.debug("args.domain='%s' - CALLED!", args.domain)
67 if not validators.domain(args.domain):
68 logger.warning("args.domain='%s' is not valid", args.domain)
70 elif blacklist.is_blacklisted(args.domain):
71 logger.warning("args.domain='%s' is blacklisted", args.domain)
73 elif instances.is_registered(args.domain):
74 logger.warning("args.domain='%s' is already registered", args.domain)
# Fall-through: domain is valid, not blacklisted and not registered yet.
77 logger.info("args.domain='%s' is not known", args.domain)
79 logger.debug("status=%d - EXIT!", status)
82 def check_nodeinfo(args: argparse.Namespace) -> int:
# Command handler: consistency check of stored nodeinfo URLs.  For every
# registered instance with a nodeinfo_url, verify that the URL actually
# contains the instance's domain (or its IDNA/punycode form).  Relative
# URLs trivially match and are skipped.
# NOTE(review): elided listing - the initialization/increment of `cnt`
# (mismatch counter, logged below) and the return statement are not
# visible here.
83 logger.debug("args[]='%s' - CALLED!", type(args))
86 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")
89 for row in database.cursor.fetchall():
90 logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
# IDNA-encode the domain so internationalized domains can be matched
# against the (ASCII) punycode form that may appear in the URL.
91 punycode = row["domain"].encode("idna").decode("utf-8")
93 if row["nodeinfo_url"].startswith("/"):
94 logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
96 elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
97 logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
100 logger.info("Found %d row(s)", cnt)
102 logger.debug("EXIT!")
105 def fetch_pixelfed_api(args: argparse.Namespace) -> int:
# Command handler: pull the public server list from the pixelfed.org API
# and register every new, wanted domain by crawling it.  Rate-limited per
# source via sources.is_recent()/sources.update().
# NOTE(review): elided listing - `try:` lines, `continue` statements in
# the skip branches, and the `return` statements after the EXIT! log
# lines are not visible here.
106 logger.debug("args[]='%s' - CALLED!", type(args))
108 # No CSRF by default, you don't have to add network.source_headers by yourself here
110 source_domain = "pixelfed.org"
112 if sources.is_recent(source_domain):
113 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
116 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
117 sources.update(source_domain)
# Probe for a CSRF token; a network failure here aborts the whole run.
120 logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
121 headers = csrf.determine(source_domain, dict())
122 except network.exceptions as exception:
123 logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
127 logger.info("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
128 fetched = network.get_json_api(
130 "/api/v1/servers/all.json?scope=All&country=all&language=all",
132 (config.get("connection_timeout"), config.get("read_timeout"))
135 logger.debug("JSON API returned %d elements", len(fetched))
136 if "error_message" in fetched:
137 logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
139 elif "data" not in fetched["json"]:
140 logger.warning("API did not return JSON with 'data' element - EXIT!")
143 rows = fetched["json"]["data"]
144 logger.info("Checking %d fetched rows ...", len(rows))
# Per-row loop (the `for row in rows:` header is among the elided lines):
# skip malformed/empty entries, then normalize the domain to IDNA form.
146 logger.debug("row[]='%s'", type(row))
147 if "domain" not in row:
148 logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
150 elif row["domain"] == "":
151 logger.debug("row[domain] is empty - SKIPPED!")
154 logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
155 domain = row["domain"].encode("idna").decode("utf-8")
156 logger.debug("domain='%s' - AFTER!", domain)
158 if not domain_helper.is_wanted(domain):
159 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
161 elif instances.is_registered(domain):
162 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
164 elif instances.is_recent(domain):
165 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
# New, wanted and not recently crawled: fetch its peers now.
168 logger.debug("Fetching instances from domain='%s' ...", domain)
169 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
171 except network.exceptions as exception:
172 logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
175 logger.debug("Success! - EXIT!")
178 def fetch_bkali(args: argparse.Namespace) -> int:
# Command handler: query the gql.api.bka.li GraphQL endpoint for a
# domain list, collect new wanted domains, then crawl each of them.
# NOTE(review): elided listing - the `domains = ...` initializer,
# `try:` lines, `continue` statements and `return` statements are not
# visible here; the accumulate-then-process split is still recognizable.
179 logger.debug("args[]='%s' - CALLED!", type(args))
181 logger.debug("Invoking locking.acquire() ...")
184 source_domain = "gql.api.bka.li"
185 if sources.is_recent(source_domain):
186 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
189 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
190 sources.update(source_domain)
194 logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
195 fetched = network.post_json_api(
199 "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
203 logger.debug("fetched[]='%s'", type(fetched))
204 if "error_message" in fetched:
205 logger.warning("post_json_api() for 'gql.sources.bka.li' returned error message='%s", fetched["error_message"])
207 elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
208 logger.warning("post_json_api() returned error: '%s", fetched["error"]["message"])
211 rows = fetched["json"]
213 logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
# Structural validation of the GraphQL response; any missing layer is
# treated as a hard error.
215 raise Exception("WARNING: Returned no records")
216 elif "data" not in rows:
217 raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
218 elif "nodeinfo" not in rows["data"]:
219 raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")
221 for entry in rows["data"]["nodeinfo"]:
222 logger.debug("entry[%s]='%s'", type(entry), entry)
223 if "domain" not in entry:
224 logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
226 elif entry["domain"] == "":
227 logger.debug("entry[domain] is empty - SKIPPED!")
229 elif not domain_helper.is_wanted(entry["domain"]):
230 logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
232 elif instances.is_registered(entry["domain"]):
233 logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
235 elif instances.is_recent(entry["domain"]):
236 logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
239 logger.debug("Adding domain='%s' ...", entry["domain"])
240 domains.append(entry["domain"])
242 except network.exceptions as exception:
243 logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
246 logger.debug("domains()=%d", len(domains))
# Second phase: crawl every collected domain; per-domain network errors
# are recorded on the instance instead of aborting the loop.
248 logger.info("Adding %d new instances ...", len(domains))
249 for domain in domains:
250 logger.debug("domain='%s' - BEFORE!", domain)
251 domain = domain.encode("idna").decode("utf-8")
252 logger.debug("domain='%s' - AFTER!", domain)
255 logger.info("Fetching instances from domain='%s' ...", domain)
256 federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
257 except network.exceptions as exception:
258 logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
259 instances.set_last_error(domain, exception)
262 logger.debug("Success - EXIT!")
265 def fetch_blocks(args: argparse.Namespace) -> int:
# Command handler: (re-)fetch blocklists ("blocking" entries) from known
# instances.  Selection modes: a single --domain, a single --software,
# --force-style all instances, or only those past the recheck_block
# interval.  Per blocker: fetch its blocks (with software-specific
# fallbacks), deobfuscate wildcarded entries, normalize and store each
# block, and optionally collect reject-level blocks for a bot post.
# NOTE(review): elided listing - `blockdict` initialization, `continue`
# statements in the skip branches, `try:` lines and `return` statements
# are not visible here; statement order below is otherwise as stored.
266 logger.debug("args[]='%s' - CALLED!", type(args))
267 if args.domain is not None and args.domain != "":
268 logger.debug("args.domain='%s' - checking ...", args.domain)
269 if not validators.domain(args.domain):
270 logger.warning("args.domain='%s' is not valid.", args.domain)
272 elif blacklist.is_blacklisted(args.domain):
273 logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
275 elif not instances.is_registered(args.domain):
276 logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
279 logger.debug("Invoking locking.acquire() ...")
# --- Row selection: which blockers to (re-)check -----------------------
282 if args.domain is not None and args.domain != "":
283 # Re-check single domain
284 logger.debug("Querying database for args.domain='%s' ...", args.domain)
285 database.cursor.execute(
286 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ? LIMIT 1", [args.domain]
288 elif args.software is not None and args.software != "":
289 # Re-check single software
290 logger.debug("Querying database for args.software='%s' ...", args.software)
291 database.cursor.execute(
292 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_updated ASC", [args.software]
296 logger.debug("Re-checking all instances ...")
297 database.cursor.execute(
298 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_updated ASC"
301 # Re-check after "timeout" (aka. minimum interval)
302 database.cursor.execute(
303 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_updated ASC", [time.time() - config.get("recheck_block")]
306 rows = database.cursor.fetchall()
307 logger.info("Checking %d entries ...", len(rows))
# --- Per-blocker loop --------------------------------------------------
308 for blocker, software, origin, nodeinfo_url in rows:
309 logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
311 if not domain_helper.is_wanted(blocker):
312 logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
# Reset per-blocker state before fetching fresh data.
315 logger.debug("Setting last_blocked,has_obfuscation=false for blocker='%s' ...", blocker)
316 instances.set_last_blocked(blocker)
317 instances.set_has_obfuscation(blocker, False)
319 # c.s isn't part of oliphant's "hidden" blocklists
320 if blocker == "chaos.social" or blocklists.has(blocker):
321 logger.debug("Skipping blocker='%s', run ./fba.py fetch_cs or fetch_oliphant instead!", blocker)
324 logger.debug("Invoking federation.fetch_blocks(%s) ...", blocker)
325 blocking = federation.fetch_blocks(blocker)
327 logger.debug("blocking()=%d,nodeinfo_url='%s'", len(blocking), nodeinfo_url)
# Generic fetch returned nothing: fall back to software-specific
# scrapers (each logs the same shape of info/debug lines).
328 if len(blocking) == 0:
329 logger.debug("blocker='%s',software='%s'", blocker, software)
330 if software == "pleroma":
331 logger.info("blocker='%s',software='%s'", blocker, software)
332 blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
333 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
334 elif software == "mastodon":
335 logger.info("blocker='%s',software='%s'", blocker, software)
336 blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
337 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
338 elif software == "lemmy":
339 logger.info("blocker='%s',software='%s'", blocker, software)
340 blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
341 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
342 elif software == "friendica":
343 logger.info("blocker='%s',software='%s'", blocker, software)
344 blocking = friendica.fetch_blocks(blocker)
345 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
346 elif software == "misskey":
347 logger.info("blocker='%s',software='%s'", blocker, software)
348 blocking = misskey.fetch_blocks(blocker)
349 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
351 logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)
353 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
354 instances.set_total_blocks(blocker, blocking)
# --- Per-block loop: normalize, deobfuscate, store ---------------------
356 logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
358 for block in blocking:
359 logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])
361 if block["block_level"] == "":
362 logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
365 logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
366 block["blocked"] = tidyup.domain(block["blocked"])
367 block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
368 logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])
370 if block["blocked"] == "":
371 logger.warning("blocked is empty, blocker='%s'", blocker)
373 elif block["blocked"].endswith(".onion"):
374 logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
376 elif block["blocked"].endswith(".arpa"):
377 logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
379 elif block["blocked"].endswith(".tld"):
380 logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
382 elif block["blocked"].find("*") >= 0:
383 logger.debug("blocker='%s' uses obfuscated domains", blocker)
385 # Some friendica servers also obscure domains without hash
386 row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)
388 logger.debug("row[]='%s'", type(row))
390 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
391 instances.set_has_obfuscation(blocker, True)
# Deobfuscation succeeded: substitute the resolved record.
394 block["blocked"] = row["domain"]
395 origin = row["origin"]
396 nodeinfo_url = row["nodeinfo_url"]
397 elif block["blocked"].find("?") >= 0:
398 logger.debug("blocker='%s' uses obfuscated domains", blocker)
400 # Some obscure them with question marks, not sure if that's dependent on version or not
401 row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)
403 logger.debug("row[]='%s'", type(row))
405 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
406 instances.set_has_obfuscation(blocker, True)
409 block["blocked"] = row["domain"]
410 origin = row["origin"]
411 nodeinfo_url = row["nodeinfo_url"]
413 logger.debug("Looking up instance by domainm, blocked='%s'", block["blocked"])
414 if block["blocked"] == "":
415 logger.debug("block[blocked] is empty - SKIPPED!")
# Normalize: strip leading dots, then IDNA-encode the blocked domain.
418 logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
419 block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
420 logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
422 if not domain_helper.is_wanted(block["blocked"]):
423 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
425 elif block["block_level"] in ["accept", "accepted"]:
426 logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
428 elif not instances.is_registered(block["blocked"]):
429 logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
430 federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)
432 block["block_level"] = blocks.alias_block_level(block["block_level"])
# Store the block; reject-level blocks are additionally queued for the
# bot announcement (dict literal lines around 437-438 are elided).
434 if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
435 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
437 "blocked": block["blocked"],
438 "reason" : block["reason"],
441 logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
442 cookies.clear(block["blocked"])
# --- Per-blocker teardown ---------------------------------------------
444 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
445 if instances.has_pending(blocker):
446 logger.debug("Flushing updates for blocker='%s' ...", blocker)
447 instances.update(blocker)
449 logger.debug("Invoking commit() ...")
450 database.connection.commit()
452 logger.debug("Invoking cookies.clear(%s) ...", blocker)
453 cookies.clear(blocker)
455 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d'", config.get("bot_enabled"), len(blockdict))
456 if config.get("bot_enabled") and len(blockdict) > 0:
457 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
458 network.send_bot_post(blocker, blockdict)
460 logger.debug("Success! - EXIT!")
463 def fetch_observer(args: argparse.Namespace) -> int:
# Command handler: scrape fediverse.observer for instance domains.
# Without --software it first scrapes the site's software dropdown menu
# to build the list of software types; with --software only that type is
# queried.  Each type's table page is fetched and every linked domain is
# crawled if wanted and not yet registered.
# NOTE(review): elided listing - the `types = ...` initializer, inner
# `for item in items:` headers, `try:` lines, `continue`/`return`
# statements are not visible here.
464 logger.debug("args[]='%s' - CALLED!", type(args))
466 logger.debug("Invoking locking.acquire() ...")
469 source_domain = "fediverse.observer"
470 if sources.is_recent(source_domain):
471 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
474 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
475 sources.update(source_domain)
478 if args.software is None:
479 logger.info("Fetching software list ...")
480 raw = utils.fetch_url(
481 f"https://{source_domain}",
483 (config.get("connection_timeout"), config.get("read_timeout"))
485 logger.debug("raw[%s]()=%d", type(raw), len(raw))
487 doc = bs4.BeautifulSoup(raw, features="html.parser")
488 logger.debug("doc[]='%s'", type(doc))
# The software list lives in the "Softwares" navbar dropdown.
490 navbar = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"})
491 logger.debug("navbar[]='%s'", type(navbar))
493 logger.warning("Cannot find navigation bar, cannot continue!")
496 items = navbar.findAll("a", {"class": "dropdown-item"})
497 logger.debug("items[]='%s'", type(items))
499 logger.info("Checking %d menu items ...", len(items))
501 logger.debug("item[%s]='%s'", type(item), item)
502 if item.text.lower() == "all":
503 logger.debug("Skipping 'All' menu entry ...")
506 logger.debug("Appending item.text='%s' ...", item.text)
507 types.append(tidyup.domain(item.text))
509 logger.info("Adding args.software='%s' as type ...", args.software)
510 types.append(args.software)
# One table-data request per software type.
512 logger.info("Fetching %d different table data ...", len(types))
513 for software in types:
514 logger.debug("software='%s' - BEFORE!", software)
515 if args.software is not None and args.software != software:
516 logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
521 logger.debug("Fetching table data for software='%s' ...", software)
522 raw = utils.fetch_url(
523 f"https://{source_domain}/app/views/tabledata.php?software={software}",
525 (config.get("connection_timeout"), config.get("read_timeout"))
527 logger.debug("raw[%s]()=%d", type(raw), len(raw))
529 doc = bs4.BeautifulSoup(raw, features="html.parser")
530 logger.debug("doc[]='%s'", type(doc))
531 except network.exceptions as exception:
532 logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
535 items = doc.findAll("a", {"class": "url"})
536 logger.info("Checking %d items,software='%s' ...", len(items), software)
538 logger.debug("item[]='%s'", type(item))
539 domain = item.decode_contents()
540 logger.debug("domain='%s' - AFTER!", domain)
543 logger.debug("domain is empty - SKIPPED!")
546 logger.debug("domain='%s' - BEFORE!", domain)
547 domain = domain.encode("idna").decode("utf-8")
548 logger.debug("domain='%s' - AFTER!", domain)
550 if not domain_helper.is_wanted(domain):
551 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
553 elif instances.is_registered(domain):
554 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
# Canonicalize the software name before crawling the new domain.
557 software = software_helper.alias(software)
558 logger.info("Fetching instances for domain='%s'", domain)
559 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
561 logger.debug("Success! - EXIT!")
564 def fetch_todon_wiki(args: argparse.Namespace) -> int:
# Command handler: import todon.eu's published domain blocks from its
# wiki page.  "Silenced/limited" entries map to block level "silenced",
# "suspended" entries to "reject"; each is stored via processing.block()
# with blocker presumably set to the todon instance.
# NOTE(review): elided listing - the `blocklist = {...}`, `blocker = ...`
# and `blockdict = ...` initializers, `try:` lines, `continue`/`return`
# statements and the bot-dict append literal are not visible here.
565 logger.debug("args[]='%s' - CALLED!", type(args))
567 logger.debug("Invoking locking.acquire() ...")
570 source_domain = "wiki.todon.eu"
571 if sources.is_recent(source_domain):
572 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
575 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
576 sources.update(source_domain)
583 logger.debug("Fetching domainblocks from source_domain='%s'", source_domain)
584 raw = utils.fetch_url(
585 f"https://{source_domain}/todon/domainblocks",
587 (config.get("connection_timeout"), config.get("read_timeout"))
589 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
591 doc = bs4.BeautifulSoup(raw, "html.parser")
592 logger.debug("doc[]='%s'", type(doc))
# Parse the two wiki sections into the two supported block levels.
594 silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
595 logger.info("Checking %d silenced/limited entries ...", len(silenced))
596 blocklist["silenced"] = utils.find_domains(silenced, "div")
598 suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
599 logger.info("Checking %d suspended entries ...", len(suspended))
600 blocklist["reject"] = utils.find_domains(suspended, "div")
602 blocking = blocklist["silenced"] + blocklist["reject"]
605 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
606 instances.set_last_blocked(blocker)
607 instances.set_total_blocks(blocker, blocking)
610 for block_level in blocklist:
611 blockers = blocklist[block_level]
613 logger.debug("block_level='%s',blockers()=%d'", block_level, len(blockers))
614 for blocked in blockers:
615 logger.debug("blocked='%s'", blocked)
# Unknown domains are crawled first so the block references a
# registered instance.
617 if not instances.is_registered(blocked):
619 logger.info("Fetching instances from domain='%s' ...", blocked)
620 federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
621 except network.exceptions as exception:
622 logger.warning("Exception '%s' during fetching instances (fetch_cs) from blocked='%s'", type(exception), blocked)
623 instances.set_last_error(blocked, exception)
625 if blocks.is_instance_blocked(blocker, blocked, block_level):
626 logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
629 logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
630 if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
631 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", blocked, block_level, blocker)
637 logger.debug("Invoking commit() ...")
638 database.connection.commit()
640 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
641 if config.get("bot_enabled") and len(blockdict) > 0:
642 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
643 network.send_bot_post(blocker, blockdict)
645 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
646 if instances.has_pending(blocker):
647 logger.debug("Flushing updates for blocker='%s' ...", blocker)
648 instances.update(blocker)
650 logger.debug("Success! - EXIT!")
653 def fetch_cs(args: argparse.Namespace):
# Command handler: import chaos.social's published federation.md from
# GitHub (rendered from Markdown), parse the "Silenced instances" and
# "Blocked instances" tables, and store the entries as blocks for
# blocker "chaos.social".  Complements fetch_blocks(), which explicitly
# skips chaos.social (see its comment on oliphant blocklists).
# NOTE(review): elided listing - the `extensions = [...]` used below,
# the `blocklist = {...}`/`blockdict = ...` initializers, `try:` lines
# and `continue`/`return` statements are not visible here.  Note: no
# `-> int` return annotation here, unlike the sibling handlers.
654 logger.debug("args[]='%s' - CALLED!", type(args))
656 logger.debug("Invoking locking.acquire() ...")
684 source_domain = "raw.githubusercontent.com"
685 if sources.is_recent(source_domain):
686 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
689 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
690 sources.update(source_domain)
692 logger.info("Fetching federation.md from source_domain='%s' ...", source_domain)
693 raw = utils.fetch_url(
694 f"https://{source_domain}/chaossocial/meta/master/federation.md",
696 (config.get("connection_timeout"), config.get("read_timeout"))
698 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
# Render the Markdown to HTML, then parse the resulting tables.
700 doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
701 logger.debug("doc()=%d[]='%s'", len(doc), type(doc))
703 silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
704 logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
705 blocklist["silenced"] = federation.find_domains(silenced)
707 blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
708 logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
709 blocklist["reject"] = federation.find_domains(blocked)
711 blocking = blocklist["silenced"] + blocklist["reject"]
712 blocker = "chaos.social"
714 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
715 instances.set_last_blocked(blocker)
716 instances.set_total_blocks(blocker, blocking)
718 logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
719 if len(blocking) > 0:
721 for block_level in blocklist:
722 logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))
724 for row in blocklist[block_level]:
725 logger.debug("row[%s]='%s'", type(row), row)
726 if not "domain" in row:
727 logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
729 elif not instances.is_registered(row["domain"]):
731 logger.info("Fetching instances from domain='%s' ...", row["domain"])
732 federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
733 except network.exceptions as exception:
734 logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
735 instances.set_last_error(row["domain"], exception)
# Store the block; reject-level entries are queued for the bot post
# (the dict-literal append around lines 740-741 is partially elided).
737 if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
738 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
740 "blocked": row["domain"],
741 "reason" : row["reason"],
744 logger.debug("Invoking commit() ...")
745 database.connection.commit()
747 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
748 if config.get("bot_enabled") and len(blockdict) > 0:
749 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
750 network.send_bot_post(blocker, blockdict)
752 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
753 if instances.has_pending(blocker):
754 logger.debug("Flushing updates for blocker='%s' ...", blocker)
755 instances.update(blocker)
757 logger.debug("Success! - EXIT!")
760 def fetch_fba_rss(args: argparse.Namespace) -> int:
# Command handler: parse an FBA-style RSS feed (URL given via --feed),
# extract one domain per item from the link's query string, deduplicate,
# and crawl every new wanted domain.  Rate-limited per feed host.
# NOTE(review): elided listing - the `domains = ...` initializer, `try:`
# lines, `continue` statements in the skip branches and the `return`
# statements are not visible here.
761 logger.debug("args[]='%s' - CALLED!", type(args))
765 logger.debug("Invoking locking.acquire() ...")
768 components = urlparse(args.feed)
770 if sources.is_recent(components.netloc):
771 logger.info("API from components.netloc='%s' has recently being accessed - EXIT!", components.netloc)
774 logger.debug("components.netloc='%s' has not been recently used, marking ...", components.netloc)
775 sources.update(components.netloc)
777 logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
778 response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
780 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
781 if response.ok and response.status_code == 200 and len(response.text) > 0:
782 logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
783 rss = atoma.parse_rss_bytes(response.content)
785 logger.debug("rss[]='%s'", type(rss))
786 for item in rss.items:
787 logger.debug("item[%s]='%s'", type(item), item)
# The domain is carried as the value after '=' in the item link's
# query string; tidyup.domain() normalizes it.
788 domain = tidyup.domain(item.link.split("=")[1])
790 logger.debug("domain='%s' - AFTER!", domain)
792 logger.debug("domain is empty - SKIPPED!")
795 logger.debug("domain='%s' - BEFORE!", domain)
796 domain = domain.encode("idna").decode("utf-8")
797 logger.debug("domain='%s' - AFTER!", domain)
799 if not domain_helper.is_wanted(domain):
800 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
802 elif domain in domains:
803 logger.debug("domain='%s' is already added - SKIPPED!", domain)
805 elif instances.is_registered(domain):
806 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
808 elif instances.is_recent(domain):
809 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
812 logger.debug("Adding domain='%s'", domain)
813 domains.append(domain)
815 logger.debug("domains()=%d", len(domains))
# Second phase: crawl every collected domain; per-domain network errors
# are recorded on the instance instead of aborting the loop.
817 logger.info("Adding %d new instances ...", len(domains))
818 for domain in domains:
819 logger.debug("domain='%s'", domain)
821 logger.info("Fetching instances from domain='%s' ...", domain)
822 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
823 except network.exceptions as exception:
824 logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
825 instances.set_last_error(domain, exception)
828 logger.debug("Success! - EXIT!")
831 def fetch_fbabot_atom(args: argparse.Namespace) -> int:
# Command handler: parse the FBA bot's ATOM feed (default: ryona.agency,
# overridable via a valid --feed URL), pull domains out of every <a>
# href in each entry's HTML content, deduplicate, and crawl each new
# wanted domain with the feed host as origin.
# NOTE(review): elided listing - the `domains = ...` initializer, `try:`
# lines, `continue` statements and `return` statements are not visible
# here.
832 logger.debug("args[]='%s' - CALLED!", type(args))
834 logger.debug("Invoking locking.acquire() ...")
837 source_domain = "ryona.agency"
838 feed = f"https://{source_domain}/users/fba/feed.atom"
840 logger.debug("args.feed[%s]='%s'", type(args.feed), args.feed)
841 if args.feed is not None and validators.url(args.feed):
842 logger.debug("Setting feed='%s' ...", args.feed)
843 feed = str(args.feed)
844 source_domain = urlparse(args.feed).netloc
846 if sources.is_recent(source_domain):
847 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
850 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
851 sources.update(source_domain)
855 logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
856 response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
858 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
859 if response.ok and response.status_code == 200 and len(response.text) > 0:
860 logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
861 atom = atoma.parse_atom_bytes(response.content)
863 logger.debug("atom[]='%s'", type(atom))
864 for entry in atom.entries:
865 logger.debug("entry[]='%s'", type(entry))
# Each entry's content is HTML; every anchor's href may hold one or
# more comma-separated domains.
866 doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
867 logger.debug("doc[]='%s'", type(doc))
868 for element in doc.findAll("a"):
869 logger.debug("element[]='%s'", type(element))
870 for href in element["href"].split(","):
871 logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
872 domain = tidyup.domain(href)
874 logger.debug("domain='%s' - AFTER!", domain)
876 logger.debug("domain is empty - SKIPPED!")
879 logger.debug("domain='%s' - BEFORE!", domain)
880 domain = domain.encode("idna").decode("utf-8")
881 logger.debug("domain='%s' - AFTER!", domain)
883 if not domain_helper.is_wanted(domain):
884 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
886 elif domain in domains:
887 logger.debug("domain='%s' is already added - SKIPPED!", domain)
889 elif instances.is_registered(domain):
890 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
892 elif instances.is_recent(domain):
893 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
896 logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
897 domains.append(domain)
899 logger.debug("domains()=%d", len(domains))
# Second phase: crawl collected domains with the feed host as origin.
901 logger.info("Adding %d new instances ...", len(domains))
902 for domain in domains:
903 logger.debug("domain='%s'", domain)
905 logger.info("Fetching instances from domain='%s' ...", domain)
906 federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
907 except network.exceptions as exception:
908 logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
909 instances.set_last_error(domain, exception)
912 logger.debug("Success! - EXIT!")
915 def fetch_instances(args: argparse.Namespace) -> int:
# Purpose: crawl peer lists. First fetches instances for args.domain (using
# any origin/software already stored for it), then re-crawls known instances
# of supported software whose last_instance_fetch is stale.
# Parameter: args — argparse.Namespace; args.domain is required and validated.
# Returns: int status code (the `return` lines are elided in this view).
# NOTE(review): elided chunk — locking.acquire(), `try:` openers and
# `return`/`continue` statements are missing from view.
916     logger.debug("args[]='%s' - CALLED!", type(args))
918     logger.debug("args.domain='%s' - checking ...", args.domain)
# Guard clauses: reject syntactically invalid or blacklisted domains early.
919     if not validators.domain(args.domain):
920         logger.warning("args.domain='%s' is not valid.", args.domain)
922     elif blacklist.is_blacklisted(args.domain):
923         logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
926     logger.debug("Invoking locking.acquire() ...")
930     domain = tidyup.domain(args.domain)
931     origin = software = None
# Reuse origin/software previously recorded for this domain, if any.
934     database.cursor.execute("SELECT origin, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
935     row = database.cursor.fetchone()
937         origin = row["origin"]
938         software = row["software"]
# NOTE(review): the message says "args.domain=" but the (tidied) `domain`
# variable is interpolated — harmless, but slightly misleading in logs.
942         logger.info("Fetching instances from args.domain='%s',origin='%s',software='%s' ...", domain, origin, software)
943         federation.fetch_instances(domain, origin, software, inspect.currentframe().f_code.co_name)
944     except network.exceptions as exception:
945         logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
946         instances.set_last_error(args.domain, exception)
947         instances.update(args.domain)
951         logger.debug("Not fetching more instances - EXIT!")
954     # Loop through some instances
# Stale = last_instance_fetch older than the configured recheck_instance
# interval; busiest instances (total_peers DESC) are re-crawled first.
955     database.cursor.execute(
956         "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus', 'neodb') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY total_peers DESC, last_updated ASC", [time.time() - config.get("recheck_instance")]
959     rows = database.cursor.fetchall()
960     logger.info("Checking %d entries ...", len(rows))
962         logger.debug("row[domain]='%s'", row["domain"])
963         if row["domain"] == "":
964             logger.debug("row[domain] is empty - SKIPPED!")
967         logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
# Normalize stored domain to IDNA form before the wanted-check and fetch.
968         domain = row["domain"].encode("idna").decode("utf-8")
969         logger.debug("domain='%s' - AFTER!", domain)
971         if not domain_helper.is_wanted(domain):
972             logger.debug("Domain domain='%s' is not wanted - SKIPPED!", domain)
976             logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", domain, row["origin"], row["software"], row["nodeinfo_url"])
977             federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
978         except network.exceptions as exception:
# Record the error per instance and keep processing the remaining rows.
979             logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
980             instances.set_last_error(domain, exception)
982     logger.debug("Success - EXIT!")
985 def fetch_oliphant(args: argparse.Namespace) -> int:
# Purpose: download oliphant's CSV blocklists from codeberg.org and import
# each row as a block record (blocker -> blocked domain, severity,
# reject_media/reject_reports flags), optionally notifying via bot POST.
# Parameter: args — argparse.Namespace; args.domain optionally restricts the
# run to a single blocker.
# Returns: int status code (the `return` lines are elided in this view).
# NOTE(review): elided chunk — locking.acquire(), `continue` statements,
# the `blockdict`/`domains` initializations and some assignments (e.g.
# reject_media = True) are missing from view.
986     logger.debug("args[]='%s' - CALLED!", type(args))
988     logger.debug("Invoking locking.acquire() ...")
# Rate-limit hits against the source host, not per-blocklist.
991     source_domain = "codeberg.org"
992     if sources.is_recent(source_domain):
993         logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
996         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
997         sources.update(source_domain)
1000     base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"
1004     logger.debug("Downloading %d files ...", len(blocklists.oliphant_blocklists))
1005     for block in blocklists.oliphant_blocklists:
1006         # Is domain given and not equal blocker?
1007         if isinstance(args.domain, str) and args.domain != block["blocker"]:
1008             logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
1010         elif args.domain in domains:
1011             logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
1014         instances.set_last_blocked(block["blocker"])
1017         logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
1018         response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
1020         logger.debug("response.ok='%s',response.status_code=%d,response.content()=%d", response.ok, response.status_code, len(response.content))
1021         if not response.ok or response.status_code > 200 or response.content == "":
1022             logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
1025         logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
1026         reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
1032             logger.debug("row[%s]='%s'", type(row), row)
1033             domain = severity = None
1034             reject_media = reject_reports = False
# The CSVs use either "#domain"/"#severity" (commented header style) or
# plain "domain"/"severity" columns; accept both spellings.
1036             if "#domain" in row:
1037                 domain = row["#domain"]
1038             elif "domain" in row:
1039                 domain = row["domain"]
1041                 logger.debug("row='%s' does not contain domain column", row)
1044             if "#severity" in row:
1045                 severity = blocks.alias_block_level(row["#severity"])
1046             elif "severity" in row:
1047                 severity = blocks.alias_block_level(row["severity"])
1049                 logger.debug("row='%s' does not contain severity column", row)
1052             if "#reject_media" in row and row["#reject_media"].lower() == "true":
1054             elif "reject_media" in row and row["reject_media"].lower() == "true":
1057             if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
1058                 reject_reports = True
1059             elif "reject_reports" in row and row["reject_reports"].lower() == "true":
1060                 reject_reports = True
1063             logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
# Filter out unusable targets: empty, Tor .onion, reverse-DNS .arpa and
# placeholder .tld entries are never imported.
1065                 logger.debug("domain is empty - SKIPPED!")
1067             elif domain.endswith(".onion"):
1068                 logger.debug("domain='%s' is a TOR .onion domain - SKIPPED", domain)
1070             elif domain.endswith(".arpa"):
1071                 logger.debug("domain='%s' is a reverse IP address - SKIPPED", domain)
1073             elif domain.endswith(".tld"):
1074                 logger.debug("domain='%s' is a fake domain - SKIPPED", domain)
# Wildcard patterns ("*"/"?") mean the blocker obfuscated the domain; try to
# recover the real domain before validating it.
1076             elif domain.find("*") >= 0 or domain.find("?") >= 0:
1077                 logger.debug("domain='%s' is obfuscated - Invoking utils.deobfuscate(%s, %s) ...", domain, domain, block["blocker"])
1078                 domain = utils.deobfuscate(domain, block["blocker"])
1079                 logger.debug("domain='%s' - AFTER!", domain)
# NOTE(review): the next debug call has a '%s' placeholder but no argument
# for `domain` — logging will report a formatting error instead of the value.
1081             if not validators.domain(domain):
1082                 logger.debug("domain='%s' is not a valid domain - SKIPPED!")
1084             elif blacklist.is_blacklisted(domain):
1085                 logger.warning("domain='%s' is blacklisted - SKIPPED!", domain)
1087             elif blocks.is_instance_blocked(block["blocker"], domain, severity):
1088                 logger.debug("block[blocker]='%s' has already blocked domain='%s' with severity='%s' - SKIPPED!", block["blocker"], domain, severity)
1091             logger.debug("Marking domain='%s' as handled", domain)
1092             domains.append(domain)
1094             logger.debug("Processing domain='%s' ...", domain)
1095             processed = processing.domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
1096             logger.debug("processed='%s'", processed)
# Record the block; when the bot is enabled, collect it for the summary POST.
1098             if processing.block(block["blocker"], domain, None, severity) and config.get("bot_enabled"):
1099                 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", domain, block["block_level"], block["blocker"])
1102                     "reason" : block["reason"],
# Extra block records for the media/report rejection flags from the CSV.
1106                 processing.block(block["blocker"], domain, None, "reject_media")
1108                 processing.block(block["blocker"], domain, None, "reject_reports")
1110         logger.debug("block[blocker]='%s'", block["blocker"])
# Only update the total when this blocker is not covered by a configured
# (static) blocklist elsewhere.
1111         if not blocklists.has(block["blocker"]):
1112             logger.debug("Invoking instances.set_total_blocks(%s, domains()=%d) ...", block["blocker"], len(domains))
1113             instances.set_total_blocks(block["blocker"], domains)
1115         logger.debug("Checking if blocker='%s' has pending updates ...", block["blocker"])
1116         if instances.has_pending(block["blocker"]):
1117             logger.debug("Flushing updates for block[blocker]='%s' ...", block["blocker"])
1118             instances.update(block["blocker"])
1120         logger.debug("Invoking commit() ...")
1121         database.connection.commit()
1123         logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1124         if config.get("bot_enabled") and len(blockdict) > 0:
1125             logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
1126             network.send_bot_post(block["blocker"], blockdict)
1128     logger.debug("Success! - EXIT!")
1131 def fetch_txt(args: argparse.Namespace) -> int:
# Purpose: fetch plain-text blocklists (one domain per line) from a fixed set
# of URLs (visible here: seirdy.one's bsl.txt) and process each listed domain
# against the corresponding blocker.
# Parameter: args — argparse.Namespace (not read in the visible lines).
# Returns: int status code (the `return` lines are elided in this view).
# NOTE(review): elided chunk — the `urls` list wrapper around the dict below
# and the `continue` statements are missing from view.
1132     logger.debug("args[]='%s' - CALLED!", type(args))
1134     logger.debug("Invoking locking.acquire() ...")
1139         "blocker": "seirdy.one",
1140         "url" : "https://seirdy.one/pb/bsl.txt",
1143     logger.info("Checking %d text file(s) ...", len(urls))
1145         logger.debug("Fetching row[url]='%s' ...", row["url"])
1146         response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
1148         logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1149         if response.ok and response.status_code == 200 and response.text != "":
1150             logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
# One domain per line in the fetched text file.
1151             domains = response.text.split("\n")
1153             logger.info("Processing %d domains ...", len(domains))
1154             for domain in domains:
1155                 logger.debug("domain='%s' - BEFORE!", domain)
1156                 domain = tidyup.domain(domain)
1158                 logger.debug("domain='%s' - AFTER!", domain)
# Skip empty, unwanted, or recently-crawled domains.
1160                     logger.debug("domain is empty - SKIPPED!")
1162                 elif not domain_helper.is_wanted(domain):
1163                     logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1165                 elif instances.is_recent(domain):
1166                     logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1169                 logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
1170                 processed = processing.domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)
1172                 logger.debug("processed='%s'", processed)
1174                     logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
1177     logger.debug("Success! - EXIT!")
1180 def fetch_fedipact(args: argparse.Namespace) -> int:
# Purpose: scrape the fedipact.online landing page, read participating
# instance domains from its <li> elements, and register unknown ones via
# federation.fetch_instances() (with "beach.city" recorded as origin).
# Parameter: args — argparse.Namespace (not read in the visible lines).
# Returns: int status code (the `return` lines are elided in this view).
# NOTE(review): elided chunk — locking.acquire() and `continue` statements
# are missing from view.
1181     logger.debug("args[]='%s' - CALLED!", type(args))
1183     logger.debug("Invoking locking.acquire() ...")
1186     source_domain = "fedipact.online"
1187     if sources.is_recent(source_domain):
1188         logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1191         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1192         sources.update(source_domain)
1194     logger.info("Fetching / from source_domain='%s' ...", source_domain)
1195     response = utils.fetch_url(
1196         f"https://{source_domain}",
1197         network.web_headers,
1198         (config.get("connection_timeout"), config.get("read_timeout"))
1201     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1202     if response.ok and response.status_code == 200 and response.text != "":
1203         logger.debug("Parsing %d Bytes ...", len(response.text))
1205         doc = bs4.BeautifulSoup(response.text, "html.parser")
1206         logger.debug("doc[]='%s'", type(doc))
# Each participating instance is listed as an <li>; the domain is the first
# text node of the list item.
1208         rows = doc.findAll("li")
1209         logger.info("Checking %d row(s) ...", len(rows))
1211             logger.debug("row[]='%s'", type(row))
1212             domain = tidyup.domain(row.contents[0])
1214             logger.debug("domain='%s' - AFTER!", domain)
1216                 logger.debug("domain is empty - SKIPPED!")
1219             logger.debug("domain='%s' - BEFORE!", domain)
# Normalize to IDNA form before the wanted/registered/recent checks.
1220             domain = domain.encode("idna").decode("utf-8")
1221             logger.debug("domain='%s' - AFTER!", domain)
1223             if not domain_helper.is_wanted(domain):
1224                 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1226             elif instances.is_registered(domain):
1227                 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1229             elif instances.is_recent(domain):
1230                 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1233             logger.info("Fetching domain='%s' ...", domain)
1234             federation.fetch_instances(domain, "beach.city", None, inspect.currentframe().f_code.co_name)
1236     logger.debug("Success! - EXIT!")
1239 def fetch_joinmobilizon(args: argparse.Namespace) -> int:
# Purpose: query the Mobilizon instance directory API
# (instances.joinmobilizon.org/api/v1/instances) and register every wanted,
# not-yet-registered host (origin recorded as "demo.mobilizon.org").
# Parameter: args — argparse.Namespace (not read in the visible lines).
# Returns: int status code (the `return` lines are elided in this view).
# NOTE(review): elided chunk — locking.acquire() and `continue` statements
# are missing from view.
1240     logger.debug("args[]='%s' - CALLED!", type(args))
1242     logger.debug("Invoking locking.acquire() ...")
1245     source_domain = "instances.joinmobilizon.org"
1246     if sources.is_recent(source_domain):
1247         logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1250         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1251         sources.update(source_domain)
1253     logger.info("Fetching instances from source_domain='%s' ...", source_domain)
1254     raw = utils.fetch_url(
1255         f"https://{source_domain}/api/v1/instances",
1256         network.web_headers,
1257         (config.get("connection_timeout"), config.get("read_timeout"))
1259     logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1261     parsed = json.loads(raw)
1262     logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
# NOTE(review): the warning below has a '%d' placeholder but no argument —
# logging will report a formatting error; should pass len(parsed).
1264     if "data" not in parsed:
1265         logger.warning("parsed()=%d does not contain key 'data'")
1268     logger.info("Checking %d instances ...", len(parsed["data"]))
1269     for row in parsed["data"]:
1270         logger.debug("row[]='%s'", type(row))
# Skip rows without a host, unwanted hosts, and already-known hosts.
1271         if "host" not in row:
1272             logger.warning("row='%s' does not contain key 'host' - SKIPPED!", row)
1274         elif not domain_helper.is_wanted(row["host"]):
1275             logger.debug("row[host]='%s' is not wanted - SKIPPED!", row["host"])
1277         elif instances.is_registered(row["host"]):
1278             logger.debug("row[host]='%s' is already registered - SKIPPED!", row["host"])
1281         logger.info("Fetching row[host]='%s' ...", row["host"])
1282         federation.fetch_instances(row["host"], "demo.mobilizon.org", None, inspect.currentframe().f_code.co_name)
1284     logger.debug("Success! - EXIT!")
1287 def fetch_joinmisskey(args: argparse.Namespace) -> int:
# Purpose: fetch instances.json from the Misskey instance directory
# (instanceapp.misskey.page) and register every wanted, not-yet-registered
# URL (origin recorded as "misskey.io").
# Parameter: args — argparse.Namespace (not read in the visible lines).
# Returns: int status code (the `return` lines are elided in this view).
# NOTE(review): elided chunk — locking.acquire() and `continue` statements
# are missing from view.
1288     logger.debug("args[]='%s' - CALLED!", type(args))
1290     logger.debug("Invoking locking.acquire() ...")
1293     source_domain = "instanceapp.misskey.page"
1294     if sources.is_recent(source_domain):
1295         logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1298         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1299         sources.update(source_domain)
1301     logger.info("Fetching instances.json from source_domain='%s' ...", source_domain)
1302     raw = utils.fetch_url(
1303         f"https://{source_domain}/instances.json",
1304         network.web_headers,
1305         (config.get("connection_timeout"), config.get("read_timeout"))
1307     logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1309     parsed = json.loads(raw)
1310     logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
# NOTE(review): the warning below has a '%d' placeholder but no argument —
# logging will report a formatting error; should pass len(parsed).
# (Also "instane(s)" in the info message below is a typo in a runtime string.)
1312     if "instancesInfos" not in parsed:
1313         logger.warning("parsed()=%d does not contain element 'instancesInfos'")
1316     logger.info("Checking %d instane(s) ...", len(parsed["instancesInfos"]))
1317     for row in parsed["instancesInfos"]:
1318         logger.debug("row[%s]='%s'", type(row), row)
# Skip rows without a url, unwanted urls, and already-known urls.
1319         if "url" not in row:
1320             logger.warning("row()=%d does not have element 'url' - SKIPPED!", len(row))
1322         elif not domain_helper.is_wanted(row["url"]):
1323             logger.debug("row[url]='%s' is not wanted - SKIPPED!", row["url"])
1325         elif instances.is_registered(row["url"]):
1326             logger.debug("row[url]='%s' is already registered - SKIPPED!", row["url"])
1329         logger.info("Fetching row[url]='%s' ...", row["url"])
1330         federation.fetch_instances(row["url"], "misskey.io", None, inspect.currentframe().f_code.co_name)
1332     logger.debug("Success! - EXIT!")
1335 def fetch_joinfediverse(args: argparse.Namespace) -> int:
# Purpose: scrape the joinfediverse.wiki /FediBlock page. Phase 1 parses the
# wikitable headers/cells into block dicts (blocked domain, reason,
# subdomain list); phase 2 expands subdomains into full hostnames; phase 3
# registers the blocked domains and records the blocks for each local
# climatejustice.* blocker found in the database.
# Parameter: args — argparse.Namespace (not read in the visible lines).
# Returns: int status code (the `return` lines are elided in this view).
# NOTE(review): heavily elided chunk — locking.acquire(), the
# `block`/`blocklist`/`blocking`/`blockdict` initializations, `cnt` counters
# and `continue` statements are missing from view.
1336     logger.debug("args[]='%s' - CALLED!", type(args))
1338     logger.debug("Invoking locking.acquire() ...")
1341     source_domain = "joinfediverse.wiki"
1342     if sources.is_recent(source_domain):
1343         logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1346     logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1347     sources.update(source_domain)
1349     logger.info("Fetching /FediBlock wiki page from source_domain='%s' ...", source_domain)
1350     raw = utils.fetch_url(
1351         f"https://{source_domain}/FediBlock",
1352         network.web_headers,
1353         (config.get("connection_timeout"), config.get("read_timeout"))
1355     logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1357     doc = bs4.BeautifulSoup(raw, "html.parser")
1358     logger.debug("doc[]='%s'", type(doc))
# The FediBlock page keeps its data in MediaWiki "wikitable" tables.
1360     tables = doc.findAll("table", {"class": "wikitable"})
1362     logger.info("Analyzing %d table(s) ...", len(tables))
1364     for table in tables:
1365         logger.debug("table[]='%s'", type(table))
1367         rows = table.findAll("tr")
1368         logger.info("Checking %d row(s) ...", len(rows))
# block_headers maps column index -> recognized header name; it is rebuilt
# whenever a header row (more than one <th>) is encountered.
1369         block_headers = dict()
1371             logger.debug("row[%s]='%s'", type(row), row)
1373             headers = row.findAll("th")
1374             logger.debug("Found headers()=%d header(s)", len(headers))
1375             if len(headers) > 1:
1376                 block_headers = dict()
1378                 for header in headers:
1380                     logger.debug("header[]='%s',cnt=%d", type(header), cnt)
1381                     text = header.contents[0]
1383                     logger.debug("text[]='%s'", type(text))
1384                     if not isinstance(text, str):
1385                         logger.debug("text[]='%s' is not of type 'str' - SKIPPED!", type(text))
# A header cell that is itself a domain is data, not a column name.
1387                     elif validators.domain(text.strip()):
1388                         logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
1391                     text = tidyup.domain(text.strip())
1392                     logger.debug("text='%s' - AFTER!", text)
1393                     if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
1394                         logger.debug("Found header: '%s'=%d", text, cnt)
1395                         block_headers[cnt] = text
1397             elif len(block_headers) == 0:
1398                 logger.debug("row is not scrapable - SKIPPED!")
1400             elif len(block_headers) > 0:
1401                 logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
# Walk the data cells; only columns that were recognized in block_headers
# are extracted. "domain"/"instance" columns are normalized to "blocked".
1405                 for element in row.find_all(["th", "td"]):
1407                     logger.debug("element[]='%s',cnt=%d", type(element), cnt)
1408                     if cnt in block_headers:
1409                         logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])
1411                         text = element.text.strip()
1412                         key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"
1414                         logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
1415                         if key in ["domain", "instance"]:
1417                         elif key == "reason":
1418                             block[key] = tidyup.reason(text)
# Multiple subdomains are separated by "/" on the wiki page.
1419                         elif key == "subdomain(s)":
1422                             block[key] = text.split("/")
1424                             logger.debug("key='%s'", key)
1427                 logger.debug("block()=%d ...", len(block))
1429                     logger.debug("Appending block()=%d ...", len(block))
1430                     blocklist.append(block)
1432     logger.debug("blocklist()=%d", len(blocklist))
# The blockers credited for this wiki data are the local climatejustice.*
# instances found in the database.
1434     database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
1435     domains = database.cursor.fetchall()
1437     logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
# Phase 2: expand "subdomain(s)" entries into one block per full hostname
# (subdomain + "." + blocked base domain).
1439     for block in blocklist:
1440         logger.debug("block='%s'", block)
1441         if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
1442             origin = block["blocked"]
1443             logger.debug("origin='%s'", origin)
1444             for subdomain in block["subdomain(s)"]:
1445                 block["blocked"] = subdomain + "." + origin
1446                 logger.debug("block[blocked]='%s'", block["blocked"])
1447                 blocking.append(block)
1449             blocking.append(block)
# NOTE(review): the debug call below passes the list itself to a '%d'
# placeholder — should be len(blocking).
1451     logger.debug("blocking()=%d", blocking)
# Phase 3a: make sure each blocked domain is registered as an instance.
1452     for block in blocking:
1453         logger.debug("block[]='%s'", type(block))
1454         if "blocked" not in block:
1455             raise KeyError(f"block()={len(block)} does not have element 'blocked'")
1457         block["blocked"] = tidyup.domain(block["blocked"]).encode("idna").decode("utf-8")
1458         logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
1460         if block["blocked"] == "":
1461             logger.debug("block[blocked] is empty - SKIPPED!")
1463         elif not domain_helper.is_wanted(block["blocked"]):
1464             logger.debug("block[blocked]='%s' is not wanted - SKIPPED!", block["blocked"])
1466         elif instances.is_recent(block["blocked"]):
1467             logger.debug("block[blocked]='%s' has been recently checked - SKIPPED!", block["blocked"])
1470         logger.debug("Proccessing blocked='%s' ...", block["blocked"])
1471         processing.domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
# Phase 3b: record every block once per climatejustice.* blocker.
1474     for blocker in domains:
# sqlite rows from the SELECT above are 1-tuples; unwrap the domain.
1475         blocker = blocker[0]
1476         logger.debug("blocker[%s]='%s'", type(blocker), blocker)
1477         instances.set_last_blocked(blocker)
1479         for block in blocking:
1480             logger.debug("block[blocked]='%s',block[block reason(s)]='%s' - BEFORE!", block["blocked"], block["block reason(s)"] if "block reason(s)" in block else None)
1481             block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None
1483             logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
1484             if block["blocked"] == "":
1485                 logger.debug("block[blocked] is empty - SKIPPED!")
1487             elif not domain_helper.is_wanted(block["blocked"]):
1488                 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1491             logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
1492             if processing.block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
1493                 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
1495                     "blocked": block["blocked"],
1496                     "reason" : block["reason"],
1499         if instances.has_pending(blocker):
1500             logger.debug("Flushing updates for blocker='%s' ...", blocker)
1501             instances.update(blocker)
1503         logger.debug("Invoking commit() ...")
1504         database.connection.commit()
1506         logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
# NOTE(review): the info format string below is missing the closing quote
# after %s ("blocker='%s,...") — cosmetic defect in the logged message.
1507         if config.get("bot_enabled") and len(blockdict) > 0:
1508             logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", blocker, len(blockdict))
1509             network.send_bot_post(blocker, blockdict)
1511     logger.debug("Success! - EXIT!")
1514 def recheck_obfuscation(args: argparse.Namespace) -> int:
# Purpose: re-examine instances flagged with has_obfuscation=1. Re-fetches
# their blocklists (generic federation API first, then software-specific
# fallbacks), tries utils.deobfuscate() on wildcarded entries, records any
# recovered blocks, and clears the obfuscation flag when everything was
# deobfuscated.
# Parameter: args — argparse.Namespace; args.domain or args.software narrow
# the selection, args.force skips the recent-check.
# Returns: int status code (the `return` lines are elided in this view).
# NOTE(review): elided chunk — locking.acquire(), `blockdict`/`obfuscated`/
# `blocked` initializations, `try:` openers and `continue` statements are
# missing from view.
1515     logger.debug("args[]='%s' - CALLED!", type(args))
1517     logger.debug("Invoking locking.acquire() ...")
# Select obfuscated instances: by exact domain, by software, or all.
# NOTE(review): the software branch checks validators.domain(args.software)
# == args.software — presumably intended to validate args.domain instead;
# confirm against upstream history.
1520     if isinstance(args.domain, str) and args.domain != "" and domain_helper.is_wanted(args.domain):
1521         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
1522     elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
1523         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
1525         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")
1527     rows = database.cursor.fetchall()
1528     logger.info("Checking %d domains ...", len(rows))
1530         logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
# Without --force and without explicit domain/software filters, skip
# instances whose blocklist was checked recently.
1531         if (args.force is None or not args.force) and args.domain is None and args.software is None and instances.is_recent(row["domain"], "last_blocked"):
1532             logger.debug("row[domain]='%s' has been recently checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force))
1535         logger.debug("Invoking federation.fetch_blocks(%s) ...", row["domain"])
1536         blocking = federation.fetch_blocks(row["domain"])
1538         logger.debug("blocking()=%d", len(blocking))
# Fall back to software-specific scrapers when the generic fetch is empty.
1539         if len(blocking) == 0:
1540             if row["software"] == "pleroma":
1541                 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1542                 blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
1543             elif row["software"] == "mastodon":
1544                 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1545                 blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
1546             elif row["software"] == "lemmy":
1547                 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1548                 blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
1549             elif row["software"] == "friendica":
1550                 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1551                 blocking = friendica.fetch_blocks(row["domain"])
1552             elif row["software"] == "misskey":
1553                 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1554                 blocking = misskey.fetch_blocks(row["domain"])
1556                 logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])
1558         # c.s isn't part of oliphant's "hidden" blocklists
1559         logger.debug("row[domain]='%s'", row["domain"])
1560         if row["domain"] != "chaos.social" and not blocklists.has(row["domain"]):
1561             logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
1562             instances.set_last_blocked(row["domain"])
1563             instances.set_total_blocks(row["domain"], blocking)
1568         logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1569         for block in blocking:
1570             logger.debug("block[blocked]='%s'", block["blocked"])
# Skip empty/.arpa/.tld/.onion targets; count and try to deobfuscate
# wildcarded ("*"/"?") entries.
1573             if block["blocked"] == "":
1574                 logger.debug("block[blocked] is empty - SKIPPED!")
1576             elif block["blocked"].endswith(".arpa"):
1577                 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1579             elif block["blocked"].endswith(".tld"):
1580                 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1582             elif block["blocked"].endswith(".onion"):
1583                 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
1585             elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
1586                 logger.debug("block='%s' is obfuscated.", block["blocked"])
1587                 obfuscated = obfuscated + 1
# A hash (when provided by the remote blocklist) helps deobfuscation.
1588                 blocked = utils.deobfuscate(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
1589             elif not domain_helper.is_wanted(block["blocked"]):
1590                 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1592             elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1593                 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
1596             logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
# A successful deobfuscation yields a different, concrete domain; undo the
# obfuscated counter and record the block unless already known/blacklisted.
1597             if blocked is not None and blocked != block["blocked"]:
1598                 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1599                 obfuscated = obfuscated - 1
1601                 if blocks.is_instance_blocked(row["domain"], blocked):
1602                     logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
1604                 elif blacklist.is_blacklisted(blocked):
1605                     logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
1608                 block["block_level"] = blocks.alias_block_level(block["block_level"])
1610                 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
1611                 if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1612                     logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
1615                         "reason" : block["reason"],
1618         logger.debug("Settings obfuscated=%d for row[domain]='%s' ...", obfuscated, row["domain"])
1619         instances.set_obfuscated_blocks(row["domain"], obfuscated)
1621         logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
# All obfuscated entries resolved -> clear the instance's obfuscation flag.
1622         if obfuscated == 0 and len(blocking) > 0:
1623             logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
1624             instances.set_has_obfuscation(row["domain"], False)
1626         if instances.has_pending(row["domain"]):
1627             logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1628             instances.update(row["domain"])
1630         logger.debug("Invoking commit() ...")
1631         database.connection.commit()
1633         logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
# NOTE(review): the info format string below is missing the closing quote
# after %s ("blocker='%s,...") — cosmetic defect in the logged message.
1634         if config.get("bot_enabled") and len(blockdict) > 0:
1635             logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", row["domain"], len(blockdict))
1636             network.send_bot_post(row["domain"], blockdict)
1638     logger.debug("Success! - EXIT!")
1641 def fetch_fedilist(args: argparse.Namespace) -> int:
# Purpose: download the instance CSV from demo.fedilist.com (onion hosts
# excluded, optionally filtered by args.software) and crawl each listed,
# wanted hostname via federation.fetch_instances().
# Parameter: args — argparse.Namespace; args.software filters the CSV,
# args.force re-crawls already-registered domains.
# Returns: int status code (the `return` lines are elided in this view).
# NOTE(review): elided chunk — locking.acquire(), the `try:`/`rows = ...`
# lines around the CSV parsing and `continue` statements are missing from
# view. Note also the endpoint is fetched over plain http.
1642     logger.debug("args[]='%s' - CALLED!", type(args))
1644     logger.debug("Invoking locking.acquire() ...")
1647     source_domain = "demo.fedilist.com"
1648     if sources.is_recent(source_domain):
1649         logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1652     logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1653     sources.update(source_domain)
1655     url = f"http://{source_domain}/instance/csv?onion=not"
1656     if args.software is not None and args.software != "":
1657         logger.debug("args.software='%s'", args.software)
1658         url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"
1660     logger.info("Fetching url='%s' ...", url)
# Redirects are refused on purpose — a redirect here is treated as failure.
1661     response = reqto.get(
1663         headers=network.web_headers,
1664         timeout=(config.get("connection_timeout"), config.get("read_timeout")),
1665         allow_redirects=False
1668     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1669     if not response.ok or response.status_code > 200 or len(response.content) == 0:
1670         logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.text))
1673     reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
1675     logger.debug("reader[]='%s'", type(reader))
1677         logger.warning("Failed parsing response.content()=%d as CSV content", len(response.content))
1682     logger.info("Checking %d rows ...", len(rows))
1684         logger.debug("row[]='%s'", type(row))
1685         if "hostname" not in row:
1686             logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(row))
1689         logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"])
1690         domain = tidyup.domain(row["hostname"])
1691         logger.debug("domain='%s' - AFTER!", domain)
1694             logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"])
1697         logger.debug("domain='%s' - BEFORE!", domain)
# Normalize to IDNA form before the wanted/registered/recent checks.
1698         domain = domain.encode("idna").decode("utf-8")
1699         logger.debug("domain='%s' - AFTER!", domain)
1701         if not domain_helper.is_wanted(domain):
1702             logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
# --force overrides the already-registered skip, but not the recent skip.
1704         elif (args.force is None or not args.force) and instances.is_registered(domain):
1705             logger.debug("domain='%s' is already registered, --force not specified: args.force[]='%s'", domain, type(args.force))
1707         elif instances.is_recent(domain):
1708             logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1711         logger.info("Fetching instances from domain='%s' ...", domain)
1712         federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1714     logger.debug("Success! - EXIT!")
# Re-detect the software type (nodeinfo) of registered instances and persist
# the result. Which instances are selected depends on the CLI arguments, in
# priority order: --domain > --software > --mode > --no-software > any row
# whose last_nodeinfo is older than the recheck_nodeinfo window (or NULL).
#
# Parameters:
#   args - parsed CLI namespace (domain, software, mode, no_software, force).
# Returns: int exit code (return statements not visible in this listing).
#
# NOTE(review): numbered listing with gaps — locking.acquire(), the
# "for row in domains:" header, the try: opener before determine_software()
# and several else:/continue lines were elided.
1717 def update_nodeinfo(args: argparse.Namespace) -> int:
1718 logger.debug("args[]='%s' - CALLED!", type(args))
1720 logger.debug("Invoking locking.acquire() ...")
1723 if args.domain is not None and args.domain != "":
1724 logger.debug("Fetching args.domain='%s'", args.domain)
1725 database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ?", [args.domain])
1726 elif args.software is not None and args.software != "":
1727 logger.info("Fetching domains for args.software='%s'", args.software)
# Software names are stored lower-case; the staleness cut-off is "now minus
# recheck_nodeinfo seconds" in all the filtered queries below.
1728 database.cursor.execute("SELECT domain, software FROM instances WHERE software = ? AND (last_nodeinfo < ? OR last_nodeinfo IS NULL)", [args.software.lower(), time.time() - config.get("recheck_nodeinfo")])
1729 elif args.mode is not None and args.mode != "":
1730 logger.info("Fetching domains for args.mode='%s'", args.mode.upper())
# detection_mode is stored upper-case.
1731 database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode = ? AND (last_nodeinfo < ? OR last_nodeinfo IS NULL)", [args.mode.upper(), time.time() - config.get("recheck_nodeinfo")])
1732 elif args.no_software:
1733 logger.info("Fetching domains with no software type detected ...")
1734 database.cursor.execute("SELECT domain, software FROM instances WHERE software IS NULL AND (last_nodeinfo < ? OR last_nodeinfo IS NULL)", [time.time() - config.get("recheck_nodeinfo")])
1736 logger.info("Fetching domains for recently updated ...")
1737 database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL", [time.time() - config.get("recheck_nodeinfo")])
1739 domains = database.cursor.fetchall()
1741 logger.info("Checking %d domain(s) ...", len(domains))
1744 logger.debug("row[]='%s'", type(row))
# Without --force, skip rows whose nodeinfo was checked recently.
1745 if not args.force and instances.is_recent(row["domain"], "last_nodeinfo"):
1746 logger.debug("row[domain]='%s' has been recently checked - SKIPPED!", row["domain"])
# cnt (loop counter, elided) drives the progress percentage below.
1750 logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
1751 software = federation.determine_software(row["domain"])
1753 logger.debug("Determined software='%s'", software)
# Persist only when the detected software actually changed (or --force).
1754 if (software != row["software"] and software is not None) or args.force is True:
1755 logger.debug("software='%s'", software)
1756 if software is None:
1757 logger.debug("Setting nodeinfo_url to 'None' for row[domain]='%s' ...", row["domain"])
1758 instances.set_nodeinfo_url(row["domain"], None)
1760 logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
1761 instances.set_software(row["domain"], software)
1763 if software is not None:
1764 logger.debug("Setting row[domain]='%s' as successfully determined ...", row["domain"])
1765 instances.set_success(row["domain"])
# Network failures are recorded per-instance instead of aborting the run.
1766 except network.exceptions as exception:
1767 logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1768 instances.set_last_error(row["domain"], exception)
# Always stamp last_nodeinfo and flush, success or failure.
1770 instances.set_last_nodeinfo(row["domain"])
1771 instances.update(row["domain"])
1774 logger.debug("Success! - EXIT!")
# Fetch the full instance list from the instances.social JSON API (requires
# an API key in config.json) and queue new, wanted domains for crawling.
#
# Parameters:
#   args - parsed CLI namespace (no visible flags are read in this listing).
# Returns: int exit code (return statements not visible in this listing).
#
# NOTE(review): numbered listing with gaps — locking.acquire(), the opening
# of the headers dict, the "for row in rows:" header and continue/return
# lines were elided; the `domains` dedupe list checked at 1841 is built on
# an elided line.
1777 def fetch_instances_social(args: argparse.Namespace) -> int:
1778 logger.debug("args[]='%s' - CALLED!", type(args))
1780 logger.debug("Invoking locking.acquire() ...")
1783 source_domain = "instances.social"
# Hard requirement: the API is useless without a configured key.
1785 if config.get("instances_social_api_key") == "":
1786 logger.error("API key not set. Please set in your config.json file.")
1788 elif sources.is_recent(source_domain):
1789 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1792 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1793 sources.update(source_domain)
# Bearer-token authentication header for the API request below.
1796 "Authorization": f"Bearer {config.get('instances_social_api_key')}",
1799 logger.info("Fetching list from source_domain='%s' ...", source_domain)
# count=0 means "no limit" per the instances.social API.
1800 fetched = network.get_json_api(
1802 "/api/1.0/instances/list?count=0&sort_by=name",
1804 (config.get("connection_timeout"), config.get("read_timeout"))
1806 logger.debug("fetched[]='%s'", type(fetched))
# Validate the API envelope before touching fetched["json"]["instances"].
1808 if "error_message" in fetched:
1809 logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1811 elif "exception" in fetched:
1812 logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1814 elif "json" not in fetched:
1815 logger.warning("fetched has no element 'json' - EXIT!")
1817 elif "instances" not in fetched["json"]:
1818 logger.warning("fetched[row] has no element 'instances' - EXIT!")
1822 rows = fetched["json"]["instances"]
1824 logger.info("Checking %d row(s) ...", len(rows))
1826 logger.debug("row[]='%s'", type(row))
1827 domain = tidyup.domain(row["name"])
1828 logger.debug("domain='%s' - AFTER!", domain)
1831 logger.debug("domain is empty - SKIPPED!")
1834 logger.debug("domain='%s' - BEFORE!", domain)
# Normalize internationalized domain names to punycode (IDNA) form.
1835 domain = domain.encode("idna").decode("utf-8")
1836 logger.debug("domain='%s' - AFTER!", domain)
# Filter chain: unwanted / duplicate in this batch / already registered /
# recently crawled are skipped; everything else is crawled.
1838 if not domain_helper.is_wanted(domain):
1839 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1841 elif domain in domains:
1842 logger.debug("domain='%s' is already added - SKIPPED!", domain)
1844 elif instances.is_registered(domain):
1845 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1847 elif instances.is_recent(domain):
1848 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1851 logger.info("Fetching instances from domain='%s'", domain)
1852 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1854 logger.debug("Success! - EXIT!")
# Scrape the landing pages of known relay instances (activityrelay, aoderelay,
# selective-relay) for their registered peers, record each relay's peer list,
# then crawl every newly discovered domain.
#
# Parameters:
#   args - parsed CLI namespace; honours args.domain (single relay) and
#          args.force (ignore the recent-fetch check).
# Returns: int exit code (return statements not visible in this listing).
#
# NOTE(review): numbered listing with gaps — locking.acquire(), the outer
# "for row in rows:" header, try:/continue lines, the peers/domains
# initialisers and the dict literals appended to `domains` (only their
# "origin" entries are visible) were elided.
1857 def fetch_relays(args: argparse.Namespace) -> int:
1858 logger.debug("args[]='%s' - CALLED!", type(args))
1860 logger.debug("Invoking locking.acquire() ...")
# Select either one specific relay (--domain) or all known relay softwares.
1863 if args.domain is not None and args.domain != "":
1864 database.cursor.execute("SELECT domain, software FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay') AND domain = ? LIMIT 1", [args.domain])
1866 database.cursor.execute("SELECT domain, software FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay')")
1869 rows = database.cursor.fetchall()
1871 logger.info("Checking %d relays ...", len(rows))
1873 logger.debug("row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1875 if not args.force and instances.is_recent(row["domain"]):
1876 logger.debug("row[domain]='%s' has been recently fetched - SKIPPED!", row["domain"])
# Relays expose their peer list on their HTML front page — fetch "/".
1880 logger.info("Fetching / from relay row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1881 raw = utils.fetch_url(
1882 f"https://{row['domain']}",
1883 network.web_headers,
1884 (config.get("connection_timeout"), config.get("read_timeout"))
1886 logger.debug("raw[%s]()=%d", type(raw), len(raw))
# On fetch failure: record the error, stamp the fetch time, flush, move on.
1887 except network.exceptions as exception:
1888 logger.warning("Exception '%s' during fetching from relay '%s': '%s'", type(exception), row["domain"], str(exception))
1889 instances.set_last_error(row["domain"], exception)
1890 instances.set_last_instance_fetch(row["domain"])
1891 instances.update(row["domain"])
1894 doc = bs4.BeautifulSoup(raw, features="html.parser")
1895 logger.debug("doc[]='%s'", type(doc))
1897 logger.debug("row[software]='%s'", row["software"])
# --- activityrelay: peers are plain text inside the <p> that contains the
# --- phrase "registered instances".
1898 if row["software"] == "activityrelay":
1899 logger.debug("Checking row[domain]='%s' ...", row["domain"])
1900 tags = doc.findAll("p")
1902 logger.debug("Checking %d paragraphs ...", len(tags))
1904 logger.debug("tag[]='%s'", type(tag))
1905 if len(tag.contents) == 0:
1906 logger.debug("tag='%s' is an empty tag - SKIPPED!", tag)
1908 elif "registered instances" not in tag.contents[0]:
1909 logger.debug("Skipping paragraph, text not found.")
1912 logger.debug("Found tag.contents[0][]='%s'", tag.contents[0])
# Each NavigableString child of the paragraph (other than the heading
# phrase itself) is one peer domain.
1913 for domain in tag.contents:
1914 logger.debug("domain[%s]='%s'", type(domain), domain)
1915 if not isinstance(domain, bs4.element.NavigableString) or "registered instances" in domain:
1918 domain = str(domain)
1919 logger.debug("domain='%s'", domain)
1920 if not domain_helper.is_wanted(domain):
1921 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1924 logger.debug("domain='%s' - BEFORE!", domain)
1925 domain = tidyup.domain(domain)
1926 logger.debug("domain='%s' - AFTER!", domain)
1929 logger.debug("Empty domain after tidyup.domain() from origin='%s' - SKIPPED!", row["domain"])
1931 elif domain not in peers:
1932 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1933 peers.append(domain)
# Global dedupe across all relays before queueing for a crawl.
1935 if dict_helper.has_key(domains, "domain", domain):
1936 logger.debug("domain='%s' already added", domain)
1939 logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
1942 "origin": row["domain"],
# --- aoderelay / selective-relay: peers are links inside dedicated HTML
# --- containers; extract the hostname from each link's href.
1944 elif row["software"] in ["aoderelay", "selective-relay"]:
1945 logger.debug("Checking row[domain]='%s' ...", row["domain"])
1946 if row["software"] == "aoderelay":
1947 tags = doc.findAll("section", {"class": "instance"})
1949 tags = doc.find("div", {"id": "instances"}).findAll("li")
1951 logger.debug("Checking %d tags ...", len(tags))
1953 logger.debug("tag[]='%s'", type(tag))
1955 link = tag.find("a")
1956 logger.debug("link[%s]='%s'", type(link), link)
1958 logger.warning("tag='%s' has no a-tag ...", tag)
1961 components = urlparse(link["href"])
1962 domain = components.netloc.lower()
1964 if not domain_helper.is_wanted(domain):
1965 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1968 logger.debug("domain='%s' - BEFORE!", domain)
1969 domain = tidyup.domain(domain)
1970 logger.debug("domain='%s' - AFTER!", domain)
1973 logger.debug("Empty domain after tidyup.domain() from origin='%s' - SKIPPED!", row["domain"])
1975 elif domain not in peers:
1976 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1977 peers.append(domain)
1979 if dict_helper.has_key(domains, "domain", domain):
1980 logger.debug("domain='%s' already added", domain)
1983 logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
1986 "origin": row["domain"],
1989 logger.warning("row[domain]='%s',row[software]='%s' is not supported", row["domain"], row["software"])
# Per-relay bookkeeping: stamp fetch time, store peer count, flush.
1991 logger.debug("Updating last_instance_fetch for row[domain]='%s' ...", row["domain"])
1992 instances.set_last_instance_fetch(row["domain"])
1994 logger.info("Relay '%s' has %d peer(s) registered.", row["domain"], len(peers))
1995 instances.set_total_peers(row["domain"], peers)
1997 logger.debug("Flushing data for row[domain]='%s'", row["domain"])
1998 instances.update(row["domain"])
# Second pass: crawl every domain discovered above that is not yet known.
2000 logger.info("Checking %d domains ...", len(domains))
2002 logger.debug("row[domain]='%s',row[origin]='%s'", row["domain"], row["origin"])
2003 if instances.is_registered(row["domain"]):
2004 logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
2007 logger.info("Fetching row[domain]='%s',row[origin]='%s' ...", row["domain"], row["origin"])
2008 federation.fetch_instances(row["domain"], row["origin"], None, inspect.currentframe().f_code.co_name)
2010 logger.debug("Success! - EXIT!")
# One-shot maintenance command: convert every stored domain that is not yet
# in punycode ("xn--") form to IDNA, across all four domain-bearing columns:
# instances.domain, instances.origin, blocks.blocker, blocks.blocked.
# The actual conversion/UPDATE happens inside the translate_idnas() helpers.
#
# Parameters:
#   args - parsed CLI namespace (not read in the visible lines).
# Returns: int exit code (the return statement is not visible in this listing).
#
# NOTE(review): numbered listing — the gaps here appear to be blank lines
# plus the trailing return.
2013 def convert_idna(args: argparse.Namespace) -> int:
2014 logger.debug("args[]='%s' - CALLED!", type(args))
# NOT LIKE '%xn--%' selects rows that still hold a non-punycode value.
2016 database.cursor.execute("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC")
2017 rows = database.cursor.fetchall()
2019 logger.debug("rows[]='%s'", type(rows))
2020 instances.translate_idnas(rows, "domain")
2022 database.cursor.execute("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC")
2023 rows = database.cursor.fetchall()
2025 logger.debug("rows[]='%s'", type(rows))
2026 instances.translate_idnas(rows, "origin")
2028 database.cursor.execute("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC")
2029 rows = database.cursor.fetchall()
2031 logger.debug("rows[]='%s'", type(rows))
2032 blocks.translate_idnas(rows, "blocker")
2034 database.cursor.execute("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC")
2035 rows = database.cursor.fetchall()
2037 logger.debug("rows[]='%s'", type(rows))
2038 blocks.translate_idnas(rows, "blocked")
2040 logger.debug("Success! - EXIT!")
# Maintenance command: delete every instance whose stored domain fails
# validation, cascade the deletion into the blocks table (both as blocker and
# as blocked), commit, and VACUUM the database to reclaim space.
#
# Parameters:
#   args - parsed CLI namespace (not read in the visible lines).
# Returns: int exit code (the return statement is not visible in this listing).
#
# NOTE(review): numbered listing with gaps — locking.acquire() and the
# "for row in rows:" header were elided.
2043 def remove_invalid(args: argparse.Namespace) -> int:
2044 logger.debug("args[]='%s' - CALLED!", type(args))
2046 logger.debug("Invoking locking.acquire() ...")
2049 database.cursor.execute("SELECT domain FROM instances ORDER BY domain ASC")
2050 rows = database.cursor.fetchall()
2052 logger.info("Checking %d domains ...", len(rows))
2054 logger.debug("row[domain]='%s'", row["domain"])
# split("/")[0] strips any path component before validating the host part;
# anything that still fails validators.domain() is purged.
2055 if not validators.domain(row["domain"].split("/")[0]):
2056 logger.info("Invalid row[domain]='%s' found, removing ...", row["domain"])
# Remove block entries referencing the bad domain on either side first,
# then the instance row itself.
2057 database.cursor.execute("DELETE FROM blocks WHERE blocker = ? OR blocked = ?", [row["domain"], row["domain"]])
2058 database.cursor.execute("DELETE FROM instances WHERE domain = ? LIMIT 1", [row["domain"]])
2060 logger.debug("Invoking commit() ...")
2061 database.connection.commit()
2063 logger.info("Vaccum cleaning database ...")
2064 database.cursor.execute("VACUUM")
2066 logger.debug("Success! - EXIT!")