1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
23 from urllib.parse import urlparse
33 from fba import database
36 from fba.helpers import blacklist
37 from fba.helpers import blocklists
38 from fba.helpers import config
39 from fba.helpers import cookies
40 from fba.helpers import dicts as dict_helper
41 from fba.helpers import domain as domain_helper
42 from fba.helpers import locking
43 from fba.helpers import processing
44 from fba.helpers import software as software_helper
45 from fba.helpers import tidyup
47 from fba.http import federation
48 from fba.http import network
50 from fba.models import blocks
51 from fba.models import instances
52 from fba.models import sources
54 from fba.networks import friendica
55 from fba.networks import lemmy
56 from fba.networks import mastodon
57 from fba.networks import misskey
58 from fba.networks import pleroma
# Module-wide logging setup: INFO by default; the commented-out line below
# can be re-enabled to get DEBUG output from this module only.
60 logging.basicConfig(level=logging.INFO)
61 logger = logging.getLogger(__name__)
62 #logger.setLevel(logging.DEBUG)
# Validates a single domain given on the command line: it must be a
# syntactically valid domain, not blacklisted, and not already registered.
# Returns a numeric status code (0 on success).
# NOTE(review): several original lines are missing from this excerpt
# (the `status` assignments per branch, the `else:` header before the
# "is not known" message, and the final `return status`) - restore from VCS.
64 def check_instance(args: argparse.Namespace) -> int:
65 logger.debug("args.domain='%s' - CALLED!", args.domain)
67 if not validators.domain(args.domain):
68 logger.warning("args.domain='%s' is not valid", args.domain)
70 elif blacklist.is_blacklisted(args.domain):
71 logger.warning("args.domain='%s' is blacklisted", args.domain)
73 elif instances.is_registered(args.domain):
74 logger.warning("args.domain='%s' is already registered", args.domain)
77 logger.info("args.domain='%s' is not known", args.domain)
79 logger.debug("status=%d - EXIT!", status)
# Consistency check: for every registered instance with a nodeinfo_url,
# verify that the URL actually refers to the instance's own domain (either
# the raw domain or its punycode/IDNA form). Relative URLs trivially match.
# Mismatches are logged as warnings and counted.
# NOTE(review): the `cnt` initialization/increment and the final `return`
# are missing from this excerpt - restore from VCS.
82 def check_nodeinfo(args: argparse.Namespace) -> int:
83 logger.debug("args[]='%s' - CALLED!", type(args))
86 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")
89 for row in database.cursor.fetchall():
90 logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
# IDNA-encode the domain so internationalized domains compare correctly
# against the (usually ASCII/punycode) URL stored in the database.
91 punycode = row["domain"].encode("idna").decode("utf-8")
93 if row["nodeinfo_url"].startswith("/"):
94 logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
96 elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
97 logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
100 logger.info("Found %d row(s)", cnt)
102 logger.debug("EXIT!")
# Fetches the public server list from the pixelfed.org API and registers any
# new, wanted, not-recently-crawled domains via federation.fetch_instances().
# NOTE(review): this excerpt is missing several original lines (the `try:`
# headers paired with the `except` clauses below, the early `return`s after
# the recency/error checks, the `for row in rows:` loop header, and the
# `continue` statements after each SKIPPED branch) - restore from VCS.
105 def fetch_pixelfed_api(args: argparse.Namespace) -> int:
106 logger.debug("args[]='%s' - CALLED!", type(args))
108 # No CSRF by default, you don't have to add network.source_headers by yourself here
# Throttle: skip entirely if this source was queried recently.
110 source_domain = "pixelfed.org"
112 if sources.is_recent(source_domain):
113 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
116 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
117 sources.update(source_domain)
# Determine CSRF headers (if the remote site requires them).
120 logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
121 headers = csrf.determine(source_domain, dict())
122 except network.exceptions as exception:
123 logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
# Fetch the full server list in one API call.
127 logger.info("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
128 fetched = network.get_json_api(
130 "/api/v1/servers/all.json?scope=All&country=all&language=all",
132 (config.get("connection_timeout"), config.get("read_timeout"))
135 logger.debug("JSON API returned %d elements", len(fetched))
136 if "error_message" in fetched:
137 logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
139 elif "data" not in fetched["json"]:
140 logger.warning("API did not return JSON with 'data' element - EXIT!")
# Walk the returned rows; each row is expected to carry a "domain" key.
143 rows = fetched["json"]["data"]
144 logger.info("Checking %d fetched rows ...", len(rows))
146 logger.debug("row[]='%s'", type(row))
147 if "domain" not in row:
148 logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
150 elif row["domain"] == "":
151 logger.debug("row[domain] is empty - SKIPPED!")
154 logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
# Normalize to the IDNA (punycode) form before any further checks.
155 domain = row["domain"].encode("idna").decode("utf-8")
156 logger.debug("domain='%s' - AFTER!", domain)
158 if not domain_helper.is_wanted(domain):
159 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
161 elif instances.is_registered(domain):
162 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
164 elif instances.is_recent(domain):
165 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
168 logger.debug("Fetching instances from domain='%s' ...", domain)
169 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
171 except network.exceptions as exception:
172 logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
175 logger.debug("Success! - EXIT!")
# Fetches a domain list from the gql.api.bka.li GraphQL endpoint and
# registers all new, wanted domains. Collected domains are buffered in
# `domains` first, then fetched one by one at the end.
# NOTE(review): this excerpt is missing several original lines (the
# locking.acquire() call, the `domains = []` initialization, `try:` headers
# for the `except` clauses below, early `return`s, and `continue`
# statements after the SKIPPED branches) - restore from VCS.
178 def fetch_bkali(args: argparse.Namespace) -> int:
179 logger.debug("args[]='%s' - CALLED!", type(args))
181 logger.debug("Invoking locking.acquire() ...")
# Throttle: skip entirely if this source was queried recently.
184 source_domain = "gql.api.bka.li"
185 if sources.is_recent(source_domain):
186 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
189 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
190 sources.update(source_domain)
# GraphQL query: all known nodes, ordered by domain.
194 logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
195 fetched = network.post_json_api(
199 "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
203 logger.debug("fetched[]='%s'", type(fetched))
204 if "error_message" in fetched:
205 logger.warning("post_json_api() for 'gql.sources.bka.li' returned error message='%s", fetched["error_message"])
207 elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
208 logger.warning("post_json_api() returned error: '%s", fetched["error"]["message"])
# Sanity-check the response envelope before walking it.
211 rows = fetched["json"]
213 logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
215 raise Exception("WARNING: Returned no records")
216 elif "data" not in rows:
217 raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
218 elif "nodeinfo" not in rows["data"]:
219 raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")
221 for entry in rows["data"]["nodeinfo"]:
222 logger.debug("entry[%s]='%s'", type(entry), entry)
223 if "domain" not in entry:
224 logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
226 elif entry["domain"] == "":
227 logger.debug("entry[domain] is empty - SKIPPED!")
229 elif not domain_helper.is_wanted(entry["domain"]):
230 logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
232 elif instances.is_registered(entry["domain"]):
233 logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
235 elif instances.is_recent(entry["domain"]):
236 logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
239 logger.debug("Adding domain='%s' ...", entry["domain"])
240 domains.append(entry["domain"])
242 except network.exceptions as exception:
243 logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
# Second pass: actually crawl each buffered domain.
246 logger.debug("domains()=%d", len(domains))
248 logger.info("Adding %d new instances ...", len(domains))
249 for domain in domains:
250 logger.debug("domain='%s' - BEFORE!", domain)
# Normalize to the IDNA (punycode) form before crawling.
251 domain = domain.encode("idna").decode("utf-8")
252 logger.debug("domain='%s' - AFTER!", domain)
255 logger.info("Fetching instances from domain='%s' ...", domain)
256 federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
257 except network.exceptions as exception:
258 logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
259 instances.set_last_error(domain, exception)
262 logger.debug("Success - EXIT!")
# Core block-list crawler. Selects blocker instances from the database
# (single domain, single software, or all supported softwares), fetches each
# one's block list with the software-specific fetcher (pleroma/mastodon/
# lemmy/friendica/misskey), normalizes and deobfuscates blocked domains, and
# records new blocks. Optionally posts a bot summary per blocker.
# NOTE(review): this excerpt is missing many original lines (early
# `return`s, `try:` headers paired with the `except` clauses, the
# `blockdict = []` initialization and its dict-append statements, `else:`
# headers, `continue` statements, and `if args.force ...` branching around
# the SELECT variants) - restore from VCS before relying on control flow.
265 def fetch_blocks(args: argparse.Namespace) -> int:
266 logger.debug("args[]='%s' - CALLED!", type(args))
# Optional pre-validation when a single domain was given on the CLI.
267 if args.domain is not None and args.domain != "":
268 logger.debug("args.domain='%s' - checking ...", args.domain)
269 if not validators.domain(args.domain):
270 logger.warning("args.domain='%s' is not valid.", args.domain)
272 elif blacklist.is_blacklisted(args.domain):
273 logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
275 elif not instances.is_registered(args.domain):
276 logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
279 logger.debug("Invoking locking.acquire() ...")
# Choose which blockers to (re-)check: one domain, one software, all, or
# only those past the re-check timeout.
282 if args.domain is not None and args.domain != "":
283 # Re-check single domain
284 logger.debug("Querying database for args.domain='%s' ...", args.domain)
285 database.cursor.execute(
286 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ? LIMIT 1", [args.domain]
288 elif args.software is not None and args.software != "":
289 # Re-check single software
290 logger.debug("Querying database for args.software='%s' ...", args.software)
291 database.cursor.execute(
292 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_updated ASC", [args.software]
296 logger.debug("Re-checking all instances ...")
297 database.cursor.execute(
298 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_updated ASC"
301 # Re-check after "timeout" (aka. minimum interval)
302 database.cursor.execute(
303 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_updated ASC", [time.time() - config.get("recheck_block")]
306 rows = database.cursor.fetchall()
307 logger.info("Checking %d entries ...", len(rows))
308 for blocker, software, origin, nodeinfo_url in rows:
309 logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
311 if nodeinfo_url is None:
312 logger.debug("blocker='%s',software='%s' has no nodeinfo_url set - SKIPPED!", blocker, software)
314 elif not domain_helper.is_wanted(blocker):
315 logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
318 logger.debug("Setting last_blocked,has_obfuscation=false for blocker='%s' ...", blocker)
319 instances.set_last_blocked(blocker)
320 instances.set_has_obfuscation(blocker, False)
324 # c.s isn't part of oliphant's "hidden" blocklists
325 logger.debug("blocker='%s'", blocker)
# Dispatch to the software-specific block-list fetcher. chaos.social and
# excluded blocklists are handled by fetch_cs/fetch_oliphant instead.
326 if blocker != "chaos.social" and not blocklists.is_excluded(blocker):
327 logger.debug("blocker='%s',software='%s'", blocker, software)
328 if software == "pleroma":
329 logger.info("blocker='%s',software='%s'", blocker, software)
330 blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
331 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
332 elif software == "mastodon":
333 logger.info("blocker='%s',software='%s'", blocker, software)
334 blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
335 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
336 elif software == "lemmy":
337 logger.info("blocker='%s',software='%s'", blocker, software)
338 blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
339 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
340 elif software == "friendica":
341 logger.info("blocker='%s',software='%s'", blocker, software)
342 blocking = friendica.fetch_blocks(blocker)
343 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
344 elif software == "misskey":
345 logger.info("blocker='%s',software='%s'", blocker, software)
346 blocking = misskey.fetch_blocks(blocker)
347 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
349 logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)
351 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
352 instances.set_total_blocks(blocker, blocking)
354 logger.debug("Skipping blocker='%s', run ./fba.py fetch_cs or fetch_oliphant instead!", blocker)
# Per-block normalization and filtering.
356 logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
358 for block in blocking:
359 logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])
361 if block["block_level"] == "":
362 logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
365 logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
366 block["blocked"] = tidyup.domain(block["blocked"])
367 block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
368 logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])
# Skip non-routable / fake / Tor-only domains outright.
370 if block["blocked"] == "":
371 logger.warning("blocked is empty, blocker='%s'", blocker)
373 elif block["blocked"].endswith(".onion"):
374 logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
376 elif block["blocked"].endswith(".arpa"):
377 logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
379 elif block["blocked"].endswith(".tld"):
380 logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
# Obfuscated entries ("*"/"?" wildcards) are resolved against already
# known instances, optionally using a supplied hash.
382 elif block["blocked"].find("*") >= 0:
383 logger.debug("blocker='%s' uses obfuscated domains", blocker)
385 # Some friendica servers also obscure domains without hash
386 row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)
388 logger.debug("row[]='%s'", type(row))
390 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
391 instances.set_has_obfuscation(blocker, True)
394 block["blocked"] = row["domain"]
395 origin = row["origin"]
396 nodeinfo_url = row["nodeinfo_url"]
397 elif block["blocked"].find("?") >= 0:
398 logger.debug("blocker='%s' uses obfuscated domains", blocker)
400 # Some obscure them with question marks, not sure if that's dependent on version or not
401 row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)
403 logger.debug("row[]='%s'", type(row))
405 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
406 instances.set_has_obfuscation(blocker, True)
409 block["blocked"] = row["domain"]
410 origin = row["origin"]
411 nodeinfo_url = row["nodeinfo_url"]
413 logger.debug("Looking up instance by domainm, blocked='%s'", block["blocked"])
414 if block["blocked"] == "":
415 logger.debug("block[blocked] is empty - SKIPPED!")
418 logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
# Strip a leading dot and normalize to IDNA form.
419 block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
420 logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
422 if not domain_helper.is_wanted(block["blocked"]):
423 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
425 elif block["block_level"] in ["accept", "accepted"]:
426 logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
428 elif not instances.is_registered(block["blocked"]):
429 logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
430 federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)
# Canonicalize the block level, persist the block, and collect "reject"
# blocks for the optional bot announcement.
432 block["block_level"] = blocks.alias_block_level(block["block_level"])
434 if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
435 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
437 "blocked": block["blocked"],
438 "reason" : block["reason"],
441 logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
442 cookies.clear(block["blocked"])
# Flush any pending instance updates and commit per blocker.
444 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
445 if instances.has_pending(blocker):
446 logger.debug("Flushing updates for blocker='%s' ...", blocker)
447 instances.update(blocker)
449 logger.debug("Invoking commit() ...")
450 database.connection.commit()
452 logger.debug("Invoking cookies.clear(%s) ...", blocker)
453 cookies.clear(blocker)
455 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d'", config.get("bot_enabled"), len(blockdict))
456 if config.get("bot_enabled") and len(blockdict) > 0:
457 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
458 network.send_bot_post(blocker, blockdict)
460 logger.debug("Success! - EXIT!")
# Scrapes fediverse.observer: first the software-type dropdown from the
# landing page (unless a single software was given via --software), then the
# per-software table-data pages, registering every new wanted domain found.
# NOTE(review): this excerpt is missing several original lines (early
# `return`s, the `types = []` initialization, `else:` headers, the
# `for item in items:` loop headers, `try:` headers, and `continue`
# statements) - restore from VCS.
463 def fetch_observer(args: argparse.Namespace) -> int:
464 logger.debug("args[]='%s' - CALLED!", type(args))
466 logger.debug("Invoking locking.acquire() ...")
# Throttle: skip entirely if this source was queried recently.
469 source_domain = "fediverse.observer"
470 if sources.is_recent(source_domain):
471 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
474 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
475 sources.update(source_domain)
# Build the list of software types to fetch, either by scraping the
# navigation dropdown or from the --software argument.
478 if args.software is None:
479 logger.info("Fetching software list ...")
480 raw = utils.fetch_url(
481 f"https://{source_domain}",
483 (config.get("connection_timeout"), config.get("read_timeout"))
485 logger.debug("raw[%s]()=%d", type(raw), len(raw))
487 doc = bs4.BeautifulSoup(raw, features="html.parser")
488 logger.debug("doc[]='%s'", type(doc))
490 navbar = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"})
491 logger.debug("navbar[]='%s'", type(navbar))
493 logger.warning("Cannot find navigation bar, cannot continue!")
496 items = navbar.findAll("a", {"class": "dropdown-item"})
497 logger.debug("items[]='%s'", type(items))
499 logger.info("Checking %d menu items ...", len(items))
501 logger.debug("item[%s]='%s'", type(item), item)
502 if item.text.lower() == "all":
503 logger.debug("Skipping 'All' menu entry ...")
506 logger.debug("Appending item.text='%s' ...", item.text)
507 types.append(tidyup.domain(item.text))
509 logger.info("Adding args.software='%s' as type ...", args.software)
510 types.append(args.software)
# Fetch and parse the table-data page for each software type.
512 logger.info("Fetching %d different table data ...", len(types))
513 for software in types:
514 logger.debug("software='%s' - BEFORE!", software)
515 if args.software is not None and args.software != software:
516 logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
521 logger.debug("Fetching table data for software='%s' ...", software)
522 raw = utils.fetch_url(
523 f"https://{source_domain}/app/views/tabledata.php?software={software}",
525 (config.get("connection_timeout"), config.get("read_timeout"))
527 logger.debug("raw[%s]()=%d", type(raw), len(raw))
529 doc = bs4.BeautifulSoup(raw, features="html.parser")
530 logger.debug("doc[]='%s'", type(doc))
531 except network.exceptions as exception:
532 logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
# Each anchor with class "url" holds one instance domain.
535 items = doc.findAll("a", {"class": "url"})
536 logger.info("Checking %d items,software='%s' ...", len(items), software)
538 logger.debug("item[]='%s'", type(item))
539 domain = item.decode_contents()
540 logger.debug("domain='%s' - AFTER!", domain)
543 logger.debug("domain is empty - SKIPPED!")
546 logger.debug("domain='%s' - BEFORE!", domain)
# Normalize to the IDNA (punycode) form before any further checks.
547 domain = domain.encode("idna").decode("utf-8")
548 logger.debug("domain='%s' - AFTER!", domain)
550 if not domain_helper.is_wanted(domain):
551 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
553 elif instances.is_registered(domain):
554 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
557 software = software_helper.alias(software)
558 logger.info("Fetching instances for domain='%s'", domain)
559 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
561 logger.debug("Success! - EXIT!")
# Scrapes the todon.eu wiki domain-block page, splitting entries into
# "silenced" and "reject" levels, then records each block and registers any
# unknown blocked instances. Optionally posts a bot summary.
# NOTE(review): this excerpt is missing several original lines (early
# `return`, the `blocklist = {...}` and `blockdict = []` initializations,
# the `blocker = ...` assignment used throughout, `try:` headers,
# `continue` statements, and the blockdict-append dict literal) - restore
# from VCS.
564 def fetch_todon_wiki(args: argparse.Namespace) -> int:
565 logger.debug("args[]='%s' - CALLED!", type(args))
567 logger.debug("Invoking locking.acquire() ...")
# Throttle: skip entirely if this source was queried recently.
570 source_domain = "wiki.todon.eu"
571 if sources.is_recent(source_domain):
572 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
575 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
576 sources.update(source_domain)
583 logger.debug("Fetching domainblocks from source_domain='%s'", source_domain)
584 raw = utils.fetch_url(
585 f"https://{source_domain}/todon/domainblocks",
587 (config.get("connection_timeout"), config.get("read_timeout"))
589 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
591 doc = bs4.BeautifulSoup(raw, "html.parser")
592 logger.debug("doc[]='%s'", type(doc))
# The wiki page groups domains under two <h3> headings; each heading is
# followed by a <ul> of <li> entries.
594 silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
595 logger.info("Checking %d silenced/limited entries ...", len(silenced))
596 blocklist["silenced"] = utils.find_domains(silenced, "div")
598 suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
599 logger.info("Checking %d suspended entries ...", len(suspended))
600 blocklist["reject"] = utils.find_domains(suspended, "div")
602 blocking = blocklist["silenced"] + blocklist["reject"]
605 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
606 instances.set_last_blocked(blocker)
607 instances.set_total_blocks(blocker, blocking)
# Record each (blocker, blocked, level) triple, registering unknown
# instances on the fly.
610 for block_level in blocklist:
611 blockers = blocklist[block_level]
613 logger.debug("block_level='%s',blockers()=%d'", block_level, len(blockers))
614 for blocked in blockers:
615 logger.debug("blocked='%s'", blocked)
617 if not instances.is_registered(blocked):
619 logger.info("Fetching instances from domain='%s' ...", blocked)
620 federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
621 except network.exceptions as exception:
622 logger.warning("Exception '%s' during fetching instances (fetch_cs) from blocked='%s'", type(exception), blocked)
623 instances.set_last_error(blocked, exception)
625 if blocks.is_instance_blocked(blocker, blocked, block_level):
626 logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
629 logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
630 if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
631 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", blocked, block_level, blocker)
637 logger.debug("Invoking commit() ...")
638 database.connection.commit()
640 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
641 if config.get("bot_enabled") and len(blockdict) > 0:
642 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
643 network.send_bot_post(blocker, blockdict)
645 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
646 if instances.has_pending(blocker):
647 logger.debug("Flushing updates for blocker='%s' ...", blocker)
648 instances.update(blocker)
650 logger.debug("Success! - EXIT!")
# Fetches chaos.social's published federation.md from GitHub, renders it
# from Markdown to HTML, extracts the "silenced" and "blocked" tables, and
# records the resulting blocks for blocker "chaos.social". Optionally posts
# a bot summary.
# NOTE(review): this excerpt is missing several original lines (early
# `return`, the markdown `extensions` list, the `blocklist = {...}` and
# `blockdict = []` initializations, `try:` headers, and the
# blockdict-append statement around the dict literal) - restore from VCS.
653 def fetch_cs(args: argparse.Namespace):
654 logger.debug("args[]='%s' - CALLED!", type(args))
656 logger.debug("Invoking locking.acquire() ...")
# Throttle: skip entirely if this source was queried recently.
684 source_domain = "raw.githubusercontent.com"
685 if sources.is_recent(source_domain):
686 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
689 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
690 sources.update(source_domain)
692 logger.info("Fetching federation.md from source_domain='%s' ...", source_domain)
693 raw = utils.fetch_url(
694 f"https://{source_domain}/chaossocial/meta/master/federation.md",
696 (config.get("connection_timeout"), config.get("read_timeout"))
698 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
# Render the Markdown to HTML so the block tables can be parsed with bs4.
700 doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
701 logger.debug("doc()=%d[]='%s'", len(doc), type(doc))
703 silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
704 logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
705 blocklist["silenced"] = federation.find_domains(silenced)
707 blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
708 logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
709 blocklist["reject"] = federation.find_domains(blocked)
711 blocking = blocklist["silenced"] + blocklist["reject"]
712 blocker = "chaos.social"
714 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
715 instances.set_last_blocked(blocker)
716 instances.set_total_blocks(blocker, blocking)
# Record every row of both tables; each row is expected to be a dict with
# "domain" and "reason" keys.
718 logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
719 if len(blocking) > 0:
721 for block_level in blocklist:
722 logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))
724 for row in blocklist[block_level]:
725 logger.debug("row[%s]='%s'", type(row), row)
726 if not "domain" in row:
727 logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
729 elif not instances.is_registered(row["domain"]):
731 logger.info("Fetching instances from domain='%s' ...", row["domain"])
732 federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
733 except network.exceptions as exception:
734 logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
735 instances.set_last_error(row["domain"], exception)
737 if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
738 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
740 "blocked": row["domain"],
741 "reason" : row["reason"],
744 logger.debug("Invoking commit() ...")
745 database.connection.commit()
747 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
748 if config.get("bot_enabled") and len(blockdict) > 0:
749 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
750 network.send_bot_post(blocker, blockdict)
752 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
753 if instances.has_pending(blocker):
754 logger.debug("Flushing updates for blocker='%s' ...", blocker)
755 instances.update(blocker)
757 logger.debug("Success! - EXIT!")
# Fetches an FBA-specific RSS feed (URL given via args.feed), extracts one
# domain per item from the link's query string, and registers every new,
# wanted, not-recently-crawled domain.
# NOTE(review): this excerpt is missing several original lines (the
# `domains = []` initialization, early `return`, `try:` headers,
# `continue` statements after the SKIPPED branches, and the final
# `return`) - restore from VCS.
760 def fetch_fba_rss(args: argparse.Namespace) -> int:
761 logger.debug("args[]='%s' - CALLED!", type(args))
765 logger.debug("Invoking locking.acquire() ...")
# Throttle on the feed's host name, not a fixed source domain.
768 components = urlparse(args.feed)
770 if sources.is_recent(components.netloc):
771 logger.info("API from components.netloc='%s' has recently being accessed - EXIT!", components.netloc)
774 logger.debug("components.netloc='%s' has not been recently used, marking ...", components.netloc)
775 sources.update(components.netloc)
777 logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
778 response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
780 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
781 if response.ok and response.status_code == 200 and len(response.text) > 0:
782 logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
783 rss = atoma.parse_rss_bytes(response.content)
785 logger.debug("rss[]='%s'", type(rss))
786 for item in rss.items:
787 logger.debug("item[%s]='%s'", type(item), item)
# The domain is carried as the value after "=" in the item link.
788 domain = tidyup.domain(item.link.split("=")[1])
790 logger.debug("domain='%s' - AFTER!", domain)
792 logger.debug("domain is empty - SKIPPED!")
795 logger.debug("domain='%s' - BEFORE!", domain)
# Normalize to the IDNA (punycode) form before any further checks.
796 domain = domain.encode("idna").decode("utf-8")
797 logger.debug("domain='%s' - AFTER!", domain)
799 if not domain_helper.is_wanted(domain):
800 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
802 elif domain in domains:
803 logger.debug("domain='%s' is already added - SKIPPED!", domain)
805 elif instances.is_registered(domain):
806 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
808 elif instances.is_recent(domain):
809 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
812 logger.debug("Adding domain='%s'", domain)
813 domains.append(domain)
# Second pass: actually crawl each buffered domain.
815 logger.debug("domains()=%d", len(domains))
817 logger.info("Adding %d new instances ...", len(domains))
818 for domain in domains:
819 logger.debug("domain='%s'", domain)
821 logger.info("Fetching instances from domain='%s' ...", domain)
822 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
823 except network.exceptions as exception:
824 logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
825 instances.set_last_error(domain, exception)
828 logger.debug("Success! - EXIT!")
# Fetches the FBA bot account's ATOM feed (default: ryona.agency, or a feed
# URL given via args.feed), extracts domains from every <a href> in each
# entry's HTML content, and registers every new, wanted,
# not-recently-crawled domain with the feed host as origin.
# NOTE(review): this excerpt is missing several original lines (the
# `domains = []` initialization, early `return`, `try:` headers,
# `continue` statements after the SKIPPED branches, and the final
# `return`) - restore from VCS.
831 def fetch_fbabot_atom(args: argparse.Namespace) -> int:
832 logger.debug("args[]='%s' - CALLED!", type(args))
834 logger.debug("Invoking locking.acquire() ...")
837 source_domain = "ryona.agency"
838 feed = f"https://{source_domain}/users/fba/feed.atom"
# A valid --feed argument overrides the default feed and source domain.
840 logger.debug("args.feed[%s]='%s'", type(args.feed), args.feed)
841 if args.feed is not None and validators.url(args.feed):
842 logger.debug("Setting feed='%s' ...", args.feed)
843 feed = str(args.feed)
844 source_domain = urlparse(args.feed).netloc
846 if sources.is_recent(source_domain):
847 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
850 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
851 sources.update(source_domain)
855 logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
856 response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
858 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
859 if response.ok and response.status_code == 200 and len(response.text) > 0:
860 logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
861 atom = atoma.parse_atom_bytes(response.content)
863 logger.debug("atom[]='%s'", type(atom))
864 for entry in atom.entries:
865 logger.debug("entry[]='%s'", type(entry))
# Each entry's content is HTML; harvest domains from every anchor.
866 doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
867 logger.debug("doc[]='%s'", type(doc))
868 for element in doc.findAll("a"):
869 logger.debug("element[]='%s'", type(element))
870 for href in element["href"].split(","):
871 logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
872 domain = tidyup.domain(href)
874 logger.debug("domain='%s' - AFTER!", domain)
876 logger.debug("domain is empty - SKIPPED!")
879 logger.debug("domain='%s' - BEFORE!", domain)
# Normalize to the IDNA (punycode) form before any further checks.
880 domain = domain.encode("idna").decode("utf-8")
881 logger.debug("domain='%s' - AFTER!", domain)
883 if not domain_helper.is_wanted(domain):
884 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
886 elif domain in domains:
887 logger.debug("domain='%s' is already added - SKIPPED!", domain)
889 elif instances.is_registered(domain):
890 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
892 elif instances.is_recent(domain):
893 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
896 logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
897 domains.append(domain)
# Second pass: actually crawl each buffered domain, crediting the feed
# host as the origin.
899 logger.debug("domains()=%d", len(domains))
901 logger.info("Adding %d new instances ...", len(domains))
902 for domain in domains:
903 logger.debug("domain='%s'", domain)
905 logger.info("Fetching instances from domain='%s' ...", domain)
906 federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
907 except network.exceptions as exception:
908 logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
909 instances.set_last_error(domain, exception)
912 logger.debug("Success! - EXIT!")
# Fetch instance data for a single --domain, then re-crawl known instances of
# supported software types whose last fetch is older than the recheck window.
# NOTE(review): numbered excerpt with gaps — return statements, try blocks and
# some guards are not visible here.
915 def fetch_instances(args: argparse.Namespace) -> int:
916 logger.debug("args[]='%s' - CALLED!", type(args))
918 logger.debug("args.domain='%s' - checking ...", args.domain)
# Validate user input before touching the database or the network.
919 if not validators.domain(args.domain):
920 logger.warning("args.domain='%s' is not valid.", args.domain)
922 elif blacklist.is_blacklisted(args.domain):
923 logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
926 logger.debug("Invoking locking.acquire() ...")
930 domain = tidyup.domain(args.domain)
931 origin = software = None
# Reuse previously recorded origin/software for the seed domain, if any.
934 database.cursor.execute("SELECT origin, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
935 row = database.cursor.fetchone()
937 origin = row["origin"]
938 software = row["software"]
942 logger.info("Fetching instances from args.domain='%s',origin='%s',software='%s' ...", domain, origin, software)
943 federation.fetch_instances(domain, origin, software, inspect.currentframe().f_code.co_name)
944 except network.exceptions as exception:
945 logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
946 instances.set_last_error(args.domain, exception)
947 instances.update(args.domain)
951 logger.debug("Not fetching more instances - EXIT!")
954 # Loop through some instances
# Select stale instances (never fetched, or fetched before the recheck window)
# of the supported software list, biggest peer counts first.
955 database.cursor.execute(
956 "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus', 'neodb') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY total_peers DESC, last_updated ASC", [time.time() - config.get("recheck_instance")]
959 rows = database.cursor.fetchall()
960 logger.info("Checking %d entries ...", len(rows))
962 logger.debug("row[domain]='%s'", row["domain"])
963 if row["domain"] == "":
964 logger.debug("row[domain] is empty - SKIPPED!")
967 logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
# Normalize internationalized domains to IDNA (punycode) form.
968 domain = row["domain"].encode("idna").decode("utf-8")
969 logger.debug("domain='%s' - AFTER!", domain)
971 if not domain_helper.is_wanted(domain):
972 logger.debug("Domain domain='%s' is not wanted - SKIPPED!", domain)
976 logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", domain, row["origin"], row["software"], row["nodeinfo_url"])
977 federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
978 except network.exceptions as exception:
979 logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
980 instances.set_last_error(domain, exception)
982 logger.debug("Success - EXIT!")
# Download oliphant's CSV blocklists from codeberg.org and import each row as a
# block record (severity, reject_media, reject_reports) for its blocker.
# NOTE(review): numbered excerpt with gaps — `continue` statements, the
# `domains`/`blockdict` initializers and some assignments are not visible here.
985 def fetch_oliphant(args: argparse.Namespace) -> int:
986 logger.debug("args[]='%s' - CALLED!", type(args))
988 logger.debug("Invoking locking.acquire() ...")
991 source_domain = "codeberg.org"
# Rate-limit: bail out early if this source was queried recently.
992 if sources.is_recent(source_domain):
993 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
996 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
997 sources.update(source_domain)
1000 base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"
1004 logger.debug("Downloading %d files ...", len(blocklists.oliphant_blocklists))
1005 for block in blocklists.oliphant_blocklists:
1006 # Is domain given and not equal blocker?
1007 if isinstance(args.domain, str) and args.domain != block["blocker"]:
1008 logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
1010 elif args.domain in domains:
1011 logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
1014 instances.set_last_blocked(block["blocker"])
1017 logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
1018 response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
1020 logger.debug("response.ok='%s',response.status_code=%d,response.content()=%d", response.ok, response.status_code, len(response.content))
1021 if not response.ok or response.status_code > 200 or response.content == "":
1022 logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
1025 logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
1026 reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
1032 logger.debug("row[%s]='%s'", type(row), row)
1033 domain = severity = None
1034 reject_media = reject_reports = False
# The CSV column headers vary between lists: some use a '#' prefix.
1036 if "#domain" in row:
1037 domain = row["#domain"]
1038 elif "domain" in row:
1039 domain = row["domain"]
1041 logger.debug("row='%s' does not contain domain column", row)
1044 if "#severity" in row:
1045 severity = blocks.alias_block_level(row["#severity"])
1046 elif "severity" in row:
1047 severity = blocks.alias_block_level(row["severity"])
1049 logger.debug("row='%s' does not contain severity column", row)
1052 if "#reject_media" in row and row["#reject_media"].lower() == "true":
1054 elif "reject_media" in row and row["reject_media"].lower() == "true":
1057 if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
1058 reject_reports = True
1059 elif "reject_reports" in row and row["reject_reports"].lower() == "true":
1060 reject_reports = True
1063 logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
1065 logger.debug("domain is empty - SKIPPED!")
# Skip unroutable / placeholder domains before attempting any processing.
1067 elif domain.endswith(".onion"):
1068 logger.debug("domain='%s' is a TOR .onion domain - SKIPPED", domain)
1070 elif domain.endswith(".arpa"):
1071 logger.debug("domain='%s' is a reverse IP address - SKIPPED", domain)
1073 elif domain.endswith(".tld"):
1074 logger.debug("domain='%s' is a fake domain - SKIPPED", domain)
# Wildcard characters mean the domain is censored; try to deobfuscate it.
1076 elif domain.find("*") >= 0 or domain.find("?") >= 0:
1077 logger.debug("domain='%s' is obfuscated - Invoking utils.deobfuscate(%s, %s) ...", domain, domain, block["blocker"])
1078 domain = utils.deobfuscate(domain, block["blocker"])
1079 logger.debug("domain='%s' - AFTER!", domain)
1081 if not validators.domain(domain):
# NOTE(review): format string has a '%s' placeholder but no argument is
# passed — this raises a logging formatting error when emitted; `domain`
# should be supplied.
1082 logger.debug("domain='%s' is not a valid domain - SKIPPED!")
1084 elif blacklist.is_blacklisted(domain):
1085 logger.warning("domain='%s' is blacklisted - SKIPPED!", domain)
1087 elif blocks.is_instance_blocked(block["blocker"], domain, severity):
1088 logger.debug("block[blocker]='%s' has already blocked domain='%s' with severity='%s' - SKIPPED!", block["blocker"], domain, severity)
1091 logger.debug("Marking domain='%s' as handled", domain)
1092 domains.append(domain)
1094 logger.debug("Processing domain='%s' ...", domain)
1095 processed = processing.domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
1096 logger.debug("processed='%s'", processed)
1098 if processing.block(block["blocker"], domain, None, severity) and config.get("bot_enabled"):
1099 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", domain, block["block_level"], block["blocker"])
1102 "reason" : block["reason"],
# Record additional block levels when the CSV flags are set.
1106 processing.block(block["blocker"], domain, None, "reject_media")
1108 processing.block(block["blocker"], domain, None, "reject_reports")
1110 logger.debug("block[blocker]='%s'", block["blocker"])
1111 if not blocklists.is_excluded(block["blocker"]):
1112 logger.debug("Invoking instances.set_total_blocks(%s, domains()=%d) ...", block["blocker"], len(domains))
1113 instances.set_total_blocks(block["blocker"], domains)
1115 logger.debug("Checking if blocker='%s' has pending updates ...", block["blocker"])
1116 if instances.has_pending(block["blocker"]):
1117 logger.debug("Flushing updates for block[blocker]='%s' ...", block["blocker"])
1118 instances.update(block["blocker"])
1120 logger.debug("Invoking commit() ...")
1121 database.connection.commit()
1123 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1124 if config.get("bot_enabled") and len(blockdict) > 0:
1125 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
1126 network.send_bot_post(block["blocker"], blockdict)
1128 logger.debug("Success! - EXIT!")
# Fetch plain-text blocklists (one domain per line), e.g. seirdy.one's bsl.txt,
# and process each listed domain for the corresponding blocker.
# NOTE(review): numbered excerpt with gaps — the `urls` list literal is only
# partially visible and `continue` statements are missing here.
1131 def fetch_txt(args: argparse.Namespace) -> int:
1132 logger.debug("args[]='%s' - CALLED!", type(args))
1134 logger.debug("Invoking locking.acquire() ...")
1139 "blocker": "seirdy.one",
1140 "url" : "https://seirdy.one/pb/bsl.txt",
1143 logger.info("Checking %d text file(s) ...", len(urls))
1145 logger.debug("Fetching row[url]='%s' ...", row["url"])
1146 response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
1148 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1149 if response.ok and response.status_code == 200 and response.text != "":
1150 logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
# One domain per line in the fetched text file.
1151 domains = response.text.split("\n")
1153 logger.info("Processing %d domains ...", len(domains))
1154 for domain in domains:
1155 logger.debug("domain='%s' - BEFORE!", domain)
1156 domain = tidyup.domain(domain)
1158 logger.debug("domain='%s' - AFTER!", domain)
1160 logger.debug("domain is empty - SKIPPED!")
1162 elif not domain_helper.is_wanted(domain):
1163 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1165 elif instances.is_recent(domain):
1166 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1169 logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
1170 processed = processing.domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)
1172 logger.debug("processed='%s'", processed)
1174 logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
1177 logger.debug("Success! - EXIT!")
# Scrape the fedipact.online landing page for <li> entries listing pact member
# instances and fetch each new, wanted domain.
# NOTE(review): numbered excerpt with gaps — `continue`/`return` statements are
# not visible here.
1180 def fetch_fedipact(args: argparse.Namespace) -> int:
1181 logger.debug("args[]='%s' - CALLED!", type(args))
1183 logger.debug("Invoking locking.acquire() ...")
1186 source_domain = "fedipact.online"
# Rate-limit: bail out early if this source was queried recently.
1187 if sources.is_recent(source_domain):
1188 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1191 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1192 sources.update(source_domain)
1194 logger.info("Fetching / from source_domain='%s' ...", source_domain)
1195 response = utils.fetch_url(
1196 f"https://{source_domain}",
1197 network.web_headers,
1198 (config.get("connection_timeout"), config.get("read_timeout"))
1201 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1202 if response.ok and response.status_code == 200 and response.text != "":
1203 logger.debug("Parsing %d Bytes ...", len(response.text))
1205 doc = bs4.BeautifulSoup(response.text, "html.parser")
1206 logger.debug("doc[]='%s'", type(doc))
# Each pact signatory is listed in an <li> element on the page.
1208 rows = doc.findAll("li")
1209 logger.info("Checking %d row(s) ...", len(rows))
1211 logger.debug("row[]='%s'", type(row))
1212 domain = tidyup.domain(row.contents[0])
1214 logger.debug("domain='%s' - AFTER!", domain)
1216 logger.debug("domain is empty - SKIPPED!")
1219 logger.debug("domain='%s' - BEFORE!", domain)
# Normalize internationalized domains to IDNA (punycode) form.
1220 domain = domain.encode("idna").decode("utf-8")
1221 logger.debug("domain='%s' - AFTER!", domain)
1223 if not domain_helper.is_wanted(domain):
1224 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1226 elif instances.is_registered(domain):
1227 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1229 elif instances.is_recent(domain):
1230 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
# "beach.city" is passed as the origin instance for newly found domains.
1233 logger.info("Fetching domain='%s' ...", domain)
1234 federation.fetch_instances(domain, "beach.city", None, inspect.currentframe().f_code.co_name)
1236 logger.debug("Success! - EXIT!")
# Query the joinmobilizon.org instance directory API and fetch every wanted,
# unregistered Mobilizon host it lists.
# NOTE(review): numbered excerpt with gaps — `continue`/`return` statements are
# not visible here.
1239 def fetch_joinmobilizon(args: argparse.Namespace) -> int:
1240 logger.debug("args[]='%s' - CALLED!", type(args))
1242 logger.debug("Invoking locking.acquire() ...")
1245 source_domain = "instances.joinmobilizon.org"
# Rate-limit: bail out early if this source was queried recently.
1246 if sources.is_recent(source_domain):
1247 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1250 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1251 sources.update(source_domain)
1253 logger.info("Fetching instances from source_domain='%s' ...", source_domain)
1254 raw = utils.fetch_url(
1255 f"https://{source_domain}/api/v1/instances",
1256 network.web_headers,
1257 (config.get("connection_timeout"), config.get("read_timeout"))
1259 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1261 parsed = json.loads(raw)
1262 logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
1264 if "data" not in parsed:
# NOTE(review): the '%d' placeholder has no argument — this raises a
# logging formatting error when emitted; `len(parsed)` should be supplied.
1265 logger.warning("parsed()=%d does not contain key 'data'")
1268 logger.info("Checking %d instances ...", len(parsed["data"]))
1269 for row in parsed["data"]:
1270 logger.debug("row[]='%s'", type(row))
1271 if "host" not in row:
1272 logger.warning("row='%s' does not contain key 'host' - SKIPPED!", row)
1274 elif not domain_helper.is_wanted(row["host"]):
1275 logger.debug("row[host]='%s' is not wanted - SKIPPED!", row["host"])
1277 elif instances.is_registered(row["host"]):
1278 logger.debug("row[host]='%s' is already registered - SKIPPED!", row["host"])
# "demo.mobilizon.org" is passed as the origin instance for new hosts.
1281 logger.info("Fetching row[host]='%s' ...", row["host"])
1282 federation.fetch_instances(row["host"], "demo.mobilizon.org", None, inspect.currentframe().f_code.co_name)
1284 logger.debug("Success! - EXIT!")
# Query the misskey.page instance directory (instances.json) and fetch every
# wanted, unregistered Misskey instance it lists.
# NOTE(review): numbered excerpt with gaps — `continue`/`return` statements are
# not visible here.
1287 def fetch_joinmisskey(args: argparse.Namespace) -> int:
1288 logger.debug("args[]='%s' - CALLED!", type(args))
1290 logger.debug("Invoking locking.acquire() ...")
1293 source_domain = "instanceapp.misskey.page"
# Rate-limit: bail out early if this source was queried recently.
1294 if sources.is_recent(source_domain):
1295 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1298 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1299 sources.update(source_domain)
1301 logger.info("Fetching instances.json from source_domain='%s' ...", source_domain)
1302 raw = utils.fetch_url(
1303 f"https://{source_domain}/instances.json",
1304 network.web_headers,
1305 (config.get("connection_timeout"), config.get("read_timeout"))
1307 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1309 parsed = json.loads(raw)
1310 logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
1312 if "instancesInfos" not in parsed:
# NOTE(review): the '%d' placeholder has no argument — this raises a
# logging formatting error when emitted; `len(parsed)` should be supplied.
1313 logger.warning("parsed()=%d does not contain element 'instancesInfos'")
# NOTE(review): "instane(s)" is a typo in the log message ("instance(s)").
1316 logger.info("Checking %d instane(s) ...", len(parsed["instancesInfos"]))
1317 for row in parsed["instancesInfos"]:
1318 logger.debug("row[%s]='%s'", type(row), row)
1319 if "url" not in row:
1320 logger.warning("row()=%d does not have element 'url' - SKIPPED!", len(row))
1322 elif not domain_helper.is_wanted(row["url"]):
1323 logger.debug("row[url]='%s' is not wanted - SKIPPED!", row["url"])
1325 elif instances.is_registered(row["url"]):
1326 logger.debug("row[url]='%s' is already registered - SKIPPED!", row["url"])
# "misskey.io" is passed as the origin instance for new hosts.
1329 logger.info("Fetching row[url]='%s' ...", row["url"])
1330 federation.fetch_instances(row["url"], "misskey.io", None, inspect.currentframe().f_code.co_name)
1332 logger.debug("Success! - EXIT!")
# Scrape the joinfediverse.wiki /FediBlock page: parse its wikitable(s) into
# block records (blocked domain, subdomains, reason), expand subdomain rows,
# register new blocked domains, then record blocks for the climatejustice.*
# blocker instances found in the local database.
# NOTE(review): numbered excerpt with gaps — `blocklist`/`blocking`/`blockdict`
# initializers, `cnt` counters, try blocks and `continue` statements are not
# visible here.
1335 def fetch_joinfediverse(args: argparse.Namespace) -> int:
1336 logger.debug("args[]='%s' - CALLED!", type(args))
1338 logger.debug("Invoking locking.acquire() ...")
1341 source_domain = "joinfediverse.wiki"
# Rate-limit: bail out early if this source was queried recently.
1342 if sources.is_recent(source_domain):
1343 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1346 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1347 sources.update(source_domain)
1349 logger.info("Fetching /FediBlock wiki page from source_domain='%s' ...", source_domain)
1350 raw = utils.fetch_url(
1351 f"https://{source_domain}/FediBlock",
1352 network.web_headers,
1353 (config.get("connection_timeout"), config.get("read_timeout"))
1355 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1357 doc = bs4.BeautifulSoup(raw, "html.parser")
1358 logger.debug("doc[]='%s'", type(doc))
# The block data lives in MediaWiki tables with class "wikitable".
1360 tables = doc.findAll("table", {"class": "wikitable"})
1362 logger.info("Analyzing %d table(s) ...", len(tables))
1364 for table in tables:
1365 logger.debug("table[]='%s'", type(table))
1367 rows = table.findAll("tr")
1368 logger.info("Checking %d row(s) ...", len(rows))
# Maps column index -> recognized header name for the current table.
1369 block_headers = dict()
1371 logger.debug("row[%s]='%s'", type(row), row)
1373 headers = row.findAll("th")
1374 logger.debug("Found headers()=%d header(s)", len(headers))
# A row with multiple <th> cells starts a new header mapping.
1375 if len(headers) > 1:
1376 block_headers = dict()
1378 for header in headers:
1380 logger.debug("header[]='%s',cnt=%d", type(header), cnt)
1381 text = header.contents[0]
1383 logger.debug("text[]='%s'", type(text))
1384 if not isinstance(text, str):
1385 logger.debug("text[]='%s' is not of type 'str' - SKIPPED!", type(text))
1387 elif validators.domain(text.strip()):
1388 logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
1391 text = tidyup.domain(text.strip())
1392 logger.debug("text='%s' - AFTER!", text)
1393 if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
1394 logger.debug("Found header: '%s'=%d", text, cnt)
1395 block_headers[cnt] = text
1397 elif len(block_headers) == 0:
1398 logger.debug("row is not scrapable - SKIPPED!")
1400 elif len(block_headers) > 0:
1401 logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
# Data row: pick out the cells whose column index was recognized above.
1405 for element in row.find_all(["th", "td"]):
1407 logger.debug("element[]='%s',cnt=%d", type(element), cnt)
1408 if cnt in block_headers:
1409 logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])
1411 text = element.text.strip()
# Both "domain" and "instance" columns are stored under the key "blocked".
1412 key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"
1414 logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
1415 if key in ["domain", "instance"]:
1417 elif key == "reason":
1418 block[key] = tidyup.reason(text)
1419 elif key == "subdomain(s)":
1422 block[key] = text.split("/")
1424 logger.debug("key='%s'", key)
1427 logger.debug("block()=%d ...", len(block))
1429 logger.debug("Appending block()=%d ...", len(block))
1430 blocklist.append(block)
1432 logger.debug("blocklist()=%d", len(blocklist))
# Blockers are the locally known climatejustice.* instances.
1434 database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
1435 domains = database.cursor.fetchall()
1437 logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
# Expand each subdomain entry into its own block record (sub.origin).
1439 for block in blocklist:
1440 logger.debug("block='%s'", block)
1441 if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
1442 origin = block["blocked"]
1443 logger.debug("origin='%s'", origin)
1444 for subdomain in block["subdomain(s)"]:
1445 block["blocked"] = subdomain + "." + origin
1446 logger.debug("block[blocked]='%s'", block["blocked"])
1447 blocking.append(block)
1449 blocking.append(block)
# NOTE(review): '%d' is given the list itself, not its length — raises a
# logging formatting error when emitted; should be len(blocking).
1451 logger.debug("blocking()=%d", blocking)
1452 for block in blocking:
1453 logger.debug("block[]='%s'", type(block))
1454 if "blocked" not in block:
1455 raise KeyError(f"block()={len(block)} does not have element 'blocked'")
1457 block["blocked"] = tidyup.domain(block["blocked"]).encode("idna").decode("utf-8")
1458 logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
1460 if block["blocked"] == "":
1461 logger.debug("block[blocked] is empty - SKIPPED!")
1463 elif not domain_helper.is_wanted(block["blocked"]):
1464 logger.debug("block[blocked]='%s' is not wanted - SKIPPED!", block["blocked"])
1466 elif instances.is_recent(block["blocked"]):
1467 logger.debug("block[blocked]='%s' has been recently checked - SKIPPED!", block["blocked"])
1470 logger.debug("Proccessing blocked='%s' ...", block["blocked"])
1471 processing.domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
1474 for blocker in domains:
# Each fetched row is a 1-tuple; unwrap the domain string.
1475 blocker = blocker[0]
1476 logger.debug("blocker[%s]='%s'", type(blocker), blocker)
1477 instances.set_last_blocked(blocker)
1479 for block in blocking:
1480 logger.debug("block[blocked]='%s',block[block reason(s)]='%s' - BEFORE!", block["blocked"], block["block reason(s)"] if "block reason(s)" in block else None)
1481 block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None
1483 logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
1484 if block["blocked"] == "":
1485 logger.debug("block[blocked] is empty - SKIPPED!")
1487 elif not domain_helper.is_wanted(block["blocked"]):
1488 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1491 logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
1492 if processing.block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
1493 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
1495 "blocked": block["blocked"],
1496 "reason" : block["reason"],
1499 if instances.has_pending(blocker):
1500 logger.debug("Flushing updates for blocker='%s' ...", blocker)
1501 instances.update(blocker)
1503 logger.debug("Invoking commit() ...")
1504 database.connection.commit()
1506 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1507 if config.get("bot_enabled") and len(blockdict) > 0:
# NOTE(review): format string is missing the closing quote after '%s'
# ("blocker='%s,..."), a cosmetic typo in the emitted message.
1508 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", blocker, len(blockdict))
1509 network.send_bot_post(blocker, blockdict)
1511 logger.debug("Success! - EXIT!")
# Re-fetch block lists from instances flagged has_obfuscation=1 and try to
# deobfuscate censored entries (e.g. "exa*ple.com"); records resolved blocks
# and updates each instance's obfuscation counters.
# NOTE(review): numbered excerpt with gaps — `blockdict`/`obfuscated`/`blocked`
# initializers, `continue` statements and some assignments are not visible.
1514 def recheck_obfuscation(args: argparse.Namespace) -> int:
1515 logger.debug("args[]='%s' - CALLED!", type(args))
1517 logger.debug("Invoking locking.acquire() ...")
# Narrow the candidate set by --domain, then --software, else take all
# obfuscating instances.
1520 if isinstance(args.domain, str) and args.domain != "" and domain_helper.is_wanted(args.domain):
1521 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
# NOTE(review): validators.domain(args.software) == args.software looks
# suspicious — it compares a validation result against the software name;
# verify the intended condition.
1522 elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
1523 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
1525 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")
1527 rows = database.cursor.fetchall()
1528 logger.info("Checking %d domains ...", len(rows))
1530 logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
# Skip recently checked rows unless --force or an explicit filter was given.
1531 if (args.force is None or not args.force) and args.domain is None and args.software is None and instances.is_recent(row["domain"], "last_blocked"):
1532 logger.debug("row[domain]='%s' has been recently checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force))
# Dispatch to the software-specific block-list fetcher.
1536 if row["software"] == "pleroma":
1537 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1538 blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
1539 elif row["software"] == "mastodon":
1540 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1541 blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
1542 elif row["software"] == "lemmy":
1543 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1544 blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
1545 elif row["software"] == "friendica":
1546 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1547 blocking = friendica.fetch_blocks(row["domain"])
1548 elif row["software"] == "misskey":
1549 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1550 blocking = misskey.fetch_blocks(row["domain"])
1552 logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])
1554 # c.s isn't part of oliphant's "hidden" blocklists
1555 logger.debug("row[domain]='%s'", row["domain"])
1556 if row["domain"] != "chaos.social" and not blocklists.is_excluded(row["domain"]):
1557 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
1558 instances.set_last_blocked(row["domain"])
1559 instances.set_total_blocks(row["domain"], blocking)
1564 logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1565 for block in blocking:
1566 logger.debug("block[blocked]='%s'", block["blocked"])
1569 if block["blocked"] == "":
1570 logger.debug("block[blocked] is empty - SKIPPED!")
# Skip unroutable / placeholder domains.
1572 elif block["blocked"].endswith(".arpa"):
1573 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1575 elif block["blocked"].endswith(".tld"):
1576 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1578 elif block["blocked"].endswith(".onion"):
1579 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
# Wildcards mean the entry is censored; attempt to resolve the real name,
# optionally using the hash some software publishes alongside.
1581 elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
1582 logger.debug("block='%s' is obfuscated.", block["blocked"])
1583 obfuscated = obfuscated + 1
1584 blocked = utils.deobfuscate(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
1585 elif not domain_helper.is_wanted(block["blocked"]):
1586 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1588 elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1589 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
1592 logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
# Only count a successful deobfuscation when the result actually differs.
1593 if blocked is not None and blocked != block["blocked"]:
1594 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1595 obfuscated = obfuscated - 1
1597 if blocks.is_instance_blocked(row["domain"], blocked):
1598 logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
1600 elif blacklist.is_blacklisted(blocked):
1601 logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
1604 block["block_level"] = blocks.alias_block_level(block["block_level"])
1606 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
1607 if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1608 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
1611 "reason" : block["reason"],
1614 logger.debug("Settings obfuscated=%d for row[domain]='%s' ...", obfuscated, row["domain"])
1615 instances.set_obfuscated_blocks(row["domain"], obfuscated)
1617 logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
# If every obfuscated entry was resolved, clear the instance's flag.
1618 if obfuscated == 0 and len(blocking) > 0:
1619 logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
1620 instances.set_has_obfuscation(row["domain"], False)
1622 if instances.has_pending(row["domain"]):
1623 logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1624 instances.update(row["domain"])
1626 logger.debug("Invoking commit() ...")
1627 database.connection.commit()
1629 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1630 if config.get("bot_enabled") and len(blockdict) > 0:
# NOTE(review): format string is missing the closing quote after '%s'
# ("blocker='%s,..."), a cosmetic typo in the emitted message.
1631 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", row["domain"], len(blockdict))
1632 network.send_bot_post(row["domain"], blockdict)
1634 logger.debug("Success! - EXIT!")
# Download the fedilist.com instance CSV (optionally filtered by --software)
# and fetch instance data for every new, wanted hostname it contains.
# NOTE(review): numbered excerpt with gaps — try blocks, `continue`/`return`
# statements and the `rows` materialization are not visible here.
1637 def fetch_fedilist(args: argparse.Namespace) -> int:
1638 logger.debug("args[]='%s' - CALLED!", type(args))
1640 logger.debug("Invoking locking.acquire() ...")
1643 source_domain = "demo.fedilist.com"
# Rate-limit: bail out early if this source was queried recently.
1644 if sources.is_recent(source_domain):
1645 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1648 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1649 sources.update(source_domain)
# NOTE(review): plain http:// URL — confirm whether the endpoint supports
# https; an unencrypted fetch is tamperable.
1651 url = f"http://{source_domain}/instance/csv?onion=not"
1652 if args.software is not None and args.software != "":
1653 logger.debug("args.software='%s'", args.software)
1654 url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"
1656 logger.info("Fetching url='%s' ...", url)
1657 response = reqto.get(
1659 headers=network.web_headers,
1660 timeout=(config.get("connection_timeout"), config.get("read_timeout")),
1661 allow_redirects=False
1664 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1665 if not response.ok or response.status_code > 200 or len(response.content) == 0:
# NOTE(review): condition checks response.content but the message logs
# len(response.text) — minor inconsistency in the reported size.
1666 logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.text))
1669 reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
1671 logger.debug("reader[]='%s'", type(reader))
1673 logger.warning("Failed parsing response.content()=%d as CSV content", len(response.content))
1678 logger.info("Checking %d rows ...", len(rows))
1680 logger.debug("row[]='%s'", type(row))
1681 if "hostname" not in row:
1682 logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(row))
1685 logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"])
1686 domain = tidyup.domain(row["hostname"])
1687 logger.debug("domain='%s' - AFTER!", domain)
1690 logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"])
1693 logger.debug("domain='%s' - BEFORE!", domain)
# Normalize internationalized domains to IDNA (punycode) form.
1694 domain = domain.encode("idna").decode("utf-8")
1695 logger.debug("domain='%s' - AFTER!", domain)
1697 if not domain_helper.is_wanted(domain):
1698 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
# --force re-fetches domains that are already registered.
1700 elif (args.force is None or not args.force) and instances.is_registered(domain):
1701 logger.debug("domain='%s' is already registered, --force not specified: args.force[]='%s'", domain, type(args.force))
1703 elif instances.is_recent(domain):
1704 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1707 logger.info("Fetching instances from domain='%s' ...", domain)
1708 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1710 logger.debug("Success! - EXIT!")
# Re-checks the nodeinfo/software detection for registered instances.
# Selection is narrowed by mutually exclusive CLI options: --domain (single
# instance), --software (all of one software type), --mode (detection mode),
# --no-software (software column is NULL); otherwise every instance whose
# last_nodeinfo is older than config "recheck_nodeinfo" (or NULL) is re-checked.
# Returns an int exit code.
# NOTE(review): several interior lines (loop/try/return statements) are elided
# in this view of the source; comments below describe only the visible code.
1713 def update_nodeinfo(args: argparse.Namespace) -> int:
1714 logger.debug("args[]='%s' - CALLED!", type(args))
# Serialize with other commands via a lock file (acquire call elided here).
1716 logger.debug("Invoking locking.acquire() ...")
1719 if args.domain is not None and args.domain != "":
1720 logger.debug("Fetching args.domain='%s'", args.domain)
1721 database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ?", [args.domain])
1722 elif args.software is not None and args.software != "":
1723 logger.info("Fetching domains for args.software='%s'", args.software)
# Software names are stored lower-case; compare against lower-cased argument.
1724 database.cursor.execute("SELECT domain, software FROM instances WHERE software = ? AND (last_nodeinfo < ? OR last_nodeinfo IS NULL)", [args.software.lower(), time.time() - config.get("recheck_nodeinfo")])
1725 elif args.mode is not None and args.mode != "":
1726 logger.info("Fetching domains for args.mode='%s'", args.mode.upper())
# detection_mode values are stored upper-case (e.g. nodeinfo variants).
1727 database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode = ? AND (last_nodeinfo < ? OR last_nodeinfo IS NULL)", [args.mode.upper(), time.time() - config.get("recheck_nodeinfo")])
1728 elif args.no_software:
1729 logger.info("Fetching domains with no software type detected ...")
1730 database.cursor.execute("SELECT domain, software FROM instances WHERE software IS NULL AND (last_nodeinfo < ? OR last_nodeinfo IS NULL)", [time.time() - config.get("recheck_nodeinfo")])
# Default: everything whose nodeinfo data is stale or never fetched.
1732 logger.info("Fetching domains for recently updated ...")
1733 database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL", [time.time() - config.get("recheck_nodeinfo")])
1735 domains = database.cursor.fetchall()
1737 logger.info("Checking %d domain(s) ...", len(domains))
# Per-row loop (for-statement elided): skip recently checked rows unless --force.
1740 logger.debug("row[]='%s'", type(row))
1741 if not args.force and instances.is_recent(row["domain"], "last_nodeinfo"):
1742 logger.debug("row[domain]='%s' has been recently checked - SKIPPED!", row["domain"])
# Progress percentage is computed from a counter ("cnt") defined in elided code.
1746 logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
1747 software = federation.determine_software(row["domain"])
1749 logger.debug("Determined software='%s'", software)
# Persist only when detection changed the software (and found one), or --force.
1750 if (software != row["software"] and software is not None) or args.force is True:
1751 logger.debug("software='%s'", software)
1752 if software is None:
# Detection failed under --force: clear the stored nodeinfo URL.
1753 logger.debug("Setting nodeinfo_url to 'None' for row[domain]='%s' ...", row["domain"])
1754 instances.set_nodeinfo_url(row["domain"], None)
1756 logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
1757 instances.set_software(row["domain"], software)
1759 if software is not None:
1760 logger.debug("Setting row[domain]='%s' as successfully determined ...", row["domain"])
1761 instances.set_success(row["domain"])
# Network/HTTP errors are recorded on the instance row instead of aborting the run.
1762 except network.exceptions as exception:
1763 logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1764 instances.set_last_error(row["domain"], exception)
# Always stamp last_nodeinfo and flush the row, success or failure.
1766 instances.set_last_nodeinfo(row["domain"])
1767 instances.update(row["domain"])
1770 logger.debug("Success! - EXIT!")
# Fetches the full instance list from the instances.social JSON API and
# registers every wanted, not-yet-known domain by crawling it.
# Requires config key "instances_social_api_key" (Bearer token).
# Returns an int exit code.
# NOTE(review): interior lines (locking, returns, loop/continue statements,
# headers dict literal) are elided in this view; comments cover visible code only.
1773 def fetch_instances_social(args: argparse.Namespace) -> int:
1774 logger.debug("args[]='%s' - CALLED!", type(args))
1776 logger.debug("Invoking locking.acquire() ...")
1779 source_domain = "instances.social"
# Bail out early when no API key is configured ...
1781 if config.get("instances_social_api_key") == "":
1782 logger.error("API key not set. Please set in your config.json file.")
# ... or when this source was polled too recently (rate-limit friendliness).
1784 elif sources.is_recent(source_domain):
1785 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1788 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1789 sources.update(source_domain)
# Part of the (elided) request-headers dict: API key sent as Bearer token.
1792 "Authorization": f"Bearer {config.get('instances_social_api_key')}",
1795 logger.info("Fetching list from source_domain='%s' ...", source_domain)
# count=0 presumably means "no limit" - TODO confirm against instances.social API docs.
1796 fetched = network.get_json_api(
1798 "/api/1.0/instances/list?count=0&sort_by=name",
1800 (config.get("connection_timeout"), config.get("read_timeout"))
1802 logger.debug("fetched[]='%s'", type(fetched))
# get_json_api() reports failures in-band via these keys rather than raising.
1804 if "error_message" in fetched:
1805 logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1807 elif "exception" in fetched:
1808 logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1810 elif "json" not in fetched:
1811 logger.warning("fetched has no element 'json' - EXIT!")
1813 elif "instances" not in fetched["json"]:
1814 logger.warning("fetched[row] has no element 'instances' - EXIT!")
1818 rows = fetched["json"]["instances"]
1820 logger.info("Checking %d row(s) ...", len(rows))
# Per-row loop (for-statement elided): each row's "name" is the hostname.
1822 logger.debug("row[]='%s'", type(row))
1823 domain = tidyup.domain(row["name"])
1824 logger.debug("domain='%s' - AFTER!", domain)
1827 logger.debug("domain is empty - SKIPPED!")
1830 logger.debug("domain='%s' - BEFORE!", domain)
# Normalize to punycode so all storage/comparison uses the IDNA form.
1831 domain = domain.encode("idna").decode("utf-8")
1832 logger.debug("domain='%s' - AFTER!", domain)
# Skip chain: unwanted, duplicate in this run, already registered, or fresh.
1834 if not domain_helper.is_wanted(domain):
1835 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1837 elif domain in domains:
1838 logger.debug("domain='%s' is already added - SKIPPED!", domain)
1840 elif instances.is_registered(domain):
1841 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1843 elif instances.is_recent(domain):
1844 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1847 logger.info("Fetching instances from domain='%s'", domain)
# Crawl the new instance; the current function name is recorded as the command.
1848 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1850 logger.debug("Success! - EXIT!")
# Scrapes ActivityPub relay front pages ('activityrelay', 'aoderelay',
# 'selective-relay' software) for their registered peer instances, stores the
# peer counts, and crawls any newly discovered domains.
# --domain restricts the run to a single relay; --force overrides recency checks.
# Returns an int exit code.
# NOTE(review): interior lines (locking, for/try statements, continue/returns,
# dict-literal lines for the collected "domains" list) are elided in this view;
# comments describe visible code only.
1853 def fetch_relays(args: argparse.Namespace) -> int:
1854 logger.debug("args[]='%s' - CALLED!", type(args))
1856 logger.debug("Invoking locking.acquire() ...")
1859 if args.domain is not None and args.domain != "":
1860 database.cursor.execute("SELECT domain, software FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay') AND domain = ? LIMIT 1", [args.domain])
1862 database.cursor.execute("SELECT domain, software FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay')")
1865 rows = database.cursor.fetchall()
1867 logger.info("Checking %d relays ...", len(rows))
# Per-relay loop (for-statement elided); "peers" per relay and the run-wide
# "domains" list are initialized in elided code.
1869 logger.debug("row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1871 if not args.force and instances.is_recent(row["domain"]):
1872 logger.debug("row[domain]='%s' has been recently fetched - SKIPPED!", row["domain"])
# Fetch the relay's landing page; peers are only listed in its HTML.
1876 logger.info("Fetching / from relay row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1877 raw = utils.fetch_url(
1878 f"https://{row['domain']}",
1879 network.web_headers,
1880 (config.get("connection_timeout"), config.get("read_timeout"))
1882 logger.debug("raw[%s]()=%d", type(raw), len(raw))
# On fetch failure: record the error, still stamp the fetch time, move on.
1883 except network.exceptions as exception:
1884 logger.warning("Exception '%s' during fetching from relay '%s': '%s'", type(exception), row["domain"], str(exception))
1885 instances.set_last_error(row["domain"], exception)
1886 instances.set_last_instance_fetch(row["domain"])
1887 instances.update(row["domain"])
1890 doc = bs4.BeautifulSoup(raw, features="html.parser")
1891 logger.debug("doc[]='%s'", type(doc))
1893 logger.debug("row[software]='%s'", row["software"])
# activityrelay lists its peers as plain text inside a <p> paragraph.
1894 if row["software"] == "activityrelay":
1895 logger.debug("Checking row[domain]='%s' ...", row["domain"])
1896 tags = doc.findAll("p")
1898 logger.debug("Checking %d paragraphs ...", len(tags))
1900 logger.debug("tag[]='%s'", type(tag))
1901 if len(tag.contents) == 0:
1902 logger.debug("tag='%s' is an empty tag - SKIPPED!", tag)
# Only the paragraph headed "registered instances" holds the peer list.
1904 elif "registered instances" not in tag.contents[0]:
1905 logger.debug("Skipping paragraph, text not found.")
1908 logger.debug("Found tag.contents[0][]='%s'", tag.contents[0])
# Each text node in the paragraph is one peer hostname; skip markup nodes
# and the heading line itself.
1909 for domain in tag.contents:
1910 logger.debug("domain[%s]='%s'", type(domain), domain)
1911 if not isinstance(domain, bs4.element.NavigableString) or "registered instances" in domain:
1914 domain = str(domain)
1915 logger.debug("domain='%s'", domain)
1916 if not domain_helper.is_wanted(domain):
1917 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1920 logger.debug("domain='%s' - BEFORE!", domain)
1921 domain = tidyup.domain(domain)
1922 logger.debug("domain='%s' - AFTER!", domain)
1925 logger.debug("Empty domain after tidyup.domain() from origin='%s' - SKIPPED!", row["domain"])
1927 elif domain not in peers:
1928 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1929 peers.append(domain)
# De-duplicate across all relays before queueing the domain for crawling.
1931 if dict_helper.has_key(domains, "domain", domain):
1932 logger.debug("domain='%s' already added", domain)
# Queued entry records which relay the domain was discovered on (dict
# literal partially elided).
1935 logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
1938 "origin": row["domain"],
# aoderelay / selective-relay list peers as links in structured HTML instead.
1940 elif row["software"] in ["aoderelay", "selective-relay"]:
1941 logger.debug("Checking row[domain]='%s' ...", row["domain"])
1942 if row["software"] == "aoderelay":
1943 tags = doc.findAll("section", {"class": "instance"})
1945 tags = doc.find("div", {"id": "instances"}).findAll("li")
1947 logger.debug("Checking %d tags ...", len(tags))
1949 logger.debug("tag[]='%s'", type(tag))
1951 link = tag.find("a")
1952 logger.debug("link[%s]='%s'", type(link), link)
1954 logger.warning("tag='%s' has no a-tag ...", tag)
# The peer hostname is the netloc of the link's href.
1957 components = urlparse(link["href"])
1958 domain = components.netloc.lower()
1960 if not domain_helper.is_wanted(domain):
1961 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1964 logger.debug("domain='%s' - BEFORE!", domain)
1965 domain = tidyup.domain(domain)
1966 logger.debug("domain='%s' - AFTER!", domain)
1969 logger.debug("Empty domain after tidyup.domain() from origin='%s' - SKIPPED!", row["domain"])
1971 elif domain not in peers:
1972 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1973 peers.append(domain)
1975 if dict_helper.has_key(domains, "domain", domain):
1976 logger.debug("domain='%s' already added", domain)
1979 logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
1982 "origin": row["domain"],
# Any other software value is unexpected here given the SQL filter above.
1985 logger.warning("row[domain]='%s',row[software]='%s' is not supported", row["domain"], row["software"])
# Per-relay bookkeeping: fetch timestamp, peer count, then flush the row.
1987 logger.debug("Updating last_instance_fetch for row[domain]='%s' ...", row["domain"])
1988 instances.set_last_instance_fetch(row["domain"])
1990 logger.info("Relay '%s' has %d peer(s) registered.", row["domain"], len(peers))
1991 instances.set_total_peers(row["domain"], peers)
1993 logger.debug("Flushing data for row[domain]='%s'", row["domain"])
1994 instances.update(row["domain"])
# Second pass: crawl every newly discovered (unregistered) domain.
1996 logger.info("Checking %d domains ...", len(domains))
1998 logger.debug("row[domain]='%s',row[origin]='%s'", row["domain"], row["origin"])
1999 if instances.is_registered(row["domain"]):
2000 logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
2003 logger.info("Fetching row[domain]='%s',row[origin]='%s' ...", row["domain"], row["origin"])
2004 federation.fetch_instances(row["domain"], row["origin"], None, inspect.currentframe().f_code.co_name)
2006 logger.debug("Success! - EXIT!")
# One-shot maintenance command: converts all non-punycode (no 'xn--') domain
# values in the database to their IDNA form, across instances.domain,
# instances.origin, blocks.blocker and blocks.blocked.
# The actual conversion is delegated to instances.translate_idnas() /
# blocks.translate_idnas(). Returns an int exit code (return statement elided
# in this view of the source).
2009 def convert_idna(args: argparse.Namespace) -> int:
2010 logger.debug("args[]='%s' - CALLED!", type(args))
# instances.domain column
2012 database.cursor.execute("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC")
2013 rows = database.cursor.fetchall()
2015 logger.debug("rows[]='%s'", type(rows))
2016 instances.translate_idnas(rows, "domain")
# instances.origin column
2018 database.cursor.execute("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC")
2019 rows = database.cursor.fetchall()
2021 logger.debug("rows[]='%s'", type(rows))
2022 instances.translate_idnas(rows, "origin")
# blocks.blocker column
2024 database.cursor.execute("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC")
2025 rows = database.cursor.fetchall()
2027 logger.debug("rows[]='%s'", type(rows))
2028 blocks.translate_idnas(rows, "blocker")
# blocks.blocked column
2030 database.cursor.execute("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC")
2031 rows = database.cursor.fetchall()
2033 logger.debug("rows[]='%s'", type(rows))
2034 blocks.translate_idnas(rows, "blocked")
2036 logger.debug("Success! - EXIT!")
# Maintenance command: deletes instances whose stored domain is not a valid
# domain name, along with any block rows referencing them, then commits and
# VACUUMs the database. Returns an int exit code.
# NOTE(review): the per-row for-statement, locking.acquire() call and final
# return are elided in this view of the source.
2039 def remove_invalid(args: argparse.Namespace) -> int:
2040 logger.debug("args[]='%s' - CALLED!", type(args))
2042 logger.debug("Invoking locking.acquire() ...")
2045 database.cursor.execute("SELECT domain FROM instances ORDER BY domain ASC")
2046 rows = database.cursor.fetchall()
2048 logger.info("Checking %d domains ...", len(rows))
2050 logger.debug("row[domain]='%s'", row["domain"])
# Strip any path component before validating; some stored values apparently
# contain a '/' suffix - TODO confirm why such rows exist.
2051 if not validators.domain(row["domain"].split("/")[0]):
2052 logger.info("Invalid row[domain]='%s' found, removing ...", row["domain"])
# Remove the domain both as blocker and blocked before dropping the instance.
2053 database.cursor.execute("DELETE FROM blocks WHERE blocker = ? OR blocked = ?", [row["domain"], row["domain"]])
2054 database.cursor.execute("DELETE FROM instances WHERE domain = ? LIMIT 1", [row["domain"]])
2056 logger.debug("Invoking commit() ...")
2057 database.connection.commit()
# Reclaim disk space after the deletions.
# NOTE(review): "Vaccum" is a typo for "Vacuum" in the log message (cosmetic only).
2059 logger.info("Vaccum cleaning database ...")
2060 database.cursor.execute("VACUUM")
2062 logger.debug("Success! - EXIT!")