1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
23 from urllib.parse import urlparse
33 from fba import database
36 from fba.helpers import blacklist
37 from fba.helpers import config
38 from fba.helpers import cookies
39 from fba.helpers import dicts as dict_helper
40 from fba.helpers import domain as domain_helper
41 from fba.helpers import locking
42 from fba.helpers import processing
43 from fba.helpers import software as software_helper
44 from fba.helpers import tidyup
46 from fba.http import federation
47 from fba.http import network
49 from fba.models import blocks
50 from fba.models import instances
51 from fba.models import sources
53 from fba.networks import friendica
54 from fba.networks import lemmy
55 from fba.networks import mastodon
56 from fba.networks import misskey
57 from fba.networks import pleroma
59 logging.basicConfig(level=logging.INFO)
60 logger = logging.getLogger(__name__)
61 #logger.setLevel(logging.DEBUG)
63 def check_instance(args: argparse.Namespace) -> int:
64 logger.debug("args.domain='%s' - CALLED!", args.domain)
66 if not validators.domain(args.domain):
67 logger.warning("args.domain='%s' is not valid", args.domain)
69 elif blacklist.is_blacklisted(args.domain):
70 logger.warning("args.domain='%s' is blacklisted", args.domain)
72 elif instances.is_registered(args.domain):
73 logger.warning("args.domain='%s' is already registered", args.domain)
76 logger.info("args.domain='%s' is not known", args.domain)
78 logger.debug("status=%d - EXIT!", status)
81 def check_nodeinfo(args: argparse.Namespace) -> int:
82 logger.debug("args[]='%s' - CALLED!", type(args))
85 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")
88 for row in database.cursor.fetchall():
89 logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
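# Compare both the punycode (IDNA) form and the raw domain against the stored nodeinfo URL, so IDN hosts still match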
90 punycode = row["domain"].encode("idna").decode("utf-8")
92 if row["nodeinfo_url"].startswith("/"):
93 logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
95 elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
96 logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
99 logger.info("Found %d row(s)", cnt)
101 logger.debug("EXIT!")
104 def fetch_pixelfed_api(args: argparse.Namespace) -> int:
105 logger.debug("args[]='%s' - CALLED!", type(args))
107 # No CSRF token is needed by default, so there is no need to add network.source_headers here
109 source_domain = "pixelfed.org"
111 if sources.is_recent(source_domain):
112 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
115 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
116 sources.update(source_domain)
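# Determine whether the source domain requires a CSRF token and build matching request headers for the API call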
119 logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
120 headers = csrf.determine(source_domain, dict())
121 except network.exceptions as exception:
122 logger.warning("Exception '%s' during checking CSRF (fetch_pixelfed_api,%s) - EXIT!", type(exception), __name__)
126 logger.info("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
127 fetched = network.get_json_api(
129 "/api/v1/servers/all.json?scope=All&country=all&language=all",
131 (config.get("connection_timeout"), config.get("read_timeout"))
134 logger.debug("JSON API returned %d elements", len(fetched))
135 if "error_message" in fetched:
136 logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
138 elif "data" not in fetched["json"]:
139 logger.warning("API did not return JSON with 'data' element - EXIT!")
142 rows = fetched["json"]["data"]
143 logger.info("Checking %d fetched rows ...", len(rows))
145 logger.debug("row[]='%s'", type(row))
146 if "domain" not in row:
147 logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
149 elif row["domain"] == "":
150 logger.debug("row[domain] is empty - SKIPPED!")
153 logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
154 domain = row["domain"].encode("idna").decode("utf-8")
155 logger.debug("domain='%s' - AFTER!", domain)
157 if not domain_helper.is_wanted(domain):
158 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
160 elif instances.is_registered(domain):
161 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
163 elif instances.is_recent(domain):
164 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
167 logger.debug("Fetching instances from domain='%s' ...", domain)
168 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
170 except network.exceptions as exception:
171 logger.warning("Cannot fetch Pixelfed API,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
174 logger.debug("Success! - EXIT!")
177 def fetch_bkali(args: argparse.Namespace) -> int:
178 logger.debug("args[]='%s' - CALLED!", type(args))
180 logger.debug("Invoking locking.acquire() ...")
183 source_domain = "gql.api.bka.li"
184 if sources.is_recent(source_domain):
185 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
188 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
189 sources.update(source_domain)
193 logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
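# The GraphQL endpoint returns the full list of known domains, ordered ascending by domain name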
194 fetched = network.post_json_api(
198 "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
202 logger.debug("fetched[]='%s'", type(fetched))
203 if "error_message" in fetched:
204 logger.warning("post_json_api() for source_domain='%s' returned error message='%s'", source_domain, fetched["error_message"])
206 elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
207 logger.warning("post_json_api() returned error message='%s'", fetched["json"]["error"]["message"])
210 rows = fetched["json"]
212 logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
214 raise Exception("WARNING: Returned no records")
215 elif "data" not in rows:
216 raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
217 elif "nodeinfo" not in rows["data"]:
218 raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")
220 for entry in rows["data"]["nodeinfo"]:
221 logger.debug("entry[%s]='%s'", type(entry), entry)
222 if "domain" not in entry:
223 logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
225 elif entry["domain"] == "":
226 logger.debug("entry[domain] is empty - SKIPPED!")
228 elif not domain_helper.is_wanted(entry["domain"]):
229 logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
231 elif instances.is_registered(entry["domain"]):
232 logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
234 elif instances.is_recent(entry["domain"]):
235 logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
238 logger.debug("Adding domain='%s' ...", entry["domain"])
239 domains.append(entry["domain"])
241 except network.exceptions as exception:
242 logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
245 logger.debug("domains()=%d", len(domains))
247 logger.info("Adding %d new instances ...", len(domains))
248 for domain in domains:
249 logger.debug("domain='%s' - BEFORE!", domain)
250 domain = domain.encode("idna").decode("utf-8")
251 logger.debug("domain='%s' - AFTER!", domain)
254 logger.info("Fetching instances from domain='%s' ...", domain)
255 federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
256 except network.exceptions as exception:
257 logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
258 instances.set_last_error(domain, exception)
261 logger.debug("Success - EXIT!")
264 def fetch_blocks(args: argparse.Namespace) -> int:
265 logger.debug("args[]='%s' - CALLED!", type(args))
266 if args.domain is not None and args.domain != "":
267 logger.debug("args.domain='%s' - checking ...", args.domain)
268 if not validators.domain(args.domain):
269 logger.warning("args.domain='%s' is not valid.", args.domain)
271 elif blacklist.is_blacklisted(args.domain):
272 logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
274 elif not instances.is_registered(args.domain):
275 logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
278 logger.debug("Invoking locking.acquire() ...")
281 if args.domain is not None and args.domain != "":
282 # Re-check single domain
283 logger.debug("Querying database for args.domain='%s' ...", args.domain)
284 database.cursor.execute(
285 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
287 elif args.software is not None and args.software != "":
288 # Re-check single software
289 logger.debug("Querying database for args.software='%s' ...", args.software)
290 database.cursor.execute(
291 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
295 logger.debug("Re-checking all instances ...")
296 database.cursor.execute(
297 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC"
300 # Re-check after "timeout" (i.e. the minimum re-check interval)
301 database.cursor.execute(
302 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
305 rows = database.cursor.fetchall()
306 logger.info("Checking %d entries ...", len(rows))
307 for blocker, software, origin, nodeinfo_url in rows:
308 logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
310 if nodeinfo_url is None:
311 logger.debug("blocker='%s',software='%s' has no nodeinfo_url set - SKIPPED!", blocker, software)
313 elif not domain_helper.is_wanted(blocker):
314 logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
317 logger.debug("Setting last_blocked,has_obfuscation=false for blocker='%s' ...", blocker)
318 instances.set_last_blocked(blocker)
319 instances.set_has_obfuscation(blocker, False)
323 if blocker != "chaos.social":
324 logger.debug("blocker='%s',software='%s'", blocker, software)
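# Dispatch to the network-specific block list fetcher for the detected software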
325 if software == "pleroma":
326 logger.info("blocker='%s',software='%s'", blocker, software)
327 blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
328 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
329 elif software == "mastodon":
330 logger.info("blocker='%s',software='%s'", blocker, software)
331 blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
332 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
333 elif software == "lemmy":
334 logger.info("blocker='%s',software='%s'", blocker, software)
335 blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
336 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
337 elif software == "friendica":
338 logger.info("blocker='%s',software='%s'", blocker, software)
339 blocking = friendica.fetch_blocks(blocker)
340 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
341 elif software == "misskey":
342 logger.info("blocker='%s',software='%s'", blocker, software)
343 blocking = misskey.fetch_blocks(blocker)
344 logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
346 logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)
348 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
349 instances.set_total_blocks(blocker, blocking)
351 logger.debug("Skipping chaos.social, run ./fba.py fetch_cs instead!")
353 logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
355 for block in blocking:
356 logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])
358 if block["block_level"] == "":
359 logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
362 logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
363 block["blocked"] = tidyup.domain(block["blocked"])
364 block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
365 logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])
367 if block["blocked"] == "":
368 logger.warning("blocked is empty, blocker='%s'", blocker)
370 elif block["blocked"].endswith(".onion"):
371 logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
373 elif block["blocked"].endswith(".arpa"):
374 logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
376 elif block["blocked"].endswith(".tld"):
377 logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
379 elif block["blocked"].find("*") >= 0:
380 logger.debug("blocker='%s' uses obfuscated domains", blocker)
382 # Some Friendica servers also obfuscate domains without a hash
383 row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)
385 logger.debug("row[]='%s'", type(row))
387 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
388 instances.set_has_obfuscation(blocker, True)
391 block["blocked"] = row["domain"]
392 origin = row["origin"]
393 nodeinfo_url = row["nodeinfo_url"]
394 elif block["blocked"].find("?") >= 0:
395 logger.debug("blocker='%s' uses obfuscated domains", blocker)
397 # Some instances obfuscate blocked domains with question marks; it is unclear whether this depends on the version
398 row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)
400 logger.debug("row[]='%s'", type(row))
402 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
403 instances.set_has_obfuscation(blocker, True)
406 block["blocked"] = row["domain"]
407 origin = row["origin"]
408 nodeinfo_url = row["nodeinfo_url"]
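# Normalise the blocked domain (strip a leading dot, convert to punycode) before checking and storing it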
410 logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
411 if block["blocked"] == "":
412 logger.debug("block[blocked] is empty - SKIPPED!")
415 logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
416 block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
417 logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
419 if not domain_helper.is_wanted(block["blocked"]):
420 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
422 elif block["block_level"] in ["accept", "accepted"]:
423 logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
425 elif not instances.is_registered(block["blocked"]):
426 logger.debug("blocked='%s' is not registered, adding it (blocker='%s') ...", block["blocked"], blocker)
427 federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)
429 block["block_level"] = blocks.alias_block_level(block["block_level"])
431 if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
432 logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
434 "blocked": block["blocked"],
435 "reason" : block["reason"],
438 logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
439 cookies.clear(block["blocked"])
441 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
442 if instances.has_pending(blocker):
443 logger.debug("Flushing updates for blocker='%s' ...", blocker)
444 instances.update_data(blocker)
446 logger.debug("Invoking commit() ...")
447 database.connection.commit()
449 logger.debug("Invoking cookies.clear(%s) ...", blocker)
450 cookies.clear(blocker)
452 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d'", config.get("bot_enabled"), len(blockdict))
453 if config.get("bot_enabled") and len(blockdict) > 0:
454 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
455 network.send_bot_post(blocker, blockdict)
457 logger.debug("Success! - EXIT!")
460 def fetch_observer(args: argparse.Namespace) -> int:
461 logger.debug("args[]='%s' - CALLED!", type(args))
463 logger.debug("Invoking locking.acquire() ...")
466 source_domain = "fediverse.observer"
467 if sources.is_recent(source_domain):
468 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
471 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
472 sources.update(source_domain)
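# Without --software given, scrape the software drop-down menu from the start page to learn which types to query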
475 if args.software is None:
476 logger.info("Fetching software list ...")
477 raw = utils.fetch_url(
478 f"https://{source_domain}",
480 (config.get("connection_timeout"), config.get("read_timeout"))
482 logger.debug("raw[%s]()=%d", type(raw), len(raw))
484 doc = bs4.BeautifulSoup(raw, features="html.parser")
485 logger.debug("doc[]='%s'", type(doc))
487 navbar = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"})
488 logger.debug("navbar[]='%s'", type(navbar))
490 logger.warning("Cannot find navigation bar, cannot continue!")
493 items = navbar.findAll("a", {"class": "dropdown-item"})
494 logger.debug("items[]='%s'", type(items))
496 logger.info("Checking %d menu items ...", len(items))
498 logger.debug("item[%s]='%s'", type(item), item)
499 if item.text.lower() == "all":
500 logger.debug("Skipping 'All' menu entry ...")
503 logger.debug("Appending item.text='%s' ...", item.text)
504 types.append(tidyup.domain(item.text))
506 logger.info("Adding args.software='%s' as type ...", args.software)
507 types.append(args.software)
509 logger.info("Fetching table data for %d software type(s) ...", len(types))
510 for software in types:
511 logger.debug("software='%s' - BEFORE!", software)
512 if args.software is not None and args.software != software:
513 logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
518 logger.debug("Fetching table data for software='%s' ...", software)
519 raw = utils.fetch_url(
520 f"https://{source_domain}/app/views/tabledata.php?software={software}",
522 (config.get("connection_timeout"), config.get("read_timeout"))
524 logger.debug("raw[%s]()=%d", type(raw), len(raw))
526 doc = bs4.BeautifulSoup(raw, features="html.parser")
527 logger.debug("doc[]='%s'", type(doc))
528 except network.exceptions as exception:
529 logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
532 items = doc.findAll("a", {"class": "url"})
533 logger.info("Checking %d items,software='%s' ...", len(items), software)
535 logger.debug("item[]='%s'", type(item))
536 domain = item.decode_contents()
537 logger.debug("domain='%s'", domain)
540 logger.debug("domain is empty - SKIPPED!")
543 logger.debug("domain='%s' - BEFORE!", domain)
544 domain = domain.encode("idna").decode("utf-8")
545 logger.debug("domain='%s' - AFTER!", domain)
547 if not domain_helper.is_wanted(domain):
548 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
550 elif instances.is_registered(domain):
551 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
554 software = software_helper.alias(software)
555 logger.info("Fetching instances for domain='%s'", domain)
556 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
558 logger.debug("Success! - EXIT!")
561 def fetch_todon_wiki(args: argparse.Namespace) -> int:
562 logger.debug("args[]='%s' - CALLED!", type(args))
564 logger.debug("Invoking locking.acquire() ...")
567 source_domain = "wiki.todon.eu"
568 if sources.is_recent(source_domain):
569 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
572 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
573 sources.update(source_domain)
580 logger.debug("Fetching domainblocks from source_domain='%s'", source_domain)
581 raw = utils.fetch_url(
582 f"https://{source_domain}/todon/domainblocks",
584 (config.get("connection_timeout"), config.get("read_timeout"))
586 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
588 doc = bs4.BeautifulSoup(raw, "html.parser")
589 logger.debug("doc[]='%s'", type(doc))
591 silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
592 logger.info("Checking %d silenced/limited entries ...", len(silenced))
593 blocklist["silenced"] = utils.find_domains(silenced, "div")
595 suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
596 logger.info("Checking %d suspended entries ...", len(suspended))
597 blocklist["reject"] = utils.find_domains(suspended, "div")
599 blocking = blocklist["silenced"] + blocklist["reject"]
602 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
603 instances.set_last_blocked(blocker)
604 instances.set_total_blocks(blocker, blocking)
607 for block_level in blocklist:
608 blockers = blocklist[block_level]
610 logger.debug("block_level='%s',blockers()=%d'", block_level, len(blockers))
611 for blocked in blockers:
612 logger.debug("blocked='%s'", blocked)
614 if not instances.is_registered(blocked):
616 logger.info("Fetching instances from domain='%s' ...", blocked)
617 federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
618 except network.exceptions as exception:
619 logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
620 instances.set_last_error(blocked, exception)
622 if blocks.is_instance_blocked(blocker, blocked, block_level):
623 logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
626 logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
627 if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
628 logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", blocked, block_level, blocker)
634 logger.debug("Invoking commit() ...")
635 database.connection.commit()
637 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
638 if config.get("bot_enabled") and len(blockdict) > 0:
639 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
640 network.send_bot_post(blocker, blockdict)
642 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
643 if instances.has_pending(blocker):
644 logger.debug("Flushing updates for blocker='%s' ...", blocker)
645 instances.update_data(blocker)
647 logger.debug("Success! - EXIT!")
650 def fetch_cs(args: argparse.Namespace):
651 logger.debug("args[]='%s' - CALLED!", type(args))
653 logger.debug("Invoking locking.acquire() ...")
681 source_domain = "raw.githubusercontent.com"
682 if sources.is_recent(source_domain):
683 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
686 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
687 sources.update(source_domain)
689 logger.info("Fetching federation.md from source_domain='%s' ...", source_domain)
690 raw = utils.fetch_url(
691 f"https://{source_domain}/chaossocial/meta/master/federation.md",
693 (config.get("connection_timeout"), config.get("read_timeout"))
695 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
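# Render the Markdown block list to HTML so the "Silenced"/"Blocked" tables can be located with BeautifulSoup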
697 doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
698 logger.debug("doc()=%d[]='%s'", len(doc), type(doc))
700 silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
701 logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
702 blocklist["silenced"] = federation.find_domains(silenced)
704 blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
705 logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
706 blocklist["reject"] = federation.find_domains(blocked)
708 blocking = blocklist["silenced"] + blocklist["reject"]
709 blocker = "chaos.social"
711 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
712 instances.set_last_blocked(blocker)
713 instances.set_total_blocks(blocker, blocking)
715 logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
716 if len(blocking) > 0:
718 for block_level in blocklist:
719 logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))
721 for row in blocklist[block_level]:
722 logger.debug("row[%s]='%s'", type(row), row)
723 if not "domain" in row:
724 logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
726 elif not instances.is_registered(row["domain"]):
728 logger.info("Fetching instances from domain='%s' ...", row["domain"])
729 federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
730 except network.exceptions as exception:
731 logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
732 instances.set_last_error(row["domain"], exception)
734 if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
735 logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
737 "blocked": row["domain"],
738 "reason" : row["reason"],
741 logger.debug("Invoking commit() ...")
742 database.connection.commit()
744 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
745 if config.get("bot_enabled") and len(blockdict) > 0:
746 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
747 network.send_bot_post(blocker, blockdict)
749 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
750 if instances.has_pending(blocker):
751 logger.debug("Flushing updates for blocker='%s' ...", blocker)
752 instances.update_data(blocker)
754 logger.debug("Success! - EXIT!")
757 def fetch_fba_rss(args: argparse.Namespace) -> int:
758 logger.debug("args[]='%s' - CALLED!", type(args))
762 logger.debug("Invoking locking.acquire() ...")
765 components = urlparse(args.feed)
767 if sources.is_recent(components.netloc):
768 logger.info("API from components.netloc='%s' has recently been accessed - EXIT!", components.netloc)
771 logger.debug("components.netloc='%s' has not been recently used, marking ...", components.netloc)
772 sources.update(components.netloc)
774 logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
775 response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
777 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
778 if response.ok and response.status_code < 300 and len(response.text) > 0:
779 logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
780 rss = atoma.parse_rss_bytes(response.content)
782 logger.debug("rss[]='%s'", type(rss))
783 for item in rss.items:
784 logger.debug("item[%s]='%s'", type(item), item)
785 domain = tidyup.domain(item.link.split("=")[1])
787 logger.debug("domain='%s' - AFTER!", domain)
789 logger.debug("domain is empty - SKIPPED!")
792 logger.debug("domain='%s' - BEFORE!", domain)
793 domain = domain.encode("idna").decode("utf-8")
794 logger.debug("domain='%s' - AFTER!", domain)
796 if not domain_helper.is_wanted(domain):
797 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
799 elif domain in domains:
800 logger.debug("domain='%s' is already added - SKIPPED!", domain)
802 elif instances.is_registered(domain):
803 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
805 elif instances.is_recent(domain):
806 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
809 logger.debug("Adding domain='%s'", domain)
810 domains.append(domain)
812 logger.debug("domains()=%d", len(domains))
814 logger.info("Adding %d new instances ...", len(domains))
815 for domain in domains:
816 logger.debug("domain='%s'", domain)
818 logger.info("Fetching instances from domain='%s' ...", domain)
819 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
820 except network.exceptions as exception:
821 logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
822 instances.set_last_error(domain, exception)
825 logger.debug("Success! - EXIT!")
828 def fetch_fbabot_atom(args: argparse.Namespace) -> int:
829 logger.debug("args[]='%s' - CALLED!", type(args))
831 logger.debug("Invoking locking.acquire() ...")
834 source_domain = "ryona.agency"
835 feed = f"https://{source_domain}/users/fba/feed.atom"
837 logger.debug("args.feed[%s]='%s'", type(args.feed), args.feed)
838 if args.feed is not None and validators.url(args.feed):
839 logger.debug("Setting feed='%s' ...", args.feed)
840 feed = str(args.feed)
841 source_domain = urlparse(args.feed).netloc
843 if sources.is_recent(source_domain):
844 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
847 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
848 sources.update(source_domain)
852 logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
853 response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
855 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
856 if response.ok and response.status_code < 300 and len(response.text) > 0:
857 logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
858 atom = atoma.parse_atom_bytes(response.content)
860 logger.debug("atom[]='%s'", type(atom))
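# Each ATOM entry carries HTML whose <a> href attributes contain the reported domains (possibly comma-separated)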
861 for entry in atom.entries:
862 logger.debug("entry[]='%s'", type(entry))
863 doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
864 logger.debug("doc[]='%s'", type(doc))
865 for element in doc.findAll("a"):
866 logger.debug("element[]='%s'", type(element))
867 for href in element["href"].split(","):
868 logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
869 domain = tidyup.domain(href)
871 logger.debug("domain='%s' - AFTER!", domain)
873 logger.debug("domain is empty - SKIPPED!")
876 logger.debug("domain='%s' - BEFORE!", domain)
877 domain = domain.encode("idna").decode("utf-8")
878 logger.debug("domain='%s' - AFTER!", domain)
880 if not domain_helper.is_wanted(domain):
881 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
883 elif domain in domains:
884 logger.debug("domain='%s' is already added - SKIPPED!", domain)
886 elif instances.is_registered(domain):
887 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
889 elif instances.is_recent(domain):
890 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
893 logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
894 domains.append(domain)
896 logger.debug("domains()=%d", len(domains))
898 logger.info("Adding %d new instances ...", len(domains))
899 for domain in domains:
900 logger.debug("domain='%s'", domain)
902 logger.info("Fetching instances from domain='%s' ...", domain)
903 federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
904 except network.exceptions as exception:
905 logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
906 instances.set_last_error(domain, exception)
909 logger.debug("Success! - EXIT!")
912 def fetch_instances(args: argparse.Namespace) -> int:
913 logger.debug("args[]='%s' - CALLED!", type(args))
915 logger.debug("args.domain='%s' - checking ...", args.domain)
916 if not validators.domain(args.domain):
917 logger.warning("args.domain='%s' is not valid.", args.domain)
919 elif blacklist.is_blacklisted(args.domain):
920 logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
923 logger.debug("Invoking locking.acquire() ...")
928 logger.info("Fetching instances from args.domain='%s' ...", args.domain)
929 federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
930 except network.exceptions as exception:
931 logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
932 instances.set_last_error(args.domain, exception)
933 instances.update_data(args.domain)
937 logger.debug("Not fetching more instances - EXIT!")
940 # Loop through instances whose last instance fetch is older than the "recheck_instance" interval
941 database.cursor.execute(
942 "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
945 rows = database.cursor.fetchall()
946 logger.info("Checking %d entries ...", len(rows))
948 logger.debug("row[domain]='%s'", row["domain"])
949 if row["domain"] == "":
950 logger.debug("row[domain] is empty - SKIPPED!")
953 logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
954 domain = row["domain"].encode("idna").decode("utf-8")
955 logger.debug("domain='%s' - AFTER!", domain)
957 if not domain_helper.is_wanted(domain):
958 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
962 logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", domain, row["origin"], row["software"], row["nodeinfo_url"])
963 federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
964 except network.exceptions as exception:
965 logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
966 instances.set_last_error(domain, exception)
968 logger.debug("Success - EXIT!")
971 def fetch_oliphant(args: argparse.Namespace) -> int:
972 logger.debug("args[]='%s' - CALLED!", type(args))
974 logger.debug("Invoking locking.acquire() ...")
977 source_domain = "codeberg.org"
978 if sources.is_recent(source_domain):
979 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
982 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
983 sources.update(source_domain)
986 base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"
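# Static list mapping blocker domains to their CSV block list paths in the oliphant/blocklists repository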
991 "blocker": "artisan.chat",
992 "csv_url": "mastodon/artisan.chat.csv",
994 "blocker": "mastodon.art",
995 "csv_url": "mastodon/mastodon.art.csv",
997 "blocker": "pleroma.envs.net",
998 "csv_url": "mastodon/pleroma.envs.net.csv",
1000 "blocker": "oliphant.social",
1001 "csv_url": "mastodon/_unified_tier3_blocklist.csv",
1003 "blocker": "mastodon.online",
1004 "csv_url": "mastodon/mastodon.online.csv",
1006 "blocker": "mastodon.social",
1007 "csv_url": "mastodon/mastodon.social.csv",
1009 "blocker": "mastodon.social",
1010 "csv_url": "other/missing-tier0-mastodon.social.csv",
1012 "blocker": "rage.love",
1013 "csv_url": "mastodon/rage.love.csv",
1015 "blocker": "sunny.garden",
1016 "csv_url": "mastodon/sunny.garden.csv",
1018 "blocker": "sunny.garden",
1019 "csv_url": "mastodon/gardenfence.csv",
1021 "blocker": "solarpunk.moe",
1022 "csv_url": "mastodon/solarpunk.moe.csv",
1024 "blocker": "toot.wales",
1025 "csv_url": "mastodon/toot.wales.csv",
1027 "blocker": "union.place",
1028 "csv_url": "mastodon/union.place.csv",
1030 "blocker": "oliphant.social",
1031 "csv_url": "mastodon/birdsite.csv",
1037 logger.debug("Downloading %d files ...", len(blocklists))
1038 for block in blocklists:
1039 # Is a domain given on the command line and does it differ from this blocker?
1040 if isinstance(args.domain, str) and args.domain != block["blocker"]:
1041 logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
1043 elif args.domain in domains:
1044 logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
1047 instances.set_last_blocked(block["blocker"])
1050 logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
1051 response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
1053 logger.debug("response.ok='%s',response.status_code=%d,response.content()=%d", response.ok, response.status_code, len(response.content))
1054 if not response.ok or response.status_code >= 300 or response.content == "":
1055 logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
1058 logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
1059 reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
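# Column names vary between lists ("#domain" vs. "domain", "#severity" vs. "severity"), so both variants are checked below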
1065 logger.debug("row[%s]='%s'", type(row), row)
1066 domain = severity = None
1067 reject_media = reject_reports = False
1069 if "#domain" in row:
1070 domain = row["#domain"]
1071 elif "domain" in row:
1072 domain = row["domain"]
1074 logger.debug("row='%s' does not contain domain column", row)
1077 if "#severity" in row:
1078 severity = blocks.alias_block_level(row["#severity"])
1079 elif "severity" in row:
1080 severity = blocks.alias_block_level(row["severity"])
1082 logger.debug("row='%s' does not contain severity column", row)
1085 if "#reject_media" in row and row["#reject_media"].lower() == "true":
1087 elif "reject_media" in row and row["reject_media"].lower() == "true":
1090 if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
1091 reject_reports = True
1092 elif "reject_reports" in row and row["reject_reports"].lower() == "true":
1093 reject_reports = True
1096 logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
1098 logger.debug("domain is empty - SKIPPED!")
1100 elif domain.endswith(".onion"):
1101 logger.debug("domain='%s' is a TOR .onion domain - SKIPPED", domain)
1103 elif domain.endswith(".arpa"):
1104 logger.debug("domain='%s' is a reverse IP address - SKIPPED", domain)
1106 elif domain.endswith(".tld"):
1107 logger.debug("domain='%s' is a fake domain - SKIPPED", domain)
1109 elif domain.find("*") >= 0 or domain.find("?") >= 0:
1110 logger.debug("domain='%s' is obfuscated - Invoking utils.deobfuscate(%s, %s) ...", domain, domain, block["blocker"])
1111 domain = utils.deobfuscate(domain, block["blocker"])
1112 logger.debug("domain='%s' - AFTER!", domain)
1114 if not validators.domain(domain):
1115 logger.debug("domain='%s' is not a valid domain - SKIPPED!", domain)
1117 elif blacklist.is_blacklisted(domain):
1118 logger.warning("domain='%s' is blacklisted - SKIPPED!", domain)
1120 elif blocks.is_instance_blocked(block["blocker"], domain, severity):
1121 logger.debug("block[blocker]='%s' has already blocked domain='%s' with severity='%s' - SKIPPED!", block["blocker"], domain, severity)
1124 logger.debug("Marking domain='%s' as handled", domain)
1125 domains.append(domain)
1127 logger.debug("Processing domain='%s' ...", domain)
1128 processed = processing.domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
1129 logger.debug("processed='%s'", processed)
1131 if processing.block(block["blocker"], domain, None, severity) and config.get("bot_enabled"):
1132 logger.debug("Appending blocked='%s',severity='%s' for blocker='%s' ...", domain, severity, block["blocker"])
1135 "reason" : None,
1139 processing.block(block["blocker"], domain, None, "reject_media")
1141 processing.block(block["blocker"], domain, None, "reject_reports")
1143 logger.debug("block[blocker]='%s'", block["blocker"])
1144 if block["blocker"] != "chaos.social":
1145 logger.debug("Invoking instances.set_total_blocks(%s, domains()=%d) ...", block["blocker"], len(domains))
1146 instances.set_total_blocks(block["blocker"], domains)
1148 logger.debug("Checking if blocker='%s' has pending updates ...", block["blocker"])
1149 if instances.has_pending(block["blocker"]):
1150 logger.debug("Flushing updates for block[blocker]='%s' ...", block["blocker"])
1151 instances.update_data(block["blocker"])
1153 logger.debug("Invoking commit() ...")
1154 database.connection.commit()
1156 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1157 if config.get("bot_enabled") and len(blockdict) > 0:
1158 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
1159 network.send_bot_post(block["blocker"], blockdict)
1161 logger.debug("Success! - EXIT!")
1164 def fetch_txt(args: argparse.Namespace) -> int:
1165 logger.debug("args[]='%s' - CALLED!", type(args))
1167 logger.debug("Invoking locking.acquire() ...")
1172 "blocker": "seirdy.one",
1173 "url" : "https://seirdy.one/pb/bsl.txt",
1176 logger.info("Checking %d text file(s) ...", len(urls))
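# Each URL points to a plain-text block list with one blocked domain per line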
1178 logger.debug("Fetching row[url]='%s' ...", row["url"])
1179 response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
1181 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1182 if response.ok and response.status_code < 300 and response.text != "":
1183 logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
1184 domains = response.text.split("\n")
1186 logger.info("Processing %d domains ...", len(domains))
1187 for domain in domains:
1188 logger.debug("domain='%s' - BEFORE!", domain)
1189 domain = tidyup.domain(domain)
1191 logger.debug("domain='%s' - AFTER!", domain)
1193 logger.debug("domain is empty - SKIPPED!")
1195 elif not domain_helper.is_wanted(domain):
1196 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1198 elif instances.is_recent(domain):
1199 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1202 logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
1203 processed = processing.domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)
1205 logger.debug("processed='%s'", processed)
1207 logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
1210 logger.debug("Success! - EXIT!")
1213 def fetch_fedipact(args: argparse.Namespace) -> int:
1214 logger.debug("args[]='%s' - CALLED!", type(args))
1216 logger.debug("Invoking locking.acquire() ...")
1219 source_domain = "fedipact.online"
1220 if sources.is_recent(source_domain):
1221 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1224 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1225 sources.update(source_domain)
1227 logger.info("Fetching / from source_domain='%s' ...", source_domain)
1228 response = utils.fetch_url(
1229 f"https://{source_domain}",
1230 network.web_headers,
1231 (config.get("connection_timeout"), config.get("read_timeout"))
1234 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1235 if response.ok and response.status_code < 300 and response.text != "":
1236 logger.debug("Parsing %d Bytes ...", len(response.text))
1238 doc = bs4.BeautifulSoup(response.text, "html.parser")
1239 logger.debug("doc[]='%s'", type(doc))
1241 rows = doc.findAll("li")
1242 logger.info("Checking %d row(s) ...", len(rows))
1244 logger.debug("row[]='%s'", type(row))
1245 domain = tidyup.domain(row.contents[0])
1247 logger.debug("domain='%s' - AFTER!", domain)
1249 logger.debug("domain is empty - SKIPPED!")
1252 logger.debug("domain='%s' - BEFORE!", domain)
1253 domain = domain.encode("idna").decode("utf-8")
1254 logger.debug("domain='%s' - AFTER!", domain)
1256 if not domain_helper.is_wanted(domain):
1257 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1259 elif instances.is_registered(domain):
1260 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1262 elif instances.is_recent(domain):
1263 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1266 logger.info("Fetching domain='%s' ...", domain)
1267 federation.fetch_instances(domain, "beach.city", None, inspect.currentframe().f_code.co_name)
1269 logger.debug("Success! - EXIT!")
1272 def fetch_joinmobilizon(args: argparse.Namespace) -> int:
1273 logger.debug("args[]='%s' - CALLED!", type(args))
1275 logger.debug("Invoking locking.acquire() ...")
1278 source_domain = "instances.joinmobilizon.org"
1279 if sources.is_recent(source_domain):
1280 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1283 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1284 sources.update(source_domain)
1286 logger.info("Fetching instances from source_domain='%s' ...", source_domain)
1287 raw = utils.fetch_url(
1288 f"https://{source_domain}/api/v1/instances",
1289 network.web_headers,
1290 (config.get("connection_timeout"), config.get("read_timeout"))
1292 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1294 parsed = json.loads(raw)
1295 logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
1297 if "data" not in parsed:
1298 logger.warning("parsed()=%d does not contain key 'data'", len(parsed))
1301 logger.info("Checking %d instances ...", len(parsed["data"]))
1302 for row in parsed["data"]:
1303 logger.debug("row[]='%s'", type(row))
1304 if "host" not in row:
1305 logger.warning("row='%s' does not contain key 'host' - SKIPPED!", row)
1307 elif not domain_helper.is_wanted(row["host"]):
1308 logger.debug("row[host]='%s' is not wanted - SKIPPED!", row["host"])
1310 elif instances.is_registered(row["host"]):
1311 logger.debug("row[host]='%s' is already registered - SKIPPED!", row["host"])
1314 logger.info("Fetching row[host]='%s' ...", row["host"])
1315 federation.fetch_instances(row["host"], "demo.mobilizon.org", None, inspect.currentframe().f_code.co_name)
1317 logger.debug("Success! - EXIT!")
1320 def fetch_joinmisskey(args: argparse.Namespace) -> int:
1321 logger.debug("args[]='%s' - CALLED!", type(args))
1323 logger.debug("Invoking locking.acquire() ...")
1326 source_domain = "instanceapp.misskey.page"
1327 if sources.is_recent(source_domain):
1328 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1331 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1332 sources.update(source_domain)
1334 logger.info("Fetching instances.json from source_domain='%s' ...", source_domain)
1335 raw = utils.fetch_url(
1336 f"https://{source_domain}/instances.json",
1337 network.web_headers,
1338 (config.get("connection_timeout"), config.get("read_timeout"))
1340 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1342 parsed = json.loads(raw)
1343 logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
1345 if "instancesInfos" not in parsed:
1346 logger.warning("parsed()=%d does not contain element 'instancesInfos'", len(parsed))
1349 logger.info("Checking %d instance(s) ...", len(parsed["instancesInfos"]))
1350 for row in parsed["instancesInfos"]:
1351 logger.debug("row[%s]='%s'", type(row), row)
1352 if "url" not in row:
1353 logger.warning("row()=%d does not have element 'url' - SKIPPED!", len(row))
1355 elif not domain_helper.is_wanted(row["url"]):
1356 logger.debug("row[url]='%s' is not wanted - SKIPPED!", row["url"])
1358 elif instances.is_registered(row["url"]):
1359 logger.debug("row[url]='%s' is already registered - SKIPPED!", row["url"])
1362 logger.info("Fetching row[url]='%s' ...", row["url"])
1363 federation.fetch_instances(row["url"], "misskey.io", None, inspect.currentframe().f_code.co_name)
1365 logger.debug("Success! - EXIT!")
1368 def fetch_joinfediverse(args: argparse.Namespace) -> int:
1369 logger.debug("args[]='%s' - CALLED!", type(args))
1371 logger.debug("Invoking locking.acquire() ...")
1374 source_domain = "joinfediverse.wiki"
1375 if sources.is_recent(source_domain):
1376 logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1379 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1380 sources.update(source_domain)
1382 logger.info("Fetching /FediBlock wiki page from source_domain='%s' ...", source_domain)
1383 raw = utils.fetch_url(
1384 f"https://{source_domain}/FediBlock",
1385 network.web_headers,
1386 (config.get("connection_timeout"), config.get("read_timeout"))
1388 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1390 doc = bs4.BeautifulSoup(raw, "html.parser")
1391 logger.debug("doc[]='%s'", type(doc))
1393 tables = doc.findAll("table", {"class": "wikitable"})
1395 logger.info("Analyzing %d table(s) ...", len(tables))
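# Each wikitable starts with a header row that tells which columns hold the domain, subdomain(s) and block reason(s)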
1397 for table in tables:
1398 logger.debug("table[]='%s'", type(table))
1400 rows = table.findAll("tr")
1401 logger.info("Checking %d row(s) ...", len(rows))
1402 block_headers = dict()
1404 logger.debug("row[%s]='%s'", type(row), row)
1406 headers = row.findAll("th")
1407 logger.debug("Found headers()=%d header(s)", len(headers))
1408 if len(headers) > 1:
1409 block_headers = dict()
1411 for header in headers:
1413 logger.debug("header[]='%s',cnt=%d", type(header), cnt)
1414 text = header.contents[0]
1416 logger.debug("text[]='%s'", type(text))
1417 if not isinstance(text, str):
1418 logger.debug("text[]='%s' is not of type 'str' - SKIPPED!", type(text))
1420 elif validators.domain(text.strip()):
1421 logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
1424 text = tidyup.domain(text.strip())
1425 logger.debug("text='%s' - AFTER!", text)
1426 if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
1427 logger.debug("Found header: '%s'=%d", text, cnt)
1428 block_headers[cnt] = text
1430 elif len(block_headers) == 0:
1431 logger.debug("row is not scrapable - SKIPPED!")
1433 elif len(block_headers) > 0:
1434 logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
1438 for element in row.find_all(["th", "td"]):
1440 logger.debug("element[]='%s',cnt=%d", type(element), cnt)
1441 if cnt in block_headers:
1442 logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])
1444 text = element.text.strip()
1445 key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"
1447 logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
1448 if key in ["domain", "instance"]:
1450 elif key == "reason":
1451 block[key] = tidyup.reason(text)
1452 elif key == "subdomain(s)":
1455 block[key] = text.split("/")
1457 logger.debug("key='%s'", key)
1460 logger.debug("block()=%d ...", len(block))
1462 logger.debug("Appending block()=%d ...", len(block))
1463 blocklist.append(block)
1465 logger.debug("blocklist()=%d", len(blocklist))
1467 database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
1468 domains = database.cursor.fetchall()
1470 logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
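# Expand "subdomain(s)" entries into full domain names (subdomain.origin) before adding them to the blocking list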
1472 for block in blocklist:
1473 logger.debug("block='%s'", block)
1474 if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
1475 origin = block["blocked"]
1476 logger.debug("origin='%s'", origin)
1477 for subdomain in block["subdomain(s)"]:
1478 block["blocked"] = subdomain + "." + origin
1479 logger.debug("block[blocked]='%s'", block["blocked"])
1480 blocking.append(block)
1482 blocking.append(block)
1484 logger.debug("blocking()=%d", len(blocking))
1485 for block in blocking:
1486 logger.debug("block[]='%s'", type(block))
1487 if "blocked" not in block:
1488 raise KeyError(f"block()={len(block)} does not have element 'blocked'")
1490 block["blocked"] = tidyup.domain(block["blocked"]).encode("idna").decode("utf-8")
1491 logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
1493 if block["blocked"] == "":
1494 logger.debug("block[blocked] is empty - SKIPPED!")
1496 elif not domain_helper.is_wanted(block["blocked"]):
1497 logger.debug("block[blocked]='%s' is not wanted - SKIPPED!", block["blocked"])
1499 elif instances.is_recent(block["blocked"]):
1500 logger.debug("block[blocked]='%s' has been recently checked - SKIPPED!", block["blocked"])
1503 logger.debug("Processing blocked='%s' ...", block["blocked"])
1504 processing.domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
1507 for blocker in domains:
1508 blocker = blocker[0]
1509 logger.debug("blocker[%s]='%s'", type(blocker), blocker)
1510 instances.set_last_blocked(blocker)
1512 for block in blocking:
1513 logger.debug("block[blocked]='%s',block[block reason(s)]='%s' - BEFORE!", block["blocked"], block["block reason(s)"] if "block reason(s)" in block else None)
1514 block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None
1516 logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
1517 if block["blocked"] == "":
1518 logger.debug("block[blocked] is empty - SKIPPED!")
1520 elif not domain_helper.is_wanted(block["blocked"]):
1521 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1524 logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
1525 if processing.block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
1526 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
1528 "blocked": block["blocked"],
1529 "reason" : block["reason"],
1532 if instances.has_pending(blocker):
1533 logger.debug("Flushing updates for blocker='%s' ...", blocker)
1534 instances.update_data(blocker)
1536 logger.debug("Invoking commit() ...")
1537 database.connection.commit()
1539 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1540 if config.get("bot_enabled") and len(blockdict) > 0:
1541 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
1542 network.send_bot_post(blocker, blockdict)
1544 logger.debug("Success! - EXIT!")
1547 def recheck_obfuscation(args: argparse.Namespace) -> int:
1548 logger.debug("args[]='%s' - CALLED!", type(args))
1550 logger.debug("Invoking locking.acquire() ...")
1553 if isinstance(args.domain, str) and args.domain != "" and domain_helper.is_wanted(args.domain):
1554 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
1555 elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
1556 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
1558 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")
1560 rows = database.cursor.fetchall()
1561 logger.info("Checking %d domains ...", len(rows))
1563 logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
1564 if (args.force is None or not args.force) and args.domain is None and args.software is None and instances.is_recent(row["domain"], "last_blocked"):
1565 logger.debug("row[domain]='%s' has been recently checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force))
1569 if row["software"] == "pleroma":
1570 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1571 blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
1572 elif row["software"] == "mastodon":
1573 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1574 blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
1575 elif row["software"] == "lemmy":
1576 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1577 blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
1578 elif row["software"] == "friendica":
1579 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1580 blocking = friendica.fetch_blocks(row["domain"])
1581 elif row["software"] == "misskey":
1582 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1583 blocking = misskey.fetch_blocks(row["domain"])
1585 logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])
1587 logger.debug("row[domain]='%s'", row["domain"])
1588 # chaos.social requires special care ...
1589 if row["domain"] != "chaos.social":
1590 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
1591 instances.set_last_blocked(row["domain"])
1592 instances.set_total_blocks(row["domain"], blocking)
1597 logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1598 for block in blocking:
1599 logger.debug("block[blocked]='%s'", block["blocked"])
1602 if block["blocked"] == "":
1603 logger.debug("block[blocked] is empty - SKIPPED!")
1605 elif block["blocked"].endswith(".arpa"):
1606 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1608 elif block["blocked"].endswith(".tld"):
1609 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1611 elif block["blocked"].endswith(".onion"):
1612 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
1614 elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
1615 logger.debug("block='%s' is obfuscated.", block["blocked"])
1616 obfuscated = obfuscated + 1
1617 blocked = utils.deobfuscate(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
1618 elif not domain_helper.is_wanted(block["blocked"]):
1619 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1621 elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1622 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
1625 logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
1626 if blocked is not None and blocked != block["blocked"]:
1627 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1628 obfuscated = obfuscated - 1
1630 if blocks.is_instance_blocked(row["domain"], blocked):
1631 logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
1633 elif blacklist.is_blacklisted(blocked):
1634 logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
1637 block["block_level"] = blocks.alias_block_level(block["block_level"])
1639 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
1640 if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1641 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
1644 "reason" : block["reason"],
1647 logger.debug("Settings obfuscated=%d for row[domain]='%s' ...", obfuscated, row["domain"])
1648 instances.set_obfuscated_blocks(row["domain"], obfuscated)
1650 logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
1651 if obfuscated == 0 and len(blocking) > 0:
1652 logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
1653 instances.set_has_obfuscation(row["domain"], False)
1655 if instances.has_pending(row["domain"]):
1656 logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1657 instances.update_data(row["domain"])
1659 logger.debug("Invoking commit() ...")
1660 database.connection.commit()
1662 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1663 if config.get("bot_enabled") and len(blockdict) > 0:
1664 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", row["domain"], len(blockdict))
1665 network.send_bot_post(row["domain"], blockdict)
1667 logger.debug("Success! - EXIT!")
1670 def fetch_fedilist(args: argparse.Namespace) -> int:
1671 logger.debug("args[]='%s' - CALLED!", type(args))
1673 logger.debug("Invoking locking.acquire() ...")
1676 source_domain = "demo.fedilist.com"
1677 if sources.is_recent(source_domain):
1678 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1681 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1682 sources.update(source_domain)
1684 url = f"http://{source_domain}/instance/csv?onion=not"
1685 if args.software is not None and args.software != "":
1686 logger.debug("args.software='%s'", args.software)
1687 url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"
1689 logger.info("Fetching url='%s' ...", url)
1690 response = reqto.get(
1692 headers=network.web_headers,
1693 timeout=(config.get("connection_timeout"), config.get("read_timeout")),
1694 allow_redirects=False
1697 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1698 if not response.ok or response.status_code >= 300 or len(response.content) == 0:
1699 logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.text))
1702 reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
1704 logger.debug("reader[]='%s'", type(reader))
1706 logger.warning("Failed parsing response.content()=%d as CSV content", len(response.content))
1711 logger.info("Checking %d rows ...", len(rows))
1713 logger.debug("row[]='%s'", type(row))
1714 if "hostname" not in row:
1715 logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(row))
1718 logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"])
1719 domain = tidyup.domain(row["hostname"])
1720 logger.debug("domain='%s' - AFTER!", domain)
1723 logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"])
1726 logger.debug("domain='%s' - BEFORE!", domain)
1727 domain = domain.encode("idna").decode("utf-8")
1728 logger.debug("domain='%s' - AFTER!", domain)
1730 if not domain_helper.is_wanted(domain):
1731 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1733 elif (args.force is None or not args.force) and instances.is_registered(domain):
1734 logger.debug("domain='%s' is already registered, --force not specified: args.force[]='%s'", domain, type(args.force))
1736 elif instances.is_recent(domain):
1737 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1740 logger.info("Fetching instances from domain='%s' ...", domain)
1741 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1743 logger.debug("Success! - EXIT!")
1746 def update_nodeinfo(args: argparse.Namespace) -> int:
1747 logger.debug("args[]='%s' - CALLED!", type(args))
1749 logger.debug("Invoking locking.acquire() ...")
1752 if args.domain is not None and args.domain != "":
1753 logger.debug("Fetching args.domain='%s'", args.domain)
1754 database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ?", [args.domain])
1755 elif args.software is not None and args.software != "":
1756 logger.info("Fetching domains for args.software='%s'", args.software)
1757 database.cursor.execute("SELECT domain, software FROM instances WHERE software = ?", [args.software])
1759 logger.info("Fetching domains for recently updated ...")
1760 database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL", [time.time() - config.get("recheck_nodeinfo")])
1762 domains = database.cursor.fetchall()
1764 logger.info("Checking %d domain(s) ...", len(domains))
1767 logger.debug("row[]='%s'", type(row))
1768 if not args.force and instances.is_recent(row["domain"], "last_nodeinfo"):
1769 logger.debug("row[domain]='%s' has been recently checked - SKIPPED!", row["domain"])
1773 logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
1774 software = federation.determine_software(row["domain"])
1776 logger.debug("Determined software='%s'", software)
1777 if (software != row["software"] and software is not None) or args.force is True:
1778 logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
1779 instances.set_software(row["domain"], software)
1781 if software is not None:
1782 logger.debug("Setting row[domain]='%s' as successfully determined ...", row["domain"])
1783 instances.set_success(row["domain"])
1784 except network.exceptions as exception:
1785 logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1786 instances.set_last_error(row["domain"], exception)
1788 instances.set_last_nodeinfo(row["domain"])
1789 instances.update_data(row["domain"])
1792 logger.debug("Success! - EXIT!")
1795 def fetch_instances_social(args: argparse.Namespace) -> int:
1796 logger.debug("args[]='%s' - CALLED!", type(args))
1798 logger.debug("Invoking locking.acquire() ...")
1801 source_domain = "instances.social"
1803 if config.get("instances_social_api_key") == "":
1804 logger.error("API key not set. Please set in your config.json file.")
1806 elif sources.is_recent(source_domain):
1807 logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1810 logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1811 sources.update(source_domain)
1814 "Authorization": f"Bearer {config.get('instances_social_api_key')}",
1817 logger.info("Fetching list from source_domain='%s' ...", source_domain)
1818 fetched = network.get_json_api(
1820 "/api/1.0/instances/list?count=0&sort_by=name",
1822 (config.get("connection_timeout"), config.get("read_timeout"))
1824 logger.debug("fetched[]='%s'", type(fetched))
1826 if "error_message" in fetched:
1827 logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1829 elif "exception" in fetched:
1830 logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1832 elif "json" not in fetched:
1833 logger.warning("fetched has no element 'json' - EXIT!")
1835 elif "instances" not in fetched["json"]:
1836 logger.warning("fetched[row] has no element 'instances' - EXIT!")
1840 rows = fetched["json"]["instances"]
1842 logger.info("Checking %d row(s) ...", len(rows))
1844 logger.debug("row[]='%s'", type(row))
1845 domain = tidyup.domain(row["name"])
1846 logger.debug("domain='%s' - AFTER!", domain)
1849 logger.debug("domain is empty - SKIPPED!")
1852 logger.debug("domain='%s' - BEFORE!", domain)
1853 domain = domain.encode("idna").decode("utf-8")
1854 logger.debug("domain='%s' - AFTER!", domain)
1856 if not domain_helper.is_wanted(domain):
1857 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1859 elif domain in domains:
1860 logger.debug("domain='%s' is already added - SKIPPED!", domain)
1862 elif instances.is_registered(domain):
1863 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1865 elif instances.is_recent(domain):
1866 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1869 logger.info("Fetching instances from domain='%s'", domain)
1870 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1872 logger.debug("Success! - EXIT!")
1875 def fetch_relays(args: argparse.Namespace) -> int:
1876 logger.debug("args[]='%s' - CALLED!", type(args))
1878 logger.debug("Invoking locking.acquire() ...")
1881 if args.domain is not None and args.domain != "":
1882 database.cursor.execute("SELECT domain, software FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay') AND domain = ? LIMIT 1", [args.domain])
1884 database.cursor.execute("SELECT domain, software FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay')")
1887 rows = database.cursor.fetchall()
1889 logger.info("Checking %d relays ...", len(rows))
1891 logger.debug("row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1893 if not args.force and instances.is_recent(row["domain"]):
1894 logger.debug("row[domain]='%s' has been recently fetched - SKIPPED!", row["domain"])
1898 logger.info("Fetching / from relay row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1899 raw = utils.fetch_url(
1900 f"https://{row['domain']}",
1901 network.web_headers,
1902 (config.get("connection_timeout"), config.get("read_timeout"))
1904 logger.debug("raw[%s]()=%d", type(raw), len(raw))
1905 except network.exceptions as exception:
1906 logger.warning("Exception '%s' during fetching from relay '%s': '%s'", type(exception), row["domain"], str(exception))
1907 instances.set_last_error(row["domain"], exception)
1908 instances.set_last_instance_fetch(row["domain"])
1909 instances.update_data(row["domain"])
1912 doc = bs4.BeautifulSoup(raw, features="html.parser")
1913 logger.debug("doc[]='%s'", type(doc))
1915 logger.debug("row[software]='%s'", row["software"])
1916 if row["software"] == "activityrelay":
1917 logger.debug("Checking row[domain]='%s' ...", row["domain"])
1918 tags = doc.findAll("p")
1920 logger.debug("Checking %d paragraphs ...", len(tags))
1922 logger.debug("tag[]='%s'", type(tag))
1923 if len(tag.contents) == 0:
1924 logger.debug("tag='%s' is an empty tag - SKIPPED!", tag)
1926 elif "registered instances" not in tag.contents[0]:
1927 logger.debug("Skipping paragraph, text not found.")
1930 logger.debug("Found tag.contents[0][]='%s'", tag.contents[0])
1931 for domain in tag.contents:
1932 logger.debug("domain[%s]='%s'", type(domain), domain)
1933 if not isinstance(domain, bs4.element.NavigableString) or "registered instances" in domain:
1936 domain = str(domain)
1937 logger.debug("domain='%s'", domain)
1938 if not domain_helper.is_wanted(domain):
1939 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1942 logger.debug("domain='%s' - BEFORE!", domain)
1943 domain = tidyup.domain(domain)
1944 logger.debug("domain='%s' - AFTER!", domain)
1947 logger.debug("Empty domain after tidyup.domain() from origin='%s' - SKIPPED!", row["domain"])
1949 elif domain not in peers:
1950 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1951 peers.append(domain)
1953 if dict_helper.has_key(domains, "domain", domain):
1954 logger.debug("domain='%s' already added", domain)
1957 logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
1960 "origin": row["domain"],
1962 elif row["software"] in ["aoderelay", "selective-relay"]:
1963 logger.debug("Checking row[domain]='%s' ...", row["domain"])
1964 if row["software"] == "aoderelay":
1965 tags = doc.findAll("section", {"class": "instance"})
1967 tags = doc.find("div", {"id": "instances"}).findAll("li")
1969 logger.debug("Checking %d tags ...", len(tags))
1971 logger.debug("tag[]='%s'", type(tag))
1973 link = tag.find("a")
1974 logger.debug("link[%s]='%s'", type(link), link)
1976 logger.warning("tag='%s' has no a-tag ...", tag)
1979 components = urlparse(link["href"])
1980 domain = components.netloc.lower()
1982 if not domain_helper.is_wanted(domain):
1983 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1986 logger.debug("domain='%s' - BEFORE!", domain)
1987 domain = tidyup.domain(domain)
1988 logger.debug("domain='%s' - AFTER!", domain)
1991 logger.debug("Empty domain after tidyup.domain() from origin='%s' - SKIPPED!", row["domain"])
1993 elif domain not in peers:
1994 logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1995 peers.append(domain)
1997 if dict_helper.has_key(domains, "domain", domain):
1998 logger.debug("domain='%s' already added", domain)
2001 logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
2004 "origin": row["domain"],
2007 logger.warning("row[domain]='%s',row[software]='%s' is not supported", row["domain"], row["software"])
2009 logger.debug("Updating last_instance_fetch for row[domain]='%s' ...", row["domain"])
2010 instances.set_last_instance_fetch(row["domain"])
2012 logger.info("Relay '%s' has %d peer(s) registered.", row["domain"], len(peers))
2013 instances.set_total_peers(row["domain"], peers)
2015 logger.debug("Flushing data for row[domain]='%s'", row["domain"])
2016 instances.update_data(row["domain"])
2018 logger.info("Checking %d domains ...", len(domains))
2020 logger.debug("row[domain]='%s',row[origin]='%s'", row["domain"], row["origin"])
2021 if instances.is_registered(row["domain"]):
2022 logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
2025 logger.info("Fetching row[domain]='%s',row[origin]='%s' ...", row["domain"], row["origin"])
2026 federation.fetch_instances(row["domain"], row["origin"], None, inspect.currentframe().f_code.co_name)
2028 logger.debug("Success! - EXIT!")
2031 def convert_idna(args: argparse.Namespace) -> int:
2032 logger.debug("args[]='%s' - CALLED!", type(args))
2034 database.cursor.execute("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC")
2035 rows = database.cursor.fetchall()
2037 logger.debug("rows[]='%s'", type(rows))
2038 instances.translate_idnas(rows, "domain")
2040 database.cursor.execute("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC")
2041 rows = database.cursor.fetchall()
2043 logger.debug("rows[]='%s'", type(rows))
2044 instances.translate_idnas(rows, "origin")
2046 database.cursor.execute("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC")
2047 rows = database.cursor.fetchall()
2049 logger.debug("rows[]='%s'", type(rows))
2050 blocks.translate_idnas(rows, "blocker")
2052 database.cursor.execute("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC")
2053 rows = database.cursor.fetchall()
2055 logger.debug("rows[]='%s'", type(rows))
2056 blocks.translate_idnas(rows, "blocked")
2058 logger.debug("Success! - EXIT!")