1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
23 from urllib.parse import urlparse
33 from fba import database
36 from fba.helpers import blacklist
37 from fba.helpers import config
38 from fba.helpers import cookies
39 from fba.helpers import dicts as dict_helper
40 from fba.helpers import locking
41 from fba.helpers import processing
42 from fba.helpers import software as software_helper
43 from fba.helpers import tidyup
45 from fba.http import federation
46 from fba.http import network
48 from fba.models import blocks
49 from fba.models import instances
50 from fba.models import sources
52 from fba.networks import friendica
53 from fba.networks import lemmy
54 from fba.networks import mastodon
55 from fba.networks import misskey
56 from fba.networks import pleroma
# Module-wide logging setup: INFO by default for the whole process; the
# commented line below can be re-enabled to get DEBUG output for this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)
def check_instance(args: argparse.Namespace) -> int:
    """Validate a single CLI-supplied domain: warn when it is syntactically
    invalid, blacklisted or already registered; otherwise report it as not
    known.

    :param args: parsed CLI arguments; only args.domain is read here
    :return: numeric exit status

    NOTE(review): the `status` assignments, an `else:` header and the final
    `return` are elided in this view of the source — confirm against the
    full file before editing.
    """
    logger.debug("args.domain='%s' - CALLED!", args.domain)
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
    # NOTE(review): an `else:` header appears to be elided here; the info
    # line below is presumably its body.
    logger.info("args.domain='%s' is not known", args.domain)
    logger.debug("status=%d - EXIT!", status)
def check_nodeinfo(args: argparse.Namespace) -> int:
    """Consistency check over all stored instances: every non-NULL
    nodeinfo_url must either be a relative URL or contain the instance's
    domain (or its punycode form); mismatches are logged as warnings.

    :param args: parsed CLI arguments (unused beyond logging)
    :return: numeric exit status

    NOTE(review): the `cnt` initialisation/increment and the `return` are
    elided in this view of the source.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))
    database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")
    for row in database.cursor.fetchall():
        logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        # IDNA-encode so internationalised domains can be compared against
        # the ASCII form that typically appears in the URL.
        punycode = row["domain"].encode("idna").decode("utf-8")
        if row["nodeinfo_url"].startswith("/"):
            logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
        elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
            logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
    logger.info("Found %d row(s)", cnt)
    logger.debug("EXIT!")
def fetch_pixelfed_api(args: argparse.Namespace) -> int:
    """Fetch the public server list from the pixelfed.org API and crawl any
    new, wanted domains via federation.fetch_instances().

    :param args: parsed CLI arguments (unused beyond logging)
    :return: numeric exit status

    NOTE(review): several lines are elided in this view (returns, `try:`
    headers, some positional call arguments and a closing parenthesis, and
    the `for row in rows:` loop header); elisions are marked inline.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF by default, you don't have to add network.source_headers by yourself here
    source_domain = "pixelfed.org"

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
    # NOTE(review): a `return` and an `else:` header are elided here.
    logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
    sources.update(source_domain)

    # NOTE(review): the enclosing `try:` for the CSRF probe is elided here.
    logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
    headers = csrf.determine(source_domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)

    logger.info("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
    fetched = network.get_json_api(
        # NOTE(review): the domain argument appears elided before this path.
        "/api/v1/servers/all.json?scope=All&country=all&language=all",
        (config.get("connection_timeout"), config.get("read_timeout"))
    # NOTE(review): closing parenthesis of the call is elided here.

    logger.debug("JSON API returned %d elements", len(fetched))
    if "error_message" in fetched:
        logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
    elif "data" not in fetched["json"]:
        logger.warning("API did not return JSON with 'data' element - EXIT!")

    rows = fetched["json"]["data"]
    logger.info("Checking %d fetched rows ...", len(rows))
    # NOTE(review): the `for row in rows:` loop header is elided; the lines
    # below are its body.
        logger.debug("row[]='%s'", type(row))
        if "domain" not in row:
            logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
        elif row["domain"] == "":
            logger.debug("row[domain] is empty - SKIPPED!")

        logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
        # Normalise to the punycode (ASCII) form of the domain.
        domain = row["domain"].encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not utils.is_domain_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
        elif instances.is_registered(domain):
            logger.debug("domain='%s' is already registered - SKIPPED!", domain)
        elif instances.is_recent(domain):
            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)

        logger.debug("Fetching instances from domain='%s' ...", domain)
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))

    logger.debug("Success! - EXIT!")
def fetch_bkali(args: argparse.Namespace) -> int:
    """Query the gql.api.bka.li GraphQL API for a domain list and crawl any
    new, wanted domains via federation.fetch_instances().

    :param args: parsed CLI arguments (unused beyond logging)
    :return: numeric exit status

    NOTE(review): this view elides the locking.acquire() call, `domains`
    initialisation, `try:` headers, several guards (`if len(rows) == 0:`),
    parts of the post_json_api() call, and returns; elisions are marked
    inline.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")

    source_domain = "gql.api.bka.li"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
    # NOTE(review): a `return` and an `else:` header are elided here.
    logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
    sources.update(source_domain)

    # NOTE(review): `domains` initialisation and a `try:` header are elided.
    logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
    fetched = network.post_json_api(
        # NOTE(review): domain/path arguments and the payload wrapper around
        # this GraphQL query string are elided here.
        "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
    # NOTE(review): remaining call arguments and closing parenthesis elided.

    logger.debug("fetched[]='%s'", type(fetched))
    if "error_message" in fetched:
        logger.warning("post_json_api() for 'gql.sources.bka.li' returned error message='%s", fetched["error_message"])
    elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
        logger.warning("post_json_api() returned error: '%s", fetched["error"]["message"])

    rows = fetched["json"]

    logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
    # NOTE(review): an `if len(rows) == 0:`-style guard is elided here; the
    # raise below is presumably its body and the elif-chain continues it.
    raise Exception("WARNING: Returned no records")
    elif "data" not in rows:
        raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
    elif "nodeinfo" not in rows["data"]:
        raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

    for entry in rows["data"]["nodeinfo"]:
        logger.debug("entry[%s]='%s'", type(entry), entry)
        if "domain" not in entry:
            logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
        elif entry["domain"] == "":
            logger.debug("entry[domain] is empty - SKIPPED!")
        elif not utils.is_domain_wanted(entry["domain"]):
            logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
        elif instances.is_registered(entry["domain"]):
            logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
        elif instances.is_recent(entry["domain"]):
            logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
        # NOTE(review): an `else:` header is elided here.
        logger.debug("Adding domain='%s' ...", entry["domain"])
        domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))

    logger.debug("domains()=%d", len(domains))
    # NOTE(review): an `if len(domains) > 0:`-style guard is elided here.
    logger.info("Adding %d new instances ...", len(domains))
    for domain in domains:
        logger.debug("domain='%s' - BEFORE!", domain)
        # Normalise to the punycode (ASCII) form of the domain.
        domain = domain.encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        # NOTE(review): a `try:` header is elided here.
        logger.info("Fetching instances from domain='%s' ...", domain)
        federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
def fetch_blocks(args: argparse.Namespace) -> int:
    """Fetch and persist block lists from known instances — either one
    domain (args.domain), one software (args.software) or all supported
    software — dispatching to the per-software fetchers
    (pleroma/mastodon/lemmy/friendica/misskey).

    :param args: parsed CLI arguments; honours args.domain and args.software
    :return: numeric exit status

    NOTE(review): this view of the source elides many lines (returns,
    `continue` statements, `try:` headers, closing parentheses of the
    cursor.execute() calls, `blocking`/`blockdict` initialisations and some
    `if`/`elif`/`else:` headers); elisions are marked inline. Do not edit
    behaviour without the full file.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)

    logger.debug("Invoking locking.acquire() ...")
    # NOTE(review): the locking.acquire() call itself is elided in this view.

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
        # NOTE(review): closing parenthesis elided here.
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
        # NOTE(review): closing parenthesis and a further `elif`/`else:`
        # header (presumably an args.force branch — confirm) elided here.
        logger.debug("Re-checking all instances ...")
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC"
        # NOTE(review): closing parenthesis and an `else:` header elided.
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        # NOTE(review): closing parenthesis elided here.

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
        blocker = tidyup.domain(blocker)
        logger.debug("blocker='%s' - AFTER!", blocker)
        # NOTE(review): an emptiness guard (`if blocker == "":`) is elided
        # here; the warning below is its body and the elif-chain continues it.
        logger.warning("blocker is now empty!")
        elif nodeinfo_url is None or nodeinfo_url == "":
            logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
        elif not utils.is_domain_wanted(blocker):
            logger.debug("blocker='%s' is not wanted - SKIPPED!", blocker)

        logger.debug("blocker='%s'", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        # NOTE(review): `blocking` initialisation and a possible `try:`
        # header are elided before this dispatch on the detected software.
        if software == "pleroma":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
        elif software == "mastodon":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
        elif software == "lemmy":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
        elif software == "friendica":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = friendica.fetch_blocks(blocker)
        elif software == "misskey":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = misskey.fetch_blocks(blocker)
        # NOTE(review): an `else:` header is elided; the warning below is
        # presumably its body.
        logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.debug("blocker='%s'", blocker)
        # chaos.social is excluded from total-block accounting here —
        # presumably because fetch_cs() below already records its totals;
        # confirm before changing.
        if blocker != "chaos.social":
            logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
            instances.set_total_blocks(blocker, blocking)

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        # NOTE(review): `blockdict` initialisation is elided in this view.
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
            # NOTE(review): a `continue` is elided here.

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                # NOTE(review): an `if row is None:`-style guard is elided.
                logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                instances.set_has_obfuscation(blocker, True)
                # NOTE(review): `continue` and an `else:` header are elided.
                block["blocked"] = row["domain"]
                origin = row["origin"]
                nodeinfo_url = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some obscure them with question marks, not sure if that's dependent on version or not
                row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                # NOTE(review): an `if row is None:`-style guard is elided.
                logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                instances.set_has_obfuscation(blocker, True)
                # NOTE(review): `continue` and an `else:` header are elided.
                block["blocked"] = row["domain"]
                origin = row["origin"]
                nodeinfo_url = row["nodeinfo_url"]

            logger.debug("Looking up instance by domainm, blocked='%s'", block["blocked"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
            # NOTE(review): a `continue` is elided here.

            logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
            # Strip leading dots before IDNA-encoding; normalise to punycode.
            block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
            logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])

            if not utils.is_domain_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            # Normalise vendor-specific block-level names to canonical ones.
            block["block_level"] = blocks.alias_block_level(block["block_level"])

            if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
                # NOTE(review): the `blockdict.append({` wrapper and closing
                # `})` around this literal are elided in this view.
                "blocked": block["blocked"],
                "reason" : block["reason"],

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d'", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
def fetch_observer(args: argparse.Namespace) -> int:
    """Scrape fediverse.observer: discover the list of software types from
    the site's navigation bar (or use args.software), then fetch each
    software's table data and crawl any new, wanted domains.

    :param args: parsed CLI arguments; honours args.software
    :return: numeric exit status

    NOTE(review): this view elides the locking.acquire() call, `types`
    initialisation, `try:` headers, loop headers (`for item in items:`),
    several guards/`continue` statements, returns and closing parentheses;
    elisions are marked inline.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")

    source_domain = "fediverse.observer"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
    # NOTE(review): a `return` and an `else:` header are elided here.
    logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
    sources.update(source_domain)

    # NOTE(review): `types` initialisation is elided here.
    if args.software is None:
        logger.info("Fetching software list ...")
        raw = utils.fetch_url(
            f"https://{source_domain}",
            # NOTE(review): headers argument elided here.
            (config.get("connection_timeout"), config.get("read_timeout"))
        # NOTE(review): closing parenthesis (and presumably `.text`) elided.

        logger.debug("raw[%s]()=%d", type(raw), len(raw))
        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        # The software dropdown carries a fixed aria-labelledby id.
        navbar = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"})
        logger.debug("navbar[]='%s'", type(navbar))
        # NOTE(review): an `if navbar is None:`-style guard is elided here.
        logger.warning("Cannot find navigation bar, cannot continue!")

        items = navbar.findAll("a", {"class": "dropdown-item"})
        logger.debug("items[]='%s'", type(items))

        logger.info("Checking %d menu items ...", len(items))
        # NOTE(review): the `for item in items:` header is elided; the lines
        # below are its body.
            logger.debug("item[%s]='%s'", type(item), item)
            if item.text.lower() == "all":
                logger.debug("Skipping 'All' menu entry ...")
            # NOTE(review): a `continue` is elided here.

            logger.debug("Appending item.text='%s' ...", item.text)
            types.append(tidyup.domain(item.text))
    # NOTE(review): an `else:` header is elided; the two lines below are
    # presumably its body.
    logger.info("Adding args.software='%s' as type ...", args.software)
    types.append(args.software)

    logger.info("Fetching %d different table data ...", len(types))
    for software in types:
        logger.debug("software='%s' - BEFORE!", software)
        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
        # NOTE(review): `continue` and a `try:` header are elided here.

        logger.debug("Fetching table data for software='%s' ...", software)
        raw = utils.fetch_url(
            f"https://{source_domain}/app/views/tabledata.php?software={software}",
            (config.get("connection_timeout"), config.get("read_timeout"))
        # NOTE(review): closing parenthesis (and presumably `.text`) elided.

        logger.debug("raw[%s]()=%d", type(raw), len(raw))
        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        # NOTE(review): the `for item in items:` header is elided; the lines
        # below are its body.
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()
            logger.debug("domain='%s' - AFTER!", domain)

            # NOTE(review): an emptiness guard is elided here.
            logger.debug("domain is empty - SKIPPED!")

            logger.debug("domain='%s' - BEFORE!", domain)
            # Normalise to the punycode (ASCII) form of the domain.
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
            elif instances.is_recent(domain):
                logger.debug("domain='%s' is recently being handled - SKIPPED!", domain)

            # Map software aliases to their canonical name before storing.
            software = software_helper.alias(software)
            logger.info("Fetching instances for domain='%s'", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
def fetch_todon_wiki(args: argparse.Namespace) -> int:
    """Scrape the todon wiki's domain-block page and record the listed
    silenced/limited and suspended (reject) domains as blocks.

    :param args: parsed CLI arguments (unused beyond logging)
    :return: numeric exit status

    NOTE(review): this view elides the locking.acquire() call, the
    `blocklist`/`blockdict` initialisations, the assignment of `blocker`,
    `try:` headers, returns and `continue` statements; elisions are marked
    inline.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")

    source_domain = "wiki.todon.eu"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
    # NOTE(review): a `return` and an `else:` header are elided here.
    logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
    sources.update(source_domain)

    # NOTE(review): `blocklist`/`blockdict`/`blocker` initialisations elided.
    logger.debug("Fetching domainblocks from source_domain='%s'", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/todon/domainblocks",
        (config.get("connection_timeout"), config.get("read_timeout"))
    # NOTE(review): headers argument and closing parenthesis elided here.

    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    # The wiki page lists both severities under fixed h3 anchors.
    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blocking = blocklist["silenced"] + blocklist["reject"]

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_total_blocks(blocker, blocking)

    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d'", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                # NOTE(review): a `try:` header is elided here.
                logger.info("Fetching instances from domain='%s' ...", blocked)
                federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    # NOTE(review): the message says "fetch_cs" although this
                    # is fetch_todon_wiki — looks copy-pasted; kept verbatim.
                    logger.warning("Exception '%s' during fetching instances (fetch_cs) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if blocks.is_instance_blocked(blocker, blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
            # NOTE(review): `continue` and an `else:` header are elided here.

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", blocked, block_level, blocker)
                # NOTE(review): the blockdict.append({...}) call is elided.

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
        network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update_data(blocker)

    logger.debug("Success! - EXIT!")
def fetch_cs(args: argparse.Namespace):
    """Fetch chaos.social's published federation.md from GitHub raw content,
    render it as Markdown, parse the silenced/blocked tables and record the
    listed domains as blocks for blocker "chaos.social".

    :param args: parsed CLI arguments (unused beyond logging)

    NOTE(review): a large span is elided in this view (presumably the
    Markdown `extensions` list and `blocklist`/`blockdict` initialisations),
    plus locking.acquire(), `try:` headers, returns and closing
    parentheses; elisions are marked inline.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")

    source_domain = "raw.githubusercontent.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
    # NOTE(review): a `return` and an `else:` header are elided here.
    logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
    sources.update(source_domain)

    logger.info("Fetching federation.md from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/chaossocial/meta/master/federation.md",
        (config.get("connection_timeout"), config.get("read_timeout"))
    # NOTE(review): headers argument and closing parenthesis elided here.

    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
    # `extensions` is defined in an elided span above — presumably includes
    # the Markdown "tables" extension; confirm against the full file.
    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc()=%d[]='%s'", len(doc), type(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    blocklist["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    blocklist["reject"] = federation.find_domains(blocked)

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "chaos.social"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_total_blocks(blocker, blocking)

    logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
    if len(blocking) > 0:
        # NOTE(review): `blockdict` initialisation is elided here.
        for block_level in blocklist:
            logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))

            for row in blocklist[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if not "domain" in row:
                    logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
                elif not instances.is_registered(row["domain"]):
                    # NOTE(review): a `try:` header is elided here.
                    logger.info("Fetching instances from domain='%s' ...", row["domain"])
                    federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
                    # NOTE(review): the `blockdict.append({` wrapper and
                    # closing `})` around this literal are elided here.
                    "blocked": row["domain"],
                    "reason" : row["reason"],

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update_data(blocker)

    logger.debug("Success! - EXIT!")
def fetch_fba_rss(args: argparse.Namespace) -> int:
    """Fetch an FBA-specific RSS feed (URL from args.feed), extract domains
    from each item's link query string and crawl any new, wanted domains.

    :param args: parsed CLI arguments; args.feed is the RSS feed URL
    :return: numeric exit status

    NOTE(review): this view elides the `domains` initialisation, the
    locking.acquire() call, `try:` headers, several guards/`continue`
    statements and returns; elisions are marked inline.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")

    # Rate-limit per feed host, keyed on the URL's network location.
    components = urlparse(args.feed)

    if sources.is_recent(components.netloc):
        logger.info("API from components.netloc='%s' has recently being accessed - EXIT!", components.netloc)
    # NOTE(review): a `return` and an `else:` header are elided here.
    logger.debug("components.netloc='%s' has not been recently used, marking ...", components.netloc)
    sources.update(components.netloc)

    logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item[%s]='%s'", type(item), item)
            # The domain is carried as a query-string value in the item link.
            domain = tidyup.domain(item.link.split("=")[1])

            logger.debug("domain='%s' - AFTER!", domain)
            # NOTE(review): an emptiness guard is elided; next line is its body.
            logger.debug("domain is empty - SKIPPED!")

            logger.debug("domain='%s' - BEFORE!", domain)
            # Normalise to the punycode (ASCII) form of the domain.
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
            # NOTE(review): an `else:` header is elided here.
            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    # NOTE(review): an `if len(domains) > 0:`-style guard is elided here.
    logger.info("Adding %d new instances ...", len(domains))
    for domain in domains:
        logger.debug("domain='%s'", domain)
        # NOTE(review): a `try:` header is elided here.
        logger.info("Fetching instances from domain='%s' ...", domain)
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
def fetch_fbabot_atom(args: argparse.Namespace) -> int:
    """Fetch the FBA bot account's ATOM feed from ryona.agency, extract
    every linked domain from the entries' HTML content and crawl any new,
    wanted domains.

    :param args: parsed CLI arguments (unused beyond logging)
    :return: numeric exit status

    NOTE(review): this view elides the locking.acquire() call, the
    `domains` initialisation, `try:` headers, several guards/`continue`
    statements and returns; elisions are marked inline.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")

    source_domain = "ryona.agency"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
    # NOTE(review): a `return` and an `else:` header are elided here.
    logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
    sources.update(source_domain)

    feed = f"https://{source_domain}/users/fba/feed.atom"

    # NOTE(review): `domains` initialisation is elided here.
    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            # Entry content is HTML; extract all anchors from it.
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                logger.debug("element[]='%s'", type(element))
                # One anchor may carry several comma-separated hrefs.
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href)

                    logger.debug("domain='%s' - AFTER!", domain)
                    # NOTE(review): an emptiness guard is elided; the next
                    # line is its body.
                    logger.debug("domain is empty - SKIPPED!")

                    logger.debug("domain='%s' - BEFORE!", domain)
                    # Normalise to the punycode (ASCII) form of the domain.
                    domain = domain.encode("idna").decode("utf-8")
                    logger.debug("domain='%s' - AFTER!", domain)

                    if not utils.is_domain_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                    # NOTE(review): an `else:` header is elided here.
                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    # NOTE(review): an `if len(domains) > 0:`-style guard is elided here.
    logger.info("Adding %d new instances ...", len(domains))
    for domain in domains:
        logger.debug("domain='%s'", domain)
        # NOTE(review): a `try:` header is elided here.
        logger.info("Fetching instances from domain='%s' ...", domain)
        federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
def fetch_instances(args: argparse.Namespace) -> int:
    """Fetch the peer list of args.domain, then re-crawl known instances whose
    last fetch is older than the configured recheck interval.

    Returns 0 on success, 100/101 on invalid/blacklisted domain or fetch error.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("args.domain='%s' - checking ...", args.domain)
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid.", args.domain)
        return 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
        return 101

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Initial fetch of the given domain's peers
    try:
        logger.info("Fetching instances from args.domain='%s' ...", args.domain)
        federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update_data(args.domain)
        return 100

    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] == "":
            logger.debug("row[domain] is empty - SKIPPED!")
            continue

        logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
        # Normalize to punycode so downstream lookups are consistent
        domain = row["domain"].encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not utils.is_domain_wanted(domain):
            logger.debug("Domain domain='%s' is not wanted - SKIPPED!", domain)
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", domain, row["origin"], row["software"], row["nodeinfo_url"])
            federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0
def fetch_oliphant(args: argparse.Namespace) -> int:
    """Download oliphant's community-maintained CSV blocklists from codeberg.org
    and import their block entries per blocker. Returns 0 on success."""
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "codeberg.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    # Base URL for all CSV files below
    base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"

    # URLs to fetch: each entry maps a blocker domain to its CSV path
    blocklists = (
        {
            "blocker": "artisan.chat",
            "csv_url": "mastodon/artisan.chat.csv",
        }, {
            "blocker": "mastodon.art",
            "csv_url": "mastodon/mastodon.art.csv",
        }, {
            "blocker": "pleroma.envs.net",
            "csv_url": "mastodon/pleroma.envs.net.csv",
        }, {
            "blocker": "oliphant.social",
            "csv_url": "mastodon/_unified_tier3_blocklist.csv",
        }, {
            "blocker": "mastodon.online",
            "csv_url": "mastodon/mastodon.online.csv",
        }, {
            "blocker": "mastodon.social",
            "csv_url": "mastodon/mastodon.social.csv",
        }, {
            "blocker": "mastodon.social",
            "csv_url": "other/missing-tier0-mastodon.social.csv",
        }, {
            "blocker": "rage.love",
            "csv_url": "mastodon/rage.love.csv",
        }, {
            "blocker": "sunny.garden",
            "csv_url": "mastodon/sunny.garden.csv",
        }, {
            "blocker": "sunny.garden",
            "csv_url": "mastodon/gardenfence.csv",
        }, {
            "blocker": "solarpunk.moe",
            "csv_url": "mastodon/solarpunk.moe.csv",
        }, {
            "blocker": "toot.wales",
            "csv_url": "mastodon/toot.wales.csv",
        }, {
            "blocker": "union.place",
            "csv_url": "mastodon/union.place.csv",
        }, {
            "blocker": "oliphant.social",
            "csv_url": "mastodon/birdsite.csv",
        },
    )

    # Domains already handled across all blocklists
    domains = list()

    logger.debug("Downloading %d files ...", len(blocklists))
    for block in blocklists:
        # Is domain given and not equal blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue
        elif args.domain in domains:
            logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
            continue

        logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
        response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.content()=%d", response.ok, response.status_code, len(response.content))
        # BUG FIX: response.content is bytes, comparing against "" (str) was always False
        if not response.ok or response.status_code >= 300 or len(response.content) == 0:
            logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
            continue

        logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
        reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")

        # Entries collected for the optional bot POST, per blocker
        blockdict = list()
        for row in reader:
            logger.debug("row[%s]='%s'", type(row), row)
            domain = severity = None
            reject_media = reject_reports = False

            # Column names differ between lists ('#domain' vs 'domain' etc.)
            if "#domain" in row:
                domain = row["#domain"]
            elif "domain" in row:
                domain = row["domain"]
            else:
                logger.debug("row='%s' does not contain domain column", row)
                continue

            if "#severity" in row:
                severity = blocks.alias_block_level(row["#severity"])
            elif "severity" in row:
                severity = blocks.alias_block_level(row["severity"])
            else:
                logger.debug("row='%s' does not contain severity column", row)
                continue

            if "#reject_media" in row and row["#reject_media"].lower() == "true":
                reject_media = True
            elif "reject_media" in row and row["reject_media"].lower() == "true":
                reject_media = True

            if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
                reject_reports = True
            elif "reject_reports" in row and row["reject_reports"].lower() == "true":
                reject_reports = True

            logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif domain.endswith(".onion"):
                logger.debug("domain='%s' is a TOR .onion domain - SKIPPED", domain)
                continue
            elif domain.endswith(".arpa"):
                logger.debug("domain='%s' is a reverse IP address - SKIPPED", domain)
                continue
            elif domain.endswith(".tld"):
                logger.debug("domain='%s' is a fake domain - SKIPPED", domain)
                continue
            elif domain.find("*") >= 0 or domain.find("?") >= 0:
                logger.debug("domain='%s' is obfuscated - Invoking utils.deobfuscate(%s, %s) ...", domain, domain, block["blocker"])
                domain = utils.deobfuscate(domain, block["blocker"])
                logger.debug("domain='%s' - AFTER!", domain)

            # BUG FIX: this debug line previously had a '%s' placeholder without an argument
            if not validators.domain(domain):
                logger.debug("domain='%s' is not a valid domain - SKIPPED!", domain)
                continue
            elif blacklist.is_blacklisted(domain):
                logger.warning("domain='%s' is blacklisted - SKIPPED!", domain)
                continue
            elif blocks.is_instance_blocked(block["blocker"], domain, severity):
                logger.debug("block[blocker]='%s' has already blocked domain='%s' with severity='%s' - SKIPPED!", block["blocker"], domain, severity)
                continue

            logger.debug("Marking domain='%s' as handled", domain)
            domains.append(domain)

            logger.debug("Processing domain='%s' ...", domain)
            processed = processing.domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
            logger.debug("processed='%s'", processed)

            if processing.block(block["blocker"], domain, None, severity) and config.get("bot_enabled"):
                # BUG FIX: blocklist entries only carry 'blocker'/'csv_url', so the
                # previous block["block_level"]/block["reason"] lookups raised KeyError
                logger.debug("Appending blocked='%s',severity='%s' for blocker='%s' ...", domain, severity, block["blocker"])
                blockdict.append({
                    "blocked": domain,
                    "reason" : None,
                })

            if reject_media:
                processing.block(block["blocker"], domain, None, "reject_media")
            if reject_reports:
                processing.block(block["blocker"], domain, None, "reject_reports")

        logger.debug("block[blocker]='%s'", block["blocker"])
        # chaos.social is handled specially elsewhere, don't overwrite its totals
        if block["blocker"] != "chaos.social":
            logger.debug("Invoking instances.set_total_blocks(%s, domains()=%d) ...", block["blocker"], len(domains))
            instances.set_total_blocks(block["blocker"], domains)

        logger.debug("Checking if blocker='%s' has pending updates ...", block["blocker"])
        if instances.has_pending(block["blocker"]):
            logger.debug("Flushing updates for block[blocker]='%s' ...", block["blocker"])
            instances.update_data(block["blocker"])

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
            network.send_bot_post(block["blocker"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_txt(args: argparse.Namespace) -> int:
    """Import blocked domains from static plain-text blocklists (one domain per
    line); currently only seirdy.one's list. Returns 0 on success."""
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Static URLs to check
    urls = (
        {
            "blocker": "seirdy.one",
            "url"    : "https://seirdy.one/pb/bsl.txt",
        },
    )

    logger.info("Checking %d text file(s) ...", len(urls))
    for row in urls:
        logger.debug("Fetching row[url]='%s' ...", row["url"])
        response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code < 300 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain)

                logger.debug("domain='%s' - AFTER!", domain)
                if domain == "":
                    logger.debug("domain is empty - SKIPPED!")
                    continue
                elif not utils.is_domain_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue
                elif instances.is_recent(domain):
                    logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                    continue

                logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
                processed = processing.domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)
                if not processed:
                    logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
                    continue

    logger.debug("Success! - EXIT!")
    return 0
def fetch_fedipact(args: argparse.Namespace) -> int:
    """Scrape fedipact.online's front page (<li> entries) for instance domains
    and fetch any new, wanted ones. Returns 0 on success."""
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fedipact.online"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching / from source_domain='%s' ...", source_domain)
    response = utils.fetch_url(
        f"https://{source_domain}",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and response.text != "":
        logger.debug("Parsing %d Bytes ...", len(response.text))

        doc = bs4.BeautifulSoup(response.text, "html.parser")
        logger.debug("doc[]='%s'", type(doc))

        rows = doc.findAll("li")
        logger.info("Checking %d row(s) ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            domain = tidyup.domain(row.contents[0])

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            # Normalize to punycode for consistent lookups
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.info("Fetching domain='%s' ...", domain)
            federation.fetch_instances(domain, "beach.city", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_joinmobilizon(args: argparse.Namespace) -> int:
    """Fetch the public instance list from instances.joinmobilizon.org and
    register any new, wanted Mobilizon instances. Returns 0 on success."""
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "instances.joinmobilizon.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching instances from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/api/v1/instances",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    parsed = json.loads(raw)
    logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))

    if "data" not in parsed:
        # BUG FIX: message had a %d placeholder but no argument was supplied
        logger.warning("parsed()=%d does not contain key 'data'", len(parsed))
        return 1

    logger.info("Checking %d instances ...", len(parsed["data"]))
    for row in parsed["data"]:
        logger.debug("row[]='%s'", type(row))
        if "host" not in row:
            logger.warning("row='%s' does not contain key 'host' - SKIPPED!", row)
            continue
        elif not utils.is_domain_wanted(row["host"]):
            logger.debug("row[host]='%s' is not wanted - SKIPPED!", row["host"])
            continue
        elif instances.is_registered(row["host"]):
            logger.debug("row[host]='%s' is already registered - SKIPPED!", row["host"])
            continue

        logger.info("Fetching row[host]='%s' ...", row["host"])
        federation.fetch_instances(row["host"], "demo.mobilizon.org", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_joinmisskey(args: argparse.Namespace) -> int:
    """Fetch instances.json from instanceapp.misskey.page and register any new,
    wanted Misskey instances. Returns 0 on success."""
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "instanceapp.misskey.page"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching instances.json from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/instances.json",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    parsed = json.loads(raw)
    logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))

    if "instancesInfos" not in parsed:
        # BUG FIX: message had a %d placeholder but no argument was supplied
        logger.warning("parsed()=%d does not contain element 'instancesInfos'", len(parsed))
        return 1

    # BUG FIX: typo 'instane(s)' corrected
    logger.info("Checking %d instance(s) ...", len(parsed["instancesInfos"]))
    for row in parsed["instancesInfos"]:
        logger.debug("row[%s]='%s'", type(row), row)
        if "url" not in row:
            logger.warning("row()=%d does not have element 'url' - SKIPPED!", len(row))
            continue
        elif not utils.is_domain_wanted(row["url"]):
            logger.debug("row[url]='%s' is not wanted - SKIPPED!", row["url"])
            continue
        elif instances.is_registered(row["url"]):
            logger.debug("row[url]='%s' is already registered - SKIPPED!", row["url"])
            continue

        logger.info("Fetching row[url]='%s' ...", row["url"])
        federation.fetch_instances(row["url"], "misskey.io", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_joinfediverse(args: argparse.Namespace) -> int:
    """Scrape the joinfediverse.wiki /FediBlock wiki tables, expand any listed
    subdomains, process the blocked domains and record blocks for all local
    'climatejustice.*' blockers. Returns 0 on success."""
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "joinfediverse.wiki"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching /FediBlock wiki page from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/FediBlock",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    tables = doc.findAll("table", {"class": "wikitable"})

    logger.info("Analyzing %d table(s) ...", len(tables))
    blocklist = list()
    for table in tables:
        logger.debug("table[]='%s'", type(table))

        rows = table.findAll("tr")
        logger.info("Checking %d row(s) ...", len(rows))
        # Maps column position -> wanted header name for the current table
        block_headers = dict()
        for row in rows:
            logger.debug("row[%s]='%s'", type(row), row)

            headers = row.findAll("th")
            logger.debug("Found headers()=%d header(s)", len(headers))
            if len(headers) > 1:
                # Header row: (re-)build the column map
                block_headers = dict()
                cnt = 0
                for header in headers:
                    cnt = cnt + 1
                    logger.debug("header[]='%s',cnt=%d", type(header), cnt)
                    text = header.contents[0]

                    logger.debug("text[]='%s'", type(text))
                    if not isinstance(text, str):
                        logger.debug("text[]='%s' is not of type 'str' - SKIPPED!", type(text))
                        continue
                    elif validators.domain(text.strip()):
                        logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
                        continue

                    text = tidyup.domain(text.strip())
                    logger.debug("text='%s' - AFTER!", text)
                    if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
                        logger.debug("Found header: '%s'=%d", text, cnt)
                        block_headers[cnt] = text

            elif len(block_headers) == 0:
                logger.debug("row is not scrapable - SKIPPED!")
                continue
            elif len(block_headers) > 0:
                logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
                cnt = 0
                block = dict()

                for element in row.find_all(["th", "td"]):
                    cnt = cnt + 1
                    logger.debug("element[]='%s',cnt=%d", type(element), cnt)
                    if cnt in block_headers:
                        logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])

                        text = element.text.strip()
                        # 'domain'/'instance' columns are stored under the unified key 'blocked'
                        key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"

                        logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
                        if key == "reason":
                            block[key] = tidyup.reason(text)
                        elif key == "subdomain(s)":
                            block[key] = list()
                            if text != "":
                                block[key] = text.split("/")
                        else:
                            # 'blocked' and 'block reason(s)' are stored verbatim
                            logger.debug("key='%s'", key)
                            block[key] = text

                logger.debug("block()=%d ...", len(block))
                if len(block) > 0:
                    logger.debug("Appending block()=%d ...", len(block))
                    blocklist.append(block)

    logger.debug("blocklist()=%d", len(blocklist))

    database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
    domains = database.cursor.fetchall()

    logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
    blocking = list()
    for block in blocklist:
        logger.debug("block='%s'", block)
        if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
            origin = block["blocked"]
            logger.debug("origin='%s'", origin)
            for subdomain in block["subdomain(s)"]:
                # BUG FIX: the original appended the SAME mutated dict for every
                # subdomain, so all entries ended up with the last subdomain only;
                # append an independent copy per subdomain instead.
                expanded = dict(block)
                expanded["blocked"] = subdomain + "." + origin
                logger.debug("expanded[blocked]='%s'", expanded["blocked"])
                blocking.append(expanded)
        else:
            blocking.append(block)

    # BUG FIX: '%d' cannot format a list - log the length instead
    logger.debug("blocking()=%d", len(blocking))
    for block in blocking:
        logger.debug("block[]='%s'", type(block))
        if "blocked" not in block:
            raise KeyError(f"block()={len(block)} does not have element 'blocked'")

        block["blocked"] = tidyup.domain(block["blocked"]).encode("idna").decode("utf-8")
        logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])

        if block["blocked"] == "":
            logger.debug("block[blocked] is empty - SKIPPED!")
            continue
        elif not utils.is_domain_wanted(block["blocked"]):
            logger.debug("block[blocked]='%s' is not wanted - SKIPPED!", block["blocked"])
            continue
        elif instances.is_recent(block["blocked"]):
            logger.debug("block[blocked]='%s' has been recently checked - SKIPPED!", block["blocked"])
            continue

        logger.debug("Proccessing blocked='%s' ...", block["blocked"])
        processing.domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)

    for blocker in domains:
        blocker = blocker[0]
        logger.debug("blocker[%s]='%s'", type(blocker), blocker)
        blockdict = list()

        for block in blocking:
            logger.debug("block[blocked]='%s',block[block reason(s)]='%s' - BEFORE!", block["blocked"], block["block reason(s)"] if "block reason(s)" in block else None)
            block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None

            logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
            if processing.block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
                # BUG FIX: scraped blocks have no 'block_level' key (KeyError); log the reason
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            # BUG FIX: unbalanced quote in the log message
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0
def recheck_obfuscation(args: argparse.Namespace) -> int:
    """Re-fetch block lists of instances flagged with has_obfuscation=1, try to
    deobfuscate wildcard/hashed entries and record the resolved blocks.
    Returns 0 on success."""
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Narrow the selection by --domain or --software if given
    if isinstance(args.domain, str) and args.domain != "" and utils.is_domain_wanted(args.domain):
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
    elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
    else:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")

    rows = database.cursor.fetchall()
    logger.info("Checking %d domains ...", len(rows))
    for row in rows:
        logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        if (args.force is None or not args.force) and args.domain is None and args.software is None and instances.is_recent(row["domain"], "last_blocked"):
            logger.debug("row[domain]='%s' has been recently checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force))
            continue

        # Fetch the block list with the software-specific backend
        blocking = list()
        if row["software"] == "pleroma":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
        elif row["software"] == "mastodon":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
        elif row["software"] == "lemmy":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
        elif row["software"] == "friendica":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = friendica.fetch_blocks(row["domain"])
        elif row["software"] == "misskey":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = misskey.fetch_blocks(row["domain"])
        else:
            # BUG FIX: typo 'sofware' corrected
            logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])
            continue

        logger.debug("row[domain]='%s'", row["domain"])
        # chaos.social requires special care ...
        if row["domain"] != "chaos.social":
            logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
            instances.set_total_blocks(row["domain"], blocking)

        obfuscated = 0
        blockdict = list()

        logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
        for block in blocking:
            logger.debug("block[blocked]='%s'", block["blocked"])
            blocked = None

            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
                logger.debug("block='%s' is obfuscated.", block["blocked"])
                obfuscated = obfuscated + 1
                blocked = utils.deobfuscate(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
            elif not utils.is_domain_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
                logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
                continue

            logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
            if blocked is not None and blocked != block["blocked"]:
                logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
                obfuscated = obfuscated - 1
                if blocks.is_instance_blocked(row["domain"], blocked):
                    logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
                    continue

                block["block_level"] = blocks.alias_block_level(block["block_level"])

                logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
                if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
                    blockdict.append({
                        "blocked": blocked,
                        "reason" : block["reason"],
                    })

        logger.debug("Settings obfuscated=%d for row[domain]='%s' ...", obfuscated, row["domain"])
        instances.set_obfuscated_blocks(row["domain"], obfuscated)

        logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
        if obfuscated == 0 and len(blocking) > 0:
            logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
            instances.set_has_obfuscation(row["domain"], False)

        if instances.has_pending(row["domain"]):
            logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
            instances.update_data(row["domain"])

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            # BUG FIX: unbalanced quote in the log message
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", row["domain"], len(blockdict))
            network.send_bot_post(row["domain"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_fedilist(args: argparse.Namespace) -> int:
    """Fetch the instance CSV from demo.fedilist.com (optionally filtered by
    --software) and crawl any new, wanted domains. Returns 0 on success,
    non-zero on download/parse failure."""
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "demo.fedilist.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    url = f"http://{source_domain}/instance/csv?onion=not"
    if args.software is not None and args.software != "":
        logger.debug("args.software='%s'", args.software)
        url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"

    logger.info("Fetching url='%s' ...", url)
    response = reqto.get(
        url,
        headers=network.web_headers,
        timeout=(config.get("connection_timeout"), config.get("read_timeout")),
        allow_redirects=False
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if not response.ok or response.status_code >= 300 or len(response.content) == 0:
        logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.text))
        return 1

    reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")

    logger.debug("reader[]='%s'", type(reader))
    if reader is None:
        logger.warning("Failed parsing response.content()=%d as CSV content", len(response.content))
        return 2

    rows = list(reader)

    logger.info("Checking %d rows ...", len(rows))
    for row in rows:
        logger.debug("row[]='%s'", type(row))
        if "hostname" not in row:
            logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(row))
            continue

        logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"])
        domain = tidyup.domain(row["hostname"])
        logger.debug("domain='%s' - AFTER!", domain)

        if domain == "":
            logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"])
            continue

        logger.debug("domain='%s' - BEFORE!", domain)
        # Normalize to punycode for consistent lookups
        domain = domain.encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not utils.is_domain_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            continue
        elif (args.force is None or not args.force) and instances.is_registered(domain):
            logger.debug("domain='%s' is already registered, --force not specified: args.force[]='%s'", domain, type(args.force))
            continue
        elif instances.is_recent(domain):
            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
            continue

        logger.info("Fetching instances from domain='%s' ...", domain)
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
def update_nodeinfo(args: argparse.Namespace) -> int:
    """Re-detect the software type of registered instances and refresh
    their nodeinfo bookkeeping.

    Selection: --domain checks exactly one instance, --software all
    instances of that software type; otherwise every instance whose
    last_nodeinfo is older than config value 'recheck_nodeinfo' (or NULL).

    NOTE(review): this view is an excerpt — blank lines and several short
    control-flow lines (an 'else:' header, the loop header over the result
    set, 'try:'/'continue'/'return' statements and a counter update) are
    elided; the indentation below reconstructs the apparent structure.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    # (the locking.acquire() call itself is elided in this view)

    if args.domain is not None and args.domain != "":
        logger.debug("Fetching args.domain='%s'", args.domain)
        database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ?", [args.domain])
    elif args.software is not None and args.software != "":
        logger.info("Fetching domains for args.software='%s'", args.software)
        database.cursor.execute("SELECT domain, software FROM instances WHERE software = ?", [args.software])
    # (fallback branch — 'else:' header elided in this view)
        logger.info("Fetching domains for recently updated ...")
        database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL", [time.time() - config.get("recheck_nodeinfo")])

    domains = database.cursor.fetchall()

    logger.info("Checking %d domain(s) ...", len(domains))
    # (counter initialisation and 'for row in domains:' loop header elided)
        logger.debug("row[]='%s'", type(row))
        # Skip instances checked within the recheck interval unless --force.
        if not args.force and instances.is_recent(row["domain"], "last_nodeinfo"):
            logger.debug("row[domain]='%s' has been recently checked - SKIPPED!", row["domain"])
            # ('continue' elided)

        # ('try:' header elided — pairs with the except-clause below)
            # 'cnt' is a progress counter whose initialisation and update are
            # elided in this view; the percentage shows progress through the
            # result set.
            logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
            software = federation.determine_software(row["domain"])

            logger.debug("Determined software='%s'", software)
            # Persist only when detection yielded a changed, non-None value,
            # or when --force was given.
            if (software != row["software"] and software is not None) or args.force is True:
                logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
                instances.set_software(row["domain"], software)

            instances.set_success(row["domain"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
            instances.set_last_error(row["domain"], exception)

        # Stamp the check time and flush collected data regardless of outcome.
        instances.set_last_nodeinfo(row["domain"])
        instances.update_data(row["domain"])

    logger.debug("Success! - EXIT!")
def fetch_instances_social(args: argparse.Namespace) -> int:
    """Fetch the full instance list from the instances.social API and feed
    every new, wanted domain into federation.fetch_instances().

    Requires config value 'instances_social_api_key'; access to the API is
    rate-limited through sources.is_recent()/sources.update().

    NOTE(review): this view is an excerpt — blank lines and several short
    lines ('else:'/loop headers, 'continue'/'return' statements, and the
    delimiters of the headers dict and of the get_json_api() call) are
    elided; the indentation below reconstructs the apparent structure.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    # (the locking.acquire() call itself is elided in this view)

    source_domain = "instances.social"

    if config.get("instances_social_api_key") == "":
        logger.error("API key not set. Please set in your config.json file.")
        # (error 'return' elided)
    elif sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        # (early 'return' elided)

    # Mark the source as used so the next run honours the rate limit.
    logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
    sources.update(source_domain)

    # (enclosing 'headers = {' ... '}' literal elided around this entry)
        "Authorization": f"Bearer {config.get('instances_social_api_key')}",

    logger.info("Fetching list from source_domain='%s' ...", source_domain)
    fetched = network.get_json_api(
        # (host and headers arguments elided in this view)
        "/api/1.0/instances/list?count=0&sort_by=name",
        (config.get("connection_timeout"), config.get("read_timeout"))
    # (closing parenthesis of the call elided)

    logger.debug("fetched[]='%s'", type(fetched))

    # Bail out on any transport or structural error in the API reply.
    if "error_message" in fetched:
        logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
    elif "exception" in fetched:
        logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
    elif "json" not in fetched:
        logger.warning("fetched has no element 'json' - EXIT!")
    elif "instances" not in fetched["json"]:
        logger.warning("fetched[row] has no element 'instances' - EXIT!")

    rows = fetched["json"]["instances"]

    logger.info("Checking %d row(s) ...", len(rows))
    # (loop header over 'rows' elided)
        logger.debug("row[]='%s'", type(row))
        domain = tidyup.domain(row["name"])
        logger.debug("domain='%s' - AFTER!", domain)

        # (guard "if domain == '':" elided)
            logger.debug("domain is empty - SKIPPED!")

        # Normalise unicode domains to punycode before any lookups.
        logger.debug("domain='%s' - BEFORE!", domain)
        domain = domain.encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        # Filter out: unwanted, duplicate in this run, already registered,
        # or crawled too recently.
        # NOTE(review): 'domains' is read below but its initialisation is
        # not visible in this view — verify against the full file.
        if not utils.is_domain_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
        elif domain in domains:
            logger.debug("domain='%s' is already added - SKIPPED!", domain)
        elif instances.is_registered(domain):
            logger.debug("domain='%s' is already registered - SKIPPED!", domain)
        elif instances.is_recent(domain):
            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)

        logger.info("Fetching instances from domain='%s'", domain)
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
def fetch_relays(args: argparse.Namespace) -> int:
    """Scrape the landing page of known relay instances (activityrelay,
    aoderelay, selective-relay) and register the peer instances they list.

    With --domain only that relay is checked, otherwise all relays found in
    the instances table. Collected domains are afterwards fetched through
    federation.fetch_instances().

    NOTE(review): this view is an excerpt — blank lines and several short
    lines (loop headers, 'else:'/'try:' headers, guards, 'continue'
    statements and the domains.append({...}) literals) are elided; the
    indentation below reconstructs the apparent structure.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    if args.domain is not None and args.domain != "":
        database.cursor.execute("SELECT domain, software FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay') AND domain = ? LIMIT 1", [args.domain])
    # ('else:' header elided)
        database.cursor.execute("SELECT domain, software FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay')")

    # (initialisation of the 'domains' accumulator elided in this view)
    rows = database.cursor.fetchall()

    logger.info("Checking %d relays ...", len(rows))
    # (loop header 'for row in rows:' elided)
        logger.debug("row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
        if not args.force and instances.is_recent(row["domain"]):
            logger.debug("row[domain]='%s' has been recently fetched - SKIPPED!", row["domain"])
            # ('continue' elided)

        # ('try:' header elided — pairs with the except-clause below)
            logger.info("Fetching / from relay row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
            raw = utils.fetch_url(
                f"https://{row['domain']}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            # (closing of the fetch_url() call elided)
            logger.debug("raw[%s]()=%d", type(raw), len(raw))
        except network.exceptions as exception:
            # Record the failure and move on to the next relay.
            logger.warning("Exception '%s' during fetching from relay '%s': '%s'", type(exception), row["domain"], str(exception))
            instances.set_last_error(row["domain"], exception)
            instances.set_last_instance_fetch(row["domain"])
            instances.update_data(row["domain"])
            # ('continue' elided)

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        logger.debug("row[software]='%s'", row["software"])
        if row["software"] == "activityrelay":
            # activityrelay lists its peers as plain text inside <p> tags.
            logger.debug("Checking row[domain]='%s' ...", row["domain"])
            tags = doc.findAll("p")

            logger.debug("Checking %d paragraphs ...", len(tags))
            # (loop header 'for tag in tags:' elided)
                logger.debug("tag[]='%s'", type(tag))
                if len(tag.contents) == 0:
                    logger.debug("tag='%s' is an empty tag - SKIPPED!", tag)
                elif "registered instances" not in tag.contents[0]:
                    logger.debug("Skipping paragraph, text not found.")
                    # ('continue' elided)

                logger.debug("Found tag.contents[0][]='%s'", tag.contents[0])
                for domain in tag.contents:
                    logger.debug("domain[%s]='%s'", type(domain), domain)
                    # Keep only bare text nodes that are not the heading itself.
                    if not isinstance(domain, bs4.element.NavigableString) or "registered instances" in domain:
                        # ('continue' elided)

                    domain = str(domain)
                    # NOTE(review): the wanted-check here runs BEFORE
                    # tidyup.domain(), the reverse of other commands in this
                    # file — confirm the ordering is intentional.
                    if not utils.is_domain_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                        # ('continue' elided)

                    logger.debug("domain='%s' - BEFORE!", domain)
                    domain = tidyup.domain(domain)
                    logger.debug("domain='%s' - AFTER!", domain)

                    # (guard "if domain == '':" elided)
                        logger.debug("Empty domain after tidyup.domain() from origin='%s' - SKIPPED!", row["domain"])
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                    elif dict_helper.has_key(domains, "domain", domain):
                        logger.debug("domain='%s' already added", domain)

                    logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
                    # (surrounding domains.append({...}) literal elided)
                        "origin": row["domain"],
        elif row["software"] in ["aoderelay", "selective-relay"]:
            logger.debug("Checking row[domain]='%s' ...", row["domain"])
            # The two relay flavours embed their peer list differently.
            if row["software"] == "aoderelay":
                tags = doc.findAll("section", {"class": "instance"})
            # ('else:' header elided)
                tags = doc.find("div", {"id": "instances"}).findAll("li")

            logger.debug("Checking %d tags ...", len(tags))
            # (loop header 'for tag in tags:' elided)
                logger.debug("tag[]='%s'", type(tag))
                link = tag.find("a")
                logger.debug("link[%s]='%s'", type(link), link)
                # (guard for a missing a-tag elided)
                    logger.warning("tag='%s' has no a-tag ...", tag)
                    # ('continue' elided)

                # The peer domain comes from the link target's host part.
                components = urlparse(link["href"])
                domain = components.netloc.lower()

                if not utils.is_domain_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    # ('continue' elided)

                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain)
                logger.debug("domain='%s' - AFTER!", domain)

                # (guard "if domain == '':" elided)
                    logger.debug("Empty domain after tidyup.domain() from origin='%s' - SKIPPED!", row["domain"])
                elif instances.is_registered(domain):
                    logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                elif dict_helper.has_key(domains, "domain", domain):
                    logger.debug("domain='%s' already added", domain)

                logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
                # (surrounding domains.append({...}) literal elided)
                    "origin": row["domain"],
        # ('else:' header elided — fallback for unsupported relay software)
            logger.warning("row[domain]='%s',row[software]='%s' is not supported", row["domain"], row["software"])

        # Stamp the fetch time and flush data for this relay.
        logger.debug("Updating last_instance_fetch for row[domain]='%s' ...", row["domain"])
        instances.set_last_instance_fetch(row["domain"])
        instances.update_data(row["domain"])

    logger.info("Found %d domains to add ...", len(domains))
    # (loop header 'for row in domains:' elided)
        logger.info("Fetching row[domain]='%s',row[origin]='%s' ...", row["domain"], row["origin"])
        federation.fetch_instances(row["domain"], row["origin"], None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
def convert_idna(args: argparse.Namespace) -> int:
    """Translate all unicode (non-punycode) domain values in the database
    into their IDNA representation.

    Processes the 'domain' and 'origin' columns of the instances table and
    the 'blocker' and 'blocked' columns of the blocks table; rows already
    containing 'xn--' are excluded by each query.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    # Each step pairs a selection query with the translate_idnas() helper of
    # the owning model and the column it must rewrite. Order matches the
    # original sequence: instances first, then blocks.
    steps = (
        ("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC", instances.translate_idnas, "domain"),
        ("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC", instances.translate_idnas, "origin"),
        ("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC", blocks.translate_idnas, "blocker"),
        ("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC", blocks.translate_idnas, "blocked"),
    )

    for query, translator, column in steps:
        database.cursor.execute(query)
        rows = database.cursor.fetchall()

        logger.debug("rows[]='%s'", type(rows))
        translator(rows, column)

    logger.debug("Success! - EXIT!")