1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
31 from fba import database
34 from fba.helpers import blacklist
35 from fba.helpers import config
36 from fba.helpers import cookies
37 from fba.helpers import locking
38 from fba.helpers import software as software_helper
39 from fba.helpers import tidyup
41 from fba.http import federation
42 from fba.http import network
44 from fba.models import apis
45 from fba.models import blocks
46 from fba.models import instances
48 from fba.networks import friendica
49 from fba.networks import lemmy
50 from fba.networks import mastodon
51 from fba.networks import misskey
52 from fba.networks import pleroma
54 logging.basicConfig(level=logging.INFO)
55 logger = logging.getLogger(__name__)
56 #logger.setLevel(logging.DEBUG)
58 def check_instance(args: argparse.Namespace) -> int:
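# Checks a single domain given via --domain: it must be syntactically valid,
# not blacklisted and not already registered, and reports the result as a status code.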
59 logger.debug("args.domain='%s' - CALLED!", args.domain)
61 if not validators.domain(args.domain):
62 logger.warning("args.domain='%s' is not valid", args.domain)
64 elif blacklist.is_blacklisted(args.domain):
65 logger.warning("args.domain='%s' is blacklisted", args.domain)
67 elif instances.is_registered(args.domain):
68 logger.warning("args.domain='%s' is already registered", args.domain)
71 logger.info("args.domain='%s' is not known", args.domain)
73 logger.debug("status=%d - EXIT!", status)
76 def check_nodeinfo(args: argparse.Namespace) -> int:
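# Sanity-checks stored nodeinfo URLs: an absolute nodeinfo_url should contain the
# instance's domain (or its punycode form), otherwise a warning is logged.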
77 logger.debug("args[]='%s' - CALLED!", type(args))
80 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")
83 for row in database.cursor.fetchall():
84 logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
85 punycode = row["domain"].encode("idna").decode("utf-8")
87 if row["nodeinfo_url"].startswith("/"):
88 logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
90 elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
91 logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
94 logger.info("Found %d row(s)", cnt)
99 def fetch_pixelfed_api(args: argparse.Namespace) -> int:
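# Fetches the public server list from pixelfed.org's API and registers any new,
# wanted domains found there.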
100 logger.debug("args[]='%s' - CALLED!", type(args))
102 # No CSRF headers are required by default, so there is no need to add network.api_headers here
104 api_domain = "pixelfed.org"
106 if apis.is_recent(api_domain):
107 logger.info("API from api_domain='%s' has recently been accessed - EXIT!", api_domain)
110 logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
111 apis.update(api_domain)
114 logger.debug("Checking CSRF from api_domain='%s' ...", api_domain)
115 headers = csrf.determine(api_domain, dict())
116 except network.exceptions as exception:
117 logger.warning("Exception '%s' during checking CSRF (fetch_pixelfed_api,%s) - EXIT!", type(exception), __name__)
121 logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
122 fetched = network.get_json_api(
124 "/api/v1/servers/all.json?scope=All&country=all&language=all",
126 (config.get("connection_timeout"), config.get("read_timeout"))
129 logger.debug("JSON API returned %d elements", len(fetched))
130 if "error_message" in fetched:
131 logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
133 elif "data" not in fetched["json"]:
134 logger.warning("API did not return JSON with 'data' element - EXIT!")
137 rows = fetched["json"]["data"]
138 logger.info("Checking %d fetched rows ...", len(rows))
140 logger.debug("row[]='%s'", type(row))
141 if "domain" not in row:
142 logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
144 elif row["domain"] == "":
145 logger.debug("row[domain] is empty - SKIPPED!")
147 elif not utils.is_domain_wanted(row["domain"]):
148 logger.warning("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
150 elif instances.is_registered(row["domain"]):
151 logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
153 elif instances.is_recent(row["domain"]):
154 logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
157 logger.debug("Fetching instances from row[domain]='%s' ...", row["domain"])
158 federation.fetch_instances(row["domain"], None, None, inspect.currentframe().f_code.co_name)
160 except network.exceptions as exception:
161 logger.warning("Cannot fetch instances,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
164 logger.debug("Success! - EXIT!")
167 def fetch_bkali(args: argparse.Namespace) -> int:
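# Queries the GraphQL API at gql.apis.bka.li for a domain list and fetches
# instance data for every new, wanted domain.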
168 logger.debug("args[]='%s' - CALLED!", type(args))
170 logger.debug("Invoking locking.acquire() ...")
173 api_domain = "gql.apis.bka.li"
174 if apis.is_recent(api_domain):
175 logger.info("API from api_domain='%s' has recently been accessed - EXIT!", api_domain)
178 logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
179 apis.update(api_domain)
183 logger.info("Fetching domainlist from api_domain='%s' ...", api_domain)
184 fetched = network.post_json_api(
188 "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
192 logger.debug("fetched[]='%s'", type(fetched))
193 if "error_message" in fetched:
194 logger.warning("post_json_api() for 'gql.apis.bka.li' returned error_message='%s'", fetched["error_message"])
196 elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
197 logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
200 rows = fetched["json"]
202 logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
204 raise Exception("WARNING: Returned no records")
205 elif "data" not in rows:
206 raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
207 elif "nodeinfo" not in rows["data"]:
208 raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")
210 for entry in rows["data"]["nodeinfo"]:
211 logger.debug("entry[%s]='%s'", type(entry), entry)
212 if "domain" not in entry:
213 logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
215 elif entry["domain"] == "":
216 logger.debug("entry[domain] is empty - SKIPPED!")
218 elif not utils.is_domain_wanted(entry["domain"]):
219 logger.warning("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
221 elif instances.is_registered(entry["domain"]):
222 logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
224 elif instances.is_recent(entry["domain"]):
225 logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
228 logger.debug("Adding domain='%s' ...", entry["domain"])
229 domains.append(entry["domain"])
231 except network.exceptions as exception:
232 logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
235 logger.debug("domains()=%d", len(domains))
237 logger.info("Adding %d new instances ...", len(domains))
238 for domain in domains:
240 logger.info("Fetching instances from domain='%s' ...", domain)
241 federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
242 except network.exceptions as exception:
243 logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
244 instances.set_last_error(domain, exception)
247 logger.debug("Success - EXIT!")
250 def fetch_blocks(args: argparse.Namespace) -> int:
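# Fetches block lists from registered instances (optionally limited to a single
# domain or software type), deobfuscates wildcard entries where possible and
# records the resulting blocks.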
251 logger.debug("args[]='%s' - CALLED!", type(args))
252 if args.domain is not None and args.domain != "":
253 logger.debug("args.domain='%s' - checking ...", args.domain)
254 if not validators.domain(args.domain):
255 logger.warning("args.domain='%s' is not valid.", args.domain)
257 elif blacklist.is_blacklisted(args.domain):
258 logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
260 elif not instances.is_registered(args.domain):
261 logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
264 logger.debug("Invoking locking.acquire() ...")
267 if args.domain is not None and args.domain != "":
268 # Re-check single domain
269 logger.debug("Querying database for single args.domain='%s' ...", args.domain)
270 database.cursor.execute(
271 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
273 elif args.software is not None and args.software != "":
274 # Re-check single software
275 logger.debug("Querying database for args.software='%s' ...", args.software)
276 database.cursor.execute(
277 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
280 # Re-check after the "timeout" (a.k.a. the minimum re-check interval) has elapsed
281 database.cursor.execute(
282 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
285 rows = database.cursor.fetchall()
286 logger.info("Checking %d entries ...", len(rows))
287 for blocker, software, origin, nodeinfo_url in rows:
288 logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
289 blocker = tidyup.domain(blocker)
290 logger.debug("blocker='%s' - AFTER!", blocker)
293 logger.warning("blocker is now empty!")
295 elif nodeinfo_url is None or nodeinfo_url == "":
296 logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
298 elif not utils.is_domain_wanted(blocker):
299 logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
302 logger.debug("blocker='%s'", blocker)
303 instances.set_last_blocked(blocker)
304 instances.set_has_obfuscation(blocker, False)
308 if software == "pleroma":
309 logger.info("blocker='%s',software='%s'", blocker, software)
310 blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
311 elif software == "mastodon":
312 logger.info("blocker='%s',software='%s'", blocker, software)
313 blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
314 elif software == "lemmy":
315 logger.info("blocker='%s',software='%s'", blocker, software)
316 blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
317 elif software == "friendica":
318 logger.info("blocker='%s',software='%s'", blocker, software)
319 blocking = friendica.fetch_blocks(blocker)
320 elif software == "misskey":
321 logger.info("blocker='%s',software='%s'", blocker, software)
322 blocking = misskey.fetch_blocks(blocker)
324 logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)
326 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
327 instances.set_total_blocks(blocker, blocking)
329 logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
331 for block in blocking:
332 logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])
334 if block["block_level"] == "":
335 logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
338 logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
339 block["blocked"] = tidyup.domain(block["blocked"])
340 block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
341 logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])
343 if block["blocked"] == "":
344 logger.warning("blocked is empty, blocker='%s'", blocker)
346 elif block["blocked"].endswith(".onion"):
347 logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
349 elif block["blocked"].endswith(".arpa"):
350 logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
352 elif block["blocked"].endswith(".tld"):
353 logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
355 elif block["blocked"].find("*") >= 0:
356 logger.debug("blocker='%s' uses obfuscated domains", blocker)
358 # Some Friendica servers also obfuscate domains without providing a hash
359 row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)
361 logger.debug("row[]='%s'", type(row))
363 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
364 instances.set_has_obfuscation(blocker, True)
367 block["blocked"] = row["domain"]
368 origin = row["origin"]
369 nodeinfo_url = row["nodeinfo_url"]
370 elif block["blocked"].find("?") >= 0:
371 logger.debug("blocker='%s' uses obfuscated domains", blocker)
373 # Some servers obfuscate domains with question marks; it is unclear whether this depends on the version
374 row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)
376 logger.debug("row[]='%s'", type(row))
378 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
379 instances.set_has_obfuscation(blocker, True)
382 block["blocked"] = row["domain"]
383 origin = row["origin"]
384 nodeinfo_url = row["nodeinfo_url"]
386 logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
387 if block["blocked"] == "":
388 logger.debug("block[blocked] is empty - SKIPPED!")
390 elif not utils.is_domain_wanted(block["blocked"]):
391 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
393 elif block["block_level"] in ["accept", "accepted"]:
394 logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
396 elif not instances.is_registered(block["blocked"]):
397 logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
398 federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)
400 block["block_level"] = utils.alias_block_level(block["block_level"])
402 if utils.process_block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
403 logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
405 "blocked": block["blocked"],
406 "reason" : block["reason"],
409 logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
410 cookies.clear(block["blocked"])
412 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
413 if instances.has_pending(blocker):
414 logger.debug("Flushing updates for blocker='%s' ...", blocker)
415 instances.update_data(blocker)
417 logger.debug("Invoking commit() ...")
418 database.connection.commit()
420 logger.debug("Invoking cookies.clear(%s) ...", blocker)
421 cookies.clear(blocker)
423 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
424 if config.get("bot_enabled") and len(blockdict) > 0:
425 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
426 network.send_bot_post(blocker, blockdict)
428 logger.debug("Success! - EXIT!")
431 def fetch_observer(args: argparse.Namespace) -> int:
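# Scrapes fediverse.observer: the software drop-down menu (unless --software is
# given) and the per-software table data, registering newly found domains.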
432 logger.debug("args[]='%s' - CALLED!", type(args))
434 logger.debug("Invoking locking.acquire() ...")
437 api_domain = "fediverse.observer"
438 if apis.is_recent(api_domain):
439 logger.info("API from api_domain='%s' has recently been accessed - EXIT!", api_domain)
442 logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
443 apis.update(api_domain)
446 if args.software is None:
447 logger.info("Fetching software list ...")
448 raw = utils.fetch_url(
449 f"https://{api_domain}",
451 (config.get("connection_timeout"), config.get("read_timeout"))
453 logger.debug("raw[%s]()=%d", type(raw), len(raw))
455 doc = bs4.BeautifulSoup(raw, features="html.parser")
456 logger.debug("doc[]='%s'", type(doc))
458 items = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"}).findAll("a", {"class": "dropdown-item"})
459 logger.debug("items[]='%s'", type(items))
461 logger.info("Checking %d menu items ...", len(items))
463 logger.debug("item[%s]='%s'", type(item), item)
464 if item.text.lower() == "all":
465 logger.debug("Skipping 'All' menu entry ...")
468 logger.debug("Appending item.text='%s' ...", item.text)
469 types.append(tidyup.domain(item.text))
471 logger.info("Adding args.software='%s' as type ...", args.software)
472 types.append(args.software)
474 logger.info("Fetching table data for %d software type(s) ...", len(types))
475 for software in types:
476 logger.debug("software='%s' - BEFORE!", software)
477 if args.software is not None and args.software != software:
478 logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
483 logger.debug("Fetching table data for software='%s' ...", software)
484 raw = utils.fetch_url(
485 f"https://{api_domain}/app/views/tabledata.php?software={software}",
487 (config.get("connection_timeout"), config.get("read_timeout"))
489 logger.debug("raw[%s]()=%d", type(raw), len(raw))
491 doc = bs4.BeautifulSoup(raw, features="html.parser")
492 logger.debug("doc[]='%s'", type(doc))
493 except network.exceptions as exception:
494 logger.warning("Cannot fetch software='%s' from api_domain='%s': '%s'", software, api_domain, type(exception))
497 items = doc.findAll("a", {"class": "url"})
498 logger.info("Checking %d items,software='%s' ...", len(items), software)
500 logger.debug("item[]='%s'", type(item))
501 domain = item.decode_contents()
503 logger.debug("domain='%s' - AFTER!", domain)
505 logger.debug("domain is empty - SKIPPED!")
507 elif not utils.is_domain_wanted(domain):
508 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
510 elif instances.is_registered(domain):
511 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
513 elif instances.is_recent(domain):
514 logger.debug("domain='%s' has recently been handled - SKIPPED!", domain)
517 software = software_helper.alias(software)
518 logger.info("Fetching instances for domain='%s'", domain)
519 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
521 logger.debug("Success! - EXIT!")
524 def fetch_todon_wiki(args: argparse.Namespace) -> int:
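# Parses the silenced/suspended server lists from wiki.todon.eu and records the
# corresponding blocks.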
525 logger.debug("args[]='%s' - CALLED!", type(args))
527 logger.debug("Invoking locking.acquire() ...")
530 api_domain = "wiki.todon.eu"
531 if apis.is_recent(api_domain):
532 logger.info("API from api_domain='%s' has recently been accessed - EXIT!", api_domain)
535 logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
536 apis.update(api_domain)
543 raw = utils.fetch_url(f"https://{api_domain}/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
544 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
546 doc = bs4.BeautifulSoup(raw, "html.parser")
547 logger.debug("doc[]='%s'", type(doc))
549 silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
550 logger.info("Checking %d silenced/limited entries ...", len(silenced))
551 blocklist["silenced"] = utils.find_domains(silenced, "div")
553 suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
554 logger.info("Checking %d suspended entries ...", len(suspended))
555 blocklist["reject"] = utils.find_domains(suspended, "div")
557 blocking = blocklist["silenced"] + blocklist["reject"]
560 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
561 instances.set_total_blocks(blocker, blocking)
564 for block_level in blocklist:
565 blockers = blocklist[block_level]
567 logger.debug("block_level='%s',blockers()=%d'", block_level, len(blockers))
568 for blocked in blockers:
569 logger.debug("blocked='%s'", blocked)
571 if not instances.is_registered(blocked):
573 logger.info("Fetching instances from domain='%s' ...", blocked)
574 federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
575 except network.exceptions as exception:
576 logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
577 instances.set_last_error(blocked, exception)
579 if blocks.is_instance_blocked(blocker, blocked, block_level):
580 logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
583 logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
584 if utils.process_block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
585 logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", blocked, block_level, blocker)
591 logger.debug("Invoking commit() ...")
592 database.connection.commit()
594 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
595 if config.get("bot_enabled") and len(blockdict) > 0:
596 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
597 network.send_bot_post(blocker, blockdict)
599 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
600 if instances.has_pending(blocker):
601 logger.debug("Flushing updates for blocker='%s' ...", blocker)
602 instances.update_data(blocker)
604 logger.debug("Success! - EXIT!")
607 def fetch_cs(args: argparse.Namespace):
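# Parses chaos.social's federation.md (fetched from raw.githubusercontent.com)
# and records the silenced and blocked instances listed there.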
608 logger.debug("args[]='%s' - CALLED!", type(args))
610 logger.debug("Invoking locking.acquire() ...")
638 api_domain = "raw.githubusercontent.com"
639 if apis.is_recent(api_domain):
640 logger.info("API from api_domain='%s' has recently been accessed - EXIT!", api_domain)
643 logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
644 apis.update(api_domain)
646 raw = utils.fetch_url(f"https://{api_domain}/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
647 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
649 doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
650 logger.debug("doc()=%d[]='%s'", len(doc), type(doc))
652 silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
653 logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
654 domains["silenced"] = federation.find_domains(silenced)
656 blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
657 logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
658 domains["reject"] = federation.find_domains(blocked)
660 blocking = domains["silenced"] + domains["reject"]
661 blocker = "chaos.social"
663 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
664 instances.set_total_blocks(blocker, blocking)
666 logger.debug("domains[silenced]()=%d,domains[reject]()=%d", len(domains["silenced"]), len(domains["reject"]))
670 for block_level in domains:
671 logger.info("block_level='%s' has %d row(s)", block_level, len(domains[block_level]))
673 for row in domains[block_level]:
674 logger.debug("row[%s]='%s'", type(row), row)
675 if instances.is_recent(row["domain"], "last_blocked"):
676 logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
678 elif not instances.is_registered(row["domain"]):
680 logger.info("Fetching instances from domain='%s' ...", row["domain"])
681 federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
682 except network.exceptions as exception:
683 logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
684 instances.set_last_error(row["domain"], exception)
686 if utils.process_block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
687 logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
689 "blocked": row["domain"],
690 "reason" : row["reason"],
693 logger.debug("Invoking commit() ...")
694 database.connection.commit()
696 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
697 if config.get("bot_enabled") and len(blockdict) > 0:
698 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
699 network.send_bot_post(blocker, blockdict)
701 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
702 if instances.has_pending(blocker):
703 logger.debug("Flushing updates for blocker='%s' ...", blocker)
704 instances.update_data(blocker)
706 logger.debug("Success! - EXIT!")
709 def fetch_fba_rss(args: argparse.Namespace) -> int:
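# Reads an FBA-specific RSS feed given via --feed and registers the domains
# found in the item links.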
710 logger.debug("args[]='%s' - CALLED!", type(args))
714 logger.debug("Invoking locking.acquire() ...")
717 components = urlparse(args.feed)
719 if apis.is_recent(components.netloc):
720 logger.info("API from components.netloc='%s' has recently been accessed - EXIT!", components.netloc)
723 logger.debug("components.netloc='%s' has not been recently used, marking ...", components.netloc)
724 apis.update(components.netloc)
726 logger.info("Fetching FBA-specific RSS feed args.feed='%s' ...", args.feed)
727 response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
729 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
730 if response.ok and response.status_code < 300 and len(response.text) > 0:
731 logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
732 rss = atoma.parse_rss_bytes(response.content)
734 logger.debug("rss[]='%s'", type(rss))
735 for item in rss.items:
736 logger.debug("item='%s'", item)
737 domain = tidyup.domain(item.link.split("=")[1])
739 logger.debug("domain='%s' - AFTER!", domain)
741 logger.debug("domain is empty - SKIPPED!")
743 elif not utils.is_domain_wanted(domain):
744 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
746 elif domain in domains:
747 logger.debug("domain='%s' is already added - SKIPPED!", domain)
749 elif instances.is_registered(domain):
750 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
752 elif instances.is_recent(domain):
753 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
756 logger.debug("Adding domain='%s'", domain)
757 domains.append(domain)
759 logger.debug("domains()=%d", len(domains))
761 logger.info("Adding %d new instances ...", len(domains))
762 for domain in domains:
764 logger.info("Fetching instances from domain='%s' ...", domain)
765 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
766 except network.exceptions as exception:
767 logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
768 instances.set_last_error(domain, exception)
771 logger.debug("Success! - EXIT!")
774 def fetch_fbabot_atom(args: argparse.Namespace) -> int:
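# Reads the Atom feed of the FBA bot account on ryana.agency and registers the
# domains linked in each entry.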
775 logger.debug("args[]='%s' - CALLED!", type(args))
777 logger.debug("Invoking locking.acquire() ...")
780 api_domain = "ryana.agency"
781 if apis.is_recent(api_domain):
782 logger.info("API from api_domain='%s' has recently been accessed - EXIT!", api_domain)
785 logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
786 apis.update(api_domain)
788 feed = f"https://{api_domain}/users/fba/feed.atom"
792 logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
793 response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
795 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
796 if response.ok and response.status_code < 300 and len(response.text) > 0:
797 logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
798 atom = atoma.parse_atom_bytes(response.content)
800 logger.debug("atom[]='%s'", type(atom))
801 for entry in atom.entries:
802 logger.debug("entry[]='%s'", type(entry))
803 doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
804 logger.debug("doc[]='%s'", type(doc))
805 for element in doc.findAll("a"):
806 logger.debug("element[]='%s'", type(element))
807 for href in element["href"].split(","):
808 logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
809 domain = tidyup.domain(href)
811 logger.debug("domain='%s' - AFTER!", domain)
813 logger.debug("domain is empty - SKIPPED!")
815 elif not utils.is_domain_wanted(domain):
816 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
818 elif domain in domains:
819 logger.debug("domain='%s' is already added - SKIPPED!", domain)
821 elif instances.is_registered(domain):
822 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
824 elif instances.is_recent(domain):
825 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
828 logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
829 domains.append(domain)
831 logger.debug("domains()=%d", len(domains))
833 logger.info("Adding %d new instances ...", len(domains))
834 for domain in domains:
835 logger.debug("domain='%s'", domain)
837 logger.info("Fetching instances from domain='%s' ...", domain)
838 federation.fetch_instances(domain, api_domain, None, inspect.currentframe().f_code.co_name)
839 except network.exceptions as exception:
840 logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
841 instances.set_last_error(domain, exception)
844 logger.debug("Success! - EXIT!")
847 def fetch_instances(args: argparse.Namespace) -> int:
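# Fetches instance data starting from --domain and, unless limited, re-crawls
# known instances whose last fetch is older than the configured interval.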
848 logger.debug("args[]='%s' - CALLED!", type(args))
850 logger.debug("args.domain='%s' - checking ...", args.domain)
851 if not validators.domain(args.domain):
852 logger.warning("args.domain='%s' is not valid.", args.domain)
854 elif blacklist.is_blacklisted(args.domain):
855 logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
858 logger.debug("Invoking locking.acquire() ...")
863 logger.info("Fetching instances from args.domain='%s' ...", args.domain)
864 federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
865 except network.exceptions as exception:
866 logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
867 instances.set_last_error(args.domain, exception)
868 instances.update_data(args.domain)
872 logger.debug("Not fetching more instances - EXIT!")
875 # Loop over instances whose last fetch is older than the configured re-check interval
876 database.cursor.execute(
877 "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
880 rows = database.cursor.fetchall()
881 logger.info("Checking %d entries ...", len(rows))
883 logger.debug("row[domain]='%s'", row["domain"])
884 if row["domain"] == "":
885 logger.debug("row[domain] is empty - SKIPPED!")
887 elif not utils.is_domain_wanted(row["domain"]):
888 logger.warning("Domain row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
892 logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", row["domain"], row["origin"], row["software"], row["nodeinfo_url"])
893 federation.fetch_instances(row["domain"], row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
894 except network.exceptions as exception:
895 logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[domain]='%s'", type(exception), row["domain"])
896 instances.set_last_error(row["domain"], exception)
898 logger.debug("Success - EXIT!")
901 def fetch_oliphant(args: argparse.Namespace) -> int:
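# Downloads the per-blocker CSV block lists from the oliphant/blocklists
# repository on codeberg.org and records the contained blocks.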
902 logger.debug("args[]='%s' - CALLED!", type(args))
904 logger.debug("Invoking locking.acquire() ...")
907 api_domain = "codeberg.org"
908 if apis.is_recent(api_domain):
909 logger.info("API from api_domain='%s' has recently been accessed - EXIT!", api_domain)
912 logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
913 apis.update(api_domain)
916 base_url = f"https://{api_domain}/oliphant/blocklists/raw/branch/main/blocklists"
921 "blocker": "artisan.chat",
922 "csv_url": "mastodon/artisan.chat.csv",
924 "blocker": "mastodon.art",
925 "csv_url": "mastodon/mastodon.art.csv",
927 "blocker": "pleroma.envs.net",
928 "csv_url": "mastodon/pleroma.envs.net.csv",
930 "blocker": "oliphant.social",
931 "csv_url": "mastodon/_unified_tier3_blocklist.csv",
933 "blocker": "mastodon.online",
934 "csv_url": "mastodon/mastodon.online.csv",
936 "blocker": "mastodon.social",
937 "csv_url": "mastodon/mastodon.social.csv",
939 "blocker": "mastodon.social",
940 "csv_url": "other/missing-tier0-mastodon.social.csv",
942 "blocker": "rage.love",
943 "csv_url": "mastodon/rage.love.csv",
945 "blocker": "sunny.garden",
946 "csv_url": "mastodon/sunny.garden.csv",
948 "blocker": "solarpunk.moe",
949 "csv_url": "mastodon/solarpunk.moe.csv",
951 "blocker": "toot.wales",
952 "csv_url": "mastodon/toot.wales.csv",
954 "blocker": "union.place",
955 "csv_url": "mastodon/union.place.csv",
961 logger.debug("Downloading %d files ...", len(blocklists))
962 for block in blocklists:
963 # Is a domain given that does not match this blocker?
964 if isinstance(args.domain, str) and args.domain != block["blocker"]:
965 logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
967 elif args.domain in domains:
968 logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
970 elif instances.is_recent(block["blocker"]):
971 logger.debug("block[blocker]='%s' has been recently crawled - SKIPPED!", block["blocker"])
975 logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
976 response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
978 logger.debug("response.ok='%s',response.status_code=%d,response.content()=%d", response.ok, response.status_code, len(response.content))
979 if not response.ok or response.status_code >= 300 or response.content == "":
980 logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
983 logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
984 reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
988 logger.info("Processing CSV rows for blocker='%s' ...", block["blocker"])
991 logger.debug("row[%s]='%s'", type(row), row)
992 domain = severity = None
993 reject_media = reject_reports = False
996 domain = row["#domain"]
997 elif "domain" in row:
998 domain = row["domain"]
1000 logger.debug("row='%s' does not contain domain column", row)
1003 if "#severity" in row:
1004 severity = row["#severity"]
1005 elif "severity" in row:
1006 severity = row["severity"]
1008 logger.debug("row='%s' does not contain severity column", row)
1011 if "#reject_media" in row and row["#reject_media"].lower() == "true":
1013 elif "reject_media" in row and row["reject_media"].lower() == "true":
1016 if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
1017 reject_reports = True
1018 elif "reject_reports" in row and row["reject_reports"].lower() == "true":
1019 reject_reports = True
1022 logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
1024 logger.debug("domain is empty - SKIPPED!")
1026 elif not utils.is_domain_wanted(domain):
1027 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1030 logger.debug("Marking domain='%s' as handled", domain)
1031 domains.append(domain)
1033 logger.debug("Processing domain='%s' ...", domain)
1034 processed = utils.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
1035 logger.debug("processed='%s'", processed)
1037 if utils.process_block(block["blocker"], domain, None, "reject") and config.get("bot_enabled"):
1038 logger.debug("Appending blocked='%s' with block_level='reject' for blocker='%s' ...", domain, block["blocker"])
1041 "reason" : None,
1045 utils.process_block(block["blocker"], domain, None, "reject_media")
1047 utils.process_block(block["blocker"], domain, None, "reject_reports")
1049 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", block["blocker"], cnt)
1050 instances.set_total_blocks(block["blocker"], cnt)
1052 logger.debug("Checking if blocker='%s' has pending updates ...", block["blocker"])
1053 if instances.has_pending(block["blocker"]):
1054 logger.debug("Flushing updates for block[blocker]='%s' ...", block["blocker"])
1055 instances.update_data(block["blocker"])
1057 logger.debug("Invoking commit() ...")
1058 database.connection.commit()
1060 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1061 if config.get("bot_enabled") and len(blockdict) > 0:
1062 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
1063 network.send_bot_post(block["blocker"], blockdict)
1065 logger.debug("Success! - EXIT!")
1068 def fetch_txt(args: argparse.Namespace) -> int:
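# Fetches plain-text block lists (currently seirdy.one's bsl.txt) and processes
# each listed domain.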
1069 logger.debug("args[]='%s' - CALLED!", type(args))
1071 logger.debug("Invoking locking.acquire() ...")
1076 "blocker": "seirdy.one",
1077 "url" : "https://seirdy.one/pb/bsl.txt",
1080 logger.info("Checking %d text file(s) ...", len(urls))
1082 logger.debug("Fetching row[url]='%s' ...", row["url"])
1083 response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
1085 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1086 if response.ok and response.status_code < 300 and response.text != "":
1087 logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
1088 domains = response.text.split("\n")
1090 logger.info("Processing %d domains ...", len(domains))
1091 for domain in domains:
1092 logger.debug("domain='%s' - BEFORE!", domain)
1093 domain = tidyup.domain(domain)
1095 logger.debug("domain='%s' - AFTER!", domain)
1097 logger.debug("domain is empty - SKIPPED!")
1099 elif not utils.is_domain_wanted(domain):
1100 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1102 elif instances.is_recent(domain):
1103 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1106 logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
1107 processed = utils.process_domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)
1109 logger.debug("processed='%s'", processed)
1111 logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
1114 logger.debug("Success! - EXIT!")
1117 def fetch_fedipact(args: argparse.Namespace) -> int:
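# Scrapes the instance list published on fedipact.online and registers new,
# wanted domains.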
1118 logger.debug("args[]='%s' - CALLED!", type(args))
1120 logger.debug("Invoking locking.acquire() ...")
1123 api_domain = "fedipact.online"
1124 if apis.is_recent(api_domain):
1125 logger.info("API from api_domain='%s' has recently been accessed - EXIT!", api_domain)
1128 logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
1129 apis.update(api_domain)
1131 response = utils.fetch_url(
1132 f"https://{api_domain}",
1133 network.web_headers,
1134 (config.get("connection_timeout"), config.get("read_timeout"))
1137 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1138 if response.ok and response.status_code < 300 and response.text != "":
1139 logger.debug("Parsing %d Bytes ...", len(response.text))
1141 doc = bs4.BeautifulSoup(response.text, "html.parser")
1142 logger.debug("doc[]='%s'", type(doc))
1144 rows = doc.findAll("li")
1145 logger.info("Checking %d row(s) ...", len(rows))
1147 logger.debug("row[]='%s'", type(row))
1148 domain = tidyup.domain(row.contents[0])
1150 logger.debug("domain='%s' - AFTER!", domain)
1152 logger.debug("domain is empty - SKIPPED!")
1154 elif not utils.is_domain_wanted(domain):
1155 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1157 elif instances.is_registered(domain):
1158 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1160 elif instances.is_recent(domain):
1161 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1164 logger.info("Fetching domain='%s' ...", domain)
1165 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1167 logger.debug("Success! - EXIT!")
1170 def fetch_joinfediverse(args: argparse.Namespace) -> int:
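# Parses the FediBlock tables on joinfediverse.wiki and records the listed
# blocks for the stored climatejustice.* instances.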
1171 logger.debug("args[]='%s' - CALLED!", type(args))
1173 logger.debug("Invoking locking.acquire() ...")
1176 api_domain = "joinfediverse.wiki"
1177 if apis.is_recent(api_domain):
1178 logger.info("API from api_domain='%s' has recently been accessed - EXIT!", api_domain)
1181 logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
1182 apis.update(api_domain)
1184 raw = utils.fetch_url(
1185 f"https://{api_domain}/FediBlock",
1186 network.web_headers,
1187 (config.get("connection_timeout"), config.get("read_timeout"))
1189 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1191 doc = bs4.BeautifulSoup(raw, "html.parser")
1192 logger.debug("doc[]='%s'", type(doc))
1194 tables = doc.findAll("table", {"class": "wikitable"})
1196 logger.info("Analyzing %d table(s) ...", len(tables))
1198 for table in tables:
1199 logger.debug("table[]='%s'", type(table))
1201 rows = table.findAll("tr")
1202 logger.info("Checking %d row(s) ...", len(rows))
1203 block_headers = dict()
1205 logger.debug("row[%s]='%s'", type(row), row)
1207 headers = row.findAll("th")
1208 logger.debug("Found headers()=%d header(s)", len(headers))
1209 if len(headers) > 1:
1210 block_headers = dict()
1212 for header in headers:
1214 logger.debug("header[]='%s',cnt=%d", type(header), cnt)
1215 text = header.contents[0]
1217 logger.debug("text[]='%s'", type(text))
1218 if not isinstance(text, str):
1219 logger.debug("text[]='%s' is not 'str' - SKIPPED!", type(text))
1221 elif validators.domain(text.strip()):
1222 logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
1225 text = tidyup.domain(text.strip())
1226 logger.debug("text='%s'", text)
1227 if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
1228 logger.debug("Found header: '%s'=%d", text, cnt)
1229 block_headers[cnt] = text
1231 elif len(block_headers) == 0:
1232 logger.debug("row is not scrapable - SKIPPED!")
1234 elif len(block_headers) > 0:
1235 logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
1239 for element in row.find_all(["th", "td"]):
1241 logger.debug("element[]='%s',cnt=%d", type(element), cnt)
1242 if cnt in block_headers:
1243 logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])
1245 text = element.text.strip()
1246 key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"
1248 logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
1249 if key in ["domain", "instance"]:
1251 elif key == "reason":
1252 block[key] = tidyup.reason(text)
1253 elif key == "subdomain(s)":
1256 block[key] = text.split("/")
1258 logger.debug("key='%s'", key)
1261 logger.debug("block()=%d ...", len(block))
1263 logger.debug("Appending block()=%d ...", len(block))
1264 blocklist.append(block)
1266 logger.debug("blocklist()=%d", len(blocklist))
1268 database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
1269 domains = database.cursor.fetchall()
1271 logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
1273 for block in blocklist:
1274 logger.debug("block='%s'", block)
1275 if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
1276 origin = block["blocked"]
1277 for subdomain in block["subdomain(s)"]:
1278 block["blocked"] = subdomain + "." + origin
1279 blocking.append(block)
1281 blocking.append(block)
1283 logger.debug("blocking()=%d", len(blocking))
1284 for block in blocking:
1285 logger.debug("block[]='%s'", type(block))
1286 block["blocked"] = tidyup.domain(block["blocked"])
1288 logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
1289 if block["blocked"] == "":
1290 logger.debug("block[blocked] is empty - SKIPPED!")
1292 elif not utils.is_domain_wanted(block["blocked"]):
1293 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1295 elif instances.is_recent(block["blocked"]):
1296 logger.debug("blocked='%s' has been recently checked - SKIPPED!", block["blocked"])
1299 logger.info("Processing blocked='%s' ...", block["blocked"])
1300 utils.process_domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
1303 for blocker in domains:
1304 blocker = blocker[0]
1305 logger.debug("blocker[%s]='%s'", type(blocker), blocker)
1307 for block in blocking:
1308 logger.debug("block[blocked]='%s',block[reason]='%s' - BEFORE!", block["blocked"], block["reason"])
1309 block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None
1311 logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
1312 if block["blocked"] == "":
1313 logger.debug("block[blocked] is empty - SKIPPED!")
1315 elif not utils.is_domain_wanted(block["blocked"]):
1316 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1319 logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
1320 if utils.process_block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
1321 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
1323 "blocked": block["blocked"],
1324 "reason" : block["reason"],
1327 if instances.has_pending(blocker):
1328 logger.debug("Flushing updates for blocker='%s' ...", blocker)
1329 instances.update_data(blocker)
1331 logger.debug("Invoking commit() ...")
1332 database.connection.commit()
1334 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1335 if config.get("bot_enabled") and len(blockdict) > 0:
1336 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
1337 network.send_bot_post(blocker, blockdict)
1339 logger.debug("Success! - EXIT!")
1342 def recheck_obfuscation(args: argparse.Namespace) -> int:
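# Re-fetches block lists from instances flagged with has_obfuscation, tries to
# deobfuscate wildcard entries and clears the flag once everything is resolved.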
1343 logger.debug("args[]='%s' - CALLED!", type(args))
1345 logger.debug("Invoking locking.acquire() ...")
1348 if isinstance(args.domain, str) and args.domain != "" and utils.is_domain_wanted(args.domain):
1349 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
1350 elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
1351 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
1353 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")
1355 rows = database.cursor.fetchall()
1356 logger.info("Checking %d domains ...", len(rows))
1358 logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
1359 if (args.all is None or not args.all) and instances.is_recent(row["domain"]) and args.domain is None and args.software is None:
1360 logger.debug("row[domain]='%s' has been recently checked, args.all[]='%s' - SKIPPED!", row["domain"], type(args.all))
1364 if row["software"] == "pleroma":
1365 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1366 blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
1367 elif row["software"] == "mastodon":
1368 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1369 blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
1370 elif row["software"] == "lemmy":
1371 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1372 blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
1373 elif row["software"] == "friendica":
1374 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1375 blocking = friendica.fetch_blocks(row["domain"])
1376 elif row["software"] == "misskey":
1377 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1378 blocking = misskey.fetch_blocks(row["domain"])
1380 logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])
1382 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
1383 instances.set_total_blocks(row["domain"], blocking)
1385 logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1388 for block in blocking:
1389 logger.debug("block[blocked]='%s'", block["blocked"])
1392 if block["blocked"] == "":
1393 logger.debug("block[blocked] is empty - SKIPPED!")
1395 elif block["blocked"].endswith(".arpa"):
1396 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1398 elif block["blocked"].endswith(".tld"):
1399 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1401 elif block["blocked"].endswith(".onion"):
1402 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
1404 elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
1405 logger.debug("block='%s' is obfuscated.", block["blocked"])
1406 obfuscated = obfuscated + 1
1407 blocked = utils.deobfuscate_domain(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
1408 elif not utils.is_domain_wanted(block["blocked"]):
1409 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1411 elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1412 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
1415 logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
1416 if blocked is not None and blocked != block["blocked"]:
1417 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1418 obfuscated = obfuscated - 1
1419 if blocks.is_instance_blocked(row["domain"], blocked):
1420 logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
1423 block["block_level"] = utils.alias_block_level(block["block_level"])
1425 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
1426 if utils.process_block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1427 logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
1430 "reason" : block["reason"],
1433 logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
1434 if obfuscated == 0 and len(blocking) > 0:
1435 logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
1436 instances.set_has_obfuscation(row["domain"], False)
1438 if instances.has_pending(row["domain"]):
1439 logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1440 instances.update_data(row["domain"])
1442 logger.debug("Invoking commit() ...")
1443 database.connection.commit()
1445 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1446 if config.get("bot_enabled") and len(blockdict) > 0:
1447 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", row["domain"], len(blockdict))
1448 network.send_bot_post(row["domain"], blockdict)
1450 logger.debug("Success! - EXIT!")
1453 def fetch_fedilist(args: argparse.Namespace) -> int:
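# Downloads the instance CSV from demo.fedilist.com (optionally filtered by
# software) and registers new, wanted domains.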
1454 logger.debug("args[]='%s' - CALLED!", type(args))
1456 logger.debug("Invoking locking.acquire() ...")
1459 api_domain = "demo.fedilist.com"
1460 if apis.is_recent(api_domain):
1461 logger.info("API from api_domain='%s' has recently been accessed - EXIT!", api_domain)
1464 logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
1465 apis.update(api_domain)
1467 url = f"http://{api_domain}/instance/csv?onion=not"
1468 if args.software is not None and args.software != "":
1469 logger.debug("args.software='%s'", args.software)
1470 url = f"http://{api_domain}/instance/csv?software={args.software}&onion=not"
1472 logger.info("Fetching url='%s' ...", url)
1473 response = reqto.get(
1475 headers=network.web_headers,
1476 timeout=(config.get("connection_timeout"), config.get("read_timeout")),
1477 allow_redirects=False
1480 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1481 if not response.ok or response.status_code >= 300 or len(response.content) == 0:
1482 logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.content))
1485 reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
1487 logger.debug("reader[]='%s'", type(reader))
1490 logger.debug("row[]='%s'", type(row))
1491 domain = tidyup.domain(row["hostname"])
1492 logger.debug("domain='%s' - AFTER!", domain)
1495 logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"])
1497 elif not utils.is_domain_wanted(domain):
1498 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1500 elif (args.all is None or not args.all) and instances.is_registered(domain):
1501 logger.debug("domain='%s' is already registered, --all not specified: args.all[]='%s'", domain, type(args.all))
1503 elif instances.is_recent(domain):
1504 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1507 logger.info("Fetching instances from domain='%s' ...", domain)
1508 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1510 logger.debug("Success! - EXIT!")
1513 def update_nodeinfo(args: argparse.Namespace) -> int:
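# Re-determines the software of stored instances (all, a single domain or a
# single software type) and updates rows whose nodeinfo data is outdated.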
1514 logger.debug("args[]='%s' - CALLED!", type(args))
1516 logger.debug("Invoking locking.acquire() ...")
1519 if args.domain is not None and args.domain != "":
1520 logger.debug("Fetching args.domain='%s'", args.domain)
1521 database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ?", [args.domain])
1522 elif args.software is not None and args.software != "":
1523 logger.info("Fetching domains for args.software='%s'", args.software)
1524 database.cursor.execute("SELECT domain, software FROM instances WHERE software = ?", [args.software])
1526 logger.info("Fetching domains with outdated nodeinfo ...")
1527 database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL", [time.time() - config.get("recheck_nodeinfo")])
1529 domains = database.cursor.fetchall()
1531 logger.info("Checking %d domain(s) ...", len(domains))
1534 logger.debug("row[]='%s'", type(row))
1536 logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
1537 software = federation.determine_software(row["domain"])
1539 logger.debug("Determined software='%s'", software)
1540 if software != row["software"] and software is not None:
1541 logger.warning("Software type has changed from '%s' to '%s'!", row["software"], software)
1542 instances.set_software(row["domain"], software)
1544 instances.set_success(row["domain"])
1545 except network.exceptions as exception:
1546 logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1547 instances.set_last_error(row["domain"], exception)
1549 instances.set_last_nodeinfo(row["domain"])
1550 instances.update_data(row["domain"])
1553 logger.debug("Success! - EXIT!")
1556 def fetch_instances_social(args: argparse.Namespace) -> int:
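# Queries the instances.social API (requires an API key in config.json) and
# registers new, wanted domains.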
1557 logger.debug("args[]='%s' - CALLED!", type(args))
1559 logger.debug("Invoking locking.acquire() ...")
1562 api_domain = "instances.social"
1564 if config.get("instances_social_api_key") == "":
1565 logger.error("API key not set. Please set it in your config.json file.")
1567 elif apis.is_recent(api_domain):
1568 logger.info("API from api_domain='%s' has recently been accessed - EXIT!", api_domain)
1571 logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
1572 apis.update(api_domain)
1575 "Authorization": f"Bearer {config.get('instances_social_api_key')}",
1578 fetched = network.get_json_api(
1580 "/api/1.0/instances/list?count=0&sort_by=name",
1582 (config.get("connection_timeout"), config.get("read_timeout"))
1584 logger.debug("fetched[]='%s'", type(fetched))
1586 if "error_message" in fetched:
1587 logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1589 elif "exception" in fetched:
1590 logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1592 elif "json" not in fetched:
1593 logger.warning("fetched has no element 'json' - EXIT!")
1595 elif "instances" not in fetched["json"]:
1596 logger.warning("fetched[json] has no element 'instances' - EXIT!")
1600 rows = fetched["json"]["instances"]
1602 logger.info("Checking %d row(s) ...", len(rows))
1604 logger.debug("row[]='%s'", type(row))
1605 domain = tidyup.domain(row["name"])
1607 logger.debug("domain='%s' - AFTER!", domain)
1609 logger.debug("domain is empty - SKIPPED!")
1611 elif not utils.is_domain_wanted(domain):
1612 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1614 elif domain in domains:
1615 logger.debug("domain='%s' is already added - SKIPPED!", domain)
1617 elif instances.is_registered(domain):
1618 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1620 elif instances.is_recent(domain):
1621 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1624 logger.info("Fetching instances from domain='%s'", domain)
1625 federation.fetch_instances(domain, api_domain, None, inspect.currentframe().f_code.co_name)
1627 logger.debug("Success! - EXIT!")