# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
+import inspect
+
+import bs4
+import validators
+
from fba import config
+from fba import csrf
+from fba import fba
from fba import federation
-from fba import instances
from fba import network
+from fba.helpers import blacklist
+from fba.helpers import tidyup
+
+from fba.models import blocks
+from fba.models import instances
+
def fetch_peers(domain: str) -> list:
    """Fetch the peers a Lemmy instance federates with.

    CSRF headers are determined first through csrf.determine(); afterwards
    '/api/v3/site' is requested through network.get_json_api() and the
    'federated_instances' entry of the JSON reply is handed over to
    federation.add_peers(). Failures are recorded through
    instances.set_last_error() and the number of peers through
    instances.set_total_peers().

    Args:
        domain: Domain name of the instance to query.

    Returns:
        The list of peers; empty when the CSRF check or the request failed
        or when the reply carried no 'federated_instances' entry.

    Raises:
        ValueError: If 'domain' is not a string or is empty.
    """
    # DEBUG: print(f"DEBUG: domain({len(domain)})='{domain}',software='lemmy' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")

    peers = list()

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    headers = tuple()

    try:
        # DEBUG: print(f"DEBUG: Checking CSRF for domain='{domain}'")
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        # Without usable headers the API request is not attempted at all
        print(f"WARNING: Exception '{type(exception)}' during checking CSRF (fetch_peers,{__name__}) - EXIT!")
        instances.set_last_error(domain, exception)
        return peers

    try:
        # DEBUG: print(f"DEBUG: domain='{domain}' is Lemmy, fetching JSON ...")
        data = network.get_json_api(
            domain,
            "/api/v3/site",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        # DEBUG: print(f"DEBUG: data[]='{type(data)}'")
        if "error_message" in data:
            print("WARNING: Could not reach any JSON API:", domain)
            instances.set_last_error(domain, data)
        elif "federated_instances" in data["json"]:
            # DEBUG: print(f"DEBUG: Found federated_instances for domain='{domain}'")
            peers = peers + federation.add_peers(data["json"]["federated_instances"])
            # DEBUG: print("DEBUG: Added instance(s) to peers")
        else:
            print("WARNING: JSON response does not contain 'federated_instances':", domain)
            instances.set_last_error(domain, data)

    except network.exceptions as exception:
        print(f"WARNING: Exception during fetching JSON: domain='{domain}',exception[{type(exception)}]:'{str(exception)}'")
        instances.set_last_error(domain, exception)

    # The peer count is stored even when the request failed (peers is empty then)
    # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'")
    instances.set_total_peers(domain, peers)

    # DEBUG: print("DEBUG: Returning peers[]:", type(peers))
    return peers
+
def fetch_blocks(domain: str, origin: str, nodeinfo_url: str) -> None:
    """Scrape the instances blocked by a Lemmy instance and record them.

    The '/instances' page of 'domain' is fetched and parsed as HTML. The
    first <h5> heading whose text equals one of the known translations of
    "Blocked Instances" is looked up and every <a> element of the <ul>
    following it is treated as a blocked domain. Invalid, '.arpa', '.tld'
    and blacklisted domains are skipped; unknown domains are registered
    through instances.add() and the block is stored (or its last-seen
    value updated) with block level 'reject'.

    Args:
        domain: Domain name of the instance whose block list is scraped.
        origin: Origin of 'domain' or None; only validated here.
        nodeinfo_url: Node info URL passed on to instances.add().

    Raises:
        ValueError: If a parameter has the wrong type or is empty.
    """
    # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(origin, str) and origin is not None:
        raise ValueError(f"Parameter origin[]='{type(origin)}' is not 'str'")
    elif origin == "":
        raise ValueError("Parameter 'origin' is empty")
    elif not isinstance(nodeinfo_url, str):
        raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not 'str'")
    elif nodeinfo_url == "":
        raise ValueError("Parameter 'nodeinfo_url' is empty")

    # Known translations of the "Blocked Instances" heading
    translations = [
        "Blocked Instances",
        "Instàncies bloquejades",
        "Blocáilte Ásc",
        "封锁实例",
        "Blokované instance",
        "Geblokkeerde instanties",
        "Blockerade instanser",
        "Instàncias blocadas",
        "Istanze bloccate",
        "Instances bloquées",
        "Letiltott példányok",
        "Instancias bloqueadas",
        "Blokeatuta dauden instantziak",
        "차단된 인스턴스",
        "Peladen Yang Diblokir",
        "Blokerede servere",
        "Blokitaj nodoj",
        "Блокирани Инстанции",
        "Blockierte Instanzen",
        "Estetyt instanssit",
        "Instâncias bloqueadas",
        "Zablokowane instancje",
        "Blokované inštancie",
        "المثلاء المحجوبون",
        "Užblokuoti serveriai",
        "ブロックしたインスタンス",
        "Блокированные Инстансы",
        "Αποκλεισμένοι διακομιστές",
        "封鎖站台",
    ]

    try:
        # The block list is scraped from the HTML of the /instances page
        # DEBUG: print(f"DEBUG: Fetching /instances from domain='{domain}'")
        response = network.fetch_response(
            domain,
            "/instances",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        # DEBUG: print(f"DEBUG: response.ok='{response.ok}',response.status_code={response.status_code},response.text()={len(response.text)}")
        if response.ok and response.status_code < 300 and response.text != "":
            # DEBUG: print(f"DEBUG: Parsing {len(response.text)} Bytes ...")
            doc = bs4.BeautifulSoup(response.text, "html.parser")
            # DEBUG: print(f"DEBUG: doc[]={type(doc)}")

            headers = doc.find_all("h5")
            found = None

            # DEBUG: print(f"DEBUG: Search in {len(headers)} header(s) ...")
            for header in headers:
                # DEBUG: print(f"DEBUG: header[]={type(header)}")
                if not header.contents:
                    # An empty heading has no text to compare
                    continue

                content = header.contents[0]

                # DEBUG: print(f"DEBUG: content='{content}'")
                if content in translations:
                    # DEBUG: print("DEBUG: Found header with blocked instances - BREAK!")
                    found = header
                    break

            # DEBUG: print(f"DEBUG: found[]='{type(found)}'")
            if found is None:
                # DEBUG: print(f"DEBUG: domain='{domain}' is not blocking any instances - EXIT!")
                return

            # find_next() returns None when no <ul> follows the heading
            listing = found.find_next("ul")
            blocking = list() if listing is None else listing.find_all("a")

            # DEBUG: print(f"DEBUG: Found {len(blocking)} blocked instance(s) ...")
            for tag in blocking:
                # DEBUG: print(f"DEBUG: tag[]='{type(tag)}'")
                if not tag.contents:
                    # An empty link carries no domain name
                    continue

                blocked = tidyup.domain(tag.contents[0])

                # DEBUG: print(f"DEBUG: blocked='{blocked}'")
                if not validators.domain(blocked):
                    print(f"WARNING: blocked='{blocked}' is not a valid domain - SKIPPED!")
                    continue
                elif blocked.endswith(".arpa"):
                    print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
                    continue
                elif blocked.endswith(".tld"):
                    print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!")
                    continue
                elif blacklist.is_blacklisted(blocked):
                    # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - SKIPPED!")
                    continue
                elif not instances.is_registered(blocked):
                    # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, domain)
                    instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url)

                if not blocks.is_instance_blocked(domain, blocked, "reject"):
                    # DEBUG: print("DEBUG: Blocking:", domain, blocked)
                    blocks.add_instance(domain, blocked, None, "reject")
                else:
                    # DEBUG: print(f"DEBUG: Updating block last seen for domain='{domain}',blocked='{blocked}' ...")
                    blocks.update_last_seen(domain, blocked, "reject")

        # DEBUG: print("DEBUG: Committing changes ...")
        fba.connection.commit()
    except network.exceptions as exception:
        print(f"ERROR: domain='{domain}',software='lemmy',exception[{type(exception)}]:'{str(exception)}'")
        instances.set_last_error(domain, exception)

    # DEBUG: print("DEBUG: EXIT!")