git.mxchange.org Git - fba.git/blobdiff - fba/networks/lemmy.py
Continued:
[fba.git] / fba / networks / lemmy.py
index 5963227fbb80e1c16cbf86822899826e6edf7c1f..541dd55e7ad0b2a4689529876e23daacb14aad41 100644 (file)
 # You should have received a copy of the GNU Affero General Public License
 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
 
+import inspect
+
+import bs4
+import validators
+
 from fba import config
+from fba import csrf
+from fba import fba
 from fba import federation
-from fba import instances
 from fba import network
 
+from fba.helpers import blacklist
+from fba.helpers import tidyup
+
+from fba.models import blocks
+from fba.models import instances
+
 def fetch_peers(domain: str) -> list:
-    # DEBUG: print(f"DEBUG: domain({len(domain)})={domain},software='lemmy' - CALLED!")
+    # DEBUG: print(f"DEBUG: domain({len(domain)})='{domain}',software='lemmy' - CALLED!")
     if not isinstance(domain, str):
-        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
+        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
     elif domain == "":
         raise ValueError("Parameter 'domain' is empty")
 
     peers = list()
+
+    # No CSRF by default, you don't have to add network.api_headers by yourself here
+    headers = tuple()
+
+    try:
+        # DEBUG: print(f"DEBUG: Checking CSRF for domain='{domain}'")
+        headers = csrf.determine(domain, dict())
+    except network.exceptions as exception:
+        print(f"WARNING: Exception '{type(exception)}' during checking CSRF (fetch_peers,{__name__}) - EXIT!")
+        instances.set_last_error(domain, exception)
+        return peers
+
     try:
         # DEBUG: print(f"DEBUG: domain='{domain}' is Lemmy, fetching JSON ...")
         data = network.get_json_api(
             domain,
             "/api/v3/site",
+            headers,
             (config.get("connection_timeout"), config.get("read_timeout"))
         )
 
-        # DEBUG: print(f"DEBUG: data['{type(data)}']='{data}'")
+        # DEBUG: print(f"DEBUG: data[]='{type(data)}'")
         if "error_message" in data:
             print("WARNING: Could not reach any JSON API:", domain)
-            instances.update_last_error(domain, data)
+            instances.set_last_error(domain, data)
         elif "federated_instances" in data["json"]:
             # DEBUG: print(f"DEBUG: Found federated_instances for domain='{domain}'")
             peers = peers + federation.add_peers(data["json"]["federated_instances"])
             # DEBUG: print("DEBUG: Added instance(s) to peers")
         else:
             print("WARNING: JSON response does not contain 'federated_instances':", domain)
-            instances.update_last_error(domain, data)
+            instances.set_last_error(domain, data)
 
-    except BaseException as exception:
+    except network.exceptions as exception:
         print(f"WARNING: Exception during fetching JSON: domain='{domain}',exception[{type(exception)}]:'{str(exception)}'")
+        instances.set_last_error(domain, exception)
 
     # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'")
-    instances.set_data("total_peers", domain, len(peers))
-
-    # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...")
-    instances.update_last_instance_fetch(domain)
+    instances.set_total_peers(domain, peers)
 
     # DEBUG: print("DEBUG: Returning peers[]:", type(peers))
     return peers
+
+def fetch_blocks(domain: str, origin: str, nodeinfo_url: str):
+    # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}' - CALLED!")
+    if not isinstance(domain, str):
+        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
+    elif domain == "":
+        raise ValueError("Parameter 'domain' is empty")
+    elif not isinstance(origin, str) and origin is not None:
+        raise ValueError(f"Parameter origin[]='{type(origin)}' is not 'str'")
+    elif origin == "":
+        raise ValueError("Parameter 'origin' is empty")
+    elif not isinstance(nodeinfo_url, str):
+        raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not 'str'")
+    elif nodeinfo_url == "":
+        raise ValueError("Parameter 'nodeinfo_url' is empty")
+
+    translations = [
+        "Blocked Instances",
+        "Instàncies bloquejades",
+        "Blocáilte Ásc",
+        "封锁实例",
+        "Blokované instance",
+        "Geblokkeerde instanties",
+        "Blockerade instanser",
+        "Instàncias blocadas",
+        "Istanze bloccate",
+        "Instances bloquées",
+        "Letiltott példányok",
+        "Instancias bloqueadas",
+        "Blokeatuta dauden instantziak",
+        "차단된 인스턴스",
+        "Peladen Yang Diblokir",
+        "Blokerede servere",
+        "Blokitaj nodoj",
+        "Блокирани Инстанции",
+        "Blockierte Instanzen",
+        "Estetyt instanssit",
+        "Instâncias bloqueadas",
+        "Zablokowane instancje",
+        "Blokované inštancie",
+        "المثلاء المحجوبون",
+        "Užblokuoti serveriai",
+        "ブロックしたインスタンス",
+        "Блокированные Инстансы",
+        "Αποκλεισμένοι διακομιστές",
+        "封鎖站台",
+        "Instâncias bloqueadas",
+    ]
+
+    try:
+        # Blocked instances are scraped from the HTML "/instances" page fetched below
+        found_blocks = list()
+        blocklist = list()
+
+        rows = {
+            "reject"        : [],
+            "media_removal" : [],
+            "followers_only": [],
+            "report_removal": [],
+        }
+
+        # DEBUG: print(f"DEBUG: Fetching /instances from domain='{domain}'")
+        response = network.fetch_response(
+            domain,
+            "/instances",
+            network.web_headers,
+            (config.get("connection_timeout"), config.get("read_timeout"))
+        )
+
+        # DEBUG: print(f"DEBUG: response.ok='{response.ok}',response.status_code={response.status_code},response.text()={len(response.text)}")
+        if response.ok and response.status_code < 300 and response.text != "":
+            # DEBUG: print(f"DEBUG: Parsing {len(response.text)} Bytes ...")
+
+            doc = bs4.BeautifulSoup(response.text, "html.parser")
+            # DEBUG: print(f"DEBUG: doc[]={type(doc)}")
+
+            headers = doc.findAll("h5")
+            found = None
+            # DEBUG: print(f"DEBUG: Search in {len(headers)} header(s) ...")
+            for header in headers:
+                # DEBUG: print(f"DEBUG: header[]={type(header)}")
+                content = header.contents[0]
+                # DEBUG: print(f"DEBUG: content='{content}'")
+                if content in translations:
+                    # DEBUG: print("DEBUG: Found header with blocked instances - BREAK!")
+                    found = header
+                    break
+
+            # DEBUG: print(f"DEBUG: found[]='{type(found)}'")
+            if found is None:
+                # DEBUG: print(f"DEBUG: domain='{domain}' is not blocking any instances - EXIT!")
+                return
+
+            blocking = found.find_next("ul").findAll("a")
+            # DEBUG: print(f"DEBUG: Found {len(blocking)} blocked instance(s) ...")
+            for tag in blocking:
+                # DEBUG: print(f"DEBUG: tag[]='{type(tag)}'")
+                blocked = tidyup.domain(tag.contents[0])
+
+                # DEBUG: print(f"DEBUG: blocked='{blocked}'")
+                if not validators.domain(blocked):
+                    print(f"WARNING: blocked='{blocked}' is not a valid domain - SKIPPED!")
+                    continue
+                elif blocked.endswith(".arpa"):
+                    print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
+                    continue
+                elif blocked.endswith(".tld"):
+                    print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!")
+                    continue
+                elif blacklist.is_blacklisted(blocked):
+                    # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - SKIPPED!")
+                    continue
+                elif not instances.is_registered(blocked):
+                    # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, domain)
+                    instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url)
+
+                if not blocks.is_instance_blocked(domain, blocked, "reject"):
+                    # DEBUG: print("DEBUG: Blocking:", domain, blocked)
+                    blocks.add_instance(domain, blocked, None, "reject")
+
+                    found_blocks.append({
+                        "blocked": blocked,
+                        "reason" : None
+                    })
+                else:
+                    # DEBUG: print(f"DEBUG: Updating block last seen for domain='{domain}',blocked='{blocked}' ...")
+                    blocks.update_last_seen(domain, blocked, "reject")
+
+        # DEBUG: print("DEBUG: Committing changes ...")
+        fba.connection.commit()
+    except network.exceptions as exception:
+        print(f"ERROR: domain='{domain}',software='mastodon',exception[{type(exception)}]:'{str(exception)}'")
+        instances.set_last_error(domain, exception)
+
+    # DEBUG: print("DEBUG: EXIT!")