git.mxchange.org Git - fba.git/commitdiff
Continued:
author Roland Häder <roland@mxchange.org>
Wed, 21 Jun 2023 03:41:20 +0000 (05:41 +0200)
committer Roland Häder <roland@mxchange.org>
Wed, 21 Jun 2023 04:06:54 +0000 (06:06 +0200)
- also skip here to avoid exception

fba/http/federation.py
fba/networks/misskey.py

index 492d932ef7ba8148c3ae7ff970cf9ebdb2c584da..c61bea5188b81b6b175311147dd6a2efc299bf29 100644 (file)
@@ -357,12 +357,18 @@ def fetch_wellknown_nodeinfo(domain: str) -> dict:
                         url = f"https://{domain}{url}"
                         components = urlparse(url)
 
-                    if blacklist.is_blacklisted(components.netloc):
-                        print(f"WARNING: components.netloc='{components.netloc}' is blacklisted - SKIPPED!")
-                        continue
-                    elif not validators.domain(components.netloc):
+                    if not validators.domain(components.netloc):
                         print(f"WARNING: components.netloc='{components.netloc}' is not a valid domain - SKIPPED!")
                         continue
+                    elif domain.endswith(".arpa"):
+                        print(f"WARNING: domain='{domain}' is a domain for reversed IP addresses - SKIPPED!")
+                        continue
+                    elif domain.endswith(".tld"):
+                        print(f"WARNING: domain='{domain}' is a fake domain - SKIPPED!")
+                        continue
+                    elif blacklist.is_blacklisted(components.netloc):
+                        # DEBUG: print(f"DEBUG: components.netloc='{components.netloc}' is blacklisted - SKIPPED!")
+                        continue
 
                     # DEBUG: print("DEBUG: Fetching nodeinfo from:", url)
                     data = network.fetch_api_url(
@@ -585,8 +591,17 @@ def find_domains(tag: bs4.element.Tag) -> list:
 
         # DEBUG: print(f"DEBUG: domain='{domain}',reason='{reason}'")
 
-        if blacklist.is_blacklisted(domain):
-            print(f"WARNING: domain='{domain}' is blacklisted - SKIPPED!")
+        if not validators.domain(domain.split("/")[0]):
+            print(f"WARNING: domain='{domain}' is not a valid domain - SKIPPED!")
+            continue
+        elif domain.endswith(".arpa"):
+            print(f"WARNING: domain='{domain}' is a domain for reversed IP addresses - SKIPPED!")
+            continue
+        elif domain.endswith(".tld"):
+            print(f"WARNING: domain='{domain}' is a fake domain - SKIPPED!")
+            continue
+        elif blacklist.is_blacklisted(domain):
+            # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
             continue
         elif domain == "gab.com/.ai, develop.gab.com":
             # DEBUG: print("DEBUG: Multiple domains detected in one row")
@@ -638,7 +653,7 @@ def add_peers(rows: dict) -> list:
                     print(f"WARNING: peer='{peer}' is a fake domain - SKIPPED!")
                     continue
                 elif blacklist.is_blacklisted(peer):
-                    print(f"WARNING: peer='{peer}' is blacklisted - SKIPPED!")
+                    # DEBUG: print(f"DEBUG: peer='{peer}' is blacklisted - SKIPPED!")
                     continue
 
                 # DEBUG: print(f"DEBUG: Adding peer='{peer}' ...")
index 2cede8b704b505f226d0c150aad7a0b53e4d3cce..f39c649fa895a8fa976a532a4093423621ec8235 100644 (file)
@@ -110,10 +110,19 @@ def fetch_peers(domain: str) -> list:
                 print(f"WARNING: row()={len(row)} does not contain key 'host': {row},domain='{domain}'")
                 continue
             elif not isinstance(row["host"], str):
-                print(f"WARNING: row[host][]='{type(row['host'])}' is not 'str'")
+                print(f"WARNING: row[host][]='{type(row['host'])}' is not 'str' - SKIPPED!")
+                continue
+            elif not validators.domain(row["host"].split("/")[0]):
+                print(f"WARNING: row[host]='{row['host']}' is not a valid domain - SKIPPED!")
+                continue
+            elif row["host"].endswith(".arpa"):
+                print(f"WARNING: row[host]='{row['host']}' is a domain for reversed IP addresses - SKIPPED!")
+                continue
+            elif row["host"].endswith(".tld"):
+                print(f"WARNING: row[host]='{row['host']}' is a fake domain - SKIPPED!")
                 continue
             elif blacklist.is_blacklisted(row["host"]):
-                # DEBUG: print(f"DEBUG: row[host]='{row['host']}' is blacklisted. domain='{domain}'")
+                # DEBUG: print(f"DEBUG: row[host]='{row['host']}' is blacklisted. domain='{domain}' - SKIPPED!")
                 continue
             elif row["host"] in peers:
                 # DEBUG: print(f"DEBUG: Not adding row[host]='{row['host']}', already found.")