From: Roland Häder <roland@mxchange.org>
Date: Wed, 21 Jun 2023 01:16:26 +0000 (+0200)
Subject: Continued:
X-Git-Url: https://git.mxchange.org/?a=commitdiff_plain;h=ddbf920c6e5963095a10f35abdd85f7b53a6f09e;p=fba.git

Continued:
- also tidy up the blocked domain from Friendica
- also check it against the blacklist and any unwanted .arpa/.tld TLDs
---

diff --git a/fba/networks/friendica.py b/fba/networks/friendica.py
index 0c881f7..393f659 100644
--- a/fba/networks/friendica.py
+++ b/fba/networks/friendica.py
@@ -15,10 +15,12 @@
 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
 
 import bs4
+import validators
 
 from fba import config
 from fba import network
 
+from fba.helpers import blacklist
 from fba.helpers import tidyup
 
 from fba.models import instances
@@ -68,8 +70,24 @@ def fetch_blocks(domain: str) -> dict:
     # DEBUG: print(f"DEBUG: Found rows()={len(rows)}")
     for line in rows:
         # DEBUG: print(f"DEBUG: line='{line}'")
+        blocked = tidyup.domain(line.find_all("td")[0].text)
+        print(f"DEBUG: blocked='{blocked}'")
+
+        if not validators.domain(blocked):
+            print(f"WARNING: blocked='{blocked}' is not a valid domain - SKIPPED!")
+            continue
+        elif blocked.endswith(".arpa"):
+            print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
+            continue
+        elif blocked.endswith(".tld"):
+            print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!")
+            continue
+        elif blacklist.is_blacklisted(blocked):
+            # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - SKIPPED!")
+            continue
+
         blocked.append({
-            "domain": tidyup.domain(line.find_all("td")[0].text),
+            "domain": tidyup.domain(domaih),
             "reason": tidyup.reason(line.find_all("td")[1].text)
         })
         # DEBUG: print("DEBUG: Next!")