git.mxchange.org Git - fba.git/commitdiff
Continued:
author Roland Häder <roland@mxchange.org>
Wed, 21 Jun 2023 01:16:26 +0000 (03:16 +0200)
committer Roland Häder <roland@mxchange.org>
Wed, 21 Jun 2023 01:16:26 +0000 (03:16 +0200)
- also tidy up the blocked domain from Friendica
- also check it against the blacklist and any unwanted .arpa/.tld TLDs

fba/networks/friendica.py

index 0c881f79fea8b91982ac15d5bf6af173e25da5d6..393f65964c0b789207a6627659d3f9ec20351a08 100644 (file)
 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
 
 import bs4
+import validators
 
 from fba import config
 from fba import network
 
+from fba.helpers import blacklist
 from fba.helpers import tidyup
 
 from fba.models import instances
@@ -68,8 +70,24 @@ def fetch_blocks(domain: str) -> dict:
     # DEBUG: print(f"DEBUG: Found rows()={len(rows)}")
     for line in rows:
         # DEBUG: print(f"DEBUG: line='{line}'")
+        blocked = tidyup.domain(line.find_all("td")[0].text)
+        print(f"DEBUG: blocked='{blocked}'")
+
+        if not validators.domain(blocked):
+            print(f"WARNING: blocked='{blocked}' is not a valid domain - SKIPPED!")
+            continue
+        elif blocked.endswith(".arpa"):
+            print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
+            continue
+        elif blocked.endswith(".tld"):
+            print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!")
+            continue
+        elif blacklist.is_blacklisted(blocked):
+            # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - SKIPPED!")
+            continue
+
         blocked.append({
-            "domain": tidyup.domain(line.find_all("td")[0].text),
+            "domain": tidyup.domain(domaih),
             "reason": tidyup.reason(line.find_all("td")[1].text)
         })
         # DEBUG: print("DEBUG: Next!")