git.mxchange.org Git - fba.git/commitdiff
Continued:
author Roland Häder <roland@mxchange.org>
Fri, 22 Dec 2023 07:44:52 +0000 (08:44 +0100)
committer Roland Häder <roland@mxchange.org>
Fri, 22 Dec 2023 07:44:52 +0000 (08:44 +0100)
- need to cut off everything after the hash symbol ("#") because that part
  is only for JavaScript click-event loaded content anyway (a short sketch
  follows below)
- prevented invoking tidyup.domain() on a few empty/None strings (see the
  second sketch after the file list)
- improved a few log messages
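
A minimal sketch of the fragment cut-off that the hunks below apply to
response.url (the example URL is made up for illustration):

    url = "https://example.com/instances#blocked"

    # Everything after the first "#" is a client-side fragment that only
    # drives JavaScript click-event loaded content, so it is cut off before
    # the URL is handed to domain_helper.is_in_url(). str.split("#")[0]
    # leaves fragment-free URLs unchanged.
    print(url.split("#")[0])                     # https://example.com/instances
    print("https://example.com".split("#")[0])   # https://example.com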

fba/http/csrf.py
fba/http/federation.py
fba/networks/friendica.py
fba/networks/lemmy.py
fba/networks/mastodon.py
fba/networks/misskey.py
fba/networks/peertube.py

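A short sketch of the empty-string guard that now wraps the tidyup calls
in the networks modules; tidyup_domain() is a hypothetical stand-in, since
the real helper's body is not part of this diff:

    # Hypothetical stand-in for fba's tidyup.domain(); assumed behaviour
    # for illustration only: trim whitespace, drop a trailing dot,
    # lower-case the rest.
    def tidyup_domain(domain: str) -> str:
        return domain.strip().rstrip(".").lower()

    for raw in ["Example.COM.", "", "  other.example"]:
        # The new conditional keeps empty strings away from the helper and
        # maps them to None, which the later `in [None, ""]` checks skip.
        blocked = tidyup_domain(raw) if raw != "" else None
        print(repr(blocked))   # 'example.com', None, 'other.example'
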
fba/http/csrf.py
index b84d5aaf6c7bb5a36a89158917655c88e6ec7570..a66fa02fb1ea4c966a3dfb277963dda5c40c08bb 100644 (file)
@@ -54,7 +54,7 @@ def determine(domain: str, headers: dict) -> dict:
     )
 
     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
-    if response.ok and response.status_code == 200 and response.text.strip() != "" and response.text.find("<html") > 0 and domain_helper.is_in_url(domain, response.url):
+    if response.ok and response.status_code == 200 and response.text.strip() != "" and response.text.find("<html") > 0 and domain_helper.is_in_url(domain, response.url.split("#")[0]):
         # Save cookies
         logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
         cookies.store(domain, response.cookies.get_dict())
@@ -71,7 +71,7 @@ def determine(domain: str, headers: dict) -> dict:
         if tag is not None:
             logger.debug("Adding CSRF token='%s' for domain='%s'", tag["content"], domain)
             reqheaders["X-CSRF-Token"] = tag["content"]
-    elif not domain_helper.is_in_url(domain, response.url):
+    elif not domain_helper.is_in_url(domain, response.url.split("#")[0]):
         logger.warning("domain='%s' doesn't match with response.url='%s', maybe redirect to other domain?", domain, response.url)
 
         message = f"Redirect from domain='{domain}' to response.url='{response.url}'"
fba/http/federation.py
index 355036e76bad78314033e6ca62a128038fb40b79..2c35fc65887373b84dcf368329dbcc46e25b2d5b 100644 (file)
@@ -295,7 +295,7 @@ def fetch_generator_from_path(domain: str, path: str = "/") -> str:
     )
 
     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
-    if ((response.ok and response.status_code == 200) or response.status_code == 410) and response.text.find("<html") > 0 and domain_helper.is_in_url(domain, response.url):
+    if ((response.ok and response.status_code == 200) or response.status_code == 410) and response.text.find("<html") > 0 and domain_helper.is_in_url(domain, response.url.split("#")[0]):
         logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
         doc = bs4.BeautifulSoup(response.text, "html.parser")
 
@@ -338,7 +338,7 @@ def fetch_generator_from_path(domain: str, path: str = "/") -> str:
             if software is not None and software != "":
                 logger.debug("domain='%s' has og:site_name='%s' - Setting detection_mode=SITE_NAME ...", domain, software)
                 instances.set_detection_mode(domain, "SITE_NAME")
-    elif not domain_helper.is_in_url(domain, response.url):
+    elif not domain_helper.is_in_url(domain, response.url.split("#")[0]):
         logger.warning("domain='%s' doesn't match response.url='%s', maybe redirect to other domain?", domain, response.url)
 
         components = urlparse(response.url)
fba/networks/friendica.py
index f723c710f01a0368c21e93ec672b5408ada5eea3..05efa9a575b55fe570098bf6b5ac296a8a955062 100644 (file)
@@ -44,7 +44,7 @@ def fetch_blocks(domain: str) -> list:
     block_tag = None
 
     try:
-        logger.debug("Fetching friendica blocks from domain='%s'", domain)
+        logger.debug("Fetching friendica blocks from domain='%s' ...", domain)
         raw = network.fetch_response(
             domain,
             "/friendica",
@@ -85,10 +85,11 @@ def fetch_blocks(domain: str) -> list:
     for line in rows:
         logger.debug("line='%s'", line)
         blocked = line.find_all("td")[0].text
-        logger.debug("blocked='%s'", blocked)
+        reason  = line.find_all("td")[1].text
+        logger.debug("blocked='%s',reason='%s' - BEFORE!", blocked, reason)
 
         blocked = tidyup.domain(blocked) if blocked != "" else None
-        reason  = tidyup.reason(line.find_all("td")[1].text)
+        reason  = tidyup.reason(reason) if reason != "" else None
         logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)
 
         if blocked in [None, ""]:
fba/networks/lemmy.py
index 7d058d596e0bb381286b1cb8a40fa0d973e41501..766e4dd59c73845e113703412a51e94866e0977e 100644 (file)
@@ -83,7 +83,7 @@ def fetch_peers(domain: str, origin: str) -> list:
     headers = tuple()
 
     try:
-        logger.debug("Checking CSRF for domain='%s'", domain)
+        logger.debug("Checking CSRF for domain='%s' ...", domain)
         headers = csrf.determine(domain, dict())
     except network.exceptions as exception:
         logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s)", type(exception), __name__)
@@ -384,7 +384,7 @@ def parse_script(doc: bs4.BeautifulSoup, only: str = None) -> list:
                     continue
 
                 logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
-                peer = tidyup.domain(row["domain"])
+                peer = tidyup.domain(row["domain"]) if row["domain"] != "" else None
                 logger.debug("peer='%s' - AFTER!", peer)
 
                 if peer in [None, ""]:
fba/networks/mastodon.py
index 127c33a414051d59a20c17bca7c7e48792608bae..e7d00b6e667c04ee3eb3fb9eb6799af20f4b5fb2 100644 (file)
@@ -189,7 +189,7 @@ def fetch_blocks(domain: str) -> list:
 
             reason = tidyup.reason(block["comment"]) if "comment" in block and block["comment"] is not None and block["comment"] != "" else None
 
-            logger.debug("Appending blocker='%s',blocked='%s',reason='%s',block_level='%s'", domain, block["domain"], reason, block["severity"])
+            logger.debug("Appending blocker='%s',blocked='%s',reason='%s',block_level='%s' ...", domain, block["domain"], reason, block["severity"])
             blocklist.append({
                 "blocker"    : domain,
                 "blocked"    : block["domain"],
fba/networks/misskey.py
index 61005047a497df81386a3ad68bb042c4a2f4d45b..03b038fa3863b57202ff3651cf8b5355e51433e3 100644 (file)
@@ -297,8 +297,8 @@ def fetch_blocks(domain: str) -> list:
             for instance in rows:
                 # Is it there?
                 logger.debug("instance[]='%s'", type(instance))
-                blocked = tidyup.domain(instance["host"])
-                logger.debug("blocked='%s'", blocked)
+                blocked = tidyup.domain(instance["host"]) if instance["host"] != "" else None
+                logger.debug("blocked='%s' - AFTER!", blocked)
 
                 if blocked in [None, ""]:
                     logger.warning("instance[host]='%s' is None or empty after tidyup.domain() - SKIPPED!", instance["host"])
fba/networks/peertube.py
index caf38653dc61e2edb4b0f20f0ff2705861086ac7..733a5b69b3e68473aead288c026934012bca2819 100644 (file)
@@ -43,7 +43,7 @@ def fetch_peers(domain: str) -> list:
     start   = 0
 
     try:
-        logger.debug("Checking CSRF for domain='%s'", domain)
+        logger.debug("Checking CSRF for domain='%s' ...", domain)
         headers = csrf.determine(domain, dict())
     except network.exceptions as exception:
         logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s)", type(exception), __name__)