git.mxchange.org Git - fba.git/commitdiff
Continued:
author    Roland Häder <roland@mxchange.org>
          Fri, 9 Jun 2023 03:32:23 +0000 (05:32 +0200)
committer Roland Häder <roland@mxchange.org>
          Fri, 9 Jun 2023 03:32:23 +0000 (05:32 +0200)
- the chaossocial/meta repository is more up-to-date than the rendered page on
  meta.chaos.social, so fetch federation.md from there instead
- had to render the fetched markdown back to HTML so the existing
  BeautifulSoup-based code can still traverse it (see the sketch below)
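
A minimal, self-contained sketch of the new approach. The URL and the heading
ids are the ones used in fba/commands.py below; the per-row domain extraction
is only a simplified stand-in for fba.find_domains(), and the timeout values
are placeholders:

    import bs4
    import markdown
    import requests

    # Fetch the raw markdown instead of the rendered HTML page.
    raw = requests.get(
        "https://raw.githubusercontent.com/chaossocial/meta/master/federation.md",
        timeout=(5.0, 10.0),   # (connect, read) timeouts as a tuple
    ).text

    # Render it back to HTML: "extra" brings table support, "toc" adds id
    # attributes to the headings so they can be looked up by id.
    html = markdown.markdown(raw, extensions=["extra", "toc"])
    doc = bs4.BeautifulSoup(html, features="html.parser")

    for heading_id in ("silenced-instances", "blocked-instances"):
        tbody = doc.find("h2", {"id": heading_id}).findNext("table").find("tbody")
        # Simplified stand-in for fba.find_domains(): first cell of each row.
        domains = [row.find("td").text.strip() for row in tbody.find_all("tr")]
        print(heading_id, domains)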

fba/commands.py
fba/fba.py

index a14d8783419a327e964327f0fd5c16619a34cae3..1db0c0872220b96e957cd7393575ef2624e6e679 100644 (file)
@@ -21,6 +21,7 @@ import csv
 import inspect
 import itertools
 import json
+import markdown
 import re
 import reqto
 import sys
@@ -257,23 +258,43 @@ def fetch_blocks(args: argparse.Namespace):
 
 def fetch_cs(args: argparse.Namespace):
     # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
+    extensions = [
+        'extra',
+        'abbr',
+        'attr_list',
+        'def_list',
+        'fenced_code',
+        'footnotes',
+        'md_in_html',
+        'admonition',
+        'codehilite',
+        'legacy_attrs',
+        'legacy_em',
+        'meta',
+        'nl2br',
+        'sane_lists',
+        'smarty',
+        'toc',
+        'wikilinks'
+    ]
+
     domains = {
         "silenced": list(),
         "reject"  : list(),
     }
 
     try:
-        doc = bs4.BeautifulSoup(
-            network.fetch_response("meta.chaos.social", "/federation", fba.headers, (config.get("connection_timeout"), config.get("read_timeout"))).text,
-            "html.parser",
-        )
-        # DEBUG: print(f"DEBUG: doc()={len(doc)}[]={type(doc)}")
-        silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table")
+        raw = fba.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", fba.headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
+        # DEBUG: print(f"DEBUG: raw()={len(raw)}[]={type(raw)}")
 
+        doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features='html.parser')
+
+        # DEBUG: print(f"DEBUG: doc()={len(doc)}[]={type(doc)}")
+        silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
         # DEBUG: print(f"DEBUG: silenced[]={type(silenced)}")
         domains["silenced"] = domains["silenced"] + fba.find_domains(silenced)
-        blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table")
 
+        blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
         # DEBUG: print(f"DEBUG: blocked[]={type(blocked)}")
         domains["reject"] = domains["reject"] + fba.find_domains(blocked)
 
index 10204167426ab1d9a7dddc095439dbc9ebb5e288..7f0c140d3dac2ade2041077ea01d6ebb2e37992e 100644 (file)
@@ -807,7 +807,7 @@ def find_domains(tag: bs4.element.Tag) -> list:
     # DEBUG: print(f"DEBUG: domains()={len(domains)} - EXIT!")
     return domains
 
-def fetch_url(url: str, headers: dict, timeout: list) -> requests.models.Response:
+def fetch_url(url: str, headers: dict, timeout: tuple) -> requests.models.Response:
     # DEBUG: print(f"DEBUG: url='{url}',headers()={len(headers)},timeout={timeout} - CALLED!")
     if not isinstance(url, str):
         raise ValueError(f"Parameter url[]='{type(url)}' is not 'str'")
@@ -815,8 +815,8 @@ def fetch_url(url: str, headers: dict, timeout: list) -> requests.models.Respons
         raise ValueError("Parameter 'url' is empty")
     elif not isinstance(headers, dict):
         raise ValueError(f"Parameter headers[]='{type(headers)}' is not 'dict'")
-    elif not isinstance(timeout, list):
-        raise ValueError(f"Parameter timeout[]='{type(timeout)}' is not 'list'")
+    elif not isinstance(timeout, tuple):
+        raise ValueError(f"Parameter timeout[]='{type(timeout)}' is not 'tuple'")
 
     # DEBUG: print(f"DEBUG: Parsing url='{url}'")
     components = urlparse(url)
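
For context on the list-to-tuple change above: requests accepts its timeout
parameter either as a single float or as a (connect, read) tuple, so typing and
validating the pair as a tuple matches the library's convention. A quick
illustration with placeholder values (the real code builds the tuple from
config.get("connection_timeout") and config.get("read_timeout")):

    import requests

    # (connect timeout, read timeout) in seconds, as expected by requests
    timeout = (5.0, 10.0)
    response = requests.get("https://example.org/", timeout=timeout)
    print(response.status_code)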