]> git.mxchange.org Git - fba.git/commitdiff
Continued:
authorRoland Häder <roland@mxchange.org>
Thu, 8 Jun 2023 23:58:53 +0000 (01:58 +0200)
committerRoland Häder <roland@mxchange.org>
Thu, 8 Jun 2023 23:58:53 +0000 (01:58 +0200)
- added command 'fetch_federater', which fetches a CSV file from GitHub
- if software is None, attempt to determine it
- renamed get_url() to fetch_url()

fba/boot.py
fba/commands.py
fba/fba.py

index 13a233d5b3e86ad8f3b2bbd9b5b893378ce4179a..e66690bd6fabbd0bfe942f0488c9ec600326c0de 100644 (file)
@@ -99,6 +99,13 @@ def init_parser():
     )
     parser.set_defaults(command=commands.fetch_fbabot_atom)
 
+    ### Fetch blocks from federater ###
+    parser = subparser_command.add_parser(
+        "fetch_federater",
+        help="Fetches CSV file (block recommendations) for more possible instances to disover",
+    )
+    parser.set_defaults(command=commands.fetch_federater)
+
     ### Fetch instances from given initial instance ###
     parser = subparser_command.add_parser(
         "fetch_instances",
index 809ee21ddb91d7f7fbfd38d9c9d9d748fda46db9..c2bb7917c643f667d587704057e7d654c95fa095 100644 (file)
@@ -17,6 +17,7 @@
 import argparse
 import atoma
 import bs4
+import csv
 import inspect
 import itertools
 import json
@@ -308,7 +309,7 @@ def fetch_fba_rss(args: argparse.Namespace):
 
     try:
         print(f"INFO: Fetch FBA-specific RSS args.feed='{args.feed}' ...")
-        response = fba.get_url(args.feed, fba.headers, (config.get("connection_timeout"), config.get("read_timeout")))
+        response = fba.fetch_url(args.feed, fba.headers, (config.get("connection_timeout"), config.get("read_timeout")))
 
         # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
         if response.ok and response.status_code < 300 and len(response.text) > 0:
@@ -355,7 +356,7 @@ def fetch_fbabot_atom(args: argparse.Namespace):
     domains = list()
     try:
         print(f"INFO: Fetching ATOM feed='{feed}' from FBA bot account ...")
-        response = fba.get_url(feed, fba.headers, (config.get("connection_timeout"), config.get("read_timeout")))
+        response = fba.fetch_url(feed, fba.headers, (config.get("connection_timeout"), config.get("read_timeout")))
 
         # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
         if response.ok and response.status_code < 300 and len(response.text) > 0:
@@ -429,3 +430,32 @@ def fetch_instances(args: argparse.Namespace):
         fba.fetch_instances(row[0], row[1], row[2], inspect.currentframe().f_code.co_name, row[3])
 
     # DEBUG: print("DEBUG: EXIT!")
+
+def fetch_federater(args: argparse.Namespace):
+    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
+    boot.acquire_lock()
+
+    # Fetch this URL
+    response = fba.fetch_url("https://github.com/federater/blocks_recommended/raw/main/federater.csv", fba.headers, (config.get("connection_timeout"), config.get("read_timeout")))
+    # DEBUG: print(f"DEBUG: response[]='{type(response)}'")
+    if response.ok and response.content != "":
+        # DEBUG: print(f"DEBUG: Fetched {len(response.content)} Bytes, parsing CSV ...")
+        #print(f"DEBUG: response.content={response.content}")
+        reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect='unix')
+        #, fieldnames='domain,severity,reject_media,reject_reports,public_comment,obfuscate'
+        # DEBUG: print(f"DEBUG: reader[]={type(reader)}")
+        for row in reader:
+            if not validators.domain(row["#domain"]):
+                print(f"WARNING: domain='{row['#domain']}' is not a valid domain - skipped!")
+                continue
+            elif blacklist.is_blacklisted(row["#domain"]):
+                print(f"WARNING: domain='{row['#domain']}' is blacklisted - skipped!")
+                continue
+            elif instances.is_registered(row["#domain"]):
+                # DEBUG: print(f"DEBUG: domain='{row['#domain']}' is already registered - skipped!")
+                continue
+
+            print(f"INFO: Fetching instances for instane='{row['#domain']}' ...")
+            fba.fetch_instances(row["#domain"], 'github.com', None, inspect.currentframe().f_code.co_name)
+
+    # DEBUG: print("DEBUG: EXIT!")
index 1eddf5d5aae7af0919831df1b59a5d9ed5164635..7868dd6356382dd8dac20374b3eb7d922c417c51 100644 (file)
@@ -93,6 +93,12 @@ def fetch_instances(domain: str, origin: str, software: str, script: str, path:
         raise ValueError(f"Parameter 'domain' is empty")
     elif type(origin) != str and origin != None:
         raise ValueError(f"Parameter origin[]={type(origin)} is not 'str'")
+    elif software == None:
+        print(f"DEBUG: software for domain='{domain}' is not set, determining ...")
+        software = determine_software(domain, path)
+        print(f"DEBUG: Determined software='{software}' for domain='{domain}'")
+    elif type(software) != str:
+        raise ValueError(f"Parameter software[]={type(software)} is not 'str'")
     elif type(script) != str:
         raise ValueError(f"Parameter script[]={type(script)} is not 'str'")
     elif domain == "":
@@ -521,7 +527,7 @@ def fetch_wellknown_nodeinfo(domain: str) -> list:
                     # DEBUG: print("DEBUG: rel,href:", link["rel"], link["href"])
                     if link["rel"] in nodeinfo_identifier:
                         # DEBUG: print("DEBUG: Fetching nodeinfo from:", link["href"])
-                        response = get_url(link["href"], api_headers, (config.get("connection_timeout"), config.get("read_timeout")))
+                        response = fetch_url(link["href"], api_headers, (config.get("connection_timeout"), config.get("read_timeout")))
 
                         data = json_from_response(response)
                         # DEBUG: print("DEBUG: href,response.ok,response.status_code:", link["href"], response.ok, response.status_code)
@@ -955,7 +961,7 @@ def find_domains(tag: bs4.element.Tag) -> list:
     # DEBUG: print(f"DEBUG: domains()={len(domains)} - EXIT!")
     return domains
 
-def get_url(url: str, headers: dict, timeout: list) -> requests.models.Response:
+def fetch_url(url: str, headers: dict, timeout: list) -> requests.models.Response:
     # DEBUG: print(f"DEBUG: url='{url}',headers()={len(headers)},timeout={timeout} - CALLED!")
     if type(url) != str:
         raise ValueError(f"Parameter url[]='{type(url)}' is not 'str'")