From 88c30cbcf5f275c39bee5426443dd5b30c51281a Mon Sep 17 00:00:00 2001
From: =?utf8?q?Roland=20H=C3=A4der?=
Date: Fri, 9 Jun 2023 06:28:39 +0200
Subject: [PATCH] Continued:

- fixed bad indenting
- renamed json -> rows to avoid redeclaration with module 'json'
- sorted import members (standard imports first)
- removed unused imports
- fixed "Redefining name 'headers' from outer scope"
---
 fba/commands.py            | 28 +++++++++++++---------------
 fba/fba.py                 |  8 ++++----
 fba/federation/mastodon.py | 22 +++++++++++-----------
 fba/federation/peertube.py |  2 +-
 fba/federation/pleroma.py  | 14 +++++++-------
 fba/instances.py           |  6 +++---
 fba/network.py             |  4 ++--
 7 files changed, 41 insertions(+), 43 deletions(-)

diff --git a/fba/commands.py b/fba/commands.py
index 98d8ba6..ce06879 100644
--- a/fba/commands.py
+++ b/fba/commands.py
@@ -14,18 +14,16 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.

-import argparse
-import atoma
-import bs4
 import csv
 import inspect
-import itertools
 import json
-import markdown
-import re
-import reqto
 import sys
 import time
+
+import argparse
+import atoma
+import bs4
+import markdown
 import validators

 from fba import blacklist
@@ -160,12 +158,12 @@ def fetch_blocks(args: argparse.Namespace):
         print(f"INFO: blocker='{blocker}',software='{software}'")
         try:
             if software == "friendica":
-                json = friendica.fetch_blocks(blocker)
+                rows = friendica.fetch_blocks(blocker)
             elif software == "misskey":
-                json = misskey.fetch_blocks(blocker)
+                rows = misskey.fetch_blocks(blocker)

-            print(f"INFO: Checking {len(json.items())} entries from blocker='{blocker}',software='{software}' ...")
-            for block_level, blocklist in json.items():
+            print(f"INFO: Checking {len(rows.items())} entries from blocker='{blocker}',software='{software}' ...")
+            for block_level, blocklist in rows.items():
                 # DEBUG: print("DEBUG: blocker,block_level,blocklist():", blocker, block_level, len(blocklist))
                 block_level = fba.tidyup_domain(block_level)
                 # DEBUG: print("DEBUG: AFTER-block_level:", block_level)
@@ -284,7 +282,7 @@ def fetch_cs(args: argparse.Namespace):
     }

     try:
-        raw = fba.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
+        raw = fba.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
         # DEBUG: print(f"DEBUG: raw()={len(raw)}[]={type(raw)}")

         doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features='html.parser')
@@ -331,7 +329,7 @@ def fetch_fba_rss(args: argparse.Namespace):

     try:
         print(f"INFO: Fetch FBA-specific RSS args.feed='{args.feed}' ...")
-        response = fba.fetch_url(args.feed, network.headers, (config.get("connection_timeout"), config.get("read_timeout")))
+        response = fba.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
         # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")

         if response.ok and response.status_code < 300 and len(response.text) > 0:
@@ -378,7 +376,7 @@ def fetch_fbabot_atom(args: argparse.Namespace):
     domains = list()
     try:
         print(f"INFO: Fetching ATOM feed='{feed}' from FBA bot account ...")
-        response = fba.fetch_url(feed, network.headers, (config.get("connection_timeout"), config.get("read_timeout")))
+        response = fba.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
         # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")

         if response.ok and response.status_code < 300 and len(response.text) > 0:
@@ -458,7 +456,7 @@ def fetch_federater(args: argparse.Namespace):
     boot.acquire_lock()

     # Fetch this URL
-    response = fba.fetch_url("https://github.com/federater/blocks_recommended/raw/main/federater.csv", network.headers, (config.get("connection_timeout"), config.get("read_timeout")))
+    response = fba.fetch_url("https://github.com/federater/blocks_recommended/raw/main/federater.csv", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
     # DEBUG: print(f"DEBUG: response[]='{type(response)}'")
     if response.ok and response.content != "":
         # DEBUG: print(f"DEBUG: Fetched {len(response.content)} Bytes, parsing CSV ...")
diff --git a/fba/fba.py b/fba/fba.py
index 95c916c..59a3e83 100644
--- a/fba/fba.py
+++ b/fba/fba.py
@@ -13,20 +13,20 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.

-import bs4
 import hashlib
 import re
-import requests
 import json
 import sqlite3
 import sys
 import time
+
+import bs4
+import requests
 import validators

 from urllib.parse import urlparse

 from fba import blacklist
-from fba import cache
 from fba import config
 from fba import instances
 from fba import network
@@ -522,7 +522,7 @@ def fetch_generator_from_path(domain: str, path: str = "/") -> str:

     try:
         # DEBUG: print(f"DEBUG: Fetching path='{path}' from '{domain}' ...")
-        response = network.fetch_response(domain, path, network.headers, (config.get("connection_timeout"), config.get("read_timeout")))
+        response = network.fetch_response(domain, path, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

         # DEBUG: print("DEBUG: domain,response.ok,response.status_code,response.text[]:", domain, response.ok, response.status_code, type(response.text))
         if response.ok and response.status_code < 300 and len(response.text) > 0:
diff --git a/fba/federation/mastodon.py b/fba/federation/mastodon.py
index b2213a5..6cbdce4 100644
--- a/fba/federation/mastodon.py
+++ b/fba/federation/mastodon.py
@@ -68,7 +68,7 @@ def fetch_blocks_from_about(domain: str) -> dict:

     try:
         doc = bs4.BeautifulSoup(
-            network.fetch_response(domain, "/about/more", network.headers, (config.get("connection_timeout"), config.get("read_timeout"))).text,
+            network.fetch_response(domain, "/about/more", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text,
             "html.parser",
         )
     except BaseException as exception:
@@ -93,7 +93,7 @@ def fetch_blocks_from_about(domain: str) -> dict:
                 {
                     "domain": fba.tidyup_domain(line.find("span").text),
                     "hash"  : fba.tidyup_domain(line.find("span")["title"][9:]),
-                    "reason": fba.tidyup_domain(line.find_all("td")[1].text),
+                    "reason": fba.tidyup_reason(line.find_all("td")[1].text),
                 }
             )
         else:
@@ -125,7 +125,7 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str):
     # json endpoint for newer mastodongs
     blockdict = list()
     try:
-        json = {
+        rows = {
             "reject"        : [],
             "media_removal" : [],
             "followers_only": [],
@@ -135,7 +135,7 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str):
         # handling CSRF, I've saw at least one server requiring it to access the endpoint
         # DEBUG: print("DEBUG: Fetching meta:", domain)
         meta = bs4.BeautifulSoup(
-            network.fetch_response(domain, "/", network.headers, (config.get("connection_timeout"), config.get("read_timeout"))).text,
+            network.fetch_response(domain, "/", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text,
             "html.parser",
         )
         try:
@@ -160,25 +160,25 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str):
                 # DEBUG: print("DEBUG: severity,domain,hash,comment:", block['severity'], block['domain'], block['digest'], block['comment'])
                 if block['severity'] == 'suspend':
                     # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...")
-                    json['reject'].append(entry)
+                    rows['reject'].append(entry)
                 elif block['severity'] == 'silence':
                     # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...")
-                    json['followers_only'].append(entry)
+                    rows['followers_only'].append(entry)
                 elif block['severity'] == 'reject_media':
                     # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...")
-                    json['media_removal'].append(entry)
+                    rows['media_removal'].append(entry)
                 elif block['severity'] == 'reject_reports':
                     # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...")
-                    json['report_removal'].append(entry)
+                    rows['report_removal'].append(entry)
                 else:
                     print("WARNING: Unknown severity:", block['severity'], block['domain'])
     except BaseException as exception:
         # DEBUG: print(f"DEBUG: Failed, trying mastodon-specific fetches: domain='{domain}',exception[{type(exception)}]={str(exception)}")
-        json = fetch_blocks_from_about(domain)
+        rows = fetch_blocks_from_about(domain)

-    print(f"INFO: Checking {len(json.items())} entries from domain='{domain}',software='mastodon' ...")
-    for block_level, blocklist in json.items():
+    print(f"INFO: Checking {len(rows.items())} entries from domain='{domain}',software='mastodon' ...")
+    for block_level, blocklist in rows.items():
         # DEBUG: print("DEBUG: domain,block_level,blocklist():", domain, block_level, len(blocklist))
         block_level = fba.tidyup_domain(block_level)
diff --git a/fba/federation/peertube.py b/fba/federation/peertube.py
index cc9dd7a..cff21c8 100644
--- a/fba/federation/peertube.py
+++ b/fba/federation/peertube.py
@@ -33,7 +33,7 @@ def fetch_peers(domain: str) -> list:
     # DEBUG: print(f"DEBUG: domain='{domain}',mode='{mode}'")
     while True:
         try:
-            response = network.fetch_response(domain, "/api/v1/server/{mode}?start={start}&count=100", network.headers, (config.get("connection_timeout"), config.get("read_timeout")))
+            response = network.fetch_response(domain, "/api/v1/server/{mode}?start={start}&count=100", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

             data = fba.json_from_response(response)
             # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',data[]='{type(data)}'")
diff --git a/fba/federation/pleroma.py b/fba/federation/pleroma.py
index 764209c..868d93f 100644
--- a/fba/federation/pleroma.py
+++ b/fba/federation/pleroma.py
@@ -40,22 +40,22 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str):
     try:
         # Blocks
         blockdict = list()
-        json = fba.fetch_nodeinfo(domain, nodeinfo_url)
+        rows = fba.fetch_nodeinfo(domain, nodeinfo_url)

-        if json is None:
+        if rows is None:
             print("WARNING: Could not fetch nodeinfo from domain:", domain)
             return
-        elif not "metadata" in json:
-            print(f"WARNING: json()={len(json)} does not have key 'metadata', domain='{domain}'")
+        elif not "metadata" in rows:
+            print(f"WARNING: rows()={len(rows)} does not have key 'metadata', domain='{domain}'")
             return
-        elif not "federation" in json["metadata"]:
-            print(f"WARNING: json()={len(json['metadata'])} does not have key 'federation', domain='{domain}'")
+        elif not "federation" in rows["metadata"]:
+            print(f"WARNING: rows()={len(rows['metadata'])} does not have key 'federation', domain='{domain}'")
             return

         # DEBUG: print("DEBUG: Updating nodeinfo:", domain)
         instances.update_last_nodeinfo(domain)

-        federation = json["metadata"]["federation"]
+        federation = rows["metadata"]["federation"]

         if "enabled" in federation:
             # DEBUG: print("DEBUG: Instance has no block list to analyze:", domain)
diff --git a/fba/instances.py b/fba/instances.py
index f31cad3..d1c27a9 100644
--- a/fba/instances.py
+++ b/fba/instances.py
@@ -101,9 +101,9 @@ def update_data(domain: str):
     for key in _pending:
         # DEBUG: print("DEBUG: key:", key)
         if domain in _pending[key]:
-           # DEBUG: print(f"DEBUG: Adding '{_pending[key][domain]}' for key='{key}' ...")
-           fields.append(_pending[key][domain])
-           sql_string += f" {key} = ?,"
+            # DEBUG: print(f"DEBUG: Adding '{_pending[key][domain]}' for key='{key}' ...")
+            fields.append(_pending[key][domain])
+            sql_string += f" {key} = ?,"

     fields.append(time.time())
     fields.append(domain)
diff --git a/fba/network.py b/fba/network.py
index d932221..df883c9 100644
--- a/fba/network.py
+++ b/fba/network.py
@@ -23,7 +23,7 @@ from fba import fba
 from fba import instances

 # HTTP headers for non-API requests
-headers = {
+web_headers = {
     "User-Agent": config.get("useragent"),
 }

@@ -125,7 +125,7 @@ def fetch_friendica_blocks(domain: str) -> dict:

     try:
         doc = bs4.BeautifulSoup(
-            fetch_response(domain, "/friendica", headers, (config.get("connection_timeout"), config.get("read_timeout"))).text,
+            fetch_response(domain, "/friendica", web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text,
             "html.parser",
         )
     except BaseException as exception:
-- 
2.39.5