# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
-import inspect
+import logging
import bs4
-import validators
-from fba import blacklist
-from fba import config
from fba import csrf
-from fba import fba
-from fba import federation
-from fba import network
+from fba import utils
+from fba.helpers import config
+from fba.helpers import domain as domain_helper
from fba.helpers import tidyup
-from fba.models import blocks
+from fba.http import federation
+from fba.http import network
+
from fba.models import instances
-def fetch_peers(domain: str) -> list:
- # DEBUG: print(f"DEBUG: domain({len(domain)})='{domain}',software='lemmy' - CALLED!")
- if not isinstance(domain, str):
- raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
- elif domain == "":
- raise ValueError("Parameter 'domain' is empty")
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+#logger.setLevel(logging.DEBUG)
+
+def fetch_peers(domain: str, origin: str) -> list:
+ logger.debug("domain='%s',origin='%s' - CALLED!", domain, origin)
+ domain_helper.raise_on(domain)
peers = list()
headers = tuple()
try:
- # DEBUG: print(f"DEBUG: Checking CSRF for domain='{domain}'")
+ logger.debug("Checking CSRF for domain='%s'", domain)
headers = csrf.determine(domain, dict())
except network.exceptions as exception:
- print(f"WARNING: Exception '{type(exception)}' during checking CSRF (fetch_peers,{__name__}) - EXIT!")
+ logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
instances.set_last_error(domain, exception)
- return peers
+ return list()
try:
- # DEBUG: print(f"DEBUG: domain='{domain}' is Lemmy, fetching JSON ...")
+ logger.debug("Fetching '/api/v3/site' from domain='%s' ...", domain)
data = network.get_json_api(
domain,
"/api/v3/site",
(config.get("connection_timeout"), config.get("read_timeout"))
)
- # DEBUG: print(f"DEBUG: data[]='{type(data)}'")
+ logger.debug("data[]='%s'", type(data))
if "error_message" in data:
- print("WARNING: Could not reach any JSON API:", domain)
+ logger.warning("Could not reach any JSON API: domain='%s'", domain)
instances.set_last_error(domain, data)
- elif "federated_instances" in data["json"]:
- # DEBUG: print(f"DEBUG: Found federated_instances for domain='{domain}'")
+ elif "federated_instances" in data["json"] and isinstance(data["json"]["federated_instances"], dict):
+ logger.debug("Found federated_instances for domain='%s'", domain)
peers = peers + federation.add_peers(data["json"]["federated_instances"])
- # DEBUG: print("DEBUG: Added instance(s) to peers")
- else:
- print("WARNING: JSON response does not contain 'federated_instances':", domain)
- instances.set_last_error(domain, data)
+
+ logger.debug("Marking domain='%s' as successfully handled ...", domain)
+ instances.set_success(domain)
+
+ if len(peers) == 0:
+ logger.warning("Fetching instances for domain='%s' from /instances ...", domain)
+ peers = fetch_instances(domain, origin)
except network.exceptions as exception:
- print(f"WARNING: Exception during fetching JSON: domain='{domain}',exception[{type(exception)}]:'{str(exception)}'")
+ logger.warning("Exception during fetching JSON: domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
instances.set_last_error(domain, exception)
- # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'")
- instances.set_total_peers(domain, peers)
-
- # DEBUG: print("DEBUG: Returning peers[]:", type(peers))
+ logger.debug("peers()=%d - EXIT!", len(peers))
return peers
-def fetch_blocks(domain: str, origin: str, nodeinfo_url: str):
- # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}' - CALLED!")
- if not isinstance(domain, str):
- raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
- elif domain == "":
- raise ValueError("Parameter 'domain' is empty")
- elif not isinstance(origin, str) and origin is not None:
- raise ValueError(f"Parameter origin[]='{type(origin)}' is not 'str'")
- elif origin == "":
- raise ValueError("Parameter 'origin' is empty")
- elif not isinstance(nodeinfo_url, str):
+def fetch_blocks(domain: str, nodeinfo_url: str) -> list:
+    logger.debug("domain='%s',nodeinfo_url='%s' - CALLED!", domain, nodeinfo_url)
+ domain_helper.raise_on(domain)
+
+ if not isinstance(nodeinfo_url, str):
raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not 'str'")
elif nodeinfo_url == "":
raise ValueError("Parameter 'nodeinfo_url' is empty")
translations = [
- "Blocked Instances",
- "Instàncies bloquejades",
- "Blocáilte Ásc",
- "封锁实例",
- "Blokované instance",
- "Geblokkeerde instanties",
- "Blockerade instanser",
- "Instàncias blocadas",
- "Istanze bloccate",
- "Instances bloquées",
- "Letiltott példányok",
- "Instancias bloqueadas",
- "Blokeatuta dauden instantziak",
- "차단된 인스턴스",
- "Peladen Yang Diblokir",
- "Blokerede servere",
- "Blokitaj nodoj",
- "Блокирани Инстанции",
- "Blockierte Instanzen",
- "Estetyt instanssit",
- "Instâncias bloqueadas",
- "Zablokowane instancje",
- "Blokované inštancie",
- "المثلاء المحجوبون",
- "Užblokuoti serveriai",
- "ブロックしたインスタンス",
- "Блокированные Инстансы",
- "Αποκλεισμένοι διακομιστές",
- "封鎖站台",
- "Instâncias bloqueadas",
+ "Blocked Instances".lower(),
+ "Instàncies bloquejades".lower(),
+ "Blocáilte Ásc".lower(),
+ "封锁实例".lower(),
+ "Blokované instance".lower(),
+ "Geblokkeerde instanties".lower(),
+ "Blockerade instanser".lower(),
+ "Instàncias blocadas".lower(),
+ "Istanze bloccate".lower(),
+ "Instances bloquées".lower(),
+ "Letiltott példányok".lower(),
+ "Instancias bloqueadas".lower(),
+ "Blokeatuta dauden instantziak".lower(),
+ "차단된 인스턴스".lower(),
+ "Peladen Yang Diblokir".lower(),
+ "Blokerede servere".lower(),
+ "Blokitaj nodoj".lower(),
+ "Блокирани Инстанции".lower(),
+ "Blockierte Instanzen".lower(),
+ "Estetyt instanssit".lower(),
+ "Instâncias bloqueadas".lower(),
+ "Zablokowane instancje".lower(),
+ "Blokované inštancie".lower(),
+ "المثلاء المحجوبون".lower(),
+ "Užblokuoti serveriai".lower(),
+ "ブロックしたインスタンス".lower(),
+ "Блокированные Инстансы".lower(),
+ "Αποκλεισμένοι διακομιστές".lower(),
+ "封鎖站台".lower(),
+ "Instâncias bloqueadas".lower(),
]
+ blocklist = list()
+
try:
# json endpoint for newer mastodongs
- found_blocks = list()
- blocklist = list()
-
- rows = {
- "reject" : [],
- "media_removal" : [],
- "followers_only": [],
- "report_removal": [],
- }
-
- # DEBUG: print(f"DEBUG: Fetching /instances from domain='{domain}'")
+ logger.debug("Fetching /instances from domain='%s'", domain)
response = network.fetch_response(
domain,
"/instances",
(config.get("connection_timeout"), config.get("read_timeout"))
)
- # DEBUG: print(f"DEBUG: response.ok='{response.ok}',response.status_code={response.status_code},response.text()={len(response.text)}")
+ logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
if response.ok and response.status_code < 300 and response.text != "":
- # DEBUG: print(f"DEBUG: Parsing {len(response.text)} Bytes ...")
+ logger.debug("Parsing %s Bytes ...", len(response.text))
doc = bs4.BeautifulSoup(response.text, "html.parser")
- # DEBUG: print(f"DEBUG: doc[]={type(doc)}")
+ logger.debug("doc[]='%s'", type(doc))
headers = doc.findAll("h5")
found = None
- # DEBUG: print(f"DEBUG: Search in {len(headers)} header(s) ...")
+ logger.debug("Search in %d header(s) ...", len(headers))
for header in headers:
- # DEBUG: print(f"DEBUG: header[]={type(header)}")
+ logger.debug("header[]='%s'", type(header))
content = header.contents[0]
-
- # DEBUG: print(f"DEBUG: content='{content}'")
- if content in translations:
- # DEBUG: print("DEBUG: Found header with blocked instances - BREAK!")
+
+ logger.debug("content[%s]='%s'", type(content), content)
+ if content is None:
+ logger.debug("domain='%s' has returned empty header='%s' - SKIPPED!", domain, header)
+ continue
+ elif not isinstance(content, str):
+ logger.debug("content[]='%s' is not supported/wanted type 'str' - SKIPPED!", type(content))
+ continue
+ elif content.lower() in translations:
+ logger.debug("Found header with blocked instances - BREAK!")
found = header
break
- # DEBUG: print(f"DEBUG: found[]='{type(found)}'")
+ logger.debug("found[]='%s'", type(found))
if found is None:
- # DEBUG: print(f"DEBUG: domain='{domain}' is not blocking any instances - EXIT!")
- return
+ logger.debug("domain='%s' is not blocking any instances - EXIT!", domain)
+ return blocklist
- blocking = found.find_next("ul").findAll("a")
- # DEBUG: print(f"DEBUG: Found {len(blocking)} blocked instance(s) ...")
+ blocking = found.find_next(["ul","table"]).findAll("a")
+ logger.debug("Found %d blocked instance(s) ...", len(blocking))
for tag in blocking:
- # DEBUG: print(f"DEBUG: tag[]='{type(tag)}'")
+ logger.debug("tag[]='%s'", type(tag))
blocked = tidyup.domain(tag.contents[0])
+ logger.debug("blocked='%s'", blocked)
- # DEBUG: print(f"DEBUG: blocked='{blocked}'")
- if not validators.domain(blocked):
- # DEBUG: print(f"DEBUG: blocked='{blocked}' is not a valid domain - SKIPPED!")
- continue
- elif blacklist.is_blacklisted(blocked):
- # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - SKIPPED!")
- continue
- elif blocked.endswith(".arpa"):
- print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
+ if blocked == "":
+ logger.warning("blocked='%s' is empty after tidyup.domain() - SKIPPED!", tag.contents[0])
continue
- elif blocked.endswith(".tld"):
- print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!")
+ elif not utils.is_domain_wanted(blocked):
+ logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
continue
- elif not instances.is_registered(blocked):
- # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, domain)
- instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url)
-
- if not blocks.is_instance_blocked(domain, blocked, "reject"):
- # DEBUG: print("DEBUG: Blocking:", domain, blocked)
- blocks.add_instance(domain, blocked, None, "reject")
-
- found_blocks.append({
- "blocked": blocked,
- "reason" : None
- })
- else:
- # DEBUG: print(f"DEBUG: Updating block last seen for domain='{domain}',blocked='{blocked}' ...")
- blocks.update_last_seen(domain, blocked, "reject")
-
- # DEBUG: print("DEBUG: Committing changes ...")
- fba.connection.commit()
+
+ logger.debug("Appending blocker='%s',blocked='%s',block_level='reject' ...", domain, blocked)
+ blocklist.append({
+ "blocker" : domain,
+ "blocked" : blocked,
+ "reason" : None,
+ "block_level": "reject",
+ })
+
except network.exceptions as exception:
- print(f"ERROR: domain='{domain}',software='mastodon',exception[{type(exception)}]:'{str(exception)}'")
+ logger.warning("domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
instances.set_last_error(domain, exception)
- # DEBUG: print("DEBUG: EXIT!")
+ logger.debug("blocklist()=%d - EXIT!", len(blocklist))
+ return blocklist
+
+def fetch_instances(domain: str, origin: str) -> list:
+ logger.debug("domain='%s',origin='%s' - CALLED!", domain, origin)
+ domain_helper.raise_on(domain)
+
+ peers = list()
+
+ try:
+        # /instances renders an HTML page; parse it for peer links
+ logger.debug("Fetching /instances from domain='%s'", domain)
+ response = network.fetch_response(
+ domain,
+ "/instances",
+ network.web_headers,
+ (config.get("connection_timeout"), config.get("read_timeout"))
+ )
+
+ logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
+ if response.ok and response.status_code < 300 and response.text != "":
+ logger.debug("Parsing %s Bytes ...", len(response.text))
+
+ doc = bs4.BeautifulSoup(response.text, "html.parser")
+ logger.debug("doc[]='%s'", type(doc))
+
+ headers = doc.findAll("h5")
+ logger.debug("Checking %d headers ...", len(headers))
+ for header in headers:
+ logger.debug("header[%s]='%s'", type(header), header)
+
+ rows = header.find_next(["ul","table"]).findAll("a")
+                logger.debug("Found %d peer link(s) ...", len(rows))
+ for tag in rows:
+ logger.debug("tag[]='%s'", type(tag))
+ text = tag.contents[0] if isinstance(tag.contents[0], str) else tag.contents[0].text
+ peer = tidyup.domain(text)
+ logger.debug("peer='%s'", peer)
+
+ if peer == "":
+ logger.debug("peer is empty - SKIPPED!")
+ continue
+ elif not utils.is_domain_wanted(peer):
+ logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
+ continue
+ elif peer in peers:
+ logger.debug("peer='%s' already added - SKIPPED!", peer)
+ continue
+
+ logger.debug("Appending peer='%s' ...", peer)
+ peers.append(peer)
+
+ logger.debug("Marking domain='%s' as successfully handled ...", domain)
+ instances.set_success(domain)
+
+ except network.exceptions as exception:
+ logger.warning("domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
+ instances.set_last_error(domain, exception)
+
+ logger.debug("peers()=%d - EXIT!", len(peers))
+ return peers