1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
23 from fba.helpers import config
24 from fba.helpers import cookies
26 from fba.http import network
# Import-time logging setup: basicConfig() configures the root logger at INFO
# level (it does nothing if the root logger already has handlers attached).
logging.basicConfig(level=logging.INFO)
# Module-level logger named after this module, used by the functions below.
logger = logging.getLogger(__name__)
31 def determine(domain: str, headers: dict) -> dict:
32 logger.debug(f"domain='{domain}',headers()={len(headers)} - CALLED!")
33 if not isinstance(domain, str):
34 raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
36 raise ValueError("Parameter 'domain' is empty")
37 elif domain.lower() != domain:
38 raise ValueError(f"Parameter domain='{domain}' must be all lower-case")
39 elif not validators.domain(domain.split("/")[0]):
40 raise ValueError(f"domain='{domain}' is not a valid domain")
41 elif domain.endswith(".arpa"):
42 raise ValueError(f"domain='{domain}' is a domain for reversed IP addresses, please don't crawl them!")
43 elif domain.endswith(".tld"):
44 raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
45 elif not isinstance(headers, dict):
46 raise ValueError(f"Parameter headers[]='{type(headers)}' is not 'dict'")
48 # Default headers with no CSRF
51 # Fetch / to check for meta tag indicating csrf
52 logger.debug(f"Fetching / from domain='{domain}' for CSRF check ...")
55 headers=network.web_headers,
56 timeout=(config.get("connection_timeout"), config.get("read_timeout"))
59 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
60 if response.ok and response.status_code < 300 and response.text != "" and response.text.find("<html") > 0:
62 logger.debug(f"Parsing response.text()={len(response.text)} Bytes ...")
63 cookies.store(domain, response.cookies.get_dict())
66 meta = bs4.BeautifulSoup(
70 logger.debug(f"meta[]='{type(meta)}'")
71 tag = meta.find("meta", attrs={"name": "csrf-token"})
73 logger.debug(f"tag={tag}")
75 logger.debug(f"Adding CSRF token='{tag['content']}' for domain='{domain}'")
76 reqheaders["X-CSRF-Token"] = tag["content"]
78 logger.debug(f"reqheaders()={len(reqheaders)} - EXIT!")