fba/csrf.py

   1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
   2 # Copyright (C) 2023 Free Software Foundation
   3 #
   4 # This program is free software: you can redistribute it and/or modify
   5 # it under the terms of the GNU Affero General Public License as published
   6 # by the Free Software Foundation, either version 3 of the License, or
   7 # (at your option) any later version.
   8 #
   9 # This program is distributed in the hope that it will be useful,
  10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 # GNU Affero General Public License for more details.
  13 #
  14 # You should have received a copy of the GNU Affero General Public License
  15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
  16
  17 import logging
  18
  19 import bs4
  20 import reqto
  21
  22 from fba.helpers import config
  23 from fba.helpers import cookies
  24 from fba.helpers import domain as domain_helper
  25
  26 from fba.http import network
  27
# Calls logging.basicConfig() with level INFO as soon as this module is imported.
# NOTE(review): with the root level at INFO, the logger.debug() calls in this
# module are presumably suppressed unless logging is reconfigured elsewhere - confirm.
logging.basicConfig(level=logging.INFO)
# Module-level logger named after this module
logger = logging.getLogger(__name__)
  30
  31 def determine(domain: str, headers: dict) -> dict:
  32     logger.debug("domain='%s',headers()=%d - CALLED!", domain, len(headers))
  33     domain_helper.raise_on(domain)
  34
  35     if not isinstance(headers, dict):
  36         raise ValueError(f"Parameter headers[]='{type(headers)}' is not 'dict'")
  37
  38     # Default headers with no CSRF
  39     reqheaders = headers
  40
  41     # Fetch / to check for meta tag indicating csrf
  42     logger.debug("Fetching / from domain='%s' for CSRF check ...", domain)
  43     response = reqto.get(
  44         f"https://{domain}/",
  45         headers=network.web_headers,
  46         timeout=(config.get("connection_timeout"), config.get("read_timeout"))
  47     )
  48
  49     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
  50     if response.ok and response.status_code < 300 and response.text != "" and response.text.find("<html") > 0:
  51         # Save cookies
  52         logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
  53         cookies.store(domain, response.cookies.get_dict())
  54
  55         # Parse text
  56         meta = bs4.BeautifulSoup(
  57             response.text,
  58             "html.parser"
  59         )
  60         logger.debug("meta[]='%s'", type(meta))
  61         tag = meta.find("meta", attrs={"name": "csrf-token"})
  62
  63         logger.debug("tag[%s]='%s'", type(tag), tag)
  64         if tag is not None:
  65             logger.debug("Adding CSRF token='%s' for domain='%s'", tag["content"], domain)
  66             reqheaders["X-CSRF-Token"] = tag["content"]
  67
  68     logger.debug("reqheaders()=%d - EXIT!", len(reqheaders))
  69     return reqheaders