1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
24 from fba.helpers import blacklist
25 from fba.helpers import config
26 from fba.helpers import tidyup
28 from fba.http import network
30 from fba.models import instances
# Configure the root logger with a threshold of INFO. With this setting the
# logger.debug() calls in this module are not emitted unless the level is
# lowered elsewhere (NOTE(review): confirm whether another module or the
# entry point reconfigures logging).
logging.basicConfig(level=logging.INFO)
# Module-level logger named after this module's import path.
logger = logging.getLogger(__name__)
35 def fetch_blocks(domain: str) -> dict:
36 logger.debug("domain(%d)='%s' - CALLED!", len(domain), domain)
37 if not isinstance(domain, str):
38 raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
40 raise ValueError("Parameter 'domain' is empty")
41 elif domain.lower() != domain:
42 raise ValueError(f"Parameter domain='{domain}' must be all lower-case")
43 elif not validators.domain(domain.split("/")[0]):
44 raise ValueError(f"domain='{domain}' is not a valid domain")
45 elif domain.endswith(".arpa"):
46 raise ValueError(f"domain='{domain}' is a domain for reversed IP addresses, please don't crawl them!")
47 elif domain.endswith(".tld"):
48 raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
54 logger.debug("Fetching friendica blocks from domain:", domain)
55 doc = bs4.BeautifulSoup(
56 network.fetch_response(
60 (config.get("connection_timeout"), config.get("read_timeout"))
64 logger.debug("doc[]='%s'", type(doc))
66 block_tag = doc.find(id="about_blocklist")
67 except network.exceptions as exception:
68 logger.warning(f"Exception '{type(exception)}' during fetching instances (friendica) from domain='{domain}'")
69 instances.set_last_error(domain, exception)
72 # Prevents exceptions:
74 logger.debug("Instance has no block list:", domain)
77 table = block_tag.find("table")
79 logger.debug(f"table[]='{type(table)}'")
80 if table.find("tbody"):
81 rows = table.find("tbody").find_all("tr")
83 rows = table.find_all("tr")
85 logger.debug(f"Found rows()={len(rows)}")
87 logger.debug(f"line='{line}'")
88 blocked = tidyup.domain(line.find_all("td")[0].text)
89 reason = tidyup.reason(line.find_all("td")[1].text)
90 logger.debug(f"blocked='{blocked}',reason='{reason}'")
92 if not utils.is_domain_wanted((blocked):
93 logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
96 logger.debug(f"Appending blocked='{blocked}',reason='{reason}'")
98 "domain": tidyup.domain(blocked),
99 "reason": tidyup.reason(reason)
101 logger.debug("Next!")
103 logger.debug("Returning blocklist() for domain:", domain, len(blocklist))