1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
20 from fba.helpers import blacklist
21 from fba.helpers import config
22 from fba.helpers import tidyup
24 from fba.http import network
26 from fba.models import instances
28 def fetch_blocks(domain: str) -> dict:
29 # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
30 if not isinstance(domain, str):
31 raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
33 raise ValueError("Parameter 'domain' is empty")
34 elif domain.lower() != domain:
35 raise ValueError(f"Parameter domain='{domain}' must be all lower-case")
36 elif not validators.domain(domain.split("/")[0]):
37 raise ValueError(f"domain='{domain}' is not a valid domain")
38 elif domain.endswith(".arpa"):
39 raise ValueError(f"domain='{domain}' is a domain for reversed IP addresses, please don't crawl them!")
40 elif domain.endswith(".tld"):
41 raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
47 # DEBUG: print("DEBUG: Fetching friendica blocks from domain:", domain)
48 doc = bs4.BeautifulSoup(
49 network.fetch_response(
53 (config.get("connection_timeout"), config.get("read_timeout"))
57 # DEBUG: print(f"DEBUG: doc[]='{type(doc)}'")
59 block_tag = doc.find(id="about_blocklist")
60 except network.exceptions as exception:
61 print(f"WARNING: Exception '{type(exception)}' during fetching instances (friendica) from domain='{domain}'")
62 instances.set_last_error(domain, exception)
65 # Prevents exceptions:
67 # DEBUG: print("DEBUG: Instance has no block list:", domain)
70 table = block_tag.find("table")
72 # DEBUG: print(f"DEBUG: table[]='{type(table)}'")
73 if table.find("tbody"):
74 rows = table.find("tbody").find_all("tr")
76 rows = table.find_all("tr")
78 # DEBUG: print(f"DEBUG: Found rows()={len(rows)}")
80 # DEBUG: print(f"DEBUG: line='{line}'")
81 blocked = tidyup.domain(line.find_all("td")[0].text)
82 reason = tidyup.reason(line.find_all("td")[1].text)
83 # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}'")
85 if not validators.domain(blocked):
86 print(f"WARNING: blocked='{blocked}' is not a valid domain - SKIPPED!")
88 elif blocked.endswith(".arpa"):
89 print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
91 elif blocked.endswith(".tld"):
92 print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!")
94 elif blacklist.is_blacklisted(blocked):
95 # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - SKIPPED!")
98 # DEBUG: print(f"DEBUG: Appending blocked='{blocked}',reason='{reason}'")
100 "domain": tidyup.domain(blocked),
101 "reason": tidyup.reason(reason)
103 # DEBUG: print("DEBUG: Next!")
105 # DEBUG: print("DEBUG: Returning blocklist() for domain:", domain, len(blocklist))