1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
20 from fba.helpers import blacklist
21 from fba.helpers import config
22 from fba.helpers import tidyup
24 from fba.http import network
26 from fba.models import instances
28 def fetch_blocks(domain: str) -> dict:
29 # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
30 if not isinstance(domain, str):
31 raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
33 raise ValueError("Parameter 'domain' is empty")
39 # DEBUG: print("DEBUG: Fetching friendica blocks from domain:", domain)
40 doc = bs4.BeautifulSoup(
41 network.fetch_response(
45 (config.get("connection_timeout"), config.get("read_timeout"))
49 # DEBUG: print(f"DEBUG: doc[]='{type(doc)}'")
51 blocklist = doc.find(id="about_blocklist")
52 except network.exceptions as exception:
53 print(f"WARNING: Exception '{type(exception)}' during fetching instances (friendica) from domain='{domain}'")
54 instances.set_last_error(domain, exception)
57 # Prevents exceptions:
59 # DEBUG: print("DEBUG: Instance has no block list:", domain)
62 table = blocklist.find("table")
64 # DEBUG: print(f"DEBUG: table[]='{type(table)}'")
65 if table.find("tbody"):
66 rows = table.find("tbody").find_all("tr")
68 rows = table.find_all("tr")
70 # DEBUG: print(f"DEBUG: Found rows()={len(rows)}")
72 # DEBUG: print(f"DEBUG: line='{line}'")
73 blocked = tidyup.domain(line.find_all("td")[0].text)
74 print(f"DEBUG: blocked='{blocked}'")
76 if not validators.domain(blocked):
77 print(f"WARNING: blocked='{blocked}' is not a valid domain - SKIPPED!")
79 elif blocked.endswith(".arpa"):
80 print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
82 elif blocked.endswith(".tld"):
83 print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!")
85 elif blacklist.is_blacklisted(blocked):
86 # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - SKIPPED!")
90 "domain": tidyup.domain(blocked),
91 "reason": tidyup.reason(line.find_all("td")[1].text)
93 # DEBUG: print("DEBUG: Next!")
95 # DEBUG: print("DEBUG: Returning blocklist() for domain:", domain, len(blocklist))