from fba.models import instances
from fba.networks import friendica
+from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma
mastodon.fetch_blocks(blocker, origin, nodeinfo_url)
elif software == "lemmy":
print(f"INFO: blocker='{blocker}',software='{software}'")
- #lemmy.fetch_blocks(blocker, origin, nodeinfo_url)
+ lemmy.fetch_blocks(blocker, origin, nodeinfo_url)
elif software == "friendica" or software == "misskey":
print(f"INFO: blocker='{blocker}',software='{software}'")
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
+import inspect
+
+import bs4
+import validators
+
+from fba import blacklist
from fba import config
from fba import csrf
+from fba import fba
from fba import federation
from fba import network
+from fba.models import blocks
from fba.models import instances
def fetch_peers(domain: str) -> list:
print("WARNING: JSON response does not contain 'federated_instances':", domain)
instances.set_last_error(domain, data)
- except BaseException as exception:
+ except network.exceptions as exception:
print(f"WARNING: Exception during fetching JSON: domain='{domain}',exception[{type(exception)}]:'{str(exception)}'")
# DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'")
# DEBUG: print("DEBUG: Returning peers[]:", type(peers))
return peers
+
def fetch_blocks(domain: str, origin: str, nodeinfo_url: str):
    """Record the instances that 'domain' lists as blocked on its /instances page.

    The HTML page '/instances' is fetched from 'domain' and searched for an
    <h5> header whose text is one of the known translations of
    "blocked instances". Every link inside the <ul> following that header is
    taken as a blocked domain. Blocked domains that are not registered yet are
    added through instances.add(), and a "reject" block is either created or
    has its last-seen timestamp updated.

    Args:
        domain: Instance whose block list is fetched (the blocker).
        origin: Validated only (must be a non-empty str or None); it is not
            used otherwise by this function.
        nodeinfo_url: Passed on to instances.add() for newly found domains.

    Returns:
        None. Results are written through the 'instances' and 'blocks' models.

    Raises:
        ValueError: If a parameter has the wrong type or is an empty string.
    """
    # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(origin, str) and origin is not None:
        raise ValueError(f"Parameter origin[]='{type(origin)}' is not 'str'")
    elif origin == "":
        raise ValueError("Parameter 'origin' is empty")
    elif not isinstance(nodeinfo_url, str):
        raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not 'str'")
    elif nodeinfo_url == "":
        raise ValueError("Parameter 'nodeinfo_url' is empty")

    # Lower-cased header texts that introduce the list of blocked instances
    translations = [
        "blocked instances",
    ]

    try:
        # The block list is only published as HTML on the /instances page
        # DEBUG: print(f"DEBUG: Fetching /instances from domain='{domain}'")
        response = network.fetch_response(
            domain,
            "/instances",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        # DEBUG: print(f"DEBUG: response.ok='{response.ok}',response.status_code={response.status_code},response.text()={len(response.text)}")
        if response.ok and response.status_code < 300 and response.text != "":
            # DEBUG: print(f"DEBUG: Parsing {len(response.text)} Bytes ...")
            doc = bs4.BeautifulSoup(response.text, "html.parser")
            # DEBUG: print(f"DEBUG: doc[]={type(doc)}")

            headers = doc.find_all("h5")
            found = None
            # DEBUG: print(f"DEBUG: Search in {len(headers)} header(s) ...")
            for header in headers:
                # DEBUG: print(f"DEBUG: header[]={type(header)}")
                # An empty header, or one starting with a nested tag, has no
                # leading text to compare - skip it instead of crashing
                if not header.contents or not isinstance(header.contents[0], str):
                    continue

                content = header.contents[0].strip()

                # DEBUG: print(f"DEBUG: content='{content}'")
                if content.lower() in translations:
                    # DEBUG: print("DEBUG: Found header with blocked instances - BREAK!")
                    found = header
                    break

            # DEBUG: print(f"DEBUG: found[]='{type(found)}'")
            if found is None:
                # DEBUG: print(f"DEBUG: domain='{domain}' is not blocking any instances - EXIT!")
                return

            # find_next() returns None when no <ul> follows the header
            block_list = found.find_next("ul")
            blocking = block_list.find_all("a") if block_list is not None else []

            # DEBUG: print(f"DEBUG: Found {len(blocking)} blocked instance(s) ...")
            for tag in blocking:
                # DEBUG: print(f"DEBUG: tag[]='{type(tag)}'")
                # Links without leading text cannot name a domain
                if not tag.contents or not isinstance(tag.contents[0], str):
                    continue

                # Plain str, detached from the parse tree and without
                # surrounding whitespace from the markup
                blocked = str(tag.contents[0]).strip()

                # DEBUG: print(f"DEBUG: blocked='{blocked}'")
                if not validators.domain(blocked):
                    # DEBUG: print(f"DEBUG: blocked='{blocked}' is not a valid domain - SKIPPED!")
                    continue
                elif blacklist.is_blacklisted(blocked):
                    # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - SKIPPED!")
                    continue
                elif blocked.endswith(".arpa"):
                    print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
                    continue
                elif blocked.endswith(".tld"):
                    print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!")
                    continue
                elif not instances.is_registered(blocked):
                    # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, domain)
                    # Third argument is this function's own name ("fetch_blocks")
                    instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url)

                if not blocks.is_instance_blocked(domain, blocked, "reject"):
                    # DEBUG: print("DEBUG: Blocking:", domain, blocked)
                    blocks.add_instance(domain, blocked, None, "reject")
                else:
                    # DEBUG: print(f"DEBUG: Updating block last seen for domain='{domain}',blocked='{blocked}' ...")
                    blocks.update_last_seen(domain, blocked, "reject")

        # NOTE(review): nesting level reconstructed from a diff with collapsed
        # indentation - confirm the commit belongs outside the response check.
        # DEBUG: print("DEBUG: Committing changes ...")
        fba.connection.commit()
    except network.exceptions as exception:
        print(f"ERROR: domain='{domain}',software='lemmy',exception[{type(exception)}]:'{str(exception)}'")

    # DEBUG: print("DEBUG: EXIT!")
# DEBUG: print(f"DEBUG: path='{path}' had some headlines - BREAK!")
break
- except BaseException as exception:
- print("ERROR: Cannot fetch from domain:", domain, exception)
+ except network.exceptions as exception:
+ print(f"ERROR: Cannot fetch from domain='{domain}',exception='{type(exception)}'")
instances.set_last_error(domain, exception)
break
elif blocked.endswith(".arpa"):
print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
continue
+ elif blocked.endswith(".tld"):
+ print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!")
+ continue
elif not instances.is_registered(blocked):
# DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url)
elif blocked.endswith(".arpa"):
print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
continue
+ elif blocked.endswith(".tld"):
+ print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!")
+ continue
elif not instances.is_registered(blocked):
# DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, domain)
instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url)
elif blocked.endswith(".arpa"):
print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
continue
+ elif blocked.endswith(".tld"):
+ print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!")
+ continue
elif not instances.is_registered(blocked):
# Commit changes
fba.connection.commit()
elif blocked.endswith(".arpa"):
print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
continue
+ elif blocked.endswith(".tld"):
+ print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!")
+ continue
elif not instances.is_registered(blocked):
# Commit changes
fba.connection.commit()
elif blocked.endswith(".arpa"):
print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
continue
+ elif blocked.endswith(".tld"):
+ print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!")
+ continue
elif not instances.is_registered(blocked):
# DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url)
elif blocked.endswith(".arpa"):
print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
continue
+ elif blocked.endswith(".tld"):
+ print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!")
+ continue
elif not instances.is_registered(blocked):
# DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url)
elif blocked.endswith(".arpa"):
print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
continue
+ elif blocked.endswith(".tld"):
+ print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!")
+ continue
elif not instances.is_registered(blocked):
# DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url)
# DEBUG: print(f"DEBUG: Found 'h2' header in path='{path}' - BREAK!")
break
- except BaseException as exception:
+ except network.exceptions as exception:
print("ERROR: Cannot fetch from domain:", domain, exception)
instances.set_last_error(domain, exception)
break