"rejected": list(),
}
- source_domain = "raw.githubusercontent.com"
+ source_domain = "meta.chaos.social"
if sources.is_recent(source_domain):
logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
return 1
logger.info("Fetching federation.md from source_domain='%s' ...", source_domain)
raw = network.fetch_url(
- f"https://{source_domain}/chaossocial/meta/master/federation.md",
+ f"https://{source_domain}/federation",
network.web_headers,
(config.get("connection_timeout"), config.get("read_timeout"))
).text
doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
logger.debug("doc()=%d[]='%s'", len(doc), type(doc))
- silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
+ silenced = doc.find("h2", {"id": "silenced-instances"}).find_next("dl", attrs={"class": "instance-list"})
logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
blocklist["silenced"] = federation.find_domains(silenced)
- blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
- logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
- blocklist["rejected"] = federation.find_domains(blocked)
+ defederated = doc.find("h2", {"id": "defederated-instances"}).find_next("dl", attrs={"class": "instance-list"})
+ logger.debug("defederated[%s]()=%d", type(defederated), len(defederated))
+ blocklist["rejected"] = federation.find_domains(defederated)
blocking = blocklist["silenced"] + blocklist["rejected"]
blocker = "chaos.social"
logger.debug("software[%s]='%s' - EXIT!", type(software), software)
return software
-def find_domains(tag: bs4.element.Tag) -> list:
- logger.debug("tag[]='%s' - CALLED!", type(tag))
+def find_domains(tag: bs4.element.Tag, domainColumn: str = "dt", reasonColumn: str = "dd", reasonText: str = "Categories:") -> list:
+ logger.debug("tag[]='%s',domainColumn='%s',reasonColumn='%s',reasonText='%s' - CALLED!", type(tag), domainColumn, reasonColumn, reasonText)
if not isinstance(tag, bs4.element.Tag):
raise ValueError(f"Parameter tag[]='{type(tag)}' is not type of bs4.element.Tag")
- elif len(tag.select("tr")) == 0:
- raise KeyError("No table rows found in table!")
+ elif not isinstance(domainColumn, str):
+ raise ValueError(f"Parameter domainColumn[]='{type(domainColumn)}' is not type of 'str'")
+ elif domainColumn == "":
+ raise ValueError("Parameter 'domainColumn' is an empty string")
+ elif not isinstance(reasonColumn, str):
+ raise ValueError(f"Parameter reasonColumn[]='{type(reasonColumn)}' is not type of 'str'")
+ elif reasonColumn == "":
+ raise ValueError("Parameter 'reasonColumn' is an empty string")
+ elif len(tag.find_all(domainColumn)) == 0:
+ raise KeyError("No domainColumn='{domainColumn}' rows found in table!")
+ elif len(tag.find_all(reasonColumn)) == 0:
+ raise KeyError("No reasonColumn='{reasonColumn}' rows found in table!")
+ elif not isinstance(reasonText, str):
+ raise ValueError(f"Parameter reasonText[]='{type(reasonText)}' is not type of 'str'")
+ elif reasonText == "":
+ raise ValueError("Parameter 'reasonText' is an empty string")
domains = list()
- for element in tag.select("tr"):
- logger.debug("element[]='%s'", type(element))
- if not element.find("td"):
- logger.debug("Skipping element, no <td> found")
- continue
-
- domain = tidyup.domain(element.find("td").text)
- reason = tidyup.reason(element.findAll("td")[1].text)
-
+ for element in tag.find_all(domainColumn):
+ logger.debug("element[%s]='%s'", type(element), element)
+ domain = tidyup.domain(element.text)
+ reasons = element.find_next(reasonColumn).text.split(reasonText)[1].splitlines()
+ logger.debug("reasons(%d)='%s'", len(reasons), reasons)
+ reason = None
+ for r in reasons:
+ logger.debug("r[%s]='%s'", type(r), r)
+ if r != "":
+ reason = r
+ break
+
+ reason = tidyup.reason(reason)
logger.debug("domain='%s',reason='%s'", domain, reason)
if not domain_helper.is_wanted(domain):