if "domain" not in row:
logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
continue
+ elif row["domain"] == "":
+ logger.debug("row[domain] is empty - SKIPPED!")
+ continue
elif not utils.is_domain_wanted(row["domain"]):
- logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
+ logger.warning("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
continue
elif instances.is_registered(row["domain"]):
logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
if "domain" not in entry:
logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
continue
+ elif entry["domain"] == "":
+ logger.debug("entry[domain] is empty - SKIPPED!")
+ continue
elif not utils.is_domain_wanted(entry["domain"]):
- logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
+ logger.warning("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
continue
elif instances.is_registered(entry["domain"]):
logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
nodeinfo_url = row["nodeinfo_url"]
logger.debug("Looking up instance by domainm, blocked='%s'", block["blocked"])
- if not utils.is_domain_wanted(block["blocked"]):
- logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
+ if block["blocked"] == "":
+ logger.debug("block[blocked] is empty - SKIPPED!")
+ continue
+ elif not utils.is_domain_wanted(block["blocked"]):
+ logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
continue
elif block["block_level"] in ["accept", "accepted"]:
logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
logger.debug("item[]='%s'", type(item))
domain = item.decode_contents()
- logger.debug("domain='%s'", domain)
- if not utils.is_domain_wanted(domain):
- logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
+ logger.debug("domain='%s' - AFTER!", domain)
+ if domain == "":
+ logger.debug("domain is empty - SKIPPED!")
+ continue
+ elif not utils.is_domain_wanted(domain):
+ logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
continue
elif instances.is_registered(domain):
logger.debug("domain='%s' is already registered - SKIPPED!", domain)
logger.debug("rss[]='%s'", type(rss))
for item in rss.items:
logger.debug("item='%s'", item)
- domain = item.link.split("=")[1]
+ domain = tidyup.domain(item.link.split("=")[1])
- if not utils.is_domain_wanted(domain):
- logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
+ logger.debug("domain='%s' - AFTER!", domain)
+ if domain == "":
+ logger.debug("domain is empty - SKIPPED!")
+ continue
+ elif not utils.is_domain_wanted(domain):
+ logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
continue
elif domain in domains:
logger.debug("domain='%s' is already added - SKIPPED!", domain)
doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
logger.debug("doc[]='%s'", type(doc))
for element in doc.findAll("a"):
+ logger.debug("element[]='%s'", type(element))
for href in element["href"].split(","):
- logger.debug("href[%s]='%s", type(href), href)
+ logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
domain = tidyup.domain(href)
- logger.debug("domain='%s'", domain)
- if not utils.is_domain_wanted(domain):
- logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
+ logger.debug("domain='%s' - AFTER!", domain)
+ if domain == "":
+ logger.debug("domain is empty - SKIPPED!")
+ continue
+ elif not utils.is_domain_wanted(domain):
+ logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
continue
elif domain in domains:
logger.debug("domain='%s' is already added - SKIPPED!", domain)
rows = database.cursor.fetchall()
logger.info("Checking %d entries ...", len(rows))
for row in rows:
- logger.debug("domain='%s'", row["domain"])
- if not utils.is_domain_wanted(row["domain"]):
- logger.debug("Domain row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
+ logger.debug("row[domain]='%s'", row["domain"])
+ if row["domain"] == "":
+ logger.debug("row[domain] is empty - SKIPPED!")
+ continue
+ elif not utils.is_domain_wanted(row["domain"]):
+ logger.warning("Domain row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
continue
try:
reject_reports = True
logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
- if not utils.is_domain_wanted(domain):
- logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
+ if domain == "":
+ logger.debug("domain is empty - SKIPPED!")
+ continue
+ elif not utils.is_domain_wanted(domain):
+ logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
continue
logger.debug("Marking domain='%s' as handled", domain)
logger.info("Processing %d domains ...", len(domains))
for domain in domains:
- logger.debug("domain='%s'", domain)
+ logger.debug("domain='%s' - BEFORE!", domain)
+ domain = tidyup.domain(domain)
+
+ logger.debug("domain='%s' - AFTER!", domain)
if domain == "":
logger.debug("domain is empty - SKIPPED!")
continue
elif not utils.is_domain_wanted(domain):
- logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
+ logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
continue
elif instances.is_recent(domain):
logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
logger.debug("row[]='%s'", type(row))
domain = tidyup.domain(row.contents[0])
- logger.debug("domain='%s'", domain)
+ logger.debug("domain='%s' - AFTER!", domain)
if domain == "":
logger.debug("domain is empty - SKIPPED!")
continue
elif not utils.is_domain_wanted(domain):
- logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
+ logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
continue
elif instances.is_registered(domain):
logger.debug("domain='%s' is already registered - SKIPPED!", domain)
logger.debug("blocking()=%d", blocking)
for block in blocking:
+ logger.debug("block[]='%s'", type(block))
block["blocked"] = tidyup.domain(block["blocked"])
- if not utils.is_domain_wanted(block["blocked"]):
- logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
+ logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
+ if block["blocked"] == "":
+ logger.debug("block[blocked] is empty - SKIPPED!")
+ continue
+ elif not utils.is_domain_wanted(block["blocked"]):
+ logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
continue
elif instances.is_recent(block["blocked"]):
logger.debug("blocked='%s' has been recently checked - SKIPPED!", block["blocked"])
logger.debug("blocker[%s]='%s'", type(blocker), blocker)
for block in blocking:
+ logger.debug("block[blocked]='%s',block[reason]='%s' - BEFORE!", block["blocked"], block["reason"])
block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None
- if not utils.is_domain_wanted(block["blocked"]):
- logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
+ logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
+ if block["blocked"] == "":
+ logger.debug("block[blocked] is empty - SKIPPED!")
+ continue
+ elif not utils.is_domain_wanted(block["blocked"]):
+ logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
continue
logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
obfuscated = 0
blockdict = list()
for block in blocking:
- logger.debug("blocked='%s'", block["blocked"])
+ logger.debug("block[blocked]='%s'", block["blocked"])
blocked = None
- if block["blocked"].endswith(".arpa"):
+ if block["blocked"] == "":
+ logger.debug("block[blocked] is empty - SKIPPED!")
+ continue
+ elif block["blocked"].endswith(".arpa"):
logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
continue
elif block["blocked"].endswith(".tld"):
obfuscated = obfuscated + 1
blocked = utils.deobfuscate_domain(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
elif not utils.is_domain_wanted(block["blocked"]):
- logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
+ logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
continue
elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
# Language mapping X -> English
language_mapping = {
# English -> English
- "Reject": "Suspended servers",
+ "limited servers" : "followers_only",
+ "suspended servers": "reject",
+ "silenced servers" : "silenced",
+ "filtered media" : "filtered_media",
}
def fetch_blocks(domain: str, nodeinfo_url: str) -> list:
for blocked in rows:
logger.debug("blocked='%s' - BEFORE!", blocked)
blocked = tidyup.domain(blocked)
- logger.debug("blocked='%s' - AFTER!", blocked)
+ reason = tidyup.reason(rows[blocked]["reason"])
+ logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)
if blocked not in rows or "reason" not in rows[blocked]:
logger.warning("Cannot find blocked='%s' in rows()=%d,domain='%s' - BREAK!", blocked, len(rows), domain)
break
-
- reason = tidyup.reason(rows[blocked]["reason"])
- logger.debug("reason='%s'", reason)
-
- if blocked == "":
+ elif blocked == "":
logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
continue
elif not utils.is_domain_wanted(blocked):
logger.debug("blocklist()=%d", len(blocklist))
if len(blocklist) > 0:
- logger.info("Checking %d record(s) ...", len(blocklist))
+ logger.info("Checking %d different blocklists ...", len(blocklist))
for block_level in blocklist:
logger.debug("block_level='%s'", block_level)
rows = blocklist[block_level]
logger.debug("rows[%s]()=%d'", type(rows), len(rows))
- for record in rows:
- logger.debug("record[]='%s'", type(record))
- blocked = tidyup.domain(record["blocked"])
- reason = tidyup.reason(record["reason"])
- logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)
-
- if not utils.is_domain_wanted(blocked):
- logger.warning("blocked='%s' is not wanted - SKIPPED!", blocked)
- continue
-
- logger.debug("Invoking utils.deobfuscate_domain(%s, %s) ...", blocked, domain)
- blocked = utils.deobfuscate_domain(blocked, domain)
+ for block in rows:
+ logger.debug("Invoking utils.deobfuscate_domain(%s, %s) ...", block["blocked"], domain)
+ block["blocked"] = utils.deobfuscate_domain(block["blocked"], domain)
- logger.debug("blocked='%s' - DEOBFUSCATED!", blocked)
- if not utils.is_domain_wanted(blocked):
- logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
+ logger.debug("block[blocked]='%s' - DEOBFUSCATED!", block["blocked"])
+ if not utils.is_domain_wanted(block["blocked"]):
+ logger.debug("block[blocked]='%s' is not wanted - SKIPPED!", block["blocked"])
continue
- logger.debug("Appending blocker='%s',blocked='%s',reason='%s',block_level='%s' ...",domain, blocked, reason, block_level)
+ logger.debug("Appending blocker='%s',block[blocked]='%s',block[reason]='%s',block_level='%s' ...",domain, block["blocked"], block["reason"], block_level)
blockdict.append({
"blocker" : domain,
- "blocked" : blocked,
- "reason" : reason,
+ "blocked" : block["blocked"],
+ "reason" : block["reason"],
"block_level": block_level,
})
break
blocklist = {
- "Suspended servers": [],
- "Filtered media" : [],
- "Limited servers" : [],
- "Silenced servers" : [],
+ "reject" : [],
+ "filtered_media": [],
+ "followers_only": [],
+ "silenced" : [],
}
logger.debug("doc[]='%s'", type(doc))
logger.warning("Cannot fetch any /about pages for domain='%s' - EXIT!", domain)
return list()
- for header in doc.find_all("h2"):
+ headers = doc.find_all("h2")
+
+ logger.debug("headers[]='%s'", type(headers))
+ if headers is None:
+ logger.warning("Cannot fetch any /about pages for domain='%s' - EXIT!", domain)
+ return list()
+
+ logger.info("Checking %d headers ...", len(headers))
+ for header in headers:
logger.debug("header[%s]='%s'", type(header), header)
- header_text = tidyup.reason(header.text)
+ block_level = tidyup.reason(header.text).lower()
- logger.debug("header_text='%s' - BEFORE!", header_text)
- if header_text in language_mapping:
- logger.debug("header_text='%s' - FOUND!", header_text)
- header_text = language_mapping[header_text]
+ logger.debug("block_level='%s' - BEFORE!", block_level)
+ if block_level in language_mapping:
+ logger.debug("block_level='%s' - FOUND!", block_level)
+ block_level = language_mapping[block_level].lower()
else:
- logger.warning("header_text='%s' not found in language mapping table", header_text)
+ logger.warning("block_level='%s' not found in language mapping table", block_level)
- logger.debug("header_text='%s - AFTER!'", header_text)
- if header_text in blocklist or header_text.lower() in blocklist:
+ logger.debug("block_level='%s - AFTER!'", block_level)
+ if block_level in blocklist:
# replaced find_next_siblings with find_all_next to account for instances that e.g. hide lists in dropdown menu
- logger.debug("Found header_text='%s', importing domain blocks ...", header_text)
+ logger.debug("Found block_level='%s', importing domain blocks ...", block_level)
for line in header.find_next("table").find_all("tr")[1:]:
logger.debug("line[]='%s'", type(line))
- blocklist[header_text].append({
- "blocked": tidyup.domain(line.find_all("td")[0].text),
- "reason" : tidyup.reason(line.find_all("td")[1].text),
+ blocked = tidyup.domain(line.find_all("td")[0].text)
+ reason = tidyup.reason(line.find_all("td")[1].text)
+
+ logger.debug("Appending block_level='%s',blocked='%s',reason='%s' ...", block_level, blocked, reason)
+ blocklist[block_level].append({
+ "blocked": blocked,
+ "reason" : reason,
})
else:
- logger.warning("header_text='%s' not found in blocklist()=%d", header_text, len(blocklist))
+ logger.warning("block_level='%s' not found in blocklist()=%d", block_level, len(blocklist))
logger.debug("Returning blocklist for domain='%s' - EXIT!", domain)
- return {
- "reject" : blocklist["Suspended servers"],
- "media_removal" : blocklist["Filtered media"],
- "followers_only": blocklist["Limited servers"] + blocklist["Silenced servers"],
- }
+ return blocklist