# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import logging

import bs4

from fba.helpers import config
from fba.helpers import domain as domain_helper
from fba.helpers import tidyup

from fba.http import network

from fba.models import instances
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)

def fetch_blocks(domain: str) -> list:
    """Fetch the published block list of a Friendica instance.

    Requests the "/friendica" page of ``domain``, looks up the element with
    id "about_blocklist" and reads one blocked domain plus its reason from
    every table row found inside that element.

    Args:
        domain: Domain name of the instance to query. It is checked with
            ``domain_helper.raise_on()`` and must already be registered.

    Returns:
        A list of dicts with the keys "blocker", "blocked", "reason" and
        "block_level" (always "reject"). The list is empty when the request
        fails, or when the page has no block list element or no table in it.

    Raises:
        Exception: If ``domain`` is not registered.
        Whatever ``domain_helper.raise_on()`` raises for a rejected domain.
    """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    if not instances.is_registered(domain):
        raise Exception(f"domain='{domain}' is not registered but function is invoked.")

    blocklist = list()
    block_tag = None

    try:
        logger.debug("Fetching friendica blocks from domain='%s'", domain)
        raw = network.fetch_response(
            domain,
            "/friendica",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        ).text
        logger.debug("Parsing %d Bytes ...", len(raw))

        doc = bs4.BeautifulSoup(raw, "html.parser")
        logger.debug("doc[]='%s'", type(doc))

        block_tag = doc.find(id="about_blocklist")
        logger.debug("block_tag[%s]='%s'", type(block_tag), block_tag)
    except network.exceptions as exception:
        # Best effort: record the failure on the instance and report "no blocks".
        logger.warning("Exception '%s' during fetching instances from domain='%s'", type(exception), domain)
        instances.set_last_error(domain, exception)

        logger.debug("Returning empty list ... - EXIT!")
        return list()

    if block_tag is None:
        logger.debug("Instance has no block list: domain='%s' - EXIT!", domain)
        return list()

    table = block_tag.find("table")

    logger.debug("table[]='%s'", type(table))
    if table is None:
        logger.warning("domain='%s' has no table tag - EXIT !", domain)
        return list()

    # Prefer the rows of <tbody> when present, otherwise take every row of
    # the table.
    tbody = table.find("tbody")
    rows = tbody.find_all("tr") if tbody is not None else table.find_all("tr")

    logger.debug("Found rows()=%d", len(rows))
    for line in rows:
        logger.debug("line='%s'", line)
        cells = line.find_all("td")

        # Rows without a domain/reason pair of <td> cells (e.g. a <th>-only
        # header row when there is no <tbody>) would raise IndexError below.
        if len(cells) < 2:
            logger.debug("line[]='%s' has cells()=%d, need at least 2 - SKIPPED!", type(line), len(cells))
            continue

        blocked = cells[0].text
        logger.debug("blocked='%s'", blocked)

        blocked = tidyup.domain(blocked) if blocked != "" else None
        reason = tidyup.reason(cells[1].text)
        logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

        if blocked is None or blocked == "":
            logger.warning("line[]='%s' returned empty blocked domain - SKIPPED!", type(line))
            continue
        elif not domain_helper.is_wanted(blocked):
            logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
            continue

        logger.debug("Appending blocked='%s',reason='%s'", blocked, reason)
        blocklist.append({
            "blocker"    : domain,
            "blocked"    : blocked,
            "reason"     : reason,
            "block_level": "reject",
        })

    logger.debug("blocklist()=%d - EXIT!", len(blocklist))
    return blocklist