logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
- logger.debug("domain='%s',row[blocker]='%s'", domain, row["blocker"])
+ logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
processed = utils.process_domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)
logger.debug("processed='%s'", processed)
logger.debug("Success! - EXIT!")
return 0
def _scrape_joinfediverse_tables(doc) -> list:
    """Collect block records from every "wikitable" table of the parsed page.

    A row carrying more than one <th> cell is treated as a header row and
    (re)defines which 1-based column positions are of interest. Every later
    row is read through that mapping until the next header row appears.

    Args:
        doc: Parsed document offering findAll() (a bs4.BeautifulSoup object
            in the caller).

    Returns:
        A list of dicts. Possible keys are "blocked" (taken from a "domain"
        or "instance" column), "subdomain(s)" (list of str) and
        "block reason(s)" (str). Rows yielding no wanted column are omitted.
    """
    wanted_headers = ("domain", "instance", "subdomain(s)", "block reason(s)")
    tables = doc.findAll("table", {"class": "wikitable"})

    logger.info("Analyzing %d table(s) ...", len(tables))
    blocklist = list()
    for table in tables:
        logger.debug("table[]='%s'", type(table))

        rows = table.findAll("tr")
        logger.info("Checking %d row(s) ...", len(rows))

        # Maps 1-based column position -> normalized header text
        block_headers = dict()
        for row in rows:
            headers = row.findAll("th")

            if len(headers) > 1:
                # Header row: start a fresh column mapping
                block_headers = dict()
                for cnt, header in enumerate(headers, start=1):
                    if not header.contents:
                        # Empty <th></th> has nothing to inspect; the column
                        # position is still counted by enumerate()
                        continue

                    text = header.contents[0]
                    if not isinstance(text, str):
                        # First child is a nested tag, not plain text
                        continue
                    elif validators.domain(text.strip()):
                        # A domain inside a <th> is data, not a column title
                        continue

                    text = tidyup.domain(text.strip())
                    if text in wanted_headers:
                        logger.debug("Found header: '%s'=%d", text, cnt)
                        block_headers[cnt] = text
                continue

            if len(block_headers) == 0:
                # No usable header row seen yet, row is not scrapable
                continue

            logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
            block = dict()
            for cnt, element in enumerate(row.find_all(["th", "td"]), start=1):
                logger.debug("element[]='%s',cnt=%d", type(element), cnt)
                if cnt not in block_headers:
                    continue

                logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])
                text = element.text.strip()

                # "domain" and "instance" columns both end up under "blocked"
                key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"

                logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
                if key == "subdomain(s)":
                    # Slash-separated list; an empty cell means no subdomains
                    block[key] = text.split("/") if text != "" else list()
                else:
                    block[key] = text

            logger.debug("block()=%d ...", len(block))
            if len(block) > 0:
                logger.debug("Appending block()=%d ...", len(block))
                blocklist.append(block)

    return blocklist


def _expand_joinfediverse_subdomains(blocklist: list) -> list:
    """Turn records with a subdomain list into one record per subdomain.

    Each expanded record is an independent copy, so later changes to one
    entry's "blocked" value cannot leak into its siblings. Records without
    a "blocked" key are dropped because nothing can be recorded for them.

    Args:
        blocklist: Records as returned by _scrape_joinfediverse_tables().

    Returns:
        A list of records, each with a "blocked" key.
    """
    blocking = list()
    for block in blocklist:
        logger.debug("block='%s'", block)
        if "blocked" not in block:
            logger.debug("block='%s' has no domain/instance column - SKIPPED!", block)
            continue

        subdomains = block.get("subdomain(s)")
        if subdomains:
            origin = block["blocked"]
            for subdomain in subdomains:
                entry = block.copy()
                entry["blocked"] = subdomain + "." + origin
                blocking.append(entry)
        else:
            blocking.append(block)

    return blocking


def fetch_joinfediverse(args: argparse.Namespace) -> int:
    """Import the block table published at joinfediverse.wiki/FediBlock.

    Steps:
      1. Fetch and parse the wiki page, scraping all "wikitable" tables.
      2. Expand rows listing subdomains into one entry per subdomain.
      3. Hand every wanted, not recently checked domain to
         utils.process_domain() with "climatejustice.social" as second
         argument.
      4. For every instance in the database whose domain matches
         'climatejustice.%', add or refresh a "reject" block for each
         scraped domain, commit, and send a bot post when the
         "bot_enabled" configuration value is truthy and new blocks
         were added for that blocker.

    Args:
        args: Parsed command-line arguments; only its type is logged.

    Returns:
        Always 0.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    raw = utils.fetch_url("https://joinfediverse.wiki/FediBlock", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    blocklist = _scrape_joinfediverse_tables(doc)
    logger.debug("blocklist()=%d", len(blocklist))

    database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
    domains = database.cursor.fetchall()
    logger.debug("domains(%d)[]='%s'", len(domains), type(domains))

    blocking = _expand_joinfediverse_subdomains(blocklist)
    logger.debug("blocking()=%d", len(blocking))

    # Must be resolved here so the reported name is this command's name
    command_name = inspect.currentframe().f_code.co_name

    for block in blocking:
        block["blocked"] = tidyup.domain(block["blocked"])

        # Does not depend on the blocker, so it is computed once per entry
        block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None

        if not utils.is_domain_wanted(block["blocked"]):
            logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
            continue
        elif instances.is_recent(block["blocked"]):
            logger.debug("blocked='%s' has been recently checked - SKIPPED!", block["blocked"])
            continue

        logger.info("Proccessing blocked='%s' ...", block["blocked"])
        processed = utils.process_domain(block["blocked"], "climatejustice.social", command_name)
        logger.debug("processed='%s'", processed)

    for row in domains:
        blocker = row[0]
        logger.debug("blocker[%s]='%s'", type(blocker), blocker)

        # Newly added blocks of THIS blocker only; a shared list would repeat
        # earlier blockers' entries in every following bot post
        blockdict = list()

        for block in blocking:
            if not utils.is_domain_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s'", block['blocked'], block['reason'])
            if not blocks.is_instance_blocked(blocker, block['blocked'], "reject"):
                logger.debug("Invoking blocks.add_instance(%s, %s, %s, %s)", blocker, block['blocked'], block["reason"], "reject")
                blocks.add_instance(blocker, block['blocked'], block["reason"], "reject")

                logger.debug("block_level='%s',config[bot_enabled]='%s'", "reject", config.get("bot_enabled"))
                if config.get("bot_enabled"):
                    logger.debug("blocker='%s' has blocked '%s' with reason='%s' - Adding to bot notification ...", blocker, block['blocked'], block["reason"])
                    blockdict.append({
                        "blocked": block['blocked'],
                        "reason" : block["reason"],
                    })
            else:
                logger.debug("Updating block last seen and reason for blocker='%s',blocked='%s' ...", blocker, block['blocked'])
                blocks.update_last_seen(blocker, block['blocked'], "reject")
                blocks.update_reason(block["reason"], blocker, block['blocked'], "reject")

        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config[bot_enabled]='%s',blockdict()=%d'", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0