# along with this program. If not, see <https://www.gnu.org/licenses/>.
import bs4
import validators

from fba import config
from fba import network
from fba.helpers import blacklist
from fba.helpers import tidyup
from fba.models import instances
# DEBUG: print(f"DEBUG: Found rows()={len(rows)}")
for line in rows:
    # Collect one {"domain", "reason"} record per table row whose domain
    # passes every filter below; rejected rows are skipped via `continue`.
    # DEBUG: print(f"DEBUG: line='{line}'")

    # Look the cells up once: the first cell feeds "domain", the second "reason".
    cells = line.find_all("td")

    # Keep the tidied domain under its own name. Rebinding `blocked` here
    # would replace the collection appended to below with a plain string
    # (`blocked` is assumed to be a list created before this loop - TODO confirm).
    blocked_domain = tidyup.domain(cells[0].text)
    # DEBUG: print(f"DEBUG: blocked='{blocked_domain}'")

    # validators.domain() yields a falsy result for anything that is not a
    # syntactically valid domain name.
    if not validators.domain(blocked_domain):
        print(f"WARNING: blocked='{blocked_domain}' is not a valid domain - SKIPPED!")
        continue

    if blocked_domain.endswith(".arpa"):
        print(f"WARNING: blocked='{blocked_domain}' is a reversed .arpa domain and should not be used generally.")
        continue

    if blocked_domain.endswith(".tld"):
        print(f"WARNING: blocked='{blocked_domain}' is a fake domain, please don't crawl them!")
        continue

    if blacklist.is_blacklisted(blocked_domain):
        # DEBUG: print(f"DEBUG: blocked='{blocked_domain}' is blacklisted - SKIPPED!")
        continue

    # The domain was already tidied above, so it is stored as-is.
    blocked.append({
        "domain": blocked_domain,
        "reason": tidyup.reason(cells[1].text),
    })
    # DEBUG: print("DEBUG: Next!")