1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
31 from fba import database
34 from fba.helpers import blacklist
35 from fba.helpers import config
36 from fba.helpers import cookies
37 from fba.helpers import locking
38 from fba.helpers import software as software_helper
39 from fba.helpers import tidyup
41 from fba.http import federation
42 from fba.http import network
44 from fba.models import blocks
45 from fba.models import instances
47 from fba.networks import friendica
48 from fba.networks import lemmy
49 from fba.networks import mastodon
50 from fba.networks import misskey
51 from fba.networks import pleroma
# Module-wide logging setup: INFO by default; the commented-out line is kept
# for quickly enabling DEBUG output during local development.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)
def check_instance(args: argparse.Namespace) -> int:
    """Check a single command-line supplied domain: warn when it is invalid,
    blacklisted or already registered; otherwise report it as not known.

    NOTE(review): this excerpt is missing lines (the `status` assignments in
    each branch, an `else:` header and the final `return`), so the visible
    code is incomplete.
    """
    logger.debug("args.domain='%s' - CALLED!", args.domain)

    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
    # NOTE(review): `else:` header missing in excerpt
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    # NOTE(review): `return status` not visible in this excerpt
def check_nodeinfo(args: argparse.Namespace) -> int:
    """Sanity-check every stored nodeinfo_url: a relative URL always matches;
    otherwise the URL must contain the instance's domain in either plain or
    IDNA/punycode form, else a warning is logged.

    NOTE(review): the counter initialisation/increment for `cnt` and the
    final `return` are not visible in this excerpt.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")

    for row in database.cursor.fetchall():
        logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        # IDNA-encode the domain so punycode hosts in nodeinfo_url also match
        punycode = row["domain"].encode("idna").decode("utf-8")

        if row["nodeinfo_url"].startswith("/"):
            logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
        elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
            logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])

    logger.info("Found %d row(s)", cnt)
def fetch_pixelfed_api(args: argparse.Namespace) -> int:
    """Fetch the public server list from pixelfed.org's API and queue any
    new, wanted domains for instance fetching.

    NOTE(review): this excerpt is missing lines: `try:` headers, `return`
    statements, `continue` statements, a `for row in ...:` header and some
    call arguments / closing parentheses. Gaps are marked below.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    # NOTE(review): `try:` header missing in excerpt
        logger.debug("Checking CSRF from pixelfed.org")
        headers = csrf.determine("pixelfed.org", dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)

    logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
    fetched = network.get_json_api(
        # NOTE(review): host and headers arguments appear to be missing in excerpt
        "/api/v1/servers/all.json?scope=All&country=all&language=all",
        (config.get("connection_timeout"), config.get("read_timeout"))

    logger.debug("JSON API returned %d elements", len(fetched))
    if "error_message" in fetched:
        logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
    elif "data" not in fetched["json"]:
        logger.warning("API did not return JSON with 'data' element - EXIT!")

    rows = fetched["json"]["data"]
    logger.info("Checking %d fetched rows ...", len(rows))
    # NOTE(review): `for row in rows:` header missing in excerpt
        logger.debug("row[]='%s'", type(row))
        if "domain" not in row:
            logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
        elif row["domain"] == "":
            logger.debug("row[domain] is empty - SKIPPED!")
        elif not utils.is_domain_wanted(row["domain"]):
            logger.warning("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
        elif instances.is_registered(row["domain"]):
            logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
        elif instances.is_recent(row["domain"]):
            logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])

        # NOTE(review): `try:` header missing in excerpt
            logger.debug("Fetching instances from row[domain]='%s' ...", row["domain"])
            federation.fetch_instances(row["domain"], None, None, inspect.currentframe().f_code.co_name)

        except network.exceptions as exception:
            logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))

    logger.debug("Success! - EXIT!")
def fetch_bkali(args: argparse.Namespace) -> int:
    """Query the gql.api.bka.li GraphQL API for a sorted domain list and
    fetch instances for each new, wanted domain.

    NOTE(review): this excerpt is missing lines: `domains = list()`, `try:`
    headers, `continue` statements, closing parentheses and `return` values.
    Gaps are marked below.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))
    # NOTE(review): `try:` header missing in excerpt
        fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for 'gql.api.bka.li' returned error message='%s", fetched["error_message"])
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s", fetched["error"]["message"])

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        # NOTE(review): `if` header for the empty-result branch missing in excerpt
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
            elif entry["domain"] == "":
                logger.debug("entry[domain] is empty - SKIPPED!")
            elif not utils.is_domain_wanted(entry["domain"]):
                logger.warning("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])

            # NOTE(review): `else:` header missing in excerpt
                logger.debug("Adding domain='%s' ...", entry["domain"])
                domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))

    logger.debug("domains()=%d", len(domains))

    # NOTE(review): `if len(domains) > 0:`-style guard missing in excerpt
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            # NOTE(review): `try:` header missing in excerpt
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
def fetch_blocks(args: argparse.Namespace) -> int:
    """Fetch and store block lists from registered instances running
    pleroma/mastodon/lemmy/friendica/misskey, optionally limited to a single
    domain (args.domain) or software (args.software). Handles obfuscated
    entries ('*' / '?') via instances.deobfuscate() and can notify a bot.

    NOTE(review): this excerpt is missing many lines (returns, `continue`
    statements, `try:`/`if`/`else:` headers, `blocking`/`blockdict`
    initialisations, closing parentheses). Gaps are marked below; indentation
    was reconstructed and should be verified against the full source.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for single args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
    # NOTE(review): `else:` header and closing parentheses missing in excerpt
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
        blocker = tidyup.domain(blocker)
        logger.debug("blocker='%s' - AFTER!", blocker)

        # NOTE(review): `if blocker == "":`-style header missing in excerpt
            logger.warning("blocker is now empty!")
        elif nodeinfo_url is None or nodeinfo_url == "":
            logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
        elif not utils.is_domain_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)

        logger.debug("blocker='%s'", blocker)
        instances.set_last_blocked(blocker)
        # Reset obfuscation flag; it is re-set below when deobfuscation fails
        instances.set_has_obfuscation(blocker, False)

        # Per-software dispatch to the matching fetch_blocks() implementation
        if software == "pleroma":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
        elif software == "mastodon":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
        elif software == "lemmy":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
        elif software == "friendica":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = friendica.fetch_blocks(blocker)
        elif software == "misskey":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = misskey.fetch_blocks(blocker)
        # NOTE(review): `else:` header missing in excerpt
            logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)

        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                # NOTE(review): `if row is None:`-style header missing in excerpt
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)

                # NOTE(review): `else:`-style header missing in excerpt; on success
                # the deobfuscated row replaces the wildcard entry
                    block["blocked"] = row["domain"]
                    origin = row["origin"]
                    nodeinfo_url = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some obscure them with question marks, not sure if that's dependent on version or not
                row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                # NOTE(review): `if row is None:`-style header missing in excerpt
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)

                # NOTE(review): `else:`-style header missing in excerpt
                    block["blocked"] = row["domain"]
                    origin = row["origin"]
                    nodeinfo_url = row["nodeinfo_url"]

            logger.debug("Looking up instance by domainm, blocked='%s'", block["blocked"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
            elif not utils.is_domain_wanted(block["blocked"]):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            # Normalise software-specific block level names to canonical ones
            block["block_level"] = utils.alias_block_level(block["block_level"])

            if utils.process_block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
                # NOTE(review): `blockdict.append({` line and closing `})` missing in excerpt
                    "blocked": block["blocked"],
                    "reason" : block["reason"],

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d'", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
def fetch_observer(args: argparse.Namespace) -> int:
    """Scrape fediverse.observer: determine the software types (from the site
    menu, or args.software when given) and fetch the per-software instance
    tables, queueing each new, wanted domain for instance fetching.

    NOTE(review): this excerpt is missing lines (`types = list()`, closing
    parentheses, `continue` statements, `for`/`if`/`else:`/`try:` headers and
    `return`). Gaps are marked below.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    if args.software is None:
        logger.info("Fetching software list ...")
        raw = utils.fetch_url(
            "https://fediverse.observer",
            # NOTE(review): headers argument appears to be missing in excerpt
            (config.get("connection_timeout"), config.get("read_timeout"))
        logger.debug("raw[%s]()=%d", type(raw), len(raw))

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        # Software types come from the site's "Softwares" dropdown menu
        items = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"}).findAll("a", {"class": "dropdown-item"})
        logger.debug("items[]='%s'", type(items))

        logger.info("Checking %d menu items ...", len(items))
        # NOTE(review): `for item in items:` header missing in excerpt
            logger.debug("item[%s]='%s'", type(item), item)
            if item.text.lower() == "all":
                logger.debug("Skipping 'All' menu entry ...")

            logger.debug("Appending item.text='%s' ...", item.text)
            types.append(tidyup.domain(item.text))
    # NOTE(review): `else:` header missing in excerpt
        logger.info("Adding args.software='%s' as type ...", args.software)
        types.append(args.software)

    logger.info("Fetching %d different table data ...", len(types))
    for software in types:
        logger.debug("software='%s' - BEFORE!", software)
        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)

        # NOTE(review): `try:` header missing in excerpt
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://fediverse.observer/app/views/tabledata.php?software={software}",
                # NOTE(review): headers argument appears to be missing in excerpt
                (config.get("connection_timeout"), config.get("read_timeout"))
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from fediverse.observer: '%s'", software, type(exception))

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        # NOTE(review): `for item in items:` header missing in excerpt
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()

            logger.debug("domain='%s' - AFTER!", domain)
            # NOTE(review): `if domain == "":`-style header missing in excerpt
                logger.debug("domain is empty - SKIPPED!")
            elif not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
            elif instances.is_recent(domain):
                logger.debug("domain='%s' is recently being handled - SKIPPED!", domain)

            # Canonicalise the software name before fetching
            software = software_helper.alias(software)
            logger.info("Fetching instances for domain='%s'", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
def fetch_todon_wiki(args: argparse.Namespace) -> int:
    """Scrape todon.eu's wiki domain-block page (silenced/limited and
    suspended server lists) and record the blocks under blocker 'todon.eu',
    optionally notifying the bot for new 'reject' blocks.

    NOTE(review): this excerpt is missing lines (`blocklist`/`blockdict`
    initialisation, `try:`/`else:` headers, `continue` statements,
    blockdict.append() lines and `return`). Gaps are marked below.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    raw = utils.fetch_url("https://wiki.todon.eu/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    # The wiki groups domains as <li> items under per-severity <h3> headings
    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d'", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                # NOTE(review): `try:` header missing in excerpt
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, 'chaos.social', None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    # Message mentions "fetch_cs" although this is fetch_todon_wiki —
                    # looks like a copy/paste artifact in the original log text
                    logger.warning("Exception '%s' during fetching instances (fetch_cs) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if blocks.is_instance_blocked("todon.eu", blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if utils.process_block("todon.eu", blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='todon.eu' ...", blocked, block_level)
                # NOTE(review): blockdict.append({...}) lines missing in excerpt

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='todon.eu',blockdict()=%d ...", len(blockdict))
        network.send_bot_post("todon.eu", blockdict)

    logger.debug("Success! - EXIT!")
def fetch_cs(args: argparse.Namespace):
    """Parse chaos.social's federation.md (rendered from Markdown) for the
    'silenced' and 'blocked' instance tables and record the blocks under
    blocker 'chaos.social', optionally notifying the bot for 'reject' blocks.

    NOTE(review): this excerpt is missing lines (`extensions` and
    `domains`/`blockdict` initialisation, `try:` headers, `continue`
    statements, blockdict.append() header and `return`). Gaps are marked.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    raw = utils.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    # Render the Markdown to HTML first, then parse the resulting tables
    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc()=%d[]='%s'", len(doc), type(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    domains["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    domains["reject"] = federation.find_domains(blocked)

    logger.debug("domains[silenced]()=%d,domains[reject]()=%d", len(domains["silenced"]), len(domains["reject"]))

    for block_level in domains:
        logger.info("block_level='%s' has %d row(s)", block_level, len(domains[block_level]))

        for row in domains[block_level]:
            logger.debug("row[%s]='%s'", type(row), row)
            if instances.is_recent(row["domain"], "last_blocked"):
                logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
            elif not instances.is_registered(row["domain"]):
                # NOTE(review): `try:` header missing in excerpt
                    logger.info("Fetching instances from domain='%s' ...", row["domain"])
                    federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                    instances.set_last_error(row["domain"], exception)

            if utils.process_block("chaos.social", row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='chaos.social' ...", row["domain"], block_level)
                # NOTE(review): `blockdict.append({` line and closing `})` missing in excerpt
                    "blocked": row["domain"],
                    "reason" : row["reason"],

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='chaos.social',blockdict()=%d ...", len(blockdict))
        network.send_bot_post("chaos.social", blockdict)

    logger.debug("Success! - EXIT!")
def fetch_fba_rss(args: argparse.Namespace) -> int:
    """Parse an FBA-specific RSS feed (args.feed) and fetch instances for
    each new, wanted domain found in the feed items' links.

    NOTE(review): this excerpt is missing lines (`domains = list()`,
    `if`/`else`/`try:` headers, `continue` statements and `return`).
    Gaps are marked below.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item='%s'", item)
            # assumes item.link carries the domain after an '=' (query-string
            # style link) — TODO confirm against the actual feed format
            domain = tidyup.domain(item.link.split("=")[1])

            logger.debug("domain='%s' - AFTER!", domain)
            # NOTE(review): `if domain == "":`-style header missing in excerpt
                logger.debug("domain is empty - SKIPPED!")
            elif not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))

    # NOTE(review): `if len(domains) > 0:`-style guard missing in excerpt
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            # NOTE(review): `try:` header missing in excerpt
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
def fetch_fbabot_atom(args: argparse.Namespace) -> int:
    """Parse the FBA bot account's ATOM feed on ryona.agency, extract domains
    from the anchor hrefs inside each entry's HTML content, and fetch
    instances for each new, wanted domain.

    NOTE(review): this excerpt is missing lines (`domains = list()`,
    `if`/`try:` headers, `continue` statements and `return`). Gaps marked.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))
    feed = "https://ryona.agency/users/fba/feed.atom"

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            # Each entry's content is HTML; domains live in <a href="..."> attributes
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                logger.debug("element[]='%s'", type(element))
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href)

                    logger.debug("domain='%s' - AFTER!", domain)
                    # NOTE(review): `if domain == "":`-style header missing in excerpt
                        logger.debug("domain is empty - SKIPPED!")
                    elif not utils.is_domain_wanted(domain):
                        logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))

    # NOTE(review): `if len(domains) > 0:`-style guard missing in excerpt
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            # NOTE(review): `try:` header missing in excerpt
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, "ryona.agency", None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
def fetch_instances(args: argparse.Namespace) -> int:
    """Fetch instances for args.domain, then (unless stopped by a guard that
    is not visible here) re-crawl known instances whose last fetch is older
    than the configured 'recheck_instance' interval.

    NOTE(review): this excerpt is missing lines (`try:` headers, a guard
    around the early EXIT, a `for row in rows:` header, `continue`
    statements, closing parentheses and `return` values). Gaps are marked.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    # NOTE(review): `try:` header missing in excerpt
        logger.info("Fetching instances from args.domain='%s' ...", args.domain)
        federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update_data(args.domain)

    # NOTE(review): conditional header for this early-exit branch missing in excerpt
        logger.debug("Not fetching more instances - EXIT!")

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    # NOTE(review): `for row in rows:` header missing in excerpt
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] == "":
            logger.debug("row[domain] is empty - SKIPPED!")
        elif not utils.is_domain_wanted(row["domain"]):
            logger.warning("Domain row[domain]='%s' is not wanted - SKIPPED!", row["domain"])

        # NOTE(review): `try:` header missing in excerpt
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", row["domain"], row["origin"], row["software"], row["nodeinfo_url"])
            federation.fetch_instances(row["domain"], row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[domain]='%s'", type(exception), row["domain"])
            instances.set_last_error(row["domain"], exception)

    logger.debug("Success - EXIT!")
790 def fetch_oliphant(args: argparse.Namespace) -> int:
791 logger.debug("args[]='%s' - CALLED!", type(args))
795 base_url = "https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists"
800 "blocker": "artisan.chat",
801 "csv_url": "mastodon/artisan.chat.csv",
803 "blocker": "mastodon.art",
804 "csv_url": "mastodon/mastodon.art.csv",
806 "blocker": "pleroma.envs.net",
807 "csv_url": "mastodon/pleroma.envs.net.csv",
809 "blocker": "oliphant.social",
810 "csv_url": "mastodon/_unified_tier3_blocklist.csv",
812 "blocker": "mastodon.online",
813 "csv_url": "mastodon/mastodon.online.csv",
815 "blocker": "mastodon.social",
816 "csv_url": "mastodon/mastodon.social.csv",
818 "blocker": "mastodon.social",
819 "csv_url": "other/missing-tier0-mastodon.social.csv",
821 "blocker": "rage.love",
822 "csv_url": "mastodon/rage.love.csv",
824 "blocker": "sunny.garden",
825 "csv_url": "mastodon/sunny.garden.csv",
827 "blocker": "solarpunk.moe",
828 "csv_url": "mastodon/solarpunk.moe.csv",
830 "blocker": "toot.wales",
831 "csv_url": "mastodon/toot.wales.csv",
833 "blocker": "union.place",
834 "csv_url": "mastodon/union.place.csv",
840 logger.debug("Downloading %d files ...", len(blocklists))
841 for block in blocklists:
842 # Is domain given and not equal blocker?
843 if isinstance(args.domain, str) and args.domain != block["blocker"]:
844 logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
846 elif args.domain in domains:
847 logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
849 elif instances.is_recent(block["blocker"]):
850 logger.debug("block[blocker]='%s' has been recently crawled - SKIPPED!", block["blocker"])
854 logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
855 response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
857 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
858 if not response.ok or response.status_code > 399 or response.content == "":
859 logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
862 logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
863 reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")
865 logger.debug("reader[]='%s'", type(reader))
868 logger.debug("row[%s]='%s'", type(row), row)
869 domain = severity = None
870 reject_media = reject_reports = False
872 domain = row["#domain"]
873 elif "domain" in row:
874 domain = row["domain"]
876 logger.debug("row='%s' does not contain domain column", row)
879 if "#severity" in row:
880 severity = row["#severity"]
881 elif "severity" in row:
882 severity = row["severity"]
884 logger.debug("row='%s' does not contain severity column", row)
887 if "#reject_media" in row and row["#reject_media"].lower() == "true":
889 elif "reject_media" in row and row["reject_media"].lower() == "true":
892 if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
893 reject_reports = True
894 elif "reject_reports" in row and row["reject_reports"].lower() == "true":
895 reject_reports = True
897 logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
899 logger.debug("domain is empty - SKIPPED!")
901 elif not utils.is_domain_wanted(domain):
902 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
905 logger.debug("Marking domain='%s' as handled", domain)
906 domains.append(domain)
908 logger.debug("Processing domain='%s' ...", domain)
909 processed = utils.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
910 logger.debug("processed='%s'", processed)
912 if utils.process_block(block["blocker"], domain, None, "reject") and config.get("bot_enabled"):
913 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", domain, block["block_level"], block["blocker"])
916 "reason" : block["reason"],
920 utils.process_block(block["blocker"], domain, None, "reject_media")
922 utils.process_block(block["blocker"], domain, None, "reject_reports")
924 logger.debug("Invoking commit() ...")
925 database.connection.commit()
927 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
928 if config.get("bot_enabled") and len(blockdict) > 0:
929 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
930 network.send_bot_post(block["blocker"], blockdict)
932 logger.debug("Success! - EXIT!")
def fetch_txt(args: argparse.Namespace) -> int:
    """Fetch plain-text blocklists (one domain per line) from a static list of
    URLs and run every listed domain through the generic domain processing.

    @param args Parsed command-line arguments (unused except for logging)
    @return 0 on success
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    # Static list of text blocklists: the publishing instance ("blocker")
    # and the URL of the raw text file it serves.
    urls = ({
        "blocker": "seirdy.one",
        "url"    : "https://seirdy.one/pb/bsl.txt",
    },)

    logger.info("Checking %d text file(s) ...", len(urls))
    for row in urls:
        logger.debug("Fetching row[url]='%s' ...", row["url"])
        response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code < 300 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s' - BEFORE!", domain)
                # Normalize the raw line into a clean domain name
                domain = tidyup.domain(domain)

                logger.debug("domain='%s' - AFTER!", domain)
                if domain == "":
                    logger.debug("domain is empty - SKIPPED!")
                    continue
                elif not utils.is_domain_wanted(domain):
                    logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                    continue
                elif instances.is_recent(domain):
                    logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                    continue

                logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
                processed = utils.process_domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)
                if not processed:
                    logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
                    continue

    logger.debug("Success! - EXIT!")
    return 0
def fetch_fedipact(args: argparse.Namespace) -> int:
    """Scrape the fedipact.online signatory list (an HTML page of <li>
    entries) and queue every newly seen instance for a federation crawl.

    @param args Parsed command-line arguments (unused except for logging)
    @return 0 on success
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    response = utils.fetch_url("https://fedipact.online", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and response.text != "":
        logger.debug("Parsing %d Bytes ...", len(response.text))

        doc = bs4.BeautifulSoup(response.text, "html.parser")
        logger.debug("doc[]='%s'", type(doc))

        # Every signatory instance is rendered as a list item
        rows = doc.findAll("li")
        logger.info("Checking %d row(s) ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            domain = tidyup.domain(row.contents[0])

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.info("Fetching domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
# Scrapes the FediBlock page on joinfediverse.wiki: parses its "wikitable"
# tables, maps the table headers (domain/instance, subdomain(s),
# block reason(s)) to column positions, collects one block record per row,
# then records the blocks for every local 'climatejustice.%' instance.
# NOTE(review): this chunk is an elided extraction - blank lines and some
# structural lines (loop counters, 'continue', 'block = {}', 'blockdict'
# initialisation/appends) are missing from view; comments below only state
# what the visible lines establish.
1021 def fetch_joinfediverse(args: argparse.Namespace) -> int:
1022 logger.debug("args[]='%s' - CALLED!", type(args))
# Fetch the raw wiki HTML (timeouts come from configuration)
1025 raw = utils.fetch_url("https://joinfediverse.wiki/FediBlock", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
1026 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1028 doc = bs4.BeautifulSoup(raw, "html.parser")
1029 logger.debug("doc[]='%s'", type(doc))
# Only tables styled as MediaWiki "wikitable" carry block data
1031 tables = doc.findAll("table", {"class": "wikitable"})
1033 logger.info("Analyzing %d table(s) ...", len(tables))
1035 for table in tables:
1036 logger.debug("table[]='%s'", type(table))
1038 rows = table.findAll("tr")
1039 logger.info("Checking %d row(s) ...", len(rows))
# Maps column index -> recognized header name for the current table
1040 block_headers = dict()
1042 logger.debug("row[%s]='%s'", type(row), row)
1044 headers = row.findAll("th")
1045 logger.debug("Found headers()=%d header(s)", len(headers))
# A row with more than one <th> is treated as the header row: rebuild
# the column map from it
1046 if len(headers) > 1:
1047 block_headers = dict()
1049 for header in headers:
1051 logger.debug("header[]='%s',cnt=%d", type(header), cnt)
1052 text = header.contents[0]
1054 logger.debug("text[]='%s'", type(text))
# Skip non-string header contents (e.g. nested tags) and header
# cells that are themselves domain names
1055 if not isinstance(text, str):
1056 logger.debug("text[]='%s' is not 'str' - SKIPPED!", type(text))
1058 elif validators.domain(text.strip()):
1059 logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
1062 text = tidyup.domain(text.strip())
1063 logger.debug("text='%s'", text)
# Remember only the columns this scraper understands
1064 if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
1065 logger.debug("Found header: '%s'=%d", text, cnt)
1066 block_headers[cnt] = text
1068 elif len(block_headers) == 0:
1069 logger.debug("row is not scrapable - SKIPPED!")
# Data row with a known column map: walk all cells by position
1071 elif len(block_headers) > 0:
1072 logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
1076 for element in row.find_all(["th", "td"]):
1078 logger.debug("element[]='%s',cnt=%d", type(element), cnt)
1079 if cnt in block_headers:
1080 logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])
1082 text = element.text.strip()
# "domain"/"instance" columns are both stored under the key "blocked"
1083 key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"
1085 logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
1086 if key in ["domain", "instance"]:
1088 elif key == "reason":
1089 block[key] = tidyup.reason(text)
# Multiple subdomains are written slash-separated in one cell
1090 elif key == "subdomain(s)":
1093 block[key] = text.split("/")
1095 logger.debug("key='%s'", key)
1098 logger.debug("block()=%d ...", len(block))
1100 logger.debug("Appending block()=%d ...", len(block))
1101 blocklist.append(block)
1103 logger.debug("blocklist()=%d", len(blocklist))
# Blockers are all locally known 'climatejustice.%' instances
1105 database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
1106 domains = database.cursor.fetchall()
1108 logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
# Expand subdomain lists into <subdomain>.<origin> entries.
# NOTE(review): the same 'block' dict object is mutated and appended on
# every iteration, so all appended references alias one dict and only the
# last subdomain's "blocked" value survives - looks like a latent bug;
# a per-subdomain copy was presumably intended. Confirm before relying on it.
1110 for block in blocklist:
1111 logger.debug("block='%s'", block)
1112 if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
1113 origin = block["blocked"]
1114 for subdomain in block["subdomain(s)"]:
1115 block["blocked"] = subdomain + "." + origin
1116 blocking.append(block)
1118 blocking.append(block)
# NOTE(review): '%d' is given the list itself; len(blocking) was presumably
# intended (logging will report a formatting error otherwise).
1120 logger.debug("blocking()=%d", blocking)
# First pass: make sure every blocked domain is known/fetched
1121 for block in blocking:
1122 logger.debug("block[]='%s'", type(block))
1123 block["blocked"] = tidyup.domain(block["blocked"])
1125 logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
1126 if block["blocked"] == "":
1127 logger.debug("block[blocked] is empty - SKIPPED!")
1129 elif not utils.is_domain_wanted(block["blocked"]):
1130 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1132 elif instances.is_recent(block["blocked"]):
1133 logger.debug("blocked='%s' has been recently checked - SKIPPED!", block["blocked"])
# NOTE(review): "Proccessing" typo in this runtime string (left unchanged here)
1136 logger.info("Proccessing blocked='%s' ...", block["blocked"])
1137 utils.process_domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
# Second pass: record the block rows for each climatejustice.* blocker
1140 for blocker in domains:
# fetchall() returns tuples; first column is the domain
1141 blocker = blocker[0]
1142 logger.debug("blocker[%s]='%s'", type(blocker), blocker)
1144 for block in blocking:
1145 logger.debug("block[blocked]='%s',block[reason]='%s' - BEFORE!", block["blocked"], block["reason"])
1146 block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None
1148 logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
1149 if block["blocked"] == "":
1150 logger.debug("block[blocked] is empty - SKIPPED!")
1152 elif not utils.is_domain_wanted(block["blocked"]):
1153 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1156 logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
# Record the block; queue a bot notification entry when enabled
1157 if utils.process_block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
1158 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
1160 "blocked": block["blocked"],
1161 "reason" : block["reason"],
# Flush accumulated per-instance changes before committing
1164 if instances.has_pending(blocker):
1165 logger.debug("Flushing updates for blocker='%s' ...", blocker)
1166 instances.update_data(blocker)
1168 logger.debug("Invoking commit() ...")
1169 database.connection.commit()
1171 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1172 if config.get("bot_enabled") and len(blockdict) > 0:
# NOTE(review): format string is missing the closing quote after %s
# ("blocker='%s,..."), left unchanged here as it is runtime text.
1173 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", blocker, len(blockdict))
1174 network.send_bot_post(blocker, blockdict)
1176 logger.debug("Success! - EXIT!")
# Re-fetches the block lists of instances flagged with has_obfuscation = 1
# (optionally filtered by --domain or --software), tries to deobfuscate
# wildcarded/hashed entries via utils.deobfuscate_domain(), records newly
# resolved blocks, and clears the flag once a list is fully deobfuscated.
# NOTE(review): this chunk is an elided extraction - initialisations such as
# 'blocking', 'obfuscated', 'blockdict' and several 'continue' lines are
# missing from view; comments only state what the visible lines establish.
1179 def recheck_obfuscation(args: argparse.Namespace) -> int:
1180 logger.debug("args[]='%s' - CALLED!", type(args))
# Select which flagged instances to recheck
1184 if isinstance(args.domain, str) and args.domain != "" and utils.is_domain_wanted(args.domain):
1185 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
# NOTE(review): 'validators.domain(args.software) == args.software' compares
# a validation result against the string itself and can hardly be truthy for
# a software name - the --software filter is presumably broken; confirm
# against the CLI definition before changing.
1186 elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
1187 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
1189 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")
1191 rows = database.cursor.fetchall()
1192 logger.info("Checking %d domains ...", len(rows))
1194 logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
# Dispatch to the software-specific block-list fetcher
1197 if row["software"] == "pleroma":
1198 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1199 blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
1200 elif row["software"] == "mastodon":
1201 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1202 blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
1203 elif row["software"] == "lemmy":
1204 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1205 blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
1206 elif row["software"] == "friendica":
1207 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1208 blocking = friendica.fetch_blocks(row["domain"])
1209 elif row["software"] == "misskey":
1210 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1211 blocking = misskey.fetch_blocks(row["domain"])
# NOTE(review): "sofware" typo in this runtime string (left unchanged here)
1213 logger.warning("Unknown sofware: domain='%s',software='%s'", row["domain"], row["software"])
1215 logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1218 for block in blocking:
1219 logger.debug("block[blocked]='%s'", block["blocked"])
# Filter out empty, reversed-IP (.arpa), fake (.tld) and Tor (.onion) entries
1222 if block["blocked"] == "":
1223 logger.debug("block[blocked] is empty - SKIPPED!")
1225 elif block["blocked"].endswith(".arpa"):
1226 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1228 elif block["blocked"].endswith(".tld"):
1229 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1231 elif block["blocked"].endswith(".onion"):
1232 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
# Entries containing wildcards are obfuscated: count them and attempt to
# resolve the real domain (an optional per-entry hash assists matching)
1234 elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
1235 logger.debug("block='%s' is obfuscated.", block["blocked"])
1236 obfuscated = obfuscated + 1
1237 blocked = utils.deobfuscate_domain(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
1238 elif not utils.is_domain_wanted(block["blocked"]):
1239 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1241 elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1242 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
1245 logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
# A successful deobfuscation yields a concrete domain differing from the entry
1246 if blocked is not None and blocked != block["blocked"]:
1247 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
# Resolved, so no longer counts as obfuscated
1248 obfuscated = obfuscated - 1
1249 if blocks.is_instance_blocked(row["domain"], blocked):
1250 logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
# Canonicalize the block level before recording
1253 block["block_level"] = utils.alias_block_level(block["block_level"])
1255 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
# Record the resolved block; queue a bot notification for 'reject' levels
1256 if utils.process_block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1257 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
1260 "reason" : block["reason"],
1263 logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
# Clear the flag once every entry of a non-empty list was deobfuscated
1264 if obfuscated == 0 and len(blocking) > 0:
1265 logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
1266 instances.set_has_obfuscation(row["domain"], False)
1268 if instances.has_pending(row["domain"]):
1269 logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1270 instances.update_data(row["domain"])
1272 logger.debug("Invoking commit() ...")
1273 database.connection.commit()
1275 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1276 if config.get("bot_enabled") and len(blockdict) > 0:
# NOTE(review): format string is missing the closing quote after %s
# ("blocker='%s,..."), left unchanged here as it is runtime text.
1277 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", row["domain"], len(blockdict))
1278 network.send_bot_post(row["domain"], blockdict)
1280 logger.debug("Success! - EXIT!")
def fetch_fedilist(args: argparse.Namespace) -> int:
    """Fetch the instance CSV from demo.fedilist.com (optionally filtered by
    --software) and queue every wanted, not recently crawled domain for a
    federation crawl.

    @param args Parsed command-line arguments; args.software narrows the export
    @return 0 on success, 1 when the download failed
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    # Onion instances are excluded from the export
    url = "http://demo.fedilist.com/instance/csv?onion=not"
    if args.software is not None and args.software != "":
        logger.debug("args.software='%s'", args.software)
        url = f"http://demo.fedilist.com/instance/csv?software={args.software}&onion=not"

    logger.info("Fetching url='%s' from fedilist.com ...", url)
    response = reqto.get(
        url,
        headers=network.web_headers,
        timeout=(config.get("connection_timeout"), config.get("read_timeout")),
        allow_redirects=False
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    # Guard against error pages being fed to the CSV parser (the other
    # fetchers in this module perform the same status check).
    if not response.ok or response.status_code > 399 or len(response.content) == 0:
        logger.warning("Could not fetch url='%s' - EXIT!", url)
        return 1

    reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")

    logger.debug("reader[]='%s'", type(reader))
    for row in reader:
        logger.debug("row[]='%s'", type(row))
        # Normalize the exported hostname into a clean domain name
        domain = tidyup.domain(row["hostname"])
        logger.debug("domain='%s' - AFTER!", domain)

        if domain == "":
            logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"])
            continue
        elif not utils.is_domain_wanted(domain):
            logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
            continue
        elif instances.is_recent(domain):
            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
            continue

        logger.info("Fetching instances from domain='%s' ...", domain)
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0