1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
31 from fba import database
34 from fba.helpers import blacklist
35 from fba.helpers import config
36 from fba.helpers import cookies
37 from fba.helpers import locking
38 from fba.helpers import software as software_helper
39 from fba.helpers import tidyup
41 from fba.http import federation
42 from fba.http import network
44 from fba.models import blocks
45 from fba.models import instances
47 from fba.networks import friendica
48 from fba.networks import lemmy
49 from fba.networks import mastodon
50 from fba.networks import misskey
51 from fba.networks import pleroma
# Module-wide logging setup: INFO by default; the commented-out line below
# is the usual toggle for per-module DEBUG output during development.
53 logging.basicConfig(level=logging.INFO)
54 logger = logging.getLogger(__name__)
55 #logger.setLevel(logging.DEBUG)
# Sanity-check a single domain given on the command line: warn when it is
# syntactically invalid, blacklisted, or already registered; otherwise log
# that it is unknown.
# NOTE(review): the `status` assignments and the `return status` line are
# not visible in this extract (gaps in the embedded numbering) -- confirm
# against the full source before relying on the return value.
57 def check_instance(args: argparse.Namespace) -> int:
58 logger.debug("args.domain='%s' - CALLED!", args.domain)
60 if not validators.domain(args.domain):
61 logger.warning("args.domain='%s' is not valid", args.domain)
63 elif blacklist.is_blacklisted(args.domain):
64 logger.warning("args.domain='%s' is blacklisted", args.domain)
66 elif instances.is_registered(args.domain):
67 logger.warning("args.domain='%s' is already registered", args.domain)
70 logger.info("args.domain='%s' is not known", args.domain)
72 logger.debug("status=%d - EXIT!", status)
# Audit all stored nodeinfo URLs: flag rows whose nodeinfo_url mentions
# neither the domain nor its IDNA (punycode) form. Relative URLs ("/...")
# trivially match and are skipped.
# NOTE(review): the initialization/increment of `cnt` and the return
# statement are not visible in this extract -- confirm in the full source.
75 def check_nodeinfo(args: argparse.Namespace) -> int:
76 logger.debug("args[]='%s' - CALLED!", type(args))
79 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")
82 for row in database.cursor.fetchall():
83 logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
# IDNA-encode the domain so internationalized hostnames can be matched
# against the (ASCII) nodeinfo URL.
84 punycode = row["domain"].encode("idna").decode("utf-8")
86 if row["nodeinfo_url"].startswith("/"):
87 logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
89 elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
90 logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
93 logger.info("Found %d row(s)", cnt)
# Fetch the public server list from the pixelfed.org API and queue every
# new, wanted, not-recently-crawled domain for instance fetching.
# NOTE(review): several lines (try:, return statements, the row loop
# header) are missing from this extract -- numbering gaps confirm it.
98 def fetch_pixelfed_api(args: argparse.Namespace) -> int:
99 logger.debug("args[]='%s' - CALLED!", type(args))
101 # No CSRF by default, you don't have to add network.api_headers by yourself here
105 logger.debug("Checking CSRF from pixelfed.org")
106 headers = csrf.determine("pixelfed.org", dict())
107 except network.exceptions as exception:
108 logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
112 logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
113 fetched = network.get_json_api(
115 "/api/v1/servers/all.json?scope=All&country=all&language=all",
117 (config.get("connection_timeout"), config.get("read_timeout"))
120 logger.debug("JSON API returned %d elements", len(fetched))
# Bail out on transport-level errors or an unexpected payload shape.
121 if "error_message" in fetched:
122 logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
124 elif "data" not in fetched["json"]:
125 logger.warning("API did not return JSON with 'data' element - EXIT!")
128 rows = fetched["json"]["data"]
129 logger.info("Checking %d fetched rows ...", len(rows))
131 logger.debug("row[]='%s'", type(row))
# Guard chain: skip rows without a usable, wanted, new domain.
132 if "domain" not in row:
133 logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
135 elif row["domain"] == "":
136 logger.debug("row[domain] is empty - SKIPPED!")
138 elif not utils.is_domain_wanted(row["domain"]):
139 logger.warning("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
141 elif instances.is_registered(row["domain"]):
142 logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
144 elif instances.is_recent(row["domain"]):
145 logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
148 logger.debug("Fetching instances from row[domain]='%s' ...", row["domain"])
# The current function's name is passed as the fetch "command" origin.
149 federation.fetch_instances(row["domain"], None, None, inspect.currentframe().f_code.co_name)
151 except network.exceptions as exception:
152 logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
155 logger.debug("Success! - EXIT!")
# Query the gql.api.bka.li GraphQL endpoint for a sorted domain list and
# fetch instances for every new, wanted, not-recently-crawled domain.
# NOTE(review): the `domains = list()` initializer, try:/return lines and
# some guards are missing from this extract (numbering gaps).
158 def fetch_bkali(args: argparse.Namespace) -> int:
159 logger.debug("args[]='%s' - CALLED!", type(args))
162 fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
163 "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
166 logger.debug("fetched[]='%s'", type(fetched))
167 if "error_message" in fetched:
168 logger.warning("post_json_api() for 'gql.api.bka.li' returned error message='%s", fetched["error_message"])
170 elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
171 logger.warning("post_json_api() returned error: '%s", fetched["error"]["message"])
174 rows = fetched["json"]
176 logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
# Hard failures on structurally invalid GraphQL responses.
178 raise Exception("WARNING: Returned no records")
179 elif "data" not in rows:
180 raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
181 elif "nodeinfo" not in rows["data"]:
182 raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")
184 for entry in rows["data"]["nodeinfo"]:
185 logger.debug("entry[%s]='%s'", type(entry), entry)
186 if "domain" not in entry:
187 logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
189 elif entry["domain"] == "":
190 logger.debug("entry[domain] is empty - SKIPPED!")
192 elif not utils.is_domain_wanted(entry["domain"]):
193 logger.warning("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
195 elif instances.is_registered(entry["domain"]):
196 logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
198 elif instances.is_recent(entry["domain"]):
199 logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
202 logger.debug("Adding domain='%s' ...", entry["domain"])
203 domains.append(entry["domain"])
205 except network.exceptions as exception:
206 logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
209 logger.debug("domains()=%d", len(domains))
213 logger.info("Adding %d new instances ...", len(domains))
214 for domain in domains:
216 logger.info("Fetching instances from domain='%s' ...", domain)
# 'tak.teleyal.blog' is recorded as the originating instance for
# domains discovered through this source.
217 federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
218 except network.exceptions as exception:
219 logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
220 instances.set_last_error(domain, exception)
223 logger.debug("Success - EXIT!")
# Core crawler: fetch block lists from registered instances (optionally a
# single --domain or a single --software), normalize each blocked entry,
# deobfuscate masked domains where possible, persist blocks, and
# optionally notify a bot about new "reject" blocks.
# NOTE(review): this extract is missing many lines (locks, `continue`s,
# `blockdict = list()`, `return` statements) -- numbering gaps confirm it.
226 def fetch_blocks(args: argparse.Namespace) -> int:
227 logger.debug("args[]='%s' - CALLED!", type(args))
# Up-front validation when a single domain was requested.
228 if args.domain is not None and args.domain != "":
229 logger.debug("args.domain='%s' - checking ...", args.domain)
230 if not validators.domain(args.domain):
231 logger.warning("args.domain='%s' is not valid.", args.domain)
233 elif blacklist.is_blacklisted(args.domain):
234 logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
236 elif not instances.is_registered(args.domain):
237 logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
# Select which blockers to (re-)crawl: one domain, one software, or all
# supported software past the recheck interval.
242 if args.domain is not None and args.domain != "":
243 # Re-check single domain
244 logger.debug("Querying database for single args.domain='%s' ...", args.domain)
245 database.cursor.execute(
246 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
248 elif args.software is not None and args.software != "":
249 # Re-check single software
250 logger.debug("Querying database for args.software='%s' ...", args.software)
251 database.cursor.execute(
252 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
255 # Re-check after "timeout" (aka. minimum interval)
256 database.cursor.execute(
257 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
260 rows = database.cursor.fetchall()
261 logger.info("Checking %d entries ...", len(rows))
262 for blocker, software, origin, nodeinfo_url in rows:
263 logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
264 blocker = tidyup.domain(blocker)
265 logger.debug("blocker='%s' - AFTER!", blocker)
268 logger.warning("blocker is now empty!")
270 elif nodeinfo_url is None or nodeinfo_url == "":
271 logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
273 elif not utils.is_domain_wanted(blocker):
274 logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
277 logger.debug("blocker='%s'", blocker)
# Mark the crawl attempt and reset the obfuscation flag before fetching.
278 instances.set_last_blocked(blocker)
279 instances.set_has_obfuscation(blocker, False)
# Dispatch to the software-specific block-list fetcher.
283 if software == "pleroma":
284 logger.info("blocker='%s',software='%s'", blocker, software)
285 blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
286 elif software == "mastodon":
287 logger.info("blocker='%s',software='%s'", blocker, software)
288 blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
289 elif software == "lemmy":
290 logger.info("blocker='%s',software='%s'", blocker, software)
291 blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
292 elif software == "friendica":
293 logger.info("blocker='%s',software='%s'", blocker, software)
294 blocking = friendica.fetch_blocks(blocker)
295 elif software == "misskey":
296 logger.info("blocker='%s',software='%s'", blocker, software)
297 blocking = misskey.fetch_blocks(blocker)
299 logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)
301 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
302 instances.set_total_blocks(blocker, blocking)
304 logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
306 for block in blocking:
307 logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])
309 if block["block_level"] == "":
310 logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
# Normalize the blocked domain and reason text.
313 logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
314 block["blocked"] = tidyup.domain(block["blocked"])
315 block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
316 logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])
318 if block["blocked"] == "":
319 logger.warning("blocked is empty, blocker='%s'", blocker)
321 elif block["blocked"].endswith(".onion"):
322 logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
324 elif block["blocked"].endswith(".arpa"):
325 logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
327 elif block["blocked"].endswith(".tld"):
328 logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
# Obfuscated domains: '*' or '?' masking, resolved via stored hashes.
330 elif block["blocked"].find("*") >= 0:
331 logger.debug("blocker='%s' uses obfuscated domains", blocker)
333 # Some friendica servers also obscure domains without hash
334 row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)
336 logger.debug("row[]='%s'", type(row))
338 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
339 instances.set_has_obfuscation(blocker, True)
342 block["blocked"] = row["domain"]
343 origin = row["origin"]
344 nodeinfo_url = row["nodeinfo_url"]
345 elif block["blocked"].find("?") >= 0:
346 logger.debug("blocker='%s' uses obfuscated domains", blocker)
348 # Some obscure them with question marks, not sure if that's dependent on version or not
349 row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)
351 logger.debug("row[]='%s'", type(row))
353 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
354 instances.set_has_obfuscation(blocker, True)
357 block["blocked"] = row["domain"]
358 origin = row["origin"]
359 nodeinfo_url = row["nodeinfo_url"]
361 logger.debug("Looking up instance by domainm, blocked='%s'", block["blocked"])
362 if block["blocked"] == "":
363 logger.debug("block[blocked] is empty - SKIPPED!")
365 elif not utils.is_domain_wanted(block["blocked"]):
366 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
368 elif block["block_level"] in ["accept", "accepted"]:
369 logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
371 elif not instances.is_registered(block["blocked"]):
372 logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
373 federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)
# Canonicalize the block level, persist the block, and queue a bot
# notification for new hard rejects when the bot is enabled.
375 block["block_level"] = utils.alias_block_level(block["block_level"])
377 if utils.process_block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
378 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
380 "blocked": block["blocked"],
381 "reason" : block["reason"],
384 logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
385 cookies.clear(block["blocked"])
387 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
388 if instances.has_pending(blocker):
389 logger.debug("Flushing updates for blocker='%s' ...", blocker)
390 instances.update_data(blocker)
392 logger.debug("Invoking commit() ...")
393 database.connection.commit()
395 logger.debug("Invoking cookies.clear(%s) ...", blocker)
396 cookies.clear(blocker)
398 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d'", config.get("bot_enabled"), len(blockdict))
399 if config.get("bot_enabled") and len(blockdict) > 0:
400 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
401 network.send_bot_post(blocker, blockdict)
403 logger.debug("Success! - EXIT!")
# Scrape fediverse.observer: first build the list of software types (from
# the site's dropdown menu, or from --software), then fetch the per-type
# table data and register every new, wanted domain found in it.
# NOTE(review): initializers (`types = list()`), try:/continue/return
# lines are missing from this extract (numbering gaps).
406 def fetch_observer(args: argparse.Namespace) -> int:
407 logger.debug("args[]='%s' - CALLED!", type(args))
413 if args.software is None:
414 logger.info("Fetching software list ...")
415 raw = utils.fetch_url(
416 "https://fediverse.observer",
418 (config.get("connection_timeout"), config.get("read_timeout"))
420 logger.debug("raw[%s]()=%d", type(raw), len(raw))
422 doc = bs4.BeautifulSoup(raw, features="html.parser")
423 logger.debug("doc[]='%s'", type(doc))
425 items = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"}).findAll("a", {"class": "dropdown-item"})
426 logger.debug("items[]='%s'", type(items))
428 logger.info("Checking %d menu items ...", len(items))
430 logger.debug("item[%s]='%s'", type(item), item)
431 if item.text.lower() == "all":
432 logger.debug("Skipping 'All' menu entry ...")
435 logger.debug("Appending item.text='%s' ...", item.text)
436 types.append(tidyup.domain(item.text))
438 logger.info("Adding args.software='%s' as type ...", args.software)
439 types.append(args.software)
441 logger.info("Fetching %d different table data ...", len(types))
442 for software in types:
443 logger.debug("software='%s' - BEFORE!", software)
444 if args.software is not None and args.software != software:
445 logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
450 logger.debug("Fetching table data for software='%s' ...", software)
451 raw = utils.fetch_url(
452 f"https://fediverse.observer/app/views/tabledata.php?software={software}",
454 (config.get("connection_timeout"), config.get("read_timeout"))
456 logger.debug("raw[%s]()=%d", type(raw), len(raw))
458 doc = bs4.BeautifulSoup(raw, features="html.parser")
459 logger.debug("doc[]='%s'", type(doc))
460 except network.exceptions as exception:
461 logger.warning("Cannot fetch software='%s' from fediverse.observer: '%s'", software, type(exception))
# Each table cell anchor with class "url" holds one instance domain.
464 items = doc.findAll("a", {"class": "url"})
465 logger.info("Checking %d items,software='%s' ...", len(items), software)
467 logger.debug("item[]='%s'", type(item))
468 domain = item.decode_contents()
470 logger.debug("domain='%s' - AFTER!", domain)
472 logger.debug("domain is empty - SKIPPED!")
474 elif not utils.is_domain_wanted(domain):
475 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
477 elif instances.is_registered(domain):
478 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
480 elif instances.is_recent(domain):
481 logger.debug("domain='%s' is recently being handled - SKIPPED!", domain)
# Map observer software names onto the project's canonical aliases.
484 software = software_helper.alias(software)
485 logger.info("Fetching instances for domain='%s'", domain)
486 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
488 logger.debug("Success! - EXIT!")
# Import todon.eu's wiki block list: parse the "silenced/limited" and
# "suspended" sections into block levels, register unknown domains, and
# persist each new block (bot-notifying on new rejects).
# NOTE(review): `blocklist`/`blockdict` initializers and several control
# lines are missing from this extract (numbering gaps).
491 def fetch_todon_wiki(args: argparse.Namespace) -> int:
492 logger.debug("args[]='%s' - CALLED!", type(args))
500 raw = utils.fetch_url("https://wiki.todon.eu/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
501 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
503 doc = bs4.BeautifulSoup(raw, "html.parser")
504 logger.debug("doc[]='%s'", type(doc))
# Wiki headings anchor the two lists; domains live in <div>s inside <li>s.
506 silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
507 logger.info("Checking %d silenced/limited entries ...", len(silenced))
508 blocklist["silenced"] = utils.find_domains(silenced, "div")
510 suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
511 logger.info("Checking %d suspended entries ...", len(suspended))
512 blocklist["reject"] = utils.find_domains(suspended, "div")
515 for block_level in blocklist:
516 blockers = blocklist[block_level]
518 logger.debug("block_level='%s',blockers()=%d'", block_level, len(blockers))
519 for blocked in blockers:
520 logger.debug("blocked='%s'", blocked)
522 if not instances.is_registered(blocked):
524 logger.info("Fetching instances from domain='%s' ...", blocked)
525 federation.fetch_instances(blocked, 'chaos.social', None, inspect.currentframe().f_code.co_name)
526 except network.exceptions as exception:
527 logger.warning("Exception '%s' during fetching instances (fetch_cs) from blocked='%s'", type(exception), blocked)
528 instances.set_last_error(blocked, exception)
530 if blocks.is_instance_blocked("todon.eu", blocked, block_level):
531 logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
534 logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
535 if utils.process_block("todon.eu", blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
536 logger.debug("Appending blocked='%s',reason='%s' for blocker='todon.eu' ...", blocked, block_level)
542 logger.debug("Invoking commit() ...")
543 database.connection.commit()
545 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
546 if config.get("bot_enabled") and len(blockdict) > 0:
547 logger.info("Sending bot POST for blocker='todon.eu',blockdict()=%d ...", len(blockdict))
548 network.send_bot_post("todon.eu", blockdict)
550 logger.debug("Success! - EXIT!")
# Import chaos.social's published federation.md: render the markdown,
# extract the "silenced" and "blocked" instance tables, register unknown
# domains, and persist blocks attributed to blocker 'chaos.social'.
# NOTE(review): `domains`/`blockdict`/`extensions` initializers and some
# control lines are missing from this extract (numbering gaps).
553 def fetch_cs(args: argparse.Namespace):
554 logger.debug("args[]='%s' - CALLED!", type(args))
580 raw = utils.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
581 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
# The markdown is rendered to HTML first so tables can be parsed with bs4.
583 doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
584 logger.debug("doc()=%d[]='%s'", len(doc), type(doc))
586 silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
587 logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
588 domains["silenced"] = federation.find_domains(silenced)
590 blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
591 logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
592 domains["reject"] = federation.find_domains(blocked)
594 logger.debug("domains[silenced]()=%d,domains[reject]()=%d", len(domains["silenced"]), len(domains["reject"]))
599 for block_level in domains:
600 logger.info("block_level='%s' has %d row(s)", block_level, len(domains[block_level]))
602 for row in domains[block_level]:
603 logger.debug("row[%s]='%s'", type(row), row)
604 if instances.is_recent(row["domain"], "last_blocked"):
605 logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
607 elif not instances.is_registered(row["domain"]):
609 logger.info("Fetching instances from domain='%s' ...", row["domain"])
610 federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)
611 except network.exceptions as exception:
612 logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
613 instances.set_last_error(row["domain"], exception)
615 if utils.process_block("chaos.social", row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
616 logger.debug("Appending blocked='%s',reason='%s' for blocker='chaos.social' ...", row["domain"], block_level)
618 "blocked": row["domain"],
619 "reason" : row["reason"],
622 logger.debug("Invoking commit() ...")
623 database.connection.commit()
625 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
626 if config.get("bot_enabled") and len(blockdict) > 0:
627 logger.info("Sending bot POST for blocker='chaos.social',blockdict()=%d ...", len(blockdict))
628 network.send_bot_post("chaos.social", blockdict)
630 logger.debug("Success! - EXIT!")
# Fetch an FBA-style RSS feed (URL from args.feed), extract one domain per
# item from the link's query string, dedupe, and fetch instances for each
# new, wanted domain.
# NOTE(review): `domains = list()`, try:/continue/return lines are missing
# from this extract (numbering gaps).
633 def fetch_fba_rss(args: argparse.Namespace) -> int:
634 logger.debug("args[]='%s' - CALLED!", type(args))
637 logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
638 response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
640 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
641 if response.ok and response.status_code < 300 and len(response.text) > 0:
642 logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
643 rss = atoma.parse_rss_bytes(response.content)
645 logger.debug("rss[]='%s'", type(rss))
646 for item in rss.items:
647 logger.debug("item='%s'", item)
# Feed links carry the domain after '=' in the query string.
648 domain = tidyup.domain(item.link.split("=")[1])
650 logger.debug("domain='%s' - AFTER!", domain)
652 logger.debug("domain is empty - SKIPPED!")
654 elif not utils.is_domain_wanted(domain):
655 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
657 elif domain in domains:
658 logger.debug("domain='%s' is already added - SKIPPED!", domain)
660 elif instances.is_registered(domain):
661 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
663 elif instances.is_recent(domain):
664 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
667 logger.debug("Adding domain='%s'", domain)
668 domains.append(domain)
670 logger.debug("domains()=%d", len(domains))
674 logger.info("Adding %d new instances ...", len(domains))
675 for domain in domains:
677 logger.info("Fetching instances from domain='%s' ...", domain)
678 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
679 except network.exceptions as exception:
680 logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
681 instances.set_last_error(domain, exception)
684 logger.debug("Success! - EXIT!")
# Fetch the FBA bot's ATOM feed on ryona.agency, pull domains out of every
# anchor href in each entry's HTML content, and fetch instances for each
# new, wanted domain (origin recorded as "ryona.agency").
# NOTE(review): `domains = list()`, try:/continue/return lines are missing
# from this extract (numbering gaps).
687 def fetch_fbabot_atom(args: argparse.Namespace) -> int:
688 logger.debug("args[]='%s' - CALLED!", type(args))
689 feed = "https://ryona.agency/users/fba/feed.atom"
693 logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
694 response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
696 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
697 if response.ok and response.status_code < 300 and len(response.text) > 0:
698 logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
699 atom = atoma.parse_atom_bytes(response.content)
701 logger.debug("atom[]='%s'", type(atom))
702 for entry in atom.entries:
703 logger.debug("entry[]='%s'", type(entry))
# Entry content is HTML; anchors may pack several domains separated
# by commas in a single href.
704 doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
705 logger.debug("doc[]='%s'", type(doc))
706 for element in doc.findAll("a"):
707 logger.debug("element[]='%s'", type(element))
708 for href in element["href"].split(","):
709 logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
710 domain = tidyup.domain(href)
712 logger.debug("domain='%s' - AFTER!", domain)
714 logger.debug("domain is empty - SKIPPED!")
716 elif not utils.is_domain_wanted(domain):
717 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
719 elif domain in domains:
720 logger.debug("domain='%s' is already added - SKIPPED!", domain)
722 elif instances.is_registered(domain):
723 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
725 elif instances.is_recent(domain):
726 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
729 logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
730 domains.append(domain)
732 logger.debug("domains()=%d", len(domains))
736 logger.info("Adding %d new instances ...", len(domains))
737 for domain in domains:
739 logger.info("Fetching instances from domain='%s' ...", domain)
740 federation.fetch_instances(domain, "ryona.agency", None, inspect.currentframe().f_code.co_name)
741 except network.exceptions as exception:
742 logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
743 instances.set_last_error(domain, exception)
746 logger.debug("Success! - EXIT!")
# Crawl instances: first the explicitly requested args.domain, then (unless
# suppressed -- the guard lines are missing here) every known instance of
# supported software whose last_instance_fetch is older than the configured
# recheck interval.
# NOTE(review): try:, the --single guard, loop header and return lines are
# missing from this extract (numbering gaps) -- confirm in full source.
749 def fetch_instances(args: argparse.Namespace) -> int:
750 logger.debug("args[]='%s' - CALLED!", type(args))
755 logger.info("Fetching instances from args.domain='%s' ...", args.domain)
756 federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
757 except network.exceptions as exception:
758 logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
759 instances.set_last_error(args.domain, exception)
# Flush any buffered per-instance data even after a fetch failure.
760 instances.update_data(args.domain)
764 logger.debug("Not fetching more instances - EXIT!")
767 # Loop through some instances
768 database.cursor.execute(
769 "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
772 rows = database.cursor.fetchall()
773 logger.info("Checking %d entries ...", len(rows))
775 logger.debug("row[domain]='%s'", row["domain"])
776 if row["domain"] == "":
777 logger.debug("row[domain] is empty - SKIPPED!")
779 elif not utils.is_domain_wanted(row["domain"]):
780 logger.warning("Domain row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
784 logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", row["domain"], row["origin"], row["software"], row["nodeinfo_url"])
785 federation.fetch_instances(row["domain"], row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
786 except network.exceptions as exception:
787 logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[domain]='%s'", type(exception), row["domain"])
788 instances.set_last_error(row["domain"], exception)
790 logger.debug("Success - EXIT!")
793 def fetch_oliphant(args: argparse.Namespace) -> int:
794 logger.debug("args[]='%s' - CALLED!", type(args))
798 base_url = "https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists"
803 "blocker": "artisan.chat",
804 "csv_url": "mastodon/artisan.chat.csv",
806 "blocker": "mastodon.art",
807 "csv_url": "mastodon/mastodon.art.csv",
809 "blocker": "pleroma.envs.net",
810 "csv_url": "mastodon/pleroma.envs.net.csv",
812 "blocker": "oliphant.social",
813 "csv_url": "mastodon/_unified_tier3_blocklist.csv",
815 "blocker": "mastodon.online",
816 "csv_url": "mastodon/mastodon.online.csv",
818 "blocker": "mastodon.social",
819 "csv_url": "mastodon/mastodon.social.csv",
821 "blocker": "mastodon.social",
822 "csv_url": "other/missing-tier0-mastodon.social.csv",
824 "blocker": "rage.love",
825 "csv_url": "mastodon/rage.love.csv",
827 "blocker": "sunny.garden",
828 "csv_url": "mastodon/sunny.garden.csv",
830 "blocker": "solarpunk.moe",
831 "csv_url": "mastodon/solarpunk.moe.csv",
833 "blocker": "toot.wales",
834 "csv_url": "mastodon/toot.wales.csv",
836 "blocker": "union.place",
837 "csv_url": "mastodon/union.place.csv",
843 logger.debug("Downloading %d files ...", len(blocklists))
844 for block in blocklists:
845 # Is domain given and not equal blocker?
846 if isinstance(args.domain, str) and args.domain != block["blocker"]:
847 logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
849 elif args.domain in domains:
850 logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
852 elif instances.is_recent(block["blocker"]):
853 logger.debug("block[blocker]='%s' has been recently crawled - SKIPPED!", block["blocker"])
857 logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
858 response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
860 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
861 if not response.ok or response.status_code > 399 or response.content == "":
862 logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
865 logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
866 reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")
868 logger.debug("reader[]='%s'", type(reader))
871 logger.debug("row[%s]='%s'", type(row), row)
872 domain = severity = None
873 reject_media = reject_reports = False
875 domain = row["#domain"]
876 elif "domain" in row:
877 domain = row["domain"]
879 logger.debug("row='%s' does not contain domain column", row)
882 if "#severity" in row:
883 severity = row["#severity"]
884 elif "severity" in row:
885 severity = row["severity"]
887 logger.debug("row='%s' does not contain severity column", row)
890 if "#reject_media" in row and row["#reject_media"].lower() == "true":
892 elif "reject_media" in row and row["reject_media"].lower() == "true":
895 if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
896 reject_reports = True
897 elif "reject_reports" in row and row["reject_reports"].lower() == "true":
898 reject_reports = True
900 logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
902 logger.debug("domain is empty - SKIPPED!")
904 elif not utils.is_domain_wanted(domain):
905 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
908 logger.debug("Marking domain='%s' as handled", domain)
909 domains.append(domain)
911 logger.debug("Processing domain='%s' ...", domain)
912 processed = utils.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
913 logger.debug("processed='%s'", processed)
915 if utils.process_block(block["blocker"], domain, None, "reject") and config.get("bot_enabled"):
916 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", domain, block["block_level"], block["blocker"])
919 "reason" : block["reason"],
923 utils.process_block(block["blocker"], domain, None, "reject_media")
925 utils.process_block(block["blocker"], domain, None, "reject_reports")
927 logger.debug("Invoking commit() ...")
928 database.connection.commit()
930 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
931 if config.get("bot_enabled") and len(blockdict) > 0:
932 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
933 network.send_bot_post(block["blocker"], blockdict)
935 logger.debug("Success! - EXIT!")
938 def fetch_txt(args: argparse.Namespace) -> int:
# NOTE(review): this copy is a partial extract — the leading numbers are the
# original file's own line numbers, and the gaps between them (940-943, 946-947,
# 949, 952, ...) mean source lines (loop headers, `return 0`, list delimiters,
# indentation) are missing here.  Recover the full file from VCS before making
# any code change; do not edit from this extract.
#
# Purpose (from the visible lines): download plain-text blocklists (one domain
# per line; only seirdy.one's bsl.txt row is visible), tidy each entry, and
# feed every non-empty, wanted, not-recently-crawled domain into
# utils.process_domain() with the list's "blocker" as source.
939 logger.debug("args[]='%s' - CALLED!", type(args))
# Hard-coded list of text blocklists; each row carries "blocker" and "url".
944 "blocker": "seirdy.one",
945 "url" : "https://seirdy.one/pb/bsl.txt",
948 logger.info("Checking %d text file(s) ...", len(urls))
# presumably a `for row in urls:` loop starts here — header missing in extract
950 logger.debug("Fetching row[url]='%s' ...", row["url"])
951 response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
953 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
# Only process a successful (<300) response with a non-empty body.
954 if response.ok and response.status_code < 300 and response.text != "":
955 logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
# One domain per line; empty lines become "" and are skipped below.
956 domains = response.text.split("\n")
958 logger.info("Processing %d domains ...", len(domains))
959 for domain in domains:
960 logger.debug("domain='%s' - BEFORE!", domain)
# Normalize the raw line (strip, lowercase, etc. — see helpers.tidyup).
961 domain = tidyup.domain(domain)
963 logger.debug("domain='%s' - AFTER!", domain)
# Guard clauses: empty, unwanted, or recently crawled domains are skipped.
965 logger.debug("domain is empty - SKIPPED!")
967 elif not utils.is_domain_wanted(domain):
968 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
970 elif instances.is_recent(domain):
971 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
974 logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
# process_domain() returns a success flag; the current function's name is
# passed as the crawl origin via inspect.
975 processed = utils.process_domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)
977 logger.debug("processed='%s'", processed)
979 logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
982 logger.debug("Success! - EXIT!")
985 def fetch_fedipact(args: argparse.Namespace) -> int:
# NOTE(review): partial extract — the leading numbers are original line numbers
# and the gaps (987-988, 990, 994, 997, 1000, ...) mean source lines are
# missing (loop header for the rows, `if domain == "":`, `return`, indentation).
# Restore the full file from VCS before editing.
#
# Purpose (from the visible lines): scrape the fedipact.online signatory page,
# extract one domain per <li> element, and crawl each wanted, unregistered,
# not-recently-seen domain via federation.fetch_instances().
986 logger.debug("args[]='%s' - CALLED!", type(args))
989 response = utils.fetch_url("https://fedipact.online", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
991 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
# Only parse a successful (<300) response with a non-empty body.
992 if response.ok and response.status_code < 300 and response.text != "":
993 logger.debug("Parsing %d Bytes ...", len(response.text))
995 doc = bs4.BeautifulSoup(response.text, "html.parser")
996 logger.debug("doc[]='%s'", type(doc))
# Each signatory is listed as an <li>; contents[0] is assumed to be the bare
# domain text — TODO confirm against the live page markup.
998 rows = doc.findAll("li")
999 logger.info("Checking %d row(s) ...", len(rows))
1001 logger.debug("row[]='%s'", type(row))
1002 domain = tidyup.domain(row.contents[0])
1004 logger.debug("domain='%s' - AFTER!", domain)
# Guard clauses: empty, unwanted, already-registered or recent domains skipped.
1006 logger.debug("domain is empty - SKIPPED!")
1008 elif not utils.is_domain_wanted(domain):
1009 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1011 elif instances.is_registered(domain):
1012 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1014 elif instances.is_recent(domain):
1015 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1018 logger.info("Fetching domain='%s' ...", domain)
# No origin/software known yet, hence the two None arguments.
1019 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1021 logger.debug("Success! - EXIT!")
1024 def fetch_joinfediverse(args: argparse.Namespace) -> int:
# NOTE(review): partial extract — leading numbers are the original file's line
# numbers; the many gaps mean loop headers, `for row in rows:`, initializations
# of `blocklist`/`blocking`/`blockdict`/`block`/`cnt`, `else:` branches and the
# final `return` are missing here.  Restore from VCS before editing code.
#
# Purpose (from the visible lines): scrape the "FediBlock" wikitable(s) from
# joinfediverse.wiki, build a list of block entries (blocked domain, reason,
# subdomains), expand subdomain entries, then record a "reject" block for every
# locally-known 'climatejustice.%' blocker instance and optionally announce
# new blocks via the bot.
1025 logger.debug("args[]='%s' - CALLED!", type(args))
1028 raw = utils.fetch_url("https://joinfediverse.wiki/FediBlock", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
1029 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1031 doc = bs4.BeautifulSoup(raw, "html.parser")
1032 logger.debug("doc[]='%s'", type(doc))
# Only tables with the MediaWiki "wikitable" class are considered.
1034 tables = doc.findAll("table", {"class": "wikitable"})
1036 logger.info("Analyzing %d table(s) ...", len(tables))
1038 for table in tables:
1039 logger.debug("table[]='%s'", type(table))
1041 rows = table.findAll("tr")
1042 logger.info("Checking %d row(s) ...", len(rows))
# Maps column index -> recognized header name for the current table.
1043 block_headers = dict()
1045 logger.debug("row[%s]='%s'", type(row), row)
1047 headers = row.findAll("th")
1048 logger.debug("Found headers()=%d header(s)", len(headers))
# A row with more than one <th> is treated as the table's header row.
1049 if len(headers) > 1:
1050 block_headers = dict()
1052 for header in headers:
1054 logger.debug("header[]='%s',cnt=%d", type(header), cnt)
1055 text = header.contents[0]
1057 logger.debug("text[]='%s'", type(text))
# Skip nested-tag headers and headers that are themselves domains.
1058 if not isinstance(text, str):
1059 logger.debug("text[]='%s' is not 'str' - SKIPPED!", type(text))
1061 elif validators.domain(text.strip()):
1062 logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
1065 text = tidyup.domain(text.strip())
1066 logger.debug("text='%s'", text)
# Only these four header captions are scraped.
1067 if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
1068 logger.debug("Found header: '%s'=%d", text, cnt)
1069 block_headers[cnt] = text
1071 elif len(block_headers) == 0:
1072 logger.debug("row is not scrapable - SKIPPED!")
# Data row: walk its cells and pick out the columns found above.
1074 elif len(block_headers) > 0:
1075 logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
1079 for element in row.find_all(["th", "td"]):
1081 logger.debug("element[]='%s',cnt=%d", type(element), cnt)
1082 if cnt in block_headers:
1083 logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])
1085 text = element.text.strip()
# "domain"/"instance" columns are both stored under the key "blocked".
1086 key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"
1088 logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
1089 if key in ["domain", "instance"]:
1091 elif key == "reason":
1092 block[key] = tidyup.reason(text)
# Subdomains are listed slash-separated in the wiki cell.
1093 elif key == "subdomain(s)":
1096 block[key] = text.split("/")
1098 logger.debug("key='%s'", key)
1101 logger.debug("block()=%d ...", len(block))
1103 logger.debug("Appending block()=%d ...", len(block))
1104 blocklist.append(block)
1106 logger.debug("blocklist()=%d", len(blocklist))
# All locally-known climatejustice.* instances act as the blockers below.
1108 database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
1109 domains = database.cursor.fetchall()
1111 logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
# Expand entries with subdomains into one entry per subdomain.
# NOTE(review): the same mutable `block` dict is appended repeatedly while its
# "blocked" key is overwritten each iteration — all appended references will
# end up showing the last subdomain.  Verify against the full source.
1113 for block in blocklist:
1114 logger.debug("block='%s'", block)
1115 if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
1116 origin = block["blocked"]
1117 for subdomain in block["subdomain(s)"]:
1118 block["blocked"] = subdomain + "." + origin
1119 blocking.append(block)
1121 blocking.append(block)
# NOTE(review): "%d" with a list argument — logging will report a formatting
# error; should be len(blocking).
1123 logger.debug("blocking()=%d", blocking)
1124 for block in blocking:
1125 logger.debug("block[]='%s'", type(block))
1126 block["blocked"] = tidyup.domain(block["blocked"])
1128 logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
1129 if block["blocked"] == "":
1130 logger.debug("block[blocked] is empty - SKIPPED!")
1132 elif not utils.is_domain_wanted(block["blocked"]):
1133 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1135 elif instances.is_recent(block["blocked"]):
1136 logger.debug("blocked='%s' has been recently checked - SKIPPED!", block["blocked"])
# NOTE(review): typo "Proccessing" in the log message (cosmetic).
1139 logger.info("Proccessing blocked='%s' ...", block["blocked"])
1140 utils.process_domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
# Record every scraped block once per climatejustice.* blocker instance.
1143 for blocker in domains:
# fetchall() rows are tuples; unwrap the single "domain" column.
1144 blocker = blocker[0]
1145 logger.debug("blocker[%s]='%s'", type(blocker), blocker)
1147 for block in blocking:
1148 logger.debug("block[blocked]='%s',block[reason]='%s' - BEFORE!", block["blocked"], block["reason"])
1149 block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None
1151 logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
1152 if block["blocked"] == "":
1153 logger.debug("block[blocked] is empty - SKIPPED!")
1155 elif not utils.is_domain_wanted(block["blocked"]):
1156 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1159 logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
# Block level is fixed at "reject"; on success and with the bot enabled, the
# entry is queued in blockdict for the announcement POST below.
1160 if utils.process_block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
1161 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
1163 "blocked": block["blocked"],
1164 "reason" : block["reason"],
1167 if instances.has_pending(blocker):
1168 logger.debug("Flushing updates for blocker='%s' ...", blocker)
1169 instances.update_data(blocker)
1171 logger.debug("Invoking commit() ...")
1172 database.connection.commit()
1174 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1175 if config.get("bot_enabled") and len(blockdict) > 0:
# NOTE(review): malformed format string — "blocker='%s," is missing the
# closing quote (cosmetic, log output only).
1176 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", blocker, len(blockdict))
1177 network.send_bot_post(blocker, blockdict)
1179 logger.debug("Success! - EXIT!")
1182 def recheck_obfuscation(args: argparse.Namespace) -> int:
# NOTE(review): partial extract — leading numbers are the original file's line
# numbers; gaps mean the `for row in rows:` header, `blockdict`/`obfuscated`
# initialization, several `else:`/`continue` lines and the final `return` are
# missing.  Restore the full file from VCS before editing code.
#
# Purpose (from the visible lines): for every instance flagged with
# has_obfuscation = 1 (optionally narrowed by --domain or --software), re-fetch
# its blocklist with the software-specific fetcher, try to deobfuscate
# wildcarded entries ('*'/'?'), record resolved blocks, and clear the
# obfuscation flag once everything resolved.
1183 logger.debug("args[]='%s' - CALLED!", type(args))
# Narrow the candidate set: a single wanted domain, a software filter, or all
# flagged instances.
1187 if isinstance(args.domain, str) and args.domain != "" and utils.is_domain_wanted(args.domain):
1188 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
# NOTE(review): validating a *software name* with validators.domain() and
# comparing its result to the string looks wrong — confirm intent.
1189 elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
1190 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
1192 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")
1194 rows = database.cursor.fetchall()
1195 logger.info("Checking %d domains ...", len(rows))
1197 logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
# Recently-checked instances are skipped unless --all, --domain or --software
# was given explicitly.
1198 if (args.all is None or not args.all) and instances.is_recent(row["domain"]) and args.domain is None and args.software is None:
1199 logger.debug("row[domain]='%s' has been recently checked, args.all[]='%s' - SKIPPED!", row["domain"], type(args.all))
# Dispatch to the software-specific blocklist fetcher.
1203 if row["software"] == "pleroma":
1204 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1205 blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
1206 elif row["software"] == "mastodon":
1207 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1208 blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
1209 elif row["software"] == "lemmy":
1210 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1211 blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
1212 elif row["software"] == "friendica":
1213 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1214 blocking = friendica.fetch_blocks(row["domain"])
1215 elif row["software"] == "misskey":
1216 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1217 blocking = misskey.fetch_blocks(row["domain"])
# NOTE(review): typo "sofware" in the log message (cosmetic).
1219 logger.warning("Unknown sofware: domain='%s',software='%s'", row["domain"], row["software"])
1221 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
1222 instances.set_total_blocks(row["domain"], blocking)
1224 logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1227 for block in blocking:
1228 logger.debug("block[blocked]='%s'", block["blocked"])
# Guard clauses: empty / .arpa / .tld / .onion entries are never processed.
1231 if block["blocked"] == "":
1232 logger.debug("block[blocked] is empty - SKIPPED!")
1234 elif block["blocked"].endswith(".arpa"):
1235 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1237 elif block["blocked"].endswith(".tld"):
1238 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1240 elif block["blocked"].endswith(".onion"):
1241 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
# Wildcard characters mark an obfuscated entry; try to resolve it back to a
# concrete domain (optionally aided by a hash supplied by the instance).
1243 elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
1244 logger.debug("block='%s' is obfuscated.", block["blocked"])
1245 obfuscated = obfuscated + 1
1246 blocked = utils.deobfuscate_domain(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
1247 elif not utils.is_domain_wanted(block["blocked"]):
1248 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1250 elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1251 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
1254 logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
# A successful deobfuscation (value differs from the raw entry) is counted
# back out of `obfuscated` and recorded unless already blocked.
1255 if blocked is not None and blocked != block["blocked"]:
1256 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1257 obfuscated = obfuscated - 1
1258 if blocks.is_instance_blocked(row["domain"], blocked):
1259 logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
# Normalize software-specific block level names to the common aliases.
1262 block["block_level"] = utils.alias_block_level(block["block_level"])
1264 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
1265 if utils.process_block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1266 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
1269 "reason" : block["reason"],
1272 logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
# Everything resolved: the instance no longer needs the obfuscation flag.
1273 if obfuscated == 0 and len(blocking) > 0:
1274 logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
1275 instances.set_has_obfuscation(row["domain"], False)
1277 if instances.has_pending(row["domain"]):
1278 logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1279 instances.update_data(row["domain"])
1281 logger.debug("Invoking commit() ...")
1282 database.connection.commit()
1284 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1285 if config.get("bot_enabled") and len(blockdict) > 0:
# NOTE(review): malformed format string — "blocker='%s," is missing the
# closing quote (cosmetic, log output only).
1286 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", row["domain"], len(blockdict))
1287 network.send_bot_post(row["domain"], blockdict)
1289 logger.debug("Success! - EXIT!")
1292 def fetch_fedilist(args: argparse.Namespace) -> int:
1293 logger.debug("args[]='%s' - CALLED!", type(args))
1295 url = "http://demo.fedilist.com/instance/csv?onion=not"
1296 if args.software is not None and args.software != "":
1297 logger.debug("args.software='%s'", args.software)
1298 url = f"http://demo.fedilist.com/instance/csv?software={args.software}&onion=not"
1302 logger.info("Fetching url='%s' from fedilist.com ...", url)
1303 response = reqto.get(
1305 headers=network.web_headers,
1306 timeout=(config.get("connection_timeout"), config.get("read_timeout")),
1307 allow_redirects=False
1310 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1311 reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")
1313 logger.debug("reader[]='%s'", type(reader))
1316 logger.debug("row[]='%s'", type(row))
1317 domain = tidyup.domain(row["hostname"])
1318 logger.debug("domain='%s' - AFTER!", domain)
1321 logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"])
1323 elif not utils.is_domain_wanted(domain):
1324 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1326 elif (args.all is None or not args.all) and instances.is_registered(domain):
1327 logger.debug("domain='%s' is already registered, --all not specified: args.all[]='%s'", type(args.all))
1329 elif instances.is_recent(domain):
1330 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1333 logger.info("Fetching instances from domain='%s' ...", domain)
1334 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1336 logger.debug("Success! - EXIT!")