1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
31 from fba import database
34 from fba.helpers import blacklist
35 from fba.helpers import config
36 from fba.helpers import cookies
37 from fba.helpers import locking
38 from fba.helpers import software as software_helper
39 from fba.helpers import tidyup
41 from fba.http import federation
42 from fba.http import network
44 from fba.models import blocks
45 from fba.models import instances
47 from fba.networks import friendica
48 from fba.networks import lemmy
49 from fba.networks import mastodon
50 from fba.networks import misskey
51 from fba.networks import pleroma
# Module-wide logging setup: INFO level by default for the whole process;
# uncomment the setLevel() call below to get DEBUG output from this module only.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)
def check_instance(args: argparse.Namespace) -> int:
    """Validate a single command-line domain: it must be a syntactically
    valid domain, not blacklisted and not already registered.

    NOTE(review): this excerpt is missing the `status` assignments, the
    final `else:` marker before the "is not known" branch and the
    `return status` line - confirm against the full file.
    """
    logger.debug("args.domain='%s' - CALLED!", args.domain)
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
    # (missing `else:` in this excerpt) - happy path: domain is unknown.
    logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
def check_nodeinfo(args: argparse.Namespace) -> int:
    """Sanity-check every registered instance's stored nodeinfo_url:
    the URL must either be relative or contain the instance's domain
    (plain or punycode form).

    NOTE(review): the `cnt` initialiser/increment and the `return`
    statement are missing from this excerpt - confirm against the full
    file.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    # All instances that have a nodeinfo URL on record, sorted by domain.
    database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")

    for row in database.cursor.fetchall():
        logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        # IDNA-encode the domain so URLs that use the punycode host form still match.
        punycode = row["domain"].encode("idna").decode("utf-8")

        if row["nodeinfo_url"].startswith("/"):
            logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
        elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
            # Neither the punycode form nor the plain domain appears in the URL.
            logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])

    logger.info("Found %d row(s)", cnt)
def fetch_pixelfed_api(args: argparse.Namespace) -> int:
    """Fetch the public server list from the pixelfed.org API and fetch
    instances for every new, wanted domain found there.

    NOTE(review): this excerpt is missing the `try:` openers matched by
    both `except network.exceptions` handlers, the `for row in rows:`
    header, the `continue` statements after SKIPPED logs, the first
    argument/closing of the get_json_api() call and the `return`
    statements - confirm against the full file.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    logger.debug("Checking CSRF from pixelfed.org")
    headers = csrf.determine("pixelfed.org", dict())
    # (missing `try:` above in this excerpt)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)

    logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
    fetched = network.get_json_api(
        "/api/v1/servers/all.json?scope=All&country=all&language=all",
        (config.get("connection_timeout"), config.get("read_timeout"))
    # (call not closed in this excerpt)

    logger.debug("JSON API returned %d elements", len(fetched))
    if "error_message" in fetched:
        logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
    elif "data" not in fetched["json"]:
        logger.warning("API did not return JSON with 'data' element - EXIT!")

    rows = fetched["json"]["data"]
    logger.info("Checking %d fetched rows ...", len(rows))
    # (missing `for row in rows:` header in this excerpt) - per-row filtering:
    logger.debug("row[]='%s'", type(row))
    if "domain" not in row:
        logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
    elif row["domain"] == "":
        logger.debug("row[domain] is empty - SKIPPED!")
    elif not utils.is_domain_wanted(row["domain"]):
        logger.warning("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
    elif instances.is_registered(row["domain"]):
        logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
    elif instances.is_recent(row["domain"]):
        logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])

    # Domain passed all filters: crawl it.
    logger.debug("Fetching instances from row[domain]='%s' ...", row["domain"])
    federation.fetch_instances(row["domain"], None, None, inspect.currentframe().f_code.co_name)
    # (missing `try:` above in this excerpt)
    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))

    logger.debug("Success! - EXIT!")
def fetch_bkali(args: argparse.Namespace) -> int:
    """Fetch the full domain list from the gql.api.bka.li GraphQL API
    and fetch instances for every new, wanted domain.

    NOTE(review): this excerpt is missing the `domains = list()`
    initialiser, the `try:` openers for the `except network.exceptions`
    handlers, the closing of the post_json_api() call, the guard
    (presumably `if len(rows) == 0:`) before the first raise, `continue`
    statements and the `return` statements - confirm against the full
    file.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    # Single GraphQL query returning all known domains, sorted ascending.
    fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
        "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
    # (call not closed in this excerpt)

    logger.debug("fetched[]='%s'", type(fetched))
    if "error_message" in fetched:
        logger.warning("post_json_api() for 'gql.api.bka.li' returned error message='%s", fetched["error_message"])
    elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
        logger.warning("post_json_api() returned error: '%s", fetched["error"]["message"])

    rows = fetched["json"]

    logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
    # (missing guard condition above in this excerpt) - validate response shape.
    raise Exception("WARNING: Returned no records")
    elif "data" not in rows:
        raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
    elif "nodeinfo" not in rows["data"]:
        raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

    for entry in rows["data"]["nodeinfo"]:
        logger.debug("entry[%s]='%s'", type(entry), entry)
        if "domain" not in entry:
            logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
        elif entry["domain"] == "":
            logger.debug("entry[domain] is empty - SKIPPED!")
        elif not utils.is_domain_wanted(entry["domain"]):
            logger.warning("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
        elif instances.is_registered(entry["domain"]):
            logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
        elif instances.is_recent(entry["domain"]):
            logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])

        # Passed all filters: queue for later instance fetching.
        logger.debug("Adding domain='%s' ...", entry["domain"])
        domains.append(entry["domain"])

    # (missing `try:` opener in this excerpt)
    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))

    logger.debug("domains()=%d", len(domains))

    logger.info("Adding %d new instances ...", len(domains))
    for domain in domains:
        logger.info("Fetching instances from domain='%s' ...", domain)
        federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
        # (missing `try:` opener in this excerpt)
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
def fetch_blocks(args: argparse.Namespace) -> int:
    """Fetch block lists from known blocker instances (optionally limited
    to a single domain or software via args) and record each block.

    NOTE(review): this excerpt is missing several statements: `return`s
    after the argument checks, an `else:` before the timeout-based query,
    the `blocking = list()` / `blockdict = list()` initialisers, the
    `try:` openers for the `except` handlers seen in the full file,
    `continue` statements after SKIPPED logs, `if blocker == "":` and
    `if row is None:` guards, `else:` markers, and the
    `blockdict.append({` / `})` wrapper around the dict-literal lines
    near the end - confirm against the full file.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)

    # Select which blockers to (re-)check, depending on the arguments given.
    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for single args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
    # (missing `else:` in this excerpt)
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
        blocker = tidyup.domain(blocker)
        logger.debug("blocker='%s' - AFTER!", blocker)

        # (missing `if blocker == "":` guard in this excerpt)
        logger.warning("blocker is now empty!")
        elif nodeinfo_url is None or nodeinfo_url == "":
            logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
        elif not utils.is_domain_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)

        logger.debug("blocker='%s'", blocker)
        # Record that we checked this blocker now; reset obfuscation flag.
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        # Dispatch to the software-specific block-list fetcher.
        if software == "pleroma":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
        elif software == "mastodon":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
        elif software == "lemmy":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
        elif software == "friendica":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = friendica.fetch_blocks(blocker)
        elif software == "misskey":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = misskey.fetch_blocks(blocker)
        # (missing `else:` in this excerpt)
            logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
        instances.set_total_blocks(blocker, blocking)

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            # Normalise domain and reason; empty reasons become None.
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                # (missing `if row is None:` guard in this excerpt)
                logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                instances.set_has_obfuscation(blocker, True)
                # (missing `else:` in this excerpt) - deobfuscation succeeded:
                block["blocked"] = row["domain"]
                origin = row["origin"]
                nodeinfo_url = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some obscure them with question marks, not sure if that's dependent on version or not
                row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                # (missing `if row is None:` guard in this excerpt)
                logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                instances.set_has_obfuscation(blocker, True)
                # (missing `else:` in this excerpt) - deobfuscation succeeded:
                block["blocked"] = row["domain"]
                origin = row["origin"]
                nodeinfo_url = row["nodeinfo_url"]

            # NOTE(review): "domainm" typo below is in the runtime log string
            # and therefore left untouched here.
            logger.debug("Looking up instance by domainm, blocked='%s'", block["blocked"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
            elif not utils.is_domain_wanted(block["blocked"]):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            # Map vendor-specific level names onto the canonical ones.
            block["block_level"] = utils.alias_block_level(block["block_level"])

            if utils.process_block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
                # (missing `blockdict.append({` wrapper in this excerpt)
                    "blocked": block["blocked"],
                    "reason" : block["reason"],

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d'", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
def fetch_observer(args: argparse.Namespace) -> int:
    """Scrape fediverse.observer: discover the software types from the
    site menu (or use args.software) and fetch instances from each
    software's table-data page.

    NOTE(review): this excerpt is missing the `types = list()`
    initialiser, the `for item in items:` headers, `else:` markers,
    `continue` statements, the `try:` opener matched by the visible
    `except network.exceptions`, a likely `domain = tidyup.domain(...)`
    normalisation before the "- AFTER!" log, the `if domain == "":`
    guard and the `return` statement - confirm against the full file.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    if args.software is None:
        logger.info("Fetching software list ...")
        raw = utils.fetch_url(
            "https://fediverse.observer",
            (config.get("connection_timeout"), config.get("read_timeout"))
        # (call not closed in this excerpt)
        logger.debug("raw[%s]()=%d", type(raw), len(raw))

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        # Software dropdown menu entries hold the list of known software types.
        items = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"}).findAll("a", {"class": "dropdown-item"})
        logger.debug("items[]='%s'", type(items))

        logger.info("Checking %d menu items ...", len(items))
        # (missing `for item in items:` header in this excerpt)
        logger.debug("item[%s]='%s'", type(item), item)
        if item.text.lower() == "all":
            logger.debug("Skipping 'All' menu entry ...")

        logger.debug("Appending item.text='%s' ...", item.text)
        types.append(tidyup.domain(item.text))
    # (missing `else:` in this excerpt) - a single software was requested:
        logger.info("Adding args.software='%s' as type ...", args.software)
        types.append(args.software)

    logger.info("Fetching %d different table data ...", len(types))
    for software in types:
        logger.debug("software='%s' - BEFORE!", software)
        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)

        logger.debug("Fetching table data for software='%s' ...", software)
        raw = utils.fetch_url(
            f"https://fediverse.observer/app/views/tabledata.php?software={software}",
            (config.get("connection_timeout"), config.get("read_timeout"))
        # (call not closed in this excerpt)
        logger.debug("raw[%s]()=%d", type(raw), len(raw))

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))
        # (missing `try:` opener in this excerpt)
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from fediverse.observer: '%s'", software, type(exception))

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        # (missing `for item in items:` header in this excerpt)
        logger.debug("item[]='%s'", type(item))
        domain = item.decode_contents()

        logger.debug("domain='%s' - AFTER!", domain)
        # (missing `if domain == "":` guard in this excerpt)
        logger.debug("domain is empty - SKIPPED!")
        elif not utils.is_domain_wanted(domain):
            logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
        elif instances.is_registered(domain):
            logger.debug("domain='%s' is already registered - SKIPPED!", domain)
        elif instances.is_recent(domain):
            logger.debug("domain='%s' is recently being handled - SKIPPED!", domain)

        # NOTE(review): this reassigns the loop variable `software` inside the
        # item loop - presumably intentional aliasing, but verify in full file.
        software = software_helper.alias(software)
        logger.info("Fetching instances for domain='%s'", domain)
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
def fetch_todon_wiki(args: argparse.Namespace) -> int:
    """Scrape the todon.eu wiki's domain-block page and record its
    silenced/limited and suspended entries as blocks.

    NOTE(review): this excerpt is missing the `blocklist = {...}` and
    `blockdict = list()` initialisers and the `blocker = ...` assignment
    (blocker is used but never assigned here), the `try:` opener matched
    by the visible `except`, `continue` statements, `else:` markers, the
    `blockdict.append(...)` body and the `return` statement - confirm
    against the full file.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    raw = utils.fetch_url("https://wiki.todon.eu/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    # The wiki page lists blocks under two headings; extract both lists.
    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blocking = blocklist["silenced"] + blocklist["reject"]

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_total_blocks(blocker, blocking)

    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d'", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                logger.info("Fetching instances from domain='%s' ...", blocked)
                federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
                # (missing `try:` opener in this excerpt)
                except network.exceptions as exception:
                    # NOTE(review): message says "fetch_cs" although this is
                    # fetch_todon_wiki - runtime string left untouched here.
                    logger.warning("Exception '%s' during fetching instances (fetch_cs) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if blocks.is_instance_blocked(blocker, blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if utils.process_block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", blocked, block_level, blocker)
                # (missing `blockdict.append({...})` body in this excerpt)

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
        network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update_data(blocker)

    logger.debug("Success! - EXIT!")
def fetch_cs(args: argparse.Namespace):
    """Parse chaos.social's published federation.md blocklist (Markdown)
    and record its silenced and blocked instances.

    NOTE(review): this excerpt is missing the `extensions` list and the
    `domains = {...}` / `blockdict = list()` initialisers, `try:`
    openers, `continue` statements, the `blockdict.append({` wrapper and
    any `return` - confirm against the full file.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    raw = utils.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    # Render the Markdown to HTML so the tables can be parsed with bs4.
    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc()=%d[]='%s'", len(doc), type(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    domains["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    domains["reject"] = federation.find_domains(blocked)

    # NOTE(review): likely bug - `blocklist` is not defined in this function;
    # the parsed results are stored in `domains`, so this line presumably
    # should read domains["silenced"] + domains["reject"]. Verify and fix in
    # the full file.
    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "chaos.social"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_total_blocks(blocker, blocking)

    logger.debug("domains[silenced]()=%d,domains[reject]()=%d", len(domains["silenced"]), len(domains["reject"]))
    for block_level in domains:
        logger.info("block_level='%s' has %d row(s)", block_level, len(domains[block_level]))

        for row in domains[block_level]:
            logger.debug("row[%s]='%s'", type(row), row)
            if instances.is_recent(row["domain"], "last_blocked"):
                logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
            elif not instances.is_registered(row["domain"]):
                logger.info("Fetching instances from domain='%s' ...", row["domain"])
                federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
                # (missing `try:` opener in this excerpt)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                    instances.set_last_error(row["domain"], exception)

            if utils.process_block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
                # (missing `blockdict.append({` wrapper in this excerpt)
                    "blocked": row["domain"],
                    "reason" : row["reason"],

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
        network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update_data(blocker)

    logger.debug("Success! - EXIT!")
def fetch_fba_rss(args: argparse.Namespace) -> int:
    """Fetch an FBA-specific RSS feed (URL given via args.feed), extract
    domains from the item links and fetch instances for new ones.

    NOTE(review): this excerpt is missing the `domains = list()`
    initialiser, the `if domain == "":` guard, `continue` statements,
    `else:` markers, the `try:` opener matched by the visible `except`
    and the `return` statement - confirm against the full file.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item='%s'", item)
            # The domain is encoded as the value of the link's query parameter.
            domain = tidyup.domain(item.link.split("=")[1])

            logger.debug("domain='%s' - AFTER!", domain)
            # (missing `if domain == "":` guard in this excerpt)
            logger.debug("domain is empty - SKIPPED!")
            elif not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))

    logger.info("Adding %d new instances ...", len(domains))
    for domain in domains:
        logger.info("Fetching instances from domain='%s' ...", domain)
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
        # (missing `try:` opener in this excerpt)
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
def fetch_fbabot_atom(args: argparse.Namespace) -> int:
    """Fetch the FBA bot account's ATOM feed from ryona.agency, extract
    domains from the anchors inside each entry and fetch instances for
    new ones.

    NOTE(review): this excerpt is missing the `domains = list()`
    initialiser, the `if domain == "":` guard, `continue` statements,
    `else:` markers, the `try:` opener matched by the visible `except`
    and the `return` statement - confirm against the full file.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))
    feed = "https://ryona.agency/users/fba/feed.atom"

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            # Entry bodies are HTML; pull every <a href> out of them.
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                logger.debug("element[]='%s'", type(element))
                # An href may carry several comma-separated domains.
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href)

                    logger.debug("domain='%s' - AFTER!", domain)
                    # (missing `if domain == "":` guard in this excerpt)
                    logger.debug("domain is empty - SKIPPED!")
                    elif not utils.is_domain_wanted(domain):
                        logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))

    logger.info("Adding %d new instances ...", len(domains))
    for domain in domains:
        logger.debug("domain='%s'", domain)
        logger.info("Fetching instances from domain='%s' ...", domain)
        federation.fetch_instances(domain, "ryona.agency", None, inspect.currentframe().f_code.co_name)
        # (missing `try:` opener in this excerpt)
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
def fetch_instances(args: argparse.Namespace) -> int:
    """Fetch instances from args.domain, then (unless limited to a
    single run) re-crawl known instances whose last fetch is older than
    the configured recheck interval.

    NOTE(review): this excerpt is missing the `try:` openers matched by
    both visible `except` handlers, the guard (presumably
    `if args.single:`) before the early-exit log, the `for row in rows:`
    header, `continue` statements and the `return` statements - confirm
    against the full file.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.info("Fetching instances from args.domain='%s' ...", args.domain)
    federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
    # (missing `try:` opener in this excerpt)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update_data(args.domain)

    # (missing guard, presumably `if args.single:`, in this excerpt)
    logger.debug("Not fetching more instances - EXIT!")

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    # (call not closed in this excerpt)

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    # (missing `for row in rows:` header in this excerpt)
    logger.debug("row[domain]='%s'", row["domain"])
    if row["domain"] == "":
        logger.debug("row[domain] is empty - SKIPPED!")
    elif not utils.is_domain_wanted(row["domain"]):
        logger.warning("Domain row[domain]='%s' is not wanted - SKIPPED!", row["domain"])

    logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", row["domain"], row["origin"], row["software"], row["nodeinfo_url"])
    federation.fetch_instances(row["domain"], row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
    # (missing `try:` opener in this excerpt)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[domain]='%s'", type(exception), row["domain"])
        instances.set_last_error(row["domain"], exception)

    logger.debug("Success - EXIT!")
# Fetch and import CSV-formatted block lists from the "oliphant" blocklists
# repository on Codeberg, one CSV per known blocker instance.
# NOTE(review): this listing elides several original lines (list/dict
# delimiters, `continue` statements, initialization of `domains`, `blockdict`
# and `cnt`, the surrounding `for row in reader:` construct) — gaps in the
# inner line numbering mark the elisions.
816 def fetch_oliphant(args: argparse.Namespace) -> int:
817 logger.debug("args[]='%s' - CALLED!", type(args))
# Root of the raw CSV files inside the Codeberg repository.
821 base_url = "https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists"
# Hard-coded table of blocker instances and their CSV paths relative to
# base_url (the enclosing list/dict syntax is elided in this listing).
826 "blocker": "artisan.chat",
827 "csv_url": "mastodon/artisan.chat.csv",
829 "blocker": "mastodon.art",
830 "csv_url": "mastodon/mastodon.art.csv",
832 "blocker": "pleroma.envs.net",
833 "csv_url": "mastodon/pleroma.envs.net.csv",
835 "blocker": "oliphant.social",
836 "csv_url": "mastodon/_unified_tier3_blocklist.csv",
838 "blocker": "mastodon.online",
839 "csv_url": "mastodon/mastodon.online.csv",
841 "blocker": "mastodon.social",
842 "csv_url": "mastodon/mastodon.social.csv",
844 "blocker": "mastodon.social",
845 "csv_url": "other/missing-tier0-mastodon.social.csv",
847 "blocker": "rage.love",
848 "csv_url": "mastodon/rage.love.csv",
850 "blocker": "sunny.garden",
851 "csv_url": "mastodon/sunny.garden.csv",
853 "blocker": "solarpunk.moe",
854 "csv_url": "mastodon/solarpunk.moe.csv",
856 "blocker": "toot.wales",
857 "csv_url": "mastodon/toot.wales.csv",
859 "blocker": "union.place",
860 "csv_url": "mastodon/union.place.csv",
866 logger.debug("Downloading %d files ...", len(blocklists))
867 for block in blocklists:
868 # Is domain given and not equal blocker?
# Skip entries not matching an explicitly requested --domain, entries
# already handled in this run, or blockers crawled too recently.
869 if isinstance(args.domain, str) and args.domain != block["blocker"]:
870 logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
872 elif args.domain in domains:
873 logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
875 elif instances.is_recent(block["blocker"]):
876 logger.debug("block[blocker]='%s' has been recently crawled - SKIPPED!", block["blocker"])
# Download the blocker's CSV file with the configured timeouts.
880 logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
881 response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
883 logger.debug("response.ok='%s',response.status_code=%d,response.content()=%d", response.ok, response.status_code, len(response.content))
# NOTE(review): response.content is bytes; comparing it against "" can
# never be True — the empty check likely never fires. Verify upstream.
884 if not response.ok or response.status_code >= 300 or response.content == "":
885 logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
888 logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
889 reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")
# NOTE(review): csv.DictReader has no __len__ — len(reader) raises
# TypeError. Confirm whether upstream materializes the reader first.
893 logger.info("Processing %d rows ...", len(reader))
# Per-row parsing: CSV columns may be prefixed with '#' depending on the
# source file, so both variants are probed for each field.
896 logger.debug("row[%s]='%s'", type(row), row)
897 domain = severity = None
898 reject_media = reject_reports = False
901 domain = row["#domain"]
902 elif "domain" in row:
903 domain = row["domain"]
905 logger.debug("row='%s' does not contain domain column", row)
908 if "#severity" in row:
909 severity = row["#severity"]
910 elif "severity" in row:
911 severity = row["severity"]
913 logger.debug("row='%s' does not contain severity column", row)
916 if "#reject_media" in row and row["#reject_media"].lower() == "true":
918 elif "reject_media" in row and row["reject_media"].lower() == "true":
921 if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
922 reject_reports = True
923 elif "reject_reports" in row and row["reject_reports"].lower() == "true":
924 reject_reports = True
927 logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
929 logger.debug("domain is empty - SKIPPED!")
931 elif not utils.is_domain_wanted(domain):
932 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
# Remember the domain so later blocklist entries don't re-process it.
935 logger.debug("Marking domain='%s' as handled", domain)
936 domains.append(domain)
938 logger.debug("Processing domain='%s' ...", domain)
939 processed = utils.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
940 logger.debug("processed='%s'", processed)
# Record the "reject" block; when the bot is enabled, the entry is also
# queued (into blockdict, initialization elided) for a bot POST later.
942 if utils.process_block(block["blocker"], domain, None, "reject") and config.get("bot_enabled"):
# NOTE(review): message says reason='%s' but block["block_level"] is
# passed — the logged value is the block level, not the reason.
943 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", domain, block["block_level"], block["blocker"])
946 "reason" : block["reason"],
# Additionally record media/report rejection levels (guarding
# conditions on reject_media/reject_reports are elided in this listing).
950 utils.process_block(block["blocker"], domain, None, "reject_media")
952 utils.process_block(block["blocker"], domain, None, "reject_reports")
# Persist the per-blocker totals and flush any pending instance updates.
954 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", block["blocker"], cnt)
955 instances.set_total_blocks(block["blocker"], cnt)
957 logger.debug("Checking if blocker='%s' has pending updates ...", block["blocker"])
958 if instances.has_pending(block["blocker"]):
959 logger.debug("Flushing updates for block[blocker]='%s' ...", block["blocker"])
960 instances.update_data(block["blocker"])
962 logger.debug("Invoking commit() ...")
963 database.connection.commit()
# Notify via bot when enabled and at least one block was collected.
965 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
966 if config.get("bot_enabled") and len(blockdict) > 0:
967 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
968 network.send_bot_post(block["blocker"], blockdict)
970 logger.debug("Success! - EXIT!")
# Fetch plain-text block lists (one domain per line) from a hard-coded set
# of URLs and feed each wanted domain into the generic processing pipeline.
# NOTE(review): the `urls` list/dict delimiters, the `for row in urls:` line,
# `continue` statements and `return` are elided in this listing.
973 def fetch_txt(args: argparse.Namespace) -> int:
974 logger.debug("args[]='%s' - CALLED!", type(args))
# Known text-file sources; currently only seirdy.one's published list.
979 "blocker": "seirdy.one",
980 "url" : "https://seirdy.one/pb/bsl.txt",
983 logger.info("Checking %d text file(s) ...", len(urls))
985 logger.debug("Fetching row[url]='%s' ...", row["url"])
986 response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
988 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
# Only process successful, non-empty responses.
989 if response.ok and response.status_code < 300 and response.text != "":
990 logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
# One domain per line in the fetched text file.
991 domains = response.text.split("\n")
993 logger.info("Processing %d domains ...", len(domains))
994 for domain in domains:
995 logger.debug("domain='%s' - BEFORE!", domain)
# Normalize the raw line into a clean domain name.
996 domain = tidyup.domain(domain)
998 logger.debug("domain='%s' - AFTER!", domain)
# Skip empty, unwanted, or recently crawled domains.
1000 logger.debug("domain is empty - SKIPPED!")
1002 elif not utils.is_domain_wanted(domain):
1003 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1005 elif instances.is_recent(domain):
1006 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
# Hand the domain to the generic processor, attributed to this blocker.
1009 logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
1010 processed = utils.process_domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)
1012 logger.debug("processed='%s'", processed)
1014 logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
1017 logger.debug("Success! - EXIT!")
# Scrape the fedipact.online signatory page: every <li> element is treated
# as one instance domain, which is then fetched/registered if new.
# NOTE(review): the `for row in rows:` line, `continue` statements and the
# final `return` are elided in this listing.
1020 def fetch_fedipact(args: argparse.Namespace) -> int:
1021 logger.debug("args[]='%s' - CALLED!", type(args))
1024 response = utils.fetch_url("https://fedipact.online", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
1026 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1027 if response.ok and response.status_code < 300 and response.text != "":
1028 logger.debug("Parsing %d Bytes ...", len(response.text))
# Parse the HTML and collect all list items — each holds one domain.
1030 doc = bs4.BeautifulSoup(response.text, "html.parser")
1031 logger.debug("doc[]='%s'", type(doc))
1033 rows = doc.findAll("li")
1034 logger.info("Checking %d row(s) ...", len(rows))
1036 logger.debug("row[]='%s'", type(row))
# First text child of the <li> is the raw domain; normalize it.
1037 domain = tidyup.domain(row.contents[0])
1039 logger.debug("domain='%s' - AFTER!", domain)
# Skip empty, unwanted, already-registered, or recently crawled domains.
1041 logger.debug("domain is empty - SKIPPED!")
1043 elif not utils.is_domain_wanted(domain):
1044 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1046 elif instances.is_registered(domain):
1047 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1049 elif instances.is_recent(domain):
1050 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
# New domain: crawl it (no origin/software known yet).
1053 logger.info("Fetching domain='%s' ...", domain)
1054 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1056 logger.debug("Success! - EXIT!")
# Scrape the FediBlock wiki page on joinfediverse.wiki: parse its
# "wikitable" tables into block entries, expand subdomain lists, then record
# the blocks as issued by every locally known climatejustice.* instance.
# NOTE(review): this listing elides lines (initialization of `blocklist`,
# `blocking`, `blockdict`, `cnt`, inner `for row ...` loops, `continue`
# statements, dict literals, and the final `return`).
1059 def fetch_joinfediverse(args: argparse.Namespace) -> int:
1060 logger.debug("args[]='%s' - CALLED!", type(args))
1063 raw = utils.fetch_url("https://joinfediverse.wiki/FediBlock", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
1064 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1066 doc = bs4.BeautifulSoup(raw, "html.parser")
1067 logger.debug("doc[]='%s'", type(doc))
# Only tables styled as MediaWiki "wikitable" carry block data.
1069 tables = doc.findAll("table", {"class": "wikitable"})
1071 logger.info("Analyzing %d table(s) ...", len(tables))
1073 for table in tables:
1074 logger.debug("table[]='%s'", type(table))
1076 rows = table.findAll("tr")
1077 logger.info("Checking %d row(s) ...", len(rows))
# Maps column index -> recognized header name for the current table.
1078 block_headers = dict()
1080 logger.debug("row[%s]='%s'", type(row), row)
1082 headers = row.findAll("th")
1083 logger.debug("Found headers()=%d header(s)", len(headers))
# A row with multiple <th> cells is a header row: (re)build the
# column-index -> header-name mapping from it.
1084 if len(headers) > 1:
1085 block_headers = dict()
1087 for header in headers:
1089 logger.debug("header[]='%s',cnt=%d", type(header), cnt)
1090 text = header.contents[0]
1092 logger.debug("text[]='%s'", type(text))
# Ignore non-string header content (nested tags) and headers
# that are themselves domain names.
1093 if not isinstance(text, str):
1094 logger.debug("text[]='%s' is not 'str' - SKIPPED!", type(text))
1096 elif validators.domain(text.strip()):
1097 logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
1100 text = tidyup.domain(text.strip())
1101 logger.debug("text='%s'", text)
# Only these four header names are scraped.
1102 if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
1103 logger.debug("Found header: '%s'=%d", text, cnt)
1104 block_headers[cnt] = text
1106 elif len(block_headers) == 0:
1107 logger.debug("row is not scrapable - SKIPPED!")
# Data row under a known header mapping: collect cell values.
1109 elif len(block_headers) > 0:
1110 logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
1114 for element in row.find_all(["th", "td"]):
1116 logger.debug("element[]='%s',cnt=%d", type(element), cnt)
1117 if cnt in block_headers:
1118 logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])
1120 text = element.text.strip()
# "domain"/"instance" columns are both stored under the key
# "blocked"; other columns keep their header name.
1121 key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"
1123 logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
1124 if key in ["domain", "instance"]:
1126 elif key == "reason":
1127 block[key] = tidyup.reason(text)
# Subdomains are slash-separated in the wiki cell.
1128 elif key == "subdomain(s)":
1131 block[key] = text.split("/")
1133 logger.debug("key='%s'", key)
1136 logger.debug("block()=%d ...", len(block))
1138 logger.debug("Appending block()=%d ...", len(block))
1139 blocklist.append(block)
1141 logger.debug("blocklist()=%d", len(blocklist))
# All locally known climatejustice.* instances act as the "blockers"
# that these wiki-sourced blocks are attributed to.
1143 database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
1144 domains = database.cursor.fetchall()
1146 logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
# Expand entries with subdomains into one block per subdomain.origin;
# entries without subdomains are appended as-is (else-branch elided).
1148 for block in blocklist:
1149 logger.debug("block='%s'", block)
1150 if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
1151 origin = block["blocked"]
1152 for subdomain in block["subdomain(s)"]:
1153 block["blocked"] = subdomain + "." + origin
1154 blocking.append(block)
1156 blocking.append(block)
# NOTE(review): '%d' is given the list `blocking`, not len(blocking) —
# this logging call would raise/format incorrectly. Verify upstream.
1158 logger.debug("blocking()=%d", blocking)
# First pass: register each blocked domain as an instance.
1159 for block in blocking:
1160 logger.debug("block[]='%s'", type(block))
1161 block["blocked"] = tidyup.domain(block["blocked"])
1163 logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
1164 if block["blocked"] == "":
1165 logger.debug("block[blocked] is empty - SKIPPED!")
1167 elif not utils.is_domain_wanted(block["blocked"]):
1168 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1170 elif instances.is_recent(block["blocked"]):
1171 logger.debug("blocked='%s' has been recently checked - SKIPPED!", block["blocked"])
1174 logger.info("Proccessing blocked='%s' ...", block["blocked"])
1175 utils.process_domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
# Second pass: record every block once per climatejustice.* blocker.
1178 for blocker in domains:
# fetchall() rows are sequences; column 0 is the domain.
1179 blocker = blocker[0]
1180 logger.debug("blocker[%s]='%s'", type(blocker), blocker)
1182 for block in blocking:
1183 logger.debug("block[blocked]='%s',block[reason]='%s' - BEFORE!", block["blocked"], block["reason"])
# Derive the reason from the wiki's "block reason(s)" column if present.
1184 block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None
1186 logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
1187 if block["blocked"] == "":
1188 logger.debug("block[blocked] is empty - SKIPPED!")
1190 elif not utils.is_domain_wanted(block["blocked"]):
1191 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1194 logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
# Record as a hard "reject" block; queue for the bot when enabled.
1195 if utils.process_block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
1196 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
1198 "blocked": block["blocked"],
1199 "reason" : block["reason"],
1202 if instances.has_pending(blocker):
1203 logger.debug("Flushing updates for blocker='%s' ...", blocker)
1204 instances.update_data(blocker)
1206 logger.debug("Invoking commit() ...")
1207 database.connection.commit()
1209 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1210 if config.get("bot_enabled") and len(blockdict) > 0:
# NOTE(review): the message is missing the closing quote after '%s'
# (compare inner line 967) — cosmetic log-format inconsistency.
1211 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", blocker, len(blockdict))
1212 network.send_bot_post(blocker, blockdict)
1214 logger.debug("Success! - EXIT!")
# Re-visit instances flagged with has_obfuscation=1 and try to deobfuscate
# their block lists (entries containing '*' or '?') via per-software
# fetchers; clears the flag once a list is fully deobfuscated.
# NOTE(review): this listing elides lines (the `for row in rows:` loop line,
# initialization of `obfuscated`/`blocked`/`blockdict`, `continue`
# statements, dict literals, and the final `return`).
1217 def recheck_obfuscation(args: argparse.Namespace) -> int:
1218 logger.debug("args[]='%s' - CALLED!", type(args))
# Select candidates: a single wanted --domain, all instances of a given
# --software, or every instance flagged as obfuscating.
1222 if isinstance(args.domain, str) and args.domain != "" and utils.is_domain_wanted(args.domain):
1223 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
# NOTE(review): validators.domain() on a software name looks odd —
# presumably meant to exclude domain-shaped values; verify intent.
1224 elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
1225 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
1227 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")
1229 rows = database.cursor.fetchall()
1230 logger.info("Checking %d domains ...", len(rows))
1232 logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
# Unless --all was given, skip recently checked instances — but only
# when neither --domain nor --software narrowed the selection.
1233 if (args.all is None or not args.all) and instances.is_recent(row["domain"]) and args.domain is None and args.software is None:
1234 logger.debug("row[domain]='%s' has been recently checked, args.all[]='%s' - SKIPPED!", row["domain"], type(args.all))
# Dispatch to the software-specific block-list fetcher.
1238 if row["software"] == "pleroma":
1239 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1240 blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
1241 elif row["software"] == "mastodon":
1242 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1243 blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
1244 elif row["software"] == "lemmy":
1245 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1246 blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
1247 elif row["software"] == "friendica":
1248 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1249 blocking = friendica.fetch_blocks(row["domain"])
1250 elif row["software"] == "misskey":
1251 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1252 blocking = misskey.fetch_blocks(row["domain"])
# NOTE(review): "sofware" typo in this runtime log message.
1254 logger.warning("Unknown sofware: domain='%s',software='%s'", row["domain"], row["software"])
# NOTE(review): log says %d with len(blocking) but the call passes the
# list itself — compare inner line 955 which passes a count (`cnt`).
1256 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
1257 instances.set_total_blocks(row["domain"], blocking)
1259 logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1262 for block in blocking:
1263 logger.debug("block[blocked]='%s'", block["blocked"])
# Filter out unusable entries: empty, reverse-DNS (.arpa), placeholder
# (.tld), and Tor (.onion) names.
1266 if block["blocked"] == "":
1267 logger.debug("block[blocked] is empty - SKIPPED!")
1269 elif block["blocked"].endswith(".arpa"):
1270 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1272 elif block["blocked"].endswith(".tld"):
1273 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1275 elif block["blocked"].endswith(".onion"):
1276 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
# Wildcard characters mark an obfuscated entry: count it and try to
# recover the real domain (optionally using a provided hash).
1278 elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
1279 logger.debug("block='%s' is obfuscated.", block["blocked"])
1280 obfuscated = obfuscated + 1
1281 blocked = utils.deobfuscate_domain(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
1282 elif not utils.is_domain_wanted(block["blocked"]):
1283 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1285 elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1286 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
1289 logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
# Deobfuscation succeeded when `blocked` differs from the raw entry:
# un-count it and record the block unless already known.
1290 if blocked is not None and blocked != block["blocked"]:
1291 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1292 obfuscated = obfuscated - 1
1293 if blocks.is_instance_blocked(row["domain"], blocked):
1294 logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
# Normalize aliased block levels (e.g. synonyms of "reject").
1297 block["block_level"] = utils.alias_block_level(block["block_level"])
1299 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
1300 if utils.process_block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1301 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
1304 "reason" : block["reason"],
# If every obfuscated entry was resolved, clear the instance's flag.
1307 logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
1308 if obfuscated == 0 and len(blocking) > 0:
1309 logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
1310 instances.set_has_obfuscation(row["domain"], False)
1312 if instances.has_pending(row["domain"]):
1313 logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1314 instances.update_data(row["domain"])
1316 logger.debug("Invoking commit() ...")
1317 database.connection.commit()
1319 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1320 if config.get("bot_enabled") and len(blockdict) > 0:
# NOTE(review): missing closing quote after '%s' in this message
# (compare inner line 967) — cosmetic log-format inconsistency.
1321 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", row["domain"], len(blockdict))
1322 network.send_bot_post(row["domain"], blockdict)
1324 logger.debug("Success! - EXIT!")
# Fetch the instance CSV from demo.fedilist.com (optionally filtered by
# --software) and crawl every wanted, not-yet-known domain it lists.
# NOTE(review): this listing elides lines (the `url=url,` argument and
# closing parenthesis of reqto.get, the `for row in reader:` line,
# `continue` statements and the final `return`).
1327 def fetch_fedilist(args: argparse.Namespace) -> int:
1328 logger.debug("args[]='%s' - CALLED!", type(args))
# Default query excludes onion (Tor) instances.
1330 url = "http://demo.fedilist.com/instance/csv?onion=not"
1331 if args.software is not None and args.software != "":
1332 logger.debug("args.software='%s'", args.software)
# Narrow the CSV export to the requested software type.
1333 url = f"http://demo.fedilist.com/instance/csv?software={args.software}&onion=not"
1337 logger.info("Fetching url='%s' from fedilist.com ...", url)
1338 response = reqto.get(
1340 headers=network.web_headers,
1341 timeout=(config.get("connection_timeout"), config.get("read_timeout")),
1342 allow_redirects=False
1345 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
# Parse the CSV body; rows are dicts keyed by the header line.
1346 reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")
1348 logger.debug("reader[]='%s'", type(reader))
1351 logger.debug("row[]='%s'", type(row))
# The "hostname" column carries the instance domain; normalize it.
1352 domain = tidyup.domain(row["hostname"])
1353 logger.debug("domain='%s' - AFTER!", domain)
1356 logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"])
1358 elif not utils.is_domain_wanted(domain):
1359 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
# Without --all, skip domains that are already registered.
1361 elif (args.all is None or not args.all) and instances.is_registered(domain):
# NOTE(review): format string has two %s placeholders but only one
# argument (type(args.all)) is passed, and `domain` is missing —
# this logging call is broken as written.
1362 logger.debug("domain='%s' is already registered, --all not specified: args.all[]='%s'", type(args.all))
1364 elif instances.is_recent(domain):
1365 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
# New domain: crawl it (no origin/software known yet).
1368 logger.info("Fetching instances from domain='%s' ...", domain)
1369 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1371 logger.debug("Success! - EXIT!")
# Re-detect the software type of instances via their nodeinfo and update the
# database record when it changed. Selection: a single --domain, all
# instances of a --software, or instances whose nodeinfo data is stale.
# NOTE(review): this listing elides lines (the `for row in domains:` line,
# the `try:` opening the per-domain block, and the trailing `return`
# presumably following the last visible line).
1374 def update_nodeinfo(args: argparse.Namespace) -> int:
1375 logger.debug("args[]='%s' - CALLED!", type(args))
1379 if args.domain is not None and args.domain != "":
1380 logger.debug("Fetching args.domain='%s'", args.domain)
1381 database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ?", [args.domain])
1382 elif args.software is not None and args.software != "":
1383 logger.info("Fetching domains for args.software='%s'", args.software)
1384 database.cursor.execute("SELECT domain, software FROM instances WHERE software = ?", [args.software])
1386 logger.info("Fetching domains for recently updated ...")
# Stale = last_nodeinfo older than the recheck window, or never checked
# (NULL) with unknown software and a non-sentinel status code.
# NOTE(review): mixed OR/AND without parentheses — AND binds tighter in
# SQL, so the OR arm matches any sufficiently old last_nodeinfo
# regardless of software/status; confirm that is intended.
1387 database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL AND software IS NULL AND last_status_code < 999", [time.time() - config.get("recheck_block")])
1389 domains = database.cursor.fetchall()
1391 logger.info("Checking %d domain(s) ...", len(domains))
1393 logger.debug("row[]='%s'", type(row))
1395 logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
# Probe the instance to determine its current software type.
1396 software = federation.determine_software(row["domain"])
1398 logger.debug("Determined software='%s'", software)
# Persist a change of software type, then mark the probe successful.
1399 if software != row["software"]:
1400 logger.warning("Software type has changed from '%s' to '%s'!", row["software"], software)
1401 instances.set_software(row["domain"], software)
1403 instances.set_success(row["domain"])
# Network-level failures are recorded on the instance, not re-raised.
1404 except network.exceptions as exception:
1405 logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1406 instances.set_last_error(row["domain"], exception)
# Stamp the nodeinfo check time and flush the instance's pending data.
1408 instances.set_last_nodeinfo(row["domain"])
1409 instances.update_data(row["domain"])
1411 logger.debug("Success! - EXIT!")