# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
# Standard library
import argparse
import csv
import inspect
import json
import logging
import time

# Third-party packages already referenced throughout this module
import atoma
import bs4
import markdown
import validators

# Project-local
from fba import csrf
from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import locking
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma
# Module-wide logger; INFO by default, uncomment below for verbose tracing.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)
def check_instance(args: argparse.Namespace) -> int:
    """Check whether args.domain could be newly registered.

    Logs a warning and returns a non-zero status when the domain is
    invalid, blacklisted or already registered; returns 0 when the
    domain is unknown (i.e. may be added).
    """
    logger.debug("args.domain='%s' - CALLED!", args.domain)

    # NOTE(review): the specific non-zero status codes were reconstructed
    # from the surrounding log messages — verify against callers.
    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status
def check_nodeinfo(args: argparse.Namespace) -> int:
    """Sanity-check stored nodeinfo URLs against their instance domain.

    Flags rows whose absolute nodeinfo_url mentions neither the domain
    nor its punycode (IDNA) form; relative URLs always match by design.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    # Fetch all rows that have a nodeinfo URL recorded
    database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")

    cnt = 0
    for row in database.cursor.fetchall():
        logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        punycode = row["domain"].encode("idna").decode("utf-8")

        if row["nodeinfo_url"].startswith("/"):
            # Relative URLs are served from the row's own domain
            logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
            continue
        elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
            logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
            cnt = cnt + 1

    logger.info("Found %d row(s)", cnt)

    logger.debug("EXIT!")
    return 0
def fetch_pixelfed_api(args: argparse.Namespace) -> int:
    """Fetch the server list from pixelfed.org's API and register new instances.

    Skips rows without a usable domain, unwanted, already-registered or
    recently crawled domains. Returns 0 on success, non-zero on error.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    headers = tuple()
    domain = "pixelfed.org"

    try:
        logger.debug("Checking CSRF from domain='%s' ...", domain)
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
        # NOTE(review): error status reconstructed — confirm expected return value
        return 1

    try:
        logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            domain,
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 101
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 102

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif row["domain"] == "":
                logger.debug("row[domain] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(row["domain"]):
                logger.warning("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
                continue
            elif instances.is_registered(row["domain"]):
                logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
                continue
            elif instances.is_recent(row["domain"]):
                logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
                continue

            logger.debug("Fetching instances from row[domain]='%s' ...", row["domain"])
            federation.fetch_instances(row["domain"], None, None, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 103

    logger.debug("Success! - EXIT!")
    return 0
def fetch_bkali(args: argparse.Namespace) -> int:
    """Fetch a domain list from the gql.api.bka.li GraphQL endpoint.

    Collects wanted, not-yet-registered, not-recently-crawled domains and
    fetches instance data for each. Returns 0 on success, non-zero on error.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    domains = list()
    try:
        fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        }))

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for 'gql.api.bka.li' returned error message='%s", fetched["error_message"])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s", fetched["error"]["message"])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif entry["domain"] == "":
                logger.debug("entry[domain] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(entry["domain"]):
                logger.warning("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
                continue
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])
    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        # NOTE(review): other fetchers take the cross-process lock before
        # writing — presumably locking.acquire() belongs here too; verify.
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0
def fetch_blocks(args: argparse.Namespace) -> int:
    """Fetch block lists from known instances and record new blocks.

    Selection: a single domain (args.domain), all instances of one
    software (args.software), or every supported blocker whose last
    check is older than the configured "recheck_block" interval.
    Handles obfuscated ('*'/'?') blocked domains via deobfuscation and
    optionally queues reject-level blocks for the notification bot.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    # NOTE(review): reconstructed — other writers in this module take the
    # cross-process lock before touching the database; verify.
    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for single args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
        blocker = tidyup.domain(blocker)
        logger.debug("blocker='%s' - AFTER!", blocker)

        if blocker == "":
            logger.warning("blocker is now empty!")
            continue
        elif nodeinfo_url is None or nodeinfo_url == "":
            logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
            continue
        elif not utils.is_domain_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("blocker='%s'", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        # Dispatch per software family; unknown software yields no blocks
        blocking = list()
        if software == "pleroma":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
        elif software == "mastodon":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
        elif software == "lemmy":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
        elif software == "friendica":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = friendica.fetch_blocks(blocker)
        elif software == "misskey":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = misskey.fetch_blocks(blocker)
        else:
            logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
        instances.set_total_blocks(blocker, blocking)

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        blockdict = list()
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin = row["origin"]
                nodeinfo_url = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some obscure them with question marks, not sure if that's dependent on version or not
                row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin = row["origin"]
                nodeinfo_url = row["nodeinfo_url"]

            logger.debug("Looking up instance by domainm, blocked='%s'", block["blocked"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(block["blocked"]):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            block["block_level"] = utils.alias_block_level(block["block_level"])

            if utils.process_block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d'", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_observer(args: argparse.Namespace) -> int:
    """Scrape fediverse.observer table data and register new instances.

    Without args.software the software menu is scraped to build the list
    of software types; otherwise only the given software is fetched.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    types = list()
    if args.software is None:
        logger.info("Fetching software list ...")
        raw = utils.fetch_url(
            "https://fediverse.observer",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        ).text
        logger.debug("raw[%s]()=%d", type(raw), len(raw))

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        items = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"}).findAll("a", {"class": "dropdown-item"})
        logger.debug("items[]='%s'", type(items))

        logger.info("Checking %d menu items ...", len(items))
        for item in items:
            logger.debug("item[%s]='%s'", type(item), item)
            if item.text.lower() == "all":
                logger.debug("Skipping 'All' menu entry ...")
                continue

            logger.debug("Appending item.text='%s' ...", item.text)
            types.append(tidyup.domain(item.text))
    else:
        logger.info("Adding args.software='%s' as type ...", args.software)
        types.append(args.software)

    # NOTE(review): reconstructed — writers in this module lock before DB work
    locking.acquire()

    logger.info("Fetching %d different table data ...", len(types))
    for software in types:
        logger.debug("software='%s' - BEFORE!", software)
        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            continue

        doc = None
        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://fediverse.observer/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from fediverse.observer: '%s'", software, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' is recently being handled - SKIPPED!", domain)
                continue

            software = software_helper.alias(software)
            logger.info("Fetching instances for domain='%s'", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_todon_wiki(args: argparse.Namespace) -> int:
    """Import the todon.eu wiki's domain block list.

    Parses the silenced/limited and suspended sections, records any new
    blocks and optionally queues reject-level entries for the bot.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    # NOTE(review): lock + blocker name reconstructed from context; verify
    locking.acquire()
    blocker = "todon.eu"
    blocklist = {
        "silenced": list(),
        "reject"  : list(),
    }

    raw = utils.fetch_url("https://wiki.todon.eu/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blocking = blocklist["silenced"] + blocklist["reject"]

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_total_blocks(blocker, blocking)

    blockdict = list()
    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d'", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_cs) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if blocks.is_instance_blocked(blocker, blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if utils.process_block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", blocked, block_level, blocker)
                blockdict.append({
                    "blocked": blocked,
                    "reason" : None,
                })

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
        network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update_data(blocker)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_cs(args: argparse.Namespace):
    """Import chaos.social's published federation block list.

    Parses the silenced/blocked tables from the markdown document and
    records new blocks for blocker chaos.social.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    # NOTE(review): markdown extension list reconstructed — 'tables' is
    # required to render the silenced/blocked tables; verify upstream value.
    extensions = ["tables"]
    domains = {
        "silenced": list(),
        "reject"  : list(),
    }

    raw = utils.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc()=%d[]='%s'", len(doc), type(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    domains["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    domains["reject"] = federation.find_domains(blocked)

    # BUGFIX: original referenced undefined name 'blocklist' here; the
    # parsed lists live in 'domains'.
    blocking = domains["silenced"] + domains["reject"]
    blocker = "chaos.social"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_total_blocks(blocker, blocking)

    logger.debug("domains[silenced]()=%d,domains[reject]()=%d", len(domains["silenced"]), len(domains["reject"]))
    if len(blocking) > 0:
        locking.acquire()

        blockdict = list()
        for block_level in domains:
            logger.info("block_level='%s' has %d row(s)", block_level, len(domains[block_level]))

            for row in domains[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if instances.is_recent(row["domain"], "last_blocked"):
                    logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
                    continue
                elif not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if utils.process_block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
                    blockdict.append({
                        "blocked": row["domain"],
                        "reason" : row["reason"],
                    })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update_data(blocker)

    logger.debug("Success! - EXIT!")
def fetch_fba_rss(args: argparse.Namespace) -> int:
    """Fetch an FBA-specific RSS feed (args.feed) and register new domains.

    Each item's link is expected to carry the domain as the value after
    '=' in its query string.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    domains = list()

    logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item='%s'", item)
            domain = tidyup.domain(item.link.split("=")[1])

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        # NOTE(review): reconstructed lock before DB writes; verify
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_fbabot_atom(args: argparse.Namespace) -> int:
    """Fetch the FBA bot's ATOM feed and register newly seen domains.

    Extracts comma-separated domains from each entry's anchor hrefs;
    newly found instances are attributed to origin ryona.agency.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))
    feed = "https://ryona.agency/users/fba/feed.atom"

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                logger.debug("element[]='%s'", type(element))
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href)

                    logger.debug("domain='%s' - AFTER!", domain)
                    if domain == "":
                        logger.debug("domain is empty - SKIPPED!")
                        continue
                    elif not utils.is_domain_wanted(domain):
                        logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        # NOTE(review): reconstructed lock before DB writes; verify
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, "ryona.agency", None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_instances(args: argparse.Namespace) -> int:
    """Fetch instance data for args.domain, then optionally crawl onward.

    After the seed domain, re-crawls every supported instance whose
    last_instance_fetch is older than the configured recheck interval.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    # NOTE(review): reconstructed lock before DB writes; verify
    locking.acquire()

    try:
        logger.info("Fetching instances from args.domain='%s' ...", args.domain)
        federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update_data(args.domain)
        return 100

    # NOTE(review): flag name 'args.single' reconstructed from the log
    # message below — confirm against the argument parser setup.
    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] == "":
            logger.debug("row[domain] is empty - SKIPPED!")
            continue
        elif not utils.is_domain_wanted(row["domain"]):
            logger.warning("Domain row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", row["domain"], row["origin"], row["software"], row["nodeinfo_url"])
            federation.fetch_instances(row["domain"], row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[domain]='%s'", type(exception), row["domain"])
            instances.set_last_error(row["domain"], exception)

    logger.debug("Success - EXIT!")
    return 0
817 def fetch_oliphant(args: argparse.Namespace) -> int:
    """Download the Oliphant block lists (CSV files on Codeberg) and import them.

    For each hard-coded (blocker, csv_url) pair the CSV is fetched, parsed
    (both '#domain'/'domain' and '#severity'/'severity' header styles are
    supported) and each wanted domain is recorded as a block of that blocker.
    NOTE(review): this listing has gaps (missing source lines), so the exact
    control flow between visible statements must be verified in the full file.
    """
818 logger.debug("args[]='%s' - CALLED!", type(args))
    # Base URL all csv_url values below are relative to.
822 base_url = "https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists"
    # Hard-coded list of blockers and their CSV paths (entries have exactly
    # the keys "blocker" and "csv_url" as far as visible here).
827 "blocker": "artisan.chat",
828 "csv_url": "mastodon/artisan.chat.csv",
830 "blocker": "mastodon.art",
831 "csv_url": "mastodon/mastodon.art.csv",
833 "blocker": "pleroma.envs.net",
834 "csv_url": "mastodon/pleroma.envs.net.csv",
836 "blocker": "oliphant.social",
837 "csv_url": "mastodon/_unified_tier3_blocklist.csv",
839 "blocker": "mastodon.online",
840 "csv_url": "mastodon/mastodon.online.csv",
842 "blocker": "mastodon.social",
843 "csv_url": "mastodon/mastodon.social.csv",
845 "blocker": "mastodon.social",
846 "csv_url": "other/missing-tier0-mastodon.social.csv",
848 "blocker": "rage.love",
849 "csv_url": "mastodon/rage.love.csv",
851 "blocker": "sunny.garden",
852 "csv_url": "mastodon/sunny.garden.csv",
854 "blocker": "solarpunk.moe",
855 "csv_url": "mastodon/solarpunk.moe.csv",
857 "blocker": "toot.wales",
858 "csv_url": "mastodon/toot.wales.csv",
860 "blocker": "union.place",
861 "csv_url": "mastodon/union.place.csv",
867 logger.debug("Downloading %d files ...", len(blocklists))
868 for block in blocklists:
869 # Is domain given and not equal blocker?
870 if isinstance(args.domain, str) and args.domain != block["blocker"]:
871 logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
873 elif args.domain in domains:
874 logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
876 elif instances.is_recent(block["blocker"]):
877 logger.debug("block[blocker]='%s' has been recently crawled - SKIPPED!", block["blocker"])
    # Fetch this blocker's CSV with the configured connection/read timeouts.
881 logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
882 response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
884 logger.debug("response.ok='%s',response.status_code=%d,response.content()=%d", response.ok, response.status_code, len(response.content))
885 if not response.ok or response.status_code >= 300 or response.content == "":
886 logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
889 logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
890 reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")
    # NOTE(review): csv.DictReader has no __len__, so len(reader) below would
    # raise TypeError unless a hidden line (891-893 are missing here)
    # materializes the rows first — confirm against the full file.
894 logger.info("Processing %d rows ...", len(reader))
897 logger.debug("row[%s]='%s'", type(row), row)
    # Reset per-row parse results before inspecting the columns.
898 domain = severity = None
899 reject_media = reject_reports = False
    # Oliphant CSVs use either '#domain' or 'domain' as the column header.
902 domain = row["#domain"]
903 elif "domain" in row:
904 domain = row["domain"]
906 logger.debug("row='%s' does not contain domain column", row)
909 if "#severity" in row:
910 severity = row["#severity"]
911 elif "severity" in row:
912 severity = row["severity"]
914 logger.debug("row='%s' does not contain severity column", row)
    # Boolean columns arrive as the string "true"/"false" (case-insensitive).
917 if "#reject_media" in row and row["#reject_media"].lower() == "true":
919 elif "reject_media" in row and row["reject_media"].lower() == "true":
922 if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
923 reject_reports = True
924 elif "reject_reports" in row and row["reject_reports"].lower() == "true":
925 reject_reports = True
928 logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
930 logger.debug("domain is empty - SKIPPED!")
932 elif not utils.is_domain_wanted(domain):
933 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
    # Remember the domain so a later blocklist entry does not repeat the work.
936 logger.debug("Marking domain='%s' as handled", domain)
937 domains.append(domain)
939 logger.debug("Processing domain='%s' ...", domain)
940 processed = utils.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
941 logger.debug("processed='%s'", processed)
    # Record the 'reject' block; if the bot is enabled, queue it for the POST.
943 if utils.process_block(block["blocker"], domain, None, "reject") and config.get("bot_enabled"):
    # NOTE(review): block["block_level"] / block["reason"] are read below, but
    # the visible blocklist entries only carry "blocker" and "csv_url" — this
    # looks like a KeyError waiting to happen; verify in the full file.
944 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", domain, block["block_level"], block["blocker"])
947 "reason" : block["reason"],
    # Additional block levels derived from the boolean CSV columns above.
951 utils.process_block(block["blocker"], domain, None, "reject_media")
953 utils.process_block(block["blocker"], domain, None, "reject_reports")
955 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", block["blocker"], cnt)
956 instances.set_total_blocks(block["blocker"], cnt)
958 logger.debug("Checking if blocker='%s' has pending updates ...", block["blocker"])
959 if instances.has_pending(block["blocker"]):
960 logger.debug("Flushing updates for block[blocker]='%s' ...", block["blocker"])
961 instances.update_data(block["blocker"])
    # Persist everything recorded for this blocker before moving on.
963 logger.debug("Invoking commit() ...")
964 database.connection.commit()
966 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
967 if config.get("bot_enabled") and len(blockdict) > 0:
968 logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
969 network.send_bot_post(block["blocker"], blockdict)
971 logger.debug("Success! - EXIT!")
974 def fetch_txt(args: argparse.Namespace) -> int:
    """Fetch plain-text block lists (one domain per line) and process them.

    Currently only seirdy.one's bsl.txt is listed.  Each line is tidied,
    filtered (empty / unwanted / recently crawled) and then handed to
    utils.process_domain() with the list's blocker as origin.
    NOTE(review): this listing has gaps (missing source lines); verify the
    loop/continue structure against the full file.
    """
975 logger.debug("args[]='%s' - CALLED!", type(args))
    # Static registry of text block lists: blocker domain plus its URL.
980 "blocker": "seirdy.one",
981 "url" : "https://seirdy.one/pb/bsl.txt",
984 logger.info("Checking %d text file(s) ...", len(urls))
986 logger.debug("Fetching row[url]='%s' ...", row["url"])
987 response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
989 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    # Only process successful, non-empty responses; failures are skipped silently.
990 if response.ok and response.status_code < 300 and response.text != "":
991 logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
    # One domain per line in the fetched text file.
992 domains = response.text.split("\n")
994 logger.info("Processing %d domains ...", len(domains))
995 for domain in domains:
996 logger.debug("domain='%s' - BEFORE!", domain)
    # Normalize the raw line into a clean domain name.
997 domain = tidyup.domain(domain)
999 logger.debug("domain='%s' - AFTER!", domain)
1001 logger.debug("domain is empty - SKIPPED!")
1003 elif not utils.is_domain_wanted(domain):
1004 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1006 elif instances.is_recent(domain):
1007 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1010 logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
1011 processed = utils.process_domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)
1013 logger.debug("processed='%s'", processed)
1015 logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
1018 logger.debug("Success! - EXIT!")
1021 def fetch_fedipact(args: argparse.Namespace) -> int:
    """Scrape fedipact.online for participating instances and register them.

    Parses the page's <li> elements, treats each list item's first content
    node as a domain, filters it (empty / unwanted / already registered /
    recently crawled) and fetches its instances via federation.fetch_instances().
    NOTE(review): this listing has gaps (missing source lines); verify the
    loop/continue structure against the full file.
    """
1022 logger.debug("args[]='%s' - CALLED!", type(args))
1025 response = utils.fetch_url("https://fedipact.online", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
1027 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1028 if response.ok and response.status_code < 300 and response.text != "":
1029 logger.debug("Parsing %d Bytes ...", len(response.text))
1031 doc = bs4.BeautifulSoup(response.text, "html.parser")
1032 logger.debug("doc[]='%s'", type(doc))
    # Every pact participant is rendered as a list item on the page.
1034 rows = doc.findAll("li")
1035 logger.info("Checking %d row(s) ...", len(rows))
1037 logger.debug("row[]='%s'", type(row))
    # NOTE(review): row.contents[0] may be a Tag rather than a string for
    # nested markup — presumably tidyup.domain() copes; confirm.
1038 domain = tidyup.domain(row.contents[0])
1040 logger.debug("domain='%s' - AFTER!", domain)
1042 logger.debug("domain is empty - SKIPPED!")
1044 elif not utils.is_domain_wanted(domain):
1045 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1047 elif instances.is_registered(domain):
1048 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1050 elif instances.is_recent(domain):
1051 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1054 logger.info("Fetching domain='%s' ...", domain)
1055 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1057 logger.debug("Success! - EXIT!")
1060 def fetch_joinfediverse(args: argparse.Namespace) -> int:
    """Scrape the joinfediverse.wiki FediBlock page and import its blocks.

    Walks the page's "wikitable" tables, detects header rows to learn which
    column index holds which field (domain/instance/subdomain(s)/block
    reason(s)), collects data rows into a blocklist, expands subdomain
    entries, and records each block for all 'climatejustice.%' blockers
    found in the local instances table.
    NOTE(review): this listing has gaps (missing source lines); verify loop
    bodies and continue statements against the full file.
    """
1061 logger.debug("args[]='%s' - CALLED!", type(args))
1064 raw = utils.fetch_url("https://joinfediverse.wiki/FediBlock", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
1065 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1067 doc = bs4.BeautifulSoup(raw, "html.parser")
1068 logger.debug("doc[]='%s'", type(doc))
    # Only the wiki's data tables carry the blocks we are interested in.
1070 tables = doc.findAll("table", {"class": "wikitable"})
1072 logger.info("Analyzing %d table(s) ...", len(tables))
1074 for table in tables:
1075 logger.debug("table[]='%s'", type(table))
1077 rows = table.findAll("tr")
1078 logger.info("Checking %d row(s) ...", len(rows))
    # Maps column index -> recognized header name for the current table.
1079 block_headers = dict()
1081 logger.debug("row[%s]='%s'", type(row), row)
1083 headers = row.findAll("th")
1084 logger.debug("Found headers()=%d header(s)", len(headers))
    # A row with several <th> cells is a header row: (re)learn column layout.
1085 if len(headers) > 1:
1086 block_headers = dict()
1088 for header in headers:
1090 logger.debug("header[]='%s',cnt=%d", type(header), cnt)
1091 text = header.contents[0]
1093 logger.debug("text[]='%s'", type(text))
1094 if not isinstance(text, str):
1095 logger.debug("text[]='%s' is not 'str' - SKIPPED!", type(text))
    # Header cells that are themselves domains are not column titles.
1097 elif validators.domain(text.strip()):
1098 logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
1101 text = tidyup.domain(text.strip())
1102 logger.debug("text='%s'", text)
1103 if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
1104 logger.debug("Found header: '%s'=%d", text, cnt)
1105 block_headers[cnt] = text
    # Data rows before any header row was seen cannot be interpreted.
1107 elif len(block_headers) == 0:
1108 logger.debug("row is not scrapable - SKIPPED!")
1110 elif len(block_headers) > 0:
1111 logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
1115 for element in row.find_all(["th", "td"]):
1117 logger.debug("element[]='%s',cnt=%d", type(element), cnt)
1118 if cnt in block_headers:
1119 logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])
1121 text = element.text.strip()
    # 'domain'/'instance' columns are normalized to the key "blocked".
1122 key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"
1124 logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
1125 if key in ["domain", "instance"]:
1127 elif key == "reason":
1128 block[key] = tidyup.reason(text)
1129 elif key == "subdomain(s)":
    # Subdomains are listed slash-separated in a single cell.
1132 block[key] = text.split("/")
1134 logger.debug("key='%s'", key)
1137 logger.debug("block()=%d ...", len(block))
1139 logger.debug("Appending block()=%d ...", len(block))
1140 blocklist.append(block)
1142 logger.debug("blocklist()=%d", len(blocklist))
    # All local climatejustice.* instances act as blockers for this list.
1144 database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
1145 domains = database.cursor.fetchall()
1147 logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
1149 for block in blocklist:
1150 logger.debug("block='%s'", block)
1151 if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
1152 origin = block["blocked"]
    # NOTE(review): the SAME dict object is mutated and appended on every
    # iteration below, so all appended references end up with the last
    # subdomain as "blocked" — looks like a bug; a copy per subdomain is
    # presumably intended.  Verify against the full file.
1153 for subdomain in block["subdomain(s)"]:
1154 block["blocked"] = subdomain + "." + origin
1155 blocking.append(block)
1157 blocking.append(block)
    # NOTE(review): %d is given the list itself here, not len(blocking) —
    # raises TypeError once DEBUG logging is enabled.
1159 logger.debug("blocking()=%d", blocking)
1160 for block in blocking:
1161 logger.debug("block[]='%s'", type(block))
1162 block["blocked"] = tidyup.domain(block["blocked"])
1164 logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
1165 if block["blocked"] == "":
1166 logger.debug("block[blocked] is empty - SKIPPED!")
1168 elif not utils.is_domain_wanted(block["blocked"]):
1169 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1171 elif instances.is_recent(block["blocked"]):
1172 logger.debug("blocked='%s' has been recently checked - SKIPPED!", block["blocked"])
    # NOTE(review): "Proccessing" typo in the runtime log message below.
1175 logger.info("Proccessing blocked='%s' ...", block["blocked"])
1176 utils.process_domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
1179 for blocker in domains:
    # fetchall() returns 1-tuples; unwrap the domain string.
1180 blocker = blocker[0]
1181 logger.debug("blocker[%s]='%s'", type(blocker), blocker)
1183 for block in blocking:
    # NOTE(review): block["reason"] is read here BEFORE it is assigned on the
    # next line — KeyError if the key is absent on the first pass; confirm.
1184 logger.debug("block[blocked]='%s',block[reason]='%s' - BEFORE!", block["blocked"], block["reason"])
1185 block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None
1187 logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
1188 if block["blocked"] == "":
1189 logger.debug("block[blocked] is empty - SKIPPED!")
1191 elif not utils.is_domain_wanted(block["blocked"]):
1192 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1195 logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
1196 if utils.process_block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
    # NOTE(review): block["block_level"] is never assigned in the visible
    # scraping code — possible KeyError; verify.
1197 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
1199 "blocked": block["blocked"],
1200 "reason" : block["reason"],
1203 if instances.has_pending(blocker):
1204 logger.debug("Flushing updates for blocker='%s' ...", blocker)
1205 instances.update_data(blocker)
1207 logger.debug("Invoking commit() ...")
1208 database.connection.commit()
1210 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1211 if config.get("bot_enabled") and len(blockdict) > 0:
    # NOTE(review): missing closing single-quote after %s in the message below
    # (cosmetic; cannot be changed in a documentation-only edit).
1212 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", blocker, len(blockdict))
1213 network.send_bot_post(blocker, blockdict)
1215 logger.debug("Success! - EXIT!")
1218 def recheck_obfuscation(args: argparse.Namespace) -> int:
    """Re-check instances flagged with obfuscated block lists.

    Selects instances with has_obfuscation = 1 (optionally narrowed by
    --domain or --software), re-fetches each one's block list with the
    software-specific fetcher, counts still-obfuscated entries (those
    containing '*' or '?'), tries to deobfuscate them against known data,
    and clears the has_obfuscation flag when nothing obfuscated remains.
    NOTE(review): this listing has gaps (missing source lines); verify loop
    and continue structure against the full file.
    """
1219 logger.debug("args[]='%s' - CALLED!", type(args))
1223 if isinstance(args.domain, str) and args.domain != "" and utils.is_domain_wanted(args.domain):
1224 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
    # NOTE(review): validators.domain() returns a truth object, not the input
    # string, so `== args.software` looks like it can never be True and this
    # branch is likely dead — confirm intent (maybe a software-name check was
    # meant instead of a domain check).
1225 elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
1226 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
1228 database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")
1230 rows = database.cursor.fetchall()
1231 logger.info("Checking %d domains ...", len(rows))
1233 logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
    # Recency skip only applies when neither --all, --domain nor --software
    # was given on the command line.
1234 if (args.all is None or not args.all) and instances.is_recent(row["domain"]) and args.domain is None and args.software is None:
1235 logger.debug("row[domain]='%s' has been recently checked, args.all[]='%s' - SKIPPED!", row["domain"], type(args.all))
    # Dispatch to the fetcher matching the instance's software type.
1239 if row["software"] == "pleroma":
1240 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1241 blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
1242 elif row["software"] == "mastodon":
1243 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1244 blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
1245 elif row["software"] == "lemmy":
1246 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1247 blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
1248 elif row["software"] == "friendica":
1249 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1250 blocking = friendica.fetch_blocks(row["domain"])
1251 elif row["software"] == "misskey":
1252 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1253 blocking = misskey.fetch_blocks(row["domain"])
    # NOTE(review): "sofware" typo in the runtime message below (cannot be
    # changed in a documentation-only edit).
1255 logger.warning("Unknown sofware: domain='%s',software='%s'", row["domain"], row["software"])
    # NOTE(review): the debug line formats len(blocking) but the call passes
    # the list itself — presumably set_total_blocks() computes the length
    # internally; confirm its signature.
1257 logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
1258 instances.set_total_blocks(row["domain"], blocking)
1260 logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1263 for block in blocking:
1264 logger.debug("block[blocked]='%s'", block["blocked"])
    # Filter out empty, pseudo ('.arpa', '.tld') and Tor onion entries.
1267 if block["blocked"] == "":
1268 logger.debug("block[blocked] is empty - SKIPPED!")
1270 elif block["blocked"].endswith(".arpa"):
1271 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1273 elif block["blocked"].endswith(".tld"):
1274 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1276 elif block["blocked"].endswith(".onion"):
1277 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
    # '*' or '?' marks an obfuscated entry: count it and try deobfuscation,
    # optionally aided by a hash the remote side published.
1279 elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
1280 logger.debug("block='%s' is obfuscated.", block["blocked"])
1281 obfuscated = obfuscated + 1
1282 blocked = utils.deobfuscate_domain(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
1283 elif not utils.is_domain_wanted(block["blocked"]):
1284 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1286 elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1287 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
1290 logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
    # A successful deobfuscation yields a different, concrete domain.
1291 if blocked is not None and blocked != block["blocked"]:
1292 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
    # Deobfuscated entries no longer count towards the obfuscation total.
1293 obfuscated = obfuscated - 1
1294 if blocks.is_instance_blocked(row["domain"], blocked):
1295 logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
    # Normalize remote block-level names to the local vocabulary.
1298 block["block_level"] = utils.alias_block_level(block["block_level"])
1300 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
1301 if utils.process_block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1302 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
1305 "reason" : block["reason"],
1308 logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
    # Everything deobfuscated and at least one block seen: clear the flag.
1309 if obfuscated == 0 and len(blocking) > 0:
1310 logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
1311 instances.set_has_obfuscation(row["domain"], False)
1313 if instances.has_pending(row["domain"]):
1314 logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1315 instances.update_data(row["domain"])
1317 logger.debug("Invoking commit() ...")
1318 database.connection.commit()
1320 logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1321 if config.get("bot_enabled") and len(blockdict) > 0:
    # NOTE(review): missing closing single-quote after %s in the message below
    # (cosmetic; cannot be changed in a documentation-only edit).
1322 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", row["domain"], len(blockdict))
1323 network.send_bot_post(row["domain"], blockdict)
1325 logger.debug("Success! - EXIT!")
1328 def fetch_fedilist(args: argparse.Namespace) -> int:
    """Fetch the fedilist.com instance CSV and register the listed domains.

    Optionally narrows the export by --software.  Each row's hostname is
    tidied, filtered (empty / unwanted / already registered unless --all /
    recently crawled) and then crawled via federation.fetch_instances().
    NOTE(review): this listing has gaps (missing source lines); verify the
    loop/continue structure against the full file.
    """
1329 logger.debug("args[]='%s' - CALLED!", type(args))
    # Default export URL; onion instances are excluded server-side.
1331 url = "http://demo.fedilist.com/instance/csv?onion=not"
1332 if args.software is not None and args.software != "":
1333 logger.debug("args.software='%s'", args.software)
    # Narrow the CSV export to one software type when requested.
1334 url = f"http://demo.fedilist.com/instance/csv?software={args.software}&onion=not"
1338 logger.info("Fetching url='%s' from fedilist.com ...", url)
1339 response = reqto.get(
1341 headers=network.web_headers,
1342 timeout=(config.get("connection_timeout"), config.get("read_timeout")),
1343 allow_redirects=False
1346 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1347 reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")
1349 logger.debug("reader[]='%s'", type(reader))
1352 logger.debug("row[]='%s'", type(row))
1353 domain = tidyup.domain(row["hostname"])
1354 logger.debug("domain='%s' - AFTER!", domain)
1357 logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"])
1359 elif not utils.is_domain_wanted(domain):
1360 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1362 elif (args.all is None or not args.all) and instances.is_registered(domain):
    # NOTE(review): the format string below has two %s placeholders but only
    # one argument (`domain` is missing) — raises "not enough arguments for
    # format string" once DEBUG logging is enabled.  Should be
    # (..., domain, type(args.all)).
1363 logger.debug("domain='%s' is already registered, --all not specified: args.all[]='%s'", type(args.all))
1365 elif instances.is_recent(domain):
1366 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1369 logger.info("Fetching instances from domain='%s' ...", domain)
1370 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1372 logger.debug("Success! - EXIT!")
1375 def update_nodeinfo(args: argparse.Namespace) -> int:
    """Re-determine the software type of instances via their nodeinfo.

    Target set is chosen by --domain, --software, or (default) all instances
    whose last_nodeinfo is older than the configured recheck_nodeinfo
    interval (or NULL).  For each row the software is re-detected; a change
    is stored, success/errors are recorded, and the row is flushed.
    NOTE(review): this listing has gaps (missing source lines); verify the
    loop and try/except structure against the full file.
    """
1376 logger.debug("args[]='%s' - CALLED!", type(args))
1380 if args.domain is not None and args.domain != "":
1381 logger.debug("Fetching args.domain='%s'", args.domain)
1382 database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ?", [args.domain])
1383 elif args.software is not None and args.software != "":
1384 logger.info("Fetching domains for args.software='%s'", args.software)
1385 database.cursor.execute("SELECT domain, software FROM instances WHERE software = ?", [args.software])
1387 logger.info("Fetching domains for recently updated ...")
    # Default: everything not checked within the recheck_nodeinfo window,
    # including rows never checked at all (last_nodeinfo IS NULL).
1388 database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL", [time.time() - config.get("recheck_nodeinfo")])
1390 domains = database.cursor.fetchall()
1392 logger.info("Checking %d domain(s) ...", len(domains))
1395 logger.debug("row[]='%s'", type(row))
    # `cnt` drives the progress percentage; its increment is in a line not
    # visible in this listing.
1397 logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
1398 software = federation.determine_software(row["domain"])
1400 logger.debug("Determined software='%s'", software)
    # Persist only actual software-type changes.
1401 if software != row["software"]:
1402 logger.warning("Software type has changed from '%s' to '%s'!", row["software"], software)
1403 instances.set_software(row["domain"], software)
1405 instances.set_success(row["domain"])
1406 except network.exceptions as exception:
1407 logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1408 instances.set_last_error(row["domain"], exception)
    # Timestamp and flush the row regardless of success or failure.
1410 instances.set_last_nodeinfo(row["domain"])
1411 instances.update_data(row["domain"])
1414 logger.debug("Success! - EXIT!")