1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
31 from fba import database
34 from fba.helpers import blacklist
35 from fba.helpers import config
36 from fba.helpers import cookies
37 from fba.helpers import locking
38 from fba.helpers import software as software_helper
39 from fba.helpers import tidyup
41 from fba.http import federation
42 from fba.http import network
44 from fba.models import blocks
45 from fba.models import instances
47 from fba.networks import friendica
48 from fba.networks import lemmy
49 from fba.networks import mastodon
50 from fba.networks import misskey
51 from fba.networks import pleroma
# Module-wide logging: INFO by default; a per-module logger is used by all
# command handlers below.  Uncomment the setLevel() line for verbose tracing.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)
# Command handler: validate a single domain given via args.domain and report
# whether it is usable (not malformed, not blacklisted, not already known).
# Returns a numeric CLI exit status.
# NOTE(review): this excerpt is missing lines (indentation is also stripped):
# the 'status = ...' assignments, the final 'else:' branch and the
# 'return status' statement are not shown - 'status' below is otherwise
# unbound.  Compare against the full source before editing.
def check_instance(args: argparse.Namespace) -> int:
logger.debug("args.domain='%s' - CALLED!", args.domain)
# Reject syntactically invalid domains first, then blacklisted, then
# already-registered ones; only an unknown, valid domain passes through.
if not validators.domain(args.domain):
logger.warning("args.domain='%s' is not valid", args.domain)
elif blacklist.is_blacklisted(args.domain):
logger.warning("args.domain='%s' is blacklisted", args.domain)
elif instances.is_registered(args.domain):
logger.warning("args.domain='%s' is already registered", args.domain)
logger.info("args.domain='%s' is not known", args.domain)
logger.debug("status=%d - EXIT!", status)
# Command handler: fetch the public server list from the pixelfed.org API
# and crawl every new, wanted domain found in its 'data' array.
# NOTE(review): this excerpt is missing lines ('try:' openers for the two
# except clauses, 'return' statements, the 'continue' statements after each
# SKIPPED branch, and the first positional argument of get_json_api() -
# presumably "pixelfed.org").  Indentation is also stripped.
def fetch_pixelfed_api(args: argparse.Namespace) -> int:
logger.debug("args[]='%s' - CALLED!", type(args))
# No CSRF by default, you don't have to add network.api_headers by yourself here
logger.debug("Checking CSRF from pixelfed.org")
headers = csrf.determine("pixelfed.org", dict())
except network.exceptions as exception:
logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
fetched = network.get_json_api(
"/api/v1/servers/all.json?scope=All&country=all&language=all",
(config.get("connection_timeout"), config.get("read_timeout"))
logger.debug("JSON API returned %d elements", len(fetched))
# Bail out on transport-level errors or an unexpected JSON shape.
if "error_message" in fetched:
logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
elif "data" not in fetched["json"]:
logger.warning("API did not return JSON with 'data' element - EXIT!")
rows = fetched["json"]["data"]
logger.info("Checking %d fetched rows ...", len(rows))
# Per-row filter chain: malformed, unwanted, already registered, or
# recently crawled rows are skipped; the rest are crawled.
logger.debug("row[]='%s'", type(row))
if "domain" not in row:
logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
elif not utils.is_domain_wanted(row["domain"]):
logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
elif instances.is_registered(row["domain"]):
logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
elif instances.is_recent(row["domain"]):
logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
logger.debug("Fetching instances from row[domain]='%s' ...", row["domain"])
federation.fetch_instances(row["domain"], None, None, inspect.currentframe().f_code.co_name)
except network.exceptions as exception:
logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
logger.debug("Success! - EXIT!")
# Command handler: query the gql.api.bka.li GraphQL endpoint for its node
# list, collect new wanted domains, then crawl each collected domain.
# NOTE(review): excerpt is missing lines ('domains = list()' initialization,
# 'try:' openers, 'if len(rows) == 0:' guard before the first raise,
# 'continue'/'return' statements); indentation is stripped.
def fetch_bkali(args: argparse.Namespace) -> int:
logger.debug("args[]='%s' - CALLED!", type(args))
fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
"query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
logger.debug("fetched[]='%s'", type(fetched))
if "error_message" in fetched:
# NOTE(review): the warning format string has an unterminated '%s quote.
logger.warning("post_json_api() for 'gql.api.bka.li' returned error message='%s", fetched['error_message'])
elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
# NOTE(review): the guard above checks fetched["json"]["error"]["message"]
# but this line reads fetched['error']['message'] - likely a KeyError bug;
# the quote in the format string is also unterminated.  Verify upstream.
logger.warning("post_json_api() returned error: '%s", fetched['error']['message'])
rows = fetched["json"]
logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
raise Exception("WARNING: Returned no records")
elif "data" not in rows:
raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
elif "nodeinfo" not in rows["data"]:
raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")
# Filter chain: skip rows without a domain, unwanted, registered, or
# recently crawled domains; queue the rest for crawling below.
for entry in rows["data"]["nodeinfo"]:
logger.debug("entry[%s]='%s'", type(entry), entry)
if "domain" not in entry:
logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
elif not utils.is_domain_wanted(entry["domain"]):
# NOTE(review): the format string contains '%s' but no argument is
# passed - this raises/logs wrongly; should pass entry["domain"].
logger.debug("entry[domain]='%s' is not wanted - SKIPPED!")
elif instances.is_registered(entry["domain"]):
logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
elif instances.is_recent(entry["domain"]):
logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
logger.debug("Adding domain='%s' ...", entry["domain"])
domains.append(entry["domain"])
except network.exceptions as exception:
logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
logger.debug("domains()=%d", len(domains))
# Second phase: crawl every queued domain; per-domain network errors are
# recorded on the instance instead of aborting the whole run.
logger.info("Adding %d new instances ...", len(domains))
for domain in domains:
logger.info("Fetching instances from domain='%s' ...", domain)
federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
except network.exceptions as exception:
logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
instances.set_last_error(domain, exception)
logger.debug("Success - EXIT!")
# Command handler: (re-)fetch block lists from registered instances.  Scope
# is one domain (args.domain), one software (args.software), or all
# supported softwares whose last_blocked timestamp is older than the
# configured recheck interval.  Each fetched block entry is normalized,
# optionally deobfuscated, and stored via utils.process_block().
# NOTE(review): excerpt is missing lines ('return' statements, closing ')'
# of the cursor.execute() calls, the 'else:' before the interval query,
# 'if blocker == "":' before the "blocker is now empty" warning, 'continue'
# statements, try:/else: openers, and the 'blockdict' initialization used
# at the bottom).  Indentation is stripped throughout.
def fetch_blocks(args: argparse.Namespace) -> int:
logger.debug("args[]='%s' - CALLED!", type(args))
# Up-front validation when a single domain was requested.
if args.domain is not None and args.domain != "":
logger.debug("args.domain='%s' - checking ...", args.domain)
if not validators.domain(args.domain):
logger.warning("args.domain='%s' is not valid.", args.domain)
elif blacklist.is_blacklisted(args.domain):
logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
elif not instances.is_registered(args.domain):
logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
# Select which instances to re-check: single domain, single software,
# or everything past the recheck_block interval.
if args.domain is not None and args.domain != "":
# Re-check single domain
logger.debug("Querying database for single args.domain='%s' ...", args.domain)
database.cursor.execute(
"SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
elif args.software is not None and args.software != "":
# Re-check single software
logger.debug("Querying database for args.software='%s' ...", args.software)
database.cursor.execute(
"SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
# Re-check after "timeout" (aka. minimum interval)
database.cursor.execute(
"SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey', 'peertube') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
rows = database.cursor.fetchall()
logger.info("Checking %d entries ...", len(rows))
for blocker, software, origin, nodeinfo_url in rows:
logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
# Normalize the blocker domain before any further checks.
blocker = tidyup.domain(blocker)
logger.debug("blocker='%s' - AFTER!", blocker)
logger.warning("blocker is now empty!")
elif nodeinfo_url is None or nodeinfo_url == "":
logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
elif not utils.is_domain_wanted(blocker):
logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
logger.debug("blocker='%s'", blocker)
# Record the crawl attempt and reset the obfuscation flag before
# fetching; it is re-set below if obfuscated entries are found.
instances.set_last_blocked(blocker)
instances.set_has_obfuscation(blocker, False)
# Dispatch to the software-specific block-list fetcher.
if software == "pleroma":
logger.info("blocker='%s',software='%s'", blocker, software)
blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
elif software == "mastodon":
logger.info("blocker='%s',software='%s'", blocker, software)
blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
elif software == "lemmy":
logger.info("blocker='%s',software='%s'", blocker, software)
blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
elif software == "friendica":
logger.info("blocker='%s',software='%s'", blocker, software)
blocking = friendica.fetch_blocks(blocker)
elif software == "misskey":
logger.info("blocker='%s',software='%s'", blocker, software)
blocking = misskey.fetch_blocks(blocker)
logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)
logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
for block in blocking:
logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block['block_level'], block["reason"])
if block['block_level'] == "":
logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
# Normalize blocked domain and reason text.
logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
block["blocked"] = tidyup.domain(block["blocked"])
block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])
if block["blocked"] == "":
logger.warning("blocked is empty, blocker='%s'", blocker)
elif block["blocked"].count("*") > 0:
# Obfuscated entry ('*' wildcards): try to resolve it back to a
# concrete registered domain via its hash, when present.
logger.debug("blocker='%s' uses obfuscated domains, marking ...", blocker)
instances.set_has_obfuscation(blocker, True)
# Some friendica servers also obscure domains without hash
row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)
logger.debug("row[]='%s'", type(row))
logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
block["blocked"] = row[0]
nodeinfo_url = row[2]
elif block["blocked"].count("?") > 0:
# Same handling for '?'-obfuscated entries.
logger.debug("blocker='%s' uses obfuscated domains, marking ...", blocker)
instances.set_has_obfuscation(blocker, True)
# Some obscure them with question marks, not sure if that's dependent on version or not
row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)
logger.debug("row[]='%s'", type(row))
logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
block["blocked"] = row[0]
nodeinfo_url = row[2]
# NOTE(review): "domainm" typo in the debug string below (runtime
# string - left untouched here).
logger.debug("Looking up instance by domainm, blocked='%s'", block["blocked"])
if not utils.is_domain_wanted(block["blocked"]):
logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
elif block['block_level'] in ["accept", "accepted"]:
logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
elif not instances.is_registered(block["blocked"]):
logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)
# Canonicalize Mastodon-style level names to the stored vocabulary.
if block['block_level'] == "silence":
logger.debug("Block level 'silence' has been changed to 'silenced'")
block['block_level'] = "silenced"
elif block['block_level'] == "suspend":
logger.debug("Block level 'suspend' has been changed to 'suspended'")
block['block_level'] = "suspended"
utils.process_block(blocker, block['blocked'], block['reason'], block['block_level'])
logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
cookies.clear(block["blocked"])
# Flush any accumulated per-instance changes for this blocker.
logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
if instances.has_pending(blocker):
logger.debug("Flushing updates for blocker='%s' ...", blocker)
instances.update_data(blocker)
logger.debug("Invoking commit() ...")
database.connection.commit()
logger.debug("Invoking cookies.clear(%s) ...", blocker)
cookies.clear(blocker)
# Optional bot notification about newly found blocks.
# NOTE(review): 'blockdict' is initialized in lines not shown here.
logger.debug("config[bot_enabled]='%s',blockdict()=%d'", config.get("bot_enabled"), len(blockdict))
if config.get("bot_enabled") and len(blockdict) > 0:
logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", blocker, len(blockdict))
network.send_bot_post(blocker, blockdict)
logger.debug("Success! - EXIT!")
# Command handler: scrape per-software instance tables from
# fediverse.observer and crawl every new, wanted domain found there.
# NOTE(review): excerpt is missing lines (the 'types' list definition,
# try:/continue/return statements); indentation is stripped.
def fetch_observer(args: argparse.Namespace) -> int:
logger.debug("args[]='%s' - CALLED!", type(args))
logger.info("Fetching %d different table data ...", len(types))
for software in types:
logger.debug("software='%s' - BEFORE!", software)
if args.software is not None and args.software != software:
# NOTE(review): this debug call has two '%s' placeholders but no
# arguments - should pass args.software and software.
logger.debug("args.software='%s' does not match software='%s' - SKIPPED!")
logger.debug("Fetching table data for software='%s' ...", software)
raw = utils.fetch_url(
f"https://fediverse.observer/app/views/tabledata.php?software={software}",
(config.get("connection_timeout"), config.get("read_timeout"))
logger.debug("raw[%s]()=%d", type(raw), len(raw))
doc = bs4.BeautifulSoup(raw, features='html.parser')
logger.debug("doc[]='%s'", type(doc))
except network.exceptions as exception:
logger.warning("Cannot fetch software='%s' from fediverse.observer: '%s'", software, type(exception))
# Each instance link in the table carries class="url".
items = doc.findAll("a", {"class": "url"})
logger.info("Checking %d items,software='%s' ...", len(items), software)
logger.debug("item[]='%s'", type(item))
domain = item.decode_contents()
logger.debug("domain='%s'", domain)
if not utils.is_domain_wanted(domain):
logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
elif instances.is_registered(domain):
logger.debug("domain='%s' is already registered - SKIPPED!", domain)
elif instances.is_recent(domain):
logger.debug("domain='%s' is recently being handled - SKIPPED!", domain)
# NOTE(review): this reassigns the outer loop variable 'software'
# inside the item loop; after the first item the original table key
# is lost - looks unintentional, verify against full source.
software = software_helper.alias(software)
logger.info("Fetching instances for domain='%s'", domain)
federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
logger.debug("Success! - EXIT!")
# Command handler: scrape wiki.todon.eu's domain-block page and store its
# "silenced/limited" and "suspended" lists as blocks of instance todon.eu.
# NOTE(review): excerpt is missing lines (the 'blocklist' dict
# initialization, try:/continue statements); indentation is stripped.
def fetch_todon_wiki(args: argparse.Namespace) -> int:
logger.debug("args[]='%s' - CALLED!", type(args))
raw = utils.fetch_url("https://wiki.todon.eu/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
doc = bs4.BeautifulSoup(raw, "html.parser")
logger.debug("doc[]='%s'", type(doc))
# The wiki page groups domains in <ul> lists under fixed <h3> anchors.
silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
logger.info("Checking %d silenced/limited entries ...", len(silenced))
blocklist["silenced"] = utils.find_domains(silenced, "div")
suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
logger.info("Checking %d suspended entries ...", len(suspended))
blocklist["reject"] = utils.find_domains(suspended, "div")
for block_level in blocklist:
blockers = blocklist[block_level]
logger.debug("block_level='%s',blockers()=%d'", block_level, len(blockers))
for blocked in blockers:
logger.debug("blocked='%s'", blocked)
# Register unknown blocked domains before recording the block.
if not instances.is_registered(blocked):
logger.info("Fetching instances from domain='%s' ...", blocked)
federation.fetch_instances(blocked, 'chaos.social', None, inspect.currentframe().f_code.co_name)
except network.exceptions as exception:
# NOTE(review): the message says "fetch_cs" but this is
# fetch_todon_wiki - copy/paste in the log string (left as-is).
logger.warning("Exception '%s' during fetching instances (fetch_cs) from blocked='%s'", type(exception), blocked)
instances.set_last_error(blocked, exception)
# Skip duplicates already stored for todon.eu at this level.
if blocks.is_instance_blocked("todon.eu", blocked, block_level):
logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
blocks.add_instance("todon.eu", blocked, None, block_level)
logger.debug("Invoking commit() ...")
database.connection.commit()
logger.debug("Success! - EXIT!")
# Command handler: parse chaos.social's published federation.md (rendered
# from Markdown) and store its "silenced" and "blocked" instance tables as
# blocks of instance chaos.social.
# NOTE(review): excerpt is missing lines (the 'domains' dict and
# 'extensions' list initializations, try:/continue statements, the guard
# around the processing loop); indentation is stripped.
def fetch_cs(args: argparse.Namespace):
logger.debug("args[]='%s' - CALLED!", type(args))
raw = utils.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
# Render the Markdown to HTML first, then scrape the two tables.
doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features='html.parser')
logger.debug("doc()=%d[]='%s'", len(doc), type(doc))
silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
domains["silenced"] = federation.find_domains(silenced)
blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
domains["reject"] = federation.find_domains(blocked)
logger.debug("domains[silenced]()=%d,domains[reject]()=%d", len(domains["silenced"]), len(domains["reject"]))
for block_level in domains:
logger.info("block_level='%s' has %d row(s)", block_level, len(domains[block_level]))
for row in domains[block_level]:
logger.debug("row[%s]='%s'", type(row), row)
if instances.is_recent(row["domain"], "last_blocked"):
logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
elif not instances.is_registered(row["domain"]):
logger.info("Fetching instances from domain='%s' ...", row["domain"])
federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)
except network.exceptions as exception:
logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
instances.set_last_error(row["domain"], exception)
# Record the block regardless of whether the crawl succeeded.
utils.process_block('chaos.social', row['domain'], row['reason'], block_level)
logger.debug("Invoking commit() ...")
database.connection.commit()
logger.debug("Success! - EXIT!")
# Command handler: fetch an FBA-style RSS feed (URL from args.feed), pull a
# domain out of each item link's query string, and crawl every new one.
# NOTE(review): excerpt is missing lines ('domains = list()', try:/continue
# statements, return); indentation is stripped.
def fetch_fba_rss(args: argparse.Namespace) -> int:
logger.debug("args[]='%s' - CALLED!", type(args))
logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
if response.ok and response.status_code < 300 and len(response.text) > 0:
logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
rss = atoma.parse_rss_bytes(response.content)
logger.debug("rss[]='%s'", type(rss))
for item in rss.items:
logger.debug("item='%s'", item)
# The feed links carry the domain after the first '=' in the URL;
# assumes exactly one query parameter - TODO confirm feed format.
domain = item.link.split("=")[1]
if not utils.is_domain_wanted(domain):
logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
elif domain in domains:
logger.debug("domain='%s' is already added - SKIPPED!", domain)
elif instances.is_registered(domain):
logger.debug("domain='%s' is already registered - SKIPPED!", domain)
elif instances.is_recent(domain):
logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
logger.debug("Adding domain='%s'", domain)
domains.append(domain)
logger.debug("domains()=%d", len(domains))
logger.info("Adding %d new instances ...", len(domains))
for domain in domains:
logger.info("Fetching instances from domain='%s' ...", domain)
federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
except network.exceptions as exception:
logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
instances.set_last_error(domain, exception)
logger.debug("Success! - EXIT!")
# Command handler: fetch the FBA bot's ATOM feed from ryona.agency, extract
# domains from the <a href> attributes inside each entry's HTML content,
# and crawl every new, wanted one.
# NOTE(review): excerpt is missing lines ('domains = list()', try:/continue
# statements, return); indentation is stripped.
def fetch_fbabot_atom(args: argparse.Namespace) -> int:
logger.debug("args[]='%s' - CALLED!", type(args))
feed = "https://ryona.agency/users/fba/feed.atom"
logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
if response.ok and response.status_code < 300 and len(response.text) > 0:
logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
atom = atoma.parse_atom_bytes(response.content)
logger.debug("atom[]='%s'", type(atom))
for entry in atom.entries:
logger.debug("entry[]='%s'", type(entry))
# Entry content is HTML; each anchor may list several domains
# comma-separated in its href.
doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
logger.debug("doc[]='%s'", type(doc))
for element in doc.findAll("a"):
for href in element["href"].split(","):
logger.debug("href[%s]='%s", type(href), href)
domain = tidyup.domain(href)
logger.debug("domain='%s'", domain)
if not utils.is_domain_wanted(domain):
logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
elif domain in domains:
logger.debug("domain='%s' is already added - SKIPPED!", domain)
elif instances.is_registered(domain):
logger.debug("domain='%s' is already registered - SKIPPED!", domain)
elif instances.is_recent(domain):
logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
domains.append(domain)
logger.debug("domains()='%d", len(domains))
logger.info("Adding %d new instances ...", len(domains))
for domain in domains:
logger.info("Fetching instances from domain='%s' ...", domain)
federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
except network.exceptions as exception:
logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
instances.set_last_error(domain, exception)
logger.debug("Success! - EXIT!")
# Command handler: crawl args.domain, then (unless limited by a flag in
# lines not shown) loop over all known instances of the supported softwares
# whose last_instance_fetch is older than the recheck_instance interval
# and re-crawl each of them.
# NOTE(review): excerpt is missing lines (try: openers, the single-domain
# guard before "Not fetching more instances", the 'for row in rows:'
# header, continue/return statements); indentation is stripped.
def fetch_instances(args: argparse.Namespace) -> int:
logger.debug("args[]='%s' - CALLED!", type(args))
logger.info("Fetching instances from args.domain='%s' ...", args.domain)
federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
except network.exceptions as exception:
logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
instances.set_last_error(args.domain, exception)
logger.debug("Not fetching more instances - EXIT!")
# Loop through some instances
database.cursor.execute(
"SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
rows = database.cursor.fetchall()
logger.info("Checking %d entries ...", len(rows))
logger.debug("domain='%s'", row[0])
if not utils.is_domain_wanted(row[0]):
logger.debug("Domain row[0]='%s' is not wanted - SKIPPED!", row[0])
# row = (domain, origin, software, nodeinfo_url) per the SELECT above.
logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", row[0], row[1], row[2], row[3])
federation.fetch_instances(row[0], row[1], row[2], inspect.currentframe().f_code.co_name, row[3])
except network.exceptions as exception:
logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[0]='%s'", type(exception), row[0])
instances.set_last_error(row[0], exception)
logger.debug("Success - EXIT!")
# Command handler: download Oliphant's community blocklist CSVs from
# codeberg.org, parse each one, and record the blocks (reject /
# reject_media / reject_reports) attributed to the respective blocker.
# NOTE(review): excerpt is missing lines: the 'domains = list()'
# initialization, the list/dict delimiters of the 'blocklists' literal
# (only the key/value pairs survived), 'continue'/'True'-assignment bodies
# for the reject_media branches, the 'for row in reader:' header, and the
# severity-based branching before process_block().  Indentation stripped.
def fetch_oliphant(args: argparse.Namespace) -> int:
logger.debug("args[]='%s' - CALLED!", type(args))
base_url = "https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists"
# Static mapping of blocker domain -> CSV path under base_url.  Note
# "mastodon.social" appears twice on purpose: two different CSV files.
"blocker": "artisan.chat",
"csv_url": "mastodon/artisan.chat.csv",
"blocker": "mastodon.art",
"csv_url": "mastodon/mastodon.art.csv",
"blocker": "pleroma.envs.net",
"csv_url": "mastodon/pleroma.envs.net.csv",
"blocker": "oliphant.social",
"csv_url": "mastodon/_unified_tier3_blocklist.csv",
"blocker": "mastodon.online",
"csv_url": "mastodon/mastodon.online.csv",
"blocker": "mastodon.social",
"csv_url": "mastodon/mastodon.social.csv",
"blocker": "mastodon.social",
"csv_url": "other/missing-tier0-mastodon.social.csv",
"blocker": "rage.love",
"csv_url": "mastodon/rage.love.csv",
"blocker": "sunny.garden",
"csv_url": "mastodon/sunny.garden.csv",
"blocker": "solarpunk.moe",
"csv_url": "mastodon/solarpunk.moe.csv",
"blocker": "toot.wales",
"csv_url": "mastodon/toot.wales.csv",
"blocker": "union.place",
"csv_url": "mastodon/union.place.csv",
logger.debug("Downloading %d files ...", len(blocklists))
for block in blocklists:
# Is domain given and not equal blocker?
if isinstance(args.domain, str) and args.domain != block["blocker"]:
logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
elif args.domain in domains:
logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
elif instances.is_recent(block["blocker"]):
logger.debug("block[blocker]='%s' has been recently crawled - SKIPPED!", block["blocker"])
logger.info("Fetching csv_url='%s' for blocker='%s' ...", block['csv_url'], block["blocker"])
response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
if response.ok and response.content != "":
logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")
logger.debug("reader[]='%s'", type(reader))
logger.debug("row[%s]='%s'", type(row), row)
# Column names vary between lists: some prefix headers with '#'.
domain = severity = None
reject_media = reject_reports = False
domain = row["#domain"]
elif "domain" in row:
domain = row["domain"]
logger.debug("row='%s' does not contain domain column", row)
if "#severity" in row:
severity = row["#severity"]
elif "severity" in row:
severity = row["severity"]
logger.debug("row='%s' does not contain severity column", row)
if "#reject_media" in row and row["#reject_media"].lower() == "true":
elif "reject_media" in row and row["reject_media"].lower() == "true":
if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
reject_reports = True
elif "reject_reports" in row and row["reject_reports"].lower() == "true":
reject_reports = True
logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
if not utils.is_domain_wanted(domain):
logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
logger.debug("Marking domain='%s' as handled", domain)
domains.append(domain)
logger.debug("Processing domain='%s' ...", domain)
processed = utils.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
logger.debug("processed='%s'", processed)
# Record the block(s); guards choosing between these levels are in
# lines not shown in this excerpt.
utils.process_block(block['blocker'], domain, None, "reject")
utils.process_block(block['blocker'], domain, None, "reject_media")
utils.process_block(block['blocker'], domain, None, "reject_reports")
logger.debug("Success! - EXIT!")
# Command handler: download plain-text blocklists (one domain per line)
# from a static list of URLs and process each wanted domain.
# NOTE(review): excerpt is missing lines: the 'urls' list delimiters around
# the seirdy.one entry, the 'for row in urls:' header, 'if domain == "":'
# before the empty-domain branch, continue/return statements.  Indentation
# is stripped.
def fetch_txt(args: argparse.Namespace) -> int:
logger.debug("args[]='%s' - CALLED!", type(args))
"blocker": "seirdy.one",
"url" : "https://seirdy.one/pb/bsl.txt",
logger.info("Checking %d text file(s) ...", len(urls))
logger.debug("Fetching row[url]='%s' ...", row["url"])
response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
if response.ok and response.status_code < 300 and response.text != "":
logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
# One domain per line in the fetched text file.
domains = response.text.split("\n")
logger.info("Processing %d domains ...", len(domains))
for domain in domains:
logger.debug("domain='%s'", domain)
logger.debug("domain is empty - SKIPPED!")
elif not utils.is_domain_wanted(domain):
logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
elif instances.is_recent(domain):
logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
processed = utils.process_domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)
logger.debug("processed='%s'", processed)
logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
logger.debug("Success! - EXIT!")
# Command handler: scrape the fedipact.online signatory list (<li> items)
# and crawl every new, wanted domain found there.
# NOTE(review): excerpt is missing lines (the 'for row in rows:' header,
# 'if domain == "":' before the empty branch, continue/return statements);
# indentation is stripped.
def fetch_fedipact(args: argparse.Namespace) -> int:
logger.debug("args[]='%s' - CALLED!", type(args))
response = utils.fetch_url("https://fedipact.online", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
if response.ok and response.status_code < 300 and response.text != "":
logger.debug("Parsing %d Bytes ...", len(response.text))
doc = bs4.BeautifulSoup(response.text, "html.parser")
logger.debug("doc[]='%s'", type(doc))
rows = doc.findAll("li")
logger.info("Checking %d row(s) ...", len(rows))
logger.debug("row[]='%s'", type(row))
# First text node of the <li> is expected to be the domain.
domain = tidyup.domain(row.contents[0])
logger.debug("domain='%s'", domain)
logger.debug("domain is empty - SKIPPED!")
elif not utils.is_domain_wanted(domain):
logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
elif instances.is_registered(domain):
logger.debug("domain='%s' is already registered - SKIPPED!", domain)
elif instances.is_recent(domain):
logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
logger.info("Fetching domain='%s' ...", domain)
federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
logger.debug("Success! - EXIT!")
# Command handler: scrape the joinfediverse.wiki FediBlock page.  It walks
# each "wikitable", learns which column index holds which header (domain /
# instance / subdomain(s) / block reason(s)), then reads the data cells by
# those indices.
# NOTE(review): this definition continues past the end of the visible
# chunk and several lines inside it are missing (loop headers, 'cnt'
# initialization/increments, continue statements); indentation is
# stripped.  Only the visible lines are annotated here.
def fetch_joinfediverse(args: argparse.Namespace) -> int:
logger.debug("args[]='%s' - CALLED!", type(args))
raw = utils.fetch_url("https://joinfediverse.wiki/FediBlock", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
doc = bs4.BeautifulSoup(raw, "html.parser")
logger.debug("doc[]='%s'", type(doc))
tables = doc.findAll("table", {"class": "wikitable"})
logger.info("Analyzing %d table(s) ...", len(tables))
logger.debug("table[]='%s'", type(table))
rows = table.findAll("tr")
logger.info("Checking %d row(s) ...", len(rows))
# Maps column index -> recognized header name for the current table.
block_headers = dict()
logger.debug("row[%s]='%s'", type(row), row)
headers = row.findAll("th")
logger.debug("Found headers()=%d header(s)", len(headers))
# A header row resets the column mapping.
block_headers = dict()
for header in headers:
logger.debug("header[]='%s',cnt=%d", type(header), cnt)
text = header.contents[0]
logger.debug("text[]='%s'", type(text))
if not isinstance(text, str):
logger.debug("text[]='%s' is not 'str' - SKIPPED!", type(text))
elif validators.domain(text.strip()):
# A domain in a <th> means this is a data row, not headers.
logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
text = tidyup.domain(text.strip())
logger.debug("text='%s'", text)
if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
logger.debug("Found header: '%s'=%d", text, cnt)
block_headers[cnt] = text
elif len(block_headers) == 0:
logger.debug("row is not scrapable - SKIPPED!")
elif len(block_headers) > 0:
# Data row: read cells at the column indices learned above.
logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
for element in row.find_all(["th", "td"]):
logger.debug("element[]='%s',cnt=%d", type(element), cnt)
if cnt in block_headers:
logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])
text = element.text.strip()
# "domain"/"instance" columns are both normalized to key "blocked".
key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"
logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
983 if key in ["domain", "instance"]:
985 elif key == "reason":
986 block[key] = tidyup.reason(text)
987 elif key == "subdomain(s)":
990 block[key] = text.split("/")
992 logger.debug("key='%s'", key)
995 logger.debug("block()=%d ...", len(block))
997 logger.debug("Appending block()=%d ...", len(block))
998 blocklist.append(block)
1000 logger.debug("blocklist()=%d", len(blocklist))
1002 database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
1003 domains = database.cursor.fetchall()
1005 logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
1007 for block in blocklist:
1008 logger.debug("block='%s'", block)
1009 if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
1010 origin = block["blocked"]
1011 for subdomain in block["subdomain(s)"]:
1012 block["blocked"] = subdomain + "." + origin
1013 blocking.append(block)
1015 blocking.append(block)
1017 logger.debug("blocking()=%d", blocking)
1018 for block in blocking:
1019 block["blocked"] = tidyup.domain(block["blocked"])
1021 if not utils.is_domain_wanted(block["blocked"]):
1022 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1024 elif instances.is_recent(block["blocked"]):
1025 logger.debug("blocked='%s' has been recently checked - SKIPPED!", block["blocked"])
1028 logger.info("Proccessing blocked='%s' ...", block["blocked"])
1029 processed = utils.process_domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
1032 for blocker in domains:
1033 blocker = blocker[0]
1034 logger.debug("blocker[%s]='%s'", type(blocker), blocker)
1036 for block in blocking:
1037 block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None
1039 if not utils.is_domain_wanted(block["blocked"]):
1040 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1043 logger.debug("blocked='%s',reason='%s'", block['blocked'], block['reason'])
1044 utils.process_block(blocker, block['blocked'], block['reason'], "reject")
1046 if instances.has_pending(blocker):
1047 logger.debug("Flushing updates for blocker='%s' ...", blocker)
1048 instances.update_data(blocker)
1050 logger.debug("Invoking commit() ...")
1051 database.connection.commit()
1053 logger.debug("config[bot_enabled]='%s',blockdict()=%d'", config.get("bot_enabled"), len(blockdict))
1054 if config.get("bot_enabled") and len(blockdict) > 0:
1055 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", blocker, len(blockdict))
1056 network.send_bot_post(blocker, blockdict)
1058 logger.debug("Success! - EXIT!")