1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
# Standard library
import argparse
import csv
import inspect
import json
import logging
import time

# Third-party (already used throughout this file)
import atoma
import bs4
import markdown
import validators

# Project-local
from fba import csrf
from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import locking
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma
# Module-wide logging setup. INFO is the operational default; uncomment the
# setLevel() line below to trace a single run at DEBUG verbosity.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)
def check_instance(args: argparse.Namespace) -> int:
    """Check whether ``args.domain`` can be newly fetched.

    Returns 0 when the domain is syntactically valid, not blacklisted and not
    yet registered; a distinct non-zero code for each rejection reason so the
    caller can use it as an exit status.
    """
    logger.debug("args.domain='%s' - CALLED!", args.domain)

    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        # Domain is usable and unknown to the database.
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status
def fetch_pixelfed_api(args: argparse.Namespace) -> int:
    """Fetch the instance list from pixelfed.org's server API and register
    any not-yet-known, wanted domains.

    Returns 0 on success, a non-zero code on API or network errors.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    headers = tuple()

    try:
        logger.debug("Checking CSRF from pixelfed.org")
        headers = csrf.determine("pixelfed.org", dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
        # Return an int error code, consistent with the declared return type.
        return 100

    try:
        logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            "pixelfed.org",
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 101
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 102

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif not utils.is_domain_wanted(row["domain"]):
                logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
                continue
            elif instances.is_registered(row["domain"]):
                logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
                continue
            elif instances.is_recent(row["domain"]):
                logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
                continue

            logger.debug("Fetching instances from row[domain]='%s' ...", row["domain"])
            federation.fetch_instances(row["domain"], None, None, inspect.currentframe().f_code.co_name)

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 103

    logger.debug("Success! - EXIT!")
    return 0
def fetch_bkali(args: argparse.Namespace) -> int:
    """Fetch the domain list from the gql.api.bka.li GraphQL API and register
    any new, wanted instances (origin 'tak.teleyal.blog').

    Returns 0 on success, a non-zero code on API or network errors.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    domains = list()
    try:
        fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        }))

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for 'gql.api.bka.li' returned error message='%s", fetched['error_message'])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            # BUGFIX: the guard checks fetched["json"]["error"]["message"], so log
            # that path as well (the old fetched['error']['message'] raised KeyError).
            logger.warning("post_json_api() returned error: '%s", fetched["json"]["error"]["message"])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif not utils.is_domain_wanted(entry["domain"]):
                # BUGFIX: the '%s' placeholder previously had no argument.
                logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
                continue
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0
# Crawls the block lists of registered instances (optionally limited to one
# domain or one software via args) and records new/updated blocks in the DB.
# NOTE(review): this block appears decimated — indentation and several
# statements (`try:`/`continue`/`return`, the `blockdict` initializer, the
# `blockdict.append({...})` wrapper around orig. lines 345-346) are missing
# from this view, so it is documented in place rather than restructured.
197 def fetch_blocks(args: argparse.Namespace) -> int:
198 logger.debug("args[]='%s' - CALLED!", type(args))
# Validate an explicitly requested single domain before touching the DB.
199 if args.domain is not None and args.domain != "":
200 logger.debug("args.domain='%s' - checking ...", args.domain)
201 if not validators.domain(args.domain):
202 logger.warning("args.domain='%s' is not valid.", args.domain)
204 elif blacklist.is_blacklisted(args.domain):
205 logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
207 elif not instances.is_registered(args.domain):
208 logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
# Choose which blockers to (re-)crawl: a single domain, all instances of one
# software, or everything past the "recheck_block" interval.
213 if args.domain is not None and args.domain != "":
214 # Re-check single domain
215 logger.debug("Querying database for single args.domain='%s' ...", args.domain)
216 database.cursor.execute(
217 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
219 elif args.software is not None and args.software != "":
220 # Re-check single software
221 logger.debug("Querying database for args.software='%s' ...", args.software)
222 database.cursor.execute(
223 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
226 # Re-check after "timeout" (aka. minimum interval)
227 database.cursor.execute(
228 "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey', 'peertube') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
# Per-blocker loop: tidy the domain, skip unusable rows, then dispatch to the
# software-specific fetch_blocks() implementation.
231 rows = database.cursor.fetchall()
232 logger.info("Checking %d entries ...", len(rows))
233 for blocker, software, origin, nodeinfo_url in rows:
234 logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
235 blocker = tidyup.domain(blocker)
236 logger.debug("blocker='%s' - AFTER!", blocker)
239 logger.warning("blocker is now empty!")
241 elif nodeinfo_url is None or nodeinfo_url == "":
242 logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
244 elif not utils.is_domain_wanted(blocker):
245 logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
# Mark the blocker as crawled and reset its obfuscation flag before fetching.
248 logger.debug("blocker='%s'", blocker)
249 instances.set_last_blocked(blocker)
250 instances.set_has_obfuscation(blocker, False)
# Dispatch on software; friendica/misskey need no nodeinfo_url.
254 if software == "pleroma":
255 logger.info("blocker='%s',software='%s'", blocker, software)
256 blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
257 elif software == "mastodon":
258 logger.info("blocker='%s',software='%s'", blocker, software)
259 blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
260 elif software == "lemmy":
261 logger.info("blocker='%s',software='%s'", blocker, software)
262 blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
263 elif software == "friendica":
264 logger.info("blocker='%s',software='%s'", blocker, software)
265 blocking = friendica.fetch_blocks(blocker)
266 elif software == "misskey":
267 logger.info("blocker='%s',software='%s'", blocker, software)
268 blocking = misskey.fetch_blocks(blocker)
270 logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)
# Per-block loop: tidy blocked domain and reason, then de-obfuscate if needed.
272 logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
273 for block in blocking:
274 logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block['block_level'], block["reason"])
276 if block['block_level'] == "":
277 logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
280 logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
281 block["blocked"] = tidyup.domain(block["blocked"])
282 block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
283 logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])
285 if block["blocked"] == "":
286 logger.warning("blocked is empty, blocker='%s'", blocker)
# Obfuscated entries ('*' or '?' wildcards) are resolved against known
# instances via instances.deobfuscate(); unresolvable ones are skipped.
288 elif block["blocked"].count("*") > 0:
289 logger.debug("blocker='%s' uses obfuscated domains, marking ...", blocker)
290 instances.set_has_obfuscation(blocker, True)
292 # Some friendica servers also obscure domains without hash
293 row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)
295 logger.debug("row[]='%s'", type(row))
297 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
300 block["blocked"] = row[0]
302 nodeinfo_url = row[2]
303 elif block["blocked"].count("?") > 0:
304 logger.debug("blocker='%s' uses obfuscated domains, marking ...", blocker)
305 instances.set_has_obfuscation(blocker, True)
307 # Some obscure them with question marks, not sure if that's dependent on version or not
308 row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)
310 logger.debug("row[]='%s'", type(row))
312 logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
315 block["blocked"] = row[0]
317 nodeinfo_url = row[2]
# Register unknown blocked domains, normalize block levels, then insert or
# refresh the block record.
319 logger.debug("Looking up instance by domainm, blocked='%s'", block["blocked"])
320 if not utils.is_domain_wanted(block["blocked"]):
321 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
323 elif block['block_level'] in ["accept", "accepted"]:
324 logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
326 elif not instances.is_registered(block["blocked"]):
327 logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
328 federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)
330 if block['block_level'] == "silence":
331 logger.debug("Block level 'silence' has been changed to 'silenced'")
332 block['block_level'] = "silenced"
333 elif block['block_level'] == "suspend":
334 logger.debug("Block level 'suspend' has been changed to 'suspended'")
335 block['block_level'] = "suspended"
337 if not blocks.is_instance_blocked(blocker, block["blocked"], block['block_level']):
338 logger.debug("Invoking blocks.add_instance(%s, %s, %s, %s)", blocker, block["blocked"], block["reason"], block['block_level'])
339 blocks.add_instance(blocker, block["blocked"], block["reason"], block['block_level'])
# New 'reject' blocks are queued for the bot notification (presumably via a
# blockdict.append({...}) whose wrapper is missing from this view — verify).
341 logger.debug("block_level='%s',config[bot_enabled]='%s'", block['block_level'], config.get("bot_enabled"))
342 if block['block_level'] == "reject" and config.get("bot_enabled"):
343 logger.debug("blocker='%s' has blocked '%s' with reason='%s' - Adding to bot notification ...", blocker, block["blocked"], block["reason"])
345 "blocked": block["blocked"],
346 "reason" : block["reason"],
349 logger.debug("Updating block last seen and reason for blocker='%s',blocked='%s' ...", blocker, block["blocked"])
350 blocks.update_last_seen(blocker, block["blocked"], block['block_level'])
351 blocks.update_reason(block["reason"], blocker, block["blocked"], block['block_level'])
353 logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
354 cookies.clear(block["blocked"])
# Flush pending instance updates, commit, and drop cookies for the blocker.
356 logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
357 if instances.has_pending(blocker):
358 logger.debug("Flushing updates for blocker='%s' ...", blocker)
359 instances.update_data(blocker)
361 logger.debug("Invoking commit() ...")
362 database.connection.commit()
364 logger.debug("Invoking cookies.clear(%s) ...", blocker)
365 cookies.clear(blocker)
# Send one bot POST per blocker when anything was newly rejected.
367 logger.debug("config[bot_enabled]='%s',blockdict()=%d'", config.get("bot_enabled"), len(blockdict))
368 if config.get("bot_enabled") and len(blockdict) > 0:
369 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", blocker, len(blockdict))
370 network.send_bot_post(blocker, blockdict)
372 logger.debug("Success! - EXIT!")
# Scrapes per-software instance tables from fediverse.observer and fetches
# any wanted, unknown, non-recent domains found there.
# NOTE(review): the definition of `types` (orig. lines 377-411) is missing
# from this view — presumably a list of software names; confirm upstream.
375 def fetch_observer(args: argparse.Namespace) -> int:
376 logger.debug("args[]='%s' - CALLED!", type(args))
412 logger.info("Fetching %d different table data ...", len(types))
413 for software in types:
414 logger.debug("software='%s' - BEFORE!", software)
# FIXME(review): both '%s' placeholders below have no arguments — should
# likely pass (args.software, software).
415 if args.software is not None and args.software != software:
416 logger.debug("args.software='%s' does not match software='%s' - SKIPPED!")
# Fetch and parse the per-software HTML table; network failures skip the
# software instead of aborting the whole run.
421 logger.debug("Fetching table data for software='%s' ...", software)
422 raw = utils.fetch_url(
423 f"https://fediverse.observer/app/views/tabledata.php?software={software}",
425 (config.get("connection_timeout"), config.get("read_timeout"))
427 logger.debug("raw[%s]()=%d", type(raw), len(raw))
429 doc = bs4.BeautifulSoup(raw, features='html.parser')
430 logger.debug("doc[]='%s'", type(doc))
431 except network.exceptions as exception:
432 logger.warning("Cannot fetch software='%s' from fediverse.observer: '%s'", software, type(exception))
# Each anchor with class "url" holds one instance domain.
435 items = doc.findAll("a", {"class": "url"})
436 logger.info("Checking %d items,software='%s' ...", len(items), software)
438 logger.debug("item[]='%s'", type(item))
439 domain = item.decode_contents()
441 logger.debug("domain='%s'", domain)
442 if not utils.is_domain_wanted(domain):
443 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
445 elif instances.is_registered(domain):
446 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
448 elif instances.is_recent(domain):
449 logger.debug("domain='%s' is recently being handled - SKIPPED!", domain)
# Canonicalize the software alias, then crawl the new instance.
452 software = software_helper.alias(software)
453 logger.info("Fetching instances for domain='%s'", domain)
454 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
456 logger.debug("Success! - EXIT!")
# Scrapes the todon.eu wiki's domain-block page and records its
# silenced/suspended lists as blocks by "todon.eu".
# NOTE(review): the `blocklist` initializer (orig. lines 461-467) is missing
# from this view — presumably a dict with "silenced" and "reject" keys.
459 def fetch_todon_wiki(args: argparse.Namespace) -> int:
460 logger.debug("args[]='%s' - CALLED!", type(args))
468 raw = utils.fetch_url("https://wiki.todon.eu/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
469 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
471 doc = bs4.BeautifulSoup(raw, "html.parser")
472 logger.debug("doc[]='%s'", type(doc))
# The wiki page lists silenced/limited and suspended servers under fixed
# heading ids; each following <ul> holds one domain per <li>.
474 silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
475 logger.info("Checking %d silenced/limited entries ...", len(silenced))
476 blocklist["silenced"] = utils.find_domains(silenced, "div")
478 suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
479 logger.info("Checking %d suspended entries ...", len(suspended))
480 blocklist["reject"] = utils.find_domains(suspended, "div")
# Register unknown blocked domains, then insert blocks not yet recorded.
482 for block_level in blocklist:
483 blockers = blocklist[block_level]
485 logger.debug("block_level='%s',blockers()=%d'", block_level, len(blockers))
486 for blocked in blockers:
487 logger.debug("blocked='%s'", blocked)
489 if not instances.is_registered(blocked):
491 logger.info("Fetching instances from domain='%s' ...", blocked)
492 federation.fetch_instances(blocked, 'chaos.social', None, inspect.currentframe().f_code.co_name)
493 except network.exceptions as exception:
494 logger.warning("Exception '%s' during fetching instances (fetch_cs) from blocked='%s'", type(exception), blocked)
495 instances.set_last_error(blocked, exception)
497 if blocks.is_instance_blocked("todon.eu", blocked, block_level):
498 logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
501 logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
# A wiki page gives no per-domain reason, hence reason=None.
502 blocks.add_instance("todon.eu", blocked, None, block_level)
504 logger.debug("Invoking commit() ...")
505 database.connection.commit()
507 logger.debug("Success! - EXIT!")
# Fetches chaos.social's federation.md from GitHub, renders it to HTML and
# records its silenced/blocked instance tables as blocks by "chaos.social".
# NOTE(review): the `domains` initializer and the markdown `extensions` list
# (orig. lines 512-536) are missing from this view — confirm upstream.
510 def fetch_cs(args: argparse.Namespace):
511 logger.debug("args[]='%s' - CALLED!", type(args))
537 raw = utils.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
538 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
# The markdown is rendered to HTML so the two tables can be scraped by id.
540 doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features='html.parser')
541 logger.debug("doc()=%d[]='%s'", len(doc), type(doc))
543 silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
544 logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
545 domains["silenced"] = federation.find_domains(silenced)
547 blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
548 logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
549 domains["reject"] = federation.find_domains(blocked)
# Register unknown domains, then insert blocks not yet recorded.
551 logger.debug("domains[silenced]()=%d,domains[reject]()=%d", len(domains["silenced"]), len(domains["reject"]))
555 for block_level in domains:
556 logger.info("block_level='%s' has %d row(s)", block_level, len(domains[block_level]))
558 for row in domains[block_level]:
559 logger.debug("row[%s]='%s'", type(row), row)
560 if instances.is_recent(row["domain"], "last_blocked"):
561 logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
563 elif not instances.is_registered(row["domain"]):
565 logger.info("Fetching instances from domain='%s' ...", row["domain"])
566 federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)
567 except network.exceptions as exception:
568 logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
569 instances.set_last_error(row["domain"], exception)
571 if not blocks.is_instance_blocked('chaos.social', row["domain"], block_level):
572 logger.debug("domain='%s',block_level='%s' blocked by chaos.social, adding ...", row["domain"], block_level)
573 blocks.add_instance('chaos.social', row["domain"], row["reason"], block_level)
575 logger.debug("Invoking commit() ...")
576 database.connection.commit()
578 logger.debug("Success! - EXIT!")
def fetch_fba_rss(args: argparse.Namespace) -> int:
    """Fetch an FBA-specific RSS feed (``args.feed``), extract instance
    domains from the item links and crawl any new, wanted ones.

    Returns 0 on success.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    domains = list()

    logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item='%s'", item)
            # The domain is carried as a query parameter in the item link.
            domain = item.link.split("=")[1]

            if not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_fbabot_atom(args: argparse.Namespace) -> int:
    """Fetch the FBA bot account's ATOM feed, extract instance domains from
    the anchors in each entry and crawl any new, wanted ones.

    Returns 0 on success.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    feed = "https://ryona.agency/users/fba/feed.atom"

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                # One href may list several comma-separated domains.
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s", type(href), href)
                    domain = tidyup.domain(href)

                    logger.debug("domain='%s'", domain)
                    if not utils.is_domain_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()='%d", len(domains))
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_instances(args: argparse.Namespace) -> int:
    """Crawl instances, starting from ``args.domain`` and then looping over
    stale rows from the instances table past the "recheck_instance" interval.

    Returns 0 on success, 100 when the initial domain could not be fetched.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    locking.acquire()

    try:
        logger.info("Fetching instances from args.domain='%s' ...", args.domain)
        federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        return 100

    # NOTE(review): the guard condition for this early exit was lost in the
    # source; `args.single` matches the log message's intent — confirm.
    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("domain='%s'", row[0])
        if not utils.is_domain_wanted(row[0]):
            logger.debug("Domain row[0]='%s' is not wanted - SKIPPED!", row[0])
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", row[0], row[1], row[2], row[3])
            federation.fetch_instances(row[0], row[1], row[2], inspect.currentframe().f_code.co_name, row[3])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[0]='%s'", type(exception), row[0])
            instances.set_last_error(row[0], exception)

    logger.debug("Success - EXIT!")
    return 0
def fetch_oliphant(args: argparse.Namespace) -> int:
    """Download oliphant's CSV blocklists and process each listed domain
    through utils.process_domain() for the publishing blocker.

    Returns 0 on success.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    locking.acquire()

    # Base URL
    base_url = "https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists"

    # One entry per published CSV: the publishing instance and its file path.
    blocklists = (
        {
            "blocker": "artisan.chat",
            "csv_url": "mastodon/artisan.chat.csv",
        }, {
            "blocker": "mastodon.art",
            "csv_url": "mastodon/mastodon.art.csv",
        }, {
            "blocker": "pleroma.envs.net",
            "csv_url": "mastodon/pleroma.envs.net.csv",
        }, {
            "blocker": "oliphant.social",
            "csv_url": "mastodon/_unified_tier3_blocklist.csv",
        }, {
            "blocker": "mastodon.online",
            "csv_url": "mastodon/mastodon.online.csv",
        }, {
            "blocker": "mastodon.social",
            "csv_url": "mastodon/mastodon.social.csv",
        }, {
            "blocker": "mastodon.social",
            "csv_url": "other/missing-tier0-mastodon.social.csv",
        }, {
            "blocker": "rage.love",
            "csv_url": "mastodon/rage.love.csv",
        }, {
            "blocker": "sunny.garden",
            "csv_url": "mastodon/sunny.garden.csv",
        }, {
            "blocker": "solarpunk.moe",
            "csv_url": "mastodon/solarpunk.moe.csv",
        }, {
            "blocker": "toot.wales",
            "csv_url": "mastodon/toot.wales.csv",
        }, {
            "blocker": "union.place",
            "csv_url": "mastodon/union.place.csv",
        },
    )

    domains = list()

    logger.debug("Downloading %d files ...", len(blocklists))
    for block in blocklists:
        # Is domain given and not equal blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue
        elif args.domain in domains:
            logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
            continue
        elif instances.is_recent(block["blocker"]):
            logger.debug("block[blocker]='%s' has been recently crawled - SKIPPED!", block["blocker"])
            continue

        logger.info("Fetching csv_url='%s' for blocker='%s' ...", block['csv_url'], block["blocker"])
        response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.content != "":
            logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
            reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")

            logger.debug("reader[]='%s'", type(reader))
            for row in reader:
                logger.debug("row[%s]='%s'", type(row), row)
                # Oliphant's files use either '#domain' or 'domain' as header.
                if "#domain" in row:
                    domain = row["#domain"]
                elif "domain" in row:
                    domain = row["domain"]
                else:
                    logger.debug("row='%s' does not contain domain column", row)
                    continue

                logger.debug("domain='%s'", domain)
                if not utils.is_domain_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue
                elif instances.is_recent(domain):
                    logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                    continue

                logger.debug("Marking domain='%s' as handled", domain)
                domains.append(domain)

                logger.debug("Processing domain='%s' ...", domain)
                processed = utils.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)

    logger.debug("Success! - EXIT!")
    return 0
def fetch_txt(args: argparse.Namespace) -> int:
    """Fetch plain-text blocklists (one domain per line) from static URLs and
    process each wanted domain for the publishing blocker.

    Returns 0 on success.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    locking.acquire()

    # Static text-file blocklists and the instance publishing each of them.
    urls = (
        {
            "blocker": "seirdy.one",
            "url"    : "https://seirdy.one/pb/bsl.txt",
        },
    )

    logger.info("Checking %d text file(s) ...", len(urls))
    for row in urls:
        logger.debug("Fetching row[url]='%s' ...", row["url"])
        response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code < 300 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s'", domain)
                if domain == "":
                    logger.debug("domain is empty - SKIPPED!")
                    continue
                elif not utils.is_domain_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue
                elif instances.is_recent(domain):
                    logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                    continue

                logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
                processed = utils.process_domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)
                if not processed:
                    logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
                    continue

    logger.debug("Success! - EXIT!")
    return 0
def fetch_fedipact(args: argparse.Namespace) -> int:
    """Scrape the fedipact.online signatory list (<li> elements) and crawl
    any new, wanted instance domains found there.

    Returns 0 on success.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    locking.acquire()

    response = utils.fetch_url("https://fedipact.online", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and response.text != "":
        logger.debug("Parsing %d Bytes ...", len(response.text))

        doc = bs4.BeautifulSoup(response.text, "html.parser")
        logger.debug("doc[]='%s'", type(doc))

        rows = doc.findAll("li")
        logger.info("Checking %d row(s) ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            domain = tidyup.domain(row.contents[0])

            logger.debug("domain='%s'", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.info("Fetching domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
913 def fetch_joinfediverse(args: argparse.Namespace) -> int:
# Scrape the FediBlock page on joinfediverse.wiki, collect blocked
# domains (plus optional slash-separated subdomains and block reasons)
# from its "wikitable" tables, then record them as "reject" blocks for
# the local climatejustice.* instances found in the database.
# NOTE(review): this listing has gaps in its embedded numbering; loop
# headers, `if`/`continue` guards, the initializations of `blocklist`,
# `blocking`, `block`, `blockdict` and `cnt`, and the trailing `return`
# implied by `-> int` are not visible here - recover them from the
# upstream file before treating this text as runnable.
914 logger.debug("args[]='%s' - CALLED!", type(args))
# Download the wiki page and parse it with BeautifulSoup.
917 raw = utils.fetch_url("https://joinfediverse.wiki/FediBlock", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
918 logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
920 doc = bs4.BeautifulSoup(raw, "html.parser")
921 logger.debug("doc[]='%s'", type(doc))
# The block data lives in MediaWiki tables styled with class "wikitable".
923 tables = doc.findAll("table", {"class": "wikitable"})
925 logger.info("Analyzing %d table(s) ...", len(tables))
# (table loop header not visible in this listing)
928 logger.debug("table[]='%s'", type(table))
930 rows = table.findAll("tr")
931 logger.info("Checking %d row(s) ...", len(rows))
# Maps column index -> recognized header title for the current table.
932 block_headers = dict()
# (row loop header not visible in this listing)
934 #logger.debug("row[%s]='%s'", type(row), row)
936 headers = row.findAll("th")
937 #logger.debug("Found headers()=%d header(s)", len(headers))
# A row carrying <th> cells restarts header detection from scratch.
939 block_headers = dict()
941 for header in headers:
943 #logger.debug("header[]='%s',cnt=%d", type(header), cnt)
944 text = header.contents[0]
946 #logger.debug("text[]='%s'", type(text))
# Only plain-string header cells that are not themselves domain names
# are matched against the known column titles.
947 if not isinstance(text, str):
948 #logger.debug("text[]='%s' is not 'str' - SKIPPED!", type(text))
950 elif validators.domain(text.strip()):
951 #logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
954 text = tidyup.domain(text.strip())
955 #logger.debug("text='%s'", text)
956 if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
957 logger.debug("Found header: '%s'=%d", text, cnt)
958 block_headers[cnt] = text
# Data rows seen before any recognized header are not scrapable.
959 elif len(block_headers) == 0:
960 #logger.debug("row is not scrapable - SKIPPED!")
962 elif len(block_headers) > 0:
963 logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
# Walk the row's cells and pick out the columns mapped above.
967 for element in row.find_all(["th", "td"]):
969 logger.debug("element[]='%s',cnt=%d", type(element), cnt)
970 if cnt in block_headers:
971 logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])
973 text = element.text.strip()
# "domain"/"instance" columns are both stored under the key "blocked".
974 key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"
976 logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
977 if key in ["domain", "instance"]:
979 elif key == "reason":
980 block[key] = tidyup.reason(text)
# Subdomains are listed slash-separated in the wiki table cell.
981 elif key == "subdomain(s)":
984 block[key] = text.split("/")
986 logger.debug("key='%s'", key)
989 logger.debug("block()=%d ...", len(block))
991 logger.debug("Appending block()=%d ...", len(block))
992 blocklist.append(block)
994 logger.debug("blocklist()=%d", len(blocklist))
# The scraped blocks are attributed to the local climatejustice.*
# instances (hard-coded LIKE pattern on the instances table).
996 database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
997 domains = database.cursor.fetchall()
999 logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
# Expand subdomain lists into one blocking entry per subdomain.
1001 for block in blocklist:
1002 logger.debug("block='%s'", block)
1003 if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
1004 origin = block["blocked"]
1005 for subdomain in block["subdomain(s)"]:
1006 block["blocked"] = subdomain + "." + origin
# NOTE(review): the same dict object is mutated and re-appended, so all
# appended references end up sharing the final "blocked" value -
# presumably a per-subdomain copy was intended; confirm upstream.
1007 blocking.append(block)
1009 blocking.append(block)
# NOTE(review): "%d" is given the list `blocking` itself, which raises
# TypeError when DEBUG logging is enabled - len(blocking) was likely
# intended; confirm upstream.
1011 logger.debug("blocking()=%d", blocking)
# First pass: tidy each blocked domain and process the wanted,
# not-recently-seen ones (skip-branch `continue` lines not visible).
1012 for block in blocking:
1013 block["blocked"] = tidyup.domain(block["blocked"])
1015 if not utils.is_domain_wanted(block["blocked"]):
1016 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1018 elif instances.is_recent(block["blocked"]):
1019 logger.debug("blocked='%s' has been recently checked - SKIPPED!", block["blocked"])
# NOTE(review): "Proccessing" typo in this log message (runtime string,
# left untouched here).
1022 logger.info("Proccessing blocked='%s' ...", block["blocked"])
1023 processed = utils.process_domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
# Second pass: store a "reject" block entry per blocker/blocked pair.
1026 for blocker in domains:
# fetchall() rows are tuples; unwrap the single selected column.
1027 blocker = blocker[0]
1028 logger.debug("blocker[%s]='%s'", type(blocker), blocker)
1030 for block in blocking:
1031 block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None
1033 if not utils.is_domain_wanted(block["blocked"]):
1034 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1037 logger.debug("blocked='%s',reason='%s'", block['blocked'], block['reason'])
# Insert new blocks; refresh last-seen/reason on already-known ones.
1038 if not blocks.is_instance_blocked(blocker, block['blocked'], "reject"):
1039 logger.debug("Invoking blocks.add_instance(%s, %s, %s, %s)", blocker, block['blocked'], block["reason"], "reject")
1040 blocks.add_instance(blocker, block['blocked'], block["reason"], "reject")
1042 logger.debug("block_level='%s',config[bot_enabled]='%s'", "reject", config.get("bot_enabled"))
1043 if config.get("bot_enabled"):
1044 logger.debug("blocker='%s' has blocked '%s' with reason='%s' - Adding to bot notification ...", blocker, block['blocked'], block["reason"])
# (the dict literal / append wrapping these entries is not visible in
# this listing - presumably they populate `blockdict`; confirm upstream)
1046 "blocked": block['blocked'],
1047 "reason" : block["reason"],
1050 logger.debug("Updating block last seen and reason for blocker='%s',blocked='%s' ...", blocker, block['blocked'])
1051 blocks.update_last_seen(blocker, block['blocked'], "reject")
1052 blocks.update_reason(block["reason"], blocker, block['blocked'], "reject")
# Flush any pending instance-level changes, then commit the transaction.
1054 if instances.has_pending(blocker):
1055 logger.debug("Flushing updates for blocker='%s' ...", blocker)
1056 instances.update_data(blocker)
1058 logger.debug("Invoking commit() ...")
1059 database.connection.commit()
# Optionally notify the bot about newly collected blocks.
1061 logger.debug("config[bot_enabled]='%s',blockdict()=%d'", config.get("bot_enabled"), len(blockdict))
1062 if config.get("bot_enabled") and len(blockdict) > 0:
1063 logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", blocker, len(blockdict))
1064 network.send_bot_post(blocker, blockdict)
1066 logger.debug("Success! - EXIT!")