]> git.mxchange.org Git - fba.git/blob - deprecated/commands.py
Continued:
[fba.git] / deprecated / commands.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
def fetch_joinfediverse(args: argparse.Namespace) -> int:
    """Scrape the /FediBlock wiki page on joinfediverse.wiki and record its blocks.

    The function works in four phases:

    1. Fetch the wiki page and parse every ``<table class="wikitable">``.
       A row with more than one ``<th>`` is a header row; the 1-based
       positions of the columns named "domain", "instance", "subdomain(s)"
       and "block reason(s)" are remembered and used to read the data rows
       that follow it.
    2. Expand every scraped row that lists sub domains into one entry per
       "<subdomain>.<domain>".
    3. Hand each wanted, not recently checked blocked domain to
       ``processing.instance()``.
    4. For each ``climatejustice.%`` domain found in the ``instances`` table,
       record the scraped entries via ``processing.block()`` with the block
       level "reject" and, when ``bot_enabled`` is set, send the newly added
       ones through ``network.send_bot_post()``.

    Args:
        args: Parsed command-line arguments; only their type is logged here.

    Returns:
        1 when ``sources.is_recent()`` reports that the source domain has been
        accessed recently (nothing is fetched then), otherwise 0.

    Raises:
        KeyError: If a scraped row has no "domain"/"instance" column and thus
            no 'blocked' element.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "joinfediverse.wiki"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 1

    logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
    sources.update(source_domain)

    logger.info("Fetching /FediBlock wiki page from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/FediBlock",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    tables = doc.find_all("table", {"class": "wikitable"})

    # Column titles (after tidy-up) that carry data worth scraping
    wanted_headers = ("domain", "instance", "subdomain(s)", "block reason(s)")

    logger.info("Analyzing %d table(s) ...", len(tables))
    blocklist = []
    for table in tables:
        logger.debug("table[]='%s'", type(table))

        rows = table.find_all("tr")
        logger.info("Checking %d row(s) ...", len(rows))

        # Maps 1-based column position -> wanted header title
        block_headers = {}
        for row in rows:
            logger.debug("row[%s]='%s'", type(row), row)

            headers = row.find_all("th")
            logger.debug("Found headers()=%d header(s)", len(headers))
            if len(headers) > 1:
                # A new header row replaces any previously found column layout
                block_headers = {}
                for cnt, header in enumerate(headers, start=1):
                    logger.debug("header[]='%s',cnt=%d", type(header), cnt)
                    if not header.contents:
                        # An empty <th> has no first child to inspect
                        logger.debug("header has no contents - SKIPPED!")
                        continue

                    text = header.contents[0]

                    logger.debug("text[]='%s'", type(text))
                    if not isinstance(text, str):
                        logger.debug("text[]='%s' is not of type 'str' - SKIPPED!", type(text))
                        continue
                    elif validators.domain(text.strip()):
                        logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
                        continue

                    text = tidyup.domain(text.strip())
                    logger.debug("text='%s' - AFTER!", text)
                    if text in wanted_headers:
                        logger.debug("Found header: '%s'=%d", text, cnt)
                        block_headers[cnt] = text

            elif not block_headers:
                logger.debug("row is not scrapable - SKIPPED!")
                continue
            else:
                logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
                block = {}

                for cnt, element in enumerate(row.find_all(["th", "td"]), start=1):
                    logger.debug("element[]='%s',cnt=%d", type(element), cnt)
                    if cnt not in block_headers:
                        continue

                    logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])

                    text = element.text.strip()
                    # Both "domain" and "instance" columns name the blocked host
                    key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"

                    logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
                    if key == "subdomain(s)":
                        # Sub domains are "/"-separated; drop blanks and padding
                        # so they can be prefixed to the origin domain later
                        block[key] = [part.strip() for part in text.split("/") if part.strip()]
                    else:
                        block[key] = text

                logger.debug("block()=%d ...", len(block))
                if len(block) > 0:
                    logger.debug("Appending block()=%d ...", len(block))
                    blocklist.append(block)

    logger.debug("blocklist()=%d", len(blocklist))

    database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
    domains = database.cursor.fetchall()

    logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
    blocking = []
    for block in blocklist:
        logger.debug("block='%s'", block)
        if block.get("subdomain(s)") and "blocked" in block:
            origin = block["blocked"]
            logger.debug("origin='%s'", origin)
            for subdomain in block["subdomain(s)"]:
                # Each sub domain needs its own dict: appending the same
                # (mutated) dict repeatedly would leave only the last one
                entry = dict(block)
                entry["blocked"] = subdomain + "." + origin
                logger.debug("block[blocked]='%s'", entry["blocked"])
                blocking.append(entry)
        else:
            blocking.append(block)

    logger.debug("blocking()=%d", len(blocking))
    for block in blocking:
        logger.debug("block[]='%s'", type(block))
        if "blocked" not in block:
            raise KeyError(f"block()={len(block)} does not have element 'blocked'")

        block["blocked"] = tidyup.domain(block["blocked"]).encode("idna").decode("utf-8")
        logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])

        if block["blocked"] == "":
            logger.debug("block[blocked] is empty - SKIPPED!")
            continue
        elif not domain_helper.is_wanted(block["blocked"]):
            logger.debug("block[blocked]='%s' is not wanted - SKIPPED!", block["blocked"])
            continue
        elif instances.is_recent(block["blocked"]):
            logger.debug("block[blocked]='%s' has been recently checked - SKIPPED!", block["blocked"])
            continue

        logger.debug("Proccessing blocked='%s' ...", block["blocked"])
        processing.instance(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)

    for blocker in domains:
        # fetchall() yields row sequences; the domain is the only column
        blocker = blocker[0]
        logger.debug("blocker[%s]='%s'", type(blocker), blocker)
        instances.set_last_blocked(blocker)

        # Collected per blocker so a bot post only carries this blocker's
        # newly added blocks
        blockdict = []
        for block in blocking:
            logger.debug("block[blocked]='%s',block[block reason(s)]='%s' - BEFORE!", block["blocked"], block["block reason(s)"] if "block reason(s)" in block else None)
            block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None

            logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif not domain_helper.is_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
            if processing.block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0