with open("config.json") as f:
config = json.loads(f.read())
+# Don't check these: known trolls/flooders or testing/development installations
blacklist = [
+ # Floods network with fake nodes as "research" project
"activitypub-troll.cf",
+ # Similar troll
"gab.best",
+ # Similar troll
"4chan.icu",
+ # Flooder (?)
"social.shrimpcam.pw",
+ # Flooder (?)
"mastotroll.netz.org",
+ # Testing/developing installations
"ngrok.io",
]
+# Dictionary of pending errors that still need to be written to the database
pending_errors = {
}
-nodeinfos = [
+# "rel" identifiers (no real URLs)
+nodeinfo_identifier = [
"http://nodeinfo.diaspora.software/ns/schema/2.1",
"http://nodeinfo.diaspora.software/ns/schema/2.0",
"http://nodeinfo.diaspora.software/ns/schema/1.1",
"http://nodeinfo.diaspora.software/ns/schema/1.0",
]
+# HTTP headers for requests
headers = {
- "user-agent": config["useragent"]
+ "user-agent": config["useragent"],
+}
+
+# Info found for a node, such as the nodeinfo URL and detection mode, that
+# needs to be written to the database. Both dictionaries must be filled at the
+# same time or else update_nodeinfos() will fail
+nodeinfos = {
+    # Detection mode: AUTO_DISCOVERY or STATIC_CHECK
+ "detection_mode": {},
+ # Found nodeinfo URL
+ "nodeinfo_url": {},
}
connection = sqlite3.connect("blocks.db")
print("ERROR: failed SQL query:", domain, e)
sys.exit(255)
+def update_nodeinfos(domain: str):
+    """Flush the pending nodeinfo entries for *domain* to the database.
+
+    Expects both nodeinfos["detection_mode"][domain] and
+    nodeinfos["nodeinfo_url"][domain] to be present; writes them to the
+    instances table, then removes the pending entries. Raises RuntimeError
+    when either pending entry is missing; exits the process on SQL failure.
+    """
+    #print("DEBUG: Updating nodeinfo for domain:", domain)
+    if domain not in nodeinfos["detection_mode"] or domain not in nodeinfos["nodeinfo_url"]:
+        print(f"WARNING: domain {domain} has no pending nodeinfo!")
+        # A bare `raise` here has no active exception and would itself fail
+        # with "No active exception to re-raise". Raise RuntimeError (the
+        # same type a bare raise produces) with a meaningful message instead.
+        raise RuntimeError(f"domain {domain} has no pending nodeinfo")
+
+    try:
+        # NOTE(review): "UPDATE ... LIMIT 1" requires SQLite compiled with
+        # SQLITE_ENABLE_UPDATE_DELETE_LIMIT - confirm the target build has it.
+        cursor.execute("UPDATE instances SET detection_mode = ?, nodeinfo_url = ? WHERE domain = ? LIMIT 1", [
+            nodeinfos["detection_mode"][domain],
+            nodeinfos["nodeinfo_url"][domain],
+            domain
+        ])
+
+        if cursor.rowcount == 0:
+            print("WARNING: Did not update any rows:", domain)
+
+    # Broad catch kept for consistency with the file's other SQL handlers,
+    # which also exit with 255 on any database error.
+    except BaseException as e:
+        print("ERROR: failed SQL query:", domain, e)
+        sys.exit(255)
+
+    # NOISY-DEBUG: print("DEBUG: Deleting nodeinfos for domain:", domain)
+    del nodeinfos["detection_mode"][domain]
+    del nodeinfos["nodeinfo_url"][domain]
+
+
def update_last_error(domain: str, res: any):
# NOISY-DEBUG: print("DEBUG: domain,res.status_code:", domain, res.status_code, res.reason)
try:
print("ERROR: failed SQL query:", domain, e)
sys.exit(255)
+ # NOISY-DEBUG: print("DEBUG: Deleting pending_errors for domain:", domain)
+ del pending_errors[domain]
+
def update_last_nodeinfo(domain: str):
# NOISY-DEBUG: print("DEBUG: Updating last_nodeinfo for domain:", domain)
try:
if res.ok and res.json() is not None:
# NOISY-DEBUG: print("DEBUG: Success:", request)
json = res.json()
+ nodeinfos["detection_mode"][domain] = "STATIC_CHECK"
+ nodeinfos["nodeinfo_url"][domain] = request
break
elif not res.ok or res.status_code >= 400:
print("WARNING: Failed fetching nodeinfo from domain:", domain)
# NOISY-DEBUG: print("DEBUG: Found links in nodeinfo():", len(nodeinfo["links"]))
for link in nodeinfo["links"]:
# NOISY-DEBUG: print("DEBUG: rel,href:", link["rel"], link["href"])
- if link["rel"] in nodeinfos:
+ if link["rel"] in nodeinfo_identifier:
# NOISY-DEBUG: print("DEBUG: Fetching nodeinfo from:", link["href"])
res = reqto.get(link["href"])
# NOISY-DEBUG: print("DEBUG: href,res.ok,res.status_code:", link["href"], res.ok, res.status_code)
if res.ok and res.json() is not None:
# NOISY-DEBUG: print("DEBUG: Found JSON nodeinfo():", len(res.json()))
json = res.json()
+ nodeinfos["detection_mode"][domain] = "AUTO_DISCOVERY"
+ nodeinfos["nodeinfo_url"][domain] = link["href"]
break
else:
print("WARNING: Unknown 'rel' value:", domain, link["rel"])
),
)
+ if domain in nodeinfos["nodeinfo_url"]:
+        # NOISY-DEBUG: print("DEBUG: domain has pending nodeinfo being updated:", domain)
+ update_nodeinfos(domain)
+
if domain in pending_errors:
- # NOISY-DEBUG: print("DEBUG: domain has pending error be updated:", domain)
+ # NOISY-DEBUG: print("DEBUG: domain has pending error being updated:", domain)
update_last_error(domain, pending_errors[domain])
del pending_errors[domain]