Better metadata grabbing and caching

Krateng 2018-12-17 15:10:10 +01:00
parent 6be0dfd25b
commit 78b1937044
7 changed files with 117 additions and 29 deletions


@@ -38,6 +38,12 @@ class CleanerAgent:
        if a.strip() == "":
            return []
        if a.strip() in self.rules_notanartist:
            return []
        if " performing " in a.lower():
            return self.parseArtists(re.split(" [Pp]erforming",a)[0])
        if a.strip() in self.rules_belongtogether:
            return [a.strip()]
        if a.strip() in self.rules_replaceartist:
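In effect, parseArtists now drops entries that are not artists at all and strips "performing" qualifiers before the remaining rules run. A sketch of the intended behavior, with hypothetical rule data (the real sets are loaded from the agent's rule files):

    agent.rules_notanartist = ["Various Artists"]   # hypothetical example entry
    agent.parseArtists("Various Artists")
    # -> [] (matched rules_notanartist)
    agent.parseArtists("Royal Philharmonic Orchestra performing David Bowie")
    # -> ["Royal Philharmonic Orchestra"], assuming no further rule matches:
    # the part after " performing" is cut off and the remainder re-parsed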

info/artists/.gitignore vendored Normal file

@@ -0,0 +1,5 @@
*.png
*.jpg
*.jpeg
*.txt
!default.jpg

BIN info/artists/default.jpg Normal file (6.8 KiB; binary file not shown)

info/artists_cache/.gitignore vendored Normal file

@@ -0,0 +1,4 @@
*.png
*.jpg
*.jpeg
*.txt


@@ -70,18 +70,27 @@ def graceful_exit(sig=None,frame=None):
    sys.exit()

@route("/info/<pth:re:.*\\.jpeg>")
@route("/info/<pth:re:.*\\.jpg>")
@route("/info/<pth:re:.*\\.png>")
def static_image(pth):
    return static_file("info/" + pth,root="")

@route("/<name:re:.*\\.html>")
@route("/<name:re:.*\\.js>")
@route("/<name:re:.*\\.css>")
@route("/<name:re:.*\\.png>")
@route("/<name:re:.*\\.jpeg>")
def static(name):
    return static_file("website/" + name,root="")

@route("/<name>")
def static_html(name):
    keys = FormsDict.decode(request.query)
    # If a python file exists, it provides the replacement dict for the html file
    if os.path.exists("website/" + name + ".py"):
        txt_keys = SourceFileLoader(name,"website/" + name + ".py").load_module().replacedict(keys,DATABASE_PORT)
        with open("website/" + name + ".html") as htmlfile:
@@ -91,7 +100,7 @@ def static_html(name):
        return html

    # Otherwise, we just serve the html file
    return static_file("website/" + name + ".html",root="")

#set graceful shutdown
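The static_html route implies a small plug-in protocol: any website/<name>.py that exposes replacedict(keys,dbport) supplies the string substitutions for the matching website/<name>.html. A hypothetical plug-in under that contract (the file name and placeholder key are illustrative, not part of this commit):

    # website/example.py (hypothetical)
    def replacedict(keys,dbport):
        # keys are the decoded query parameters of the request
        artist = keys.get("artist","Unknown")
        # every occurrence of the placeholder in example.html gets replaced
        return {"KEY_ARTISTNAME":artist}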

utilities.py

@@ -1,5 +1,4 @@
### TSV files

def parseTSV(filename,*args):
    f = open(filename)
@@ -56,7 +55,92 @@ def createTSV(filename):
    if not os.path.exists(filename):
        open(filename,"w").close()
### Logging

def log(msg):
    print(msg)
    # best function ever

### Media info

def getArtistInfo(artist):

    import re
    import os
    import urllib.request
    import urllib.parse
    import json
    import _thread

    filename = re.sub("[^a-zA-Z0-9]","",artist)
    filepath = "info/artists/" + filename
    filepath_cache = "info/artists_cache/" + filename

    # check if custom image exists
    if os.path.exists(filepath + ".png"):
        imgurl = "/" + filepath + ".png"
    elif os.path.exists(filepath + ".jpg"):
        imgurl = "/" + filepath + ".jpg"
    elif os.path.exists(filepath + ".jpeg"):
        imgurl = "/" + filepath + ".jpeg"
    # check if cached image exists
    elif os.path.exists(filepath_cache + ".png"):
        imgurl = "/" + filepath_cache + ".png"
    elif os.path.exists(filepath_cache + ".jpg"):
        imgurl = "/" + filepath_cache + ".jpg"
    elif os.path.exists(filepath_cache + ".jpeg"):
        imgurl = "/" + filepath_cache + ".jpeg"

    # check if custom desc exists
    if os.path.exists(filepath + ".txt"):
        with open(filepath + ".txt","r") as descfile:
            desc = descfile.read().replace("\n","")
    # check if cached desc exists
    elif os.path.exists(filepath_cache + ".txt"):
        with open(filepath_cache + ".txt","r") as descfile:
            desc = descfile.read().replace("\n","")

    try:
        return {"image":imgurl,"info":desc}
    except NameError:
        pass
        # is this pythonic?

    # if we have neither a custom image nor a cached version, we return the address from last.fm, but cache that image for later use
    with open("apikey","r") as keyfile:
        apikey = keyfile.read().replace("\n","")

    try:
        url = "https://ws.audioscrobbler.com/2.0/?method=artist.getinfo&artist=" + urllib.parse.quote(artist) + "&api_key=" + apikey + "&format=json"
        response = urllib.request.urlopen(url)
        lastfm_data = json.loads(response.read())
        try:
            imgurl
        except NameError:
            imgurl = lastfm_data["artist"]["image"][2]["#text"]
            _thread.start_new_thread(cacheImage,(imgurl,"info/artists_cache",filename))
        try:
            desc
        except NameError:
            desc = lastfm_data["artist"]["bio"]["summary"]
            with open(filepath_cache + ".txt","w") as descfile:
                descfile.write(desc)
        # this feels so dirty
        return {"image":imgurl,"info":desc}
    except:
        return {"image":"/info/artists/default.jpg","info":"No information available"}

def cacheImage(url,path,filename):
    import urllib.request
    response = urllib.request.urlopen(url)
    # note: this fetches the URL twice (once to read the content type, once to download)
    target = path + "/" + filename + "." + response.info().get_content_subtype()
    urllib.request.urlretrieve(url,target)
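In short, getArtistInfo resolves in order: custom file in info/artists/, cached file in info/artists_cache/, then a live last.fm query whose image is cached by a background thread and whose description is written to the cache directory for the next call. A usage sketch (assuming an apikey file with a valid last.fm API key; the artist name is just an example):

    from utilities import getArtistInfo

    info = getArtistInfo("Nina Simone")
    print(info["image"])  # a local /info/... path, or a last.fm URL on the first lookup
    print(info["info"])   # bio summary, or "No information available" if the request failed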

website/artist.py

@@ -1,34 +1,14 @@
 import urllib.request, urllib.parse
 import json
 
-#def page(keys):
-#
-#	txt_keys = replace(keys)
-#
-#
-#	with open("website/artist.html","r") as htmlfile:
-#		html = htmlfile.read()
-#
-#
-#
-#	for k in txt_keys:
-#		html = html.replace(k,txt_keys[k])
-#
-#	return html
 
 def replacedict(keys,dbport):
 
+	from utilities import getArtistInfo
 
-	with open("website/apikey","r") as keyfile:
-		apikey = keyfile.read().replace("\n","")
-	url = "https://ws.audioscrobbler.com/2.0/?method=artist.getinfo&artist=" + urllib.parse.quote(keys["artist"]) + "&api_key=" + apikey + "&format=json"
-	response = urllib.request.urlopen(url)
-	lastfm_data = json.loads(response.read())
-	imgurl = lastfm_data["artist"]["image"][2]["#text"]
-	desc = lastfm_data["artist"]["bio"]["summary"]
+	info = getArtistInfo(keys["artist"])
+	imgurl = info.get("image")
+	desc = info.get("info")
 
 	response = urllib.request.urlopen("http://localhost:" + str(dbport) + "/artistinfo?artist=" + urllib.parse.quote(keys["artist"]))
 	db_data = json.loads(response.read())
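The database call follows the same pattern as the metadata lookup: the page queries the local database server over HTTP. A standalone sketch of that request (endpoint and port as in the diff; the helper name is hypothetical and the response fields are not shown in this hunk):

    import urllib.request, urllib.parse, json

    def fetch_artistinfo(artist,dbport):
        # hypothetical helper wrapping the /artistinfo call used above
        url = "http://localhost:" + str(dbport) + "/artistinfo?artist=" + urllib.parse.quote(artist)
        with urllib.request.urlopen(url) as response:
            return json.loads(response.read())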