1
0
mirror of https://github.com/krateng/maloja.git synced 2023-08-10 21:12:55 +03:00

Better metadata grabbing and caching

This commit is contained in:
Krateng 2018-12-17 15:10:10 +01:00
parent 6be0dfd25b
commit 78b1937044
7 changed files with 117 additions and 29 deletions

View File

@ -38,6 +38,12 @@ class CleanerAgent:
if a.strip() == "": if a.strip() == "":
return [] return []
if a.strip() in self.rules_notanartist:
return []
if " performing " in a.lower():
return self.parseArtists(re.split(" [Pp]erforming",a)[0])
if a.strip() in self.rules_belongtogether: if a.strip() in self.rules_belongtogether:
return [a.strip()] return [a.strip()]
if a.strip() in self.rules_replaceartist: if a.strip() in self.rules_replaceartist:

5
info/artists/.gitignore vendored Normal file
View File

@ -0,0 +1,5 @@
*.png
*.jpg
*.jpeg
*.txt
!default.jpg

BIN
info/artists/default.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.8 KiB

4
info/artists_cache/.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
*.png
*.jpg
*.jpeg
*.txt

View File

@ -70,18 +70,27 @@ def graceful_exit(sig=None,frame=None):
sys.exit() sys.exit()
@route("/info/<pth:re:.*\\.jpeg>")
@route("/info/<pth:re:.*\\.jpg>")
@route("/info/<pth:re:.*\\.png>")
def static_image(pth):
	"""Serve a png/jpg/jpeg image from below the ``info/`` directory.

	``pth`` is the part of the request path after ``/info/``.
	"""
	# Root the lookup at info/ instead of the working directory: static_file
	# rejects anything that resolves outside its root, so a request such as
	# /info/../something.png can no longer reach files outside info/.
	return static_file(pth,root="info")
@route("/<name:re:.*\\.html>") @route("/<name:re:.*\\.html>")
@route("/<name:re:.*\\.js>") @route("/<name:re:.*\\.js>")
@route("/<name:re:.*\\.css>") @route("/<name:re:.*\\.css>")
@route("/<name:re:.*\\.png>") @route("/<name:re:.*\\.png>")
@route("/<name:re:.*\\.jpeg>") @route("/<name:re:.*\\.jpeg>")
def static(name): def static(name):
return static_file("website/" + name,root="") return static_file("website/" + name,root="")
@route("/<name>") @route("/<name>")
def static_html(name): def static_html(name):
keys = FormsDict.decode(request.query) keys = FormsDict.decode(request.query)
# If a python file exists, it provides the replacement dict for the html file
if os.path.exists("website/" + name + ".py"): if os.path.exists("website/" + name + ".py"):
txt_keys = SourceFileLoader(name,"website/" + name + ".py").load_module().replacedict(keys,DATABASE_PORT) txt_keys = SourceFileLoader(name,"website/" + name + ".py").load_module().replacedict(keys,DATABASE_PORT)
with open("website/" + name + ".html") as htmlfile: with open("website/" + name + ".html") as htmlfile:
@ -91,7 +100,7 @@ def static_html(name):
return html return html
# Otherwise, we just serve the html file
return static_file("website/" + name + ".html",root="") return static_file("website/" + name + ".html",root="")
#set graceful shutdown #set graceful shutdown

View File

@ -1,5 +1,4 @@
### TSV files
def parseTSV(filename,*args): def parseTSV(filename,*args):
f = open(filename) f = open(filename)
@ -56,7 +55,92 @@ def createTSV(filename):
if not os.path.exists(filename): if not os.path.exists(filename):
open(filename,"w").close() open(filename,"w").close()
### Logging
def log(msg): def log(msg):
print(msg) print(msg)
# best function ever
### Media info
def getArtistInfo(artist):
	"""Return the image URL and description to show for an artist.

	Both values are looked up in this order: a user-provided file in
	``info/artists``, a previously cached file in ``info/artists_cache``,
	and finally the Last.fm ``artist.getinfo`` API. Values fetched from
	Last.fm are cached for later calls (the image is downloaded in a
	background thread by ``cacheImage``).

	Args:
		artist: The artist name.

	Returns:
		A dict with the keys ``"image"`` (URL) and ``"info"`` (description).
		Whatever could not be found locally or fetched (missing ``apikey``
		file, network error, unexpected response, no image on Last.fm) is
		replaced by the default image / a placeholder text; this function
		does not raise for those failures.
	"""
	# Import the urllib submodules explicitly; a bare "import urllib" does
	# not guarantee that urllib.parse / urllib.request are loaded.
	import http.client
	import json
	import os
	import re
	import threading
	import urllib.parse
	import urllib.request

	default_image = "/info/artists/default.jpg"
	default_desc = "No information available"

	# NOTE(review): names without any ASCII letters or digits all collapse to
	# an empty filename and therefore share one set of files. Kept as is so
	# existing custom and cached files are still found.
	filename = re.sub("[^a-zA-Z0-9]","",artist)
	filepath = "info/artists/" + filename
	filepath_cache = "info/artists_cache/" + filename
	locations = (filepath,filepath_cache)	# custom files win over cached ones

	# first existing image, custom before cached, png before jpg before jpeg
	candidates = (base + ext for base in locations for ext in (".png",".jpg",".jpeg"))
	found = next((c for c in candidates if os.path.exists(c)),None)
	imgurl = None if found is None else "/" + found

	# first existing description, custom before cached
	desc = None
	for base in locations:
		if os.path.exists(base + ".txt"):
			with open(base + ".txt","r") as descfile:
				desc = descfile.read().replace("\n","")
			break

	if imgurl is None or desc is None:
		# Something is missing locally: ask Last.fm and cache what we get.
		try:
			with open("apikey","r") as keyfile:
				apikey = keyfile.read().replace("\n","")

			url = "https://ws.audioscrobbler.com/2.0/?method=artist.getinfo&artist=" + urllib.parse.quote(artist) + "&api_key=" + apikey + "&format=json"
			with urllib.request.urlopen(url) as response:
				lastfm_data = json.loads(response.read())
			artist_data = lastfm_data["artist"]

			if imgurl is None:
				# Last.fm reports a missing image as an empty string
				imgurl = artist_data["image"][2]["#text"] or None
				if imgurl is not None:
					threading.Thread(target=cacheImage,args=(imgurl,"info/artists_cache",filename),daemon=True).start()

			if desc is None:
				desc = artist_data["bio"]["summary"]
				with open(filepath_cache + ".txt","w") as descfile:
					descfile.write(desc)

		except (OSError,ValueError,LookupError,TypeError,http.client.HTTPException):
			# Best effort: missing api key, network trouble, an unexpected
			# response or a failed cache write must not break the page.
			# Whatever was found or fetched so far is still returned below.
			pass

	return {
		"image":default_image if imgurl is None else imgurl,
		"info":default_desc if desc is None else desc
	}
def cacheImage(url,path,filename):
	"""Download the image at ``url`` and store it as ``path/filename.<subtype>``.

	The file extension is the subtype of the response's content type
	(e.g. ``png`` for ``image/png``).

	Args:
		url: Address of the image.
		path: Existing directory to store the image in.
		filename: Target file name without extension.
	"""
	import shutil
	import urllib.request

	# One request serves both the content type and the data, and the
	# response is closed when done.
	with urllib.request.urlopen(url) as response:
		target = path + "/" + filename + "." + response.info().get_content_subtype()
		with open(target,"wb") as imagefile:
			shutil.copyfileobj(response,imagefile)

View File

@ -2,33 +2,13 @@ import urllib
import json import json
#def page(keys):
#
# txt_keys = replace(keys)
#
#
# with open("website/artist.html","r") as htmlfile:
# html = htmlfile.read()
#
#
#
# for k in txt_keys:
# html = html.replace(k,txt_keys[k])
#
# return html
def replacedict(keys,dbport): def replacedict(keys,dbport):
from utilities import getArtistInfo
with open("website/apikey","r") as keyfile:
apikey = keyfile.read().replace("\n","")
url = "https://ws.audioscrobbler.com/2.0/?method=artist.getinfo&artist=" + urllib.parse.quote(keys["artist"]) + "&api_key=" + apikey + "&format=json" info = getArtistInfo(keys["artist"])
response = urllib.request.urlopen(url) imgurl = info.get("image")
lastfm_data = json.loads(response.read()) desc = info.get("info")
imgurl = lastfm_data["artist"]["image"][2]["#text"]
desc = lastfm_data["artist"]["bio"]["summary"]
response = urllib.request.urlopen("http://localhost:" + str(dbport) + "/artistinfo?artist=" + urllib.parse.quote(keys["artist"])) response = urllib.request.urlopen("http://localhost:" + str(dbport) + "/artistinfo?artist=" + urllib.parse.quote(keys["artist"]))
db_data = json.loads(response.read()) db_data = json.loads(response.read())