1
0
mirror of https://github.com/krateng/maloja.git synced 2023-08-10 21:12:55 +03:00
maloja/utilities.py

544 lines
14 KiB
Python
Raw Normal View History

import re
import os
import hashlib
from threading import Thread, Timer
import pickle
import urllib
2019-03-10 20:14:50 +03:00
import datetime
import random
2019-04-03 17:03:48 +03:00
import itertools
from doreah import settings
2019-04-01 17:52:42 +03:00
from doreah import caching
2019-03-29 22:23:32 +03:00
from doreah.logging import log
#####
## RULESTATE VALIDATION
#####
def checksumTSV(folder):
    """Return the concatenated MD5 digests of all ``.tsv`` files in *folder*.

    Every file directly inside *folder* whose name ends in ``.tsv`` is read
    as raw bytes; its hex MD5 digest plus a trailing newline is appended to
    the result. Files are visited in the order ``os.listdir`` yields them.

    Returns an empty string when the folder contains no ``.tsv`` files.
    """
    digests = []
    for entry in os.listdir(folder + "/"):
        if entry.endswith(".tsv"):
            # the context manager closes the handle even if reading fails
            with open(folder + "/" + entry, "rb") as tsvfile:
                digests.append(hashlib.md5(tsvfile.read()).hexdigest() + "\n")
    return "".join(digests)
# returns whether checksums match and sets the checksum to invalid if they don't (or sets the new one if no previous one exists)
def combineChecksums(filename,checksums):
    """Reconcile *checksums* with the rule state stored next to *filename*.

    The rule state lives in ``filename + ".rulestate"``.

    * No state file yet: *checksums* is written to it and True is returned.
    * Stored state equals *checksums*: True is returned, nothing is written.
    * Stored state differs: the file is overwritten with the marker
      ``INVALID`` (unless it already contains it) and False is returned.

    Returns whether the stored rule state matches *checksums*.
    """
    statefile = filename + ".rulestate"

    if not os.path.exists(statefile):
        with open(statefile, "w") as f:
            f.write(checksums)
        return True

    with open(statefile, "r") as f:
        oldchecksums = f.read()

    if oldchecksums == checksums:
        # the new checksum given by the caller represents the rule state that
        # all current unsaved scrobbles were created under; if it is the same
        # as the existing one, we're all good
        return True

    if oldchecksums != "INVALID":
        # the file is not consistent with any single rule state (some
        # scrobbles were created with an old ruleset, some not)
        with open(statefile, "w") as f:
            f.write("INVALID")  # marker value, not a checksum
    # if the file already says INVALID there is no need to rewrite it
    return False
# checks ALL files for their rule state. if they are all the same as the current loaded one, the entire database can be assumed to be consistent with the current ruleset
# in any other case, get out
def consistentRulestate(folder,checksums):
    """Return True if every ``.tsv`` file in *folder* has a matching rule state.

    For each file in *folder* ending in ``.tsv`` the companion file
    ``<name>.rulestate`` is read and compared to *checksums*. The function
    returns False as soon as one state file is missing, unreadable or
    different, and True otherwise (including when there are no ``.tsv``
    files at all).
    """
    for scrobblefile in os.listdir(folder + "/"):
        if not scrobblefile.endswith(".tsv"):
            continue

        try:
            with open(folder + "/" + scrobblefile + ".rulestate", "r") as f:
                stored = f.read()
        except (OSError, UnicodeError):
            # a missing or unreadable state file means consistency cannot be
            # confirmed; previously this path crashed while closing a handle
            # that was never opened
            return False

        if stored != checksums:
            return False

    return True
2019-03-12 14:56:53 +03:00
#####
## IMAGES
#####
2018-12-17 17:10:10 +03:00
2018-12-28 20:06:09 +03:00
def apirequest(artists=None,artist=None,title=None):
    """Ask the configured web services for an image URL of a track or artist.

    If *title* is given, a track image is requested for *title* performed by
    *artists* (a list of artist names). Otherwise an artist image is
    requested for *artist*.

    Returns the image URL as a string, or None when no ``LASTFM_API_KEY`` is
    configured, when every request fails, or when the service only returns
    an empty image URL. For a track with several artists that yields
    nothing, the lookup is retried with each artist on their own.
    """
    import json
    import urllib.parse
    import urllib.request

    apikey = settings.get_settings("LASTFM_API_KEY")
    if apikey is None: return None

    sites = [
        {
            "name":"lastfm",
            "artisturl":"https://ws.audioscrobbler.com/2.0/?method=artist.getinfo&artist={artist}&api_key=" + apikey + "&format=json",
            "trackurl":"https://ws.audioscrobbler.com/2.0/?method=track.getinfo&track={title}&artist={artist}&api_key=" + apikey + "&format=json",
            # both extractors pick the fourth entry of the returned image list
            "result_artist_imgurl":lambda data:data["artist"]["image"][3]["#text"],
            "result_track_imgurl":lambda data:data["track"]["album"]["image"][3]["#text"]
        }
    ]

    # TRACKS
    if title is not None:
        for s in sites:
            try:
                artiststr = urllib.parse.quote(", ".join(artists))
                titlestr = urllib.parse.quote(title)
                requesturl = s["trackurl"].format(artist=artiststr,title=titlestr)
                # the context manager makes sure the connection is released
                with urllib.request.urlopen(requesturl) as response:
                    log("API: " + s["name"] + "; Image request: " + "/".join(artists) + " - " + title,module="external")
                    data = json.loads(response.read())
                imgurl = s["result_track_imgurl"](data)
                if imgurl != "":
                    return imgurl
            except Exception:
                # best effort: network errors and unexpected response shapes
                # simply mean this site has no image for us
                pass

        if len(artists) == 1:
            return None

        # try the same track with every single artist
        for a in artists:
            rec = apirequest(artists=[a],title=title)
            if rec is not None:
                return rec

        return None

    # ARTISTS
    else:
        for s in sites:
            try:
                requesturl = s["artisturl"].format(artist=urllib.parse.quote(artist))
                with urllib.request.urlopen(requesturl) as response:
                    log("API: " + s["name"] + "; Image request: " + artist,module="external")
                    data = json.loads(response.read())
                imgurl = s["result_artist_imgurl"](data)
                if imgurl != "":
                    return imgurl
            except Exception:
                # best effort, see above
                pass

        return None
2018-12-28 20:06:09 +03:00
2019-04-01 17:52:42 +03:00
### Caches

# the configured expiry values are multiplied by 24 * 3600 before being handed
# to the caches (presumably days -> seconds; confirm against doreah.caching)
cacheage = settings.get_settings("CACHE_EXPIRE_POSITIVE") * 24 * 3600
cacheage_neg = settings.get_settings("CACHE_EXPIRE_NEGATIVE") * 24 * 3600

# one named cache per kind of remotely resolved image
artist_cache = caching.Cache.create(
    name="artist_cache",
    maxage=cacheage,
    maxage_negative=cacheage_neg,
)
track_cache = caching.Cache.create(
    name="track_cache",
    maxage=cacheage,
    maxage_negative=cacheage_neg,
)
2019-04-03 17:03:48 +03:00
# removes emojis and weird shit from names
def clean(name):
    """Return *name* with every non-alphanumeric character removed.

    "Alphanumeric" follows ``str.isalnum``, so letters and digits of any
    script are kept while spaces, punctuation and symbols are dropped.
    """
    kept = [char for char in name if char.isalnum()]
    return "".join(kept).strip()
def local_files(artist=None,artists=None,title=None):
    """Find locally stored images for a track or an artist.

    A track is looked up when both *title* and *artists* are given, an
    artist when only *artist* is given; with neither, an empty list is
    returned. Names are passed through ``clean`` first.

    Candidate base names are built from the cleaned names and from an
    ASCII-only variant (everything outside ``a-zA-Z0-9`` removed), each also
    in lower case. For tracks the base name is the artists joined by ``-``,
    then ``_``, then the title; with fewer than four artists every ordering
    of the artists is tried, otherwise only the sorted order.

    For every candidate, both ``<folder>/<name>.<ext>`` files and image files
    inside a ``<folder>/<name>/`` directory are collected, where ``<folder>``
    is ``images/tracks/`` or ``images/artists/``.

    Returns a list of paths, each prefixed with ``/``. The order of the list
    is not defined.
    """
    # check if we're dealing with a track or artist, then clean up names
    # (only remove non-alphanumeric, allow korean and stuff)
    if title is not None and artists is not None:
        track = True
        title, artists = clean(title), [clean(a) for a in artists]
    elif artist is not None:
        track = False
        artist = clean(artist)
    else:
        return []

    superfolder = "images/tracks/" if track else "images/artists/"
    extensions = ("png","jpg","jpeg","gif")

    candidates = set()

    if track:
        safeartists = [re.sub("[^a-zA-Z0-9]","",a) for a in artists]
        safetitle = re.sub("[^a-zA-Z0-9]","",title)

        if len(artists) < 4:
            unsafeperms = itertools.permutations(artists)
            safeperms = itertools.permutations(safeartists)
        else:
            # too many orderings to try them all, only use the sorted one
            unsafeperms = [sorted(artists)]
            safeperms = [sorted(safeartists)]

        for artistlist in unsafeperms:
            candidates.add("-".join(artistlist) + "_" + title)
        for artistlist in safeperms:
            candidates.add("-".join(artistlist) + "_" + safetitle)
    else:
        candidates.add(artist)
        candidates.add(re.sub("[^a-zA-Z0-9]","",artist))

    # empty names are useless; every remaining name is also tried lower-cased
    filenames = {name for name in candidates if name != ""}
    filenames |= {name.lower() for name in filenames}

    if len(filenames) == 0:
        # nothing usable left after cleaning, fall back to a hash-based name
        if track:
            filenames.add(str(hash((frozenset(artists),title))))
        else:
            filenames.add(str(hash(artist)))

    images = []

    for purename in filenames:
        # direct files
        for ext in extensions:
            if os.path.exists(superfolder + purename + "." + ext):
                images.append("/" + superfolder + purename + "." + ext)

        # folder
        try:
            for f in os.listdir(superfolder + purename + "/"):
                if f.split(".")[-1] in extensions:
                    images.append("/" + superfolder + purename + "/" + f)
        except OSError:
            # no such folder (or not readable): there are just no images in it
            pass

    return images
2019-04-03 17:43:09 +03:00
# these caches are there so we don't check all files every time, but keep
# returning the same image; the LOCAL_IMAGE_ROTATE setting is used as the
# maximum age of an entry in both caches
local_cache_age = settings.get_settings("LOCAL_IMAGE_ROTATE")
local_artist_cache = caching.Cache(maxage=local_cache_age)
local_track_cache = caching.Cache(maxage=local_cache_age)
2019-03-12 13:39:36 +03:00
def getTrackImage(artists,title,fast=False):
    """Return an image URL for the track *title* by *artists*.

    Sources are tried in this order:

    1. a locally stored image (remembered in ``local_track_cache``),
    2. a previously cached API result; a cached negative result falls back
       to the cached/local images of the track's artists,
    3. the external API, but only if an API key is configured.

    With ``fast=True`` step 3 is not performed; instead a ``/image?...``
    link is returned for the lookup to be resolved later.

    Returns a URL string, or ``""`` if no image could be determined.
    """
    # the module only does `import urllib`, which does not guarantee that the
    # `parse` submodule is loaded
    import urllib.parse

    key = (frozenset(artists),title)

    # local images: a cache hit is quoted exactly like a fresh result, so the
    # same track always yields the same URL
    try:
        return urllib.parse.quote(local_track_cache.get(key))
    except Exception:
        images = local_files(artists=artists,title=title)
        if len(images) != 0:
            res = random.choice(images)
            local_track_cache.add(key,res)
            return urllib.parse.quote(res)

    try:
        # check our cache
        # if we have cached the nonexistence of that image, we immediately
        # return the redirect to the artist and let the resolver handle it
        # (even if we're not in a fast lookup right now)
        result = track_cache.get(key)
        if result is not None: return result
        for a in artists:
            res = getArtistImage(artist=a,fast=True)
            if res != "": return res
        return ""
    except Exception:
        # nothing usable in the cache, continue with a regular lookup
        pass

    # do we have an api key?
    apikey = settings.get_settings("LASTFM_API_KEY")
    if apikey is None: return "" # DO NOT CACHE THAT

    # fast request only returns cached and local results, generates redirect link for rest
    if fast: return "/image?title=" + urllib.parse.quote(title) + "&" + "&".join(["artist=" + urllib.parse.quote(a) for a in artists])

    # non-fast lookup (essentially only the resolver lookup)
    result = apirequest(artists=artists,title=title)

    # cache results (even negative ones)
    track_cache.add(key,result)

    # return either result or redirect to artist
    if result is not None: return result
    for a in artists:
        res = getArtistImage(artist=a,fast=False)
        if res != "": return res
    return ""
def getArtistImage(artist,fast=False):
    """Return an image URL for *artist*.

    Sources are tried in this order:

    1. a locally stored image (remembered in ``local_artist_cache``),
    2. a previously cached API result (a cached negative result yields ``""``),
    3. the external API, but only if an API key is configured.

    With ``fast=True`` step 3 is not performed; instead a ``/image?...``
    link is returned for the lookup to be resolved later.

    Returns a URL string, or ``""`` if no image could be determined.
    """
    # the module only does `import urllib`, which does not guarantee that the
    # `parse` submodule is loaded
    import urllib.parse

    # local images: a cache hit is quoted exactly like a fresh result, so the
    # same artist always yields the same URL
    try:
        return urllib.parse.quote(local_artist_cache.get(artist))
    except Exception:
        images = local_files(artist=artist)
        if len(images) != 0:
            res = random.choice(images)
            local_artist_cache.add(artist,res)
            return urllib.parse.quote(res)

    # cached API results, positive or negative
    try:
        result = artist_cache.get(artist)
        if result is not None: return result
        else: return ""
    except Exception:
        # nothing usable in the cache, continue with a regular lookup
        pass

    # do we have an api key?
    apikey = settings.get_settings("LASTFM_API_KEY")
    if apikey is None: return "" # DO NOT CACHE THAT

    # fast request only returns cached and local results, generates redirect link for rest
    if fast: return "/image?artist=" + urllib.parse.quote(artist)

    # non-fast lookup (essentially only the resolver lookup)
    result = apirequest(artist=artist)

    # cache results (even negative ones)
    artist_cache.add(artist,result)

    if result is not None: return result
    else: return ""
2019-02-02 20:08:30 +03:00
2019-03-12 13:39:36 +03:00
def getTrackImages(trackobjectlist,fast=False):
    """Return the image URLs for all tracks in *trackobjectlist*.

    Each element must provide the keys ``"artists"`` and ``"title"``. All
    tracks are first looked up concurrently, one thread per track; once
    every thread has finished, the results are collected in input order.

    *fast* is passed on to every ``getTrackImage`` call, including the
    collecting pass, so a fast request never blocks on external lookups.
    """
    threads = []

    for track in trackobjectlist:
        t = Thread(target=getTrackImage,args=(track["artists"],track["title"],),kwargs={"fast":fast})
        t.start()
        threads.append(t)

    for t in threads:
        t.join()

    # the threads' return values are discarded, so the results are fetched
    # again here with the same mode
    return [getTrackImage(track["artists"],track["title"],fast=fast) for track in trackobjectlist]
2019-03-12 13:39:36 +03:00
def getArtistImages(artistlist,fast=False):
    """Return the image URLs for all artists in *artistlist*.

    All artists are first looked up concurrently, one thread per artist;
    once every thread has finished, the results are collected in input order.

    *fast* is passed on to every ``getArtistImage`` call, including the
    collecting pass, so a fast request never blocks on external lookups.
    """
    threads = []

    for artist in artistlist:
        t = Thread(target=getArtistImage,args=(artist,),kwargs={"fast":fast})
        t.start()
        threads.append(t)

    for t in threads:
        t.join()

    # the threads' return values are discarded, so the results are fetched
    # again here with the same mode
    return [getArtistImage(a,fast=fast) for a in artistlist]
2019-02-03 18:52:37 +03:00
# new way of serving images
# instead always generate a link locally, but redirect that on the fly
# this way the page can load faster and images will trickle in without having to resort to XHTTP requests
def resolveImage(artist=None,track=None):
    """Resolve the image for a track or an artist with a regular (non-fast) lookup.

    *track* takes precedence over *artist* and must provide the keys
    ``"artists"`` and ``"title"``. Returns None when neither is given.
    """
    if track is not None:
        return getTrackImage(track["artists"],track["title"])
    if artist is not None:
        return getArtistImage(artist)
    return None
#####
## PULSE MAINTENANCE
#####
def startpulse():
    """Run every periodic maintenance action once.

    The actions are executed in the order yearly, monthly, daily; each of
    them schedules its own next run.
    """
    for action in (yearly, monthly, daily):
        action()
def yearly():
    """Rebuild the yearly artist medals and schedule the next run.

    ``MEDALS`` is cleared and refilled: for every completed year, starting
    with the year of ``STAMPS[0]`` and ending before the current UTC year,
    the artists ranked 1, 2 and 3 in that year's charts get that year added
    to their "gold", "silver" or "bronze" list. Artists with the same number
    of scrobbles share a rank.

    Afterwards a timer is started that calls this function again at the
    beginning of the next UTC year.
    """
    #medals
    from database import MEDALS, STAMPS, get_charts_artists

    medalnames = {1:"gold",2:"silver",3:"bronze"}

    MEDALS.clear()
    # NOTE(review): STAMPS[0] is presumably the earliest scrobble; this raises
    # if STAMPS is empty - confirm how an empty database should be handled
    firstyear = datetime.datetime.utcfromtimestamp(STAMPS[0]).year
    currentyear = datetime.datetime.utcnow().year
    for year in range(firstyear,currentyear):

        charts = get_charts_artists(within=[year])
        scr = -1
        rank = 0
        for a in charts:
            # the rank only advances when the scrobble count changes, so tied
            # artists share the position of the first of them
            if a["scrobbles"] != scr: rank = charts.index(a) + 1
            if rank > 3: break

            MEDALS.setdefault(a["artist"],{}).setdefault(medalnames[rank],[]).append(year)
            scr = a["scrobbles"]

    # schedule for next year
    now = datetime.datetime.utcnow()
    nextyear = datetime.datetime(now.year+1,1,1)
    # subtract the naive datetimes directly: .timestamp() would interpret
    # these UTC values as local time and be off across DST changes
    wait = (nextyear - now).total_seconds()
    Timer(wait,yearly).start()
def monthly():
    """Log the start of a new month and schedule the next run.

    A timer is started that calls this function again at the beginning of
    the next UTC month.
    """
    log("New month!",module="debug")

    # schedule for next month
    now = datetime.datetime.utcnow()
    if now.month != 12:
        nextmonth = datetime.datetime(now.year,now.month + 1,1)
    else:
        # December rolls over into January of the following year
        nextmonth = datetime.datetime(now.year+1,1,1)
    # subtract the naive datetimes directly: .timestamp() would interpret
    # these UTC values as local time and be off across DST changes
    wait = (nextmonth - now).total_seconds()
    Timer(wait,monthly).start()
def daily():
    """Log the start of a new day and schedule the next run.

    A timer is started that calls this function again at the next UTC
    midnight.
    """
    log("New day!",module="debug")

    # schedule for tomorrow
    now = datetime.datetime.utcnow()
    nextday = datetime.datetime(now.year,now.month,now.day) + datetime.timedelta(days=1)
    # subtract the naive datetimes directly: .timestamp() would interpret
    # these UTC values as local time and be off across DST changes
    wait = (nextday - now).total_seconds()
    Timer(wait,daily).start()