mirror of
https://github.com/krateng/maloja.git
synced 2023-08-10 21:12:55 +03:00
1176 lines
31 KiB
Python
1176 lines
31 KiB
Python
# server
|
||
from bottle import request, response, FormsDict, HTTPError
|
||
|
||
# rest of the project
|
||
from .cleanup import CleanerAgent, CollectorAgent
|
||
from . import utilities
|
||
from .malojatime import register_scrobbletime, time_stamps, ranges
|
||
from .malojauri import uri_to_internal, internal_to_uri, compose_querystring
|
||
|
||
from .thirdparty import proxy_scrobble_all
|
||
|
||
from .globalconf import data_dir, malojaconfig, apikeystore
|
||
|
||
# doreah toolkit
|
||
from doreah.logging import log
|
||
from doreah import tsv
|
||
from doreah.caching import Cache, DeepCache
|
||
from doreah.auth import authenticated_api, authenticated_api_with_alternate
|
||
from doreah.io import ProgressBar
|
||
try:
|
||
from doreah.persistence import DiskDict
|
||
except: pass
|
||
import doreah
|
||
|
||
|
||
|
||
# technical
|
||
import os
|
||
import datetime
|
||
import sys
|
||
import unicodedata
|
||
from collections import namedtuple
|
||
from threading import Lock
|
||
import yaml
|
||
import lru
|
||
import math
|
||
|
||
# url handling
|
||
from importlib.machinery import SourceFileLoader
|
||
import urllib
|
||
|
||
|
||
|
||
dblock = Lock() #global database lock
|
||
dbstatus = {
|
||
"healthy":False,
|
||
"rebuildinprogress":False,
|
||
"complete":False
|
||
}
|
||
class DatabaseNotBuilt(HTTPError):
|
||
def __init__(self):
|
||
super().__init__(
|
||
status=503,
|
||
body="The Maloja Database is still being built. Try again in a few seconds.",
|
||
headers={"Retry-After":10}
|
||
)
|
||
|
||
SCROBBLES = [] # Format: tuple(track_ref,timestamp,saved)
|
||
ARTISTS = [] # Format: artist
|
||
TRACKS = [] # Format: namedtuple(artists=frozenset(artist_ref,...),title=title)
|
||
|
||
|
||
Track = namedtuple("Track",["artists","title"])
|
||
Scrobble = namedtuple("Scrobble",["track","timestamp","album","duration","saved"])
|
||
# album is saved in the scrobble because it's not actually authorative information about the track, just info
|
||
# what was sent with this scrobble
|
||
|
||
### OPTIMIZATION
|
||
SCROBBLESDICT = {} # timestamps to scrobble mapping
|
||
STAMPS = [] # sorted
|
||
#STAMPS_SET = set() # as set for easier check if exists # we use the scrobbles dict for that now
|
||
TRACKS_NORMALIZED = []
|
||
ARTISTS_NORMALIZED = []
|
||
ARTISTS_NORMALIZED_SET = set()
|
||
TRACKS_NORMALIZED_SET = set()
|
||
|
||
MEDALS_ARTISTS = {} #literally only changes once per year, no need to calculate that on the fly
|
||
MEDALS_TRACKS = {}
|
||
WEEKLY_TOPTRACKS = {}
|
||
WEEKLY_TOPARTISTS = {}
|
||
|
||
ISSUES = {}
|
||
|
||
cla = CleanerAgent()
|
||
coa = CollectorAgent()
|
||
clients = []
|
||
|
||
lastsync = 0
|
||
|
||
|
||
try:
|
||
with open(data_dir['state']("known_servers.yml"),"r") as f:
|
||
KNOWN_SERVERS = set(yaml.safe_load(f))
|
||
except:
|
||
KNOWN_SERVERS = set()
|
||
|
||
|
||
def add_known_server(url):
|
||
KNOWN_SERVERS.add(url)
|
||
with open(data_dir['state']("known_servers.yml"),"w") as f:
|
||
f.write(yaml.dump(list(KNOWN_SERVERS)))
|
||
|
||
|
||
|
||
|
||
log("Authenticated Machines: " + ", ".join([k for k in apikeystore]))
|
||
|
||
def checkAPIkey(key):
|
||
return apikeystore.check_key(key)
|
||
|
||
def allAPIkeys():
|
||
return [apikeystore[k] for k in apikeystore]
|
||
|
||
|
||
####
|
||
## Getting dict representations of database objects
|
||
####
|
||
|
||
def get_scrobble_dict(o):
|
||
track = get_track_dict(TRACKS[o.track])
|
||
return {"artists":track["artists"],"title":track["title"],"time":o.timestamp,"album":o.album,"duration":o.duration}
|
||
|
||
def get_artist_dict(o):
|
||
return o
|
||
#technically not a dict, but... you know
|
||
|
||
def get_track_dict(o):
|
||
artists = [get_artist_dict(ARTISTS[a]) for a in o.artists]
|
||
return {"artists":artists,"title":o.title}
|
||
|
||
|
||
####
|
||
## Creating or finding existing database entries
|
||
####
|
||
|
||
|
||
|
||
def createScrobble(artists,title,time,album=None,duration=None,volatile=False):
|
||
|
||
if len(artists) == 0 or title == "":
|
||
return {}
|
||
|
||
dblock.acquire()
|
||
|
||
i = getTrackID(artists,title)
|
||
|
||
# idempotence
|
||
if time in SCROBBLESDICT and i == SCROBBLESDICT[time].track:
|
||
dblock.release()
|
||
return get_track_dict(TRACKS[i])
|
||
# timestamp as unique identifier
|
||
while (time in SCROBBLESDICT):
|
||
time += 1
|
||
|
||
obj = Scrobble(i,time,album,duration,volatile) # if volatile generated, we simply pretend we have already saved it to disk
|
||
#SCROBBLES.append(obj)
|
||
# immediately insert scrobble correctly so we can guarantee sorted list
|
||
index = insert(SCROBBLES,obj,key=lambda x:x[1])
|
||
SCROBBLESDICT[time] = obj
|
||
STAMPS.insert(index,time) #should be same index as scrobblelist
|
||
register_scrobbletime(time)
|
||
invalidate_caches()
|
||
dblock.release()
|
||
|
||
proxy_scrobble_all(artists,title,time)
|
||
|
||
return get_track_dict(TRACKS[obj.track])
|
||
|
||
|
||
# this will never be called from different threads, so no lock
|
||
def readScrobble(artists,title,time):
|
||
while (time in SCROBBLESDICT):
|
||
time += 1
|
||
i = getTrackID(artists,title)
|
||
obj = Scrobble(i,time,None,None,True)
|
||
SCROBBLES.append(obj)
|
||
SCROBBLESDICT[time] = obj
|
||
#STAMPS.append(time)
|
||
|
||
|
||
|
||
def getArtistID(name):
|
||
|
||
obj = name
|
||
obj_normalized = normalize_name(name)
|
||
|
||
if obj_normalized in ARTISTS_NORMALIZED_SET:
|
||
return ARTISTS_NORMALIZED.index(obj_normalized)
|
||
|
||
i = len(ARTISTS)
|
||
ARTISTS.append(obj)
|
||
ARTISTS_NORMALIZED_SET.add(obj_normalized)
|
||
ARTISTS_NORMALIZED.append(obj_normalized)
|
||
|
||
# with a new artist added, we might also get new artists that they are credited as
|
||
cr = coa.getCredited(name)
|
||
getArtistID(cr)
|
||
|
||
coa.updateIDs(ARTISTS)
|
||
|
||
return i
|
||
|
||
def getTrackID(artists,title):
|
||
artistset = {getArtistID(name=a) for a in artists}
|
||
obj = Track(artists=frozenset(artistset),title=title)
|
||
obj_normalized = Track(artists=frozenset(artistset),title=normalize_name(title))
|
||
|
||
if obj_normalized in TRACKS_NORMALIZED_SET:
|
||
return TRACKS_NORMALIZED.index(obj_normalized)
|
||
i = len(TRACKS)
|
||
TRACKS.append(obj)
|
||
TRACKS_NORMALIZED_SET.add(obj_normalized)
|
||
TRACKS_NORMALIZED.append(obj_normalized)
|
||
return i
|
||
|
||
import unicodedata
|
||
|
||
# function to turn the name into a representation that can be easily compared, ignoring minor differences
|
||
remove_symbols = ["'","`","’"]
|
||
replace_with_space = [" - ",": "]
|
||
def normalize_name(name):
|
||
for r in replace_with_space:
|
||
name = name.replace(r," ")
|
||
name = "".join(char for char in unicodedata.normalize('NFD',name.lower())
|
||
if char not in remove_symbols and unicodedata.category(char) != 'Mn')
|
||
return name
|
||
|
||
|
||
|
||
|
||
|
||
########
|
||
########
|
||
## HTTP requests and their associated functions
|
||
########
|
||
########
|
||
|
||
# skip regular authentication if api key is present in request
|
||
# an api key now ONLY permits scrobbling tracks, no other admin tasks
|
||
def api_key_correct(request):
|
||
args = request.params
|
||
try:
|
||
args.update(request.json)
|
||
except:
|
||
pass
|
||
if "key" in args:
|
||
apikey = args["key"]
|
||
del args["key"]
|
||
elif "apikey" in args:
|
||
apikey = args["apikey"]
|
||
del args["apikey"]
|
||
else: return False
|
||
|
||
return checkAPIkey(apikey)
|
||
|
||
|
||
|
||
|
||
|
||
|
||
def get_scrobbles(**keys):
|
||
r = db_query(**{k:keys[k] for k in keys if k in ["artist","artists","title","since","to","within","timerange","associated","track"]})
|
||
#offset = (keys.get('page') * keys.get('perpage')) if keys.get('perpage') is not math.inf else 0
|
||
#r = r[offset:]
|
||
#if keys.get('perpage') is not math.inf: r = r[:keys.get('perpage')]
|
||
return r
|
||
|
||
|
||
def info():
|
||
totalscrobbles = get_scrobbles_num()
|
||
artists = {}
|
||
|
||
return {
|
||
"name":malojaconfig["NAME"],
|
||
"artists":{
|
||
chartentry["artist"]:round(chartentry["scrobbles"] * 100 / totalscrobbles,3)
|
||
for chartentry in get_charts_artists() if chartentry["scrobbles"]/totalscrobbles >= 0
|
||
},
|
||
"known_servers":list(KNOWN_SERVERS)
|
||
}
|
||
|
||
|
||
|
||
def get_scrobbles_num(**keys):
|
||
r = db_query(**{k:keys[k] for k in keys if k in ["artist","track","artists","title","since","to","within","timerange","associated"]})
|
||
return len(r)
|
||
|
||
|
||
#for multiple since values (must be ordered)
|
||
# DOESN'T SEEM TO ACTUALLY BE FASTER
|
||
# REEVALUATE
|
||
|
||
#def get_scrobbles_num_multiple(sinces=[],to=None,**keys):
|
||
#
|
||
# sinces_stamps = [time_stamps(since,to,None)[0] for since in sinces]
|
||
# #print(sinces)
|
||
# #print(sinces_stamps)
|
||
# minsince = sinces[-1]
|
||
# r = db_query(**{k:keys[k] for k in keys if k in ["artist","track","artists","title","associated","to"]},since=minsince)
|
||
#
|
||
# #print(r)
|
||
#
|
||
# validtracks = [0 for s in sinces]
|
||
#
|
||
# i = 0
|
||
# si = 0
|
||
# while True:
|
||
# if si == len(sinces): break
|
||
# if i == len(r): break
|
||
# if r[i]["time"] >= sinces_stamps[si]:
|
||
# validtracks[si] += 1
|
||
# else:
|
||
# si += 1
|
||
# continue
|
||
# i += 1
|
||
#
|
||
#
|
||
# return validtracks
|
||
|
||
|
||
|
||
def get_tracks(artist=None):
|
||
|
||
artistid = ARTISTS.index(artist) if artist is not None else None
|
||
# Option 1
|
||
return [get_track_dict(t) for t in TRACKS if (artistid in t.artists) or (artistid==None)]
|
||
|
||
# Option 2 is a bit more elegant but much slower
|
||
#tracklist = [get_track_dict(t) for t in TRACKS]
|
||
#ls = [t for t in tracklist if (artist in t["artists"]) or (artist==None)]
|
||
|
||
|
||
def get_artists():
|
||
if not dbstatus['healthy']: raise DatabaseNotBuilt()
|
||
return ARTISTS #well
|
||
|
||
|
||
|
||
def get_charts_artists(**keys):
|
||
return db_aggregate(by="ARTIST",**{k:keys[k] for k in keys if k in ["since","to","within","timerange"]})
|
||
|
||
|
||
def get_charts_tracks(**keys):
|
||
return db_aggregate(by="TRACK",**{k:keys[k] for k in keys if k in ["since","to","within","timerange","artist"]})
|
||
|
||
def get_pulse(**keys):
|
||
|
||
rngs = ranges(**{k:keys[k] for k in keys if k in ["since","to","within","timerange","step","stepn","trail"]})
|
||
results = []
|
||
for rng in rngs:
|
||
res = len(db_query(timerange=rng,**{k:keys[k] for k in keys if k in ["artists","artist","track","title","associated"]}))
|
||
results.append({"range":rng,"scrobbles":res})
|
||
|
||
return results
|
||
|
||
|
||
def get_performance(**keys):
|
||
|
||
rngs = ranges(**{k:keys[k] for k in keys if k in ["since","to","within","timerange","step","stepn","trail"]})
|
||
results = []
|
||
|
||
for rng in rngs:
|
||
if "track" in keys:
|
||
charts = get_charts_tracks(timerange=rng)
|
||
rank = None
|
||
for c in charts:
|
||
if c["track"] == keys["track"]:
|
||
rank = c["rank"]
|
||
break
|
||
elif "artist" in keys:
|
||
charts = get_charts_artists(timerange=rng)
|
||
rank = None
|
||
for c in charts:
|
||
if c["artist"] == keys["artist"]:
|
||
rank = c["rank"]
|
||
break
|
||
results.append({"range":rng,"rank":rank})
|
||
|
||
return results
|
||
|
||
|
||
def get_top_artists(**keys):
|
||
|
||
rngs = ranges(**{k:keys[k] for k in keys if k in ["since","to","within","timerange","step","stepn","trail"]})
|
||
results = []
|
||
|
||
for rng in rngs:
|
||
try:
|
||
res = db_aggregate(timerange=rng,by="ARTIST")[0]
|
||
results.append({"range":rng,"artist":res["artist"],"counting":res["counting"],"scrobbles":res["scrobbles"]})
|
||
except:
|
||
results.append({"range":rng,"artist":None,"scrobbles":0})
|
||
|
||
return results
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
def get_top_tracks(**keys):
|
||
|
||
rngs = ranges(**{k:keys[k] for k in keys if k in ["since","to","within","timerange","step","stepn","trail"]})
|
||
results = []
|
||
|
||
for rng in rngs:
|
||
try:
|
||
res = db_aggregate(timerange=rng,by="TRACK")[0]
|
||
results.append({"range":rng,"track":res["track"],"scrobbles":res["scrobbles"]})
|
||
except:
|
||
results.append({"range":rng,"track":None,"scrobbles":0})
|
||
|
||
return results
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
def artistInfo(artist):
|
||
|
||
charts = db_aggregate(by="ARTIST")
|
||
scrobbles = len(db_query(artists=[artist]))
|
||
#we cant take the scrobble number from the charts because that includes all countas scrobbles
|
||
try:
|
||
c = [e for e in charts if e["artist"] == artist][0]
|
||
others = [a for a in coa.getAllAssociated(artist) if a in ARTISTS]
|
||
position = c["rank"]
|
||
performance = get_performance(artist=artist,step="week")
|
||
return {
|
||
"artist":artist,
|
||
"scrobbles":scrobbles,
|
||
"position":position,
|
||
"associated":others,
|
||
"medals":{"gold":[],"silver":[],"bronze":[],**MEDALS_ARTISTS.get(artist,{})},
|
||
"topweeks":WEEKLY_TOPARTISTS.get(artist,0)
|
||
}
|
||
except:
|
||
# if the artist isnt in the charts, they are not being credited and we
|
||
# need to show information about the credited one
|
||
artist = coa.getCredited(artist)
|
||
c = [e for e in charts if e["artist"] == artist][0]
|
||
position = c["rank"]
|
||
return {"replace":artist,"scrobbles":scrobbles,"position":position}
|
||
|
||
|
||
|
||
|
||
|
||
|
||
def trackInfo(track):
|
||
charts = db_aggregate(by="TRACK")
|
||
#scrobbles = len(db_query(artists=artists,title=title)) #chart entry of track always has right scrobble number, no countas rules here
|
||
#c = [e for e in charts if set(e["track"]["artists"]) == set(artists) and e["track"]["title"] == title][0]
|
||
c = [e for e in charts if e["track"] == track][0]
|
||
scrobbles = c["scrobbles"]
|
||
position = c["rank"]
|
||
cert = None
|
||
threshold_gold, threshold_platinum, threshold_diamond = malojaconfig["SCROBBLES_GOLD","SCROBBLES_PLATINUM","SCROBBLES_DIAMOND"]
|
||
if scrobbles >= threshold_diamond: cert = "diamond"
|
||
elif scrobbles >= threshold_platinum: cert = "platinum"
|
||
elif scrobbles >= threshold_gold: cert = "gold"
|
||
|
||
|
||
return {
|
||
"track":track,
|
||
"scrobbles":scrobbles,
|
||
"position":position,
|
||
"medals":{"gold":[],"silver":[],"bronze":[],**MEDALS_TRACKS.get((frozenset(track["artists"]),track["title"]),{})},
|
||
"certification":cert,
|
||
"topweeks":WEEKLY_TOPTRACKS.get(((frozenset(track["artists"]),track["title"])),0)
|
||
}
|
||
|
||
|
||
|
||
|
||
def compare(remoteurl):
|
||
import json
|
||
compareurl = remoteurl + "/api/info"
|
||
|
||
response = urllib.request.urlopen(compareurl)
|
||
strangerinfo = json.loads(response.read())
|
||
owninfo = info()
|
||
|
||
#add_known_server(compareto)
|
||
|
||
artists = {}
|
||
|
||
for a in owninfo["artists"]:
|
||
artists[a.lower()] = {"name":a,"self":int(owninfo["artists"][a]*1000),"other":0}
|
||
|
||
for a in strangerinfo["artists"]:
|
||
artists[a.lower()] = artists.setdefault(a.lower(),{"name":a,"self":0})
|
||
artists[a.lower()]["other"] = int(strangerinfo["artists"][a]*1000)
|
||
|
||
for a in artists:
|
||
common = min(artists[a]["self"],artists[a]["other"])
|
||
artists[a]["self"] -= common
|
||
artists[a]["other"] -= common
|
||
artists[a]["common"] = common
|
||
|
||
best = sorted((artists[a]["name"] for a in artists),key=lambda x: artists[x.lower()]["common"],reverse=True)
|
||
|
||
result = {
|
||
"unique_self":sum(artists[a]["self"] for a in artists if artists[a]["common"] == 0),
|
||
"more_self":sum(artists[a]["self"] for a in artists if artists[a]["common"] != 0),
|
||
"common":sum(artists[a]["common"] for a in artists),
|
||
"more_other":sum(artists[a]["other"] for a in artists if artists[a]["common"] != 0),
|
||
"unique_other":sum(artists[a]["other"] for a in artists if artists[a]["common"] == 0)
|
||
}
|
||
|
||
total = sum(result[c] for c in result)
|
||
|
||
for r in result:
|
||
result[r] = (result[r],result[r]/total)
|
||
|
||
|
||
|
||
return {
|
||
"result":result,
|
||
"info":{
|
||
"ownname":owninfo["name"],
|
||
"remotename":strangerinfo["name"]
|
||
},
|
||
"commonartist":best[0]
|
||
}
|
||
|
||
|
||
def incoming_scrobble(artists,title,album=None,duration=None,time=None,fix=True):
|
||
if time is None:
|
||
time = int(datetime.datetime.now(tz=datetime.timezone.utc).timestamp())
|
||
|
||
log("Incoming scrobble (): ARTISTS: " + str(artists) + ", TRACK: " + title,module="debug")
|
||
if fix:
|
||
(artists,title) = cla.fullclean(artists,title)
|
||
trackdict = createScrobble(artists,title,time,album,duration)
|
||
|
||
sync()
|
||
|
||
return {"status":"success","track":trackdict}
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
def issues():
|
||
return ISSUES
|
||
|
||
def check_issues():
|
||
combined = []
|
||
duplicates = []
|
||
newartists = []
|
||
|
||
import itertools
|
||
import difflib
|
||
|
||
sortedartists = ARTISTS.copy()
|
||
sortedartists.sort(key=len,reverse=True)
|
||
reversesortedartists = sortedartists.copy()
|
||
reversesortedartists.reverse()
|
||
for a in reversesortedartists:
|
||
|
||
nochange = cla.confirmedReal(a)
|
||
|
||
st = a
|
||
lis = []
|
||
reachedmyself = False
|
||
for ar in sortedartists:
|
||
if (ar != a) and not reachedmyself:
|
||
continue
|
||
elif not reachedmyself:
|
||
reachedmyself = True
|
||
continue
|
||
|
||
if (ar.lower() == a.lower()) or ("the " + ar.lower() == a.lower()) or ("a " + ar.lower() == a.lower()):
|
||
duplicates.append((ar,a))
|
||
break
|
||
|
||
if (ar + " " in st) or (" " + ar in st):
|
||
lis.append(ar)
|
||
st = st.replace(ar,"").strip()
|
||
elif (ar == st):
|
||
lis.append(ar)
|
||
st = ""
|
||
if not nochange:
|
||
combined.append((a,lis))
|
||
break
|
||
|
||
elif (ar in st) and len(ar)*2 > len(st):
|
||
duplicates.append((a,ar))
|
||
|
||
st = st.replace("&","").replace("and","").replace("with","").strip()
|
||
if st not in ["", a]:
|
||
if len(st) < 5 and len(lis) == 1:
|
||
#check if we havent just randomly found the string in another word
|
||
#if (" " + st + " ") in lis[0] or (lis[0].endswith(" " + st)) or (lis[0].startswith(st + " ")):
|
||
duplicates.append((a,lis[0]))
|
||
elif len(st) < 5 and len(lis) > 1 and not nochange:
|
||
combined.append((a,lis))
|
||
elif len(st) >= 5 and not nochange:
|
||
#check if we havent just randomly found the string in another word
|
||
if (" " + st + " ") in a or (a.endswith(" " + st)) or (a.startswith(st + " ")):
|
||
newartists.append((st,a,lis))
|
||
|
||
#for c in itertools.combinations(ARTISTS,3):
|
||
# l = list(c)
|
||
# print(l)
|
||
# l.sort(key=len,reverse=True)
|
||
# [full,a1,a2] = l
|
||
# if (a1 + " " + a2 in full) or (a2 + " " + a1 in full):
|
||
# combined.append((full,a1,a2))
|
||
|
||
|
||
#for c in itertools.combinations(ARTISTS,2):
|
||
# if
|
||
#
|
||
# if (c[0].lower == c[1].lower):
|
||
# duplicates.append((c[0],c[1]))
|
||
|
||
|
||
# elif (c[0] + " " in c[1]) or (" " + c[0] in c[1]) or (c[1] + " " in c[0]) or (" " + c[1] in c[0]):
|
||
# if (c[0] in c[1]):
|
||
# full, part = c[1],c[0]
|
||
# rest = c[1].replace(c[0],"").strip()
|
||
# else:
|
||
# full, part = c[0],c[1]
|
||
# rest = c[0].replace(c[1],"").strip()
|
||
# if rest in ARTISTS and full not in [c[0] for c in combined]:
|
||
# combined.append((full,part,rest))
|
||
|
||
# elif (c[0] in c[1]) or (c[1] in c[0]):
|
||
# duplicates.append((c[0],c[1]))
|
||
|
||
|
||
return {"duplicates":duplicates,"combined":combined,"newartists":newartists}
|
||
|
||
|
||
|
||
def get_predefined_rulesets():
|
||
validchars = "-_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
|
||
|
||
rulesets = []
|
||
|
||
for f in os.listdir(data_dir['rules']("predefined")):
|
||
if f.endswith(".tsv"):
|
||
|
||
rawf = f.replace(".tsv","")
|
||
valid = all(char in validchars for char in rawf)
|
||
if not valid: continue
|
||
if "_" not in rawf: continue
|
||
|
||
try:
|
||
with open(data_dir['rules']("predefined",f)) as tsvfile:
|
||
line1 = tsvfile.readline()
|
||
line2 = tsvfile.readline()
|
||
|
||
if "# NAME: " in line1:
|
||
name = line1.replace("# NAME: ","")
|
||
else: name = rawf.split("_")[1]
|
||
desc = line2.replace("# DESC: ","") if "# DESC: " in line2 else ""
|
||
author = rawf.split("_")[0]
|
||
except:
|
||
continue
|
||
|
||
ruleset = {"file":rawf}
|
||
rulesets.append(ruleset)
|
||
ruleset["active"] = bool(os.path.exists(data_dir['rules'](f)))
|
||
ruleset["name"] = name
|
||
ruleset["author"] = author
|
||
ruleset["desc"] = desc
|
||
|
||
return rulesets
|
||
|
||
|
||
####
|
||
## Server operation
|
||
####
|
||
|
||
|
||
|
||
# Starts the server
|
||
def start_db():
|
||
log("Starting database...")
|
||
global lastsync
|
||
lastsync = int(datetime.datetime.now(tz=datetime.timezone.utc).timestamp())
|
||
build_db()
|
||
#run(dbserver, host='::', port=PORT, server='waitress')
|
||
log("Database reachable!")
|
||
|
||
def build_db():
|
||
|
||
global dbstatus
|
||
dbstatus['healthy'] = False
|
||
dbstatus['complete'] = False
|
||
dbstatus['rebuildinprogress'] = True
|
||
|
||
log("Building database...")
|
||
|
||
global SCROBBLES, ARTISTS, TRACKS
|
||
global TRACKS_NORMALIZED_SET, TRACKS_NORMALIZED, ARTISTS_NORMALIZED_SET, ARTISTS_NORMALIZED
|
||
global SCROBBLESDICT, STAMPS
|
||
|
||
SCROBBLES = []
|
||
ARTISTS = []
|
||
TRACKS = []
|
||
STAMPS = []
|
||
SCROBBLESDICT = {}
|
||
|
||
TRACKS_NORMALIZED = []
|
||
ARTISTS_NORMALIZED = []
|
||
ARTISTS_NORMALIZED_SET = set()
|
||
TRACKS_NORMALIZED_SET = set()
|
||
|
||
|
||
# parse files
|
||
db = tsv.parse_all(data_dir['scrobbles'](),"int","string","string",comments=False)
|
||
scrobblenum = len(db)
|
||
log(f"Found {scrobblenum} scrobbles...")
|
||
|
||
usebar = not malojaconfig["CLEAN_OUTPUT"]
|
||
if usebar: pbar = ProgressBar(max=scrobblenum,prefix="Loading scrobbles")
|
||
else:
|
||
n = 0
|
||
m = max(int(scrobblenum / 25),20)
|
||
#db = parseAllTSV("scrobbles","int","string","string",escape=False)
|
||
for sc in db:
|
||
artists = sc[1].split("␟")
|
||
title = sc[2]
|
||
time = sc[0]
|
||
|
||
readScrobble(artists,title,time)
|
||
if usebar: pbar.progress()
|
||
else:
|
||
n += 1
|
||
if n % m == 0: log(f"Loaded {n}/{scrobblenum}...")
|
||
|
||
if usebar: pbar.done()
|
||
|
||
|
||
log("Database loaded, optimizing...")
|
||
|
||
# optimize database
|
||
SCROBBLES.sort(key = lambda tup: tup[1])
|
||
#SCROBBLESDICT = {obj[1]:obj for obj in SCROBBLES}
|
||
STAMPS = [t for t in SCROBBLESDICT]
|
||
STAMPS.sort()
|
||
|
||
# inform malojatime module about earliest scrobble
|
||
if STAMPS: register_scrobbletime(STAMPS[0])
|
||
|
||
# NOT NEEDED BECAUSE WE DO THAT ON ADDING EVERY ARTIST ANYWAY
|
||
# get extra artists with no real scrobbles from countas rules
|
||
#for artist in coa.getAllArtists():
|
||
#for artist in coa.getCreditedList(ARTISTS):
|
||
# if artist not in ARTISTS:
|
||
# log(artist + " is added to database because of countas rules",module="debug")
|
||
# ARTISTS.append(artist)
|
||
# coa.updateIDs(ARTISTS)
|
||
|
||
dbstatus['healthy'] = True
|
||
|
||
|
||
#start regular tasks
|
||
utilities.update_medals()
|
||
utilities.update_weekly()
|
||
utilities.send_stats()
|
||
|
||
|
||
global ISSUES
|
||
ISSUES = check_issues()
|
||
|
||
|
||
dbstatus['complete'] = True
|
||
dbstatus['rebuildinprogress'] = False
|
||
|
||
log("Database fully built!")
|
||
|
||
|
||
|
||
# Saves all cached entries to disk
|
||
def sync():
|
||
|
||
# all entries by file collected
|
||
# so we don't open the same file for every entry
|
||
#log("Syncing",module="debug")
|
||
entries = {}
|
||
|
||
for idx in range(len(SCROBBLES)):
|
||
if not SCROBBLES[idx].saved:
|
||
|
||
t = get_scrobble_dict(SCROBBLES[idx])
|
||
|
||
artistlist = list(t["artists"])
|
||
artistlist.sort() #we want the order of artists to be deterministic so when we update files with new rules a diff can see what has actually been changed
|
||
artistss = "␟".join(artistlist)
|
||
timestamp = datetime.date.fromtimestamp(t["time"])
|
||
|
||
album = t["album"] or "-"
|
||
duration = t["duration"] or "-"
|
||
|
||
entry = [str(t["time"]),artistss,t["title"],album,duration]
|
||
|
||
monthcode = str(timestamp.year) + "_" + str(timestamp.month)
|
||
entries.setdefault(monthcode,[]).append(entry) #i feckin love the setdefault function
|
||
|
||
SCROBBLES[idx] = Scrobble(*SCROBBLES[idx][:-1],True)
|
||
# save copy with last tuple entry set to true
|
||
|
||
#log("Sorted into months",module="debug")
|
||
|
||
for e in entries:
|
||
tsv.add_entries(data_dir['scrobbles'](e + ".tsv"),entries[e],comments=False)
|
||
#addEntries("scrobbles/" + e + ".tsv",entries[e],escape=False)
|
||
|
||
#log("Written files",module="debug")
|
||
|
||
|
||
global lastsync
|
||
lastsync = int(datetime.datetime.now(tz=datetime.timezone.utc).timestamp())
|
||
#log("Database saved to disk.")
|
||
|
||
# save cached images
|
||
#saveCache()
|
||
|
||
|
||
|
||
###
|
||
## Caches in front of DB
|
||
## the volatile caches are intended mainly for excessive site navigation during one session
|
||
## the permanent caches are there to save data that is hard to calculate and never changes (old charts)
|
||
###
|
||
|
||
|
||
|
||
|
||
import copy
|
||
|
||
if malojaconfig["USE_DB_CACHE"]:
|
||
def db_query(**kwargs):
|
||
return db_query_cached(**kwargs)
|
||
def db_aggregate(**kwargs):
|
||
return db_aggregate_cached(**kwargs)
|
||
else:
|
||
def db_query(**kwargs):
|
||
return db_query_full(**kwargs)
|
||
def db_aggregate(**kwargs):
|
||
return db_aggregate_full(**kwargs)
|
||
|
||
|
||
csz = malojaconfig["DB_CACHE_ENTRIES"]
|
||
cmp = malojaconfig["DB_MAX_MEMORY"]
|
||
try:
|
||
import psutil
|
||
use_psutil = True
|
||
except:
|
||
use_psutil = False
|
||
|
||
cache_query = lru.LRU(csz)
|
||
cache_query_perm = lru.LRU(csz)
|
||
cache_aggregate = lru.LRU(csz)
|
||
cache_aggregate_perm = lru.LRU(csz)
|
||
|
||
perm_caching = malojaconfig["CACHE_DATABASE_PERM"]
|
||
temp_caching = malojaconfig["CACHE_DATABASE_SHORT"]
|
||
|
||
cachestats = {
|
||
"cache_query":{
|
||
"hits_perm":0,
|
||
"hits_tmp":0,
|
||
"misses":0,
|
||
"objperm":cache_query_perm,
|
||
"objtmp":cache_query,
|
||
"name":"Query Cache"
|
||
},
|
||
"cache_aggregate":{
|
||
"hits_perm":0,
|
||
"hits_tmp":0,
|
||
"misses":0,
|
||
"objperm":cache_aggregate_perm,
|
||
"objtmp":cache_aggregate,
|
||
"name":"Aggregate Cache"
|
||
}
|
||
}
|
||
|
||
from doreah.regular import runhourly
|
||
|
||
@runhourly
|
||
def log_stats():
|
||
logstr = "{name}: {hitsperm} Perm Hits, {hitstmp} Tmp Hits, {misses} Misses; Current Size: {sizeperm}/{sizetmp}"
|
||
for s in (cachestats["cache_query"],cachestats["cache_aggregate"]):
|
||
log(logstr.format(name=s["name"],hitsperm=s["hits_perm"],hitstmp=s["hits_tmp"],misses=s["misses"],
|
||
sizeperm=len(s["objperm"]),sizetmp=len(s["objtmp"])),module="debug")
|
||
|
||
def db_query_cached(**kwargs):
|
||
global cache_query, cache_query_perm
|
||
key = utilities.serialize(kwargs)
|
||
|
||
eligible_permanent_caching = (
|
||
"timerange" in kwargs and
|
||
not kwargs["timerange"].active() and
|
||
perm_caching
|
||
)
|
||
eligible_temporary_caching = (
|
||
not eligible_permanent_caching and
|
||
temp_caching
|
||
)
|
||
|
||
# hit permanent cache for past timeranges
|
||
if eligible_permanent_caching and key in cache_query_perm:
|
||
cachestats["cache_query"]["hits_perm"] += 1
|
||
return copy.copy(cache_query_perm.get(key))
|
||
|
||
# hit short term cache
|
||
elif eligible_temporary_caching and key in cache_query:
|
||
cachestats["cache_query"]["hits_tmp"] += 1
|
||
return copy.copy(cache_query.get(key))
|
||
|
||
else:
|
||
cachestats["cache_query"]["misses"] += 1
|
||
result = db_query_full(**kwargs)
|
||
if eligible_permanent_caching: cache_query_perm[key] = result
|
||
elif eligible_temporary_caching: cache_query[key] = result
|
||
|
||
if use_psutil:
|
||
reduce_caches_if_low_ram()
|
||
|
||
return result
|
||
|
||
|
||
def db_aggregate_cached(**kwargs):
|
||
global cache_aggregate, cache_aggregate_perm
|
||
key = utilities.serialize(kwargs)
|
||
|
||
eligible_permanent_caching = (
|
||
"timerange" in kwargs and
|
||
not kwargs["timerange"].active() and
|
||
perm_caching
|
||
)
|
||
eligible_temporary_caching = (
|
||
not eligible_permanent_caching and
|
||
temp_caching
|
||
)
|
||
|
||
# hit permanent cache for past timeranges
|
||
if eligible_permanent_caching and key in cache_aggregate_perm:
|
||
cachestats["cache_aggregate"]["hits_perm"] += 1
|
||
return copy.copy(cache_aggregate_perm.get(key))
|
||
|
||
# hit short term cache
|
||
elif eligible_temporary_caching and key in cache_aggregate:
|
||
cachestats["cache_aggregate"]["hits_tmp"] += 1
|
||
return copy.copy(cache_aggregate.get(key))
|
||
|
||
else:
|
||
cachestats["cache_aggregate"]["misses"] += 1
|
||
result = db_aggregate_full(**kwargs)
|
||
if eligible_permanent_caching: cache_aggregate_perm[key] = result
|
||
elif eligible_temporary_caching: cache_aggregate[key] = result
|
||
|
||
if use_psutil:
|
||
reduce_caches_if_low_ram()
|
||
|
||
return result
|
||
|
||
def invalidate_caches():
|
||
global cache_query, cache_aggregate
|
||
cache_query.clear()
|
||
cache_aggregate.clear()
|
||
log("Database caches invalidated.")
|
||
|
||
def reduce_caches(to=0.75):
|
||
global cache_query, cache_aggregate, cache_query_perm, cache_aggregate_perm
|
||
for c in cache_query, cache_aggregate, cache_query_perm, cache_aggregate_perm:
|
||
currentsize = len(c)
|
||
if currentsize > 100:
|
||
targetsize = max(int(currentsize * to),10)
|
||
c.set_size(targetsize)
|
||
c.set_size(csz)
|
||
|
||
def reduce_caches_if_low_ram():
|
||
ramprct = psutil.virtual_memory().percent
|
||
if ramprct > cmp:
|
||
log("{prct}% RAM usage, reducing caches!".format(prct=ramprct),module="debug")
|
||
ratio = (cmp / ramprct) ** 3
|
||
reduce_caches(to=ratio)
|
||
|
||
####
|
||
## Database queries
|
||
####
|
||
|
||
|
||
|
||
# Queries the database
|
||
def db_query_full(artist=None,artists=None,title=None,track=None,since=None,to=None,within=None,timerange=None,associated=False,max_=None):
|
||
|
||
if not dbstatus['healthy']: raise DatabaseNotBuilt()
|
||
(since, to) = time_stamps(since=since,to=to,within=within,range=timerange)
|
||
|
||
# this is not meant as a search function. we *can* query the db with a string, but it only works if it matches exactly
|
||
# if a title is specified, we assume that a specific track (with the exact artist combination) is requested
|
||
# if not, duplicate artist arguments are ignored
|
||
|
||
#artist = None
|
||
|
||
if artist is not None and isinstance(artist,str):
|
||
artist = ARTISTS.index(artist)
|
||
|
||
# artists to numbers
|
||
if artists is not None:
|
||
artists = set([(ARTISTS.index(a) if isinstance(a,str) else a) for a in artists])
|
||
|
||
# track to number
|
||
if track is not None and isinstance(track,dict):
|
||
trackartists = set([(ARTISTS.index(a) if isinstance(a,str) else a) for a in track["artists"]])
|
||
track = TRACKS.index((frozenset(trackartists),track["title"]))
|
||
artists = None
|
||
|
||
#check if track is requested via title
|
||
if title!=None and track==None:
|
||
track = TRACKS.index((frozenset(artists),title))
|
||
artists = None
|
||
|
||
# if we're not looking for a track (either directly or per title artist arguments, which is converted to track above)
|
||
# we only need one artist
|
||
elif artist is None and track is None and artists is not None and len(artists) != 0:
|
||
artist = artists.pop()
|
||
|
||
|
||
# db query always reverse by default
|
||
|
||
result = []
|
||
|
||
i = 0
|
||
for s in scrobbles_in_range(since,to,reverse=True):
|
||
if i == max_: break
|
||
if (track is None or s[0] == track) and (artist is None or artist in TRACKS[s[0]][0] or associated and artist in coa.getCreditedList(TRACKS[s[0]][0])):
|
||
result.append(get_scrobble_dict(s))
|
||
i += 1
|
||
|
||
return result
|
||
|
||
# pointless to check for artist when track is checked because every track has a fixed set of artists, but it's more elegant this way
|
||
|
||
|
||
# Queries that... well... aggregate
|
||
def db_aggregate_full(by=None,since=None,to=None,within=None,timerange=None,artist=None):
|
||
|
||
if not dbstatus['healthy']: raise DatabaseNotBuilt()
|
||
(since, to) = time_stamps(since=since,to=to,within=within,range=timerange)
|
||
|
||
if isinstance(artist, str):
|
||
artist = ARTISTS.index(artist)
|
||
|
||
if (by=="ARTIST"):
|
||
#this is probably a really bad idea
|
||
#for a in ARTISTS:
|
||
# num = len(db_query(artist=a,since=since,to=to))
|
||
#
|
||
|
||
# alright let's try for real
|
||
charts = {}
|
||
#for s in [scr for scr in SCROBBLES if since < scr[1] < to]:
|
||
for s in scrobbles_in_range(since,to):
|
||
artists = TRACKS[s[0]][0]
|
||
for a in coa.getCreditedList(artists):
|
||
# this either creates the new entry or increments the existing one
|
||
charts[a] = charts.setdefault(a,0) + 1
|
||
|
||
ls = [{"artist":get_artist_dict(ARTISTS[a]),"scrobbles":charts[a],"counting":[arti for arti in coa.getAllAssociated(ARTISTS[a]) if arti in ARTISTS]} for a in charts]
|
||
ls.sort(key=lambda k:k["scrobbles"],reverse=True)
|
||
# add ranks
|
||
for rnk in range(len(ls)):
|
||
if rnk == 0 or ls[rnk]["scrobbles"] < ls[rnk-1]["scrobbles"]:
|
||
ls[rnk]["rank"] = rnk + 1
|
||
else:
|
||
ls[rnk]["rank"] = ls[rnk-1]["rank"]
|
||
return ls
|
||
|
||
elif (by=="TRACK"):
|
||
charts = {}
|
||
#for s in [scr for scr in SCROBBLES if since < scr[1] < to and (artist==None or (artist in TRACKS[scr[0]][0]))]:
|
||
for s in [scr for scr in scrobbles_in_range(since,to) if (artist is None or (artist in TRACKS[scr[0]][0]))]:
|
||
track = s[0]
|
||
# this either creates the new entry or increments the existing one
|
||
charts[track] = charts.setdefault(track,0) + 1
|
||
|
||
ls = [{"track":get_track_dict(TRACKS[t]),"scrobbles":charts[t]} for t in charts]
|
||
ls.sort(key=lambda k:k["scrobbles"],reverse=True)
|
||
# add ranks
|
||
for rnk in range(len(ls)):
|
||
if rnk == 0 or ls[rnk]["scrobbles"] < ls[rnk-1]["scrobbles"]:
|
||
ls[rnk]["rank"] = rnk + 1
|
||
else:
|
||
ls[rnk]["rank"] = ls[rnk-1]["rank"]
|
||
return ls
|
||
|
||
else:
|
||
#return len([scr for scr in SCROBBLES if since < scr[1] < to])
|
||
return len(list(scrobbles_in_range(since,to)))
|
||
|
||
|
||
# Search for strings
|
||
def db_search(query,type=None):
|
||
results = []
|
||
if type=="ARTIST":
|
||
results = [a for a in ARTISTS if simplestr(query) in simplestr(a)]
|
||
if type=="TRACK":
|
||
results = [
|
||
get_track_dict(t) for t in TRACKS if simplestr(query) in simplestr(t[1])
|
||
]
|
||
return results
|
||
|
||
|
||
####
|
||
## Useful functions
|
||
####
|
||
|
||
# makes a string usable for searching (special characters are blanks, accents and stuff replaced with their real part)
|
||
def simplestr(input,ignorecapitalization=True):
|
||
norm = unicodedata.normalize("NFKD",input)
|
||
norm = [c for c in norm if not unicodedata.combining(c)]
|
||
norm = [c if len(c.encode())==1 else " " for c in norm]
|
||
clear = ''.join(c for c in norm)
|
||
if ignorecapitalization: clear = clear.lower()
|
||
return clear
|
||
|
||
|
||
|
||
#def getArtistId(nameorid):
|
||
# if isinstance(nameorid,int):
|
||
# return nameorid
|
||
# else:
|
||
# try:
|
||
# return ARTISTS.index(nameorid)
|
||
# except:
|
||
# return -1
|
||
|
||
|
||
def insert(list_,item,key=lambda x:x):
|
||
i = 0
|
||
while len(list_) > i:
|
||
if key(list_[i]) > key(item):
|
||
list_.insert(i,item)
|
||
return i
|
||
i += 1
|
||
|
||
list_.append(item)
|
||
return i
|
||
|
||
|
||
def scrobbles_in_range(start,end,reverse=False):
|
||
if reverse:
|
||
for stamp in reversed(STAMPS):
|
||
#print("Checking " + str(stamp))
|
||
if stamp < start: return
|
||
if stamp > end: continue
|
||
yield SCROBBLESDICT[stamp]
|
||
else:
|
||
for stamp in STAMPS:
|
||
#print("Checking " + str(stamp))
|
||
if stamp < start: continue
|
||
if stamp > end: return
|
||
yield SCROBBLESDICT[stamp]
|