maloja/maloja/database.py

1189 lines
31 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# server
from bottle import request, response, FormsDict, HTTPError
# rest of the project
from .cleanup import CleanerAgent, CollectorAgent
from . import utilities
from .malojatime import register_scrobbletime, time_stamps, ranges
from .malojauri import uri_to_internal, internal_to_uri, compose_querystring
from .thirdparty import proxy_scrobble_all
from .__pkginfo__ import version
from .globalconf import data_dir
# doreah toolkit
from doreah.logging import log
from doreah import tsv
from doreah import settings
from doreah.caching import Cache, DeepCache
from doreah.auth import authenticated_api, authenticated_api_with_alternate
from doreah.io import ProgressBar
try:
from doreah.persistence import DiskDict
except: pass
import doreah
# technical
import os
import datetime
import sys
import unicodedata
from collections import namedtuple
from threading import Lock
import yaml
import lru
import math
# url handling
from importlib.machinery import SourceFileLoader
import urllib
dblock = Lock() #global database lock
dbstatus = {
"healthy":False,
"rebuildinprogress":False,
"complete":False
}
class DatabaseNotBuilt(HTTPError):
def __init__(self):
super().__init__(
status=503,
body="The Maloja Database is still being built. Try again in a few seconds.",
headers={"Retry-After":10}
)
SCROBBLES = [] # Format: tuple(track_ref,timestamp,saved)
ARTISTS = [] # Format: artist
TRACKS = [] # Format: namedtuple(artists=frozenset(artist_ref,...),title=title)
Track = namedtuple("Track",["artists","title"])
Scrobble = namedtuple("Scrobble",["track","timestamp","album","duration","saved"])
# album is saved in the scrobble because it's not actually authorative information about the track, just info
# what was sent with this scrobble
### OPTIMIZATION
SCROBBLESDICT = {} # timestamps to scrobble mapping
STAMPS = [] # sorted
#STAMPS_SET = set() # as set for easier check if exists # we use the scrobbles dict for that now
TRACKS_NORMALIZED = []
ARTISTS_NORMALIZED = []
ARTISTS_NORMALIZED_SET = set()
TRACKS_NORMALIZED_SET = set()
MEDALS_ARTISTS = {} #literally only changes once per year, no need to calculate that on the fly
MEDALS_TRACKS = {}
WEEKLY_TOPTRACKS = {}
WEEKLY_TOPARTISTS = {}
ISSUES = {}
cla = CleanerAgent()
coa = CollectorAgent()
clients = []
lastsync = 0
try:
with open(data_dir['state']("known_servers.yml"),"r") as f:
KNOWN_SERVERS = set(yaml.safe_load(f))
except:
KNOWN_SERVERS = set()
def add_known_server(url):
KNOWN_SERVERS.add(url)
with open(data_dir['state']("known_servers.yml"),"w") as f:
f.write(yaml.dump(list(KNOWN_SERVERS)))
### symmetric keys are fine for now since we hopefully use HTTPS
def loadAPIkeys():
global clients
tsv.create(data_dir['clients']("authenticated_machines.tsv"))
#createTSV("clients/authenticated_machines.tsv")
clients = tsv.parse(data_dir['clients']("authenticated_machines.tsv"),"string","string")
#clients = parseTSV("clients/authenticated_machines.tsv","string","string")
log("Authenticated Machines: " + ", ".join([m[1] for m in clients]))
def checkAPIkey(k):
#return (k in [k for [k,d] in clients])
for key, identifier in clients:
if key == k: return identifier
return False
def allAPIkeys():
return [k for [k,d] in clients]
####
## Getting dict representations of database objects
####
def get_scrobble_dict(o):
track = get_track_dict(TRACKS[o.track])
return {"artists":track["artists"],"title":track["title"],"time":o.timestamp,"album":o.album,"duration":o.duration}
def get_artist_dict(o):
return o
#technically not a dict, but... you know
def get_track_dict(o):
artists = [get_artist_dict(ARTISTS[a]) for a in o.artists]
return {"artists":artists,"title":o.title}
####
## Creating or finding existing database entries
####
def createScrobble(artists,title,time,album=None,duration=None,volatile=False):
if len(artists) == 0 or title == "":
return {}
dblock.acquire()
i = getTrackID(artists,title)
# idempotence
if time in SCROBBLESDICT and i == SCROBBLESDICT[time].track:
dblock.release()
return get_track_dict(TRACKS[i])
# timestamp as unique identifier
while (time in SCROBBLESDICT):
time += 1
obj = Scrobble(i,time,album,duration,volatile) # if volatile generated, we simply pretend we have already saved it to disk
#SCROBBLES.append(obj)
# immediately insert scrobble correctly so we can guarantee sorted list
index = insert(SCROBBLES,obj,key=lambda x:x[1])
SCROBBLESDICT[time] = obj
STAMPS.insert(index,time) #should be same index as scrobblelist
register_scrobbletime(time)
invalidate_caches()
dblock.release()
proxy_scrobble_all(artists,title,time)
return get_track_dict(TRACKS[obj.track])
# this will never be called from different threads, so no lock
def readScrobble(artists,title,time):
while (time in SCROBBLESDICT):
time += 1
i = getTrackID(artists,title)
obj = Scrobble(i,time,None,None,True)
SCROBBLES.append(obj)
SCROBBLESDICT[time] = obj
#STAMPS.append(time)
def getArtistID(name):
obj = name
obj_normalized = normalize_name(name)
if obj_normalized in ARTISTS_NORMALIZED_SET:
return ARTISTS_NORMALIZED.index(obj_normalized)
i = len(ARTISTS)
ARTISTS.append(obj)
ARTISTS_NORMALIZED_SET.add(obj_normalized)
ARTISTS_NORMALIZED.append(obj_normalized)
# with a new artist added, we might also get new artists that they are credited as
cr = coa.getCredited(name)
getArtistID(cr)
coa.updateIDs(ARTISTS)
return i
def getTrackID(artists,title):
artistset = {getArtistID(name=a) for a in artists}
obj = Track(artists=frozenset(artistset),title=title)
obj_normalized = Track(artists=frozenset(artistset),title=normalize_name(title))
if obj_normalized in TRACKS_NORMALIZED_SET:
return TRACKS_NORMALIZED.index(obj_normalized)
i = len(TRACKS)
TRACKS.append(obj)
TRACKS_NORMALIZED_SET.add(obj_normalized)
TRACKS_NORMALIZED.append(obj_normalized)
return i
import unicodedata
# function to turn the name into a representation that can be easily compared, ignoring minor differences
remove_symbols = ["'","`",""]
replace_with_space = [" - ",": "]
def normalize_name(name):
for r in replace_with_space:
name = name.replace(r," ")
name = "".join(char for char in unicodedata.normalize('NFD',name.lower())
if char not in remove_symbols and unicodedata.category(char) != 'Mn')
return name
########
########
## HTTP requests and their associated functions
########
########
# skip regular authentication if api key is present in request
# an api key now ONLY permits scrobbling tracks, no other admin tasks
def api_key_correct(request):
args = request.params
try:
args.update(request.json)
except:
pass
if "key" in args:
apikey = args["key"]
del args["key"]
elif "apikey" in args:
apikey = args["apikey"]
del args["apikey"]
else: return False
return checkAPIkey(apikey)
def get_scrobbles(**keys):
r = db_query(**{k:keys[k] for k in keys if k in ["artist","artists","title","since","to","within","timerange","associated","track"]})
#offset = (keys.get('page') * keys.get('perpage')) if keys.get('perpage') is not math.inf else 0
#r = r[offset:]
#if keys.get('perpage') is not math.inf: r = r[:keys.get('perpage')]
return r
def info():
totalscrobbles = get_scrobbles_num()
artists = {}
return {
"name":malojaconfig["NAME"],
"artists":{
chartentry["artist"]:round(chartentry["scrobbles"] * 100 / totalscrobbles,3)
for chartentry in get_charts_artists() if chartentry["scrobbles"]/totalscrobbles >= 0
},
"known_servers":list(KNOWN_SERVERS)
}
def get_scrobbles_num(**keys):
r = db_query(**{k:keys[k] for k in keys if k in ["artist","track","artists","title","since","to","within","timerange","associated"]})
return len(r)
#for multiple since values (must be ordered)
# DOESN'T SEEM TO ACTUALLY BE FASTER
# REEVALUATE
#def get_scrobbles_num_multiple(sinces=[],to=None,**keys):
#
# sinces_stamps = [time_stamps(since,to,None)[0] for since in sinces]
# #print(sinces)
# #print(sinces_stamps)
# minsince = sinces[-1]
# r = db_query(**{k:keys[k] for k in keys if k in ["artist","track","artists","title","associated","to"]},since=minsince)
#
# #print(r)
#
# validtracks = [0 for s in sinces]
#
# i = 0
# si = 0
# while True:
# if si == len(sinces): break
# if i == len(r): break
# if r[i]["time"] >= sinces_stamps[si]:
# validtracks[si] += 1
# else:
# si += 1
# continue
# i += 1
#
#
# return validtracks
def get_tracks(artist=None):
artistid = ARTISTS.index(artist) if artist is not None else None
# Option 1
return [get_track_dict(t) for t in TRACKS if (artistid in t.artists) or (artistid==None)]
# Option 2 is a bit more elegant but much slower
#tracklist = [get_track_dict(t) for t in TRACKS]
#ls = [t for t in tracklist if (artist in t["artists"]) or (artist==None)]
def get_artists():
if not dbstatus['healthy']: raise DatabaseNotBuilt()
return ARTISTS #well
def get_charts_artists(**keys):
return db_aggregate(by="ARTIST",**{k:keys[k] for k in keys if k in ["since","to","within","timerange"]})
def get_charts_tracks(**keys):
return db_aggregate(by="TRACK",**{k:keys[k] for k in keys if k in ["since","to","within","timerange","artist"]})
def get_pulse(**keys):
rngs = ranges(**{k:keys[k] for k in keys if k in ["since","to","within","timerange","step","stepn","trail"]})
results = []
for rng in rngs:
res = len(db_query(timerange=rng,**{k:keys[k] for k in keys if k in ["artists","artist","track","title","associated"]}))
results.append({"range":rng,"scrobbles":res})
return results
def get_performance(**keys):
rngs = ranges(**{k:keys[k] for k in keys if k in ["since","to","within","timerange","step","stepn","trail"]})
results = []
for rng in rngs:
if "track" in keys:
charts = get_charts_tracks(timerange=rng)
rank = None
for c in charts:
if c["track"] == keys["track"]:
rank = c["rank"]
break
elif "artist" in keys:
charts = get_charts_artists(timerange=rng)
rank = None
for c in charts:
if c["artist"] == keys["artist"]:
rank = c["rank"]
break
results.append({"range":rng,"rank":rank})
return results
def get_top_artists(**keys):
rngs = ranges(**{k:keys[k] for k in keys if k in ["since","to","within","timerange","step","stepn","trail"]})
results = []
for rng in rngs:
try:
res = db_aggregate(timerange=rng,by="ARTIST")[0]
results.append({"range":rng,"artist":res["artist"],"counting":res["counting"],"scrobbles":res["scrobbles"]})
except:
results.append({"range":rng,"artist":None,"scrobbles":0})
return results
def get_top_tracks(**keys):
rngs = ranges(**{k:keys[k] for k in keys if k in ["since","to","within","timerange","step","stepn","trail"]})
results = []
for rng in rngs:
try:
res = db_aggregate(timerange=rng,by="TRACK")[0]
results.append({"range":rng,"track":res["track"],"scrobbles":res["scrobbles"]})
except:
results.append({"range":rng,"track":None,"scrobbles":0})
return results
def artistInfo(artist):
charts = db_aggregate(by="ARTIST")
scrobbles = len(db_query(artists=[artist]))
#we cant take the scrobble number from the charts because that includes all countas scrobbles
try:
c = [e for e in charts if e["artist"] == artist][0]
others = [a for a in coa.getAllAssociated(artist) if a in ARTISTS]
position = c["rank"]
performance = get_performance(artist=artist,step="week")
return {
"artist":artist,
"scrobbles":scrobbles,
"position":position,
"associated":others,
"medals":{"gold":[],"silver":[],"bronze":[],**MEDALS_ARTISTS.get(artist,{})},
"topweeks":WEEKLY_TOPARTISTS.get(artist,0)
}
except:
# if the artist isnt in the charts, they are not being credited and we
# need to show information about the credited one
artist = coa.getCredited(artist)
c = [e for e in charts if e["artist"] == artist][0]
position = c["rank"]
return {"replace":artist,"scrobbles":scrobbles,"position":position}
def trackInfo(track):
charts = db_aggregate(by="TRACK")
#scrobbles = len(db_query(artists=artists,title=title)) #chart entry of track always has right scrobble number, no countas rules here
#c = [e for e in charts if set(e["track"]["artists"]) == set(artists) and e["track"]["title"] == title][0]
c = [e for e in charts if e["track"] == track][0]
scrobbles = c["scrobbles"]
position = c["rank"]
cert = None
threshold_gold, threshold_platinum, threshold_diamond = malojaconfig["SCROBBLES_GOLD","SCROBBLES_PLATINUM","SCROBBLES_DIAMOND"]
if scrobbles >= threshold_diamond: cert = "diamond"
elif scrobbles >= threshold_platinum: cert = "platinum"
elif scrobbles >= threshold_gold: cert = "gold"
return {
"track":track,
"scrobbles":scrobbles,
"position":position,
"medals":{"gold":[],"silver":[],"bronze":[],**MEDALS_TRACKS.get((frozenset(track["artists"]),track["title"]),{})},
"certification":cert,
"topweeks":WEEKLY_TOPTRACKS.get(((frozenset(track["artists"]),track["title"])),0)
}
def compare(remoteurl):
import json
compareurl = remoteurl + "/api/info"
response = urllib.request.urlopen(compareurl)
strangerinfo = json.loads(response.read())
owninfo = info()
#add_known_server(compareto)
artists = {}
for a in owninfo["artists"]:
artists[a.lower()] = {"name":a,"self":int(owninfo["artists"][a]*1000),"other":0}
for a in strangerinfo["artists"]:
artists[a.lower()] = artists.setdefault(a.lower(),{"name":a,"self":0})
artists[a.lower()]["other"] = int(strangerinfo["artists"][a]*1000)
for a in artists:
common = min(artists[a]["self"],artists[a]["other"])
artists[a]["self"] -= common
artists[a]["other"] -= common
artists[a]["common"] = common
best = sorted((artists[a]["name"] for a in artists),key=lambda x: artists[x.lower()]["common"],reverse=True)
result = {
"unique_self":sum(artists[a]["self"] for a in artists if artists[a]["common"] == 0),
"more_self":sum(artists[a]["self"] for a in artists if artists[a]["common"] != 0),
"common":sum(artists[a]["common"] for a in artists),
"more_other":sum(artists[a]["other"] for a in artists if artists[a]["common"] != 0),
"unique_other":sum(artists[a]["other"] for a in artists if artists[a]["common"] == 0)
}
total = sum(result[c] for c in result)
for r in result:
result[r] = (result[r],result[r]/total)
return {
"result":result,
"info":{
"ownname":owninfo["name"],
"remotename":strangerinfo["name"]
},
"commonartist":best[0]
}
def incoming_scrobble(artists,title,album=None,duration=None,time=None,fix=True):
if time is None:
time = int(datetime.datetime.now(tz=datetime.timezone.utc).timestamp())
log("Incoming scrobble (): ARTISTS: " + str(artists) + ", TRACK: " + title,module="debug")
if fix:
(artists,title) = cla.fullclean(artists,title)
trackdict = createScrobble(artists,title,time,album,duration)
sync()
return {"status":"success","track":trackdict}
def issues():
return ISSUES
def check_issues():
combined = []
duplicates = []
newartists = []
import itertools
import difflib
sortedartists = ARTISTS.copy()
sortedartists.sort(key=len,reverse=True)
reversesortedartists = sortedartists.copy()
reversesortedartists.reverse()
for a in reversesortedartists:
nochange = cla.confirmedReal(a)
st = a
lis = []
reachedmyself = False
for ar in sortedartists:
if (ar != a) and not reachedmyself:
continue
elif not reachedmyself:
reachedmyself = True
continue
if (ar.lower() == a.lower()) or ("the " + ar.lower() == a.lower()) or ("a " + ar.lower() == a.lower()):
duplicates.append((ar,a))
break
if (ar + " " in st) or (" " + ar in st):
lis.append(ar)
st = st.replace(ar,"").strip()
elif (ar == st):
lis.append(ar)
st = ""
if not nochange:
combined.append((a,lis))
break
elif (ar in st) and len(ar)*2 > len(st):
duplicates.append((a,ar))
st = st.replace("&","").replace("and","").replace("with","").strip()
if st not in ["", a]:
if len(st) < 5 and len(lis) == 1:
#check if we havent just randomly found the string in another word
#if (" " + st + " ") in lis[0] or (lis[0].endswith(" " + st)) or (lis[0].startswith(st + " ")):
duplicates.append((a,lis[0]))
elif len(st) < 5 and len(lis) > 1 and not nochange:
combined.append((a,lis))
elif len(st) >= 5 and not nochange:
#check if we havent just randomly found the string in another word
if (" " + st + " ") in a or (a.endswith(" " + st)) or (a.startswith(st + " ")):
newartists.append((st,a,lis))
#for c in itertools.combinations(ARTISTS,3):
# l = list(c)
# print(l)
# l.sort(key=len,reverse=True)
# [full,a1,a2] = l
# if (a1 + " " + a2 in full) or (a2 + " " + a1 in full):
# combined.append((full,a1,a2))
#for c in itertools.combinations(ARTISTS,2):
# if
#
# if (c[0].lower == c[1].lower):
# duplicates.append((c[0],c[1]))
# elif (c[0] + " " in c[1]) or (" " + c[0] in c[1]) or (c[1] + " " in c[0]) or (" " + c[1] in c[0]):
# if (c[0] in c[1]):
# full, part = c[1],c[0]
# rest = c[1].replace(c[0],"").strip()
# else:
# full, part = c[0],c[1]
# rest = c[0].replace(c[1],"").strip()
# if rest in ARTISTS and full not in [c[0] for c in combined]:
# combined.append((full,part,rest))
# elif (c[0] in c[1]) or (c[1] in c[0]):
# duplicates.append((c[0],c[1]))
return {"duplicates":duplicates,"combined":combined,"newartists":newartists}
def get_predefined_rulesets():
validchars = "-_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
rulesets = []
for f in os.listdir(data_dir['rules']("predefined")):
if f.endswith(".tsv"):
rawf = f.replace(".tsv","")
valid = all(char in validchars for char in rawf)
if not valid: continue
if "_" not in rawf: continue
try:
with open(data_dir['rules']("predefined",f)) as tsvfile:
line1 = tsvfile.readline()
line2 = tsvfile.readline()
if "# NAME: " in line1:
name = line1.replace("# NAME: ","")
else: name = rawf.split("_")[1]
desc = line2.replace("# DESC: ","") if "# DESC: " in line2 else ""
author = rawf.split("_")[0]
except:
continue
ruleset = {"file":rawf}
rulesets.append(ruleset)
ruleset["active"] = bool(os.path.exists(data_dir['rules'](f)))
ruleset["name"] = name
ruleset["author"] = author
ruleset["desc"] = desc
return rulesets
####
## Server operation
####
# Starts the server
def start_db():
log("Starting database...")
global lastsync
lastsync = int(datetime.datetime.now(tz=datetime.timezone.utc).timestamp())
build_db()
loadAPIkeys()
#run(dbserver, host='::', port=PORT, server='waitress')
log("Database reachable!")
def build_db():
global dbstatus
dbstatus['healthy'] = False
dbstatus['complete'] = False
dbstatus['rebuildinprogress'] = True
log("Building database...")
global SCROBBLES, ARTISTS, TRACKS
global TRACKS_NORMALIZED_SET, TRACKS_NORMALIZED, ARTISTS_NORMALIZED_SET, ARTISTS_NORMALIZED
global SCROBBLESDICT, STAMPS
SCROBBLES = []
ARTISTS = []
TRACKS = []
STAMPS = []
SCROBBLESDICT = {}
TRACKS_NORMALIZED = []
ARTISTS_NORMALIZED = []
ARTISTS_NORMALIZED_SET = set()
TRACKS_NORMALIZED_SET = set()
# parse files
db = tsv.parse_all(data_dir['scrobbles'](),"int","string","string",comments=False)
scrobblenum = len(db)
log(f"Found {scrobblenum} scrobbles...")
usebar = not malojaconfig["CLEAN_OUTPUT"]
if usebar: pbar = ProgressBar(max=scrobblenum,prefix="Loading scrobbles")
else:
n = 0
m = max(int(scrobblenum / 25),20)
#db = parseAllTSV("scrobbles","int","string","string",escape=False)
for sc in db:
artists = sc[1].split("")
title = sc[2]
time = sc[0]
readScrobble(artists,title,time)
if usebar: pbar.progress()
else:
n += 1
if n % m == 0: log(f"Loaded {n}/{scrobblenum}...")
if usebar: pbar.done()
log("Database loaded, optimizing...")
# optimize database
SCROBBLES.sort(key = lambda tup: tup[1])
#SCROBBLESDICT = {obj[1]:obj for obj in SCROBBLES}
STAMPS = [t for t in SCROBBLESDICT]
STAMPS.sort()
# inform malojatime module about earliest scrobble
if STAMPS: register_scrobbletime(STAMPS[0])
# NOT NEEDED BECAUSE WE DO THAT ON ADDING EVERY ARTIST ANYWAY
# get extra artists with no real scrobbles from countas rules
#for artist in coa.getAllArtists():
#for artist in coa.getCreditedList(ARTISTS):
# if artist not in ARTISTS:
# log(artist + " is added to database because of countas rules",module="debug")
# ARTISTS.append(artist)
# coa.updateIDs(ARTISTS)
dbstatus['healthy'] = True
#start regular tasks
utilities.update_medals()
utilities.update_weekly()
utilities.send_stats()
global ISSUES
ISSUES = check_issues()
dbstatus['complete'] = True
dbstatus['rebuildinprogress'] = False
log("Database fully built!")
# Saves all cached entries to disk
def sync():
# all entries by file collected
# so we don't open the same file for every entry
#log("Syncing",module="debug")
entries = {}
for idx in range(len(SCROBBLES)):
if not SCROBBLES[idx].saved:
t = get_scrobble_dict(SCROBBLES[idx])
artistlist = list(t["artists"])
artistlist.sort() #we want the order of artists to be deterministic so when we update files with new rules a diff can see what has actually been changed
artistss = "".join(artistlist)
timestamp = datetime.date.fromtimestamp(t["time"])
album = t["album"] or "-"
duration = t["duration"] or "-"
entry = [str(t["time"]),artistss,t["title"],album,duration]
monthcode = str(timestamp.year) + "_" + str(timestamp.month)
entries.setdefault(monthcode,[]).append(entry) #i feckin love the setdefault function
SCROBBLES[idx] = Scrobble(*SCROBBLES[idx][:-1],True)
# save copy with last tuple entry set to true
#log("Sorted into months",module="debug")
for e in entries:
tsv.add_entries(data_dir['scrobbles'](e + ".tsv"),entries[e],comments=False)
#addEntries("scrobbles/" + e + ".tsv",entries[e],escape=False)
#log("Written files",module="debug")
global lastsync
lastsync = int(datetime.datetime.now(tz=datetime.timezone.utc).timestamp())
#log("Database saved to disk.")
# save cached images
#saveCache()
###
## Caches in front of DB
## the volatile caches are intended mainly for excessive site navigation during one session
## the permanent caches are there to save data that is hard to calculate and never changes (old charts)
###
import copy
if malojaconfig["USE_DB_CACHE"]:
def db_query(**kwargs):
return db_query_cached(**kwargs)
def db_aggregate(**kwargs):
return db_aggregate_cached(**kwargs)
else:
def db_query(**kwargs):
return db_query_full(**kwargs)
def db_aggregate(**kwargs):
return db_aggregate_full(**kwargs)
csz = malojaconfig["DB_CACHE_ENTRIES"]
cmp = malojaconfig["DB_MAX_MEMORY"]
try:
import psutil
use_psutil = True
except:
use_psutil = False
cache_query = lru.LRU(csz)
cache_query_perm = lru.LRU(csz)
cache_aggregate = lru.LRU(csz)
cache_aggregate_perm = lru.LRU(csz)
perm_caching = malojaconfig["CACHE_DATABASE_PERM"]
temp_caching = malojaconfig["CACHE_DATABASE_SHORT"]
cachestats = {
"cache_query":{
"hits_perm":0,
"hits_tmp":0,
"misses":0,
"objperm":cache_query_perm,
"objtmp":cache_query,
"name":"Query Cache"
},
"cache_aggregate":{
"hits_perm":0,
"hits_tmp":0,
"misses":0,
"objperm":cache_aggregate_perm,
"objtmp":cache_aggregate,
"name":"Aggregate Cache"
}
}
from doreah.regular import runhourly
@runhourly
def log_stats():
logstr = "{name}: {hitsperm} Perm Hits, {hitstmp} Tmp Hits, {misses} Misses; Current Size: {sizeperm}/{sizetmp}"
for s in (cachestats["cache_query"],cachestats["cache_aggregate"]):
log(logstr.format(name=s["name"],hitsperm=s["hits_perm"],hitstmp=s["hits_tmp"],misses=s["misses"],
sizeperm=len(s["objperm"]),sizetmp=len(s["objtmp"])),module="debug")
def db_query_cached(**kwargs):
global cache_query, cache_query_perm
key = utilities.serialize(kwargs)
eligible_permanent_caching = (
"timerange" in kwargs and
not kwargs["timerange"].active() and
perm_caching
)
eligible_temporary_caching = (
not eligible_permanent_caching and
temp_caching
)
# hit permanent cache for past timeranges
if eligible_permanent_caching and key in cache_query_perm:
cachestats["cache_query"]["hits_perm"] += 1
return copy.copy(cache_query_perm.get(key))
# hit short term cache
elif eligible_temporary_caching and key in cache_query:
cachestats["cache_query"]["hits_tmp"] += 1
return copy.copy(cache_query.get(key))
else:
cachestats["cache_query"]["misses"] += 1
result = db_query_full(**kwargs)
if eligible_permanent_caching: cache_query_perm[key] = result
elif eligible_temporary_caching: cache_query[key] = result
if use_psutil:
reduce_caches_if_low_ram()
return result
def db_aggregate_cached(**kwargs):
global cache_aggregate, cache_aggregate_perm
key = utilities.serialize(kwargs)
eligible_permanent_caching = (
"timerange" in kwargs and
not kwargs["timerange"].active() and
perm_caching
)
eligible_temporary_caching = (
not eligible_permanent_caching and
temp_caching
)
# hit permanent cache for past timeranges
if eligible_permanent_caching and key in cache_aggregate_perm:
cachestats["cache_aggregate"]["hits_perm"] += 1
return copy.copy(cache_aggregate_perm.get(key))
# hit short term cache
elif eligible_temporary_caching and key in cache_aggregate:
cachestats["cache_aggregate"]["hits_tmp"] += 1
return copy.copy(cache_aggregate.get(key))
else:
cachestats["cache_aggregate"]["misses"] += 1
result = db_aggregate_full(**kwargs)
if eligible_permanent_caching: cache_aggregate_perm[key] = result
elif eligible_temporary_caching: cache_aggregate[key] = result
if use_psutil:
reduce_caches_if_low_ram()
return result
def invalidate_caches():
global cache_query, cache_aggregate
cache_query.clear()
cache_aggregate.clear()
log("Database caches invalidated.")
def reduce_caches(to=0.75):
global cache_query, cache_aggregate, cache_query_perm, cache_aggregate_perm
for c in cache_query, cache_aggregate, cache_query_perm, cache_aggregate_perm:
currentsize = len(c)
if currentsize > 100:
targetsize = max(int(currentsize * to),10)
c.set_size(targetsize)
c.set_size(csz)
def reduce_caches_if_low_ram():
ramprct = psutil.virtual_memory().percent
if ramprct > cmp:
log("{prct}% RAM usage, reducing caches!".format(prct=ramprct),module="debug")
ratio = (cmp / ramprct) ** 3
reduce_caches(to=ratio)
####
## Database queries
####
# Queries the database
def db_query_full(artist=None,artists=None,title=None,track=None,since=None,to=None,within=None,timerange=None,associated=False,max_=None):
if not dbstatus['healthy']: raise DatabaseNotBuilt()
(since, to) = time_stamps(since=since,to=to,within=within,range=timerange)
# this is not meant as a search function. we *can* query the db with a string, but it only works if it matches exactly
# if a title is specified, we assume that a specific track (with the exact artist combination) is requested
# if not, duplicate artist arguments are ignored
#artist = None
if artist is not None and isinstance(artist,str):
artist = ARTISTS.index(artist)
# artists to numbers
if artists is not None:
artists = set([(ARTISTS.index(a) if isinstance(a,str) else a) for a in artists])
# track to number
if track is not None and isinstance(track,dict):
trackartists = set([(ARTISTS.index(a) if isinstance(a,str) else a) for a in track["artists"]])
track = TRACKS.index((frozenset(trackartists),track["title"]))
artists = None
#check if track is requested via title
if title!=None and track==None:
track = TRACKS.index((frozenset(artists),title))
artists = None
# if we're not looking for a track (either directly or per title artist arguments, which is converted to track above)
# we only need one artist
elif artist is None and track is None and artists is not None and len(artists) != 0:
artist = artists.pop()
# db query always reverse by default
result = []
i = 0
for s in scrobbles_in_range(since,to,reverse=True):
if i == max_: break
if (track is None or s[0] == track) and (artist is None or artist in TRACKS[s[0]][0] or associated and artist in coa.getCreditedList(TRACKS[s[0]][0])):
result.append(get_scrobble_dict(s))
i += 1
return result
# pointless to check for artist when track is checked because every track has a fixed set of artists, but it's more elegant this way
# Queries that... well... aggregate
def db_aggregate_full(by=None,since=None,to=None,within=None,timerange=None,artist=None):
if not dbstatus['healthy']: raise DatabaseNotBuilt()
(since, to) = time_stamps(since=since,to=to,within=within,range=timerange)
if isinstance(artist, str):
artist = ARTISTS.index(artist)
if (by=="ARTIST"):
#this is probably a really bad idea
#for a in ARTISTS:
# num = len(db_query(artist=a,since=since,to=to))
#
# alright let's try for real
charts = {}
#for s in [scr for scr in SCROBBLES if since < scr[1] < to]:
for s in scrobbles_in_range(since,to):
artists = TRACKS[s[0]][0]
for a in coa.getCreditedList(artists):
# this either creates the new entry or increments the existing one
charts[a] = charts.setdefault(a,0) + 1
ls = [{"artist":get_artist_dict(ARTISTS[a]),"scrobbles":charts[a],"counting":[arti for arti in coa.getAllAssociated(ARTISTS[a]) if arti in ARTISTS]} for a in charts]
ls.sort(key=lambda k:k["scrobbles"],reverse=True)
# add ranks
for rnk in range(len(ls)):
if rnk == 0 or ls[rnk]["scrobbles"] < ls[rnk-1]["scrobbles"]:
ls[rnk]["rank"] = rnk + 1
else:
ls[rnk]["rank"] = ls[rnk-1]["rank"]
return ls
elif (by=="TRACK"):
charts = {}
#for s in [scr for scr in SCROBBLES if since < scr[1] < to and (artist==None or (artist in TRACKS[scr[0]][0]))]:
for s in [scr for scr in scrobbles_in_range(since,to) if (artist is None or (artist in TRACKS[scr[0]][0]))]:
track = s[0]
# this either creates the new entry or increments the existing one
charts[track] = charts.setdefault(track,0) + 1
ls = [{"track":get_track_dict(TRACKS[t]),"scrobbles":charts[t]} for t in charts]
ls.sort(key=lambda k:k["scrobbles"],reverse=True)
# add ranks
for rnk in range(len(ls)):
if rnk == 0 or ls[rnk]["scrobbles"] < ls[rnk-1]["scrobbles"]:
ls[rnk]["rank"] = rnk + 1
else:
ls[rnk]["rank"] = ls[rnk-1]["rank"]
return ls
else:
#return len([scr for scr in SCROBBLES if since < scr[1] < to])
return len(list(scrobbles_in_range(since,to)))
# Search for strings
def db_search(query,type=None):
results = []
if type=="ARTIST":
results = [a for a in ARTISTS if simplestr(query) in simplestr(a)]
if type=="TRACK":
results = [
get_track_dict(t) for t in TRACKS if simplestr(query) in simplestr(t[1])
]
return results
####
## Useful functions
####
# makes a string usable for searching (special characters are blanks, accents and stuff replaced with their real part)
def simplestr(input,ignorecapitalization=True):
norm = unicodedata.normalize("NFKD",input)
norm = [c for c in norm if not unicodedata.combining(c)]
norm = [c if len(c.encode())==1 else " " for c in norm]
clear = ''.join(c for c in norm)
if ignorecapitalization: clear = clear.lower()
return clear
#def getArtistId(nameorid):
# if isinstance(nameorid,int):
# return nameorid
# else:
# try:
# return ARTISTS.index(nameorid)
# except:
# return -1
def insert(list_,item,key=lambda x:x):
i = 0
while len(list_) > i:
if key(list_[i]) > key(item):
list_.insert(i,item)
return i
i += 1
list_.append(item)
return i
def scrobbles_in_range(start,end,reverse=False):
if reverse:
for stamp in reversed(STAMPS):
#print("Checking " + str(stamp))
if stamp < start: return
if stamp > end: continue
yield SCROBBLESDICT[stamp]
else:
for stamp in STAMPS:
#print("Checking " + str(stamp))
if stamp < start: continue
if stamp > end: return
yield SCROBBLESDICT[stamp]