maloja/maloja/database/__init__.py

# server
from bottle import request, response, FormsDict, HTTPError

# rest of the project
from ..cleanup import CleanerAgent, CollectorAgent
from .. import utilities
from ..malojatime import register_scrobbletime, time_stamps, ranges
from ..malojauri import uri_to_internal, internal_to_uri, compose_querystring
from ..thirdparty import proxy_scrobble_all
from ..globalconf import data_dir, malojaconfig, apikeystore
#db
from . import sqldb
from .cache import db_query, db_aggregate

# doreah toolkit
from doreah.logging import log
from doreah import tsv
from doreah.caching import Cache, DeepCache
from doreah.auth import authenticated_api, authenticated_api_with_alternate
from doreah.io import ProgressBar
try:
	from doreah.persistence import DiskDict
except: pass
import doreah


# technical
import os
import datetime
import sys
import unicodedata
from collections import namedtuple
from threading import Lock
import yaml, json
import math

# url handling
from importlib.machinery import SourceFileLoader
import urllib


dblock = Lock() #global database lock
dbstatus = {
	"healthy":False,
	"rebuildinprogress":False,
	"complete":False
}
class DatabaseNotBuilt(HTTPError):
	def __init__(self):
		super().__init__(
			status=503,
			body="The Maloja Database is still being built. Try again in a few seconds.",
			headers={"Retry-After":10}
		)


MEDALS_ARTISTS = {}	#literally only changes once per year, no need to calculate that on the fly
MEDALS_TRACKS = {}
WEEKLY_TOPTRACKS = {}
WEEKLY_TOPARTISTS = {}

ISSUES = {}

cla = CleanerAgent()
coa = CollectorAgent()


def checkAPIkey(key):
	return apikeystore.check_key(key)

def allAPIkeys():
	return [apikeystore[k] for k in apikeystore]


####
## Getting dict representations of database objects
####

def get_scrobble_dict(o):
	track = get_track_dict(TRACKS[o.track])
	return {"artists":track["artists"],"title":track["title"],"time":o.timestamp,"album":o.album,"duration":o.duration}

def get_artist_dict(o):
	return o
	#technically not a dict, but... you know

def get_track_dict(o):
	artists = [get_artist_dict(ARTISTS[a]) for a in o.artists]
	return {"artists":artists,"title":o.title}


####
## Creating or finding existing database entries
####


def createScrobble(artists,title,time,album=None,duration=None,volatile=False):

	if len(artists) == 0 or title == "":
		return {}

	scrobbledict = {
		"time":time,
		"track":{
			"artists":artists,
			"title":title,
			"album":{
				"name":album,
				"artists":None
			},
			"length":None
		},
		"duration":duration,
		"origin":"generic"
	}

	add_scrobble(scrobbledict)
	proxy_scrobble_all(artists,title,time)
	return scrobbledict


########
########
## HTTP requests and their associated functions
########
########

# skip regular authentication if api key is present in request
# an api key now ONLY permits scrobbling tracks, no other admin tasks
def api_key_correct(request):
	args = request.params
	try:
		args.update(request.json)
	except:
		pass
	if "key" in args:
		apikey = args["key"]
		del args["key"]
	elif "apikey" in args:
		apikey = args["apikey"]
		del args["apikey"]
	else: return False

	return checkAPIkey(apikey)


def get_scrobbles(**keys):
	r = db_query(**{k:keys[k] for k in keys if k in ["artist","artists","title","since","to","within","timerange","associated","track"]})
	return r


def info():
	totalscrobbles = get_scrobbles_num()
	artists = {}

	return {
		"name":malojaconfig["NAME"],
		"artists":{
			chartentry["artist"]:round(chartentry["scrobbles"] * 100 / totalscrobbles,3)
			for chartentry in get_charts_artists() if chartentry["scrobbles"]/totalscrobbles >= 0
		},
		"known_servers":list(KNOWN_SERVERS)
	}


def get_scrobbles_num(**keys):
	r = db_query(**{k:keys[k] for k in keys if k in ["artist","track","artists","title","since","to","within","timerange","associated"]})
	return len(r)

def get_tracks(artist=None):

	artistid = ARTISTS.index(artist) if artist is not None else None
	return [get_track_dict(t) for t in TRACKS if (artistid in t.artists) or (artistid==None)]


def get_artists():
	if not dbstatus['healthy']: raise DatabaseNotBuilt()
	return ARTISTS #well


def get_charts_artists(**keys):
	return db_aggregate(by="ARTIST",**{k:keys[k] for k in keys if k in ["since","to","within","timerange"]})


def get_charts_tracks(**keys):
	return db_aggregate(by="TRACK",**{k:keys[k] for k in keys if k in ["since","to","within","timerange","artist"]})

def get_pulse(**keys):

	rngs = ranges(**{k:keys[k] for k in keys if k in ["since","to","within","timerange","step","stepn","trail"]})
	results = []
	for rng in rngs:
		res = len(db_query(timerange=rng,**{k:keys[k] for k in keys if k in ["artists","artist","track","title","associated"]}))
		results.append({"range":rng,"scrobbles":res})

	return results


def get_performance(**keys):

	rngs = ranges(**{k:keys[k] for k in keys if k in ["since","to","within","timerange","step","stepn","trail"]})
	results = []

	for rng in rngs:
		if "track" in keys:
			charts = get_charts_tracks(timerange=rng)
			rank = None
			for c in charts:
				if c["track"] == keys["track"]:
					rank = c["rank"]
					break
		elif "artist" in keys:
			charts = get_charts_artists(timerange=rng)
			rank = None
			for c in charts:
				if c["artist"] == keys["artist"]:
					rank = c["rank"]
					break
		results.append({"range":rng,"rank":rank})

	return results


def get_top_artists(**keys):

	rngs = ranges(**{k:keys[k] for k in keys if k in ["since","to","within","timerange","step","stepn","trail"]})
	results = []

	for rng in rngs:
		try:
			res = db_aggregate(timerange=rng,by="ARTIST")[0]
			results.append({"range":rng,"artist":res["artist"],"counting":res["counting"],"scrobbles":res["scrobbles"]})
		except:
			results.append({"range":rng,"artist":None,"scrobbles":0})

	return results


def get_top_tracks(**keys):

	rngs = ranges(**{k:keys[k] for k in keys if k in ["since","to","within","timerange","step","stepn","trail"]})
	results = []

	for rng in rngs:
		try:
			res = db_aggregate(timerange=rng,by="TRACK")[0]
			results.append({"range":rng,"track":res["track"],"scrobbles":res["scrobbles"]})
		except:
			results.append({"range":rng,"track":None,"scrobbles":0})

	return results


def artistInfo(artist):

	charts = db_aggregate(by="ARTIST")
	scrobbles = len(db_query(artists=[artist]))
	#we cant take the scrobble number from the charts because that includes all countas scrobbles
	try:
		c = [e for e in charts if e["artist"] == artist][0]
		others = [a for a in coa.getAllAssociated(artist) if a in ARTISTS]
		position = c["rank"]
		performance = get_performance(artist=artist,step="week")
		return {
			"artist":artist,
			"scrobbles":scrobbles,
			"position":position,
			"associated":others,
			"medals":{"gold":[],"silver":[],"bronze":[],**MEDALS_ARTISTS.get(artist,{})},
			"topweeks":WEEKLY_TOPARTISTS.get(artist,0)
		}
	except:
		# if the artist isnt in the charts, they are not being credited and we
		# need to show information about the credited one
		artist = coa.getCredited(artist)
		c = [e for e in charts if e["artist"] == artist][0]
		position = c["rank"]
		return {"replace":artist,"scrobbles":scrobbles,"position":position}


def trackInfo(track):
	charts = db_aggregate(by="TRACK")
	#scrobbles = len(db_query(artists=artists,title=title))	#chart entry of track always has right scrobble number, no countas rules here
	#c = [e for e in charts if set(e["track"]["artists"]) == set(artists) and e["track"]["title"] == title][0]
	c = [e for e in charts if e["track"] == track][0]
	scrobbles = c["scrobbles"]
	position = c["rank"]
	cert = None
	threshold_gold, threshold_platinum, threshold_diamond = malojaconfig["SCROBBLES_GOLD","SCROBBLES_PLATINUM","SCROBBLES_DIAMOND"]
	if scrobbles >= threshold_diamond: cert = "diamond"
	elif scrobbles >= threshold_platinum: cert = "platinum"
	elif scrobbles >= threshold_gold: cert = "gold"


	return {
		"track":track,
		"scrobbles":scrobbles,
		"position":position,
		"medals":{"gold":[],"silver":[],"bronze":[],**MEDALS_TRACKS.get((frozenset(track["artists"]),track["title"]),{})},
		"certification":cert,
		"topweeks":WEEKLY_TOPTRACKS.get(((frozenset(track["artists"]),track["title"])),0)
	}


def compare(remoteurl):
	import json
	compareurl = remoteurl + "/api/info"

	response = urllib.request.urlopen(compareurl)
	strangerinfo = json.loads(response.read())
	owninfo = info()

	#add_known_server(compareto)

	artists = {}

	for a in owninfo["artists"]:
		artists[a.lower()] = {"name":a,"self":int(owninfo["artists"][a]*1000),"other":0}

	for a in strangerinfo["artists"]:
		artists[a.lower()] = artists.setdefault(a.lower(),{"name":a,"self":0})
		artists[a.lower()]["other"] = int(strangerinfo["artists"][a]*1000)

	for a in artists:
		common = min(artists[a]["self"],artists[a]["other"])
		artists[a]["self"] -= common
		artists[a]["other"] -= common
		artists[a]["common"] = common

	best = sorted((artists[a]["name"] for a in artists),key=lambda x: artists[x.lower()]["common"],reverse=True)

	result = {
		"unique_self":sum(artists[a]["self"] for a in artists if artists[a]["common"] == 0),
		"more_self":sum(artists[a]["self"] for a in artists if artists[a]["common"] != 0),
		"common":sum(artists[a]["common"] for a in artists),
		"more_other":sum(artists[a]["other"] for a in artists if artists[a]["common"] != 0),
		"unique_other":sum(artists[a]["other"] for a in artists if artists[a]["common"] == 0)
	}

	total = sum(result[c] for c in result)

	for r in result:
		result[r] = (result[r],result[r]/total)


	return {
		"result":result,
		"info":{
			"ownname":owninfo["name"],
			"remotename":strangerinfo["name"]
		},
		"commonartist":best[0]
	}


def incoming_scrobble(artists,title,album=None,duration=None,time=None,fix=True):
	if time is None:
		time = int(datetime.datetime.now(tz=datetime.timezone.utc).timestamp())

	log("Incoming scrobble (): ARTISTS: " + str(artists) + ", TRACK: " + title,module="debug")
	if fix:
		(artists,title) = cla.fullclean(artists,title)
	trackdict = createScrobble(artists,title,time,album,duration)

	sync()

	return {"status":"success","track":trackdict}


def issues():
	return ISSUES

def check_issues():
	combined = []
	duplicates = []
	newartists = []

	import itertools
	import difflib

	sortedartists = ARTISTS.copy()
	sortedartists.sort(key=len,reverse=True)
	reversesortedartists = sortedartists.copy()
	reversesortedartists.reverse()
	for a in reversesortedartists:

		nochange = cla.confirmedReal(a)

		st = a
		lis = []
		reachedmyself = False
		for ar in sortedartists:
			if (ar != a) and not reachedmyself:
				continue
			elif not reachedmyself:
				reachedmyself = True
				continue

			if (ar.lower() == a.lower()) or ("the " + ar.lower() == a.lower()) or ("a " + ar.lower() == a.lower()):
				duplicates.append((ar,a))
				break

			if (ar + " " in st) or (" " + ar in st):
				lis.append(ar)
				st = st.replace(ar,"").strip()
			elif (ar == st):
				lis.append(ar)
				st = ""
				if not nochange:
					combined.append((a,lis))
				break

			elif (ar in st) and len(ar)*2 > len(st):
				duplicates.append((a,ar))

		st = st.replace("&","").replace("and","").replace("with","").strip()
		if st not in ["", a]:
			if len(st) < 5 and len(lis) == 1:
				#check if we havent just randomly found the string in another word
				#if (" " + st + " ") in lis[0] or (lis[0].endswith(" " + st)) or (lis[0].startswith(st + " ")):
				duplicates.append((a,lis[0]))
			elif len(st) < 5 and len(lis) > 1 and not nochange:
				combined.append((a,lis))
			elif len(st) >= 5 and not nochange:
				#check if we havent just randomly found the string in another word
				if (" " + st + " ") in a or (a.endswith(" " + st)) or (a.startswith(st + " ")):
					newartists.append((st,a,lis))

	#for c in itertools.combinations(ARTISTS,3):
	#	l = list(c)
	#	print(l)
	#	l.sort(key=len,reverse=True)
	#	[full,a1,a2] = l
	#	if (a1 + " " + a2 in full) or (a2 + " " + a1 in full):
	#		combined.append((full,a1,a2))


	#for c in itertools.combinations(ARTISTS,2):
	#	if
	#
	#	if (c[0].lower == c[1].lower):
	#		duplicates.append((c[0],c[1]))


	#	elif (c[0] + " " in c[1]) or (" " + c[0] in c[1]) or (c[1] + " " in c[0]) or (" " + c[1] in c[0]):
	#		if (c[0] in c[1]):
	#			full, part = c[1],c[0]
	#			rest = c[1].replace(c[0],"").strip()
	#		else:
	#			full, part = c[0],c[1]
	#			rest = c[0].replace(c[1],"").strip()
	#		if rest in ARTISTS and full not in [c[0] for c in combined]:
	#			combined.append((full,part,rest))

	#	elif (c[0] in c[1]) or (c[1] in c[0]):
	#		duplicates.append((c[0],c[1]))


	return {"duplicates":duplicates,"combined":combined,"newartists":newartists}


def get_predefined_rulesets():
	validchars = "-_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"

	rulesets = []

	for f in os.listdir(data_dir['rules']("predefined")):
		if f.endswith(".tsv"):

			rawf = f.replace(".tsv","")
			valid = all(char in validchars for char in rawf)
			if not valid: continue
			if "_" not in rawf: continue

			try:
				with open(data_dir['rules']("predefined",f)) as tsvfile:
					line1 = tsvfile.readline()
					line2 = tsvfile.readline()

					if "# NAME: " in line1:
						name = line1.replace("# NAME: ","")
					else: name = rawf.split("_")[1]
					desc = line2.replace("# DESC: ","") if "# DESC: " in line2 else ""
					author = rawf.split("_")[0]
			except:
				continue

			ruleset = {"file":rawf}
			rulesets.append(ruleset)
			ruleset["active"] = bool(os.path.exists(data_dir['rules'](f)))
			ruleset["name"] = name
			ruleset["author"] = author
			ruleset["desc"] = desc

	return rulesets


####
## Server operation
####


def start_db():
	from .. import upgrade
	upgrade.upgrade_db(sqldb.add_scrobbles)
	dbstatus['healthy'] = True
	dbstatus['complete'] = True


####
## Database queries
####


# Queries the database
def db_query_full(artist=None,artists=None,title=None,track=None,timerange=None,associated=False,max_=None):
	print((artist,artists,title,track,timerange))
	if not dbstatus['healthy']: raise DatabaseNotBuilt()
	(since, to) = time_stamps(range=timerange)

	if artists is not None and title is not None:
		print(col['red']("THIS SHOULD NO LONGER HAPPEN"))
		track = {'artists':artists,'title':title}

	if track is not None:
		return sqldb.get_scrobbles_of_track(track=track,since=since,to=to)

	if artist is not None:
		return sqldb.get_scrobbles_of_artist(artist=artist,since=since,to=to)

	return sqldb.get_scrobbles(since=since,to=to)


# Queries that... well... aggregate
def db_aggregate_full(by=None,timerange=None,artist=None):

	if not dbstatus['healthy']: raise DatabaseNotBuilt()
	(since, to) = time_stamps(range=timerange)


	if (by=="ARTIST"):

		trackcharts = {}
		charts = {}
		scrobbles = sqldb.get_scrobbles(since=since,to=to,resolve_references=False)

		for s in scrobbles:
			trackcharts[s['track']] = trackcharts.setdefault(s['track'],0) + 1

		for t in trackcharts:
			artists = sqldb.get_artists_of_track(t,resolve_references=False)
			for a in coa.getCreditedList(artists):
				charts[a] = charts.setdefault(a,0) + trackcharts[t]


		ls = [{"artist":sqldb.get_artist(a),"scrobbles":charts[a],"counting":[]} for a in charts]
		ls.sort(key=lambda k:k["scrobbles"],reverse=True)
		# add ranks
		for rnk in range(len(ls)):
			if rnk == 0 or ls[rnk]["scrobbles"] < ls[rnk-1]["scrobbles"]:
				ls[rnk]["rank"] = rnk + 1
			else:
				ls[rnk]["rank"] = ls[rnk-1]["rank"]
		return ls

	elif (by=="TRACK"):
		charts = {}
		if artist is None:
			scrobbles = sqldb.get_scrobbles(since=since,to=to,resolve_references=False)
		else:
			scrobbles = sqldb.get_scrobbles_of_artist(since=since,to=to,artist=artist,resolve_references=False)

		for s in scrobbles:
			charts[s['track']] = charts.setdefault(s['track'],0) + 1


		ls = [{"track":sqldb.get_track(t),"scrobbles":charts[t]} for t in charts]
		ls.sort(key=lambda k:k["scrobbles"],reverse=True)
		# add ranks
		for rnk in range(len(ls)):
			if rnk == 0 or ls[rnk]["scrobbles"] < ls[rnk-1]["scrobbles"]:
				ls[rnk]["rank"] = rnk + 1
			else:
				ls[rnk]["rank"] = ls[rnk-1]["rank"]
		return ls

	else:
		#return len([scr for scr in SCROBBLES if since < scr[1] < to])
		return len(list(scrobbles_in_range(since,to)))


# Search for strings
def db_search(query,type=None):
	results = []
	if type=="ARTIST":
		results = [a for a in ARTISTS if simplestr(query) in simplestr(a)]
	if type=="TRACK":
		results = [
		    get_track_dict(t) for t in TRACKS if simplestr(query) in simplestr(t[1])
		]
	return results


####
## Useful functions
####


# makes a string usable for searching (special characters are blanks, accents and stuff replaced with their real part)
def simplestr(input,ignorecapitalization=True):
	norm = unicodedata.normalize("NFKD",input)
	norm = [c for c in norm if not unicodedata.combining(c)]
	norm = [c if len(c.encode())==1 else " " for c in norm]
	clear = ''.join(c for c in norm)
	if ignorecapitalization: clear = clear.lower()
	return clear


#def getArtistId(nameorid):
#	if isinstance(nameorid,int):
#		return nameorid
#	else:
#		try:
#			return ARTISTS.index(nameorid)
#		except:
#			return -1


def insert(list_,item,key=lambda x:x):
	i = 0
	while len(list_) > i:
		if key(list_[i]) > key(item):
			list_.insert(i,item)
			return i
		i += 1

	list_.append(item)
	return i


def scrobbles_in_range(start,end,reverse=False):
	if reverse:
		for stamp in reversed(STAMPS):
			#print("Checking " + str(stamp))
			if stamp < start: return
			if stamp > end: continue
			yield SCROBBLESDICT[stamp]
	else:
		for stamp in STAMPS:
			#print("Checking " + str(stamp))
			if stamp < start: continue
			if stamp > end: return
			yield SCROBBLESDICT[stamp]