1
0
mirror of https://github.com/krateng/maloja.git synced 2023-08-10 21:12:55 +03:00
maloja/database.py

1279 lines
33 KiB
Python
Raw Normal View History

2019-03-31 13:18:49 +03:00
# server
2019-05-12 19:39:46 +03:00
from bottle import request, response, FormsDict
2019-03-31 13:18:49 +03:00
# rest of the project
2019-06-13 11:51:25 +03:00
from cleanup import CleanerAgent, CollectorAgent
import utilities
from malojatime import register_scrobbletime, time_stamps, ranges
2019-05-14 13:07:47 +03:00
from urihandler import uri_to_internal, internal_to_uri, compose_querystring
import compliant_api
from external import proxy_scrobble
2019-03-31 13:18:49 +03:00
# doreah toolkit
2019-03-29 21:44:42 +03:00
from doreah.logging import log
2019-03-29 22:23:32 +03:00
from doreah import tsv
2019-06-13 11:51:25 +03:00
from doreah import settings
2019-05-09 17:58:25 +03:00
from doreah.caching import Cache, DeepCache
try:
from doreah.persistence import DiskDict
except: pass
import doreah
2019-05-23 14:13:42 +03:00
# nimrodel API
from nimrodel import EAPI as API
from nimrodel import Multi
2019-03-31 13:18:49 +03:00
# technical
import os
import datetime
import sys
2019-03-10 19:38:33 +03:00
import unicodedata
from collections import namedtuple
2019-04-07 16:27:24 +03:00
from threading import Lock
2019-03-31 13:18:49 +03:00
# url handling
from importlib.machinery import SourceFileLoader
import urllib
2019-04-07 16:27:24 +03:00
# Global lock guarding modifications of the in-memory database
dblock = Lock()

# Record types stored in TRACKS / SCROBBLES
Track = namedtuple("Track", ("artists", "title"))
Scrobble = namedtuple("Scrobble", ("track", "timestamp", "saved"))

SCROBBLES = list()  # Format: tuple(track_ref,timestamp,saved)
ARTISTS = list()  # Format: artist
TRACKS = list()  # Format: namedtuple(artists=frozenset(artist_ref,...),title=title)

### OPTIMIZATION
SCROBBLESDICT = dict()  # timestamps to scrobble mapping
STAMPS = list()  # sorted
# normalized (lowercase, apostrophes removed) lookup helpers; the lists share
# their indices with ARTISTS / TRACKS, the sets allow fast membership checks
TRACKS_LOWER = list()
ARTISTS_LOWER = list()
ARTIST_SET = set()
TRACK_SET = set()

# literally only changes once per year, no need to calculate that on the fly
MEDALS = dict()
MEDALS_TRACKS = dict()

WEEKLY_TOPTRACKS = dict()
WEEKLY_TOPARTISTS = dict()

# rule agents
cla = CleanerAgent()
coa = CollectorAgent()

# entries of the API key file, filled by loadAPIkeys()
clients = list()

# unix timestamp of the last sync to disk
lastsync = 0

# rulestate that the entire current database was built with, or False if the
# database was built from inconsistent scrobble files
db_rulestate = False
2018-11-30 17:44:30 +03:00
### symmetric keys are fine for now since we hopefully use HTTPS
def loadAPIkeys():
    """Load the client entries from the key file into the global `clients`.

    Each parsed row is treated as (key, identifier); the identifiers are
    written to the log.
    """
    global clients
    keyfile = "clients/authenticated_machines.tsv"
    # presumably makes sure the file exists before parsing - verify in doreah.tsv
    tsv.create(keyfile)
    clients = tsv.parse(keyfile, "string", "string")
    identifiers = [entry[1] for entry in clients]
    log("Authenticated Machines: " + ", ".join(identifiers))
2018-11-30 17:44:30 +03:00
def checkAPIkey(k):
    """Return the identifier registered for API key `k`, or False if unknown.

    Note that the identifier itself may be an empty string, so callers that
    need to distinguish "known" from "unknown" must compare against False.
    """
    matches = (identifier for key, identifier in clients if key == k)
    return next(matches, False)
def allAPIkeys():
    """Return the list of all registered API keys (without identifiers)."""
    keys = []
    for key, _identifier in clients:
        keys.append(key)
    return keys
2018-12-12 21:37:59 +03:00
####
## Getting dict representations of database objects
####
2019-04-07 16:01:04 +03:00
def get_scrobble_dict(o):
    """Return the dict representation of a scrobble record.

    `o` must expose `.track` (index into TRACKS) and `.timestamp`.
    """
    track_info = get_track_dict(TRACKS[o.track])
    scrobble = {field: track_info[field] for field in ("artists", "title")}
    scrobble["time"] = o.timestamp
    return scrobble
2019-04-07 16:01:04 +03:00
def get_artist_dict(o):
    """Return the external representation of an artist (the value itself)."""
    artist = o
    return artist
2019-04-07 16:01:04 +03:00
#technically not a dict, but... you know
2019-04-07 16:01:04 +03:00
def get_track_dict(o):
    """Return the dict representation of a track record.

    `o.artists` holds indices into ARTISTS; they are resolved to the
    artist representations here.
    """
    resolved = []
    for artist_ref in o.artists:
        resolved.append(get_artist_dict(ARTISTS[artist_ref]))
    return {"artists": resolved, "title": o.title}
2018-12-12 21:37:59 +03:00
####
## Creating or finding existing database entries
####
2019-03-11 22:04:23 +03:00
def createScrobble(artists,title,time,volatile=False):
    """Add a new scrobble to the in-memory database.

    artists  -- collection of artist names
    title    -- track title
    time     -- unix timestamp; incremented until it is unused, because the
                timestamp doubles as unique identifier
    volatile -- if True the scrobble is marked as already saved, so it is
                never written to disk

    Returns the dict representation of the scrobbled track, or an empty dict
    if no artists or an empty title were given.
    """
    if len(artists) == 0 or title == "":
        return {}

    # the context manager guarantees the lock is released even if one of the
    # steps below raises
    with dblock:
        i = getTrackID(artists,title)

        # idempotence: same track at the same timestamp is not added twice
        if time in SCROBBLESDICT:
            if i == SCROBBLESDICT[time].track:
                return get_track_dict(TRACKS[i])

        # timestamp as unique identifier
        while (time in SCROBBLESDICT):
            time += 1

        # if volatile generated, we simply pretend we have already saved it to disk
        obj = Scrobble(i,time,volatile)
        # immediately insert scrobble correctly so we can guarantee sorted list
        index = insert(SCROBBLES,obj,key=lambda x:x[1])
        SCROBBLESDICT[time] = obj
        STAMPS.insert(index,time) #should be same index as scrobblelist
        register_scrobbletime(time)
        invalidate_caches()

    # forwarding happens outside the lock, as before
    proxy_scrobble(artists,title,time)
    return get_track_dict(TRACKS[obj.track])
2018-12-05 16:30:50 +03:00
2019-04-07 16:27:24 +03:00
# this will never be called from different threads, so no lock
def readScrobble(artists,title,time):
    """Register a scrobble read from disk (already saved, so flagged True).

    Only appends; SCROBBLES and STAMPS are not kept sorted here.
    """
    slot = time
    # timestamps are unique identifiers, move forward until one is free
    while slot in SCROBBLESDICT:
        slot += 1
    record = Scrobble(getTrackID(artists,title), slot, True)
    SCROBBLES.append(record)
    SCROBBLESDICT[slot] = record
def getArtistID(name):
    """Return the index of artist `name` in ARTISTS, creating it if needed.

    Lookup is done on a normalized form (lowercase, apostrophes removed).
    """
    normalized = name.lower().replace("'","")

    if normalized in ARTIST_SET:
        return ARTISTS_LOWER.index(normalized)

    new_id = len(ARTISTS)
    ARTISTS.append(name)
    ARTIST_SET.add(normalized)
    ARTISTS_LOWER.append(normalized)

    # with a new artist added, we might also get new artists that they are credited as
    credited = coa.getCredited(name)
    getArtistID(credited)

    coa.updateIDs(ARTISTS)

    return new_id
def getTrackID(artists,title):
    """Return the index of the track in TRACKS, creating it if needed.

    All artists are resolved (and created if necessary) first. Tracks are
    matched on their artist set plus the normalized title (lowercase,
    apostrophes removed).
    """
    artist_ids = frozenset(getArtistID(name=a) for a in artists)
    normalized = Track(artists=artist_ids,title=title.lower().replace("'",""))

    if normalized in TRACK_SET:
        return TRACKS_LOWER.index(normalized)

    new_id = len(TRACKS)
    TRACKS.append(Track(artists=artist_ids,title=title))
    TRACK_SET.add(normalized)
    TRACKS_LOWER.append(normalized)
    return new_id
2018-11-24 18:29:24 +03:00
########
########
## HTTP requests and their associated functions
########
########
2019-05-23 14:13:42 +03:00
# API object all database endpoints below are registered on
dbserver = API(path="api", delay=True)
2019-05-12 19:39:46 +03:00
2019-05-23 14:13:42 +03:00
@dbserver.get("test")
def test_server(key=None):
    # Health check. Sets the status code:
    #   403 if a key was supplied and is not valid
    #   204 if the database was built with a consistent rulestate
    #   205 otherwise
    response.set_header("Access-Control-Allow-Origin","*")

    key_rejected = key is not None and not checkAPIkey(key)
    if key_rejected:
        response.status = 403
        return "Wrong API key"

    response.status = 204 if db_rulestate else 205
    return
2018-12-27 05:09:29 +03:00
# 204 Database server is up and operational
# 205 Database server is up, but DB is not fully built or is inconsistent
# 403 Database server is up, but provided API key is not valid
2018-12-12 21:37:59 +03:00
2019-11-19 22:52:07 +03:00
@dbserver.get("serverinfo")
def server_info():
    # Reports the configured server name and the version from the info module.
    import info

    headers = (
        ("Access-Control-Allow-Origin","*"),
        ("Content-Type","application/json"),
    )
    for header, value in headers:
        response.set_header(header,value)

    payload = {"name":settings.get_settings("NAME")}
    payload["version"] = info.version
    return payload
## All database functions are separated - the external wrapper only reads the request keys, converts them into lists and renames them where necessary, and puts the end result in a dict if not already so it can be returned as json
2019-05-23 14:13:42 +03:00
@dbserver.get("scrobbles")
def get_scrobbles_external(**keys):
    # Converts the request keys to internal filter/time/amount keys and
    # returns the matching scrobbles wrapped in a dict.
    filter_keys, time_keys, _, amount_keys = uri_to_internal(keys)
    query = dict(filter_keys)
    query.update(time_keys)
    query.update(amount_keys)
    return {"list":get_scrobbles(**query)}
def get_scrobbles(**keys):
    """Query the database for scrobbles; unsupported keys are dropped."""
    accepted = ("artist","artists","title","since","to","within","timerange","associated","track","max_")
    query = {name:value for name, value in keys.items() if name in accepted}
    return db_query(**query)
# info for comparison
@dbserver.get("info")
def info_external(**keys):
    # info for comparison; request keys are accepted but not used
    return info()
def info():
    """Return the server name and each charted artist's share of scrobbles.

    The share is the artist's chart scrobble count as a percentage of all
    scrobbles, rounded to three decimals.
    """
    totalscrobbles = get_scrobbles_num()
    name = settings.get_settings("NAME")

    # the former "share >= 0" filter was always true and has been dropped,
    # as has an unused local
    shares = {}
    for chartentry in get_charts_artists():
        shares[chartentry["artist"]] = round(chartentry["scrobbles"] * 100 / totalscrobbles,3)

    return {"name":name,"artists":shares}
2019-02-16 18:28:32 +03:00
# UNUSED
#@dbserver.route("/amounts")
#def get_amounts_external():
# return get_amounts() #really now
#
#def get_amounts():
# return {"scrobbles":len(SCROBBLES),"tracks":len(TRACKS),"artists":len(ARTISTS)}
2019-02-16 18:28:32 +03:00
2019-05-23 14:13:42 +03:00
@dbserver.get("numscrobbles")
def get_scrobbles_num_external(**keys):
    # Same key handling as the scrobbles endpoint, but only the amount of
    # matching scrobbles is returned.
    filter_keys, time_keys, _, amount_keys = uri_to_internal(keys)
    query = dict(filter_keys)
    query.update(time_keys)
    query.update(amount_keys)
    return {"amount":get_scrobbles_num(**query)}
def get_scrobbles_num(**keys):
    """Return the number of scrobbles matching the given keys."""
    accepted = ("artist","track","artists","title","since","to","within","timerange","associated")
    query = {name:value for name, value in keys.items() if name in accepted}
    matching = db_query(**query)
    return len(matching)
2019-03-12 18:06:09 +03:00
#for multiple since values (must be ordered)
# DOESN'T SEEM TO ACTUALLY BE FASTER
# REEVALUATE
#def get_scrobbles_num_multiple(sinces=[],to=None,**keys):
#
2019-03-12 18:06:09 +03:00
# sinces_stamps = [time_stamps(since,to,None)[0] for since in sinces]
# #print(sinces)
# #print(sinces_stamps)
# minsince = sinces[-1]
# r = db_query(**{k:keys[k] for k in keys if k in ["artist","track","artists","title","associated","to"]},since=minsince)
#
2019-03-12 18:06:09 +03:00
# #print(r)
#
2019-03-12 18:06:09 +03:00
# validtracks = [0 for s in sinces]
#
2019-03-12 18:06:09 +03:00
# i = 0
# si = 0
# while True:
# if si == len(sinces): break
# if i == len(r): break
# if r[i]["time"] >= sinces_stamps[si]:
# validtracks[si] += 1
# else:
# si += 1
# continue
# i += 1
#
#
2019-03-12 18:06:09 +03:00
# return validtracks
2019-03-12 18:06:09 +03:00
2019-02-16 18:28:32 +03:00
# UNUSED
#@dbserver.route("/charts")
#def get_charts_external():
# keys = FormsDict.decode(request.query)
# ckeys = {}
# ckeys["since"], ckeys["to"], ckeys["within"] = keys.get("since"), keys.get("to"), keys.get("in")
#
# result = get_scrobbles_num(**ckeys)
# return {"number":result}
2019-02-16 18:28:32 +03:00
#def get_charts(**keys):
# return db_aggregate(**{k:keys[k] for k in keys if k in ["since","to","within"]})
2019-02-02 20:08:30 +03:00
2019-05-23 14:13:42 +03:00
@dbserver.get("tracks")
def get_tracks_external(**keys):
    # Only the filter keys are used; forceArtist is passed on to the URI handler.
    filter_keys, _, _, _ = uri_to_internal(keys,forceArtist=True)
    tracks = get_tracks(**filter_keys)
    return {"list":tracks}
def get_tracks(artist=None):
    """Return all tracks, or only those the given artist is part of.

    Raises ValueError if `artist` is given but not in the database.
    """
    if artist is None:
        return [get_track_dict(t) for t in TRACKS]

    artistid = ARTISTS.index(artist)
    return [get_track_dict(t) for t in TRACKS if artistid in t.artists]
# Option 2 is a bit more elegant but much slower
2019-04-07 16:01:04 +03:00
#tracklist = [get_track_dict(t) for t in TRACKS]
#ls = [t for t in tracklist if (artist in t["artists"]) or (artist==None)]
2019-05-23 14:13:42 +03:00
@dbserver.get("artists")
def get_artists_external():
    # wraps the artist list for the response
    return {"list":get_artists()}
2018-11-25 16:49:53 +03:00
def get_artists():
    """Return the global artist list (the list object itself, not a copy)."""
    all_artists = ARTISTS
    return all_artists
2019-05-23 14:13:42 +03:00
@dbserver.get("charts/artists")
def get_charts_artists_external(**keys):
    # only the time keys are relevant for artist charts
    _, time_keys, _, _ = uri_to_internal(keys)
    charts = get_charts_artists(**time_keys)
    return {"list":charts}
def get_charts_artists(**keys):
    """Return the artist charts for the given time keys."""
    accepted = ("since","to","within","timerange")
    timekeys = {name:value for name, value in keys.items() if name in accepted}
    return db_aggregate(by="ARTIST",**timekeys)
2019-05-23 14:13:42 +03:00
@dbserver.get("charts/tracks")
def get_charts_tracks_external(**keys):
    # filter keys (with forceArtist) and time keys are passed on
    filter_keys, time_keys, _, _ = uri_to_internal(keys,forceArtist=True)
    query = dict(filter_keys)
    query.update(time_keys)
    return {"list":get_charts_tracks(**query)}
def get_charts_tracks(**keys):
    """Return the track charts for the given time keys and optional artist."""
    accepted = ("since","to","within","timerange","artist")
    query = {name:value for name, value in keys.items() if name in accepted}
    return db_aggregate(by="TRACK",**query)
2019-05-23 14:13:42 +03:00
@dbserver.get("pulse")
def get_pulse_external(**keys):
    # all four key groups are merged and handed to get_pulse
    query = {}
    for group in uri_to_internal(keys):
        query.update(group)
    return {"list":get_pulse(**query)}
def get_pulse(**keys):
    """Return the number of matching scrobbles for each time range.

    The result is a list of {"range":..., "scrobbles":...} dicts, one per
    range produced from the time/step keys.
    """
    range_names = ("since","to","within","timerange","step","stepn","trail")
    filter_names = ("artists","artist","track","title","associated")

    rngs = ranges(**{name:value for name, value in keys.items() if name in range_names})
    filters = {name:value for name, value in keys.items() if name in filter_names}

    return [
        {"range":rng,"scrobbles":len(db_query(timerange=rng,**filters))}
        for rng in rngs
    ]
2019-05-23 14:13:42 +03:00
@dbserver.get("performance")
def get_performance_external(**keys):
    # all four key groups are merged and handed to get_performance
    query = {}
    for group in uri_to_internal(keys):
        query.update(group)
    return {"list":get_performance(**query)}
def get_performance(**keys):
    """Return the chart rank of a track or artist for each time range.

    If "track" is given it takes precedence over "artist". The result is a
    list of {"range":..., "rank":...} dicts; rank is None for ranges in which
    the entity does not appear in the charts (and for every range if neither
    "track" nor "artist" was supplied - previously that case raised an
    UnboundLocalError).
    """
    range_names = ("since","to","within","timerange","step","stepn","trail")
    rngs = ranges(**{k:keys[k] for k in keys if k in range_names})

    results = []
    for rng in rngs:
        # default for "not charted" and for "nothing to look up"
        rank = None

        if "track" in keys:
            for c in get_charts_tracks(timerange=rng):
                if c["track"] == keys["track"]:
                    rank = c["rank"]
                    break
        elif "artist" in keys:
            for c in get_charts_artists(timerange=rng):
                if c["artist"] == keys["artist"]:
                    rank = c["rank"]
                    break

        results.append({"range":rng,"rank":rank})

    return results
2019-05-23 14:13:42 +03:00
@dbserver.get("top/artists")
def get_top_artists_external(**keys):
    # time keys and internal keys are passed on
    _, time_keys, internal_keys, _ = uri_to_internal(keys)
    query = dict(time_keys)
    query.update(internal_keys)
    return {"list":get_top_artists(**query)}
def get_top_artists(**keys):
    """Return the number one artist of each time range.

    Each entry is {"range","artist","counting","scrobbles"}; ranges for which
    no chart leader can be determined yield
    {"range":rng,"artist":None,"scrobbles":0}.
    """
    range_names = ("since","to","within","timerange","step","stepn","trail")
    rngs = ranges(**{k:keys[k] for k in keys if k in range_names})

    results = []
    for rng in rngs:
        try:
            res = db_aggregate(timerange=rng,by="ARTIST")[0]
            results.append({"range":rng,"artist":res["artist"],"counting":res["counting"],"scrobbles":res["scrobbles"]})
        # still best-effort (e.g. empty charts), but no longer swallows
        # KeyboardInterrupt / SystemExit like the bare except did
        except Exception:
            results.append({"range":rng,"artist":None,"scrobbles":0})

    return results
2019-05-23 14:13:42 +03:00
@dbserver.get("top/tracks")
def get_top_tracks_external(**keys):
    # time keys and internal keys are passed on
    # IMPLEMENT THIS FOR TOP TRACKS OF ARTIST AS WELL?
    _, time_keys, internal_keys, _ = uri_to_internal(keys)
    query = dict(time_keys)
    query.update(internal_keys)
    return {"list":get_top_tracks(**query)}
def get_top_tracks(**keys):
    """Return the number one track of each time range.

    Each entry is {"range","track","scrobbles"}; ranges for which no chart
    leader can be determined yield {"range":rng,"track":None,"scrobbles":0}.
    """
    range_names = ("since","to","within","timerange","step","stepn","trail")
    rngs = ranges(**{k:keys[k] for k in keys if k in range_names})

    results = []
    for rng in rngs:
        try:
            res = db_aggregate(timerange=rng,by="TRACK")[0]
            results.append({"range":rng,"track":res["track"],"scrobbles":res["scrobbles"]})
        # still best-effort (e.g. empty charts), but no longer swallows
        # KeyboardInterrupt / SystemExit like the bare except did
        except Exception:
            results.append({"range":rng,"track":None,"scrobbles":0})

    return results
2019-05-23 14:13:42 +03:00
@dbserver.get("artistinfo")
def artistInfo_external(**keys):
    # only the filter keys (with forceArtist) are used
    filter_keys, _, _, _ = uri_to_internal(keys,forceArtist=True)
    return artistInfo(**filter_keys)
def artistInfo(artist):
    """Return summary information about an artist.

    For an artist that appears in the charts the result contains
    "scrobbles", "position", "associated", "medals" and "topweeks".
    For an artist that is not charted themselves (because their scrobbles are
    credited to someone else) the result is
    {"replace": credited artist, "scrobbles", "position"}.

    Raises IndexError if the credited artist is not in the charts either.
    """
    charts = db_aggregate(by="ARTIST")
    # we cant take the scrobble number from the charts because that includes
    # all countas scrobbles
    scrobbles = len(db_query(artists=[artist]))

    # explicit lookup instead of a bare except around the whole branch; the
    # weekly performance that used to be computed here was never used and
    # has been removed
    own_entry = next((e for e in charts if e["artist"] == artist),None)

    if own_entry is not None:
        others = [a for a in coa.getAllAssociated(artist) if a in ARTISTS]
        return {
            "scrobbles":scrobbles,
            "position":own_entry["rank"],
            "associated":others,
            "medals":{"gold":[],"silver":[],"bronze":[],**MEDALS.get(artist,{})},
            "topweeks":WEEKLY_TOPARTISTS.get(artist,0)
        }

    # if the artist isnt in the charts, they are not being credited and we
    # need to show information about the credited one
    artist = coa.getCredited(artist)
    c = [e for e in charts if e["artist"] == artist][0]
    position = c["rank"]
    return {"replace":artist,"scrobbles":scrobbles,"position":position}
2019-05-23 14:13:42 +03:00
@dbserver.get("trackinfo")
def trackInfo_external(artist:Multi[str],**keys):
    # transform into a multidict so we can use our normal uri_to_internal function
    multikeys = FormsDict(keys)
    for name in artist:
        multikeys.append("artist",name)

    filter_keys, _, _, _ = uri_to_internal(multikeys,forceTrack=True)
    return trackInfo(**filter_keys)
def trackInfo(track):
    """Return summary information about a track.

    `track` is a track dict with "artists" and "title". The result contains
    "scrobbles", "position", "medals", "certification" and "topweeks".

    Raises IndexError if the track is not in the charts.
    """
    charts = db_aggregate(by="TRACK")
    # chart entry of track always has right scrobble number, no countas rules here
    entry = [e for e in charts if e["track"] == track][0]
    scrobbles = entry["scrobbles"]
    position = entry["rank"]

    threshold_gold, threshold_platinum, threshold_diamond = settings.get_settings("SCROBBLES_GOLD","SCROBBLES_PLATINUM","SCROBBLES_DIAMOND")
    # highest certification first; the first threshold reached wins
    levels = (
        ("diamond",threshold_diamond),
        ("platinum",threshold_platinum),
        ("gold",threshold_gold),
    )
    cert = None
    for level, threshold in levels:
        if scrobbles >= threshold:
            cert = level
            break

    # medals and weekly tops are keyed by (artist set, title)
    track_key = (frozenset(track["artists"]),track["title"])

    return {
        "scrobbles":scrobbles,
        "position":position,
        "medals":{"gold":[],"silver":[],"bronze":[],**MEDALS_TRACKS.get(track_key,{})},
        "certification":cert,
        "topweeks":WEEKLY_TOPTRACKS.get(track_key,0)
    }
2019-05-23 14:13:42 +03:00
@dbserver.get("newscrobble")
def pseudo_post_scrobble(artist:Multi,**keys):
    # Scrobbles a track via GET request.
    # Expects "title" and "key" (API key), optionally "time" (unix timestamp).
    # Responds with status 403 for an unknown API key and 400 for a missing
    # title; otherwise returns {"status":"success","track":...}.
    artists = "/".join(artist)
    title = keys.get("title")
    apikey = keys.get("key")
    client = checkAPIkey(apikey)
    # identity check: an empty identifier string is a valid client
    if client is False:
        response.status = 403
        return ""

    # a missing title used to crash with a TypeError while building the log line
    if title is None:
        response.status = 400
        return ""

    try:
        time = int(keys.get("time"))
    except (TypeError, ValueError):
        # no or unparseable timestamp: use the current time
        time = int(datetime.datetime.now(tz=datetime.timezone.utc).timestamp())

    log("Incoming scrobble (native API): Client " + client + ", ARTISTS: " + str(artists) + ", TRACK: " + title,module="debug")
    (artists,title) = cla.fullclean(artists,title)

    ## this is necessary for localhost testing
    response.set_header("Access-Control-Allow-Origin","*")

    trackdict = createScrobble(artists,title,time)
    sync()

    return {"status":"success","track":trackdict}
2019-05-23 14:13:42 +03:00
@dbserver.post("newscrobble")
def post_scrobble(artist:Multi,**keys):
    # Scrobbles a track via POST request.
    # Expects "title" and "key" (API key), optionally "time" (unix timestamp).
    # Responds with status 403 for an unknown API key and 400 for a missing
    # title; otherwise returns {"status":"success","track":...}.
    artists = "/".join(artist)
    title = keys.get("title")
    apikey = keys.get("key")
    client = checkAPIkey(apikey)
    # identity check: an empty identifier string is a valid client
    if client is False:
        response.status = 403
        return ""

    # a missing title used to crash with a TypeError while building the log line
    if title is None:
        response.status = 400
        return ""

    try:
        time = int(keys.get("time"))
    except (TypeError, ValueError):
        # no or unparseable timestamp: use the current time
        time = int(datetime.datetime.now(tz=datetime.timezone.utc).timestamp())

    log("Incoming scrobble (native API): Client " + client + ", ARTISTS: " + str(artists) + ", TRACK: " + title,module="debug")
    (artists,title) = cla.fullclean(artists,title)

    ## this is necessary for localhost testing
    #response.set_header("Access-Control-Allow-Origin","*")

    trackdict = createScrobble(artists,title,time)
    #always sync, one filesystem access every three minutes shouldn't matter
    sync()

    return {"status":"success","track":trackdict}
# standard-compliant scrobbling methods
@dbserver.post("s/{path}",pass_headers=True)
@dbserver.get("s/{path}",pass_headers=True)
def sapi(path:Multi,**keys):
    """Scrobbles according to a standardized protocol.
    :param string path: Path according to the scrobble protocol
    :param string keys: Query keys according to the scrobble protocol
    """
    # drop empty path segments before handing over to the compliant API handler
    segments = [segment for segment in path if segment]
    return compliant_api.handle(segments,keys)
2019-05-23 14:13:42 +03:00
@dbserver.get("sync")
def abouttoshutdown():
    # writes all unsaved scrobbles to disk; the process is not terminated here
    sync()
    return None
2019-05-23 14:13:42 +03:00
@dbserver.post("newrule")
def newrule(**keys):
    # Adds a rule entry to rules/webmade.tsv if a valid API key is supplied
    # and marks the database rulestate as inconsistent.
    global db_rulestate

    apikey = keys.pop("key",None)
    if not checkAPIkey(apikey):
        return

    # the entry consists of the remaining request key names
    tsv.add_entry("rules/webmade.tsv",list(keys))
    db_rulestate = False
2019-05-23 14:13:42 +03:00
@dbserver.get("issues")
def issues_external(): #probably not even needed
    found = issues()
    return found
def issues():
    """Heuristically look for problems in the artist list.

    Returns a dict with
      "duplicates"   -- pairs of artist names that look like the same artist
      "combined"     -- (name, parts) where name seems to consist of several
                        other known artists
      "newartists"   -- (rest, name, parts) where name contains known artists
                        plus a remainder that might be a new artist
      "inconsistent" -- whether the database rulestate is not consistent
    """
    # kept from the original implementation (not used below)
    import itertools
    import difflib

    combined, duplicates, newartists = [], [], []
    # if the user manually edits files while the server is running this won't
    # show, but too lazy to check the rulestate here
    inconsistent = not db_rulestate

    longest_first = sorted(ARTISTS,key=len,reverse=True)

    # go through the artists from shortest to longest name
    for a in longest_first[::-1]:

        nochange = cla.confirmedReal(a)

        st = a
        lis = []
        # only compare against the artists that come after this one in the
        # longest-first order
        own_position = longest_first.index(a)
        for ar in longest_first[own_position + 1:]:

            a_lower, ar_lower = a.lower(), ar.lower()
            if a_lower in (ar_lower,"the " + ar_lower,"a " + ar_lower):
                duplicates.append((ar,a))
                break

            if (ar + " " in st) or (" " + ar in st):
                lis.append(ar)
                st = st.replace(ar,"").strip()
            elif ar == st:
                lis.append(ar)
                st = ""
                if not nochange:
                    combined.append((a,lis))
                break
            elif (ar in st) and len(ar)*2 > len(st):
                duplicates.append((a,ar))

        # strip connecting words from whatever is left of the name
        st = st.replace("&","").replace("and","").replace("with","").strip()
        if st == "" or st == a:
            continue

        if len(st) < 5:
            if len(lis) == 1:
                duplicates.append((a,lis[0]))
            elif len(lis) > 1 and not nochange:
                combined.append((a,lis))
        elif not nochange:
            #check if we havent just randomly found the string in another word
            if (" " + st + " ") in a or (a.endswith(" " + st)) or (a.startswith(st + " ")):
                newartists.append((st,a,lis))

    return {"duplicates":duplicates,"combined":combined,"newartists":newartists,"inconsistent":inconsistent}
2019-03-24 16:56:34 +03:00
2019-05-23 14:13:42 +03:00
@dbserver.post("importrules")
def import_rulemodule(**keys):
    # Activates (symlinks) or deactivates (removes) a predefined rule file.
    # Requires a valid API key; "filename" names the rule file without
    # extension, the presence of "remove" selects deactivation.
    apikey = keys.pop("key",None)
    if not checkAPIkey(apikey):
        return

    filename = keys.get("filename")
    remove = keys.get("remove") is not None

    # strip everything that is not a plain filename character
    validchars = "-_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
    filename = "".join(c for c in filename if c in validchars)
    active_path = "rules/" + filename + ".tsv"

    if remove:
        log("Deactivating predefined rulefile " + filename)
        os.remove(active_path)
        return

    log("Importing predefined rulefile " + filename)
    os.symlink("predefined/" + filename + ".tsv",active_path)
2019-03-24 16:56:34 +03:00
2019-05-23 14:13:42 +03:00
@dbserver.post("rebuild")
def rebuild(**keys):
    # Rebuilds the whole database if a valid API key is supplied: saves
    # pending scrobbles, runs the fixexisting script, recreates the rule
    # agents, rebuilds the in-memory database and clears the caches.
    global db_rulestate
    global cla, coa

    apikey = keys.pop("key",None)
    if not checkAPIkey(apikey):
        return

    log("Database rebuild initiated!")
    db_rulestate = False
    sync()
    os.system("python3 fixexisting.py")
    cla = CleanerAgent()
    coa = CollectorAgent()
    build_db()
    invalidate_caches()
2019-03-06 19:50:36 +03:00
2019-05-23 14:13:42 +03:00
@dbserver.get("search")
def search(**keys):
    # Searches artists and tracks for "query" and returns both result lists,
    # each limited to "max" entries (if given) and extended with link and
    # image URIs.
    query = keys.get("query")
    max_ = keys.get("max")
    if max_ is not None:
        max_ = int(max_)
    query = query.lower()

    artists = db_search(query,type="ARTIST")
    tracks = db_search(query,type="TRACK")

    def relevance(text):
        # if the string begins with the query it's a better match, if a word
        # in it begins with it, still good
        # also, shorter is better (because longer titles would be easier to
        # further specify)
        lowered = text.lower()
        if lowered.startswith(query):
            tier = 0
        elif " " + query in lowered:
            tier = 1
        else:
            tier = 2
        return (tier,len(text))

    artists.sort(key=relevance)
    tracks.sort(key=lambda entry: relevance(entry["title"]))

    # add links
    artists_result = []
    for a in artists:
        artist_query = compose_querystring(internal_to_uri({"artist":a}))
        image_query = compose_querystring(internal_to_uri({"artist":a}))
        artists_result.append({"name":a,"link":"/artist?" + artist_query,"image":"/image?" + image_query})

    tracks_result = []
    for t in tracks:
        # the track dicts themselves are extended, as before
        t["link"] = "/track?" + compose_querystring(internal_to_uri({"track":t}))
        t["image"] = "/image?" + compose_querystring(internal_to_uri({"track":t}))
        tracks_result.append(t)

    return {"artists":artists_result[:max_],"tracks":tracks_result[:max_]}
2018-12-12 21:37:59 +03:00
####
## Server operation
####
2018-11-24 18:29:24 +03:00
# Starts the server
2019-05-12 19:39:46 +03:00
def start_db():
    """Initialize the database: build it from disk and load the API keys."""
    global lastsync

    log("Starting database...")
    utc_now = datetime.datetime.now(tz=datetime.timezone.utc)
    lastsync = int(utc_now.timestamp())
    build_db()
    loadAPIkeys()
    log("Database reachable!")
def build_db():
    """(Re)build the in-memory database from the scrobble files.

    Resets all scrobble/artist/track structures, reads every entry of the
    "scrobbles" files, sorts the result, then updates medals, weekly tops and
    the rulestate.
    """
    log("Building database...")

    global SCROBBLES, ARTISTS, TRACKS
    global SCROBBLESDICT, STAMPS
    global TRACKS_LOWER, ARTISTS_LOWER, ARTIST_SET, TRACK_SET
    global db_rulestate

    SCROBBLES = []
    ARTISTS = []
    TRACKS = []
    STAMPS = []
    SCROBBLESDICT = {}

    # The normalized lookup structures share their indices with ARTISTS and
    # TRACKS, so they have to be reset together with them. Otherwise a
    # rebuild would resolve names against the stale index lists and hand out
    # IDs that do not match the freshly built ARTISTS / TRACKS.
    TRACKS_LOWER = []
    ARTISTS_LOWER = []
    ARTIST_SET = set()
    TRACK_SET = set()

    # parse files
    db = tsv.parse_all("scrobbles","int","string","string",comments=False)
    for sc in db:
        # NOTE(review): the artist separator was an empty string here, which
        # makes str.split raise ValueError. It is assumed to be the unit
        # separator symbol; must match the one used in sync() - confirm.
        artists = sc[1].split("␟")
        title = sc[2]
        time = sc[0]

        readScrobble(artists,title,time)

    # optimize database
    SCROBBLES.sort(key = lambda tup: tup[1])
    STAMPS = [t for t in SCROBBLESDICT]
    STAMPS.sort()

    # inform malojatime module about earliest scrobble
    if len(STAMPS) > 0: register_scrobbletime(STAMPS[0])

    # artists that only exist because of countas rules need no extra handling,
    # they are added whenever an artist is created

    #start regular tasks
    utilities.update_medals()
    utilities.update_weekly()

    db_rulestate = utilities.consistentRulestate("scrobbles",cla.checksums)

    log("Database fully built!")
2018-11-24 18:29:24 +03:00
# Saves all cached entries to disk
2018-11-28 15:02:43 +03:00
def sync():
    """Write all not yet saved scrobbles to their monthly files.

    Entries are collected per file first so every file is only touched once.
    Afterwards the scrobbles are flagged as saved and `lastsync` is updated.
    """
    global lastsync

    # all entries by file collected
    # so we don't open the same file for every entry
    entries = {}

    for idx, scrobble in enumerate(SCROBBLES):
        if scrobble[2]:
            continue

        t = get_scrobble_dict(scrobble)

        # we want the order of artists to be deterministic so when we update
        # files with new rules a diff can see what has actually been changed
        artistlist = sorted(t["artists"])
        # NOTE(review): the separator was an empty string here, which fuses
        # all artist names irreversibly. It is assumed to be the unit
        # separator symbol; must match the one used in build_db() - confirm.
        artistss = "␟".join(artistlist)
        timestamp = datetime.date.fromtimestamp(t["time"])

        entry = [str(t["time"]),artistss,t["title"]]

        monthcode = str(timestamp.year) + "_" + str(timestamp.month)
        entries.setdefault(monthcode,[]).append(entry)

        # keep the record a Scrobble: a plain tuple would break every later
        # attribute access (.track / .timestamp) on saved scrobbles
        SCROBBLES[idx] = Scrobble(scrobble[0],scrobble[1],True)

    for monthcode, lines in entries.items():
        filename = "scrobbles/" + monthcode + ".tsv"
        tsv.add_entries(filename,lines,comments=False)
        utilities.combineChecksums(filename,cla.checksums)

    lastsync = int(datetime.datetime.now(tz=datetime.timezone.utc).timestamp())
2018-11-24 18:29:24 +03:00
2018-12-12 21:37:59 +03:00
2019-03-11 22:44:37 +03:00
###
## Caches in front of DB
2019-05-09 17:58:25 +03:00
## the volatile caches are intended mainly for excessive site navigation during one session
## the permanent caches are there to save data that is hard to calculate and never changes (old charts)
2019-03-11 22:44:37 +03:00
###
import copy
# volatile query cache, replaced by an empty dict on every invalidation
cache_query = dict()

# permanent query cache: disk backed if doreah is recent enough and
# experimental features are enabled, in-memory otherwise
cache_query_permanent = (
    DiskDict(name="dbquery",folder="cache",maxmemory=1024*1024*500,maxstorage=1024*1024*settings.get_settings("DB_CACHE_SIZE"))
    if doreah.version >= (0,7,1) and settings.get_settings("EXPERIMENTAL_FEATURES")
    else Cache(maxmemory=1024*1024*500)
)

# (year, month, day) the volatile caches were last invalidated on
cacheday = (0,0,0)
def db_query(**kwargs):
    """Cached front end for db_query_full.

    Queries with a timerange that is no longer active go through the
    permanent cache, everything else through the volatile one. Cached values
    are stored and handed out as shallow copies.
    """
    check_cache_age()
    key = utilities.serialize(kwargs)

    finished_range = "timerange" in kwargs and not kwargs["timerange"].active()

    if finished_range:
        if key in cache_query_permanent:
            return copy.copy(cache_query_permanent.get(key))
        result = db_query_full(**kwargs)
        cache_query_permanent.add(key,copy.copy(result))
        return result

    if key in cache_query:
        return copy.copy(cache_query[key])
    result = db_query_full(**kwargs)
    cache_query[key] = copy.copy(result)
    return result
# volatile aggregate cache, replaced by an empty dict on every invalidation
cache_aggregate = dict()

# permanent aggregate cache: disk backed if doreah is recent enough and
# experimental features are enabled, in-memory otherwise
cache_aggregate_permanent = (
    DiskDict(name="dbaggregate",folder="cache",maxmemory=1024*1024*500,maxstorage=1024*1024*settings.get_settings("DB_CACHE_SIZE"))
    if doreah.version >= (0,7,1) and settings.get_settings("EXPERIMENTAL_FEATURES")
    else Cache(maxmemory=1024*1024*500)
)
2019-03-11 22:44:37 +03:00
def db_aggregate(**kwargs):
    """Cached front end for db_aggregate_full().

    The serialized keyword arguments are used as cache key. Aggregations that
    carry a "timerange" which reports itself as not active are served from /
    stored in the permanent cache; every other aggregation uses the volatile
    dict cache. A cache hit returns a shallow copy of the stored value; on a
    miss a shallow copy is stored and the freshly computed result is returned.
    """
    check_cache_age()

    cache_key = utilities.serialize(kwargs)
    use_permanent = "timerange" in kwargs and not kwargs["timerange"].active()

    if use_permanent:
        if cache_key in cache_aggregate_permanent:
            return copy.copy(cache_aggregate_permanent.get(cache_key))
        fresh = db_aggregate_full(**kwargs)
        cache_aggregate_permanent.add(cache_key, copy.copy(fresh))
        return fresh

    if cache_key in cache_aggregate:
        return copy.copy(cache_aggregate[cache_key])
    fresh = db_aggregate_full(**kwargs)
    cache_aggregate[cache_key] = copy.copy(fresh)
    return fresh
2019-03-11 22:44:37 +03:00
def invalidate_caches():
    """Drop the volatile query/aggregate caches and remember today's UTC date.

    Only the two dict caches are replaced; the permanent caches are left
    untouched. `cacheday` is set to the current UTC (year, month, day) so
    that check_cache_age() can tell when the date has changed.
    """
    global cache_query, cache_aggregate, cacheday
    cache_query = {}
    cache_aggregate = {}

    # datetime.utcnow() is deprecated; an aware UTC datetime yields the same
    # year/month/day fields.
    now = datetime.datetime.now(datetime.timezone.utc)
    cacheday = (now.year, now.month, now.day)
    log("Database caches invalidated.")
2019-03-11 22:44:37 +03:00
def check_cache_age():
    """Invalidate the volatile caches if the UTC date differs from `cacheday`."""
    # datetime.utcnow() is deprecated; an aware UTC datetime yields the same
    # year/month/day fields.
    now = datetime.datetime.now(datetime.timezone.utc)
    if cacheday != (now.year, now.month, now.day):
        invalidate_caches()
2018-12-12 21:37:59 +03:00
####
## Database queries
####
# Queries the database
def db_query_full(artist=None,artists=None,title=None,track=None,since=None,to=None,within=None,timerange=None,associated=False,max_=None):
    """Return the scrobbles matching the given filters, newest first.

    This is not meant as a search function: string arguments only work if
    they match a stored artist name / track exactly.

    artist     -- artist name or index into ARTISTS
    artists    -- iterable of artist names and/or indices. If `title` is
                  given, the exact artist combination plus the title identify
                  one specific track; otherwise only one of the artists is
                  used as filter.
    title      -- track title, combined with `artists` (see above)
    track      -- index into TRACKS, or a dict with "artists" and "title"
    since, to, within, timerange -- passed to time_stamps() to obtain the
                  lower and upper timestamp bounds
    associated -- also accept scrobbles where the artist is contained in
                  coa.getCreditedList() of the track's artists
    max_       -- stop after this many results (None: no limit)

    Returns a list of get_scrobble_dict() results.
    Raises ValueError (from list.index) when a given name or track is not
    present in ARTISTS / TRACKS.
    """
    (since, to) = time_stamps(since=since,to=to,within=within,range=timerange)

    # artist name to number
    if isinstance(artist,str):
        artist = ARTISTS.index(artist)

    # artists to numbers
    if artists is not None:
        artists = {(ARTISTS.index(a) if isinstance(a,str) else a) for a in artists}

    # track to number
    if isinstance(track,dict):
        trackartists = {(ARTISTS.index(a) if isinstance(a,str) else a) for a in track["artists"]}
        track = TRACKS.index((frozenset(trackartists),track["title"]))
        artists = None

    # check if track is requested via title
    if title is not None and track is None:
        track = TRACKS.index((frozenset(artists),title))
        artists = None
    # if we're not looking for a track (either directly or per title + artist
    # arguments, which is converted to track above) we only need one artist
    elif artist is None and track is None and artists:
        artist = artists.pop()

    # db query always reverse by default
    result = []
    for s in scrobbles_in_range(since,to,reverse=True):
        # with max_=None this never matches, so there is no limit
        if len(result) == max_:
            break
        if (track is None or s[0] == track) and (
            artist is None
            or artist in TRACKS[s[0]][0]
            or (associated and artist in coa.getCreditedList(TRACKS[s[0]][0]))
        ):
            result.append(get_scrobble_dict(s))

    return result
# pointless to check for artist when track is checked because every track has a fixed set of artists, but it's more elegant this way
2018-12-04 19:07:07 +03:00
# Queries that... well... aggregate
def db_aggregate_full(by=None,since=None,to=None,within=None,timerange=None,artist=None):
    """Aggregate the scrobbles of a time range.

    by         -- "ARTIST": chart of credited artists,
                  "TRACK": chart of tracks (optionally limited to `artist`),
                  anything else (including None): plain scrobble count
    since, to, within, timerange -- passed to time_stamps() to obtain the
                  lower and upper timestamp bounds
    artist     -- artist name or index into ARTISTS; only used for by="TRACK"

    Returns, for the chart modes, a list of dicts sorted by descending
    "scrobbles", each carrying a "rank" (entries with equal scrobble count
    share the rank of the first of them); otherwise an int.
    Raises ValueError (from list.index) if `artist` is an unknown name.
    """
    (since, to) = time_stamps(since=since,to=to,within=within,range=timerange)

    if isinstance(artist, str):
        artist = ARTISTS.index(artist)

    if by == "ARTIST":
        charts = {}
        for s in scrobbles_in_range(since,to):
            # every credited artist of the track gets the scrobble counted
            for a in coa.getCreditedList(TRACKS[s[0]][0]):
                charts[a] = charts.get(a,0) + 1

        ls = [
            {
                "artist":get_artist_dict(ARTISTS[a]),
                "scrobbles":count,
                "counting":[arti for arti in coa.getAllAssociated(ARTISTS[a]) if arti in ARTISTS],
            }
            for a, count in charts.items()
        ]
        return _rank_by_scrobbles(ls)

    if by == "TRACK":
        charts = {}
        for s in scrobbles_in_range(since,to):
            if artist is None or artist in TRACKS[s[0]][0]:
                charts[s[0]] = charts.get(s[0],0) + 1

        ls = [{"track":get_track_dict(TRACKS[t]),"scrobbles":count} for t, count in charts.items()]
        return _rank_by_scrobbles(ls)

    # no grouping requested: count without building an intermediate list
    return sum(1 for _ in scrobbles_in_range(since,to))


def _rank_by_scrobbles(entries):
    """Sort chart entries by descending "scrobbles" in place, add "rank", return them."""
    entries.sort(key=lambda k:k["scrobbles"],reverse=True)
    for position, entry in enumerate(entries):
        if position == 0 or entry["scrobbles"] < entries[position-1]["scrobbles"]:
            entry["rank"] = position + 1
        else:
            # tie with the previous entry: share its rank
            entry["rank"] = entries[position-1]["rank"]
    return entries
2018-12-04 19:07:07 +03:00
2018-12-12 21:37:59 +03:00
# Search for strings
def db_search(query,type=None):
    """Return the artists or tracks whose simplified name contains `query`.

    query -- search string; compared after passing both sides through
             simplestr()
    type  -- "ARTIST": returns the matching entries of ARTISTS,
             "TRACK": returns get_track_dict() of the tracks whose title
             matches,
             anything else (including the default None): returns an empty
             list instead of failing on an unbound result variable
    """
    results = []
    if type == "ARTIST":
        needle = simplestr(query)  # simplify the query once, not per entry
        results = [a for a in ARTISTS if needle in simplestr(a)]
    elif type == "TRACK":
        needle = simplestr(query)
        results = [get_track_dict(t) for t in TRACKS if needle in simplestr(t[1])]
    return results
####
## Useful functions
####
2019-03-10 19:38:33 +03:00
# makes a string usable for searching (accented characters are reduced to their base character, any remaining non-ASCII character is replaced with a blank)
def simplestr(input,ignorecapitalization=True):
    """Reduce a string to a plain form that is suitable for comparisons.

    The string is NFKD-normalized, combining marks are dropped, and every
    remaining character that does not encode to a single byte is replaced
    with a blank. With `ignorecapitalization` the result is lowercased.
    """
    pieces = []
    for char in unicodedata.normalize("NFKD",input):
        if unicodedata.combining(char):
            # accent or other combining mark: drop, keep only the base character
            continue
        pieces.append(char if len(char.encode()) == 1 else " ")

    simplified = "".join(pieces)
    return simplified.lower() if ignorecapitalization else simplified
2018-12-12 21:37:59 +03:00
2019-04-07 16:55:49 +03:00
#def getArtistId(nameorid):
# if isinstance(nameorid,int):
# return nameorid
# else:
# try:
# return ARTISTS.index(nameorid)
# except:
# return -1
def insert(list_,item,key=lambda x:x):
    """Insert `item` into `list_` before the first element with a greater key.

    list_ -- list to modify in place (expected to be ordered by `key`)
    item  -- element to insert
    key   -- function mapping an element to its sort key

    If no element has a greater key, the item is appended. Elements with an
    equal key stay in front of the new item. Returns the index at which the
    item now sits.
    """
    item_key = key(item)  # loop invariant, compute once
    for position, existing in enumerate(list_):
        if key(existing) > item_key:
            list_.insert(position,item)
            return position
    list_.append(item)
    return len(list_) - 1
2019-03-12 14:56:53 +03:00
2019-03-11 22:04:23 +03:00
def scrobbles_in_range(start,end,reverse=False):
    """Yield the scrobbles whose timestamp lies in [start, end] (inclusive).

    start, end -- timestamp bounds, compared against the entries of STAMPS
    reverse    -- yield from the newest to the oldest timestamp instead of
                  oldest to newest

    STAMPS is kept sorted, so the matching window is located with a binary
    search instead of walking over every stamp outside of the range.
    """
    from bisect import bisect_left, bisect_right

    first = bisect_left(STAMPS, start)   # first index with stamp >= start
    stop = bisect_right(STAMPS, end)     # one past the last index with stamp <= end

    indices = range(stop - 1, first - 1, -1) if reverse else range(first, stop)
    for idx in indices:
        yield SCROBBLESDICT[STAMPS[idx]]
2019-03-11 22:04:23 +03:00
# for performance testing
def generateStuff(num=0,pertrack=0,mult=0):
    """Create volatile scrobbles for performance testing.

    num      -- number of scrobbles to create for randomly chosen tracks, each
                at a random time between the first and last entry of STAMPS
    pertrack -- number of scrobbles to create for every track in TRACKS, each
                at a random time between the first and last entry of STAMPS
    mult     -- number of copies to create for every scrobble in SCROBBLES;
                copy number i is placed i*500 before the original's time

    NOTE(review): SCROBBLES / TRACKS are iterated while createScrobble() is
    being called -- presumably createScrobble() adds to these lists; confirm
    that this is intended.
    """
    import random

    # random scrobbles of random tracks
    for _ in range(num):
        info = get_track_dict(random.choice(TRACKS))
        stamp = random.randint(STAMPS[0],STAMPS[-1])
        createScrobble(info["artists"],info["title"],stamp,volatile=True)

    # a fixed amount of random scrobbles for each track
    for entry in TRACKS:
        info = get_track_dict(entry)
        for _ in range(pertrack):
            stamp = random.randint(STAMPS[0],STAMPS[-1])
            createScrobble(info["artists"],info["title"],stamp,volatile=True)

    # shifted copies of each existing scrobble
    for entry in SCROBBLES:
        info = get_scrobble_dict(entry)
        for offset in range(mult):
            createScrobble(info["artists"],info["title"],info["time"] - offset*500,volatile=True)