2019-03-31 13:18:49 +03:00
# server
2019-05-12 19:39:46 +03:00
from bottle import request , response , FormsDict
2019-03-31 13:18:49 +03:00
# rest of the project
2019-06-13 11:51:25 +03:00
from cleanup import CleanerAgent , CollectorAgent
import utilities
from malojatime import register_scrobbletime , time_stamps , ranges
2019-05-14 13:07:47 +03:00
from urihandler import uri_to_internal , internal_to_uri , compose_querystring
2019-05-12 12:46:25 +03:00
import compliant_api
2019-06-24 16:43:38 +03:00
from external import proxy_scrobble
2019-03-31 13:18:49 +03:00
# doreah toolkit
2019-03-29 21:44:42 +03:00
from doreah . logging import log
2019-03-29 22:23:32 +03:00
from doreah import tsv
2019-06-13 11:51:25 +03:00
from doreah import settings
2019-05-09 17:58:25 +03:00
from doreah . caching import Cache , DeepCache
try :
from doreah . persistence import DiskDict
except : pass
import doreah
2019-05-23 14:13:42 +03:00
# nimrodel API
from nimrodel import EAPI as API
from nimrodel import Multi
2019-03-31 13:18:49 +03:00
# technical
import os
import datetime
2018-11-27 18:08:14 +03:00
import sys
2019-03-10 19:38:33 +03:00
import unicodedata
2019-04-07 15:43:36 +03:00
from collections import namedtuple
2019-04-07 16:27:24 +03:00
from threading import Lock
2019-03-31 13:18:49 +03:00
# url handling
from importlib . machinery import SourceFileLoader
import urllib
2019-04-07 16:27:24 +03:00
# Lock guarding the in-memory database; createScrobble holds it while inserting.
dblock = Lock ( ) #global database lock
2018-11-24 18:29:24 +03:00
2018-11-25 20:17:14 +03:00
# Primary in-memory tables. Tracks and artists are referenced by their list index.
SCROBBLES = [ ] # Format: tuple(track_ref,timestamp,saved)
ARTISTS = [ ] # Format: artist
2019-04-07 15:43:36 +03:00
TRACKS = [ ] # Format: namedtuple(artists=frozenset(artist_ref,...),title=title)
2019-04-07 16:27:24 +03:00
2019-04-07 15:43:36 +03:00
# Record types stored in TRACKS and SCROBBLES respectively.
Track = namedtuple ( " Track " , [ " artists " , " title " ] )
Scrobble = namedtuple ( " Scrobble " , [ " track " , " timestamp " , " saved " ] )
2018-11-25 20:17:14 +03:00
2019-03-11 20:06:45 +03:00
### OPTIMIZATION
SCROBBLESDICT = { } # timestamps to scrobble mapping
STAMPS = [ ] # sorted
2019-04-07 16:55:49 +03:00
#STAMPS_SET = set() # as set for easier check if exists # we use the scrobbles dict for that now
2019-04-07 15:43:36 +03:00
# Normalized (lowercased) copies of TRACKS / ARTISTS. getTrackID and getArtistID
# append to them together with the primary lists and use .index() on them as the ID.
TRACKS_LOWER = [ ]
ARTISTS_LOWER = [ ]
2019-04-07 16:55:49 +03:00
# Sets holding the same normalized entries, used for the membership checks.
ARTIST_SET = set ( )
TRACK_SET = set ( )
2019-06-27 12:04:45 +03:00
2019-04-02 17:53:57 +03:00
# MEDALS is looked up by artist (artistInfo); MEDALS_TRACKS by a
# (frozenset(artists), title) tuple (trackInfo). Both are filled outside this view.
MEDALS = { } #literally only changes once per year, no need to calculate that on the fly
2019-04-04 22:29:03 +03:00
MEDALS_TRACKS = { }
2019-06-27 12:25:11 +03:00
# Looked up with the same keys as the medal dicts, defaulting to 0.
WEEKLY_TOPTRACKS = { }
WEEKLY_TOPARTISTS = { }
2018-11-29 18:05:44 +03:00
2018-12-19 17:28:10 +03:00
# Rule helpers: cla cleans incoming artist/title data, coa resolves credited/associated artists.
cla = CleanerAgent ( )
coa = CollectorAgent ( )
2018-11-30 17:44:30 +03:00
# Rows of (API key, client identifier); replaced by loadAPIkeys().
clients = [ ]
2018-11-28 19:45:52 +03:00
2018-11-28 15:02:43 +03:00
# Compared against the scrobble time in pseudo_post_scrobble to decide whether to sync.
lastsync = 0
2018-12-21 18:32:21 +03:00
# rulestate that the entire current database was built with, or False if the database was built from inconsistent scrobble files
db_rulestate = False
2018-12-20 19:23:16 +03:00
2018-11-25 20:17:14 +03:00
2018-11-30 17:44:30 +03:00
### symmetric keys are fine for now since we hopefully use HTTPS
def loadAPIkeys():
    """Reload the module-level ``clients`` table from the authenticated-machines TSV.

    Calls ``tsv.create`` on the file first (presumably so that it exists),
    parses it as two string columns and logs the second column of every row.
    """
    global clients
    keyfile = " clients/authenticated_machines.tsv "
    tsv.create(keyfile)
    clients = tsv.parse(keyfile, " string ", " string ")
    identifiers = [row[1] for row in clients]
    log(" Authenticated Machines: " + " , ".join(identifiers))
2018-11-25 20:17:14 +03:00
2018-11-30 17:44:30 +03:00
def checkAPIkey(k):
    """Return the client identifier registered for API key ``k``, or ``False``.

    The identifier of the first matching row in ``clients`` is returned. It may
    be an empty string, so callers that must tell "valid key" from "unknown key"
    have to compare the result against ``False`` instead of testing truthiness.
    """
    return next((identifier for key, identifier in clients if key == k), False)
2019-05-12 12:46:25 +03:00
def allAPIkeys():
    """Return the list of all API keys (first column of every ``clients`` row)."""
    keys = []
    for key, _identifier in clients:
        keys.append(key)
    return keys
2018-11-25 20:17:14 +03:00
2018-12-12 21:37:59 +03:00
####
## Getting dict representations of database objects
####
2019-04-07 16:01:04 +03:00
def get_scrobble_dict(o):
    """Return the dict representation of scrobble ``o``.

    The scrobble's track reference is resolved through ``TRACKS`` and its
    artists/title are combined with the scrobble's own timestamp.
    """
    trackinfo = get_track_dict(TRACKS[o.track])
    return {
        " artists ": trackinfo[" artists "],
        " title ": trackinfo[" title "],
        " time ": o.timestamp,
    }
2019-03-14 13:07:20 +03:00
2019-04-07 16:01:04 +03:00
def get_artist_dict(o):
    """Return the external representation of artist ``o``.

    Artists are represented by the artist object itself, so despite the name
    this does not return a dict.
    """
    artist = o
    return artist
2019-03-14 13:07:20 +03:00
2019-04-07 16:01:04 +03:00
def get_track_dict(o):
    """Return the dict representation of track ``o``.

    Every artist reference stored on the track is resolved through ``ARTISTS``;
    the title is passed through unchanged.
    """
    artist_names = []
    for artist_ref in o.artists:
        artist_names.append(get_artist_dict(ARTISTS[artist_ref]))
    return {" artists ": artist_names, " title ": o.title}
2018-11-25 20:17:14 +03:00
2018-12-12 21:37:59 +03:00
####
## Creating or finding existing database entries
####
2019-03-14 13:07:20 +03:00
2019-03-11 22:04:23 +03:00
def createScrobble(artists, title, time, volatile=False):
    """Insert a new scrobble into the in-memory database and return its track dict.

    :param artists: collection of artist names
    :param title: track title
    :param time: timestamp of the scrobble; doubles as its unique identifier
    :param volatile: stored as the scrobble's ``saved`` field, i.e. a volatile
        scrobble is treated as if it had already been written to disk
    :returns: ``{}`` when the artists are empty or the title matches the blank
        literal, otherwise the dict representation of the scrobbled track
    """
    if len(artists) == 0 or title == " ":
        return {}

    # The context manager releases the lock on every exit path. The previous
    # manual acquire()/release() pair left the global lock held forever if
    # anything in between raised.
    with dblock:
        i = getTrackID(artists, title)

        # idempotence: the same track at the same timestamp is already stored
        if time in SCROBBLESDICT:
            if i == SCROBBLESDICT[time].track:
                return get_track_dict(TRACKS[i])

        # timestamp as unique identifier: move on to the next free one
        while time in SCROBBLESDICT:
            time += 1

        # if volatile generated, we simply pretend we have already saved it to disk
        obj = Scrobble(i, time, volatile)

        # immediately insert scrobble correctly so we can guarantee sorted list
        index = insert(SCROBBLES, obj, key=lambda x: x[1])
        SCROBBLESDICT[time] = obj
        STAMPS.insert(index, time)  # should be same index as scrobblelist
        register_scrobbletime(time)
        invalidate_caches()

    proxy_scrobble(artists, title, time)
    return get_track_dict(TRACKS[obj.track])
2018-12-05 16:30:50 +03:00
2019-04-07 16:27:24 +03:00
# this will never be called from different threads, so no lock
2018-11-29 18:05:44 +03:00
def readScrobble(artists, title, time):
    """Register a scrobble that is marked as already saved.

    The timestamp is bumped until it is unused. The scrobble is then appended
    to ``SCROBBLES`` (no sorted insert) and indexed in ``SCROBBLESDICT``;
    ``STAMPS`` is not updated here.
    """
    stamp = time
    while stamp in SCROBBLESDICT:
        stamp += 1
    entry = Scrobble(getTrackID(artists, title), stamp, True)
    SCROBBLES.append(entry)
    SCROBBLESDICT[stamp] = entry
2019-03-14 13:07:20 +03:00
2018-11-25 20:17:14 +03:00
def getArtistID(name):
    """Return the index of artist ``name`` in ``ARTISTS``, adding it if unknown.

    Lookup is done on a normalized form of the name (lowercased, with one
    substring replaced). When a new artist is added, the artist it is credited
    as is registered as well and the collector agent's IDs are refreshed.
    """
    normalized = name.lower().replace(" ' ", " ")

    if normalized in ARTIST_SET:
        return ARTISTS_LOWER.index(normalized)

    new_id = len(ARTISTS)
    ARTISTS.append(name)
    ARTIST_SET.add(normalized)
    ARTISTS_LOWER.append(normalized)

    # with a new artist added, we might also get new artists that they are credited as
    credited = coa.getCredited(name)
    getArtistID(credited)
    coa.updateIDs(ARTISTS)
    return new_id
2019-03-14 13:07:20 +03:00
2018-11-25 20:17:14 +03:00
def getTrackID(artists, title):
    """Return the index of the given track in ``TRACKS``, adding it if unknown.

    All artists are resolved (and created if necessary) through getArtistID.
    Tracks are matched on their artist ID set plus a normalized title.
    """
    artist_ids = frozenset(getArtistID(name=a) for a in artists)
    track = Track(artists=artist_ids, title=title)
    track_key = Track(artists=artist_ids, title=title.lower().replace(" ' ", " "))

    if track_key in TRACK_SET:
        return TRACKS_LOWER.index(track_key)

    new_id = len(TRACKS)
    TRACKS.append(track)
    TRACK_SET.add(track_key)
    TRACKS_LOWER.append(track_key)
    return new_id
2018-11-24 18:29:24 +03:00
2019-02-15 21:39:19 +03:00
2019-04-07 15:43:36 +03:00
2019-02-15 21:39:19 +03:00
########
########
## HTTP requests and their associated functions
########
########
2019-05-23 14:13:42 +03:00
# API object on which the endpoint functions below register themselves through
# its get/post decorators.
dbserver = API ( delay = True , path = " api " )
2019-05-12 19:39:46 +03:00
2019-05-23 14:13:42 +03:00
@dbserver.get(" test ")
def test_server(key=None):
    """Report the server state through the HTTP status code.

    Status codes:
        204  Database server is up and operational
        205  Database server is up, but DB is not fully built or is inconsistent
        403  Database server is up, but provided API key is not valid

    :param key: optional API key; it is only validated when given
    :returns: an error text for a wrong key, otherwise nothing
    """
    response.set_header(" Access-Control-Allow-Origin ", " * ")

    # checkAPIkey returns the client identifier, which may be an empty string,
    # or False. Compare against False so a valid key with an empty identifier
    # is not rejected.
    if key is not None and checkAPIkey(key) is False:
        response.status = 403
        return " Wrong API key "

    response.status = 204 if db_rulestate else 205
    return
2018-12-12 21:37:59 +03:00
2019-02-15 21:39:19 +03:00
## All database functions are separated - the external wrapper only reads the request keys, converts them into lists and renames them where necessary, and puts the end result in a dict if not already so it can be returned as json
2019-05-23 14:13:42 +03:00
@dbserver.get(" scrobbles ")
def get_scrobbles_external(**keys):
    """HTTP wrapper around get_scrobbles.

    Converts the request keys with uri_to_internal, forwards the filter, time
    and amount groups, and wraps the result in a dict.
    """
    filter_keys, time_keys, _, amount_keys = uri_to_internal(keys)
    merged = {}
    for group in (filter_keys, time_keys, amount_keys):
        merged.update(group)
    return {" list ": get_scrobbles(**merged)}
2019-02-15 20:11:40 +03:00
def get_scrobbles(**keys):
    """Return the result of db_query for the supported subset of ``keys``.

    Keys that are not in the accepted list are silently dropped.
    """
    accepted = [" artist ", " artists ", " title ", " since ", " to ", " within ", " timerange ", " associated ", " track ", " max_ "]
    query = {name: value for name, value in keys.items() if name in accepted}
    return db_query(**query)
2019-02-15 21:39:19 +03:00
2019-06-17 17:57:20 +03:00
# info for comparison
@dbserver.get(" info ")
def info_external(**keys):
    """HTTP wrapper around info(); the request keys are ignored."""
    return info()
def info():
    """Return the configured name and each charted artist's share of all scrobbles.

    The share is the artist's chart scrobbles as a percentage of the total
    scrobble count, rounded to three decimals.
    """
    totalscrobbles = get_scrobbles_num()
    name = settings.get_settings(" NAME ")
    shares = {}
    for chartentry in get_charts_artists():
        # threshold of 0: every entry with a non-negative share is included
        if chartentry[" scrobbles "] / totalscrobbles >= 0:
            shares[chartentry[" artist "]] = round(chartentry[" scrobbles "] * 100 / totalscrobbles, 3)
    return {" name ": name, " artists ": shares}
2019-02-16 18:28:32 +03:00
2019-03-03 03:29:55 +03:00
# UNUSED
#@dbserver.route("/amounts")
#def get_amounts_external():
# return get_amounts() #really now
#
#def get_amounts():
# return {"scrobbles":len(SCROBBLES),"tracks":len(TRACKS),"artists":len(ARTISTS)}
2019-03-14 13:07:20 +03:00
2019-02-16 18:28:32 +03:00
2019-05-23 14:13:42 +03:00
@dbserver.get(" numscrobbles ")
def get_scrobbles_num_external(**keys):
    """HTTP wrapper around get_scrobbles_num.

    Converts the request keys with uri_to_internal, forwards the filter, time
    and amount groups, and wraps the count in a dict.
    """
    filter_keys, time_keys, _, amount_keys = uri_to_internal(keys)
    merged = {}
    for group in (filter_keys, time_keys, amount_keys):
        merged.update(group)
    return {" amount ": get_scrobbles_num(**merged)}
def get_scrobbles_num(**keys):
    """Return the number of entries db_query yields for the supported subset of ``keys``."""
    accepted = [" artist ", " track ", " artists ", " title ", " since ", " to ", " within ", " timerange ", " associated "]
    query = {name: value for name, value in keys.items() if name in accepted}
    return len(db_query(**query))
2019-03-03 03:29:55 +03:00
2019-03-12 18:06:09 +03:00
#for multiple since values (must be ordered)
# DOESN'T SEEM TO ACTUALLY BE FASTER
# REEVALUATE
#def get_scrobbles_num_multiple(sinces=[],to=None,**keys):
2019-03-14 13:07:20 +03:00
#
2019-03-12 18:06:09 +03:00
# sinces_stamps = [time_stamps(since,to,None)[0] for since in sinces]
# #print(sinces)
# #print(sinces_stamps)
# minsince = sinces[-1]
# r = db_query(**{k:keys[k] for k in keys if k in ["artist","track","artists","title","associated","to"]},since=minsince)
2019-03-14 13:07:20 +03:00
#
2019-03-12 18:06:09 +03:00
# #print(r)
2019-03-14 13:07:20 +03:00
#
2019-03-12 18:06:09 +03:00
# validtracks = [0 for s in sinces]
2019-03-14 13:07:20 +03:00
#
2019-03-12 18:06:09 +03:00
# i = 0
# si = 0
# while True:
# if si == len(sinces): break
# if i == len(r): break
# if r[i]["time"] >= sinces_stamps[si]:
# validtracks[si] += 1
# else:
# si += 1
# continue
# i += 1
2019-03-14 13:07:20 +03:00
#
#
2019-03-12 18:06:09 +03:00
# return validtracks
2019-03-14 13:07:20 +03:00
2019-03-12 18:06:09 +03:00
2019-02-16 18:28:32 +03:00
# UNUSED
2019-03-03 03:29:55 +03:00
#@dbserver.route("/charts")
#def get_charts_external():
# keys = FormsDict.decode(request.query)
# ckeys = {}
# ckeys["since"], ckeys["to"], ckeys["within"] = keys.get("since"), keys.get("to"), keys.get("in")
2019-03-14 13:07:20 +03:00
#
# result = get_scrobbles_num(**ckeys)
2019-03-03 03:29:55 +03:00
# return {"number":result}
2019-02-16 18:28:32 +03:00
#def get_charts(**keys):
# return db_aggregate(**{k:keys[k] for k in keys if k in ["since","to","within"]})
2019-02-15 21:39:19 +03:00
2019-02-02 20:08:30 +03:00
2019-05-23 14:13:42 +03:00
@dbserver.get(" tracks ")
def get_tracks_external(**keys):
    """HTTP wrapper around get_tracks; only the filter key group is forwarded."""
    filter_keys, _, _, _ = uri_to_internal(keys, forceArtist=True)
    return {" list ": get_tracks(**dict(filter_keys))}
2019-02-15 23:07:08 +03:00
def get_tracks(artist=None):
    """Return the dict representations of all tracks, optionally of one artist only.

    :param artist: artist as stored in ``ARTISTS``; ``None`` selects every track
    :raises ValueError: if ``artist`` is given but not present in ``ARTISTS``
    """
    if artist is None:
        return [get_track_dict(t) for t in TRACKS]

    # Filtering on the artist ID before building the dicts avoids converting
    # tracks that are thrown away anyway.
    artistid = ARTISTS.index(artist)
    return [get_track_dict(t) for t in TRACKS if artistid in t.artists]
2019-02-15 21:39:19 +03:00
2019-05-23 14:13:42 +03:00
@dbserver.get(" artists ")
def get_artists_external():
    """HTTP wrapper around get_artists; wraps the artist list in a dict."""
    return {" list ": get_artists()}
2018-11-25 16:49:53 +03:00
def get_artists():
    """Return the module-level artist list itself (not a copy)."""
    all_artists = ARTISTS
    return all_artists
2019-03-14 13:07:20 +03:00
2019-05-23 14:13:42 +03:00
@dbserver.get(" charts/artists ")
def get_charts_artists_external(**keys):
    """HTTP wrapper around get_charts_artists; only the time key group is forwarded."""
    _, time_keys, _, _ = uri_to_internal(keys)
    return {" list ": get_charts_artists(**dict(time_keys))}
def get_charts_artists(**keys):
    """Return db_aggregate grouped by artist, restricted to the supported time keys."""
    accepted = [" since ", " to ", " within ", " timerange "]
    time_keys = {name: value for name, value in keys.items() if name in accepted}
    return db_aggregate(by=" ARTIST ", **time_keys)
2019-02-15 21:39:19 +03:00
2019-05-23 14:13:42 +03:00
@dbserver.get(" charts/tracks ")
def get_charts_tracks_external(**keys):
    """HTTP wrapper around get_charts_tracks; forwards the filter and time key groups."""
    filter_keys, time_keys, _, _ = uri_to_internal(keys, forceArtist=True)
    merged = dict(filter_keys)
    merged.update(time_keys)
    return {" list ": get_charts_tracks(**merged)}
2019-03-14 13:07:20 +03:00
2019-02-15 21:39:19 +03:00
def get_charts_tracks(**keys):
    """Return db_aggregate grouped by track, restricted to the supported time/artist keys."""
    accepted = [" since ", " to ", " within ", " timerange ", " artist "]
    selected = {name: value for name, value in keys.items() if name in accepted}
    return db_aggregate(by=" TRACK ", **selected)
2019-02-15 21:39:19 +03:00
2019-03-14 13:07:20 +03:00
2019-05-23 14:13:42 +03:00
@dbserver.get(" pulse ")
def get_pulse_external(**keys):
    """HTTP wrapper around get_pulse; forwards all four key groups of uri_to_internal."""
    merged = {}
    for group in uri_to_internal(keys):
        merged.update(group)
    return {" list ": get_pulse(**merged)}
2019-03-03 03:29:55 +03:00
def get_pulse(**keys):
    """Return the number of matching scrobbles for each time range.

    The ranges come from ``ranges`` (time/step keys); the remaining filter keys
    are passed to db_query for every range.
    """
    range_names = [" since ", " to ", " within ", " timerange ", " step ", " stepn ", " trail "]
    filter_names = [" artists ", " artist ", " track ", " title ", " associated "]

    rngs = ranges(**{name: value for name, value in keys.items() if name in range_names})
    filters = {name: value for name, value in keys.items() if name in filter_names}

    return [
        {" range ": rng, " scrobbles ": len(db_query(timerange=rng, **filters))}
        for rng in rngs
    ]
2019-05-23 14:13:42 +03:00
@dbserver.get(" performance ")
def get_performance_external(**keys):
    """HTTP wrapper around get_performance; forwards all four key groups of uri_to_internal."""
    filter_keys, time_keys, internal_keys, amount_keys = uri_to_internal(keys)
    merged = {}
    for group in (filter_keys, time_keys, internal_keys, amount_keys):
        merged.update(group)
    return {" list ": get_performance(**merged)}
def get_performance(**keys):
    """Return the chart rank of a track or artist for each time range.

    If a track is given it takes precedence over an artist. The rank is
    ``None`` for ranges in which the entity does not appear in the charts, and
    also when neither a track nor an artist was given.

    :returns: list of dicts, one per range, holding the range and the rank
    """
    range_names = [" since ", " to ", " within ", " timerange ", " step ", " stepn ", " trail "]
    rngs = ranges(**{k: keys[k] for k in keys if k in range_names})
    results = []

    for rng in rngs:
        # Initialised up front: previously ``rank`` was only bound inside the
        # track/artist branches, so a call with neither key raised
        # UnboundLocalError.
        rank = None
        if " track " in keys:
            field, charts = " track ", get_charts_tracks(timerange=rng)
        elif " artist " in keys:
            field, charts = " artist ", get_charts_artists(timerange=rng)
        else:
            field, charts = None, ()

        for c in charts:
            if c[field] == keys[field]:
                rank = c[" rank "]
                break
        results.append({" range ": rng, " rank ": rank})

    return results
2019-02-15 23:07:08 +03:00
2019-03-14 13:07:20 +03:00
2019-05-23 14:13:42 +03:00
@dbserver.get(" top/artists ")
def get_top_artists_external(**keys):
    """HTTP wrapper around get_top_artists; forwards the time and internal key groups."""
    _, time_keys, internal_keys, _ = uri_to_internal(keys)
    merged = dict(time_keys)
    merged.update(internal_keys)
    return {" list ": get_top_artists(**merged)}
2019-03-14 13:07:20 +03:00
2019-03-03 03:29:55 +03:00
def get_top_artists(**keys):
    """Return the first artist chart entry for each time range.

    :returns: list of dicts, one per range. When no chart entry can be
        determined for a range, a placeholder with no artist and zero scrobbles
        is used (note that the placeholder carries no counting entry).
    """
    range_names = [" since ", " to ", " within ", " timerange ", " step ", " stepn ", " trail "]
    rngs = ranges(**{k: keys[k] for k in keys if k in range_names})
    results = []

    for rng in rngs:
        try:
            res = db_aggregate(timerange=rng, by=" ARTIST ")[0]
            results.append({" range ": rng, " artist ": res[" artist "], " counting ": res[" counting "], " scrobbles ": res[" scrobbles "]})
        except Exception:
            # Best effort, e.g. an empty chart. ``Exception`` instead of a bare
            # except so KeyboardInterrupt/SystemExit are no longer swallowed.
            results.append({" range ": rng, " artist ": None, " scrobbles ": 0})

    return results
2019-05-23 14:13:42 +03:00
@dbserver.get(" top/tracks ")
def get_top_tracks_external(**keys):
    """HTTP wrapper around get_top_tracks; forwards the time and internal key groups."""
    _, time_keys, internal_keys, _ = uri_to_internal(keys)
    merged = dict(time_keys)
    merged.update(internal_keys)
    # IMPLEMENT THIS FOR TOP TRACKS OF ARTIST AS WELL?
    return {" list ": get_top_tracks(**merged)}
2019-03-03 03:29:55 +03:00
def get_top_tracks(**keys):
    """Return the first track chart entry for each time range.

    :returns: list of dicts, one per range. When no chart entry can be
        determined for a range, a placeholder with no track and zero scrobbles
        is used.
    """
    range_names = [" since ", " to ", " within ", " timerange ", " step ", " stepn ", " trail "]
    rngs = ranges(**{k: keys[k] for k in keys if k in range_names})
    results = []

    for rng in rngs:
        try:
            res = db_aggregate(timerange=rng, by=" TRACK ")[0]
            results.append({" range ": rng, " track ": res[" track "], " scrobbles ": res[" scrobbles "]})
        except Exception:
            # Best effort, e.g. an empty chart. ``Exception`` instead of a bare
            # except so KeyboardInterrupt/SystemExit are no longer swallowed.
            results.append({" range ": rng, " track ": None, " scrobbles ": 0})

    return results
2019-03-14 13:07:20 +03:00
2019-05-23 14:13:42 +03:00
@dbserver.get(" artistinfo ")
def artistInfo_external(**keys):
    """HTTP wrapper around artistInfo; only the filter key group is forwarded."""
    filter_keys, _, _, _ = uri_to_internal(keys, forceArtist=True)
    return artistInfo(**dict(filter_keys))
2019-03-14 13:07:20 +03:00
2019-02-17 17:02:27 +03:00
def artistInfo(artist):
    """Return scrobble count, chart position and extras for ``artist``.

    If the artist has a chart entry of their own, the result also holds the
    associated artists known to the database, their medals and the number of
    top weeks. Otherwise the chart position of the artist they are credited as
    is returned, together with that artist's name as a replacement hint.

    :raises IndexError: if the credited artist has no chart entry either
    """
    charts = db_aggregate(by=" ARTIST ")
    # we can't take the scrobble number from the charts because that includes
    # all countas scrobbles
    scrobbles = len(db_query(artists=[artist]))

    try:
        c = [e for e in charts if e[" artist "] == artist][0]
        others = [a for a in coa.getAllAssociated(artist) if a in ARTISTS]
        position = c[" rank "]
        # The weekly get_performance() result used to be computed here and was
        # never used; the call is dropped instead of paying for it on every
        # request.
        return {
            " scrobbles ": scrobbles,
            " position ": position,
            " associated ": others,
            " medals ": MEDALS.get(artist),
            " topweeks ": WEEKLY_TOPARTISTS.get(artist, 0),
        }
    except Exception:
        # if the artist isn't in the charts, they are not being credited and we
        # need to show information about the credited one
        # (``Exception`` rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate)
        artist = coa.getCredited(artist)
        c = [e for e in charts if e[" artist "] == artist][0]
        position = c[" rank "]
        return {" replace ": artist, " scrobbles ": scrobbles, " position ": position}
2019-03-14 13:07:20 +03:00
2019-05-23 14:13:42 +03:00
@dbserver.get(" trackinfo ")
def trackInfo_external(artist: Multi[str], **keys):
    """HTTP wrapper around trackInfo.

    The separately received artist values are appended to a multidict built
    from the remaining keys, so that the normal uri_to_internal conversion can
    be used.
    """
    multikeys = FormsDict(keys)
    for name in artist:
        multikeys.append(" artist ", name)
    filter_keys, _, _, _ = uri_to_internal(multikeys, forceTrack=True)
    return trackInfo(**dict(filter_keys))
2019-06-13 12:37:42 +03:00
def trackInfo(track):
    """Return scrobble count, chart position, medals, certification and top weeks of ``track``.

    The scrobble count is taken from the track's chart entry. The certification
    is the highest of diamond/platinum/gold whose configured threshold is
    reached, or ``None``.

    :raises IndexError: if the track has no chart entry
    """
    charts = db_aggregate(by=" TRACK ")
    # chart entry of track always has right scrobble number, no countas rules here
    entry = [e for e in charts if e[" track "] == track][0]
    scrobbles = entry[" scrobbles "]
    position = entry[" rank "]

    gold, platinum, diamond = settings.get_settings(" SCROBBLES_GOLD ", " SCROBBLES_PLATINUM ", " SCROBBLES_DIAMOND ")
    cert = None
    # checked from the highest level down; the first threshold reached wins
    for level, threshold in ((" diamond ", diamond), (" platinum ", platinum), (" gold ", gold)):
        if scrobbles >= threshold:
            cert = level
            break

    track_key = (frozenset(track[" artists "]), track[" title "])
    return {
        " scrobbles ": scrobbles,
        " position ": position,
        " medals ": MEDALS_TRACKS.get(track_key),
        " certification ": cert,
        " topweeks ": WEEKLY_TOPTRACKS.get(track_key, 0),
    }
2019-02-17 17:02:27 +03:00
2019-05-23 14:13:42 +03:00
@dbserver.get(" newscrobble ")
def pseudo_post_scrobble(**keys):
    """Accept a new scrobble submitted through a GET request.

    Reads artist, title, API key and time from ``keys``. A missing or
    non-numeric time falls back to the current UTC timestamp. After the
    scrobble has been created, sync() is triggered if the scrobble time is more
    than an hour past ``lastsync``.

    :returns: a blank string with status 403 for an unknown API key, otherwise
        a dict with a status entry and the track dict returned by createScrobble
    """
    artists = keys.get(" artist ")
    title = keys.get(" title ")
    apikey = keys.get(" key ")
    client = checkAPIkey(apikey)
    if client is False:  # empty string allowed!
        response.status = 403
        return " "

    try:
        time = int(keys.get(" time "))
    except (TypeError, ValueError):
        # Only the two failures int() can produce here (missing or malformed
        # value) are handled, instead of swallowing everything with a bare except.
        time = int(datetime.datetime.now(tz=datetime.timezone.utc).timestamp())

    log(" Incoming scrobble (native API): Client " + client + " , ARTISTS: " + str(artists) + " , TRACK: " + title, module=" debug ")
    (artists, title) = cla.fullclean(artists, title)

    ## this is necessary for localhost testing
    response.set_header(" Access-Control-Allow-Origin ", " * ")

    trackdict = createScrobble(artists, title, time)

    if (time - lastsync) > 3600:
        sync()

    return {" status ": " success ", " track ": trackdict}
2019-03-14 13:07:20 +03:00
2019-05-23 14:13:42 +03:00
@dbserver.post(" newscrobble ")
def post_scrobble(**keys):
    """Accept a new scrobble submitted through a POST request.

    Reads artist, title, API key and time from ``keys``. A missing or
    non-numeric time falls back to the current UTC timestamp. sync() is called
    after every accepted scrobble.

    :returns: a blank string with status 403 for an unknown API key, otherwise
        a dict with a status entry and the track dict returned by createScrobble
    """
    artists = keys.get(" artist ")
    title = keys.get(" title ")
    apikey = keys.get(" key ")
    client = checkAPIkey(apikey)
    if client is False:  # empty string allowed!
        response.status = 403
        return " "

    try:
        time = int(keys.get(" time "))
    except (TypeError, ValueError):
        # Only the two failures int() can produce here (missing or malformed
        # value) are handled, instead of swallowing everything with a bare except.
        time = int(datetime.datetime.now(tz=datetime.timezone.utc).timestamp())

    log(" Incoming scrobble (native API): Client " + client + " , ARTISTS: " + str(artists) + " , TRACK: " + title, module=" debug ")
    (artists, title) = cla.fullclean(artists, title)

    trackdict = createScrobble(artists, title, time)

    # always sync, one filesystem access every three minutes shouldn't matter
    sync()

    return {" status ": " success ", " track ": trackdict}
2019-03-14 13:07:20 +03:00
2019-05-12 12:46:25 +03:00
# standard-compliant scrobbling methods
2019-05-26 11:53:33 +03:00
@dbserver.post(" s/ {path} ", pass_headers=True)
@dbserver.get(" s/ {path} ", pass_headers=True)
def sapi(path: Multi, **keys):
    """Scrobbles according to a standardized protocol.

    :param string path: Path according to the scrobble protocol
    :param string keys: Query keys according to the scrobble protocol
    """
    # drop empty path segments before handing over to the protocol handler
    segments = [segment for segment in path if segment]
    return compliant_api.handle(segments, keys)
2019-05-12 12:46:25 +03:00
2019-05-23 14:13:42 +03:00
@dbserver.get(" sync ")
def abouttoshutdown():
    """Trigger sync() on request; nothing is returned and the process keeps running."""
    sync()
    return None
2018-12-20 19:23:16 +03:00
2019-05-23 14:13:42 +03:00
@dbserver.post(" newrule ")
def newrule():
    """Append a rule submitted via form data to the web-made rule file.

    The API key is removed from the form data and validated; the remaining form
    keys are written as one entry. Adding a rule marks the database rulestate
    as inconsistent. Requests with an unknown API key are ignored.
    """
    global db_rulestate
    keys = FormsDict.decode(request.forms)
    apikey = keys.pop(" key ", None)
    # checkAPIkey returns the (possibly empty) client identifier or False, so
    # compare against False like the scrobble endpoints do instead of relying
    # on truthiness.
    if checkAPIkey(apikey) is not False:
        tsv.add_entry(" rules/webmade.tsv ", list(keys))
        db_rulestate = False
2019-03-14 13:07:20 +03:00
2019-05-23 14:13:42 +03:00
@dbserver.get(" issues ")
def issues_external():  # probably not even needed
    """HTTP wrapper that returns the result of issues() unchanged."""
    report = issues()
    return report
2019-03-14 13:07:20 +03:00
2018-12-20 19:23:16 +03:00
def issues ( ) :
# Heuristically scans ARTISTS for naming problems and returns the four results
# assembled in the return statement at the bottom: suspected duplicate pairs,
# names that look like a combination of other known artists, name remainders
# that might be artists not yet in the database, and whether the database is
# considered inconsistent (the negation of db_rulestate).
# NOTE(review): indentation is not visible in this view of the file, so the
# comments below describe individual statements and deliberately make no
# claims about how they are nested.
combined = [ ]
duplicates = [ ]
newartists = [ ]
2018-12-21 18:32:21 +03:00
inconsistent = not db_rulestate
2018-12-23 01:19:52 +03:00
# if the user manually edits files while the server is running this won't show, but too lazy to check the rulestate here
2019-03-14 13:07:20 +03:00
2018-12-20 19:23:16 +03:00
# itertools is only referenced by the commented-out code further down;
# difflib is not referenced anywhere in the visible body.
import itertools
import difflib
2019-03-14 13:07:20 +03:00
2018-12-20 19:23:16 +03:00
# sortedartists: longest names first; reversesortedartists: the same list reversed.
sortedartists = ARTISTS . copy ( )
sortedartists . sort ( key = len , reverse = True )
reversesortedartists = sortedartists . copy ( )
reversesortedartists . reverse ( )
for a in reversesortedartists :
2019-03-14 13:07:20 +03:00
2018-12-20 19:23:16 +03:00
# nochange gates the appends to `combined` and `newartists` below.
nochange = cla . confirmedReal ( a )
2019-03-14 13:07:20 +03:00
2018-12-20 19:23:16 +03:00
# st starts as the full name and has matched artist names removed from it;
# lis collects the artist names that were found inside it.
st = a
lis = [ ]
reachedmyself = False
for ar in sortedartists :
# Skip entries until `a` itself has been passed in the length-sorted list,
# so only entries that come after it are compared.
if ( ar != a ) and not reachedmyself :
continue
elif not reachedmyself :
reachedmyself = True
continue
2019-03-14 13:07:20 +03:00
2018-12-20 19:23:16 +03:00
# Case-insensitive match, optionally with an article prefix -> duplicate.
if ( ar . lower ( ) == a . lower ( ) ) or ( " the " + ar . lower ( ) == a . lower ( ) ) or ( " a " + ar . lower ( ) == a . lower ( ) ) :
duplicates . append ( ( ar , a ) )
break
2019-03-14 13:07:20 +03:00
2018-12-20 19:23:16 +03:00
# `ar` occurs in the remaining string: record it and cut it out.
if ( ar + " " in st ) or ( " " + ar in st ) :
lis . append ( ar )
st = st . replace ( ar , " " ) . strip ( )
elif ( ar == st ) :
lis . append ( ar )
st = " "
if not nochange :
combined . append ( ( a , lis ) )
break
2019-03-14 13:07:20 +03:00
2018-12-20 19:23:16 +03:00
# `ar` is contained in the remainder and makes up more than half of its length.
elif ( ar in st ) and len ( ar ) * 2 > len ( st ) :
duplicates . append ( ( a , ar ) )
2019-03-14 13:07:20 +03:00
2018-12-20 19:23:16 +03:00
# Remove connecting words from what is left of the name.
st = st . replace ( " & " , " " ) . replace ( " and " , " " ) . replace ( " with " , " " ) . strip ( )
# Classify the leftover by its length and by how many artists were matched.
if st != " " and st != a :
if len ( st ) < 5 and len ( lis ) == 1 :
#check if we havent just randomly found the string in another word
#if (" " + st + " ") in lis[0] or (lis[0].endswith(" " + st)) or (lis[0].startswith(st + " ")):
duplicates . append ( ( a , lis [ 0 ] ) )
elif len ( st ) < 5 and len ( lis ) > 1 and not nochange :
combined . append ( ( a , lis ) )
elif len ( st ) > = 5 and not nochange :
#check if we havent just randomly found the string in another word
if ( " " + st + " " ) in a or ( a . endswith ( " " + st ) ) or ( a . startswith ( st + " " ) ) :
newartists . append ( ( st , a , lis ) )
2019-03-14 13:07:20 +03:00
2018-12-20 19:23:16 +03:00
# The commented-out code below is an earlier, combinations-based approach.
#for c in itertools.combinations(ARTISTS,3):
# l = list(c)
# print(l)
# l.sort(key=len,reverse=True)
# [full,a1,a2] = l
# if (a1 + " " + a2 in full) or (a2 + " " + a1 in full):
# combined.append((full,a1,a2))
#for c in itertools.combinations(ARTISTS,2):
# if
#
# if (c[0].lower == c[1].lower):
# duplicates.append((c[0],c[1]))
2019-03-14 13:07:20 +03:00
2018-12-20 19:23:16 +03:00
# elif (c[0] + " " in c[1]) or (" " + c[0] in c[1]) or (c[1] + " " in c[0]) or (" " + c[1] in c[0]):
# if (c[0] in c[1]):
# full, part = c[1],c[0]
# rest = c[1].replace(c[0],"").strip()
# else:
# full, part = c[0],c[1]
# rest = c[0].replace(c[1],"").strip()
# if rest in ARTISTS and full not in [c[0] for c in combined]:
# combined.append((full,part,rest))
2019-03-14 13:07:20 +03:00
2018-12-20 19:23:16 +03:00
# elif (c[0] in c[1]) or (c[1] in c[0]):
# duplicates.append((c[0],c[1]))
2019-03-14 13:07:20 +03:00
2018-12-21 18:32:21 +03:00
return { " duplicates " : duplicates , " combined " : combined , " newartists " : newartists , " inconsistent " : inconsistent }
2019-03-24 16:56:34 +03:00
2019-05-23 14:13:42 +03:00
@dbserver.post("importrules")
def import_rulemodule():
    """Activate or deactivate one of the predefined rule files.

    Form fields read from the request:
        key       API key; nothing happens unless ``checkAPIkey`` accepts it.
        filename  Name of the rule module (without the ``.tsv`` extension).
        remove    If present (any value), the rule file is deactivated
                  instead of activated.

    Activation creates the symlink ``rules/<filename>.tsv`` with the link
    target ``predefined/<filename>.tsv``; deactivation deletes
    ``rules/<filename>.tsv``. Errors from ``os.remove`` / ``os.symlink``
    (e.g. missing or already existing file) are not caught here.
    """
    keys = FormsDict.decode(request.forms)
    apikey = keys.pop("key", None)
    if not checkAPIkey(apikey):
        return

    # A request without a filename used to crash while iterating over None.
    requested = keys.get("filename") or ""
    remove = keys.get("remove") is not None

    # Only whitelisted characters survive, so the name cannot contain path
    # separators or dots.
    validchars = "-_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
    filename = "".join(c for c in requested if c in validchars)
    if not filename:
        # An empty name would otherwise act on "rules/.tsv".
        log("Ignoring rulefile request without a usable filename")
        return

    rulefile = "rules/" + filename + ".tsv"
    if remove:
        log("Deactivating predefined rulefile " + filename)
        os.remove(rulefile)
    else:
        log("Importing predefined rulefile " + filename)
        os.symlink("predefined/" + filename + ".tsv", rulefile)
2019-03-24 16:56:34 +03:00
2019-05-23 14:13:42 +03:00
@dbserver.post("rebuild")
def rebuild():
    """Rebuild the in-memory database from the files on disk.

    Requires a valid API key in the ``key`` form field; without it the
    request does nothing.
    """
    global db_rulestate, cla, coa

    form = FormsDict.decode(request.forms)
    if not checkAPIkey(form.pop("key", None)):
        return

    log("Database rebuild initiated!")
    db_rulestate = False
    # write unsaved scrobbles to disk before the external script runs
    sync()
    os.system("python3 fixexisting.py")
    # replace both agents with freshly constructed ones, then reload everything
    cla = CleanerAgent()
    coa = CollectorAgent()
    build_db()
    invalidate_caches()
2018-12-20 19:23:16 +03:00
2019-03-06 19:50:36 +03:00
2019-05-23 14:13:42 +03:00
@dbserver.get("search")
def search(**keys):
    """Search artists and tracks for the ``query`` parameter.

    Parameters (taken from ``keys``):
        query  Search string; matching is case-insensitive. If it is
               missing, two empty result lists are returned.
        max    Optional maximum number of entries per result list; converted
               with ``int`` (a non-numeric value raises ``ValueError``).

    Returns a dict ``{"artists": [...], "tracks": [...]}``. Artist entries
    carry ``name``, ``link`` and ``image``; track entries are the track dicts
    from ``db_search`` extended by ``link`` and ``image``.
    """
    query = keys.get("query")
    max_ = keys.get("max")
    if max_ is not None:
        max_ = int(max_)
    if query is None:
        # previously crashed with AttributeError on None.lower()
        return {"artists": [], "tracks": []}
    query = query.lower()

    artists = db_search(query, type="ARTIST")
    tracks = db_search(query, type="TRACK")

    def relevance(text):
        # if the string begins with the query it's a better match, if a word
        # in it begins with it, still good. also, shorter is better (because
        # longer titles would be easier to further specify)
        lowered = text.lower()
        if lowered.startswith(query):
            tier = 0
        elif " " + query in lowered:
            tier = 1
        else:
            tier = 2
        return (tier, len(text))

    artists.sort(key=relevance)
    tracks.sort(key=lambda t: relevance(t["title"]))

    # add links -- only for the entries that are actually returned
    artists_result = []
    for a in artists[:max_]:
        uri = compose_querystring(internal_to_uri({"artist": a}))
        artists_result.append({
            "name": a,
            "link": "/artist?" + uri,
            "image": "/image?" + uri,
        })

    tracks_result = []
    for t in tracks[:max_]:
        # derive the query string once, before the extra keys are added
        uri = compose_querystring(internal_to_uri({"track": t}))
        result = dict(t)
        result["link"] = "/track?" + uri
        result["image"] = "/image?" + uri
        tracks_result.append(result)

    return {"artists": artists_result, "tracks": tracks_result}
2019-03-14 13:07:20 +03:00
2018-12-12 21:37:59 +03:00
####
## Server operation
####
2018-11-24 18:29:24 +03:00
# Starts the server
2019-05-12 19:39:46 +03:00
def start_db():
    """Start the database: record the sync time, load scrobbles and API keys."""
    global lastsync

    log("Starting database...")
    # remember the current moment (UTC, as unix timestamp) as the last sync
    startup = datetime.datetime.now(tz=datetime.timezone.utc)
    lastsync = int(startup.timestamp())
    build_db()
    loadAPIkeys()
    #run(dbserver, host='::', port=PORT, server='waitress')
    log("Database reachable!")
2018-11-25 20:17:14 +03:00
def build_db():
    """Load all scrobbles from disk into the in-memory structures.

    Resets SCROBBLES, ARTISTS, TRACKS, STAMPS and SCROBBLESDICT, feeds every
    parsed row to ``readScrobble``, sorts the results, triggers the regular
    utility tasks and finally refreshes ``db_rulestate``.
    """
    global SCROBBLES, ARTISTS, TRACKS
    global SCROBBLESDICT, STAMPS
    global db_rulestate

    log("Building database...")

    SCROBBLES, ARTISTS, TRACKS, STAMPS = [], [], [], []
    SCROBBLESDICT = {}

    # parse files: each row is (timestamp, artists joined by "␟", title)
    for row in tsv.parse_all("scrobbles", "int", "string", "string", comments=False):
        readScrobble(row[1].split("␟"), row[2], row[0])

    # optimize database: scrobbles ordered by time, plus a sorted list of all timestamps
    SCROBBLES.sort(key=lambda scrobble: scrobble[1])
    STAMPS = sorted(SCROBBLESDICT)

    # inform malojatime module about earliest scrobble
    if STAMPS:
        register_scrobbletime(STAMPS[0])

    # artists that only exist because of countas rules need no extra pass
    # here; per the original note that already happens on adding every artist

    # start regular tasks
    utilities.update_medals()
    utilities.update_weekly()

    db_rulestate = utilities.consistentRulestate("scrobbles", cla.checksums)

    log("Database fully built!")
2018-11-24 18:29:24 +03:00
2019-03-14 13:07:20 +03:00
# Saves all cached entries to disk
2018-11-28 15:02:43 +03:00
def sync():
    """Write every scrobble not yet flagged as saved to its monthly file.

    Scrobbles are appended to ``scrobbles/<year>_<month>.tsv``, marked as
    saved in SCROBBLES, and ``lastsync`` is set to the current UTC time.
    """
    global lastsync

    # rows are collected per file first so that no file is opened once per entry
    pending = {}

    for idx, scrobble in enumerate(SCROBBLES):
        if scrobble[2]:
            continue  # already on disk

        info = get_scrobble_dict(scrobble)

        # deterministic artist order, so that a diff shows what has actually
        # been changed when files are updated with new rules
        artistss = "␟".join(sorted(info["artists"]))
        day = datetime.date.fromtimestamp(info["time"])
        row = [str(info["time"]), artistss, info["title"]]

        monthcode = "{}_{}".format(day.year, day.month)
        pending.setdefault(monthcode, []).append(row)

        SCROBBLES[idx] = (scrobble[0], scrobble[1], True)

    for monthcode, rows in pending.items():
        path = "scrobbles/" + monthcode + ".tsv"
        tsv.add_entries(path, rows, comments=False)
        utilities.combineChecksums(path, cla.checksums)

    lastsync = int(datetime.datetime.now(tz=datetime.timezone.utc).timestamp())

    # save cached images
    #saveCache()
2019-03-14 13:07:20 +03:00
2018-11-24 18:29:24 +03:00
2018-12-12 21:37:59 +03:00
2019-03-11 22:44:37 +03:00
###
## Caches in front of DB
2019-05-09 17:58:25 +03:00
## the volatile caches are intended mainly for excessive site navigation during one session
## the permanent caches are there to save data that is hard to calculate and never changes (old charts)
2019-03-11 22:44:37 +03:00
###
import copy
# volatile cache for db_query results; reset by invalidate_caches()
cache_query = {}

# permanent cache for db_query results: disk-backed only with a recent enough
# doreah and the experimental features switched on, otherwise memory-only
if not (doreah.version >= (0, 7, 1) and settings.get_settings("EXPERIMENTAL_FEATURES")):
    cache_query_permanent = Cache(maxmemory=1024 * 1024 * 500)
else:
    cache_query_permanent = DiskDict(
        name="dbquery",
        folder="cache",
        maxmemory=1024 * 1024 * 500,
        maxstorage=1024 * 1024 * settings.get_settings("DB_CACHE_SIZE"),
    )

# (year, month, day) of the last cache invalidation
cacheday = (0, 0, 0)
def db_query(**kwargs):
    """Cached front end for ``db_query_full``; takes the same keyword arguments.

    Queries whose ``timerange`` reports ``active()`` as false go through the
    permanent cache, everything else through the volatile one. Cache hits are
    returned as shallow copies.
    """
    check_cache_age()
    key = utilities.serialize(kwargs)
    use_permanent = "timerange" in kwargs and not kwargs["timerange"].active()

    if use_permanent:
        if key in cache_query_permanent:
            return copy.copy(cache_query_permanent.get(key))
        result = db_query_full(**kwargs)
        cache_query_permanent.add(key, copy.copy(result))
        return result

    if key in cache_query:
        return copy.copy(cache_query[key])
    result = db_query_full(**kwargs)
    cache_query[key] = copy.copy(result)
    return result
# volatile cache for db_aggregate results; reset by invalidate_caches()
cache_aggregate = {}

# permanent cache for db_aggregate results: disk-backed only with a recent
# enough doreah and the experimental features switched on
if not (doreah.version >= (0, 7, 1) and settings.get_settings("EXPERIMENTAL_FEATURES")):
    cache_aggregate_permanent = Cache(maxmemory=1024 * 1024 * 500)
else:
    cache_aggregate_permanent = DiskDict(
        name="dbaggregate",
        folder="cache",
        maxmemory=1024 * 1024 * 500,
        maxstorage=1024 * 1024 * settings.get_settings("DB_CACHE_SIZE"),
    )
2019-03-11 22:44:37 +03:00
def db_aggregate(**kwargs):
    """Cached front end for ``db_aggregate_full``; takes the same keyword arguments.

    Aggregations whose ``timerange`` reports ``active()`` as false go through
    the permanent cache, everything else through the volatile one. Cache hits
    are returned as shallow copies.
    """
    check_cache_age()
    key = utilities.serialize(kwargs)
    use_permanent = "timerange" in kwargs and not kwargs["timerange"].active()

    if use_permanent:
        if key in cache_aggregate_permanent:
            return copy.copy(cache_aggregate_permanent.get(key))
        result = db_aggregate_full(**kwargs)
        cache_aggregate_permanent.add(key, copy.copy(result))
        return result

    if key in cache_aggregate:
        return copy.copy(cache_aggregate[key])
    result = db_aggregate_full(**kwargs)
    cache_aggregate[key] = copy.copy(result)
    return result
2019-03-14 13:07:20 +03:00
2019-03-11 22:44:37 +03:00
def invalidate_caches():
    """Empty the volatile caches and record today's UTC date in ``cacheday``.

    Only ``cache_query`` and ``cache_aggregate`` are reset; the permanent
    caches are left untouched.
    """
    global cache_query, cache_aggregate, cacheday

    cache_query = {}
    cache_aggregate = {}

    # timezone-aware "now", consistent with sync()/start_db(); the naive
    # datetime.utcnow() is deprecated since Python 3.12
    now = datetime.datetime.now(tz=datetime.timezone.utc)
    cacheday = (now.year, now.month, now.day)

    log("Database caches invalidated.")
2019-03-11 22:44:37 +03:00
def check_cache_age():
    """Invalidate the volatile caches if they were not filled on today's UTC date."""
    # timezone-aware "now"; the naive datetime.utcnow() is deprecated since 3.12
    now = datetime.datetime.now(tz=datetime.timezone.utc)
    if cacheday != (now.year, now.month, now.day):
        invalidate_caches()
2018-12-12 21:37:59 +03:00
####
## Database queries
####
2019-03-14 13:07:20 +03:00
# Queries the database
2019-04-10 16:45:50 +03:00
def db_query_full(artist=None, artists=None, title=None, track=None, since=None, to=None, within=None, timerange=None, associated=False, max_=None):
    """Return the scrobbles matching the given filters, newest first.

    This is not meant as a search function: names only match exactly.

    Parameters:
        artist      Artist name or index into ARTISTS.
        artists     Iterable of artist names / indices. Together with
                    ``title`` it identifies one exact track; without a title
                    only one of the artists is used as filter.
        title       Track title; requires ``artists`` (the exact artist
                    combination of the track).
        track       Track dict (``artists``, ``title``) or index into TRACKS.
        since, to, within, timerange
                    Passed to ``time_stamps`` to determine the time window.
        associated  If true, a scrobble also matches when ``artist`` is in
                    ``coa.getCreditedList`` of the track's artists.
        max_        Maximum number of scrobbles returned (None: no limit).

    Returns a list of scrobble dicts as produced by ``get_scrobble_dict``.

    Raises ValueError (from ``list.index``) if an artist name or track is
    not in the database, and TypeError if ``title`` is given without
    ``artists``.
    """
    (since, to) = time_stamps(since=since, to=to, within=within, range=timerange)

    # artist to number
    if artist is not None and isinstance(artist, str):
        artist = ARTISTS.index(artist)

    # artists to numbers
    if artists is not None:
        artists = {(ARTISTS.index(a) if isinstance(a, str) else a) for a in artists}

    # track to number
    if track is not None and isinstance(track, dict):
        trackartists = {(ARTISTS.index(a) if isinstance(a, str) else a) for a in track["artists"]}
        track = TRACKS.index((frozenset(trackartists), track["title"]))
        artists = None

    # check if track is requested via title
    if title is not None and track is None:
        track = TRACKS.index((frozenset(artists), title))
        artists = None

    # if we're not looking for a track (either directly or per title + artist
    # arguments, which is converted to track above) we only need one artist
    elif artist is None and track is None and artists is not None and len(artists) != 0:
        artist = artists.pop()

    # db query always reverse by default
    result = []
    for s in scrobbles_in_range(since, to, reverse=True):
        if len(result) == max_:
            break
        trackartists = TRACKS[s[0]][0]
        # checking the artist is redundant when a track is given (a track has
        # a fixed set of artists), but both filters are kept uniform
        if (track is None or s[0] == track) and (
            artist is None
            or artist in trackartists
            or (associated and artist in coa.getCreditedList(trackartists))
        ):
            result.append(get_scrobble_dict(s))

    return result
2019-03-14 13:07:20 +03:00
2018-11-25 20:17:14 +03:00
# pointless to check for artist when track is checked because every track has a fixed set of artists, but it's more elegant this way
2019-03-14 13:07:20 +03:00
2018-12-04 19:07:07 +03:00
# Queries that... well... aggregate
2019-04-10 16:45:50 +03:00
def _add_chart_ranks(entries):
    """Sort chart entries by descending "scrobbles" and add a "rank" to each (ties share a rank)."""
    entries.sort(key=lambda entry: entry["scrobbles"], reverse=True)
    for position, entry in enumerate(entries):
        if position == 0 or entry["scrobbles"] < entries[position - 1]["scrobbles"]:
            entry["rank"] = position + 1
        else:
            entry["rank"] = entries[position - 1]["rank"]
    return entries


def db_aggregate_full(by=None, since=None, to=None, within=None, timerange=None, artist=None):
    """Aggregate the scrobbles of a time window.

    Parameters:
        by          "ARTIST" for artist charts, "TRACK" for track charts,
                    anything else for the plain number of scrobbles.
        since, to, within, timerange
                    Passed to ``time_stamps`` to determine the time window.
        artist      Artist name or index into ARTISTS; only used for track
                    charts, which are then limited to tracks of that artist.

    Returns a list of chart entries sorted by descending scrobble count
    (keys ``artist``/``track``, ``scrobbles``, ``rank`` and, for artists,
    ``counting``), or an int when ``by`` is neither "ARTIST" nor "TRACK".
    """
    (since, to) = time_stamps(since=since, to=to, within=within, range=timerange)

    if isinstance(artist, str):
        artist = ARTISTS.index(artist)

    if by == "ARTIST":
        charts = {}
        for s in scrobbles_in_range(since, to):
            # every credited artist of the track gets the scrobble
            for a in coa.getCreditedList(TRACKS[s[0]][0]):
                charts[a] = charts.get(a, 0) + 1

        # set for O(1) membership tests instead of scanning the ARTISTS list
        known_artists = set(ARTISTS)
        ls = [
            {
                "artist": get_artist_dict(ARTISTS[a]),
                "scrobbles": count,
                "counting": [arti for arti in coa.getAllAssociated(ARTISTS[a]) if arti in known_artists],
            }
            for a, count in charts.items()
        ]
        return _add_chart_ranks(ls)

    elif by == "TRACK":
        charts = {}
        for s in scrobbles_in_range(since, to):
            if artist is None or artist in TRACKS[s[0]][0]:
                charts[s[0]] = charts.get(s[0], 0) + 1

        ls = [{"track": get_track_dict(TRACKS[t]), "scrobbles": count} for t, count in charts.items()]
        return _add_chart_ranks(ls)

    else:
        # count without building a throwaway list
        return sum(1 for _ in scrobbles_in_range(since, to))
2018-12-04 19:07:07 +03:00
2018-12-12 21:37:59 +03:00
# Search for strings
def db_search(query, type=None):
    """Return all artists or tracks whose simplified name contains the query.

    Parameters:
        query  Search string; compared after ``simplestr`` normalisation.
        type   "ARTIST" to get artist names, "TRACK" to get track dicts (as
               produced by ``get_track_dict``). Any other value yields an
               empty list (it used to raise UnboundLocalError).
    """
    # normalise the query once instead of once per candidate
    needle = simplestr(query)
    results = []

    if type == "ARTIST":
        results = [a for a in ARTISTS if needle in simplestr(a)]

    if type == "TRACK":
        results = [get_track_dict(t) for t in TRACKS if needle in simplestr(t[1])]

    return results
####
## Useful functions
####
2019-03-10 19:38:33 +03:00
# makes a string usable for searching (special characters are blanks, accents and stuff replaced with their real part)
def simplestr(input, ignorecapitalization=True):
    """Reduce a string to a plain form that is usable for searching.

    The string is NFKD-decomposed, combining marks (accents etc.) are
    dropped so only the base character remains, and every remaining
    non-ASCII character is replaced by a blank.

    Parameters:
        input                 The string to simplify.
        ignorecapitalization  If true (default), the result is lowercased.
    """
    decomposed = unicodedata.normalize("NFKD", input)
    # A character is a single UTF-8 byte exactly when its code point is below
    # 128; comparing code points avoids encoding every character (and cannot
    # fail on lone surrogates).
    clear = "".join(
        (c if ord(c) < 128 else " ")
        for c in decomposed
        if not unicodedata.combining(c)
    )
    return clear.lower() if ignorecapitalization else clear
2018-12-12 21:37:59 +03:00
2019-02-15 17:41:58 +03:00
2019-03-14 13:07:20 +03:00
2019-04-07 16:55:49 +03:00
#def getArtistId(nameorid):
# if isinstance(nameorid,int):
# return nameorid
# else:
# try:
# return ARTISTS.index(nameorid)
# except:
# return -1
2019-03-14 13:07:20 +03:00
2019-03-11 20:06:45 +03:00
def insert(list_, item, key=lambda x: x):
    """Insert ``item`` before the first element of ``list_`` with a greater key.

    If no element has a greater key, the item is appended. For a list that
    is sorted by ``key`` this keeps it sorted, with equal keys staying in
    insertion order.

    Parameters:
        list_  The list to modify in place.
        item   The element to insert.
        key    Function computing the comparison key (default: identity).

    Returns the index at which the item was inserted.
    """
    # the item's key does not change, so compute it only once
    item_key = key(item)
    for position, existing in enumerate(list_):
        if key(existing) > item_key:
            list_.insert(position, item)
            return position

    list_.append(item)
    return len(list_) - 1
2019-03-14 13:07:20 +03:00
2019-03-12 14:56:53 +03:00
2019-03-11 22:04:23 +03:00
def scrobbles_in_range(start, end, reverse=False):
    """Yield the scrobbles whose timestamp lies in ``[start, end]`` (inclusive).

    Relies on STAMPS being sorted ascending (``build_db`` sorts it; the
    previous linear scan with early exit relied on the same order). The
    window borders are found by bisection instead of scanning every
    timestamp.

    Parameters:
        start    Lowest timestamp to include.
        end      Highest timestamp to include.
        reverse  If true, yield from the newest to the oldest scrobble.
    """
    import bisect  # local import so this block does not depend on the file header

    stamps = STAMPS  # keep iterating the same list object even if the global is rebound
    first = bisect.bisect_left(stamps, start)   # first stamp >= start
    stop = bisect.bisect_right(stamps, end)     # one past the last stamp <= end

    # iterate over indices so the generator stays lazy (no slice copy)
    positions = range(first, stop)
    if reverse:
        positions = reversed(positions)

    for position in positions:
        yield SCROBBLESDICT[stamps[position]]
2019-03-14 13:07:20 +03:00
2019-03-11 22:04:23 +03:00
# for performance testing
def generateStuff(num=0, pertrack=0, mult=0):
    """Create artificial volatile scrobbles (for performance testing).

    Parameters:
        num       Number of scrobbles of randomly chosen tracks.
        pertrack  Number of additional scrobbles for every known track.
        mult      Number of additional scrobbles per existing scrobble, each
                  shifted 500 further into the past than the one before.
    """
    import random

    # random tracks at random times between the first and last known timestamp
    for _ in range(num):
        t = get_track_dict(random.choice(TRACKS))
        when = random.randint(STAMPS[0], STAMPS[-1])
        createScrobble(t["artists"], t["title"], when, volatile=True)

    # a fixed amount of random-time scrobbles for every track
    for track in TRACKS:
        t = get_track_dict(track)
        for _ in range(pertrack):
            when = random.randint(STAMPS[0], STAMPS[-1])
            createScrobble(t["artists"], t["title"], when, volatile=True)

    # copies of the existing scrobbles, shifted back in time
    # NOTE(review): if createScrobble appends to SCROBBLES, this loop also
    # visits the scrobbles it creates -- confirm that this is intended
    for scrobble in SCROBBLES:
        s = get_scrobble_dict(scrobble)
        for step in range(mult):
            createScrobble(s["artists"], s["title"], s["time"] - step * 500, volatile=True)