2018-12-19 17:28:10 +03:00
from bottle import Bottle , route , get , post , run , template , static_file , request , response , FormsDict
2018-11-24 18:29:24 +03:00
from importlib . machinery import SourceFileLoader
2018-11-27 21:05:50 +03:00
import urllib
2018-11-24 18:29:24 +03:00
import waitress
import os
import datetime
2018-11-28 19:45:52 +03:00
from cleanup import *
2018-11-30 17:44:30 +03:00
from utilities import *
2018-11-27 18:08:14 +03:00
import sys
2018-11-24 18:29:24 +03:00
2018-12-19 17:28:10 +03:00
# Bottle application object; every HTTP route in this file is registered on it
dbserver = Bottle ( )
2018-11-24 18:29:24 +03:00
2018-11-25 20:17:14 +03:00
# In-memory tables. Entries refer to each other by list index (see the *_ref fields below).
SCROBBLES = [ ] # Format: tuple(track_ref,timestamp,saved)
ARTISTS = [ ] # Format: artist
TRACKS = [ ] # Format: tuple(frozenset(artist_ref,...),title)
2018-11-29 18:05:44 +03:00
# All timestamps already assigned to a scrobble; createScrobble/readScrobble use it to keep times unique
timestamps = set ( )
2018-12-19 17:28:10 +03:00
# cla cleans incoming artist/title data (fullclean) and carries the rule checksums;
# coa resolves credited/associated artists. Both classes come from the star imports above (presumably cleanup).
cla = CleanerAgent ( )
coa = CollectorAgent ( )
2018-11-30 17:44:30 +03:00
# Rows loaded by loadAPIkeys(); checkAPIkey() treats the first column of each row as an API key
clients = [ ]
2018-11-28 19:45:52 +03:00
2018-11-28 15:02:43 +03:00
# Unix timestamp (UTC) of the most recent sync(); set in runserver() and sync()
lastsync = 0
2018-12-21 18:32:21 +03:00
# rulestate that the entire current database was built with, or False if the database was built from inconsistent scrobble files
db_rulestate = False
2018-12-20 19:23:16 +03:00
2018-11-25 20:17:14 +03:00
2018-11-30 17:44:30 +03:00
### symmetric keys are fine for now since we hopefully use HTTPS
def loadAPIkeys():
    """Populate the module-level ``clients`` list from the client key file.

    Calls ``createTSV`` on the key file path first and then stores the rows
    returned by ``parseTSV`` (two string columns per row) in ``clients``.
    """
    global clients
    keyfile = " clients/authenticated_machines.tsv "
    createTSV(keyfile)
    clients = parseTSV(keyfile, " string ", " string ")
2018-11-25 20:17:14 +03:00
2018-11-30 17:44:30 +03:00
def checkAPIkey(k):
    """Return True if ``k`` equals the key column of any row in ``clients``.

    Args:
        k: the API key supplied by the caller; anything that is not a string
            (for example ``None`` when no key was sent) is rejected.

    Returns:
        bool: whether the key is known.

    The comparison uses ``hmac.compare_digest`` and always walks the whole
    client list, so the response time does not reveal how much of a guessed
    key was correct or which row matched.
    """
    import hmac  # function-scope import so the module's import block stays untouched

    if not isinstance(k, str):
        return False

    candidate = k.encode("utf-8")
    matched = False
    for key, _description in clients:
        # compare bytes: compare_digest rejects non-ASCII str arguments
        if hmac.compare_digest(str(key).encode("utf-8"), candidate):
            matched = True
    return matched
2018-11-25 20:17:14 +03:00
2018-12-12 21:37:59 +03:00
####
## Getting dict representations of database objects
####
2018-11-25 20:17:14 +03:00
def getScrobbleObject(o):
    """Return the dict representation of a scrobble tuple.

    ``o`` is an entry of ``SCROBBLES``; its first element indexes ``TRACKS``
    and its second element is the scrobble time.
    """
    track_ref = o[0]
    resolved = getTrackObject(TRACKS[track_ref])
    scrobble = {
        " artists ": resolved[" artists "],
        " title ": resolved[" title "],
        " time ": o[1],
    }
    return scrobble
def getArtistObject(o):
    """Return the representation of an artist entry (the entry itself, unchanged)."""
    artist = o
    return artist
def getTrackObject(o):
    """Return the dict representation of a track tuple.

    ``o`` is an entry of ``TRACKS``: its first element is iterated as indexes
    into ``ARTISTS`` and its second element is the title.
    """
    names = []
    for artist_ref in o[0]:
        names.append(getArtistObject(ARTISTS[artist_ref]))
    return {" artists ": names, " title ": o[1]}
2018-12-12 21:37:59 +03:00
####
## Creating or finding existing database entries
####
2018-11-25 20:17:14 +03:00
2018-11-29 18:05:44 +03:00
def createScrobble(artists, title, time):
    """Append a new, not-yet-saved scrobble to ``SCROBBLES``.

    The time is increased one second at a time until it is not in
    ``timestamps`` yet, then registered there. The track is looked up (or
    created) through ``getTrackID`` and stored with the saved flag False.
    """
    unique_time = time
    while unique_time in timestamps:
        unique_time += 1
    timestamps.add(unique_time)

    track_ref = getTrackID(artists, title)
    SCROBBLES.append((track_ref, unique_time, False))
2018-12-05 16:30:50 +03:00
2018-11-29 18:05:44 +03:00
def readScrobble(artists, title, time):
    """Append an already-saved scrobble to ``SCROBBLES``.

    Works like ``createScrobble`` (the time is bumped until it is unique and
    the track is resolved through ``getTrackID``) but stores the entry with the
    saved flag True.
    """
    unique_time = time
    while unique_time in timestamps:
        unique_time += 1
    timestamps.add(unique_time)

    track_ref = getTrackID(artists, title)
    SCROBBLES.append((track_ref, unique_time, True))
2018-12-05 16:30:50 +03:00
2018-11-25 20:17:14 +03:00
def getArtistID(name):
    """Return the index of ``name`` in ``ARTISTS``, adding it if necessary.

    Lookup order:
      1. exact match,
      2. case-insensitive match (the existing spelling is kept),
      3. otherwise the name is appended and its new index returned.

    Args:
        name: artist name as a string.

    Returns:
        int: index into ``ARTISTS``.
    """
    obj = name
    objlower = name.lower()

    try:
        return ARTISTS.index(obj)
    except ValueError:
        # no exact match; fall through to the case-insensitive search
        pass

    # scan lazily instead of building a lowered copy of the whole list
    for idx, known in enumerate(ARTISTS):
        if known.lower() == objlower:
            return idx

    ARTISTS.append(obj)
    return len(ARTISTS) - 1
2018-11-25 20:17:14 +03:00
def getTrackID(artists, title):
    """Return the index of the given track in ``TRACKS``, adding it if necessary.

    Every artist name is resolved (and created when unknown) through
    ``getArtistID``. The track is then searched by exact title first and by
    case-insensitive title second; if neither matches, it is appended.

    Args:
        artists: iterable of artist names.
        title: track title as a string.

    Returns:
        int: index into ``TRACKS``.
    """
    artistset = frozenset(getArtistID(name=a) for a in artists)
    obj = (artistset, title)
    titlelower = title.lower()

    try:
        return TRACKS.index(obj)
    except ValueError:
        # no exact match; fall through to the case-insensitive search
        pass

    # not the best performance
    for idx, known in enumerate(TRACKS):
        if known[0] == artistset and known[1].lower() == titlelower:
            return idx

    TRACKS.append(obj)
    return len(TRACKS) - 1
2018-11-24 18:29:24 +03:00
2018-12-12 21:37:59 +03:00
####
## HTTP requests
####
2018-12-19 17:28:10 +03:00
@dbserver.route(" /test ")
def test_server():
    """Report the server state through the HTTP status code.

    204 Database server is up and operational
    205 Database server is up, but DB is not fully built or is inconsistent
    403 Database server is up, but provided API key is not valid

    The key is optional: it is only checked when the request carries one.
    """
    apikey = request.query.get(" key ")
    response.set_header(" Access-Control-Allow-Origin ", " * ")

    key_rejected = apikey is not None and not checkAPIkey(apikey)
    if key_rejected:
        response.status = 403
        return " Wrong API key "

    response.status = 204 if db_rulestate else 205
    return
2018-12-12 21:37:59 +03:00
2018-12-19 17:28:10 +03:00
@dbserver.route(" /scrobbles ")
def get_scrobbles():
    """Return the scrobbles matching the query parameters, in reversed ``db_query`` order.

    Reads the artist (repeatable), title, since, to, associated and max
    parameters from the query string; max, when present, truncates the list.
    """
    keys = FormsDict.decode(request.query)

    with_associated = keys.get(" associated ") is not None
    r = db_query(
        artists=keys.getall(" artist "),
        title=keys.get(" title "),
        since=keys.get(" since "),
        to=keys.get(" to "),
        associated=with_associated,
    )
    r.reverse()

    limit = keys.get(" max ")
    if limit is None:
        # the list is wrapped in a dict because a bare list is not returned as JSON here
        return {" list ": r}
    return {" list ": r[:int(limit)]}
2018-11-24 18:29:24 +03:00
2019-02-02 20:08:30 +03:00
@dbserver.route(" /numscrobbles ")
def get_scrobbles():
    """Return the number of scrobbles matching the query parameters.

    Accepts the same artist (repeatable), title, since, to and associated
    parameters as the scrobble list endpoint and returns only the count.

    NOTE(review): this function shares its name with the handler registered
    directly above it. Both routes still work because the decorator registers
    each function at definition time, but the module-level name ends up bound
    to this one; the name is kept so the module interface does not change.
    """
    keys = FormsDict.decode(request.query)
    r = db_query(
        artists=keys.getall(" artist "),
        title=keys.get(" title "),
        since=keys.get(" since "),
        to=keys.get(" to "),
        associated=(keys.get(" associated ") is not None),
    )
    # only the length is needed, so the result is not reversed
    return {" amount ": len(r)}
2018-12-19 17:28:10 +03:00
@dbserver.route(" /tracks ")
def get_tracks():
    """Return all tracks, optionally restricted to one artist.

    Without an artist parameter every track is returned. With one, only the
    tracks whose artist set contains that artist are returned; an artist that
    is not in the database yields an empty list.
    """
    keys = FormsDict.decode(request.query)
    artist = keys.get(" artist ")

    if artist is None:
        # previously the artist id was never assigned on this path, which
        # raised a NameError as soon as the filter below was evaluated
        return {" list ": [getTrackObject(t) for t in TRACKS]}

    try:
        artistid = ARTISTS.index(artist)
    except ValueError:
        # unknown artist: no tracks instead of a server error
        return {" list ": []}

    # filtering on the id is much faster than building every track object
    # first and comparing artist names afterwards
    ls = [getTrackObject(t) for t in TRACKS if artistid in t[0]]
    return {" list ": ls}
2018-12-19 17:28:10 +03:00
@dbserver.route(" /artists ")
def get_artists():
    """Return the complete ``ARTISTS`` list wrapped in a dict."""
    payload = {" list ": ARTISTS}
    return payload
2019-02-02 20:08:30 +03:00
@dbserver.route(" /amounts ")
def get_amounts():
    """Return how many scrobbles, tracks and artists are currently held in memory."""
    amounts = {}
    amounts[" scrobbles "] = len(SCROBBLES)
    amounts[" tracks "] = len(TRACKS)
    amounts[" artists "] = len(ARTISTS)
    return amounts
2018-12-19 17:28:10 +03:00
@dbserver.route(" /charts/artists ")
def get_charts_artists():
    """Return the artist charts for the requested time range."""
    query = request.query
    range_start = query.get(" since ")
    range_end = query.get(" to ")

    chart = db_aggregate(by=" ARTIST ", since=range_start, to=range_end)
    return {" list ": chart}
2018-11-25 20:17:14 +03:00
2018-12-19 17:28:10 +03:00
@dbserver.route(" /charts/tracks ")
def get_charts_tracks():
    """Return the track charts for the requested time range, optionally for one artist."""
    keys = FormsDict.decode(request.query)

    chart = db_aggregate(
        by=" TRACK ",
        since=keys.get(" since "),
        to=keys.get(" to "),
        artist=keys.get(" artist "),
    )
    return {" list ": chart}
2018-11-26 18:21:07 +03:00
2018-12-19 17:28:10 +03:00
@dbserver.route(" /charts ")
def get_charts():
    """Return the total number of scrobbles in the requested time range."""
    query = request.query
    total = db_aggregate(since=query.get(" since "), to=query.get(" to "))
    return {" number ": total}
2018-12-19 17:28:10 +03:00
@dbserver.route(" /pulse ")
def get_pulse():
    """Return scrobble counts for a series of sliding time ranges.

    Query parameters:
        since, to: limits of the covered period (see ``getTimestamps``).
        step: unit the window advances by, optionally followed by the
            separator used in the split below and an integer multiplier.
        trail: how many steps each range spans (default 3).

    Returns a dict whose list holds one entry per range with its start, its
    end and the scrobble count. A step multiplier below 1 is answered with
    status 400, because such a window never moves forward and the loop below
    would never finish.
    """
    since = request.query.get(" since ")
    to = request.query.get(" to ")
    (ts_start, ts_end) = getTimestamps(since, to)

    step = request.query.get(" step ", " month ")
    trail = int(request.query.get(" trail ", 3))

    # makes the multiplier 1 if not assigned
    [step, stepn] = (step.split(" - ") + [1])[:2]
    stepn = int(stepn)
    if stepn < 1:
        response.status = 400
        return "Invalid step multiplier"

    d_start = getStartOf(ts_start, step)
    d_end = getStartOf(ts_end, step)

    # first range should end right after the first active scrobbling week / month / whatever relevant step
    d_start = getNext(d_start, step, stepn)
    # go one range back to begin
    d_start = getNext(d_start, step, stepn * trail * -1)

    results = []
    d_current = d_start
    while True:
        d_current_end = getNext(d_current, step, stepn * trail)
        res = db_aggregate(since=d_current, to=d_current_end)
        results.append({" from ": d_current, " to ": d_current_end, " scrobbles ": res})
        d_current = getNext(d_current, step, stepn)
        if isPast(d_current_end, d_end):
            break

    return {" list ": results}
2018-12-19 17:28:10 +03:00
@dbserver.route(" /top/artists ")
def get_top_artists():
    """Return the number one artist for a series of sliding time ranges.

    Query parameters:
        since, to: limits of the covered period (see ``getTimestamps``).
        step: unit the window advances by, optionally followed by the
            separator used in the split below and an integer multiplier.
        trail: how many steps each range spans (default 3).

    Each list entry holds the range limits plus the leading artist and its
    scrobble count, or None and 0 when the range has no scrobbles. A step
    multiplier below 1 is answered with status 400, because such a window
    never moves forward and the loop below would never finish.
    """
    since = request.query.get(" since ")
    to = request.query.get(" to ")
    (ts_start, ts_end) = getTimestamps(since, to)

    step = request.query.get(" step ", " month ")
    trail = int(request.query.get(" trail ", 3))

    # makes the multiplier 1 if not assigned
    [step, stepn] = (step.split(" - ") + [1])[:2]
    stepn = int(stepn)
    if stepn < 1:
        response.status = 400
        return "Invalid step multiplier"

    d_start = getStartOf(ts_start, step)
    d_end = getStartOf(ts_end, step)

    # first range should end right after the first active scrobbling week / month / whatever relevant step
    d_start = getNext(d_start, step, stepn)
    # go one range back to begin
    d_start = getNext(d_start, step, stepn * trail * -1)

    results = []
    d_current = d_start
    while True:
        d_current_end = getNext(d_current, step, stepn * trail)
        chart = db_aggregate(since=d_current, to=d_current_end, by=" ARTIST ")
        if chart:
            res = chart[0]
            results.append({" from ": d_current, " to ": d_current_end, " artist ": res[" artist "], " scrobbles ": res[" scrobbles "]})
        else:
            # empty range: checked explicitly so real errors are no longer swallowed
            results.append({" from ": d_current, " to ": d_current_end, " artist ": None, " scrobbles ": 0})
        d_current = getNext(d_current, step, stepn)
        if isPast(d_current_end, d_end):
            break

    return {" list ": results}
2018-12-19 17:28:10 +03:00
@dbserver.route(" /top/tracks ")
def get_top_tracks():
    """Return the number one track for a series of sliding time ranges.

    Query parameters:
        since, to: limits of the covered period (see ``getTimestamps``).
        step: unit the window advances by, optionally followed by the
            separator used in the split below and an integer multiplier.
        trail: how many steps each range spans (default 3).

    Each list entry holds the range limits plus the leading track and its
    scrobble count, or None and 0 when the range has no scrobbles. A step
    multiplier below 1 is answered with status 400, because such a window
    never moves forward and the loop below would never finish.
    """
    since = request.query.get(" since ")
    to = request.query.get(" to ")
    (ts_start, ts_end) = getTimestamps(since, to)

    step = request.query.get(" step ", " month ")
    trail = int(request.query.get(" trail ", 3))

    # makes the multiplier 1 if not assigned
    [step, stepn] = (step.split(" - ") + [1])[:2]
    stepn = int(stepn)
    if stepn < 1:
        response.status = 400
        return "Invalid step multiplier"

    d_start = getStartOf(ts_start, step)
    d_end = getStartOf(ts_end, step)

    # first range should end right after the first active scrobbling week / month / whatever relevant step
    d_start = getNext(d_start, step, stepn)
    # go one range back to begin
    d_start = getNext(d_start, step, stepn * trail * -1)

    results = []
    d_current = d_start
    while True:
        d_current_end = getNext(d_current, step, stepn * trail)
        chart = db_aggregate(since=d_current, to=d_current_end, by=" TRACK ")
        if chart:
            res = chart[0]
            results.append({" from ": d_current, " to ": d_current_end, " track ": res[" track "], " scrobbles ": res[" scrobbles "]})
        else:
            # empty range: checked explicitly so real errors are no longer swallowed
            results.append({" from ": d_current, " to ": d_current_end, " track ": None, " scrobbles ": 0})
        d_current = getNext(d_current, step, stepn)
        if isPast(d_current_end, d_end):
            break

    return {" list ": results}
2018-12-15 17:25:00 +03:00
def getStartOf(timestamp, unit):
    """Return the first day of the year/month/week/day containing ``timestamp``.

    Args:
        timestamp: Unix timestamp, interpreted in UTC.
        unit: one of the unit strings compared against below.

    Returns:
        list: ``[year, month, day]``, or None when the unit is not recognised.
    """
    # timezone-aware conversion; utcfromtimestamp() is deprecated since Python 3.12
    date = datetime.datetime.fromtimestamp(timestamp, tz=datetime.timezone.utc)

    if unit == " year ":
        return [date.year, 1, 1]
    if unit == " month ":
        return [date.year, date.month, 1]
    if unit == " day ":
        return [date.year, date.month, date.day]
    if unit == " week ":
        # weekday() is 0 on Monday, so this steps back to the most recent Sunday
        change = (date.weekday() + 1) % 7
        newdate = date - datetime.timedelta(days=change)
        return [newdate.year, newdate.month, newdate.day]
    return None
2018-12-05 16:30:50 +03:00
2018-12-15 17:25:00 +03:00
def getNext(time, unit, step=1):
    """Return the date ``step`` units after ``time`` (before it for a negative step).

    Args:
        time: ``[year, month, day]``.
        unit: one of the unit strings compared against below.
        step: number of units to move.

    Returns:
        list: the shifted ``[year, month, day]``, or None for an unknown unit.
    """
    if unit == " week ":
        # a week is handled as seven days
        return getNext(time, " day ", step * 7)

    if unit == " day ":
        shifted = datetime.datetime(time[0], time[1], time[2]) + datetime.timedelta(days=step)
        return [shifted.year, shifted.month, shifted.day]

    if unit == " month ":
        # zero-based month so that divmod yields the year carry directly
        year_carry, month_index = divmod(time[1] - 1 + step, 12)
        return [time[0] + year_carry, month_index + 1, time[2]]

    if unit == " year ":
        return [time[0] + step, time[1], time[2]]

    return None
2018-12-16 19:52:13 +03:00
2018-12-19 17:28:10 +03:00
@dbserver.route(" /artistinfo ")
def artistInfo():
    """Return scrobble count and chart position for one artist.

    If the artist appears in the artist charts, the response holds its own
    scrobble count, its position and the artists associated with it.
    Otherwise the artist is not credited itself: the response then names the
    credited artist under the replace key and reports that artist's position
    (None if the credited artist is not in the charts either).
    """
    keys = FormsDict.decode(request.query)
    artist = keys.get(" artist ")

    charts = db_aggregate(by=" ARTIST ")
    # we cant take the scrobble number from the charts because that includes all countas scrobbles
    scrobbles = len(db_query(artists=[artist]))

    def chart_position(name):
        # 1-based position of the first chart entry for this artist, or None
        for idx, entry in enumerate(charts):
            if entry[" artist "] == name:
                return idx + 1
        return None

    position = chart_position(artist)
    if position is not None:
        others = coa.getAllAssociated(artist)
        return {" scrobbles ": scrobbles, " position ": position, " associated ": others}

    # if the artist isnt in the charts, they are not being credited and we need to show information about the credited one
    artist = coa.getCredited(artist)
    return {" replace ": artist, " scrobbles ": scrobbles, " position ": chart_position(artist)}
2018-12-27 16:57:25 +03:00
@dbserver.route(" /trackinfo ")
def trackInfo():
    """Return scrobble count and chart position for one track.

    The track is identified by its title and the full set of artist
    parameters; a track missing from the charts raises IndexError.
    """
    keys = FormsDict.decode(request.query)
    artists = keys.getall(" artist ")
    title = keys.get(" title ")

    charts = db_aggregate(by=" TRACK ")
    # we cant take the scrobble number from the charts because that includes all countas scrobbles
    scrobbles = len(db_query(artists=artists, title=title))

    wanted_artists = set(artists)
    matching = [
        entry for entry in charts
        if set(entry[" track "][" artists "]) == wanted_artists and entry[" track "][" title "] == title
    ]
    c = matching[0]
    return {" scrobbles ": scrobbles, " position ": charts.index(c) + 1}
2018-12-05 16:30:50 +03:00
def isPast(date, limit):
    """Return True if ``date`` lies strictly after ``limit``.

    Both arguments are ``[year, month, day]`` sequences; they are compared
    field by field, most significant first.
    """
    for position in (0, 1):
        if date[position] != limit[position]:
            return date[position] > limit[position]
    return date[2] > limit[2]
2018-12-19 17:28:10 +03:00
@dbserver.get(" /newscrobble ")
def pseudo_post_scrobble():
    """Register a new scrobble from query-string parameters.

    Reads the artist, title, key and (optional) time parameters. A request
    with an unknown API key is answered with status 403. A missing or
    non-numeric time falls back to the current UTC time. The database is
    synced to disk when the last sync is more than an hour older than the
    scrobble time.
    """
    keys = FormsDict.decode(request.query)  # The Dal★Shabet handler
    artists = keys.get(" artist ")
    title = keys.get(" title ")
    apikey = keys.get(" key ")
    if not checkAPIkey(apikey):
        response.status = 403
        return " "

    try:
        time = int(keys.get(" time "))
    except (TypeError, ValueError):
        # TypeError: parameter missing (None); ValueError: not a number
        time = int(datetime.datetime.now(tz=datetime.timezone.utc).timestamp())

    (artists, title) = cla.fullclean(artists, title)

    ## this is necessary for localhost testing
    response.set_header(" Access-Control-Allow-Origin ", " * ")

    createScrobble(artists, title, time)

    if (time - lastsync) > 3600:
        sync()

    return " "
2018-12-19 17:28:10 +03:00
@dbserver.post(" /newscrobble ")
def post_scrobble():
    """Register a new scrobble from form parameters.

    Reads the artist, title, key and (optional) time fields. A request with
    an unknown API key is answered with status 403. A missing or non-numeric
    time falls back to the current UTC time. The database is synced to disk
    after every scrobble.
    """
    keys = FormsDict.decode(request.forms)  # The Dal★Shabet handler
    artists = keys.get(" artist ")
    title = keys.get(" title ")
    apikey = keys.get(" key ")
    if not checkAPIkey(apikey):
        response.status = 403
        return " "

    try:
        time = int(keys.get(" time "))
    except (TypeError, ValueError):
        # TypeError: field missing (None); ValueError: not a number
        time = int(datetime.datetime.now(tz=datetime.timezone.utc).timestamp())

    (artists, title) = cla.fullclean(artists, title)

    ## this is necessary for localhost testing
    response.set_header(" Access-Control-Allow-Origin ", " * ")

    createScrobble(artists, title, time)

    # always sync instead of only once per hour: one filesystem access every
    # few minutes is no problem and it avoids lost tracks when we lose power
    sync()

    return " "
2018-12-19 17:28:10 +03:00
@dbserver.route(" /sync ")
def abouttoshutdown():
    """Write all unsaved scrobbles to disk; the process itself keeps running."""
    sync()
    # the server is deliberately not stopped here (sys.exit() is disabled)
    return None
2018-12-20 19:23:16 +03:00
@dbserver.post(" /newrule ")
def newrule():
    """Append a rule made in the web interface to the webmade rule file.

    The form's key field is removed and checked as API key; the remaining
    field names form the new entry. A request without a valid key is ignored.
    Adding a rule marks the database rulestate as inconsistent.
    """
    global db_rulestate

    keys = FormsDict.decode(request.forms)
    apikey = keys.pop(" key ", None)
    if not checkAPIkey(apikey):
        return

    addEntry(" rules/webmade.tsv ", list(keys))
    db_rulestate = False
2018-12-20 19:23:16 +03:00
# Reports likely problems in the artist list.
# Returns a dict with four entries: duplicates (pairs of artist names), combined (an artist name plus
# the list of other artists found inside it), newartists (a leftover string, the artist it was found in,
# and the artists already recognised there) and inconsistent (True when db_rulestate is falsy).
# NOTE(review): indentation was lost in this copy of the file, so the exact nesting of the
# branches below cannot be confirmed from here - verify against the original before editing.
@dbserver.route ( " /issues " )
def issues ( ) :
combined = [ ]
duplicates = [ ]
newartists = [ ]
2018-12-21 18:32:21 +03:00
inconsistent = not db_rulestate
2018-12-23 01:19:52 +03:00
# if the user manually edits files while the server is running this won't show, but too lazy to check the rulestate here
2018-12-12 21:37:59 +03:00
2018-12-20 19:23:16 +03:00
# NOTE(review): neither itertools nor difflib is referenced by any active line of this function
import itertools
import difflib
# sortedartists: longest name first; reversesortedartists: the same list reversed
sortedartists = ARTISTS . copy ( )
sortedartists . sort ( key = len , reverse = True )
reversesortedartists = sortedartists . copy ( )
reversesortedartists . reverse ( )
for a in reversesortedartists :
# nochange: cla.confirmedReal(a); when truthy, no combined/newartists entry is added for a
nochange = cla . confirmedReal ( a )
# st: working copy of the name from which recognised artists are removed; lis: the artists recognised so far
st = a
lis = [ ]
reachedmyself = False
for ar in sortedartists :
# skip every entry up to and including a itself, so only the entries listed after a are compared
if ( ar != a ) and not reachedmyself :
continue
elif not reachedmyself :
reachedmyself = True
continue
# same name apart from case or a leading article
if ( ar . lower ( ) == a . lower ( ) ) or ( " the " + ar . lower ( ) == a . lower ( ) ) or ( " a " + ar . lower ( ) == a . lower ( ) ) :
duplicates . append ( ( ar , a ) )
break
if ( ar + " " in st ) or ( " " + ar in st ) :
lis . append ( ar )
st = st . replace ( ar , " " ) . strip ( )
elif ( ar == st ) :
lis . append ( ar )
st = " "
if not nochange :
combined . append ( ( a , lis ) )
break
# ar is inside what is left and makes up more than half of it
elif ( ar in st ) and len ( ar ) * 2 > len ( st ) :
duplicates . append ( ( a , ar ) )
# drop connecting words from the remainder before judging it
st = st . replace ( " & " , " " ) . replace ( " and " , " " ) . replace ( " with " , " " ) . strip ( )
if st != " " and st != a :
if len ( st ) < 5 and len ( lis ) == 1 :
#check if we havent just randomly found the string in another word
#if (" " + st + " ") in lis[0] or (lis[0].endswith(" " + st)) or (lis[0].startswith(st + " ")):
duplicates . append ( ( a , lis [ 0 ] ) )
elif len ( st ) < 5 and len ( lis ) > 1 and not nochange :
combined . append ( ( a , lis ) )
elif len ( st ) > = 5 and not nochange :
#check if we havent just randomly found the string in another word
if ( " " + st + " " ) in a or ( a . endswith ( " " + st ) ) or ( a . startswith ( st + " " ) ) :
newartists . append ( ( st , a , lis ) )
#for c in itertools.combinations(ARTISTS,3):
# l = list(c)
# print(l)
# l.sort(key=len,reverse=True)
# [full,a1,a2] = l
# if (a1 + " " + a2 in full) or (a2 + " " + a1 in full):
# combined.append((full,a1,a2))
#for c in itertools.combinations(ARTISTS,2):
# if
#
# if (c[0].lower == c[1].lower):
# duplicates.append((c[0],c[1]))
# elif (c[0] + " " in c[1]) or (" " + c[0] in c[1]) or (c[1] + " " in c[0]) or (" " + c[1] in c[0]):
# if (c[0] in c[1]):
# full, part = c[1],c[0]
# rest = c[1].replace(c[0],"").strip()
# else:
# full, part = c[0],c[1]
# rest = c[0].replace(c[1],"").strip()
# if rest in ARTISTS and full not in [c[0] for c in combined]:
# combined.append((full,part,rest))
# elif (c[0] in c[1]) or (c[1] in c[0]):
# duplicates.append((c[0],c[1]))
2018-12-21 18:32:21 +03:00
return { " duplicates " : duplicates , " combined " : combined , " newartists " : newartists , " inconsistent " : inconsistent }
@dbserver.post(" /rebuild ")
def rebuild():
    """Re-apply the rules to the stored scrobbles and rebuild the database.

    Only acts when the form carries a valid API key. Pending scrobbles are
    synced first, then the external fix script is run, the two agents are
    re-created and the in-memory database is built again.
    """
    global db_rulestate, cla, coa

    keys = FormsDict.decode(request.forms)
    apikey = keys.pop(" key ", None)
    if not checkAPIkey(apikey):
        return

    db_rulestate = False
    sync()
    os.system(" python3 fixexisting.py ")
    cla = CleanerAgent()
    coa = CollectorAgent()
    build_db()
2018-12-20 19:23:16 +03:00
2018-12-12 21:37:59 +03:00
####
## Server operation
####
2018-11-24 18:29:24 +03:00
# Starts the server
2018-12-19 17:28:10 +03:00
def runserver(PORT):
    """Build the database, load the API keys and serve ``dbserver`` on ``PORT``.

    ``lastsync`` is set to the current UTC time before the database is built.
    """
    global lastsync

    started = datetime.datetime.now(tz=datetime.timezone.utc)
    lastsync = int(started.timestamp())

    build_db()
    loadAPIkeys()

    run(dbserver, host=' :: ', port=PORT, server=' waitress ')
2018-11-25 20:17:14 +03:00
def build_db():
    """(Re)build the in-memory database from the scrobble files on disk.

    Resets all tables, reads every stored scrobble, sorts the scrobbles by
    time, adds the artists that are only known to the collector agent, and
    finally records whether all scrobble files were written with the current
    rule checksums.
    """
    global SCROBBLES, ARTISTS, TRACKS, db_rulestate

    SCROBBLES = []
    ARTISTS = []
    TRACKS = []
    # The set of used timestamps has to be reset together with the tables.
    # Otherwise a rebuild finds every stored time already taken and
    # readScrobble() shifts each scrobble forward by at least one second.
    timestamps.clear()

    db = parseAllTSV(" scrobbles ", " int ", " string ", " string ")
    for sc in db:
        artists = sc[1].split(" ␟ ")
        title = sc[2]
        time = sc[0]

        readScrobble(artists, title, time)

    SCROBBLES.sort(key=lambda tup: tup[1])

    # get extra artists with zero scrobbles from countas rules
    for artist in coa.getAllArtists():
        if artist not in ARTISTS:
            ARTISTS.append(artist)

    coa.updateIDs(ARTISTS)

    db_rulestate = consistentRulestate(" scrobbles ", cla.checksums)
2018-11-25 20:17:14 +03:00
2018-11-24 18:29:24 +03:00
# Saves all cached entries to disk
2018-11-28 15:02:43 +03:00
def sync():
    """Write every scrobble that is not yet marked as saved to its monthly file.

    New entries are first grouped per file so that each file is only touched
    once, then appended; afterwards ``lastsync`` is set to the current UTC time.
    """
    global lastsync

    # file code -> list of new entries for that file
    entries = {}

    for idx, scrobble in enumerate(SCROBBLES):
        if scrobble[2]:
            continue

        t = getScrobbleObject(scrobble)

        # we want the order of artists to be deterministic so when we update
        # files with new rules a diff can see what has actually been changed
        artistss = " ␟ ".join(sorted(t[" artists "]))

        # NOTE(review): date.fromtimestamp() uses the local timezone while the
        # rest of the file works in UTC - confirm this is intended
        timestamp = datetime.date.fromtimestamp(t[" time "])
        monthcode = str(timestamp.year) + " _ " + str(timestamp.month)

        entry = [str(t[" time "]), artistss, t[" title "]]
        entries.setdefault(monthcode, []).append(entry)

        SCROBBLES[idx] = (scrobble[0], scrobble[1], True)

    for monthcode, lines in entries.items():
        addEntries(" scrobbles/ " + monthcode + " .tsv ", lines)
        combineChecksums(" scrobbles/ " + monthcode + " .tsv ", cla.checksums)

    lastsync = int(datetime.datetime.now(tz=datetime.timezone.utc).timestamp())
    log(" Database saved to disk. ")
2018-11-28 15:02:43 +03:00
2018-11-24 18:29:24 +03:00
2018-12-12 21:37:59 +03:00
####
## Database queries
####
2018-11-24 18:29:24 +03:00
# Queries the database
2018-12-24 21:14:24 +03:00
def db_query(artists=None, title=None, track=None, since=None, to=None, associated=False):
    """Return the scrobbles matching the given filters as scrobble dicts.

    This is not meant as a search function: names only match exactly.

    Args:
        artists: artist names or artist indexes; None or empty means no
            artist filter.
        title: if given (and no track is), the track with exactly this title
            and exactly this artist combination is requested.
        track: index into ``TRACKS`` of the requested track.
        since, to: time limits, resolved through ``getTimestamps``; both
            limits are exclusive.
        associated: if true, the artist filter matches against the credited
            artists of a track instead of its own artists.

    Returns:
        list: scrobble dicts in ``SCROBBLES`` order.

    Raises:
        ValueError: if an artist name or the title/artist combination is not
            in the database.
    """
    (since, to) = getTimestamps(since, to)

    artist = None

    # artists to numbers; a missing argument is treated like an empty list
    # (the default None used to fail here because it was iterated directly)
    artists = {(ARTISTS.index(a) if isinstance(a, str) else a) for a in (artists or [])}

    # check if track is requested via title
    if title is not None and track is None:
        track = TRACKS.index((frozenset(artists), title))
        artists = None
    # if we're not looking for a track, we only need one artist;
    # duplicate artist arguments are ignored
    elif track is None and len(artists) != 0:
        artist = artists.pop()

    def matches(s):
        # s is a scrobble tuple: (track_ref, timestamp, saved)
        if track is not None and s[0] != track:
            return False
        if not (since < s[1] < to):
            return False
        if artist is None:
            return True
        track_artists = TRACKS[s[0]][0]
        if associated:
            return artist in coa.getCreditedList(track_artists)
        return artist in track_artists

    return [getScrobbleObject(s) for s in SCROBBLES if matches(s)]
2018-11-24 18:29:24 +03:00
2018-12-04 19:07:07 +03:00
# Queries that... well... aggregate
2018-12-22 16:06:21 +03:00
def db_aggregate(by=None, since=None, to=None, artist=None):
    """Aggregate the scrobbles between ``since`` and ``to`` (both exclusive).

    Depending on ``by`` this returns the artist charts, the track charts
    (optionally restricted to one artist, given by name or index) or, for any
    other value, just the number of scrobbles. Charts are lists of dicts
    sorted by descending scrobble count.
    """
    (since, to) = getTimestamps(since, to)

    if isinstance(artist, str):
        artist = ARTISTS.index(artist)

    in_range = [scr for scr in SCROBBLES if since < scr[1] < to]

    if by == " ARTIST ":
        counts = {}
        for s in in_range:
            # every credited artist of the track gets the scrobble
            for a in coa.getCreditedList(TRACKS[s[0]][0]):
                counts[a] = counts.get(a, 0) + 1

        ls = []
        for a in counts:
            ls.append({
                " artist ": getArtistObject(ARTISTS[a]),
                " scrobbles ": counts[a],
                " counting ": coa.getAllAssociated(ARTISTS[a]),
            })
        ls.sort(key=lambda k: k[" scrobbles "], reverse=True)
        return ls

    if by == " TRACK ":
        counts = {}
        for s in in_range:
            track = s[0]
            if artist is not None and artist not in TRACKS[track][0]:
                continue
            counts[track] = counts.get(track, 0) + 1

        ls = []
        for t in counts:
            ls.append({" track ": getTrackObject(TRACKS[t]), " scrobbles ": counts[t]})
        ls.sort(key=lambda k: k[" scrobbles "], reverse=True)
        return ls

    return len(in_range)
2018-12-04 19:07:07 +03:00
2018-12-12 21:37:59 +03:00
# Search for strings
def db_search(query, type=None):
    """Return the artists or tracks whose name contains ``query`` (case-insensitive).

    Args:
        query: substring to look for.
        type: selects whether artist names or track titles are searched;
            any other value (including the default None) yields an empty
            list. The parameter name shadows the builtin but is kept because
            callers may pass it by keyword.

    Returns:
        list: matching entries of ``ARTISTS`` or ``TRACKS``.
    """
    # start with an empty result so an unknown type no longer fails on an
    # unassigned variable
    results = []

    if type == " ARTIST ":
        needle = query.lower()
        results = [a for a in ARTISTS if needle in a.lower()]
    elif type == " TRACK ":
        needle = query.lower()
        results = [t for t in TRACKS if needle in t[1].lower()]

    return results
####
## Useful functions
####
2018-12-04 19:07:07 +03:00
# Takes user inputs like YYYY/MM and returns the timestamps. Returns timestamp if timestamp was already given.
def _resolveTimestamp(value):
    """Convert one user-supplied time limit to a Unix timestamp where possible.

    Strings are either one of the keywords for the current day / month / year
    (UTC) or date fields joined by the separator used in the split below.
    Lists are ``[year, month, day, hour, minute]`` prefixes; missing fields
    default to the first of January, midnight. Anything else (for example an
    int timestamp or None) is returned unchanged.
    """
    if isinstance(value, str):
        keyword = value.lower()
        fields_for_keyword = {" today ": 3, " month ": 2, " year ": 1}
        if keyword in fields_for_keyword:
            now = datetime.datetime.now(tz=datetime.timezone.utc)
            value = [now.year, now.month, now.day][:fields_for_keyword[keyword]]
        else:
            value = [int(x) for x in value.split(" / ")]

    if isinstance(value, list):
        date = [1970, 1, 1, 0, 0]
        date[:len(value)] = value
        value = int(datetime.datetime(date[0], date[1], date[2], date[3], date[4], tzinfo=datetime.timezone.utc).timestamp())

    return value


def getTimestamps(f, t):
    """Turn user-supplied time limits into a ``(from, to)`` pair of timestamps.

    Args:
        f: lower limit - keyword string, date string, list of date fields,
            timestamp, or None for the earliest known scrobble.
        t: upper limit in the same formats, or None for the current time.

    Returns:
        tuple: ``(f, t)`` as Unix timestamps. With an empty database a
        missing lower limit falls back to the current time instead of raising.
    """
    f = _resolveTimestamp(f)
    t = _resolveTimestamp(t)

    now = datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
    if f is None:
        # min() of an empty set raises, so guard the empty database
        f = min(timestamps) if timestamps else int(now)
    if t is None:
        t = now

    return (f, t)
2018-12-12 21:37:59 +03:00
2018-11-24 18:29:24 +03:00
2018-12-12 21:37:59 +03:00
def getArtistId(nameorid):
    """Return the ``ARTISTS`` index for a name, or the argument itself if it is an int.

    Args:
        nameorid: artist name or artist index.

    Returns:
        int: the index, or -1 if the name is not in ``ARTISTS``.
    """
    if isinstance(nameorid, int):
        return nameorid

    try:
        return ARTISTS.index(nameorid)
    except ValueError:
        # list.index signals a missing element with ValueError
        return -1
2018-11-24 18:29:24 +03:00