2019-02-02 18:17:07 +03:00
import re
import os
import hashlib
from threading import Thread
2019-02-16 18:42:45 +03:00
import pickle
2019-03-03 23:55:35 +03:00
import urllib
2019-03-10 20:14:50 +03:00
import datetime
2019-02-02 18:17:07 +03:00
2018-12-17 17:10:10 +03:00
### TSV files
2018-11-28 19:45:52 +03:00
2019-03-11 16:49:29 +03:00
def parseTSV(filename, *args, escape=True):
    """Parse a tab-separated file into a list of typed rows.

    Lines that start with ``#`` and lines that are blank are skipped.
    Consecutive tabs count as a single separator, so empty fields can only
    occur at the end of a line; missing or unparsable fields are replaced by
    the default of the requested type.

    Args:
        filename: Path of the file to read.
        *args: One type name per column: ``"list"``, ``"string"``, ``"int"``
            or ``"bool"``. Any other name contributes no value to the row.
        escape: If true, everything after the first ``#`` on a line is
            discarded as a comment.

    Returns:
        A list containing one list of converted values per data line.
    """
    truthy = ("true", "yes", "1", "y")
    result = []
    # the context manager closes the file even if a line fails to parse
    with open(filename) as f:
        for raw in f:
            if raw.startswith("#") or raw.strip() == "":
                continue
            line = raw.replace("\n", "")
            if escape:
                line = line.split("#")[0]
            # translate escape sequences even if comments are not supported
            # in this file (they might still be used for some reason)
            line = line.replace(r"\num", "#")
            # multiple tabs are okay, empty fields are only accepted trailing
            data = list(filter(None, line.split("\t")))
            entry = []
            for i, kind in enumerate(args):
                field = data[i] if i < len(data) else None
                if kind == "list":
                    entry.append([] if field is None else field.split("␟"))
                elif kind == "string":
                    entry.append("" if field is None else field)
                elif kind == "int":
                    try:
                        entry.append(int(field))
                    except (TypeError, ValueError):
                        # missing field or not a number
                        entry.append(0)
                elif kind == "bool":
                    entry.append(field is not None and field.lower() in truthy)
            result.append(entry)
    return result
2018-12-20 20:46:55 +03:00
def checksumTSV(folder):
    """Return the MD5 checksums of all ``.tsv`` files in a folder.

    The files are processed in sorted name order so that the same set of
    files always yields the same string (``os.listdir`` alone returns names
    in arbitrary order).

    Args:
        folder: Directory to scan (not recursive).

    Returns:
        A string with one hex digest per ``.tsv`` file, each followed by a
        newline; the empty string if there are no such files.
    """
    sums = []
    for name in sorted(os.listdir(folder + "/")):
        if name.endswith(".tsv"):
            with open(folder + "/" + name, "rb") as f:
                sums.append(hashlib.md5(f.read()).hexdigest() + "\n")
    return "".join(sums)
2018-12-21 00:07:22 +03:00
# returns whether checksums match and sets the checksum to invalid if they don't (or sets the new one if no previous one exists)
def combineChecksums(filename, checksums):
    """Compare a rule-state checksum with the one stored next to a file.

    The stored state lives in ``filename + ".rulestate"``.

    Args:
        filename: Path of the data file whose rule state is tracked.
        checksums: Checksum string describing the currently loaded rules.

    Returns:
        True if no state was stored yet (the given one is stored now) or if
        the stored state equals ``checksums``; False otherwise. On a
        mismatch the stored state is replaced by ``"INVALID"``.
    """
    statefile = filename + ".rulestate"
    try:
        with open(statefile, "r") as f:
            oldchecksums = f.read()
    except FileNotFoundError:
        # no previous state: everything in the file was created under the
        # rule state given by the caller
        with open(statefile, "w") as f:
            f.write(checksums)
        return True

    if oldchecksums == checksums:
        # the new checksum represents the rule state that all current
        # unsaved scrobbles were created under; same as before, all good
        return True
    if oldchecksums != "INVALID":
        # the file is not consistent with any single rule state (some
        # scrobbles were created with an old ruleset, some not)
        with open(statefile, "w") as f:
            f.write("INVALID")  # this will never match any checksum string
    # if the file already says invalid there is no need to rewrite it
    return False
2018-12-21 18:32:21 +03:00
# checks ALL files for their rule state. if they are all the same as the current loaded one, the entire database can be assumed to be consistent with the current ruleset
# in any other case, get out
def consistentRulestate(folder, checksums):
    """Check that every ``.tsv`` file in a folder has the given rule state.

    Args:
        folder: Directory containing the ``.tsv`` files and their
            ``.rulestate`` companions.
        checksums: Checksum string of the currently loaded rules.

    Returns:
        True only if each ``.tsv`` file has a readable ``.rulestate`` file
        whose content equals ``checksums``; False as soon as one is missing,
        unreadable or different.
    """
    for scrobblefile in os.listdir(folder + "/"):
        if not scrobblefile.endswith(".tsv"):
            continue
        try:
            # opening inside ``with`` means there is no handle to close when
            # the open itself fails (e.g. the state file does not exist)
            with open(folder + "/" + scrobblefile + ".rulestate", "r") as f:
                if f.read() != checksums:
                    return False
        except (OSError, UnicodeDecodeError):
            return False
    return True
2018-12-20 20:46:55 +03:00
2019-03-11 16:49:29 +03:00
def parseAllTSV(path, *args, escape=True):
    """Parse every ``.tsv`` file directly inside ``path`` with parseTSV.

    Args:
        path: Directory to scan (not recursive).
        *args: Column type names, passed through to parseTSV.
        escape: Passed through to parseTSV.

    Returns:
        The rows of all files concatenated into one list, in the order in
        which ``os.listdir`` reports the files.
    """
    rows = []
    tsv_names = (name for name in os.listdir(path + "/") if name.endswith(".tsv"))
    for name in tsv_names:
        rows.extend(parseTSV(path + "/" + name, *args, escape=escape))
    return rows
2018-11-30 18:01:32 +03:00
def createTSV(filename):
    """Create an empty file at ``filename`` unless something already exists there.

    Exclusive-create mode makes the existence check and the creation a
    single step, so a file that appears in between cannot be truncated.
    """
    try:
        open(filename, "x").close()
    except FileExistsError:
        # already present: leave it untouched
        pass
2018-12-20 19:23:16 +03:00
2019-03-11 16:49:29 +03:00
def addEntry(filename, a, escape=True):
    """Append one tab-separated line to a file, creating the file if needed.

    Args:
        filename: Path of the file to append to.
        a: Iterable of strings, one per column.
        escape: If true, every ``#`` in the line is written as ``\\num`` so
            it is not mistaken for a comment marker when read back.
    """
    line = "\t".join(a)
    if escape:
        line = line.replace("#", r"\num")
    # append mode creates a missing file itself, so no separate
    # check-and-create step is needed
    with open(filename, "a") as f:
        f.write(line + "\n")
2019-03-11 16:49:29 +03:00
def addEntries(filename, al, escape=True):
    """Append several tab-separated lines to a file.

    Args:
        filename: Path of the file to append to (created if missing).
        al: Iterable of entries, each an iterable of column strings.
        escape: If true, every ``#`` is written as ``\\num``.
    """
    def render(columns):
        text = "\t".join(columns)
        return text.replace("#", r"\num") if escape else text

    with open(filename, "a") as out:
        for columns in al:
            out.write(render(columns) + "\n")
2019-02-15 23:07:08 +03:00
### Useful functions
def int_or_none(input_):
    """Return ``int(input_)``, or None if the value cannot be converted."""
    try:
        return int(input_)
    except (TypeError, ValueError, OverflowError):
        # only conversion failures are expected here; anything else
        # (e.g. KeyboardInterrupt) must not be swallowed
        return None
def cleandict(d):
    """Remove, in place, every key of ``d`` whose value is None."""
    empty_keys = [key for key, value in d.items() if value is None]
    for key in empty_keys:
        del d[key]
2018-12-21 00:07:22 +03:00
2018-12-08 02:01:44 +03:00
2018-12-17 17:10:10 +03:00
### Logging
2018-12-08 02:01:44 +03:00
2019-03-10 22:05:38 +03:00
def log(msg, module=None):
    """Print a message and append it, with a UTC timestamp, to a log file.

    Args:
        msg: Text to log.
        module: Name used as the prefix on stdout and as the log file name
            (``logs/<module>.log``). If None, the name of the calling module
            is used, with ``__main__`` reported as ``mainserver``.
    """
    now = datetime.datetime.now(datetime.timezone.utc).strftime("%Y/%m/%d %H:%M:%S")
    if module is None:
        import inspect
        caller = inspect.getmodule(inspect.stack()[1][0])
        # getmodule() returns None when the calling frame belongs to no
        # importable module; treat that like the main script
        module = caller.__name__ if caller is not None else "__main__"
        if module == "__main__":
            module = "mainserver"
    print("[" + module + "] " + msg)
    # make sure the target directory exists so logging never fails on a
    # fresh installation
    os.makedirs("logs", exist_ok=True)
    with open("logs/" + module + ".log", "a") as logfile:
        logfile.write(now + " " + msg + "\n")
2018-12-17 17:10:10 +03:00
2019-03-11 22:04:23 +03:00
### not meant to be precise, just for a rough idea
# timestamp of the most recent clock() call (0 before the first call)
measurement = 0


def clock(*args):
    """Rough stopwatch: remember the current time, optionally printing the delta.

    If at least one argument is given, the first one is printed as a label
    together with the number of seconds since the previous call.
    """
    import time
    global measurement
    current = time.time()
    if args:
        elapsed = current - measurement
        print(args[0] + ": " + str(elapsed))
    measurement = current
2019-02-19 16:57:39 +03:00
2019-03-12 14:56:53 +03:00
2018-12-17 17:10:10 +03:00
### Media info
2018-12-28 20:06:09 +03:00
def apirequest(artists=None, artist=None, title=None):
    """Ask the configured web services for an image URL.

    The API key is read from the file ``apikey`` in the working directory.

    Args:
        artists: List of artist names of a track (used when ``title`` is
            given).
        artist: Single artist name (used when ``title`` is None).
        title: Track title; if given, a track image is requested, otherwise
            an artist image.

    Returns:
        The image URL reported by the first service that has one, or None if
        the key file is missing, contains ``NONE``, or no service returned a
        usable image. For a track with several artists that yields nothing,
        the lookup is repeated with each artist on its own.
    """
    import json
    import urllib.parse
    import urllib.request

    try:
        with open("apikey", "r") as keyfile:
            apikey = keyfile.read().replace("\n", "")
    except (OSError, UnicodeDecodeError):
        return None
    if apikey == "NONE":
        return None
    # the key is substituted as a format field (not concatenated into the
    # template) so that braces in it cannot break str.format
    keystr = urllib.parse.quote(apikey)

    sites = [
        {
            "name": "lastfm",
            "artisturl": "https://ws.audioscrobbler.com/2.0/?method=artist.getinfo&artist={artist}&api_key={apikey}&format=json",
            "trackurl": "https://ws.audioscrobbler.com/2.0/?method=track.getinfo&track={title}&artist={artist}&api_key={apikey}&format=json",
            "result_artist_imgurl": lambda data: data["artist"]["image"][3]["#text"],
            "result_track_imgurl": lambda data: data["track"]["album"]["image"][3]["#text"],
        }
    ]

    def fetch(template, extract, logtext, **fields):
        # Best effort: any failure (network, malformed JSON, unexpected
        # structure) counts as "this site has no image".
        try:
            quoted = {k: urllib.parse.quote(v) for k, v in fields.items()}
            url = template.format(apikey=keystr, **quoted)
            with urllib.request.urlopen(url) as response:
                log(logtext, module="external")
                data = json.loads(response.read())
            return extract(data)
        except Exception:
            return ""

    # TRACKS
    if title is not None:
        artists = list(artists) if artists is not None else []
        for s in sites:
            try:
                logtext = "API: " + s["name"] + "; Image request: " + "/".join(artists) + " - " + title
                artiststr = ", ".join(artists)
            except TypeError:
                continue
            imgurl = fetch(s["trackurl"], s["result_track_imgurl"], logtext,
                           artist=artiststr, title=title)
            if imgurl != "":
                return imgurl

        if len(artists) == 1:
            return None

        # try the same track with every single artist
        for a in artists:
            rec = apirequest(artists=[a], title=title)
            if rec is not None:
                return rec
        return None

    # ARTISTS
    for s in sites:
        try:
            logtext = "API: " + s["name"] + "; Image request: " + artist
        except TypeError:
            continue
        imgurl = fetch(s["artisturl"], s["result_artist_imgurl"], logtext, artist=artist)
        if imgurl != "":
            return imgurl
    return None
2018-12-28 20:06:09 +03:00
2019-01-10 01:29:01 +03:00
# I think I've only just understood modules
2018-12-28 20:06:09 +03:00
# in-memory image caches; values are image URLs or None for a cached miss
cachedTracks = {}    # keyed by (frozenset(artists), title)
cachedArtists = {}   # keyed by artist name


def saveCache():
    """Write both image caches to ``images/cache`` as one pickled tuple.

    The data is serialized before the file is opened, so a failing dump
    cannot leave a truncated cache file behind.
    """
    stream = pickle.dumps((cachedTracks, cachedArtists))
    with open("images/cache", "wb") as fl:
        fl.write(stream)
def loadCache():
    """Load both image caches from ``images/cache`` if that file can be opened.

    The module-level ``cachedTracks`` and ``cachedArtists`` are rebound to
    the loaded dictionaries. Entries whose value is the empty string are
    then removed and each removal is logged.

    Raises:
        Whatever ``pickle.loads`` or the tuple unpacking raises for a
        corrupt or unexpected cache file.
    """
    global cachedTracks, cachedArtists
    try:
        with open("images/cache", "rb") as fl:
            stream = fl.read()
    except OSError:
        # no (readable) cache file: keep the current in-memory caches
        return

    # NOTE(security): pickle.loads can execute arbitrary code. This is only
    # acceptable as long as images/cache is written exclusively by saveCache
    # and is not writable by untrusted parties.
    (cachedTracks, cachedArtists) = pickle.loads(stream)

    # remove corrupt caching from previous versions
    for cache in (cachedTracks, cachedArtists):
        # collect first, the dict must not change size while iterating
        toremove = [k for k in cache if cache[k] == ""]
        for k in toremove:
            del cache[k]
            log("Removed invalid cache key: " + str(k))
2019-02-16 18:42:45 +03:00
2019-03-12 13:39:36 +03:00
def _firstArtistImage(artists, fast):
    """Return the first non-empty artist image among ``artists``, else ""."""
    for a in artists:
        res = getArtistImage(artist=a, fast=fast)
        if res != "":
            return res
    return ""


def getTrackImage(artists, title, fast=False):
    """Return an image URL for a track.

    Lookup order: a custom image file below ``images/tracks/``, the
    in-memory cache, and finally (unless ``fast``) the external API. When no
    track image exists, the image of the first artist that has one is used.

    Args:
        artists: List of artist names.
        title: Track title.
        fast: If true, never query the API; for an uncached track a
            ``/image?...`` link is returned instead so the image can be
            resolved later.

    Returns:
        A URL or site-relative path, or "" if nothing was found.
    """
    import urllib.parse  # a plain ``import urllib`` does not provide urllib.parse

    obj = (frozenset(artists), title)
    artistpart = "-".join(re.sub("[^a-zA-Z0-9]", "", artist) for artist in artists)
    titlepart = re.sub("[^a-zA-Z0-9]", "", title)
    filename = artistpart + "_" + titlepart
    if artistpart.strip("-") == "" and titlepart == "":
        # nothing alphanumeric survived: fall back to a digest that is the
        # same in every process (the built-in hash() of strings is not)
        ident = "\x1f".join(sorted(artists)) + "\x1e" + title
        filename = hashlib.md5(ident.encode("utf-8")).hexdigest()
    filepath = "images/tracks/" + filename

    # check if custom image exists
    for ext in (".png", ".jpg", ".jpeg"):
        if os.path.exists(filepath + ext):
            return "/" + filepath + ext

    # check our cache
    try:
        result = cachedTracks[obj]
    except KeyError:
        pass
    else:
        if result is not None:
            return result
        # the nonexistence of that image is cached: immediately fall back to
        # the artists (even if we're not in a fast lookup right now)
        return _firstArtistImage(artists, fast=True)

    # fast request only returns cached and local results, generates redirect
    # link for the rest
    if fast:
        return ("/image?title=" + urllib.parse.quote(title) + "&"
                + "&".join(["artist=" + urllib.parse.quote(a) for a in artists]))

    # non-fast lookup (essentially only the resolver lookup)
    result = apirequest(artists=artists, title=title)
    # cache results (even negative ones)
    cachedTracks[obj] = result
    # return either result or fall back to the artists
    if result is not None:
        return result
    return _firstArtistImage(artists, fast=False)
2018-12-28 20:06:09 +03:00
2019-03-12 13:39:36 +03:00
def getArtistImage(artist, fast=False):
    """Return an image URL for an artist.

    Lookup order: a custom image file below ``images/artists/``, the
    in-memory cache, and finally (unless ``fast``) the external API.

    Args:
        artist: Artist name.
        fast: If true, never query the API; for an uncached artist a
            ``/image?artist=...`` link is returned instead.

    Returns:
        A URL or site-relative path, or "" if no image is known.
    """
    import urllib.parse  # a plain ``import urllib`` does not provide urllib.parse

    filename = re.sub("[^a-zA-Z0-9]", "", artist)
    if filename == "":
        # names without any ASCII letters or digits get a digest that is the
        # same in every process (the built-in hash() of strings is not)
        filename = hashlib.md5(artist.encode("utf-8")).hexdigest()
    filepath = "images/artists/" + filename

    # check if custom image exists
    for ext in (".png", ".jpg", ".jpeg"):
        if os.path.exists(filepath + ext):
            return "/" + filepath + ext

    # check our cache (None means "known to have no image")
    try:
        result = cachedArtists[artist]
    except KeyError:
        pass
    else:
        return result if result is not None else ""

    # fast request only returns cached and local results, generates redirect
    # link for the rest
    if fast:
        return "/image?artist=" + urllib.parse.quote(artist)

    # non-fast lookup (essentially only the resolver lookup)
    result = apirequest(artist=artist)
    # cache results (even negative ones)
    cachedArtists[artist] = result
    return result if result is not None else ""
2019-02-02 20:08:30 +03:00
2019-03-12 13:39:36 +03:00
def getTrackImages(trackobjectlist, fast=False):
    """Return the images for a list of tracks, in the same order.

    Args:
        trackobjectlist: List of dicts with the keys ``"artists"`` and
            ``"title"``.
        fast: Passed on to getTrackImage for every lookup, including the
            final collection of results.

    Returns:
        A list with one getTrackImage result per track.
    """
    if not fast:
        # resolve all tracks in parallel so the results end up in the cache;
        # fast lookups are purely local and need no prefetching
        threads = [
            Thread(target=getTrackImage,
                   args=(track["artists"], track["title"]),
                   kwargs={"fast": False})
            for track in trackobjectlist
        ]
        for t in threads:
            t.start()
        for t in threads:
            t.join()
    # the threads only fill the cache, now collect the results
    return [getTrackImage(t["artists"], t["title"], fast=fast) for t in trackobjectlist]
2018-12-19 18:11:10 +03:00
2019-03-12 13:39:36 +03:00
def getArtistImages(artistlist, fast=False):
    """Return the images for a list of artists, in the same order.

    Args:
        artistlist: List of artist names.
        fast: Passed on to getArtistImage for every lookup, including the
            final collection of results.

    Returns:
        A list with one getArtistImage result per artist.
    """
    if not fast:
        # resolve all artists in parallel so the results end up in the cache;
        # fast lookups are purely local and need no prefetching
        threads = [
            Thread(target=getArtistImage, args=(artist,), kwargs={"fast": False})
            for artist in artistlist
        ]
        for t in threads:
            t.start()
        for t in threads:
            t.join()
    # the threads only fill the cache, now collect the results
    return [getArtistImage(a, fast=fast) for a in artistlist]
2019-02-03 18:52:37 +03:00
2019-02-20 23:10:58 +03:00
# new way of serving images
# instead of resolving every image up front, always generate a link locally and redirect it on the fly
# this way the page can load faster and images will trickle in without having to resort to XMLHttpRequest calls
def resolveImage(artist=None, track=None):
    """Resolve the image for a track or an artist with a full (non-fast) lookup.

    Args:
        artist: Artist name; only used when ``track`` is None.
        track: Dict with the keys ``"artists"`` and ``"title"``; takes
            precedence over ``artist``.

    Returns:
        The result of getTrackImage or getArtistImage, or None if neither
        argument is given.
    """
    if track is None:
        if artist is None:
            return None
        return getArtistImage(artist)
    return getTrackImage(track["artists"], track["title"])
2019-02-02 18:17:07 +03:00