"""SQLAlchemy-Core persistence layer for scrobbles, tracks, artists and albums.

Defines the schema (DBTABLES), creates/upgrades the SQLite database, and
provides all read/write operations. Conversion helpers translate between
DB rows and the canonical scrobble/track/artist/album dicts used by the
rest of the application.
"""

import sqlalchemy as sql
import json
import unicodedata
import math
from datetime import datetime
from threading import Lock

from ..pkg_global.conf import data_dir
from .dbcache import cached_wrapper, cached_wrapper_individual
from . import exceptions as exc

from doreah.logging import log
from doreah.regular import runhourly, runmonthly


##### DB Technical

# name - type - foreign key - kwargs
DBTABLES = {
    'scrobbles': {
        'columns': [
            ("timestamp", sql.Integer, {'primary_key': True}),
            ("rawscrobble", sql.String, {}),
            ("origin", sql.String, {}),
            ("duration", sql.Integer, {}),
            ("track_id", sql.Integer, sql.ForeignKey('tracks.id'), {}),
            ("extra", sql.String, {})
        ],
        'extraargs': (), 'extrakwargs': {}
    },
    'tracks': {
        'columns': [
            ("id", sql.Integer, {'primary_key': True}),
            ("title", sql.String, {}),
            ("title_normalized", sql.String, {}),
            ("length", sql.Integer, {}),
            ("album_id", sql.Integer, sql.ForeignKey('albums.id'), {})
        ],
        'extraargs': (), 'extrakwargs': {'sqlite_autoincrement': True}
    },
    'artists': {
        'columns': [
            ("id", sql.Integer, {'primary_key': True}),
            ("name", sql.String, {}),
            ("name_normalized", sql.String, {})
        ],
        'extraargs': (), 'extrakwargs': {'sqlite_autoincrement': True}
    },
    'albums': {
        'columns': [
            ("id", sql.Integer, {'primary_key': True}),
            ("albtitle", sql.String, {}),
            ("albtitle_normalized", sql.String, {})
            # when an album has no artists, always use 'Various Artists'
        ],
        'extraargs': (), 'extrakwargs': {'sqlite_autoincrement': True}
    },
    'trackartists': {
        'columns': [
            ("id", sql.Integer, {'primary_key': True}),
            ("artist_id", sql.Integer, sql.ForeignKey('artists.id'), {}),
            ("track_id", sql.Integer, sql.ForeignKey('tracks.id'), {})
        ],
        'extraargs': (sql.UniqueConstraint('artist_id', 'track_id'),), 'extrakwargs': {}
    },
    'albumartists': {
        'columns': [
            ("id", sql.Integer, {'primary_key': True}),
            ("artist_id", sql.Integer, sql.ForeignKey('artists.id'), {}),
            ("album_id", sql.Integer, sql.ForeignKey('albums.id'), {})
        ],
        'extraargs': (sql.UniqueConstraint('artist_id', 'album_id'),), 'extrakwargs': {}
    },
    'associated_artists': {
        'columns': [
            ("source_artist", sql.Integer, sql.ForeignKey('artists.id'), {}),
            ("target_artist", sql.Integer, sql.ForeignKey('artists.id'), {})
        ],
        'extraargs': (sql.UniqueConstraint('source_artist', 'target_artist'),), 'extrakwargs': {}
    }
}

DB = {}

engine = sql.create_engine(f"sqlite:///{data_dir['scrobbles']('malojadb.sqlite')}", echo=False)
meta = sql.MetaData()

# create table definitions
for tablename in DBTABLES:
    DB[tablename] = sql.Table(
        tablename, meta,
        *[sql.Column(colname, *args, **kwargs) for colname, *args, kwargs in DBTABLES[tablename]['columns']],
        *DBTABLES[tablename]['extraargs'],
        **DBTABLES[tablename]['extrakwargs']
    )

# actually create tables for new databases
meta.create_all(engine)

# upgrade old database with new columns
with engine.begin() as conn:
    for tablename in DBTABLES:
        info = DBTABLES[tablename]
        table = DB[tablename]
        for colname, datatype, *args, kwargs in info['columns']:
            try:
                statement = f"ALTER TABLE {tablename} ADD {colname} {datatype().compile()}"
                conn.execute(sql.text(statement))
                log(f"Column {colname} was added to table {tablename}!")
                # TODO figure out how to compile foreign key references!
            except sql.exc.OperationalError:
                # column already exists - nothing to upgrade
                pass


# adding a scrobble could consist of multiple write operations that sqlite doesn't
# see as belonging together
SCROBBLE_LOCK = Lock()


def connection_provider(func):
    """Decorator that passes either the provided dbconn, or creates a separate
    one just for this function call."""
    def wrapper(*args, **kwargs):
        if kwargs.get("dbconn") is not None:
            return func(*args, **kwargs)
        else:
            with engine.connect() as connection:
                with connection.begin():
                    kwargs['dbconn'] = connection
                    return func(*args, **kwargs)

    wrapper.__innerfunc__ = func
    return wrapper


##### DB <-> Dict translations

## ATTENTION ALL ADVENTURERS
## this is what a scrobble dict will look like from now on
## this is the single canonical source of truth
## stop making different little dicts in every single function
## this is the schema that will definitely 100% stay like this and not
## randomly get changed two versions later
## here we go
#
# {
# 	"time":int,
# 	"track":{
# 		"artists":list,
# 		"title":string,
# 		"album":{
# 			"albumtitle":string,
# 			"artists":list
# 		},
# 		"length":None
# 	},
# 	"duration":int,
# 	"origin":string,
# 	"extra":{string-keyed mapping for all flags with the scrobble},
# 	"rawscrobble":{string-keyed mapping of the original scrobble received}
# }
#
# The last two fields are not returned under normal circumstances


##### Conversions between DB and dicts

# These should work on whole lists and collect all the references,
# then look them up once and fill them in


### DB -> DICT

def scrobbles_db_to_dict(rows, include_internal=False, dbconn=None):
    """Convert scrobble rows to canonical scrobble dicts, resolving tracks in bulk."""
    tracks = get_tracks_map(set(row.track_id for row in rows), dbconn=dbconn)
    return [
        {
            **{
                "time": row.timestamp,
                "track": tracks[row.track_id],
                "duration": row.duration,
                "origin": row.origin,
            },
            **({
                "extra": json.loads(row.extra or '{}'),
                "rawscrobble": json.loads(row.rawscrobble or '{}')
            } if include_internal else {})
        }
        for row in rows
    ]


def scrobble_db_to_dict(row, dbconn=None):
    return scrobbles_db_to_dict([row], dbconn=dbconn)[0]


def tracks_db_to_dict(rows, dbconn=None):
    """Convert track rows to track dicts, resolving all artists in one lookup."""
    artists = get_artists_of_tracks(set(row.id for row in rows), dbconn=dbconn)
    return [
        {
            "artists": artists[row.id],
            "title": row.title,
            #"album":
            "length": row.length
        }
        for row in rows
    ]


def track_db_to_dict(row, dbconn=None):
    return tracks_db_to_dict([row], dbconn=dbconn)[0]


def artists_db_to_dict(rows, dbconn=None):
    # artists are represented by their plain name string
    return [row.name for row in rows]


def artist_db_to_dict(row, dbconn=None):
    return artists_db_to_dict([row], dbconn=dbconn)[0]


def albums_db_to_dict(rows, dbconn=None):
    """Convert album rows to album dicts, resolving all artists in one lookup."""
    artists = get_artists_of_albums(set(row.id for row in rows), dbconn=dbconn)
    return [
        {
            "artists": artists[row.id],
            "albumtitle": row.albtitle,
        }
        for row in rows
    ]


def album_db_to_dict(row, dbconn=None):
    return albums_db_to_dict([row], dbconn=dbconn)[0]


### DICT -> DB

# These should return None when no data is in the dict so they can be used
# for update statements

def scrobble_dict_to_db(info, update_album=False, dbconn=None):
    return {
        "timestamp": info.get('time'),
        "origin": info.get('origin'),
        "duration": info.get('duration'),
        "track_id": get_track_id(info.get('track'), update_album=update_album, dbconn=dbconn),
        "extra": json.dumps(info.get('extra')) if info.get('extra') else None,
        "rawscrobble": json.dumps(info.get('rawscrobble')) if info.get('rawscrobble') else None
    }


def track_dict_to_db(info, dbconn=None):
    return {
        "title": info.get('title'),
        "title_normalized": normalize_name(info.get('title', '')) or None,
        "length": info.get('length')
    }


def artist_dict_to_db(info, dbconn=None):
    return {
        "name": info,
        "name_normalized": normalize_name(info)
    }


def album_dict_to_db(info, dbconn=None):
    return {
        "albtitle": info.get('albumtitle'),
        "albtitle_normalized": normalize_name(info.get('albumtitle'))
    }


##### Actual Database interactions


@connection_provider
def add_scrobble(scrobbledict, update_album=False, dbconn=None):
    add_scrobbles([scrobbledict], update_album=update_album, dbconn=dbconn)


@connection_provider
def add_scrobbles(scrobbleslist, update_album=False, dbconn=None):
    """Insert a list of scrobble dicts; returns (success_count, error_count)."""
    with SCROBBLE_LOCK:

        ops = [
            DB['scrobbles'].insert().values(
                **scrobble_dict_to_db(s, update_album=update_album, dbconn=dbconn)
            )
            for s in scrobbleslist
        ]

        success, errors = 0, 0
        for op in ops:
            try:
                dbconn.execute(op)
                success += 1
            except sql.exc.IntegrityError:
                errors += 1
                # TODO check if actual duplicate

    if errors > 0:
        log(f"{errors} Scrobbles have not been written to database!", color='red')
    return success, errors


@connection_provider
def delete_scrobble(scrobble_id, dbconn=None):
    """Delete the scrobble identified by its timestamp primary key."""
    with SCROBBLE_LOCK:
        op = DB['scrobbles'].delete().where(
            DB['scrobbles'].c.timestamp == scrobble_id
        )
        result = dbconn.execute(op)
    return True


@connection_provider
def add_track_to_album(track_id, album_id, replace=False, dbconn=None):
    """Set a track's album; unless replace, only when the track has none yet."""
    conditions = [
        DB['tracks'].c.id == track_id
    ]
    if not replace:
        # if we dont want replacement, just update if there is no album yet
        conditions.append(
            DB['tracks'].c.album_id == None
        )

    op = DB['tracks'].update().where(
        *conditions
    ).values(
        album_id=album_id
    )
    result = dbconn.execute(op)
    return True


### these will 'get' the ID of an entity, creating it if necessary


@cached_wrapper
@connection_provider
def get_track_id(trackdict, create_new=True, update_album=False, dbconn=None):
    """Find (by normalized title + exact artist set) or create a track; return its id."""
    ntitle = normalize_name(trackdict['title'])
    artist_ids = [get_artist_id(a, dbconn=dbconn) for a in trackdict['artists']]
    artist_ids = list(set(artist_ids))

    op = DB['tracks'].select(
        #	DB['tracks'].c.id
    ).where(
        DB['tracks'].c.title_normalized == ntitle
    )
    result = dbconn.execute(op).all()
    for row in result:
        # check if the artists are the same
        foundtrackartists = []

        op = DB['trackartists'].select(
            #	DB['trackartists'].c.artist_id
        ).where(
            DB['trackartists'].c.track_id == row.id
        )
        result = dbconn.execute(op).all()
        match_artist_ids = [r.artist_id for r in result]
        #print("required artists",artist_ids,"this match",match_artist_ids)
        if set(artist_ids) == set(match_artist_ids):
            #print("ID for",trackdict['title'],"was",row[0])
            if 'album' in trackdict:
                add_track_to_album(row.id, get_album_id(trackdict['album'], dbconn=dbconn), replace=update_album, dbconn=dbconn)
            return row.id

    if not create_new:
        return None

    op = DB['tracks'].insert().values(
        **track_dict_to_db(trackdict, dbconn=dbconn)
    )
    result = dbconn.execute(op)
    track_id = result.inserted_primary_key[0]

    for artist_id in artist_ids:
        op = DB['trackartists'].insert().values(
            track_id=track_id,
            artist_id=artist_id
        )
        result = dbconn.execute(op)
    #print("Created",trackdict['title'],track_id)

    if 'album' in trackdict:
        add_track_to_album(track_id, get_album_id(trackdict['album'], dbconn=dbconn), dbconn=dbconn)

    return track_id


@cached_wrapper
@connection_provider
def get_artist_id(artistname, create_new=True, dbconn=None):
    """Find (by normalized name) or create an artist; return its id."""
    nname = normalize_name(artistname)
    #print("looking for",nname)

    op = DB['artists'].select(
        #	DB['artists'].c.id
    ).where(
        DB['artists'].c.name_normalized == nname
    )
    result = dbconn.execute(op).all()
    for row in result:
        #print("ID for",artistname,"was",row[0])
        return row.id

    if not create_new:
        return None

    op = DB['artists'].insert().values(
        name=artistname,
        name_normalized=nname
    )
    result = dbconn.execute(op)
    #print("Created",artistname,result.inserted_primary_key)
    return result.inserted_primary_key[0]


@cached_wrapper
@connection_provider
def get_album_id(albumdict, create_new=True, dbconn=None):
    """Find (by normalized title + exact artist set) or create an album; return its id."""
    ntitle = normalize_name(albumdict['albumtitle'])
    artist_ids = [get_artist_id(a, dbconn=dbconn) for a in albumdict['artists']]
    artist_ids = list(set(artist_ids))

    op = DB['albums'].select(
        #	DB['albums'].c.id
    ).where(
        DB['albums'].c.albtitle_normalized == ntitle
    )
    result = dbconn.execute(op).all()
    for row in result:
        # check if the artists are the same
        foundtrackartists = []

        op = DB['albumartists'].select(
            #	DB['albumartists'].c.artist_id
        ).where(
            DB['albumartists'].c.album_id == row.id
        )
        result = dbconn.execute(op).all()
        match_artist_ids = [r.artist_id for r in result]
        #print("required artists",artist_ids,"this match",match_artist_ids)
        if set(artist_ids) == set(match_artist_ids):
            #print("ID for",albumdict['title'],"was",row[0])
            return row.id

    if not create_new:
        return None

    op = DB['albums'].insert().values(
        **album_dict_to_db(albumdict, dbconn=dbconn)
    )
    result = dbconn.execute(op)
    album_id = result.inserted_primary_key[0]

    for artist_id in artist_ids:
        op = DB['albumartists'].insert().values(
            album_id=album_id,
            artist_id=artist_id
        )
        result = dbconn.execute(op)

    return album_id


### Edit existing


@connection_provider
def edit_scrobble(scrobble_id, scrobbleupdatedict, dbconn=None):
    """Update a scrobble row with all non-empty fields of the given dict."""
    dbentry = scrobble_dict_to_db(scrobbleupdatedict, dbconn=dbconn)
    dbentry = {k: v for k, v in dbentry.items() if v}

    # was a leftover debug print; route through the regular logger instead
    log(f"Updating scrobble {dbentry}")

    with SCROBBLE_LOCK:
        op = DB['scrobbles'].update().where(
            DB['scrobbles'].c.timestamp == scrobble_id
        ).values(
            **dbentry
        )
        dbconn.execute(op)


@connection_provider
def edit_artist(id, artistupdatedict, dbconn=None):
    """Rename an artist; raises ArtistExists if the new name collides with another."""
    artist = get_artist(id, dbconn=dbconn)
    changedartist = artistupdatedict  # well

    dbentry = artist_dict_to_db(artistupdatedict, dbconn=dbconn)
    dbentry = {k: v for k, v in dbentry.items() if v}

    existing_artist_id = get_artist_id(changedartist, create_new=False, dbconn=dbconn)
    if existing_artist_id not in (None, id):
        raise exc.ArtistExists(changedartist)

    op = DB['artists'].update().where(
        DB['artists'].c.id == id
    ).values(
        **dbentry
    )
    result = dbconn.execute(op)

    return True


@connection_provider
def edit_track(id, trackupdatedict, dbconn=None):
    """Edit track fields; raises TrackExists if the change collides with another track."""
    track = get_track(id, dbconn=dbconn)
    changedtrack = {**track, **trackupdatedict}

    dbentry = track_dict_to_db(trackupdatedict, dbconn=dbconn)
    dbentry = {k: v for k, v in dbentry.items() if v}

    existing_track_id = get_track_id(changedtrack, create_new=False, dbconn=dbconn)
    if existing_track_id not in (None, id):
        raise exc.TrackExists(changedtrack)

    op = DB['tracks'].update().where(
        DB['tracks'].c.id == id
    ).values(
        **dbentry
    )
    result = dbconn.execute(op)

    return True


### Merge


@connection_provider
def merge_tracks(target_id, source_ids, dbconn=None):
    """Repoint all scrobbles of the source tracks to the target track, then clean up."""
    op = DB['scrobbles'].update().where(
        DB['scrobbles'].c.track_id.in_(source_ids)
    ).values(
        track_id=target_id
    )
    result = dbconn.execute(op)
    clean_db(dbconn=dbconn)

    return True


@connection_provider
def merge_artists(target_id, source_ids, dbconn=None):
    """Replace all source artists with the target on every affected track."""
    # some tracks could already have multiple of the to be merged artists

    # find literally all tracksartist entries that have any of the artists involved
    op = DB['trackartists'].select().where(
        DB['trackartists'].c.artist_id.in_(source_ids + [target_id])
    )
    result = dbconn.execute(op)

    track_ids = set(row.track_id for row in result)

    # now just delete them all lmao
    op = DB['trackartists'].delete().where(
        DB['trackartists'].c.artist_id.in_(source_ids + [target_id]),
    )
    result = dbconn.execute(op)

    # now add back the real new artist
    # (guard: an empty bulk-insert VALUES list would raise in SQLAlchemy)
    if track_ids:
        op = DB['trackartists'].insert().values([
            {'track_id': track_id, 'artist_id': target_id} for track_id in track_ids
        ])
        result = dbconn.execute(op)

    # this could have created duplicate tracks
    merge_duplicate_tracks(artist_id=target_id, dbconn=dbconn)
    clean_db(dbconn=dbconn)

    return True


### Functions that get rows according to parameters


@cached_wrapper
@connection_provider
def get_scrobbles_of_artist(artist, since=None, to=None, resolve_references=True, dbconn=None):
    """All scrobbles of an artist in [since, to], ascending by timestamp."""
    if since is None: since = 0
    if to is None: to = now()

    artist_id = get_artist_id(artist, dbconn=dbconn)

    jointable = sql.join(DB['scrobbles'], DB['trackartists'], DB['scrobbles'].c.track_id == DB['trackartists'].c.track_id)

    op = jointable.select().where(
        DB['scrobbles'].c.timestamp <= to,
        DB['scrobbles'].c.timestamp >= since,
        DB['trackartists'].c.artist_id == artist_id
    ).order_by(sql.asc('timestamp'))
    result = dbconn.execute(op).all()

    if resolve_references:
        result = scrobbles_db_to_dict(result, dbconn=dbconn)
    return result


@cached_wrapper
@connection_provider
def get_scrobbles_of_track(track, since=None, to=None, resolve_references=True, dbconn=None):
    """All scrobbles of a track in [since, to], ascending by timestamp."""
    if since is None: since = 0
    if to is None: to = now()

    track_id = get_track_id(track, dbconn=dbconn)

    op = DB['scrobbles'].select().where(
        DB['scrobbles'].c.timestamp <= to,
        DB['scrobbles'].c.timestamp >= since,
        DB['scrobbles'].c.track_id == track_id
    ).order_by(sql.asc('timestamp'))
    result = dbconn.execute(op).all()

    if resolve_references:
        # pass dbconn so references resolve on this connection (consistent
        # with get_scrobbles_of_artist; avoids opening a nested connection)
        result = scrobbles_db_to_dict(result, dbconn=dbconn)
    return result


@cached_wrapper
@connection_provider
def get_scrobbles_of_album(album, since=None, to=None, resolve_references=True, dbconn=None):
    """All scrobbles of an album in [since, to], ascending by timestamp."""
    if since is None: since = 0
    if to is None: to = now()

    album_id = get_album_id(album, dbconn=dbconn)

    jointable = sql.join(DB['scrobbles'], DB['tracks'], DB['scrobbles'].c.track_id == DB['tracks'].c.id)

    op = jointable.select().where(
        DB['scrobbles'].c.timestamp <= to,
        DB['scrobbles'].c.timestamp >= since,
        DB['tracks'].c.album_id == album_id
    ).order_by(sql.asc('timestamp'))
    result = dbconn.execute(op).all()

    if resolve_references:
        # pass dbconn so references resolve on this connection (see above)
        result = scrobbles_db_to_dict(result, dbconn=dbconn)
    return result


@cached_wrapper
@connection_provider
def get_scrobbles(since=None, to=None, resolve_references=True, dbconn=None):
    """All scrobbles in [since, to], ascending by timestamp."""
    if since is None: since = 0
    if to is None: to = now()

    op = DB['scrobbles'].select().where(
        DB['scrobbles'].c.timestamp <= to,
        DB['scrobbles'].c.timestamp >= since,
    ).order_by(sql.asc('timestamp'))
    result = dbconn.execute(op).all()

    if resolve_references:
        result = scrobbles_db_to_dict(result, dbconn=dbconn)
    return result


# NOTE(review): this function's header was garbled in the source; it has been
# reconstructed from its surviving tail (`result[0][0]` over a count select)
# and the conventions of its siblings. Confirm against upstream.
@cached_wrapper
@connection_provider
def get_scrobbles_num(since=None, to=None, dbconn=None):
    """Number of scrobbles in [since, to]."""
    if since is None: since = 0
    if to is None: to = now()

    op = sql.select(sql.func.count()).select_from(DB['scrobbles']).where(
        DB['scrobbles'].c.timestamp <= to,
        DB['scrobbles'].c.timestamp >= since,
    )
    result = dbconn.execute(op).all()
    return result[0][0]


@cached_wrapper
@connection_provider
def get_artists_of_track(track_id, resolve_references=True, dbconn=None):
    """Artists of one track, either resolved to names or as raw ids."""
    op = DB['trackartists'].select().where(
        DB['trackartists'].c.track_id == track_id
    )
    result = dbconn.execute(op).all()

    artists = [get_artist(row.artist_id, dbconn=dbconn) if resolve_references else row.artist_id for row in result]
    return artists


@cached_wrapper
@connection_provider
def get_tracks_of_artist(artist, dbconn=None):
    artist_id = get_artist_id(artist, dbconn=dbconn)

    op = sql.join(DB['tracks'], DB['trackartists']).select().where(
        DB['trackartists'].c.artist_id == artist_id
    )
    result = dbconn.execute(op).all()

    return tracks_db_to_dict(result, dbconn=dbconn)


@cached_wrapper
@connection_provider
def get_artists(dbconn=None):
    op = DB['artists'].select()
    result = dbconn.execute(op).all()

    return artists_db_to_dict(result, dbconn=dbconn)


@cached_wrapper
@connection_provider
def get_tracks(dbconn=None):
    op = DB['tracks'].select()
    result = dbconn.execute(op).all()

    return tracks_db_to_dict(result, dbconn=dbconn)


### functions that count rows for parameters


@cached_wrapper
@connection_provider
def count_scrobbles_by_artist(since, to, resolve_ids=True, dbconn=None):
    """Scrobble counts per artist in [since, to], honoring artist association replacement."""
    jointable = sql.join(
        DB['scrobbles'],
        DB['trackartists'],
        DB['scrobbles'].c.track_id == DB['trackartists'].c.track_id
    )

    jointable2 = sql.join(
        jointable,
        DB['associated_artists'],
        DB['trackartists'].c.artist_id == DB['associated_artists'].c.source_artist,
        isouter=True
    )
    op = sql.select(
        sql.func.count(sql.func.distinct(DB['scrobbles'].c.timestamp)).label('count'),
        # only count distinct scrobbles - because of artist replacement, we could end up
        # with two artists of the same scrobble counting it twice for the same artist
        # e.g. Irene and Seulgi adding two scrobbles to Red Velvet for one real scrobble
        sql.func.coalesce(DB['associated_artists'].c.target_artist, DB['trackartists'].c.artist_id).label('artist_id')
        # use the replaced artist as artist to count if it exists, otherwise original one
    ).select_from(jointable2).where(
        DB['scrobbles'].c.timestamp <= to,
        DB['scrobbles'].c.timestamp >= since
    ).group_by(
        sql.func.coalesce(DB['associated_artists'].c.target_artist, DB['trackartists'].c.artist_id)
    ).order_by(sql.desc('count'))
    result = dbconn.execute(op).all()

    if resolve_ids:
        counts = [row.count for row in result]
        artists = get_artists_map([row.artist_id for row in result], dbconn=dbconn)
        result = [{'scrobbles': row.count, 'artist': artists[row.artist_id]} for row in result]
    else:
        result = [{'scrobbles': row.count, 'artist_id': row.artist_id} for row in result]
    result = rank(result, key='scrobbles')
    return result


@cached_wrapper
@connection_provider
def count_scrobbles_by_track(since, to, resolve_ids=True, dbconn=None):
    """Scrobble counts per track in [since, to]."""
    op = sql.select(
        sql.func.count(sql.func.distinct(DB['scrobbles'].c.timestamp)).label('count'),
        DB['scrobbles'].c.track_id
    ).select_from(DB['scrobbles']).where(
        DB['scrobbles'].c.timestamp <= to,
        DB['scrobbles'].c.timestamp >= since
    ).group_by(DB['scrobbles'].c.track_id).order_by(sql.desc('count'))
    result = dbconn.execute(op).all()

    if resolve_ids:
        counts = [row.count for row in result]
        tracks = get_tracks_map([row.track_id for row in result], dbconn=dbconn)
        result = [{'scrobbles': row.count, 'track': tracks[row.track_id]} for row in result]
    else:
        result = [{'scrobbles': row.count, 'track_id': row.track_id} for row in result]
    result = rank(result, key='scrobbles')
    return result


@cached_wrapper
@connection_provider
def count_scrobbles_by_album(since, to, resolve_ids=True, dbconn=None):
    """Scrobble counts per album in [since, to] (tracks without album excluded)."""
    jointable = sql.join(
        DB['scrobbles'],
        DB['tracks'],
        DB['scrobbles'].c.track_id == DB['tracks'].c.id
    )

    op = sql.select(
        sql.func.count(sql.func.distinct(DB['scrobbles'].c.timestamp)).label('count'),
        DB['tracks'].c.album_id
    ).select_from(jointable).where(
        DB['scrobbles'].c.timestamp <= to,
        DB['scrobbles'].c.timestamp >= since,
        DB['tracks'].c.album_id != None
    ).group_by(DB['tracks'].c.album_id).order_by(sql.desc('count'))
    result = dbconn.execute(op).all()

    if resolve_ids:
        counts = [row.count for row in result]
        albums = get_albums_map([row.album_id for row in result], dbconn=dbconn)
        result = [{'scrobbles': row.count, 'album': albums[row.album_id]} for row in result]
    else:
        result = [{'scrobbles': row.count, 'album_id': row.album_id} for row in result]
    result = rank(result, key='scrobbles')
    return result


@cached_wrapper
@connection_provider
def count_scrobbles_by_track_of_artist(since, to, artist, dbconn=None):
    """Scrobble counts per track of one artist in [since, to]."""
    artist_id = get_artist_id(artist, dbconn=dbconn)

    jointable = sql.join(
        DB['scrobbles'],
        DB['trackartists'],
        DB['scrobbles'].c.track_id == DB['trackartists'].c.track_id
    )

    op = sql.select(
        sql.func.count(sql.func.distinct(DB['scrobbles'].c.timestamp)).label('count'),
        DB['scrobbles'].c.track_id
    ).select_from(jointable).filter(
        DB['scrobbles'].c.timestamp <= to,
        DB['scrobbles'].c.timestamp >= since,
        DB['trackartists'].c.artist_id == artist_id
    ).group_by(DB['scrobbles'].c.track_id).order_by(sql.desc('count'))
    result = dbconn.execute(op).all()

    counts = [row.count for row in result]
    tracks = get_tracks_map([row.track_id for row in result], dbconn=dbconn)
    result = [{'scrobbles': row.count, 'track': tracks[row.track_id]} for row in result]
    result = rank(result, key='scrobbles')
    return result


### functions that get mappings for several entities -> rows


@cached_wrapper_individual
@connection_provider
def get_artists_of_tracks(track_ids, dbconn=None):
    """Map track_id -> list of artist names for all given track ids."""
    op = sql.join(DB['trackartists'], DB['artists']).select().where(
        DB['trackartists'].c.track_id.in_(track_ids)
    )
    result = dbconn.execute(op).all()

    artists = {}
    for row in result:
        artists.setdefault(row.track_id, []).append(artist_db_to_dict(row, dbconn=dbconn))
    return artists


@cached_wrapper_individual
@connection_provider
def get_artists_of_albums(album_ids, dbconn=None):
    """Map album_id -> list of artist names for all given album ids."""
    op = sql.join(DB['albumartists'], DB['artists']).select().where(
        DB['albumartists'].c.album_id.in_(album_ids)
    )
    result = dbconn.execute(op).all()

    artists = {}
    for row in result:
        artists.setdefault(row.album_id, []).append(artist_db_to_dict(row, dbconn=dbconn))
    return artists


@cached_wrapper_individual
@connection_provider
def get_tracks_map(track_ids, dbconn=None):
    """Map track_id -> track dict for all given track ids."""
    op = DB['tracks'].select().where(
        DB['tracks'].c.id.in_(track_ids)
    )
    result = dbconn.execute(op).all()

    tracks = {}
    result = list(result)
    # this will get a list of artistdicts in the correct order of our rows
    trackdicts = tracks_db_to_dict(result, dbconn=dbconn)
    for row, trackdict in zip(result, trackdicts):
        tracks[row.id] = trackdict
    return tracks


@cached_wrapper_individual
@connection_provider
def get_artists_map(artist_ids, dbconn=None):
    """Map artist_id -> artist name for all given artist ids."""
    op = DB['artists'].select().where(
        DB['artists'].c.id.in_(artist_ids)
    )
    result = dbconn.execute(op).all()

    artists = {}
    result = list(result)
    # this will get a list of artistdicts in the correct order of our rows
    artistdicts = artists_db_to_dict(result, dbconn=dbconn)
    for row, artistdict in zip(result, artistdicts):
        artists[row.id] = artistdict
    return artists


@cached_wrapper_individual
@connection_provider
def get_albums_map(album_ids, dbconn=None):
    """Map album_id -> album dict for all given album ids."""
    op = DB['albums'].select().where(
        DB['albums'].c.id.in_(album_ids)
    )
    result = dbconn.execute(op).all()

    albums = {}
    result = list(result)
    # this will get a list of albumdicts in the correct order of our rows
    albumdicts = albums_db_to_dict(result, dbconn=dbconn)
    for row, albumdict in zip(result, albumdicts):
        albums[row.id] = albumdict
    return albums


### associations


@cached_wrapper
@connection_provider
def get_associated_artists(*artists, dbconn=None):
    """Names of all artists whose scrobbles are credited to any of the given artists."""
    artist_ids = [get_artist_id(a, dbconn=dbconn) for a in artists]

    jointable = sql.join(
        DB['associated_artists'],
        DB['artists'],
        DB['associated_artists'].c.source_artist == DB['artists'].c.id
    )

    op = jointable.select().where(
        DB['associated_artists'].c.target_artist.in_(artist_ids)
    )
    result = dbconn.execute(op).all()

    artists = artists_db_to_dict(result, dbconn=dbconn)
    return artists


@cached_wrapper
@connection_provider
def get_credited_artists(*artists, dbconn=None):
    """Names of the artists that any of the given artists' scrobbles are credited to."""
    artist_ids = [get_artist_id(a, dbconn=dbconn) for a in artists]

    jointable = sql.join(
        DB['associated_artists'],
        DB['artists'],
        DB['associated_artists'].c.target_artist == DB['artists'].c.id
    )

    op = jointable.select().where(
        DB['associated_artists'].c.source_artist.in_(artist_ids)
    )
    result = dbconn.execute(op).all()

    artists = artists_db_to_dict(result, dbconn=dbconn)
    return artists


### get a specific entity by id


@cached_wrapper
@connection_provider
def get_track(id, dbconn=None):
    op = DB['tracks'].select().where(
        DB['tracks'].c.id == id
    )
    result = dbconn.execute(op).all()

    trackinfo = result[0]
    return track_db_to_dict(trackinfo, dbconn=dbconn)


@cached_wrapper
@connection_provider
def get_artist(id, dbconn=None):
    op = DB['artists'].select().where(
        DB['artists'].c.id == id
    )
    result = dbconn.execute(op).all()

    artistinfo = result[0]
    return artist_db_to_dict(artistinfo, dbconn=dbconn)


@cached_wrapper
@connection_provider
def get_album(id, dbconn=None):
    op = DB['albums'].select().where(
        DB['albums'].c.id == id
    )
    result = dbconn.execute(op).all()

    albuminfo = result[0]
    return album_db_to_dict(albuminfo, dbconn=dbconn)


@cached_wrapper
@connection_provider
def get_scrobble(timestamp, include_internal=False, dbconn=None):
    op = DB['scrobbles'].select().where(
        DB['scrobbles'].c.timestamp == timestamp
    )
    result = dbconn.execute(op).all()

    scrobble = result[0]
    # pass dbconn so track resolution happens on this connection
    return scrobbles_db_to_dict(rows=[scrobble], include_internal=include_internal, dbconn=dbconn)[0]


@cached_wrapper
@connection_provider
def search_artist(searchterm, dbconn=None):
    op = DB['artists'].select().where(
        DB['artists'].c.name_normalized.ilike(normalize_name(f"%{searchterm}%"))
    )
    result = dbconn.execute(op).all()

    return [get_artist(row.id, dbconn=dbconn) for row in result]


@cached_wrapper
@connection_provider
def search_track(searchterm, dbconn=None):
    op = DB['tracks'].select().where(
        DB['tracks'].c.title_normalized.ilike(normalize_name(f"%{searchterm}%"))
    )
    result = dbconn.execute(op).all()

    return [get_track(row.id, dbconn=dbconn) for row in result]


##### MAINTENANCE


@runhourly
@connection_provider
def clean_db(dbconn=None):
    """Delete orphaned tracks, artists and scrobbles (runs hourly)."""
    log(f"Database Cleanup...")

    to_delete = [
        # tracks with no scrobbles (trackartist entries first)
        "from trackartists where track_id in (select id from tracks where id not in (select track_id from scrobbles))",
        "from tracks where id not in (select track_id from scrobbles)",
        # artists with no tracks
        "from artists where id not in (select artist_id from trackartists) and id not in (select target_artist from associated_artists)",
        # tracks with no artists (scrobbles first)
        "from scrobbles where track_id in (select id from tracks where id not in (select track_id from trackartists))",
        "from tracks where id not in (select track_id from trackartists)"
    ]

    for d in to_delete:
        selection = dbconn.execute(sql.text(f"select * {d}"))
        for row in selection.all():
            log(f"Deleting {row}")
        deletion = dbconn.execute(sql.text(f"delete {d}"))

    log("Database Cleanup complete!")


@runmonthly
def renormalize_names():
    """Re-apply normalize_name to all artists whose stored normalization is stale (runs monthly)."""
    with SCROBBLE_LOCK:
        with engine.begin() as conn:
            rows = conn.execute(DB['artists'].select()).all()

            for row in rows:
                id = row.id
                name = row.name
                norm_actual = row.name_normalized
                norm_target = normalize_name(name)
                if norm_actual != norm_target:
                    log(f"{name} should be normalized to {norm_target}, but is instead {norm_actual}, fixing...")

                    rows = conn.execute(DB['artists'].update().where(DB['artists'].c.id == id).values(name_normalized=norm_target))


@connection_provider
def merge_duplicate_tracks(artist_id, dbconn=None):
    """Merge tracks of one artist that share both their artist set and normalized title."""
    rows = dbconn.execute(
        DB['trackartists'].select().where(
            DB['trackartists'].c.artist_id == artist_id
        )
    )
    affected_tracks = [r.track_id for r in rows]

    track_artists = {}
    rows = dbconn.execute(
        DB['trackartists'].select().where(
            DB['trackartists'].c.track_id.in_(affected_tracks)
        )
    )

    for row in rows:
        track_artists.setdefault(row.track_id, []).append(row.artist_id)

    artist_combos = {}

    for track_id in track_artists:
        artist_combos.setdefault(tuple(sorted(track_artists[track_id])), []).append(track_id)

    for c in artist_combos:
        if len(artist_combos[c]) > 1:
            track_identifiers = {}
            for track_id in artist_combos[c]:
                track_identifiers.setdefault(normalize_name(get_track(track_id)['title']), []).append(track_id)
            for track in track_identifiers:
                if len(track_identifiers[track]) > 1:
                    target, *src = track_identifiers[track]
                    merge_tracks(target, src, dbconn=dbconn)


##### AUX FUNCS


# function to turn the name into a representation that can be easily compared,
# ignoring minor differences
remove_symbols = ["'", "`", "’"]
replace_with_space = [" - ", ": "]


def normalize_name(name):
    """Lowercase, strip apostrophe-like symbols and combining marks for fuzzy matching."""
    for r in replace_with_space:
        name = name.replace(r, " ")
    name = "".join(
        char for char in unicodedata.normalize('NFD', name.lower())
        if char not in remove_symbols and unicodedata.category(char) != 'Mn'
    )
    return name


def now():
    """Current UNIX timestamp as int."""
    return int(datetime.now().timestamp())


def rank(ls, key):
    """Annotate a list sorted descending by ls[i][key] with competition ranks ('rank')."""
    for rnk in range(len(ls)):
        if rnk == 0 or ls[rnk][key] < ls[rnk - 1][key]:
            ls[rnk]["rank"] = rnk + 1
        else:
            ls[rnk]["rank"] = ls[rnk - 1]["rank"]
    return ls