Fixed database inconsistencies introduced by overeager maintenance

This commit is contained in:
krateng 2022-04-04 17:51:19 +02:00
parent c647a57983
commit b41203bac7
1 changed file with 58 additions and 46 deletions

View File

@ -3,6 +3,7 @@ import json
import unicodedata import unicodedata
import math import math
from datetime import datetime from datetime import datetime
from threading import Lock
from ..globalconf import data_dir from ..globalconf import data_dir
from .dbcache import cached_wrapper, cached_wrapper_individual, invalidate_entity_cache from .dbcache import cached_wrapper, cached_wrapper_individual, invalidate_entity_cache
@ -60,6 +61,12 @@ DB['associated_artists'] = sql.Table(
meta.create_all(engine) meta.create_all(engine)
# Adding a scrobble could consist of multiple write operations that sqlite doesn't
# see as belonging together. Code paths that write scrobbles, and maintenance jobs
# that delete or rewrite rows, take this lock so they never interleave.
# threading.Lock is not reentrant: a function that already holds it must not call
# another function that acquires it again, or the thread will deadlock.
SCROBBLE_LOCK = Lock()
# decorator that passes either the provided dbconn, or creates a separate one # decorator that passes either the provided dbconn, or creates a separate one
# just for this function call # just for this function call
def connection_provider(func): def connection_provider(func):
@ -189,18 +196,20 @@ def add_scrobble(scrobbledict,dbconn=None):
@connection_provider
def add_scrobbles(scrobbleslist,dbconn=None):
    """Insert a batch of scrobbles into the scrobbles table.

    Args:
        scrobbleslist: iterable of scrobble dicts; each one is converted to a
            row with scrobble_dict_to_db().
        dbconn: database connection used for the inserts. The
            connection_provider decorator is expected to supply one when the
            caller passes None (see the comment on that decorator).

    Rows rejected by the database with an IntegrityError are skipped rather
    than aborting the whole batch; the number of skipped rows is logged.
    """
    # The whole batch runs under SCROBBLE_LOCK so that maintenance jobs cannot
    # run between the individual writes that make up one scrobble.
    with SCROBBLE_LOCK:

        # Build every insert statement first (original ordering): all
        # conversions happen before the first row is written.
        ops = [
            DB['scrobbles'].insert().values(
                **scrobble_dict_to_db(s)
            ) for s in scrobbleslist
        ]

        skipped = 0
        for op in ops:
            try:
                dbconn.execute(op)
            except sql.exc.IntegrityError:
                # Deliberate best-effort: a row the database refuses
                # (presumably a duplicate scrobble -- TODO confirm which
                # constraint fires) must not stop the remaining inserts.
                skipped += 1

    if skipped:
        log(f"{skipped} scrobbles could not be written to the database (integrity error)")
### these will 'get' the ID of an entity, creating it if necessary ### these will 'get' the ID of an entity, creating it if necessary
@ -619,58 +628,61 @@ def get_artist(id,dbconn=None):
@runhourly
def clean_db():
    """Remove orphaned rows from the database and clear the entity cache.

    In a single transaction, in this order:
      1. delete tracks that have no scrobbles (their trackartists rows first),
      2. delete artists that have no tracks and are not the target of an
         associated_artists entry,
      3. delete tracks that have no artists (their scrobbles first).

    The transaction runs while holding SCROBBLE_LOCK so that it cannot
    interleave with add_scrobbles(), which takes the same lock.
    """
    # Lock first, then open the transaction; leaving the block commits the
    # transaction and releases the lock.
    with SCROBBLE_LOCK, engine.begin() as conn:
        #log(f"Database Cleanup...")

        ### Delete tracks that have no scrobbles (delete their trackartist entries first)
        a1 = conn.execute(sql.text('''
            delete from trackartists where track_id in (select id from tracks where id not in (select track_id from scrobbles))
        ''')).rowcount
        a2 = conn.execute(sql.text('''
            delete from tracks where id not in (select track_id from scrobbles)
        ''')).rowcount

        if a2+a1>0: log(f"Deleted {a2} tracks without scrobbles ({a1} track artist entries)")

        ### Delete artists that have no tracks
        a3 = conn.execute(sql.text('''
            delete from artists where id not in (select artist_id from trackartists) and id not in (select target_artist from associated_artists)
        ''')).rowcount

        if a3>0: log(f"Deleted {a3} artists without tracks")

        ### Delete tracks that have no artists (delete their scrobbles first)
        a4 = conn.execute(sql.text('''
            delete from scrobbles where track_id in (select id from tracks where id not in (select track_id from trackartists))
        ''')).rowcount
        a5 = conn.execute(sql.text('''
            delete from tracks where id not in (select track_id from trackartists)
        ''')).rowcount

        if a5+a4>0: log(f"Deleted {a5} tracks without artists ({a4} scrobbles)")

    # Clear caches
    # NOTE(review): done after the transaction has committed so the cache is
    # not refilled from pre-cleanup data; confirm the intended nesting against
    # the real file, the indentation was not recoverable here.
    invalidate_entity_cache()
@runmonthly
def renormalize_names():
    """Recompute the normalized name of every artist and fix stale values.

    Reads all rows of the artists table, compares each stored
    name_normalized with normalize_name(name), and updates the rows where
    they differ. Each fix is logged and written in its own transaction.
    The whole job runs while holding SCROBBLE_LOCK.
    """
    with SCROBBLE_LOCK:
        # Fetch everything up front and close the read transaction before
        # any update is issued.
        with engine.begin() as conn:
            rows = conn.execute(DB['artists'].select()).all()

        for row in rows:
            norm_target = normalize_name(row.name)
            if row.name_normalized == norm_target:
                continue

            log(f"{row.name} should be normalized to {norm_target}, but is instead {row.name_normalized}, fixing...")

            # One short transaction per corrected artist, as before. The
            # result of the UPDATE is not needed, so it is no longer bound to
            # `rows` (which the loop is iterating over).
            with engine.begin() as conn:
                conn.execute(
                    DB['artists'].update().where(DB['artists'].c.id == row.id).values(name_normalized=norm_target)
                )