mirror of
https://github.com/krateng/maloja.git
synced 2023-08-10 21:12:55 +03:00
Initial work on SQLite
This commit is contained in:
parent
68a450672e
commit
9eb8dc0b47
@ -23,6 +23,9 @@ except: pass
|
||||
import doreah
|
||||
|
||||
|
||||
#db
|
||||
import sqlalchemy as sql
|
||||
|
||||
|
||||
# technical
|
||||
import os
|
||||
@ -31,7 +34,7 @@ import sys
|
||||
import unicodedata
|
||||
from collections import namedtuple
|
||||
from threading import Lock
|
||||
import yaml
|
||||
import yaml, json
|
||||
import lru
|
||||
import math
|
||||
|
||||
@ -688,151 +691,154 @@ def get_predefined_rulesets():
|
||||
## Server operation
|
||||
####
|
||||
|
||||
DB = {}
|
||||
|
||||
|
||||
# Starts the server
|
||||
engine = sql.create_engine(f"sqlite:///{data_dir['scrobbles']('malojadb.sqlite')}", echo = False)
|
||||
meta = sql.MetaData()
|
||||
|
||||
DB['scrobbles'] = sql.Table(
|
||||
'scrobbles', meta,
|
||||
sql.Column('timestamp',sql.Integer,primary_key=True),
|
||||
sql.Column('rawscrobble',sql.String),
|
||||
sql.Column('origin',sql.String),
|
||||
sql.Column('duration',sql.Integer),
|
||||
sql.Column('track_id',sql.Integer)
|
||||
)
|
||||
DB['tracks'] = sql.Table(
|
||||
'tracks', meta,
|
||||
sql.Column('id',sql.Integer,primary_key=True),
|
||||
sql.Column('title',sql.String),
|
||||
sql.Column('title_normalized',sql.String)
|
||||
)
|
||||
DB['artists'] = sql.Table(
|
||||
'artists', meta,
|
||||
sql.Column('id',sql.Integer,primary_key=True),
|
||||
sql.Column('name',sql.String),
|
||||
sql.Column('name_normalized',sql.String)
|
||||
)
|
||||
DB['trackartists'] = sql.Table(
|
||||
'trackartists', meta,
|
||||
sql.Column('id',sql.Integer,primary_key=True),
|
||||
sql.Column('artist_id',sql.Integer),
|
||||
sql.Column('track_id',sql.Integer)
|
||||
)
|
||||
|
||||
meta.create_all(engine)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#### ATTENTION ALL ADVENTURERS
|
||||
#### THIS IS WHAT A SCROBBLE DICT WILL LOOK LIKE FROM NOW ON
|
||||
#### THIS IS THE SINGLE CANONICAL SOURCE OF TRUTH
|
||||
#### STOP MAKING DIFFERENT LITTLE DICTS IN EVERY SINGLE FUNCTION
|
||||
#### THIS IS THE SCHEMA THAT WILL DEFINITELY 100% STAY LIKE THIS AND NOT
|
||||
#### RANDOMLY GET CHANGED TWO VERSIONS LATER
|
||||
#### HERE WE GO
|
||||
#
|
||||
# {
|
||||
# "time":int,
|
||||
# "track":{
|
||||
# "artists":list,
|
||||
# "title":string,
|
||||
# "album":{
|
||||
# "name":string,
|
||||
# "artists":list
|
||||
# },
|
||||
# "length":None
|
||||
# },
|
||||
# "duration":int,
|
||||
# "origin":string
|
||||
# }
|
||||
|
||||
def add_scrobble(scrobbledict):
|
||||
add_scrobbles([scrobbledict])
|
||||
|
||||
def add_scrobbles(scrobbleslist):
|
||||
|
||||
ops = [
|
||||
DB['scrobbles'].insert().values(
|
||||
rawscrobble=json.dumps(s),
|
||||
timestamp=s['time'],
|
||||
origin=s['origin'],
|
||||
duration=s['duration'] or -1,
|
||||
track_id=get_track_id(s['track'])
|
||||
) for s in scrobbleslist
|
||||
]
|
||||
|
||||
with engine.begin() as conn:
|
||||
for op in ops:
|
||||
conn.execute(op)
|
||||
|
||||
|
||||
|
||||
### DB interface functions - these will 'get' the ID of an entity,
|
||||
### creating it if necessary
|
||||
|
||||
|
||||
def get_track_id(trackdict):
|
||||
ntitle = normalize_name(trackdict['title'])
|
||||
artist_ids = [get_artist_id(a) for a in trackdict['artists']]
|
||||
|
||||
|
||||
|
||||
with engine.begin() as conn:
|
||||
op = DB['tracks'].select(
|
||||
DB['tracks'].c.id
|
||||
).where(
|
||||
DB['tracks'].c.title_normalized==ntitle
|
||||
)
|
||||
result = conn.execute(op)
|
||||
for row in result:
|
||||
print("ID for",trackdict['title'],"was",row[0])
|
||||
return row[0]
|
||||
|
||||
with engine.begin() as conn:
|
||||
op = DB['tracks'].insert().values(
|
||||
title=trackdict['title'],
|
||||
title_normalized=ntitle
|
||||
)
|
||||
result = conn.execute(op)
|
||||
print("Created",trackdict['title'],result.inserted_primary_key)
|
||||
return result.inserted_primary_key[0]
|
||||
|
||||
def get_artist_id(artistname):
|
||||
nname = normalize_name(artistname)
|
||||
print("looking for",nname)
|
||||
|
||||
with engine.begin() as conn:
|
||||
op = DB['artists'].select(
|
||||
DB['artists'].c.id
|
||||
).where(
|
||||
DB['artists'].c.name_normalized==nname
|
||||
)
|
||||
result = conn.execute(op)
|
||||
for row in result:
|
||||
print("ID for",artistname,"was",row[0])
|
||||
return row[0]
|
||||
|
||||
with engine.begin() as conn:
|
||||
op = DB['artists'].insert().values(
|
||||
name=artistname,
|
||||
name_normalized=nname
|
||||
)
|
||||
result = conn.execute(op)
|
||||
print("Created",artistname,result.inserted_primary_key)
|
||||
return result.inserted_primary_key[0]
|
||||
|
||||
def start_db():
|
||||
log("Starting database...")
|
||||
global lastsync
|
||||
lastsync = int(datetime.datetime.now(tz=datetime.timezone.utc).timestamp())
|
||||
build_db()
|
||||
#run(dbserver, host='::', port=PORT, server='waitress')
|
||||
log("Database reachable!")
|
||||
|
||||
def build_db():
|
||||
|
||||
global dbstatus
|
||||
dbstatus['healthy'] = False
|
||||
dbstatus['complete'] = False
|
||||
dbstatus['rebuildinprogress'] = True
|
||||
|
||||
log("Building database...")
|
||||
|
||||
global SCROBBLES, ARTISTS, TRACKS
|
||||
global TRACKS_NORMALIZED_SET, TRACKS_NORMALIZED, ARTISTS_NORMALIZED_SET, ARTISTS_NORMALIZED
|
||||
global SCROBBLESDICT, STAMPS
|
||||
|
||||
SCROBBLES = []
|
||||
ARTISTS = []
|
||||
TRACKS = []
|
||||
STAMPS = []
|
||||
SCROBBLESDICT = {}
|
||||
|
||||
TRACKS_NORMALIZED = []
|
||||
ARTISTS_NORMALIZED = []
|
||||
ARTISTS_NORMALIZED_SET = set()
|
||||
TRACKS_NORMALIZED_SET = set()
|
||||
|
||||
|
||||
# parse files
|
||||
db = tsv.parse_all(data_dir['scrobbles'](),"int","string","string",comments=False)
|
||||
scrobblenum = len(db)
|
||||
log(f"Found {scrobblenum} scrobbles...")
|
||||
|
||||
usebar = not malojaconfig["CLEAN_OUTPUT"]
|
||||
if usebar: pbar = ProgressBar(max=scrobblenum,prefix="Loading scrobbles")
|
||||
else:
|
||||
n = 0
|
||||
m = max(int(scrobblenum / 25),20)
|
||||
#db = parseAllTSV("scrobbles","int","string","string",escape=False)
|
||||
for sc in db:
|
||||
artists = sc[1].split("␟")
|
||||
title = sc[2]
|
||||
time = sc[0]
|
||||
|
||||
readScrobble(artists,title,time)
|
||||
if usebar: pbar.progress()
|
||||
else:
|
||||
n += 1
|
||||
if n % m == 0: log(f"Loaded {n}/{scrobblenum}...")
|
||||
|
||||
if usebar: pbar.done()
|
||||
|
||||
|
||||
log("Database loaded, optimizing...")
|
||||
|
||||
# optimize database
|
||||
SCROBBLES.sort(key = lambda tup: tup[1])
|
||||
#SCROBBLESDICT = {obj[1]:obj for obj in SCROBBLES}
|
||||
STAMPS = [t for t in SCROBBLESDICT]
|
||||
STAMPS.sort()
|
||||
|
||||
# inform malojatime module about earliest scrobble
|
||||
if STAMPS: register_scrobbletime(STAMPS[0])
|
||||
|
||||
# NOT NEEDED BECAUSE WE DO THAT ON ADDING EVERY ARTIST ANYWAY
|
||||
# get extra artists with no real scrobbles from countas rules
|
||||
#for artist in coa.getAllArtists():
|
||||
#for artist in coa.getCreditedList(ARTISTS):
|
||||
# if artist not in ARTISTS:
|
||||
# log(artist + " is added to database because of countas rules",module="debug")
|
||||
# ARTISTS.append(artist)
|
||||
# coa.updateIDs(ARTISTS)
|
||||
|
||||
dbstatus['healthy'] = True
|
||||
|
||||
|
||||
#start regular tasks
|
||||
utilities.update_medals()
|
||||
utilities.update_weekly()
|
||||
utilities.send_stats()
|
||||
|
||||
|
||||
global ISSUES
|
||||
ISSUES = check_issues()
|
||||
|
||||
|
||||
dbstatus['complete'] = True
|
||||
dbstatus['rebuildinprogress'] = False
|
||||
|
||||
log("Database fully built!")
|
||||
from . import upgrade
|
||||
upgrade.upgrade_db(add_scrobbles)
|
||||
|
||||
|
||||
|
||||
# Saves all cached entries to disk
|
||||
def sync():
|
||||
|
||||
# all entries by file collected
|
||||
# so we don't open the same file for every entry
|
||||
#log("Syncing",module="debug")
|
||||
entries = {}
|
||||
|
||||
for idx in range(len(SCROBBLES)):
|
||||
if not SCROBBLES[idx].saved:
|
||||
|
||||
t = get_scrobble_dict(SCROBBLES[idx])
|
||||
|
||||
artistlist = list(t["artists"])
|
||||
artistlist.sort() #we want the order of artists to be deterministic so when we update files with new rules a diff can see what has actually been changed
|
||||
artistss = "␟".join(artistlist)
|
||||
timestamp = datetime.date.fromtimestamp(t["time"])
|
||||
|
||||
album = t["album"] or "-"
|
||||
duration = t["duration"] or "-"
|
||||
|
||||
entry = [str(t["time"]),artistss,t["title"],album,duration]
|
||||
|
||||
monthcode = str(timestamp.year) + "_" + str(timestamp.month)
|
||||
entries.setdefault(monthcode,[]).append(entry) #i feckin love the setdefault function
|
||||
|
||||
SCROBBLES[idx] = Scrobble(*SCROBBLES[idx][:-1],True)
|
||||
# save copy with last tuple entry set to true
|
||||
|
||||
#log("Sorted into months",module="debug")
|
||||
|
||||
for e in entries:
|
||||
tsv.add_entries(data_dir['scrobbles'](e + ".tsv"),entries[e],comments=False)
|
||||
#addEntries("scrobbles/" + e + ".tsv",entries[e],escape=False)
|
||||
|
||||
#log("Written files",module="debug")
|
||||
|
||||
|
||||
global lastsync
|
||||
lastsync = int(datetime.datetime.now(tz=datetime.timezone.utc).timestamp())
|
||||
#log("Database saved to disk.")
|
||||
|
||||
# save cached images
|
||||
#saveCache()
|
||||
|
||||
|
||||
|
||||
|
@ -7,6 +7,7 @@ from .__pkginfo__ import VERSION
|
||||
|
||||
|
||||
|
||||
|
||||
# if DATA_DIRECTORY is specified, this is the directory to use for EVERYTHING, no matter what
|
||||
# but with asynnetrical structure, cache and logs in subfolders
|
||||
# otherwise, each directory is treated seperately
|
||||
@ -311,24 +312,19 @@ config(
|
||||
|
||||
|
||||
|
||||
|
||||
### API KEYS
|
||||
### symmetric keys are fine since we hopefully use HTTPS
|
||||
|
||||
|
||||
|
||||
### symmetric keys are fine for now since we hopefully use HTTPS
|
||||
apikeystore = KeyStore(file=data_dir['clients']("apikeys.yml"),save_endpoint="/apis/mlj_1/apikeys")
|
||||
from . import upgrade
|
||||
upgrade.upgrade_apikeys()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
oldfile = pthj(dir_settings['config'],"clients","authenticated_machines.tsv")
|
||||
if os.path.exists(oldfile):
|
||||
try:
|
||||
from doreah import tsv
|
||||
clients = tsv.parse(oldfile,"string","string")
|
||||
for key,identifier in clients:
|
||||
apikeystore[identifier] = key
|
||||
os.remove(oldfile)
|
||||
except:
|
||||
pass
|
||||
|
||||
|
||||
# what the fuck did i just write
|
||||
|
60
maloja/upgrade.py
Normal file
60
maloja/upgrade.py
Normal file
@ -0,0 +1,60 @@
|
||||
# This module should take care of recognizing old install data and upgrading it before the actual server deals with it
|
||||
|
||||
import os
|
||||
import re
|
||||
|
||||
from doreah.logging import log
|
||||
|
||||
from .globalconf import data_dir, dir_settings, apikeystore
|
||||
|
||||
|
||||
def upgrade_apikeys():
|
||||
|
||||
oldfile = os.path.join(dir_settings['config'],"clients","authenticated_machines.tsv")
|
||||
if os.path.exists(oldfile):
|
||||
try:
|
||||
from doreah import tsv
|
||||
clients = tsv.parse(oldfile,"string","string")
|
||||
for key,identifier in clients:
|
||||
apikeystore[identifier] = key
|
||||
os.remove(oldfile)
|
||||
except:
|
||||
pass
|
||||
|
||||
|
||||
def upgrade_db(callback_add_scrobbles):
|
||||
oldfolder = os.path.join(dir_settings['state'],"scrobbles")
|
||||
if os.path.exists(oldfolder):
|
||||
scrobblefiles = os.listdir(oldfolder)
|
||||
for sf in scrobblefiles:
|
||||
if sf.endswith(".tsv"):
|
||||
log(f"Found old tsv scrobble file: {sf}")
|
||||
if re.match(r"[0-9]+_[0-9]+\.tsv",sf):
|
||||
origin = 'native'
|
||||
elif sf == "lastfmimport.tsv":
|
||||
origin = 'lastfm-import'
|
||||
else:
|
||||
origin = 'unknown'
|
||||
|
||||
from doreah import tsv
|
||||
scrobbles = tsv.parse(os.path.join(oldfolder,sf),"int","string","string","string","string",comments=False)
|
||||
scrobblelist = []
|
||||
for scrobble in scrobbles:
|
||||
timestamp, artists, title, album, duration = scrobble
|
||||
if album in ('-',''): album = None
|
||||
if duration in ('-',''): duration = None
|
||||
scrobblelist.append({
|
||||
"time":int(timestamp),
|
||||
"track":{
|
||||
"artists":artists.split('␟'),
|
||||
"title":title,
|
||||
"album":{
|
||||
"name":album,
|
||||
"artists":None
|
||||
},
|
||||
"length":None
|
||||
},
|
||||
"duration":duration,
|
||||
"origin":origin
|
||||
})
|
||||
callback_add_scrobbles(scrobblelist)
|
Loading…
Reference in New Issue
Block a user