mirror of
https://github.com/krateng/maloja.git
synced 2023-08-10 21:12:55 +03:00
Initial work on SQLite
This commit is contained in:
parent
68a450672e
commit
9eb8dc0b47
@ -23,6 +23,9 @@ except: pass
|
|||||||
import doreah
|
import doreah
|
||||||
|
|
||||||
|
|
||||||
|
#db
|
||||||
|
import sqlalchemy as sql
|
||||||
|
|
||||||
|
|
||||||
# technical
|
# technical
|
||||||
import os
|
import os
|
||||||
@ -31,7 +34,7 @@ import sys
|
|||||||
import unicodedata
|
import unicodedata
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
from threading import Lock
|
from threading import Lock
|
||||||
import yaml
|
import yaml, json
|
||||||
import lru
|
import lru
|
||||||
import math
|
import math
|
||||||
|
|
||||||
@ -688,151 +691,154 @@ def get_predefined_rulesets():
|
|||||||
## Server operation
|
## Server operation
|
||||||
####
|
####
|
||||||
|
|
||||||
|
# SQLite storage: one engine for the whole module, plus the table
# definitions registered in DB under their table names.
engine = sql.create_engine(
    f"sqlite:///{data_dir['scrobbles']('malojadb.sqlite')}",
    echo=False,
)
meta = sql.MetaData()

DB = {
    # one row per scrobble; the timestamp is the primary key
    'scrobbles': sql.Table(
        'scrobbles', meta,
        sql.Column('timestamp', sql.Integer, primary_key=True),
        sql.Column('rawscrobble', sql.String),
        sql.Column('origin', sql.String),
        sql.Column('duration', sql.Integer),
        sql.Column('track_id', sql.Integer),
    ),
    # tracks store the title both as given and in normalized form
    'tracks': sql.Table(
        'tracks', meta,
        sql.Column('id', sql.Integer, primary_key=True),
        sql.Column('title', sql.String),
        sql.Column('title_normalized', sql.String),
    ),
    # artists store the name both as given and in normalized form
    'artists': sql.Table(
        'artists', meta,
        sql.Column('id', sql.Integer, primary_key=True),
        sql.Column('name', sql.String),
        sql.Column('name_normalized', sql.String),
    ),
    # link table holding (artist_id, track_id) pairs
    'trackartists': sql.Table(
        'trackartists', meta,
        sql.Column('id', sql.Integer, primary_key=True),
        sql.Column('artist_id', sql.Integer),
        sql.Column('track_id', sql.Integer),
    ),
}

# emit CREATE TABLE for everything registered on meta
meta.create_all(engine)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#### ATTENTION ALL ADVENTURERS
|
||||||
|
#### THIS IS WHAT A SCROBBLE DICT WILL LOOK LIKE FROM NOW ON
|
||||||
|
#### THIS IS THE SINGLE CANONICAL SOURCE OF TRUTH
|
||||||
|
#### STOP MAKING DIFFERENT LITTLE DICTS IN EVERY SINGLE FUNCTION
|
||||||
|
#### THIS IS THE SCHEMA THAT WILL DEFINITELY 100% STAY LIKE THIS AND NOT
|
||||||
|
#### RANDOMLY GET CHANGED TWO VERSIONS LATER
|
||||||
|
#### HERE WE GO
|
||||||
|
#
|
||||||
|
# {
|
||||||
|
# "time":int,
|
||||||
|
# "track":{
|
||||||
|
# "artists":list,
|
||||||
|
# "title":string,
|
||||||
|
# "album":{
|
||||||
|
# "name":string,
|
||||||
|
# "artists":list
|
||||||
|
# },
|
||||||
|
# "length":None
|
||||||
|
# },
|
||||||
|
# "duration":int,
|
||||||
|
# "origin":string
|
||||||
|
# }
|
||||||
|
|
||||||
|
def add_scrobble(scrobbledict):
    """Store a single scrobble dict by handing it to add_scrobbles()."""
    batch = [scrobbledict]
    add_scrobbles(batch)
|
||||||
|
|
||||||
|
def add_scrobbles(scrobbleslist):
    """Insert every scrobble dict in *scrobbleslist* into the scrobbles table.

    Each dict is read for its 'time', 'origin', 'duration' and 'track' keys
    and is also stored as a whole, JSON-encoded, in the rawscrobble column.
    All insert statements are prepared first (which resolves the track IDs
    through get_track_id()) and then executed in one transaction.
    """
    scrobbles_table = DB['scrobbles']

    statements = []
    for scrobble in scrobbleslist:
        row = {
            'rawscrobble': json.dumps(scrobble),
            'timestamp': scrobble['time'],
            'origin': scrobble['origin'],
            # a falsy duration (None, 0, '') is stored as -1
            'duration': scrobble['duration'] or -1,
            'track_id': get_track_id(scrobble['track']),
        }
        statements.append(scrobbles_table.insert().values(**row))

    with engine.begin() as conn:
        for statement in statements:
            conn.execute(statement)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
### DB interface functions - these will 'get' the ID of an entity,
|
||||||
|
### creating it if necessary
|
||||||
|
|
||||||
|
|
||||||
|
def get_track_id(trackdict):
    """Return the ID of the track described by *trackdict*, creating it if needed.

    *trackdict* is read for its 'title' and 'artists' keys. A stored track
    is only considered the same track when its normalized title matches AND
    the artist IDs linked to it in the trackartists table are exactly the
    artist IDs of *trackdict*. Matching on the title alone would merge
    unrelated tracks that merely share a title.

    When no stored track matches, a new tracks row and one trackartists row
    per artist are inserted in the same transaction as the lookup.

    NOTE(review): tracks rows written before trackartists was populated have
    no artist links and therefore only match a trackdict without artists.
    """
    ntitle = normalize_name(trackdict['title'])
    # Resolved before our own transaction starts: get_artist_id() opens its
    # own connection and may insert rows. Duplicates are dropped while the
    # original order is kept.
    artist_ids = list(dict.fromkeys(
        get_artist_id(a) for a in trackdict['artists']
    ))
    wanted_artists = set(artist_ids)

    tracks = DB['tracks']
    trackartists = DB['trackartists']

    with engine.begin() as conn:
        # Table.select() is called without arguments on purpose: in
        # SQLAlchemy 1.x its first positional argument is the WHERE clause,
        # not a column list, and 2.0 accepts no arguments at all.
        candidates = conn.execute(
            tracks.select().where(tracks.c.title_normalized == ntitle)
        ).fetchall()

        for candidate in candidates:
            links = conn.execute(
                trackartists.select().where(
                    trackartists.c.track_id == candidate.id
                )
            ).fetchall()
            if {link.artist_id for link in links} == wanted_artists:
                print("ID for", trackdict['title'], "was", candidate.id)
                return candidate.id

        # no stored track has this title with exactly these artists
        result = conn.execute(
            tracks.insert().values(
                title=trackdict['title'],
                title_normalized=ntitle,
            )
        )
        track_id = result.inserted_primary_key[0]
        for artist_id in artist_ids:
            conn.execute(
                trackartists.insert().values(
                    track_id=track_id,
                    artist_id=artist_id,
                )
            )
        print("Created", trackdict['title'], result.inserted_primary_key)
        return track_id
|
||||||
|
|
||||||
|
def get_artist_id(artistname):
    """Return the ID of the artist named *artistname*, creating it if needed.

    Artists are matched on the result of normalize_name(artistname) against
    the name_normalized column. If no row matches, a new artists row holding
    both the given and the normalized name is inserted. Lookup and insert
    share one transaction.
    """
    nname = normalize_name(artistname)
    print("looking for", nname)

    artists = DB['artists']

    with engine.begin() as conn:
        # Table.select() is called without arguments on purpose: in
        # SQLAlchemy 1.x its first positional argument is the WHERE clause,
        # not a column list, and 2.0 accepts no arguments at all.
        existing = conn.execute(
            artists.select().where(artists.c.name_normalized == nname)
        ).first()
        if existing is not None:
            print("ID for", artistname, "was", existing.id)
            return existing.id

        result = conn.execute(
            artists.insert().values(
                name=artistname,
                name_normalized=nname,
            )
        )
        print("Created", artistname, result.inserted_primary_key)
        return result.inserted_primary_key[0]
|
||||||
|
|
||||||
def start_db():
    """Run the old-data upgrade, feeding found scrobbles to add_scrobbles()."""
    # imported inside the function, not at module level
    from . import upgrade as upgrade_module
    upgrade_module.upgrade_db(add_scrobbles)
|
|
||||||
|
|
||||||
def build_db():
|
|
||||||
|
|
||||||
global dbstatus
|
|
||||||
dbstatus['healthy'] = False
|
|
||||||
dbstatus['complete'] = False
|
|
||||||
dbstatus['rebuildinprogress'] = True
|
|
||||||
|
|
||||||
log("Building database...")
|
|
||||||
|
|
||||||
global SCROBBLES, ARTISTS, TRACKS
|
|
||||||
global TRACKS_NORMALIZED_SET, TRACKS_NORMALIZED, ARTISTS_NORMALIZED_SET, ARTISTS_NORMALIZED
|
|
||||||
global SCROBBLESDICT, STAMPS
|
|
||||||
|
|
||||||
SCROBBLES = []
|
|
||||||
ARTISTS = []
|
|
||||||
TRACKS = []
|
|
||||||
STAMPS = []
|
|
||||||
SCROBBLESDICT = {}
|
|
||||||
|
|
||||||
TRACKS_NORMALIZED = []
|
|
||||||
ARTISTS_NORMALIZED = []
|
|
||||||
ARTISTS_NORMALIZED_SET = set()
|
|
||||||
TRACKS_NORMALIZED_SET = set()
|
|
||||||
|
|
||||||
|
|
||||||
# parse files
|
|
||||||
db = tsv.parse_all(data_dir['scrobbles'](),"int","string","string",comments=False)
|
|
||||||
scrobblenum = len(db)
|
|
||||||
log(f"Found {scrobblenum} scrobbles...")
|
|
||||||
|
|
||||||
usebar = not malojaconfig["CLEAN_OUTPUT"]
|
|
||||||
if usebar: pbar = ProgressBar(max=scrobblenum,prefix="Loading scrobbles")
|
|
||||||
else:
|
|
||||||
n = 0
|
|
||||||
m = max(int(scrobblenum / 25),20)
|
|
||||||
#db = parseAllTSV("scrobbles","int","string","string",escape=False)
|
|
||||||
for sc in db:
|
|
||||||
artists = sc[1].split("␟")
|
|
||||||
title = sc[2]
|
|
||||||
time = sc[0]
|
|
||||||
|
|
||||||
readScrobble(artists,title,time)
|
|
||||||
if usebar: pbar.progress()
|
|
||||||
else:
|
|
||||||
n += 1
|
|
||||||
if n % m == 0: log(f"Loaded {n}/{scrobblenum}...")
|
|
||||||
|
|
||||||
if usebar: pbar.done()
|
|
||||||
|
|
||||||
|
|
||||||
log("Database loaded, optimizing...")
|
|
||||||
|
|
||||||
# optimize database
|
|
||||||
SCROBBLES.sort(key = lambda tup: tup[1])
|
|
||||||
#SCROBBLESDICT = {obj[1]:obj for obj in SCROBBLES}
|
|
||||||
STAMPS = [t for t in SCROBBLESDICT]
|
|
||||||
STAMPS.sort()
|
|
||||||
|
|
||||||
# inform malojatime module about earliest scrobble
|
|
||||||
if STAMPS: register_scrobbletime(STAMPS[0])
|
|
||||||
|
|
||||||
# NOT NEEDED BECAUSE WE DO THAT ON ADDING EVERY ARTIST ANYWAY
|
|
||||||
# get extra artists with no real scrobbles from countas rules
|
|
||||||
#for artist in coa.getAllArtists():
|
|
||||||
#for artist in coa.getCreditedList(ARTISTS):
|
|
||||||
# if artist not in ARTISTS:
|
|
||||||
# log(artist + " is added to database because of countas rules",module="debug")
|
|
||||||
# ARTISTS.append(artist)
|
|
||||||
# coa.updateIDs(ARTISTS)
|
|
||||||
|
|
||||||
dbstatus['healthy'] = True
|
|
||||||
|
|
||||||
|
|
||||||
#start regular tasks
|
|
||||||
utilities.update_medals()
|
|
||||||
utilities.update_weekly()
|
|
||||||
utilities.send_stats()
|
|
||||||
|
|
||||||
|
|
||||||
global ISSUES
|
|
||||||
ISSUES = check_issues()
|
|
||||||
|
|
||||||
|
|
||||||
dbstatus['complete'] = True
|
|
||||||
dbstatus['rebuildinprogress'] = False
|
|
||||||
|
|
||||||
log("Database fully built!")
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Saves all cached entries to disk
|
|
||||||
def sync():
|
|
||||||
|
|
||||||
# all entries by file collected
|
|
||||||
# so we don't open the same file for every entry
|
|
||||||
#log("Syncing",module="debug")
|
|
||||||
entries = {}
|
|
||||||
|
|
||||||
for idx in range(len(SCROBBLES)):
|
|
||||||
if not SCROBBLES[idx].saved:
|
|
||||||
|
|
||||||
t = get_scrobble_dict(SCROBBLES[idx])
|
|
||||||
|
|
||||||
artistlist = list(t["artists"])
|
|
||||||
artistlist.sort() #we want the order of artists to be deterministic so when we update files with new rules a diff can see what has actually been changed
|
|
||||||
artistss = "␟".join(artistlist)
|
|
||||||
timestamp = datetime.date.fromtimestamp(t["time"])
|
|
||||||
|
|
||||||
album = t["album"] or "-"
|
|
||||||
duration = t["duration"] or "-"
|
|
||||||
|
|
||||||
entry = [str(t["time"]),artistss,t["title"],album,duration]
|
|
||||||
|
|
||||||
monthcode = str(timestamp.year) + "_" + str(timestamp.month)
|
|
||||||
entries.setdefault(monthcode,[]).append(entry) #i feckin love the setdefault function
|
|
||||||
|
|
||||||
SCROBBLES[idx] = Scrobble(*SCROBBLES[idx][:-1],True)
|
|
||||||
# save copy with last tuple entry set to true
|
|
||||||
|
|
||||||
#log("Sorted into months",module="debug")
|
|
||||||
|
|
||||||
for e in entries:
|
|
||||||
tsv.add_entries(data_dir['scrobbles'](e + ".tsv"),entries[e],comments=False)
|
|
||||||
#addEntries("scrobbles/" + e + ".tsv",entries[e],escape=False)
|
|
||||||
|
|
||||||
#log("Written files",module="debug")
|
|
||||||
|
|
||||||
|
|
||||||
global lastsync
|
|
||||||
lastsync = int(datetime.datetime.now(tz=datetime.timezone.utc).timestamp())
|
|
||||||
#log("Database saved to disk.")
|
|
||||||
|
|
||||||
# save cached images
|
|
||||||
#saveCache()
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -7,6 +7,7 @@ from .__pkginfo__ import VERSION
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# if DATA_DIRECTORY is specified, this is the directory to use for EVERYTHING, no matter what
|
# if DATA_DIRECTORY is specified, this is the directory to use for EVERYTHING, no matter what
|
||||||
# but with asymmetrical structure, cache and logs in subfolders
|
# but with asymmetrical structure, cache and logs in subfolders
|
||||||
# otherwise, each directory is treated separately
|
# otherwise, each directory is treated separately
|
||||||
@ -311,24 +312,19 @@ config(
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
### API KEYS
|
### API KEYS
|
||||||
|
### symmetric keys are fine since we hopefully use HTTPS
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
### symmetric keys are fine for now since we hopefully use HTTPS
|
|
||||||
apikeystore = KeyStore(file=data_dir['clients']("apikeys.yml"),save_endpoint="/apis/mlj_1/apikeys")
|
apikeystore = KeyStore(file=data_dir['clients']("apikeys.yml"),save_endpoint="/apis/mlj_1/apikeys")
|
||||||
|
from . import upgrade
|
||||||
|
upgrade.upgrade_apikeys()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
oldfile = pthj(dir_settings['config'],"clients","authenticated_machines.tsv")
|
|
||||||
if os.path.exists(oldfile):
|
|
||||||
try:
|
|
||||||
from doreah import tsv
|
|
||||||
clients = tsv.parse(oldfile,"string","string")
|
|
||||||
for key,identifier in clients:
|
|
||||||
apikeystore[identifier] = key
|
|
||||||
os.remove(oldfile)
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
# what the fuck did i just write
|
# what the fuck did i just write
|
||||||
|
60
maloja/upgrade.py
Normal file
60
maloja/upgrade.py
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
# This module should take care of recognizing old install data and upgrading it before the actual server deals with it
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
|
||||||
|
from doreah.logging import log
|
||||||
|
|
||||||
|
from .globalconf import data_dir, dir_settings, apikeystore
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade_apikeys():
    """Move API keys from the old authenticated_machines.tsv into apikeystore.

    Does nothing when the old file does not exist. Otherwise every
    (key, identifier) row is stored as apikeystore[identifier] = key and the
    old file is deleted afterwards.

    The migration is best-effort: a failure is logged instead of raised, and
    the old file is then left in place.
    """
    oldfile = os.path.join(dir_settings['config'], "clients", "authenticated_machines.tsv")
    if not os.path.exists(oldfile):
        return

    try:
        from doreah import tsv
        clients = tsv.parse(oldfile, "string", "string")
        for key, identifier in clients:
            apikeystore[identifier] = key
        # only reached when every key was stored
        os.remove(oldfile)
    except Exception as e:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate
        log(f"Could not migrate old API key file {oldfile}: {e!r}")
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade_db(callback_add_scrobbles):
    """Read old .tsv scrobble files and pass their scrobbles to the callback.

    Looks for .tsv files in the "scrobbles" folder below dir_settings['state'].
    Each file is parsed into a list of scrobble dicts, and
    *callback_add_scrobbles* is called once per file with that list.

    The 'origin' of the scrobbles is derived from the file name:
    "<digits>_<digits>.tsv" -> 'native', "lastfmimport.tsv" ->
    'lastfm-import', anything else -> 'unknown'.

    NOTE(review): imported files are neither moved nor deleted here, so every
    call reads them again - confirm this is handled elsewhere.
    """
    oldfolder = os.path.join(dir_settings['state'], "scrobbles")
    if not os.path.exists(oldfolder):
        return

    from doreah import tsv

    for sf in os.listdir(oldfolder):
        if not sf.endswith(".tsv"):
            continue
        log(f"Found old tsv scrobble file: {sf}")

        # fullmatch, so that e.g. "2021_1.tsv.old.tsv" is not taken for a
        # native month file just because it starts like one
        if re.fullmatch(r"[0-9]+_[0-9]+\.tsv", sf):
            origin = 'native'
        elif sf == "lastfmimport.tsv":
            origin = 'lastfm-import'
        else:
            origin = 'unknown'

        rows = tsv.parse(
            os.path.join(oldfolder, sf),
            "int", "string", "string", "string", "string",
            comments=False,
        )

        scrobblelist = []
        for timestamp, artists, title, album, duration in rows:
            if album in ('-', ''):
                album = None
            # The column is parsed as a string, but a scrobble dict carries
            # its duration as an int. The placeholders '-' and '' (and any
            # other non-numeric value) become None.
            try:
                duration = int(duration)
            except (TypeError, ValueError):
                duration = None

            scrobblelist.append({
                "time": int(timestamp),
                "track": {
                    "artists": artists.split('␟'),
                    "title": title,
                    "album": {
                        "name": album,
                        "artists": None,
                    },
                    "length": None,
                },
                "duration": duration,
                "origin": origin,
            })

        callback_add_scrobbles(scrobblelist)
|
Loading…
Reference in New Issue
Block a user