Added basic scrobble database consistency system

This commit is contained in:
Krateng 2018-12-20 18:46:55 +01:00
parent f5dced4f6e
commit 62b3591913
7 changed files with 51 additions and 3 deletions

View File

@ -13,6 +13,9 @@ class CleanerAgent:
self.rules_notanartist = [b for [a,b,c] in raw if a=="notanartist"]
self.rules_replacetitle = {b:c for [a,b,c] in raw if a=="replacetitle"}
self.rules_replaceartist = {b:c for [a,b,c] in raw if a=="replaceartist"}
# we always need to be able to tell if our current database is made with the current rules
self.checksums = utilities.checksumTSV("rules")

View File

@ -511,7 +511,7 @@ def build_db():
for f in os.listdir("scrobbles/"):
if not (".tsv" in f):
if not (f.endswith(".tsv")):
continue
logfile = open("scrobbles/" + f)
@ -552,6 +552,24 @@ def sync():
monthfile.write("\n")
monthfile.close()
if os.path.exists("scrobbles/" + str(timestamp.year) + "_" + str(timestamp.month) + ".tsv.rulestate"):
checkfile = open("scrobbles/" + str(timestamp.year) + "_" + str(timestamp.month) + ".tsv.rulestate","r")
print("Checking rulestate of " + str(timestamp.year) + "_" + str(timestamp.month) + ".tsv")
if checkfile.read() != cla.checksums:
print("Checksum does not match, file is inconsistent")
# cla.checksums represents the rule state that all current unsaved scrobbles were created under. If this is the same as the existing one, we're all good.
# If not, the file is not consistent with any single rule state.
checkfile.close()
checkfile = open("scrobbles/" + str(timestamp.year) + "_" + str(timestamp.month) + ".tsv.rulestate","w")
checkfile.write("INVALID") # this will never match any real checksum string (those consist only of MD5 hex digests and newlines)
checkfile.close()
else:
print(str(timestamp.year) + "_" + str(timestamp.month) + ".tsv does not yet exist, writing current rulestate")
#if the file didn't exist before, all its scrobbles come from our current server instance and are therefore under the current rule state
checkfile = open("scrobbles/" + str(timestamp.year) + "_" + str(timestamp.month) + ".tsv.rulestate","w")
checkfile.write(cla.checksums)
checkfile.close()
SCROBBLES[idx] = (SCROBBLES[idx][0],SCROBBLES[idx][1],True)
global lastsync

View File

@ -30,3 +30,7 @@ for fn in os.listdir("scrobbles/"):
fnew.close()
os.rename("scrobbles/" + fn + "_new","scrobbles/" + fn)
checkfile = open("scrobbles/" + fn + ".rulestate","w")
checkfile.write(wendigo.checksums)
checkfile.close()

View File

@ -5,6 +5,7 @@ from utilities import *
log = open(sys.argv[1])
outputlog = open(sys.argv[2],"w")
checksumfile = open(sys.argv[2] + ".rulestate","w") #this file stores an identifier for all rules that were in place when the corresponding file was created
c = CleanerAgent()
@ -34,7 +35,7 @@ for l in log:
## We prevent double timestamps in the database creation, so we technically don't need them in the files
## however since the conversion from lastfm to maloja is a one-time, thing, we should take any effort to make the file as good as possible
## however since the conversion from lastfm to maloja is a one-time thing, we should take any effort to make the file as good as possible
if (timestamp < stamps[-1]):
pass
elif (timestamp == stamps[-1]):
@ -55,6 +56,11 @@ for l in log:
outputlog.write(entry)
outputlog.write("\n")
checksumfile.write(c.checksums)
log.close()
outputlog.close()
checksumfile.close()

View File

@ -1,2 +1,3 @@
*.tsv
*.csv
*.tsv.rulestate

View File

@ -36,6 +36,22 @@ def parseTSV(filename,*args):
f.close()
return result
def checksumTSV(folder):
    """Return a fingerprint of all ``.tsv`` files directly inside *folder*.

    The fingerprint is the MD5 hex digest of every file whose name ends in
    ``.tsv``, each followed by a newline, concatenated into one string.
    Files are processed in sorted filename order so that the same set of
    files always yields the same string. An empty string is returned when
    the folder contains no ``.tsv`` files.

    folder -- path of the directory to scan (not recursive)
    """
    import hashlib
    import os

    # os.listdir() makes no ordering guarantee; without sorting, two runs over
    # identical files could produce different strings and be reported as a
    # rule-state mismatch.
    names = sorted(name for name in os.listdir(folder + "/") if name.endswith(".tsv"))

    digests = []
    for name in names:
        # Read as bytes so the digest does not depend on any text encoding.
        with open(folder + "/" + name, "rb") as tsvfile:
            digests.append(hashlib.md5(tsvfile.read()).hexdigest())

    return "".join(digest + "\n" for digest in digests)
def parseAllTSV(path,*args):
import os

View File

@ -57,7 +57,7 @@ table td.artists,td.artist,td.title,td.amount {
}
table td.button {
width:150px;
width:200px;
background-color:yellow;
color:#333337;
padding:1px;