diff --git a/cleanup.py b/cleanup.py index 1e71e05..2803cc3 100644 --- a/cleanup.py +++ b/cleanup.py @@ -13,6 +13,9 @@ class CleanerAgent: self.rules_notanartist = [b for [a,b,c] in raw if a=="notanartist"] self.rules_replacetitle = {b:c for [a,b,c] in raw if a=="replacetitle"} self.rules_replaceartist = {b:c for [a,b,c] in raw if a=="replaceartist"} + + # we always need to be able to tell if our current database is made with the current rules + self.checksums = utilities.checksumTSV("rules") diff --git a/database.py b/database.py index 7f611b1..f094cdf 100644 --- a/database.py +++ b/database.py @@ -511,7 +511,7 @@ def build_db(): for f in os.listdir("scrobbles/"): - if not (".tsv" in f): + if not (f.endswith(".tsv")): continue logfile = open("scrobbles/" + f) @@ -552,6 +552,24 @@ def sync(): monthfile.write("\n") monthfile.close() + if os.path.exists("scrobbles/" + str(timestamp.year) + "_" + str(timestamp.month) + ".tsv.rulestate"): + checkfile = open("scrobbles/" + str(timestamp.year) + "_" + str(timestamp.month) + ".tsv.rulestate","r") + print("Checking rulestate of " + str(timestamp.year) + "_" + str(timestamp.month) + ".tsv") + if checkfile.read() != cla.checksums: + print("Checksum does not match, file is inconsistent") + #cla.checksums represents the rule state that all current unsaved scrobbles were created under. 
if this is the same as the existing one, we're all good + #if not, the file is not consistent with any single rule state + checkfile.close() + checkfile = open("scrobbles/" + str(timestamp.year) + "_" + str(timestamp.month) + ".tsv.rulestate","w") + checkfile.write("INVALID") # this will never match any real checksum (those are md5 hex digests) + checkfile.close() + else: + print(str(timestamp.year) + "_" + str(timestamp.month) + ".tsv does not yet exist, writing current rulestate") + #if the file didn't exist before, all its scrobbles come from our current server instance and are therefore under the current rule state + checkfile = open("scrobbles/" + str(timestamp.year) + "_" + str(timestamp.month) + ".tsv.rulestate","w") + checkfile.write(cla.checksums) + checkfile.close() + SCROBBLES[idx] = (SCROBBLES[idx][0],SCROBBLES[idx][1],True) global lastsync diff --git a/fixexisting.py b/fixexisting.py index 488f448..e584776 100644 --- a/fixexisting.py +++ b/fixexisting.py @@ -30,3 +30,7 @@ for fn in os.listdir("scrobbles/"): fnew.close() os.rename("scrobbles/" + fn + "_new","scrobbles/" + fn) + + checkfile = open("scrobbles/" + fn + ".rulestate","w") + checkfile.write(wendigo.checksums) + checkfile.close() diff --git a/lastfmconverter.py b/lastfmconverter.py index 0567498..b6da66f 100644 --- a/lastfmconverter.py +++ b/lastfmconverter.py @@ -5,6 +5,7 @@ from utilities import * log = open(sys.argv[1]) outputlog = open(sys.argv[2],"w") +checksumfile = open(sys.argv[2] + ".rulestate","w") #this file stores an identifier for all rules that were in place when the corresponding file was created c = CleanerAgent() @@ -34,7 +35,7 @@ for l in log: ## We prevent double timestamps in the database creation, so we technically don't need them in the files - ## however since the conversion from lastfm to maloja is a one-time, thing, we should take any effort to make the file as good as possible + ## however since the conversion from lastfm to maloja is a one-time thing, we should make every effort to make the file as 
good as possible if (timestamp < stamps[-1]): pass elif (timestamp == stamps[-1]): @@ -55,6 +56,11 @@ for l in log: outputlog.write(entry) outputlog.write("\n") +checksumfile.write(c.checksums) + +log.close() +outputlog.close() +checksumfile.close() diff --git a/scrobbles/.gitignore b/scrobbles/.gitignore index a59cd8e..abbcb8e 100644 --- a/scrobbles/.gitignore +++ b/scrobbles/.gitignore @@ -1,2 +1,3 @@ *.tsv *.csv +*.tsv.rulestate diff --git a/utilities.py b/utilities.py index 92f84e2..4d66576 100644 --- a/utilities.py +++ b/utilities.py @@ -36,6 +36,22 @@ def parseTSV(filename,*args): f.close() return result +def checksumTSV(folder): + """Return the md5 hex digests of all .tsv files in folder, one digest per line, as a single string.""" + import hashlib + import os + sums = "" + # os.listdir() returns entries in arbitrary order; sort them so identical files always produce the identical string + for f in sorted(os.listdir(folder + "/")): + if (f.endswith(".tsv")): + # the with-block closes the file even if reading fails, and keeps the loop variable f a file name + with open(folder + "/" + f,"rb") as tsvfile: + sums += hashlib.md5(tsvfile.read()).hexdigest() + "\n" + + return sums + + + + def parseAllTSV(path,*args): import os diff --git a/website/maloja.css b/website/maloja.css index 8d9ea42..945e4b3 100644 --- a/website/maloja.css +++ b/website/maloja.css @@ -57,7 +57,7 @@ table td.artists,td.artist,td.title,td.amount { } table td.button { - width:150px; + width:200px; background-color:yellow; color:#333337; padding:1px;