1
0
mirror of https://github.com/krateng/maloja.git synced 2023-08-10 21:12:55 +03:00

Fixing DB now creates individual patch files

This commit is contained in:
Krateng 2020-04-21 18:11:16 +02:00
parent 0ceb70b27e
commit b17060184b
4 changed files with 73 additions and 77 deletions

View File

@ -5,7 +5,7 @@ author = {
"email":"maloja@krateng.dev",
"github": "krateng"
}
version = 2,3,6
version = 2,3,7
versionstr = ".".join(str(n) for n in version)
links = {
"pypi":"malojaserver",

View File

@ -22,43 +22,41 @@ def fix():
datestr = now.strftime("%Y/%m/%d")
timestr = now.strftime("%H:%M:%S")
with open(datadir("logs","dbfix",nowstr + ".log"),"a") as logfile:
patchfolder = datadir("logs","dbfix",nowstr)
os.makedirs(patchfolder)
logfile.write("Database fix initiated on " + datestr + " " + timestr + " UTC")
logfile.write("\n\n")
for filename in os.listdir(datadir("scrobbles")):
if filename.endswith(".tsv"):
filename_new = filename + "_new"
with open(datadir("scrobbles",filename_new),"w") as newfile:
with open(datadir("scrobbles",filename),"r") as oldfile:
for l in oldfile:
a,t = re.sub(exp,r"\3",l), re.sub(exp,r"\5",l)
r1,r2,r3 = re.sub(exp,r"\1\2",l),re.sub(exp,r"\4",l),re.sub(exp,r"\6\7",l)
a = a.replace("",";")
(al,t) = wendigo.fullclean(a,t)
a = "".join(al)
newfile.write(r1 + a + r2 + t + r3 + "\n")
#with open(datadir("logs","dbfix",nowstr + ".log"),"a") as logfile:
#os.system("diff " + "scrobbles/" + fn + "_new" + " " + "scrobbles/" + fn)
with open(datadir("scrobbles",filename_new),"r") as newfile:
with open(datadir("scrobbles",filename),"r") as oldfile:
for filename in os.listdir(datadir("scrobbles")):
if filename.endswith(".tsv"):
filename_new = filename + "_new"
diff = difflib.unified_diff(oldfile.read().split("\n"),newfile.read().split("\n"),lineterm="")
diff = list(diff)[2:]
#log("Diff for scrobbles/" + filename + "".join("\n\t" + d for d in diff),module="fixer")
output = "Diff for scrobbles/" + filename + "".join("\n\t" + d for d in diff)
print(output)
logfile.write(output)
logfile.write("\n")
with open(datadir("scrobbles",filename_new),"w") as newfile:
with open(datadir("scrobbles",filename),"r") as oldfile:
os.rename(datadir("scrobbles",filename_new),datadir("scrobbles",filename))
for l in oldfile:
with open(datadir("scrobbles",filename + ".rulestate"),"w") as checkfile:
checkfile.write(wendigo.checksums)
a,t = re.sub(exp,r"\3",l), re.sub(exp,r"\5",l)
r1,r2,r3 = re.sub(exp,r"\1\2",l),re.sub(exp,r"\4",l),re.sub(exp,r"\6\7",l)
a = a.replace("",";")
(al,t) = wendigo.fullclean(a,t)
a = "".join(al)
newfile.write(r1 + a + r2 + t + r3 + "\n")
#os.system("diff " + "scrobbles/" + fn + "_new" + " " + "scrobbles/" + fn)
with open(datadir("scrobbles",filename_new),"r") as newfile, open(datadir("scrobbles",filename),"r") as oldfile:
diff = difflib.unified_diff(oldfile.read().split("\n"),newfile.read().split("\n"),lineterm="")
diff = list(diff)
with open(os.path.join(patchfolder,filename + ".diff"),"w") as patchfile:
patchfile.write("\n".join(diff))
os.rename(datadir("scrobbles",filename_new),datadir("scrobbles",filename))
with open(datadir("scrobbles",filename + ".rulestate"),"w") as checkfile:
checkfile.write(wendigo.checksums)

View File

@ -11,59 +11,57 @@ c = CleanerAgent()
def convert(input,output):
    """Convert a comma-separated scrobble export into a tab-separated file.

    Every line of *input* is split on "," into artist, album, title and
    time (in that column order). Artist and title are passed through
    ``c.fullclean``; the time column, expected as ``"DD Mon YYYY HH:MM"``,
    is turned into an integer timestamp. One line per entry, with the fields
    timestamp, artists, title and album separated by tabs, is written to
    *output*. Afterwards ``c.checksums`` is written to
    ``output + ".rulestate"``.

    Args:
        input: Path of the export file; it is read as UTF-8.
        output: Path of the file to create. The rule-state file is created
            next to it with the suffix ``.rulestate``.

    Raises:
        IndexError: If a line has fewer than four comma-separated fields or
            the time column has fewer than four space-separated parts.
        KeyError: If the month abbreviation is not one of ``Jan`` .. ``Dec``.
    """

    # Month abbreviations used in the time column; built once instead of
    # once per input line.
    months = {"Jan":1,"Feb":2,"Mar":3,"Apr":4,"May":5,"Jun":6,"Jul":7,"Aug":8,"Sep":9,"Oct":10,"Nov":11,"Dec":12}

    # NOTE(review): the output file is opened with the platform default
    # encoding while the input is read as UTF-8 - confirm this is intended.
    with open(input,"r",encoding="utf-8") as log, open(output,"w") as outputlog:

        # Timestamps assigned so far. The oversized sentinel guarantees that
        # stamps[-1] exists and that the first real timestamp is smaller.
        stamps = [99999999999999]

        for line in log:
            line = line.replace("\n","")
            # NOTE(review): plain split - a field containing a comma would
            # shift the columns; verify against the export format.
            data = line.split(",")

            artist = data[0]
            album = data[1]
            title = data[2]
            timestr = data[3]

            (artists,title) = c.fullclean(artist,title)

            # NOTE(review): the separator literal is empty in the text this
            # was reconstructed from; it may have been a non-printing
            # delimiter lost in extraction - confirm against the repository.
            artistsstr = "".join(artists)

            timeparts = timestr.split(" ")
            (h,m) = timeparts[3].split(":")

            # The datetime is naive, so .timestamp() interprets it in the
            # local timezone of the machine running the conversion.
            timestamp = int(datetime.datetime(int(timeparts[2]),months[timeparts[1]],int(timeparts[0]),int(h),int(m)).timestamp())

            ## We prevent double timestamps in the database creation, so we technically don't need them in the files
            ## however since the conversion from lastfm to maloja is a one-time thing, we should take any effort to make the file as good as possible
            if timestamp < stamps[-1]:
                pass
            elif timestamp == stamps[-1]:
                # same minute as the previously appended entry: move one second back
                timestamp -= 1
            else:
                # larger than the last appended stamp: step back until unused
                while timestamp in stamps:
                    timestamp -= 1

            # Stamps smaller than the current tail are appended, everything
            # else is put at the front of the list.
            if timestamp < stamps[-1]:
                stamps.append(timestamp)
            else:
                stamps.insert(0,timestamp)

            entry = "\t".join([str(timestamp),artistsstr,title,album])
            # "#" is written in escaped form
            entry = entry.replace("#",r"\num")

            outputlog.write(entry)
            outputlog.write("\n")

    with open(output + ".rulestate","w") as checksumfile:
        #this file stores an identifier for all rules that were in place when the corresponding file was created
        checksumfile.write(c.checksums)

Binary file not shown.