mirror of
				https://github.com/krateng/maloja.git
				synced 2023-08-10 21:12:55 +03:00 
			
		
		
		
	Added basic scrobble database consistency system
This commit is contained in:
		| @@ -13,6 +13,9 @@ class CleanerAgent: | |||||||
| 		self.rules_notanartist = [b for [a,b,c] in raw if a=="notanartist"] | 		self.rules_notanartist = [b for [a,b,c] in raw if a=="notanartist"] | ||||||
| 		self.rules_replacetitle = {b:c for [a,b,c] in raw if a=="replacetitle"} | 		self.rules_replacetitle = {b:c for [a,b,c] in raw if a=="replacetitle"} | ||||||
| 		self.rules_replaceartist = {b:c for [a,b,c] in raw if a=="replaceartist"} | 		self.rules_replaceartist = {b:c for [a,b,c] in raw if a=="replaceartist"} | ||||||
|  | 		 | ||||||
|  | 		# we always need to be able to tell if our current database is made with the current rules | ||||||
|  | 		self.checksums = utilities.checksumTSV("rules") | ||||||
| 			 | 			 | ||||||
| 	 | 	 | ||||||
| 	 | 	 | ||||||
|   | |||||||
							
								
								
									
										20
									
								
								database.py
									
									
									
									
									
								
							
							
						
						
									
										20
									
								
								database.py
									
									
									
									
									
								
							| @@ -511,7 +511,7 @@ def build_db(): | |||||||
| 	 | 	 | ||||||
| 	for f in os.listdir("scrobbles/"): | 	for f in os.listdir("scrobbles/"): | ||||||
| 		 | 		 | ||||||
| 		if not (".tsv" in f): | 		if not (f.endswith(".tsv")): | ||||||
| 			continue | 			continue | ||||||
| 		 | 		 | ||||||
| 		logfile = open("scrobbles/" + f) | 		logfile = open("scrobbles/" + f) | ||||||
| @@ -552,6 +552,24 @@ def sync(): | |||||||
| 			monthfile.write("\n") | 			monthfile.write("\n") | ||||||
| 			monthfile.close() | 			monthfile.close() | ||||||
| 			 | 			 | ||||||
|  | 			if os.path.exists("scrobbles/" + str(timestamp.year) + "_" + str(timestamp.month) + ".tsv.rulestate"): | ||||||
|  | 				checkfile = open("scrobbles/" + str(timestamp.year) + "_" + str(timestamp.month) + ".tsv.rulestate","r") | ||||||
|  | 				print("Checking rulestate of " + str(timestamp.year) + "_" + str(timestamp.month) + ".tsv") | ||||||
|  | 				if checkfile.read() != cla.checksums: | ||||||
|  | 					print("Checksum does not match, file is inconsistent") | ||||||
|  | 					#cla.checksums represents the rule state that all current unsaved scrobbles were created under. If it is the same as the existing one, we're all good | ||||||
|  | 					#if not, the file is not consistent to any single rule state | ||||||
|  | 					checkfile.close() | ||||||
|  | 					checkfile = open("scrobbles/" + str(timestamp.year) + "_" + str(timestamp.month) + ".tsv.rulestate","w") | ||||||
|  | 					checkfile.write("INVALID") # this will never match any real checksum string (those consist of md5 hex digests) | ||||||
|  | 				checkfile.close() | ||||||
|  | 			else: | ||||||
|  | 				print(str(timestamp.year) + "_" + str(timestamp.month) + ".tsv does not yet exist, writing current rulestate") | ||||||
|  | 				#if the file didn't exist before, all its scrobbles come from our current server instance and are therefore under the current rule state | ||||||
|  | 				checkfile = open("scrobbles/" + str(timestamp.year) + "_" + str(timestamp.month) + ".tsv.rulestate","w") | ||||||
|  | 				checkfile.write(cla.checksums) | ||||||
|  | 				checkfile.close() | ||||||
|  | 			 | ||||||
| 			SCROBBLES[idx] = (SCROBBLES[idx][0],SCROBBLES[idx][1],True) | 			SCROBBLES[idx] = (SCROBBLES[idx][0],SCROBBLES[idx][1],True) | ||||||
| 			 | 			 | ||||||
| 	global lastsync | 	global lastsync | ||||||
|   | |||||||
| @@ -30,3 +30,7 @@ for fn in os.listdir("scrobbles/"): | |||||||
| 		fnew.close() | 		fnew.close() | ||||||
| 		 | 		 | ||||||
| 		os.rename("scrobbles/" + fn + "_new","scrobbles/" + fn) | 		os.rename("scrobbles/" + fn + "_new","scrobbles/" + fn) | ||||||
|  | 		 | ||||||
|  | 		checkfile = open("scrobbles/" + fn + ".rulestate","w") | ||||||
|  | 		checkfile.write(wendigo.checksums) | ||||||
|  | 		checkfile.close() | ||||||
|   | |||||||
| @@ -5,6 +5,7 @@ from utilities import * | |||||||
|  |  | ||||||
| log = open(sys.argv[1]) | log = open(sys.argv[1]) | ||||||
| outputlog = open(sys.argv[2],"w") | outputlog = open(sys.argv[2],"w") | ||||||
|  | checksumfile = open(sys.argv[2] + ".rulestate","w") #this file stores an identifier for all rules that were in place when the corresponding file was created | ||||||
|  |  | ||||||
|  |  | ||||||
| c = CleanerAgent() | c = CleanerAgent() | ||||||
| @@ -34,7 +35,7 @@ for l in log: | |||||||
| 	 | 	 | ||||||
| 	 | 	 | ||||||
| 	## We prevent double timestamps in the database creation, so we technically don't need them in the files | 	## We prevent double timestamps in the database creation, so we technically don't need them in the files | ||||||
| 	## however since the conversion from lastfm to maloja is a one-time, thing, we should take any effort to make the file as good as possible | 	## however since the conversion from lastfm to maloja is a one-time thing, we should take any effort to make the file as good as possible | ||||||
| 	if (timestamp < stamps[-1]): | 	if (timestamp < stamps[-1]): | ||||||
| 		pass | 		pass | ||||||
| 	elif (timestamp == stamps[-1]): | 	elif (timestamp == stamps[-1]): | ||||||
| @@ -55,6 +56,11 @@ for l in log: | |||||||
| 	outputlog.write(entry) | 	outputlog.write(entry) | ||||||
| 	outputlog.write("\n") | 	outputlog.write("\n") | ||||||
| 	 | 	 | ||||||
|  | checksumfile.write(c.checksums) | ||||||
|  | 	 | ||||||
|  | log.close() | ||||||
|  | outputlog.close() | ||||||
|  | checksumfile.close() | ||||||
| 	 | 	 | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										1
									
								
								scrobbles/.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								scrobbles/.gitignore
									
									
									
									
										vendored
									
									
								
							| @@ -1,2 +1,3 @@ | |||||||
| *.tsv | *.tsv | ||||||
| *.csv | *.csv | ||||||
|  | *.tsv.rulestate | ||||||
|   | |||||||
							
								
								
									
										16
									
								
								utilities.py
									
									
									
									
									
								
							
							
						
						
									
										16
									
								
								utilities.py
									
									
									
									
									
								
							| @@ -36,6 +36,22 @@ def parseTSV(filename,*args): | |||||||
| 	f.close() | 	f.close() | ||||||
| 	return result | 	return result | ||||||
| 	 | 	 | ||||||
def checksumTSV(folder):
	"""Return a fingerprint string for all .tsv files directly inside folder.

	The result holds one MD5 hex digest per .tsv file, each terminated by a
	newline, concatenated in sorted filename order. Files whose name does not
	end in ".tsv" are ignored. An empty string is returned when the folder
	contains no .tsv files.

	Raises whatever os.listdir / open raise (e.g. FileNotFoundError) if the
	folder or one of its files cannot be read.
	"""
	import hashlib
	import os

	digests = []

	# os.listdir returns entries in arbitrary order; sorting makes sure the
	# same set of files always produces the same fingerprint string
	for name in sorted(os.listdir(folder)):
		if name.endswith(".tsv"):
			# binary mode: the digest must depend on the raw bytes only
			with open(os.path.join(folder, name), "rb") as tsvfile:
				digests.append(hashlib.md5(tsvfile.read()).hexdigest() + "\n")

	return "".join(digests)
|  | 	 | ||||||
|  | 	 | ||||||
|  | 	 | ||||||
| def parseAllTSV(path,*args): | def parseAllTSV(path,*args): | ||||||
| 	 | 	 | ||||||
| 	import os | 	import os | ||||||
|   | |||||||
| @@ -57,7 +57,7 @@ table td.artists,td.artist,td.title,td.amount { | |||||||
| } | } | ||||||
|  |  | ||||||
| table td.button { | table td.button { | ||||||
| 	width:150px; | 	width:200px; | ||||||
| 	background-color:yellow; | 	background-color:yellow; | ||||||
| 	color:#333337; | 	color:#333337; | ||||||
| 	padding:1px; | 	padding:1px; | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Krateng
					Krateng