Reworked import and added support for Spotify, GH-104

2023-08-10 21:12:55 +03:00 · 2022-03-27 03:10:54 +02:00 · 2022-03-27 03:10:54 +02:00 · fa9fee758c
commit fa9fee758c
parent 538daeb284
4 changed files with 136 additions and 86 deletions
--- a/maloja/proccontrol/control.py
+++ b/maloja/proccontrol/control.py
@@ -111,7 +111,7 @@ def main(*args,**kwargs):
 		"stop":stop,
 		"run":direct,
 		"debug":debug,
-		"import":tasks.loadlastfm,
+		"import":tasks.loadexternal,
 		"backup":tasks.backuphere,
 	#	"update":update,
 		"fix":tasks.fixdb,
--- a/maloja/proccontrol/tasks/__init__.py
+++ b/maloja/proccontrol/tasks/__init__.py
@@ -5,20 +5,17 @@ from ...globalconf import data_dir



-def loadlastfm(filename):
+def loadexternal(filename):

 	if not os.path.exists(filename):
 		print("File could not be found.")
 		return

-	if os.path.exists(data_dir['scrobbles']("lastfmimport.tsv")):
-		overwrite = ask("Already imported Last.FM data. Overwrite?",default=False)
-		if not overwrite: return
 	print("Please wait...")

-	from .lastfmconverter import convert
-	imported,failed = convert(filename,data_dir['scrobbles']("lastfmimport.tsv"))
-	print("Successfully imported",imported,"Last.FM scrobbles!")
+	from .importer import import_scrobbles
+	imported,failed = import_scrobbles(filename)
+	print("Successfully imported",imported,"scrobbles!")
 	if failed > 0:
 		print(col['red'](str(failed) + " Errors!"))

--- a/maloja/proccontrol/tasks/importer.py
+++ b/maloja/proccontrol/tasks/importer.py
@@ -0,0 +1,131 @@
+import os, datetime, re
+
+import json, csv
+
+from ...cleanup import *
+from doreah.io import col, ask
+from ...globalconf import data_dir
+#from ...utilities import *
+
+
+
+
+c = CleanerAgent()
+
+
+
+def import_scrobbles(inputf):
+
+	ext = inputf.split('.')[-1].lower()
+
+	if ext == 'csv':
+		type = "Last.fm"
+		outputf = data_dir['scrobbles']("lastfmimport.tsv")
+		importfunc = parse_lastfm
+
+
+	elif ext == 'json':
+		type = "Spotify"
+		outputf = data_dir['scrobbles']("spotifyimport.tsv")
+		importfunc = parse_spotify
+
+
+	print(f"Parsing {col['yellow'](inputf)} as {col['cyan'](type)} export")
+
+	if os.path.exists(outputf):
+		overwrite = ask("Already imported data. Overwrite?",default=False)
+		if not overwrite: return
+
+	with open(outputf,"w") as outputfd:
+		success = 0
+		failed = 0
+		timestamps = set()
+
+		for scrobble in importfunc(inputf):
+			if scrobble is None:
+				failed += 1
+			else:
+				success += 1
+
+				## We prevent double timestamps in the database creation, so we
+				## technically don't need them in the files
+				## however since the conversion to maloja is a one-time thing,
+				## we should take any effort to make the file as good as possible
+				while scrobble['timestamp'] in timestamps:
+					scrobble['timestamp'] += 1
+				timestamps.add(scrobble['timestamp'])
+
+				# Format fields for tsv
+				scrobble['timestamp'] = str(scrobble['timestamp'])
+				scrobble['duration'] = str(scrobble['duration']) if scrobble['duration'] is not None else '-'
+				(artists,scrobble['title']) = c.fullclean(scrobble['artiststr'],scrobble['title'])
+				scrobble['artiststr'] = "␟".join(artists)
+
+				outputline = "\t".join([
+					scrobble['timestamp'],
+					scrobble['artiststr'],
+					scrobble['title'],
+					scrobble['album'],
+					scrobble['duration']
+				])
+				outputfd.write(outputline + '\n')
+
+				if success % 100 == 0:
+					print(f"Imported {success} scrobbles...")
+
+	return success,failed
+
+
+def parse_spotify(inputf):
+	with open(inputf,'r') as inputfd:
+		data = json.load(inputfd)
+
+	for entry in data:
+
+		sec = int(entry['ms_played'] / 1000)
+
+		if sec > 30:
+			try:
+				yield {
+					'title':entry['master_metadata_track_name'],
+					'artiststr': entry['master_metadata_album_artist_name'],
+					'album': entry['master_metadata_album_album_name'],
+					'timestamp': int(datetime.datetime.strptime(
+						entry['ts'].replace('Z','+0000',),
+						"%Y-%m-%dT%H:%M:%S%z"
+					).timestamp()),
+					'duration':sec
+				}
+			except:
+				print(col['red'](str(entry) + " could not be parsed. Scrobble not imported."))
+				yield None
+				continue
+
+def parse_lastfm(inputf):
+
+	with open(inputf,'r',newline='') as inputfd:
+		reader = csv.reader(inputfd)
+
+		for row in reader:
+			try:
+				artist,album,title,time = row
+			except ValueError:
+				print(col['red'](str(row) + " does not look like a valid entry. Scrobble not imported."))
+				yield None
+				continue
+
+			try:
+				yield {
+					'title': row[2],
+					'artiststr': row[0],
+					'album': row[1],
+					'timestamp': int(datetime.datetime.strptime(
+						row[3] + '+0000',
+						"%d %b %Y %H:%M%z"
+					).timestamp()),
+					'duration':None
+				}
+			except:
+				print(col['red'](str(row) + " could not be parsed. Scrobble not imported."))
+				yield None
+				continue
--- a/maloja/proccontrol/tasks/lastfmconverter.py
+++ b/maloja/proccontrol/tasks/lastfmconverter.py
@@ -1,78 +0,0 @@
-import os, datetime, re
-from ...cleanup import *
-from doreah.io import col
-#from ...utilities import *
-
-
-
-
-c = CleanerAgent()
-
-
-
-def convert(input,output):
-
-	with open(input,"r",encoding="utf-8") as log:
-		with open(output,"w") as outputlog:
-
-
-			stamps = [99999999999999]
-
-			success = 0
-			failed = 0
-			for l in log:
-				l = l.replace("\n","")
-				try:
-					artist,album,title,time = l.split(",")
-				except KeyboardInterrupt:
-					raise
-				except:
-					print(col['red']("Line '" + l + "' does not look like a valid entry. Scrobble not imported."))
-					failed += 1
-					continue
-
-				try:
-					(artists,title) = c.fullclean(artist,title)
-					artistsstr = "␟".join(artists)
-
-					timeparts = time.split(" ")
-					(h,m) = timeparts[3].split(":")
-
-					months = {"Jan":1,"Feb":2,"Mar":3,"Apr":4,"May":5,"Jun":6,"Jul":7,"Aug":8,"Sep":9,"Oct":10,"Nov":11,"Dec":12}
-					timestamp = int(datetime.datetime(int(timeparts[2]),months[timeparts[1]],int(timeparts[0]),int(h),int(m)).timestamp())
-
-
-					## We prevent double timestamps in the database creation, so we technically don't need them in the files
-					## however since the conversion from lastfm to maloja is a one-time thing, we should take any effort to make
-					## the file as good as possible
-					if (timestamp < stamps[-1]):
-						pass
-					elif (timestamp == stamps[-1]):
-						timestamp -= 1
-					else:
-						while(timestamp in stamps):
-							timestamp -= 1
-
-					if (timestamp < stamps[-1]):
-						stamps.append(timestamp)
-					else:
-						stamps.insert(0,timestamp)
-				except KeyboardInterrupt:
-					raise
-				except:
-					print(col['red']("Line '" + l + "' could not be parsed. Scrobble not imported."))
-					failed += 1
-					continue
-
-
-				entry = "\t".join([str(timestamp),artistsstr,title,album])
-
-				outputlog.write(entry)
-				outputlog.write("\n")
-
-				success += 1
-
-				if success % 100 == 0:
-					print("Imported " + str(success) + " scrobbles...")
-
-			return (success,failed)