Initial support for new Spotify export format, GH-215

2023-08-10 21:12:55 +03:00 · 2023-06-25 18:00:23 +02:00 · 2023-06-25 18:00:23 +02:00 · 39a42e915c
commit 39a42e915c
parent b8944b4954
1 changed files with 62 additions and 5 deletions
--- a/maloja/proccontrol/tasks/import_scrobbles.py
+++ b/maloja/proccontrol/tasks/import_scrobbles.py
@ -37,13 +37,17 @@ def import_scrobbles(inputf):
 		typeid,typedesc = "lastfm","Last.fm"
 		importfunc = parse_lastfm

+	elif re.match("Streaming_History_Audio.+\.json",filename):
+		typeid,typedesc = "spotify","Spotify"
+		importfunc = parse_spotify_lite
+
 	elif re.match("endsong_[0-9]+\.json",filename):
 		typeid,typedesc = "spotify","Spotify"
-		importfunc = parse_spotify_full
+		importfunc = parse_spotify

 	elif re.match("StreamingHistory[0-9]+\.json",filename):
 		typeid,typedesc = "spotify","Spotify"
-		importfunc = parse_spotify_lite
+		importfunc = parse_spotify_lite_legacy

 	elif re.match("maloja_export_[0-9]+\.json",filename):
 		typeid,typedesc = "maloja","Maloja"
@ -81,6 +85,7 @@ def import_scrobbles(inputf):
 			# extra info
 			extrainfo = {}
 			if scrobble.get('album_name'): extrainfo['album_name'] = scrobble['album_name']
+			if scrobble.get('album_artist'): extrainfo['album_artist'] = scrobble['album_artist']
 			# saving this in the scrobble instead of the track because for now it's not meant
 			# to be authorative information, just payload of the scrobble

@ -121,7 +126,7 @@ def import_scrobbles(inputf):

 	return result

-def parse_spotify_lite(inputf):
+def parse_spotify_lite_legacy(inputf):
 	pth = os.path
 	inputfolder = pth.relpath(pth.dirname(pth.abspath(inputf)))
 	filenames = re.compile(r'StreamingHistory[0-9]+\.json')
@ -171,7 +176,59 @@ def parse_spotify_lite(inputf):
 		print()


-def parse_spotify_full(inputf):
+def parse_spotify_lite(inputf):
+	pth = os.path
+	inputfolder = pth.relpath(pth.dirname(pth.abspath(inputf)))
+	filenames = re.compile(r'Streaming_History_Audio.+\.json')
+	inputfiles = [os.path.join(inputfolder,f) for f in os.listdir(inputfolder) if filenames.match(f)]
+
+	if len(inputfiles) == 0:
+		print("No files found!")
+		return
+
+	if inputfiles != [inputf]:
+		print("Spotify files should all be imported together to identify duplicates across the whole dataset.")
+		if not ask("Import " + ", ".join(col['yellow'](i) for i in inputfiles) + "?",default=True):
+			inputfiles = [inputf]
+
+	for inputf in inputfiles:
+
+		print("Importing",col['yellow'](inputf),"...")
+		with open(inputf,'r') as inputfd:
+			data = json.load(inputfd)
+
+		for entry in data:
+
+			try:
+				played = int(entry['ms_played'] / 1000)
+				timestamp = int(
+					datetime.datetime.strptime(entry['ts'],"%Y-%m-%dT%H:%M:%SZ").timestamp()
+				)
+				artist = entry['master_metadata_album_artist_name'] # hmmm
+				title = entry['master_metadata_track_name']
+				album = entry['master_metadata_album_album_name']
+				albumartist = entry['master_metadata_album_artist_name']
+
+				if played < 30:
+					yield ('CONFIDENT_SKIP',None,f"{entry} is shorter than 30 seconds, skipping...")
+					continue
+
+				yield ("CONFIDENT_IMPORT",{
+					'track_title':title,
+					'track_artists': artist,
+					'track_length': None,
+					'scrobble_time': timestamp,
+					'scrobble_duration':played,
+					'album_name': album,
+					'album_artist': albumartist
+				},'')
+			except Exception as e:
+				yield ('FAIL',None,f"{entry} could not be parsed. Scrobble not imported. ({repr(e)})")
+				continue
+
+		print()
+
+def parse_spotify(inputf):
 	pth = os.path
 	inputfolder = pth.relpath(pth.dirname(pth.abspath(inputf)))
 	filenames = re.compile(r'endsong_[0-9]+\.json')