From 38f2173bde858c888f188ed25f980d4051a4ff02 Mon Sep 17 00:00:00 2001 From: krateng Date: Tue, 29 Mar 2022 17:27:34 +0200 Subject: [PATCH 01/12] Added handling for invalid Spotify scrobbles --- maloja/__pkginfo__.py | 2 +- maloja/proccontrol/tasks/__init__.py | 4 +- maloja/proccontrol/tasks/importer.py | 55 ++++++++++++++++++---------- 3 files changed, 39 insertions(+), 22 deletions(-) diff --git a/maloja/__pkginfo__.py b/maloja/__pkginfo__.py index 9bfb1d5..9e1199e 100644 --- a/maloja/__pkginfo__.py +++ b/maloja/__pkginfo__.py @@ -4,7 +4,7 @@ # you know what f*ck it # this is hardcoded for now because of that damn project / package name discrepancy # i'll fix it one day -VERSION = "2.14.6" +VERSION = "2.14.7" HOMEPAGE = "https://github.com/krateng/maloja" diff --git a/maloja/proccontrol/tasks/__init__.py b/maloja/proccontrol/tasks/__init__.py index 837a2e4..b8af5f3 100644 --- a/maloja/proccontrol/tasks/__init__.py +++ b/maloja/proccontrol/tasks/__init__.py @@ -14,8 +14,10 @@ def loadexternal(filename): print("Please wait...") from .importer import import_scrobbles - imported,failed = import_scrobbles(filename) + imported,failed,warning = import_scrobbles(filename) print("Successfully imported",imported,"scrobbles!") + if warning > 0: + print(col['orange'](str(warning) + " Warnings!")) if failed > 0: print(col['red'](str(failed) + " Errors!")) diff --git a/maloja/proccontrol/tasks/importer.py b/maloja/proccontrol/tasks/importer.py index e24f740..e3d571e 100644 --- a/maloja/proccontrol/tasks/importer.py +++ b/maloja/proccontrol/tasks/importer.py @@ -39,11 +39,14 @@ def import_scrobbles(inputf): with open(outputf,"w") as outputfd: success = 0 failed = 0 + warning = 0 timestamps = set() for scrobble in importfunc(inputf): if scrobble is None: failed += 1 + if scrobble is False: + warning += 1 else: success += 1 @@ -73,7 +76,7 @@ def import_scrobbles(inputf): if success % 100 == 0: print(f"Imported {success} scrobbles...") - return success,failed + return success,failed,warning def parse_spotify(inputf): @@ -84,22 +87,34 @@ def parse_spotify(inputf): sec = int(entry['ms_played'] / 1000) - if sec > 30: - try: - yield { - 'title':entry['master_metadata_track_name'], - 'artiststr': entry['master_metadata_album_artist_name'], - 'album': entry['master_metadata_album_album_name'], - 'timestamp': int(datetime.datetime.strptime( - entry['ts'].replace('Z','+0000',), - "%Y-%m-%dT%H:%M:%S%z" - ).timestamp()), - 'duration':sec - } - except: - print(col['red'](str(entry) + " could not be parsed. Scrobble not imported.")) - yield None - continue + if entry['master_metadata_track_name'] is None: + print(col['orange'](f"{entry} has no title, skipping...")) + yield False + continue + if entry['master_metadata_album_artist_name'] is None: + print(col['orange'](f"{entry} has no artist, skipping...")) + yield False + continue + if sec < 30: + print(col['orange'](f"{entry} is shorter than 30 seconds, skipping...")) + yield False + continue + + try: + yield { + 'title':entry['master_metadata_track_name'], + 'artiststr': entry['master_metadata_album_artist_name'], + 'album': entry['master_metadata_album_album_name'], + 'timestamp': int(datetime.datetime.strptime( + entry['ts'].replace('Z','+0000',), + "%Y-%m-%dT%H:%M:%S%z" + ).timestamp()), + 'duration':sec + } + except Exception as e: + print(col['red'](f"{entry} could not be parsed. Scrobble not imported. ({repr(e)})")) + yield None + continue def parse_lastfm(inputf): @@ -110,7 +125,7 @@ def parse_lastfm(inputf): try: artist,album,title,time = row except ValueError: - print(col['red'](str(row) + " does not look like a valid entry. Scrobble not imported.")) + print(col['red'](f"{row} does not look like a valid entry. Scrobble not imported.")) yield None continue @@ -125,7 +140,7 @@ def parse_lastfm(inputf): ).timestamp()), 'duration':None } - except: - print(col['red'](str(row) + " could not be parsed. Scrobble not imported.")) + except Exception as e: + print(col['red'](f"{entry} could not be parsed. Scrobble not imported. ({repr(e)})")) yield None continue From 3108b368ef8eb87a005697adde46b395677d21ec Mon Sep 17 00:00:00 2001 From: krateng Date: Tue, 29 Mar 2022 17:41:16 +0200 Subject: [PATCH 02/12] Fixed continued scrobble import after error --- maloja/proccontrol/tasks/importer.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/maloja/proccontrol/tasks/importer.py b/maloja/proccontrol/tasks/importer.py index e3d571e..6c1751e 100644 --- a/maloja/proccontrol/tasks/importer.py +++ b/maloja/proccontrol/tasks/importer.py @@ -5,14 +5,16 @@ import json, csv from ...cleanup import * from doreah.io import col, ask from ...globalconf import data_dir -#from ...utilities import * - - c = CleanerAgent() +def warn(msg): + print(col['orange'](msg)) +def err(msg): + print(col['red'](msg)) + def import_scrobbles(inputf): @@ -45,7 +47,7 @@ def import_scrobbles(inputf): for scrobble in importfunc(inputf): if scrobble is None: failed += 1 - if scrobble is False: + elif scrobble is False: warning += 1 else: success += 1 @@ -88,15 +90,15 @@ def parse_spotify(inputf): sec = int(entry['ms_played'] / 1000) if entry['master_metadata_track_name'] is None: - print(col['orange'](f"{entry} has no title, skipping...")) + warn(f"{entry} has no title, skipping...") yield False continue if entry['master_metadata_album_artist_name'] is None: - print(col['orange'](f"{entry} has no artist, skipping...")) + warn(f"{entry} has no artist, skipping...") yield False continue if sec < 30: - print(col['orange'](f"{entry} is shorter than 30 seconds, skipping...")) + warn(f"{entry} is shorter than 30 seconds, skipping...") yield False continue @@ -112,7 +114,7 @@ def parse_spotify(inputf): 'duration':sec } except Exception as e: - print(col['red'](f"{entry} could not be parsed. Scrobble not imported. ({repr(e)})")) + err(f"{entry} could not be parsed. Scrobble not imported. ({repr(e)})") yield None continue @@ -125,7 +127,7 @@ def parse_lastfm(inputf): try: artist,album,title,time = row except ValueError: - print(col['red'](f"{row} does not look like a valid entry. Scrobble not imported.")) + warn(f"{row} does not look like a valid entry. Scrobble not imported.") yield None continue @@ -141,6 +143,6 @@ def parse_lastfm(inputf): 'duration':None } except Exception as e: - print(col['red'](f"{entry} could not be parsed. Scrobble not imported. ({repr(e)})")) + err(f"{entry} could not be parsed. Scrobble not imported. ({repr(e)})") yield None continue From 5d582d39aa18e7e260df6b5b9ccf1ed454e254b7 Mon Sep 17 00:00:00 2001 From: krateng Date: Tue, 29 Mar 2022 18:09:39 +0200 Subject: [PATCH 03/12] Added confirmation prompt to random generation --- maloja/proccontrol/tasks/__init__.py | 2 -- maloja/proccontrol/tasks/generate.py | 19 +++++++++++-------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/maloja/proccontrol/tasks/__init__.py b/maloja/proccontrol/tasks/__init__.py index b8af5f3..7d72e6c 100644 --- a/maloja/proccontrol/tasks/__init__.py +++ b/maloja/proccontrol/tasks/__init__.py @@ -11,8 +11,6 @@ def loadexternal(filename): print("File could not be found.") return - print("Please wait...") - from .importer import import_scrobbles imported,failed,warning = import_scrobbles(filename) print("Successfully imported",imported,"scrobbles!") diff --git a/maloja/proccontrol/tasks/generate.py b/maloja/proccontrol/tasks/generate.py index 8bd2cb3..4461d24 100644 --- a/maloja/proccontrol/tasks/generate.py +++ b/maloja/proccontrol/tasks/generate.py @@ -1,5 +1,6 @@ import random import datetime +from doreah.io import ask artists = [ "Chou Tzuyu","Jennie Kim","Kim Seolhyun","Nancy McDonie","Park Junghwa","Hirai Momo","Rosé Park","Laura Brehm","HyunA", @@ -65,12 +66,14 @@ def generate_track(): def generate(targetfile): - with open(targetfile,"a") as fd: - for _ in range(200): - track = generate_track() - for _ in range(random.randint(1, 50)): - timestamp = random.randint(1, int(datetime.datetime.now().timestamp())) + if ask("Generate random scrobbles?",default=False): + with open(targetfile,"a") as fd: + for _ in range(200): + track = generate_track() + for _ in range(random.randint(1, 50)): + timestamp = random.randint(1, int(datetime.datetime.now().timestamp())) - entry = "\t".join([str(timestamp),"␟".join(track['artists']),track['title'],"-"]) - fd.write(entry) - fd.write("\n") + entry = "\t".join([str(timestamp),"␟".join(track['artists']),track['title'],"-"]) + fd.write(entry) + fd.write("\n") + print("Done!") From 27cacbf6589b1c2a71bacfbb8958d960bcf08579 Mon Sep 17 00:00:00 2001 From: krateng Date: Tue, 29 Mar 2022 19:02:59 +0200 Subject: [PATCH 04/12] Added ability to import multiple files, GH-104 --- maloja/proccontrol/tasks/importer.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/maloja/proccontrol/tasks/importer.py b/maloja/proccontrol/tasks/importer.py index 6c1751e..429b20b 100644 --- a/maloja/proccontrol/tasks/importer.py +++ b/maloja/proccontrol/tasks/importer.py @@ -3,7 +3,7 @@ import os, datetime, re import json, csv from ...cleanup import * -from doreah.io import col, ask +from doreah.io import col, ask, prompt from ...globalconf import data_dir @@ -35,10 +35,20 @@ def import_scrobbles(inputf): print(f"Parsing {col['yellow'](inputf)} as {col['cyan'](type)} export") if os.path.exists(outputf): - overwrite = ask("Already imported data. Overwrite?",default=False) - if not overwrite: return + while True: + action = prompt("Already imported data. Overwrite (o), append (a) or cancel (c)?",default='c') + if action == 'c': + return 0,0,0 + elif action == 'a': + mode = 'a' + break + elif action == 'o': + mode = 'w' + break + else: + print("Could not understand response.") - with open(outputf,"w") as outputfd: + with open(outputf,mode) as outputfd: success = 0 failed = 0 warning = 0 From 2a1f188e3797e90eee54bd8d3fa60107dbc50fe4 Mon Sep 17 00:00:00 2001 From: krateng Date: Wed, 30 Mar 2022 17:38:56 +0200 Subject: [PATCH 05/12] Changed Spotify import to use all files and discard duplicates, GH-104 --- maloja/proccontrol/tasks/__init__.py | 4 +- maloja/proccontrol/tasks/importer.py | 111 +++++++++++++++++---------- 2 files changed, 72 insertions(+), 43 deletions(-) diff --git a/maloja/proccontrol/tasks/__init__.py b/maloja/proccontrol/tasks/__init__.py index 7d72e6c..83b1e51 100644 --- a/maloja/proccontrol/tasks/__init__.py +++ b/maloja/proccontrol/tasks/__init__.py @@ -15,9 +15,9 @@ def loadexternal(filename): imported,failed,warning = import_scrobbles(filename) print("Successfully imported",imported,"scrobbles!") if warning > 0: - print(col['orange'](str(warning) + " Warnings!")) + print(col['orange'](f"{warning} Warning{'s' if warning != 1 else ''}!")) if failed > 0: - print(col['red'](str(failed) + " Errors!")) + print(col['red'](f"{failed} Error{'s' if failed != 1 else ''}!")) def backuphere(): from .backup import backup diff --git a/maloja/proccontrol/tasks/importer.py b/maloja/proccontrol/tasks/importer.py index 429b20b..190b0c6 100644 --- a/maloja/proccontrol/tasks/importer.py +++ b/maloja/proccontrol/tasks/importer.py @@ -26,17 +26,18 @@ def import_scrobbles(inputf): importfunc = parse_lastfm - elif ext == 'json': + elif ext == 'json' or os.path.isdir(inputf): type = "Spotify" outputf = data_dir['scrobbles']("spotifyimport.tsv") importfunc = parse_spotify + if os.path.isfile(inputf): inputf = os.path.dirname(inputf) print(f"Parsing {col['yellow'](inputf)} as {col['cyan'](type)} export") if os.path.exists(outputf): while True: - action = prompt("Already imported data. Overwrite (o), append (a) or cancel (c)?",default='c') + action = prompt(f"Already imported {type} data. [O]verwrite, [A]ppend or [C]ancel?",default='c').lower()[0] if action == 'c': return 0,0,0 elif action == 'a': @@ -62,10 +63,6 @@ def import_scrobbles(inputf): else: success += 1 - ## We prevent double timestamps in the database creation, so we - ## technically don't need them in the files - ## however since the conversion to maloja is a one-time thing, - ## we should take any effort to make the file as good as possible while scrobble['timestamp'] in timestamps: scrobble['timestamp'] += 1 timestamps.add(scrobble['timestamp']) @@ -92,41 +89,73 @@ def import_scrobbles(inputf): def parse_spotify(inputf): - with open(inputf,'r') as inputfd: - data = json.load(inputfd) - for entry in data: + filenames = re.compile(r'endsong_[0-9]+\.json') - sec = int(entry['ms_played'] / 1000) + inputfiles = [os.path.join(inputf,f) for f in os.listdir(inputf) if filenames.match(f)] - if entry['master_metadata_track_name'] is None: - warn(f"{entry} has no title, skipping...") - yield False - continue - if entry['master_metadata_album_artist_name'] is None: - warn(f"{entry} has no artist, skipping...") - yield False - continue - if sec < 30: - warn(f"{entry} is shorter than 30 seconds, skipping...") - yield False - continue + if len(inputfiles) == 0: + print("No files found!") + elif ask("Importing the following files: " + ", ".join(col['yellow'](i) for i in inputfiles) + ". Confirm?", default=False): - try: - yield { - 'title':entry['master_metadata_track_name'], - 'artiststr': entry['master_metadata_album_artist_name'], - 'album': entry['master_metadata_album_album_name'], - 'timestamp': int(datetime.datetime.strptime( - entry['ts'].replace('Z','+0000',), - "%Y-%m-%dT%H:%M:%S%z" - ).timestamp()), - 'duration':sec - } - except Exception as e: - err(f"{entry} could not be parsed. Scrobble not imported. ({repr(e)})") - yield None - continue + # we keep timestamps here as well to remove duplicates because spotify's export + # is messy - this is specific to this import type and should not be mixed with + # the outer function timestamp check (which is there to fix duplicate timestamps + # that are assumed to correspond to actually distinct plays) + timestamps = {} + + for inputf in inputfiles: + + print("Importing",col['yellow'](inputf),"...") + with open(inputf,'r') as inputfd: + data = json.load(inputfd) + + for entry in data: + + try: + sec = int(entry['ms_played'] / 1000) + timestamp = entry['offline_timestamp'] + artist = entry['master_metadata_album_artist_name'] + title = entry['master_metadata_track_name'] + album = entry['master_metadata_album_album_name'] + + + if title is None: + warn(f"{entry} has no title, skipping...") + yield False + continue + if artist is None: + warn(f"{entry} has no artist, skipping...") + yield False + continue + if sec < 30: + warn(f"{entry} is shorter than 30 seconds, skipping...") + yield False + continue + if timestamp in timestamps and (artist,title) in timestamps[timestamp]: + warn(f"{entry} seems to be a duplicate, skipping...") + yield False + continue + + timestamps.setdefault(timestamp,[]).append((artist,title)) + + yield { + 'title':title, + 'artiststr': artist, + 'album': album, + # 'timestamp': int(datetime.datetime.strptime( + # entry['ts'].replace('Z','+0000',), + # "%Y-%m-%dT%H:%M:%S%z" + # ).timestamp()), + 'timestamp': timestamp, + 'duration':sec + } + except Exception as e: + err(f"{entry} could not be parsed. Scrobble not imported. ({repr(e)})") + yield None + continue + + print() def parse_lastfm(inputf): @@ -143,11 +172,11 @@ def parse_lastfm(inputf): try: yield { - 'title': row[2], - 'artiststr': row[0], - 'album': row[1], + 'title': title, + 'artiststr': artist, + 'album': album, 'timestamp': int(datetime.datetime.strptime( - row[3] + '+0000', + time + '+0000', "%d %b %Y %H:%M%z" ).timestamp()), 'duration':None From 8ed3923851a1c3e6c28d110eccdd8b6096a8a639 Mon Sep 17 00:00:00 2001 From: krateng Date: Wed, 30 Mar 2022 21:37:43 +0200 Subject: [PATCH 06/12] Fixed timestamp parsing for Spotify import, GH-104 --- maloja/proccontrol/tasks/importer.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/maloja/proccontrol/tasks/importer.py b/maloja/proccontrol/tasks/importer.py index 190b0c6..ee5f66d 100644 --- a/maloja/proccontrol/tasks/importer.py +++ b/maloja/proccontrol/tasks/importer.py @@ -35,6 +35,7 @@ def import_scrobbles(inputf): print(f"Parsing {col['yellow'](inputf)} as {col['cyan'](type)} export") + if os.path.exists(outputf): while True: action = prompt(f"Already imported {type} data. [O]verwrite, [A]ppend or [C]ancel?",default='c').lower()[0] @@ -48,7 +49,10 @@ def import_scrobbles(inputf): break else: print("Could not understand response.") + else: + mode = 'w' + with open(outputf,mode) as outputfd: success = 0 failed = 0 @@ -114,7 +118,7 @@ def parse_spotify(inputf): try: sec = int(entry['ms_played'] / 1000) - timestamp = entry['offline_timestamp'] + timestamp = int(entry['offline_timestamp'] / 1000) artist = entry['master_metadata_album_artist_name'] title = entry['master_metadata_track_name'] album = entry['master_metadata_album_album_name'] From 3389d6c5f5b06be9710cabf031450d11b3aac69a Mon Sep 17 00:00:00 2001 From: krateng Date: Fri, 1 Apr 2022 17:16:50 +0200 Subject: [PATCH 07/12] Reworked import --- maloja/proccontrol/tasks/__init__.py | 4 +- maloja/proccontrol/tasks/importer.py | 192 ++++++++++++++++----------- 2 files changed, 117 insertions(+), 79 deletions(-) diff --git a/maloja/proccontrol/tasks/__init__.py b/maloja/proccontrol/tasks/__init__.py index 83b1e51..744534a 100644 --- a/maloja/proccontrol/tasks/__init__.py +++ b/maloja/proccontrol/tasks/__init__.py @@ -12,10 +12,12 @@ def loadexternal(filename): return from .importer import import_scrobbles - imported,failed,warning = import_scrobbles(filename) + imported,warning,skipped,failed = import_scrobbles(filename) print("Successfully imported",imported,"scrobbles!") if warning > 0: print(col['orange'](f"{warning} Warning{'s' if warning != 1 else ''}!")) + if skipped > 0: + print(col['orange'](f"{skipped} Skipped!")) if failed > 0: print(col['red'](f"{failed} Error{'s' if failed != 1 else ''}!")) diff --git a/maloja/proccontrol/tasks/importer.py b/maloja/proccontrol/tasks/importer.py index ee5f66d..7a332de 100644 --- a/maloja/proccontrol/tasks/importer.py +++ b/maloja/proccontrol/tasks/importer.py @@ -18,19 +18,24 @@ def err(msg): def import_scrobbles(inputf): - ext = inputf.split('.')[-1].lower() - - if ext == 'csv': + if re.match(".*\.csv",inputf): type = "Last.fm" outputf = data_dir['scrobbles']("lastfmimport.tsv") importfunc = parse_lastfm - - elif ext == 'json' or os.path.isdir(inputf): + elif re.match("endsong_[0-9]+\.json",inputf): type = "Spotify" outputf = data_dir['scrobbles']("spotifyimport.tsv") - importfunc = parse_spotify - if os.path.isfile(inputf): inputf = os.path.dirname(inputf) + importfunc = parse_spotify_full + + elif re.match("StreamingHistory[0-9]+\.json",inputf): + type = "Spotify" + outputf = data_dir['scrobbles']("spotifyimport.tsv") + importfunc = parse_spotify_lite + + else: + print("File",inputf,"could not be identified as a valid import source.") + return 0,0,0,0 print(f"Parsing {col['yellow'](inputf)} as {col['cyan'](type)} export") @@ -40,7 +45,7 @@ def import_scrobbles(inputf): while True: action = prompt(f"Already imported {type} data. [O]verwrite, [A]ppend or [C]ancel?",default='c').lower()[0] if action == 'c': - return 0,0,0 + return 0,0,0,0 elif action == 'a': mode = 'a' break @@ -52,20 +57,20 @@ def import_scrobbles(inputf): else: mode = 'w' - + with open(outputf,mode) as outputfd: - success = 0 - failed = 0 - warning = 0 + success, warning, skipped, failed = 0, 0, 0, 0 timestamps = set() - for scrobble in importfunc(inputf): - if scrobble is None: + for status,scrobble in importfunc(inputf): + if status == 'FAIL': failed += 1 - elif scrobble is False: - warning += 1 + elif status == 'SKIP': + skipped += 1 else: success += 1 + if status == 'WARN': + warning += 1 while scrobble['timestamp'] in timestamps: scrobble['timestamp'] += 1 @@ -89,77 +94,108 @@ def import_scrobbles(inputf): if success % 100 == 0: print(f"Imported {success} scrobbles...") - return success,failed,warning + return success, warning, skipped, failed +def parse_spotify_lite(inputf): + inputfolder = os.path.dirname(inputf) + filenames = re.compile(r'StreamingHistory[0-9]+\.json') + inputfiles = [os.path.join(inputfolder,f) for f in os.listdir(inputfolder) if filenames.match(f)] -def parse_spotify(inputf): + if inputfiles != [inputf]: + print("Spotify files should all be imported together to identify duplicates across the whole dataset.") + if not ask("Import " + ", ".join(col['yellow'](i) for i in inputfiles) + "?",default=True): + inputfiles = [inputf] + # TODO + +def parse_spotify_full(inputf): + + inputfolder = os.path.dirname(inputf) filenames = re.compile(r'endsong_[0-9]+\.json') + inputfiles = [os.path.join(inputfolder,f) for f in os.listdir(inputfolder) if filenames.match(f)] - inputfiles = [os.path.join(inputf,f) for f in os.listdir(inputf) if filenames.match(f)] + if inputfiles != [inputf]: + print("Spotify files should all be imported together to identify duplicates across the whole dataset.") + if not ask("Import " + ", ".join(col['yellow'](i) for i in inputfiles) + "?",default=True): + inputfiles = [inputf] - if len(inputfiles) == 0: - print("No files found!") - elif ask("Importing the following files: " + ", ".join(col['yellow'](i) for i in inputfiles) + ". Confirm?", default=False): + # we keep timestamps here as well to remove duplicates because spotify's export + # is messy - this is specific to this import type and should not be mixed with + # the outer function timestamp check (which is there to fix duplicate timestamps + # that are assumed to correspond to actually distinct plays) + timestamps = {} + inaccurate_timestamps = {} - # we keep timestamps here as well to remove duplicates because spotify's export - # is messy - this is specific to this import type and should not be mixed with - # the outer function timestamp check (which is there to fix duplicate timestamps - # that are assumed to correspond to actually distinct plays) - timestamps = {} + for inputf in inputfiles: - for inputf in inputfiles: + print("Importing",col['yellow'](inputf),"...") + with open(inputf,'r') as inputfd: + data = json.load(inputfd) - print("Importing",col['yellow'](inputf),"...") - with open(inputf,'r') as inputfd: - data = json.load(inputfd) + for entry in data: - for entry in data: - - try: - sec = int(entry['ms_played'] / 1000) - timestamp = int(entry['offline_timestamp'] / 1000) - artist = entry['master_metadata_album_artist_name'] - title = entry['master_metadata_track_name'] - album = entry['master_metadata_album_album_name'] + try: + played = int(entry['ms_played'] / 1000) + timestamp = int(entry['offline_timestamp'] / 1000) + artist = entry['master_metadata_album_artist_name'] + title = entry['master_metadata_track_name'] + album = entry['master_metadata_album_album_name'] - if title is None: - warn(f"{entry} has no title, skipping...") - yield False - continue - if artist is None: - warn(f"{entry} has no artist, skipping...") - yield False - continue - if sec < 30: - warn(f"{entry} is shorter than 30 seconds, skipping...") - yield False - continue - if timestamp in timestamps and (artist,title) in timestamps[timestamp]: - warn(f"{entry} seems to be a duplicate, skipping...") - yield False - continue - - timestamps.setdefault(timestamp,[]).append((artist,title)) - - yield { - 'title':title, - 'artiststr': artist, - 'album': album, - # 'timestamp': int(datetime.datetime.strptime( - # entry['ts'].replace('Z','+0000',), - # "%Y-%m-%dT%H:%M:%S%z" - # ).timestamp()), - 'timestamp': timestamp, - 'duration':sec - } - except Exception as e: - err(f"{entry} could not be parsed. Scrobble not imported. ({repr(e)})") - yield None + if title is None: + warn(f"{entry} has no title, skipping...") + yield ('SKIP',None) + continue + if artist is None: + warn(f"{entry} has no artist, skipping...") + yield ('SKIP',None) + continue + if played < 30: + warn(f"{entry} is shorter than 30 seconds, skipping...") + yield ('SKIP',None) continue - print() + # if offline_timestamp is a proper number, we treat it as + # accurate and check duplicates by that exact timestamp + if timestamp != 0: + status = 'SUCCESS' + if timestamp in timestamps and (artist,title) in timestamps[timestamp]: + warn(f"{entry} seems to be a duplicate, skipping...") + yield ('SKIP',None) + continue + timestamps.setdefault(timestamp,[]).append((artist,title)) + + # if it's 0, we use ts instead, but identify duplicates much more + # liberally (cause the ts is not accurate) + else: + status = 'WARN' + warn(f"{entry} might have an inaccurate timestamp.") + timestamp = int( + datetime.datetime.strptime(entry['ts'].replace('Z','+0000',),"%Y-%m-%dT%H:%M:%S%z").timestamp() + ) + # TODO HEURISTICS + + + + + + yield (status,{ + 'title':title, + 'artiststr': artist, + 'album': album, + # 'timestamp': int(datetime.datetime.strptime( + # entry['ts'].replace('Z','+0000',), + # "%Y-%m-%dT%H:%M:%S%z" + # ).timestamp()), + 'timestamp': timestamp, + 'duration':played + }) + except Exception as e: + err(f"{entry} could not be parsed. Scrobble not imported. ({repr(e)})") + yield ('FAIL',None) + continue + + print() def parse_lastfm(inputf): @@ -170,12 +206,12 @@ def parse_lastfm(inputf): try: artist,album,title,time = row except ValueError: - warn(f"{row} does not look like a valid entry. Scrobble not imported.") - yield None + err(f"{row} does not look like a valid entry. Scrobble not imported.") + yield ('FAIL',None) continue try: - yield { + yield ('SUCCESS',{ 'title': title, 'artiststr': artist, 'album': album, @@ -184,8 +220,8 @@ def parse_lastfm(inputf): "%d %b %Y %H:%M%z" ).timestamp()), 'duration':None - } + }) except Exception as e: err(f"{entry} could not be parsed. Scrobble not imported. ({repr(e)})") - yield None + yield ('FAIL',None) continue From d8821efeeb2ef1783765c67794f52ee2a88fedfb Mon Sep 17 00:00:00 2001 From: krateng Date: Fri, 1 Apr 2022 17:53:36 +0200 Subject: [PATCH 08/12] Implemented heuristics for Spotify import with inaccurate timestamps, GH-104 --- maloja/proccontrol/tasks/importer.py | 43 ++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/maloja/proccontrol/tasks/importer.py b/maloja/proccontrol/tasks/importer.py index 7a332de..774932a 100644 --- a/maloja/proccontrol/tasks/importer.py +++ b/maloja/proccontrol/tasks/importer.py @@ -18,17 +18,19 @@ def err(msg): def import_scrobbles(inputf): - if re.match(".*\.csv",inputf): + filename = os.path.basename(inputf) + + if re.match(".*\.csv",filename): type = "Last.fm" outputf = data_dir['scrobbles']("lastfmimport.tsv") importfunc = parse_lastfm - elif re.match("endsong_[0-9]+\.json",inputf): + elif re.match("endsong_[0-9]+\.json",filename): type = "Spotify" outputf = data_dir['scrobbles']("spotifyimport.tsv") importfunc = parse_spotify_full - elif re.match("StreamingHistory[0-9]+\.json",inputf): + elif re.match("StreamingHistory[0-9]+\.json",filename): type = "Spotify" outputf = data_dir['scrobbles']("spotifyimport.tsv") importfunc = parse_spotify_lite @@ -165,28 +167,45 @@ def parse_spotify_full(inputf): continue timestamps.setdefault(timestamp,[]).append((artist,title)) - # if it's 0, we use ts instead, but identify duplicates much more - # liberally (cause the ts is not accurate) + # if it's 0, we use ts instead, but identify duplicates differently + # (cause the ts is not accurate) else: - status = 'WARN' - warn(f"{entry} might have an inaccurate timestamp.") + timestamp = int( datetime.datetime.strptime(entry['ts'].replace('Z','+0000',),"%Y-%m-%dT%H:%M:%S%z").timestamp() ) - # TODO HEURISTICS + ts_group = int(timestamp/10) + relevant_ts_groups = [ts_group-2,ts_group-1,ts_group,ts_group+1,ts_group+2] + similar_scrobbles = [scrob for tsg in relevant_ts_groups for scrob in inaccurate_timestamps.get(tsg,[])] + scrobble_describe = (timestamp,entry['spotify_track_uri'],entry['ms_played']) + found_similar = False + for scr in similar_scrobbles: + # scrobbles count as duplicate if: + # - less than 30 seconds apart + # - exact same track uri + # - exact same ms_played + if (abs(scr[0] - timestamp) < 30) and scr[1:] == scrobble_describe[1:]: + warn(f"{entry} has been identified as potential duplicate, skipping...") + yield ('SKIP',None) + found_similar = True + break + else: + # no duplicates, assume proper scrobble but warn + status = 'WARN' + warn(f"{entry} might have an inaccurate timestamp.") + inaccurate_timestamps.setdefault(ts_group,[]).append(scrobble_describe) + + if found_similar: + continue yield (status,{ 'title':title, 'artiststr': artist, 'album': album, - # 'timestamp': int(datetime.datetime.strptime( - # entry['ts'].replace('Z','+0000',), - # "%Y-%m-%dT%H:%M:%S%z" - # ).timestamp()), 'timestamp': timestamp, 'duration':played }) From a833039ced07fa80de6d6572b6dbb6c52e87bf60 Mon Sep 17 00:00:00 2001 From: krateng Date: Fri, 1 Apr 2022 18:19:21 +0200 Subject: [PATCH 09/12] Improved feedback of import --- maloja/proccontrol/tasks/__init__.py | 24 +++++++---- maloja/proccontrol/tasks/importer.py | 62 +++++++++++++++------------- 2 files changed, 50 insertions(+), 36 deletions(-) diff --git a/maloja/proccontrol/tasks/__init__.py b/maloja/proccontrol/tasks/__init__.py index 744534a..b86008f 100644 --- a/maloja/proccontrol/tasks/__init__.py +++ b/maloja/proccontrol/tasks/__init__.py @@ -12,14 +12,22 @@ def loadexternal(filename): return from .importer import import_scrobbles - imported,warning,skipped,failed = import_scrobbles(filename) - print("Successfully imported",imported,"scrobbles!") - if warning > 0: - print(col['orange'](f"{warning} Warning{'s' if warning != 1 else ''}!")) - if skipped > 0: - print(col['orange'](f"{skipped} Skipped!")) - if failed > 0: - print(col['red'](f"{failed} Error{'s' if failed != 1 else ''}!")) + result = import_scrobbles(filename) + + msg = f"Successfully imported {result['CONFIDENT_IMPORT'] + result['UNCERTAIN_IMPORT']} scrobbles" + if result['UNCERTAIN_IMPORT'] > 0: + warningmsg = col['orange'](f"{result['UNCERTAIN_IMPORT']} Warning{'s' if result['UNCERTAIN_IMPORT'] != 1 else ''}!") + msg += f" ({warningmsg})" + print(msg) + + msg = f"Skipped {result['CONFIDENT_SKIP'] + result['UNCERTAIN_SKIP']} scrobbles" + if result['UNCERTAIN_SKIP'] > 0: + warningmsg = col['orange'](f"{result['UNCERTAIN_SKIP']} Warning{'s' if result['UNCERTAIN_SKIP'] != 1 else ''}!") + msg += f" ({warningmsg})" + print(msg) + + if result['FAIL'] > 0: + print(col['red'](f"{result['FAIL']} Error{'s' if result['FAIL'] != 1 else ''}!")) def backuphere(): from .backup import backup diff --git a/maloja/proccontrol/tasks/importer.py b/maloja/proccontrol/tasks/importer.py index 774932a..69e3d3d 100644 --- a/maloja/proccontrol/tasks/importer.py +++ b/maloja/proccontrol/tasks/importer.py @@ -12,12 +12,22 @@ c = CleanerAgent() def warn(msg): print(col['orange'](msg)) +def skip(msg): + print(col['#ffcba4'](msg)) def err(msg): print(col['red'](msg)) def import_scrobbles(inputf): + result = { + "CONFIDENT_IMPORT": 0, + "UNCERTAIN_IMPORT": 0, + "CONFIDENT_SKIP": 0, + "UNCERTAIN_SKIP": 0, + "FAIL": 0 + } + filename = os.path.basename(inputf) if re.match(".*\.csv",filename): @@ -37,7 +47,7 @@ def import_scrobbles(inputf): else: print("File",inputf,"could not be identified as a valid import source.") - return 0,0,0,0 + return result print(f"Parsing {col['yellow'](inputf)} as {col['cyan'](type)} export") @@ -47,7 +57,7 @@ def import_scrobbles(inputf): while True: action = prompt(f"Already imported {type} data. [O]verwrite, [A]ppend or [C]ancel?",default='c').lower()[0] if action == 'c': - return 0,0,0,0 + return result elif action == 'a': mode = 'a' break @@ -61,18 +71,12 @@ def import_scrobbles(inputf): with open(outputf,mode) as outputfd: - success, warning, skipped, failed = 0, 0, 0, 0 + timestamps = set() for status,scrobble in importfunc(inputf): - if status == 'FAIL': - failed += 1 - elif status == 'SKIP': - skipped += 1 - else: - success += 1 - if status == 'WARN': - warning += 1 + result[status] += 1 + if status in ['CONFIDENT_IMPORT','UNCERTAIN_IMPORT']: while scrobble['timestamp'] in timestamps: scrobble['timestamp'] += 1 @@ -93,10 +97,10 @@ def import_scrobbles(inputf): ]) outputfd.write(outputline + '\n') - if success % 100 == 0: - print(f"Imported {success} scrobbles...") + if (result['CONFIDENT_IMPORT'] + result['UNCERTAIN_IMPORT']) % 100 == 0: + print(f"Imported {result['CONFIDENT_IMPORT'] + result['UNCERTAIN_IMPORT']} scrobbles...") - return success, warning, skipped, failed + return result def parse_spotify_lite(inputf): inputfolder = os.path.dirname(inputf) @@ -145,27 +149,29 @@ def parse_spotify_full(inputf): if title is None: - warn(f"{entry} has no title, skipping...") - yield ('SKIP',None) + skip(f"{entry} has no title, skipping...") + yield ('CONFIDENT_SKIP',None) continue if artist is None: - warn(f"{entry} has no artist, skipping...") - yield ('SKIP',None) + skip(f"{entry} has no artist, skipping...") + yield ('CONFIDENT_SKIP',None) continue if played < 30: - warn(f"{entry} is shorter than 30 seconds, skipping...") - yield ('SKIP',None) + skip(f"{entry} is shorter than 30 seconds, skipping...") + yield ('CONFIDENT_SKIP',None) continue # if offline_timestamp is a proper number, we treat it as # accurate and check duplicates by that exact timestamp if timestamp != 0: - status = 'SUCCESS' + if timestamp in timestamps and (artist,title) in timestamps[timestamp]: - warn(f"{entry} seems to be a duplicate, skipping...") - yield ('SKIP',None) + skip(f"{entry} seems to be a duplicate, skipping...") + yield ('CONFIDENT_SKIP',None) continue - timestamps.setdefault(timestamp,[]).append((artist,title)) + else: + status = 'CONFIDENT_IMPORT' + timestamps.setdefault(timestamp,[]).append((artist,title)) # if it's 0, we use ts instead, but identify duplicates differently # (cause the ts is not accurate) @@ -188,13 +194,13 @@ def parse_spotify_full(inputf): # - exact same track uri # - exact same ms_played if (abs(scr[0] - timestamp) < 30) and scr[1:] == scrobble_describe[1:]: - warn(f"{entry} has been identified as potential duplicate, skipping...") - yield ('SKIP',None) + warn(f"{entry} might be a duplicate, skipping...") + yield ('UNCERTAIN_SKIP',None) found_similar = True break else: # no duplicates, assume proper scrobble but warn - status = 'WARN' + status = 'UNCERTAIN_IMPORT' warn(f"{entry} might have an inaccurate timestamp.") inaccurate_timestamps.setdefault(ts_group,[]).append(scrobble_describe) @@ -230,7 +236,7 @@ def parse_lastfm(inputf): continue try: - yield ('SUCCESS',{ + yield ('CONFIDENT_IMPORT',{ 'title': title, 'artiststr': artist, 'album': album, From c150a57090614210d689228f2279163f1fed3021 Mon Sep 17 00:00:00 2001 From: krateng Date: Fri, 1 Apr 2022 19:28:13 +0200 Subject: [PATCH 10/12] Implemented importing from Spotify's one-year data export --- maloja/proccontrol/tasks/importer.py | 37 ++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/maloja/proccontrol/tasks/importer.py b/maloja/proccontrol/tasks/importer.py index 69e3d3d..9247c3c 100644 --- a/maloja/proccontrol/tasks/importer.py +++ b/maloja/proccontrol/tasks/importer.py @@ -85,6 +85,7 @@ def import_scrobbles(inputf): # Format fields for tsv scrobble['timestamp'] = str(scrobble['timestamp']) scrobble['duration'] = str(scrobble['duration']) if scrobble['duration'] is not None else '-' + scrobble['album'] = scrobble['album'] if scrobble['album'] is not None else '-' (artists,scrobble['title']) = c.fullclean(scrobble['artiststr'],scrobble['title']) scrobble['artiststr'] = "␟".join(artists) @@ -112,7 +113,39 @@ def parse_spotify_lite(inputf): if not ask("Import " + ", ".join(col['yellow'](i) for i in inputfiles) + "?",default=True): inputfiles = [inputf] - # TODO + for inputf in inputfiles: + + print("Importing",col['yellow'](inputf),"...") + with open(inputf,'r') as inputfd: + data = json.load(inputfd) + + for entry in data: + + try: + played = int(entry['msPlayed'] / 1000) + timestamp = int( + datetime.datetime.strptime(entry['endTime'],"%Y-%m-%d %H:%M").timestamp() + ) + artist = entry['artistName'] + title = entry['trackName'] + + if played < 30: + skip(f"{entry} is shorter than 30 seconds, skipping...") + yield ('CONFIDENT_SKIP',None) + continue + + yield ("CONFIDENT_IMPORT",{ + 'title':title, + 'artiststr': artist, + 'timestamp': timestamp, + 'duration':played, + 'album': None + }) + except Exception as e: + err(f"{entry} could not be parsed. Scrobble not imported. ({repr(e)})") + yield ('FAIL',None) + continue + def parse_spotify_full(inputf): @@ -178,7 +211,7 @@ def parse_spotify_full(inputf): else: timestamp = int( - datetime.datetime.strptime(entry['ts'].replace('Z','+0000',),"%Y-%m-%dT%H:%M:%S%z").timestamp() + datetime.datetime.strptime(entry['ts'].replace('Z','+0000'),"%Y-%m-%dT%H:%M:%S%z").timestamp() ) From ca2596cfc9f786bc264548c73eb67c5511874131 Mon Sep 17 00:00:00 2001 From: krateng Date: Fri, 1 Apr 2022 19:43:33 +0200 Subject: [PATCH 11/12] Improved import feedback output logic --- maloja/proccontrol/tasks/importer.py | 56 ++++++++++++---------------- 1 file changed, 24 insertions(+), 32 deletions(-) diff --git a/maloja/proccontrol/tasks/importer.py b/maloja/proccontrol/tasks/importer.py index 9247c3c..0290fdf 100644 --- a/maloja/proccontrol/tasks/importer.py +++ b/maloja/proccontrol/tasks/importer.py @@ -9,13 +9,13 @@ from ...globalconf import data_dir c = CleanerAgent() - -def warn(msg): - print(col['orange'](msg)) -def skip(msg): - print(col['#ffcba4'](msg)) -def err(msg): - print(col['red'](msg)) +outputs = { + "CONFIDENT_IMPORT": lambda msg: None, + "UNCERTAIN_IMPORT": lambda msg: print(col['orange'](msg)), + "CONFIDENT_SKIP": lambda msg: print(col['ffcba4'](msg)), + "UNCERTAIN_SKIP": lambda msg: print(col['orange'](msg)), + "FAIL": lambda msg: print(col['red'](msg)), +} def import_scrobbles(inputf): @@ -74,8 +74,9 @@ def import_scrobbles(inputf): timestamps = set() - for status,scrobble in importfunc(inputf): + for status,scrobble,msg in importfunc(inputf): result[status] += 1 + outputs[status](msg) if status in ['CONFIDENT_IMPORT','UNCERTAIN_IMPORT']: while scrobble['timestamp'] in timestamps: @@ -130,8 +131,7 @@ def parse_spotify_lite(inputf): title = entry['trackName'] if played < 30: - skip(f"{entry} is shorter than 30 seconds, skipping...") - yield ('CONFIDENT_SKIP',None) + yield ('CONFIDENT_SKIP',None,f"{entry} is shorter than 30 seconds, skipping...") continue yield ("CONFIDENT_IMPORT",{ @@ -140,10 +140,9 @@ def parse_spotify_lite(inputf): 'timestamp': timestamp, 'duration':played, 'album': None - }) + },'') except Exception as e: - err(f"{entry} could not be parsed. Scrobble not imported. ({repr(e)})") - yield ('FAIL',None) + yield ('FAIL',None,f"{entry} could not be parsed. Scrobble not imported. ({repr(e)})") continue @@ -182,16 +181,13 @@ def parse_spotify_full(inputf): if title is None: - skip(f"{entry} has no title, skipping...") - yield ('CONFIDENT_SKIP',None) + yield ('CONFIDENT_SKIP',None,f"{entry} has no title, skipping...") continue if artist is None: - skip(f"{entry} has no artist, skipping...") - yield ('CONFIDENT_SKIP',None) + yield ('CONFIDENT_SKIP',None,f"{entry} has no artist, skipping...") continue if played < 30: - skip(f"{entry} is shorter than 30 seconds, skipping...") - yield ('CONFIDENT_SKIP',None) + yield ('CONFIDENT_SKIP',None,f"{entry} is shorter than 30 seconds, skipping...") continue # if offline_timestamp is a proper number, we treat it as @@ -199,11 +195,11 @@ def parse_spotify_full(inputf): if timestamp != 0: if timestamp in timestamps and (artist,title) in timestamps[timestamp]: - skip(f"{entry} seems to be a duplicate, skipping...") - yield ('CONFIDENT_SKIP',None) + yield ('CONFIDENT_SKIP',None,f"{entry} seems to be a duplicate, skipping...") continue else: status = 'CONFIDENT_IMPORT' + msg = '' timestamps.setdefault(timestamp,[]).append((artist,title)) # if it's 0, we use ts instead, but identify duplicates differently @@ -227,14 +223,13 @@ def parse_spotify_full(inputf): # - exact same track uri # - exact same ms_played if (abs(scr[0] - timestamp) < 30) and scr[1:] == scrobble_describe[1:]: - warn(f"{entry} might be a duplicate, skipping...") - yield ('UNCERTAIN_SKIP',None) + yield ('UNCERTAIN_SKIP',None,f"{entry} might be a duplicate, skipping...") found_similar = True break else: # no duplicates, assume proper scrobble but warn status = 'UNCERTAIN_IMPORT' - warn(f"{entry} might have an inaccurate timestamp.") + msg = f"{entry} might have an inaccurate timestamp." inaccurate_timestamps.setdefault(ts_group,[]).append(scrobble_describe) if found_similar: @@ -247,10 +242,9 @@ def parse_spotify_full(inputf): 'album': album, 'timestamp': timestamp, 'duration':played - }) + },msg) except Exception as e: - err(f"{entry} could not be parsed. Scrobble not imported. ({repr(e)})") - yield ('FAIL',None) + yield ('FAIL',None,f"{entry} could not be parsed. Scrobble not imported. ({repr(e)})") continue print() @@ -264,8 +258,7 @@ def parse_lastfm(inputf): try: artist,album,title,time = row except ValueError: - err(f"{row} does not look like a valid entry. Scrobble not imported.") - yield ('FAIL',None) + yield ('FAIL',None,f"{row} does not look like a valid entry. Scrobble not imported.") continue try: @@ -278,8 +271,7 @@ def parse_lastfm(inputf): "%d %b %Y %H:%M%z" ).timestamp()), 'duration':None - }) + },'') except Exception as e: - err(f"{entry} could not be parsed. Scrobble not imported. ({repr(e)})") - yield ('FAIL',None) + yield ('FAIL',None,f"{entry} could not be parsed. Scrobble not imported. ({repr(e)})") continue From 72b74eb27effbf824d31f7ea145399a60063066b Mon Sep 17 00:00:00 2001 From: krateng Date: Mon, 4 Apr 2022 16:25:21 +0200 Subject: [PATCH 12/12] Renamed import module to match v3 --- maloja/proccontrol/tasks/__init__.py | 2 +- .../proccontrol/tasks/{importer.py => import_scrobbles.py} | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) rename maloja/proccontrol/tasks/{importer.py => import_scrobbles.py} (97%) diff --git a/maloja/proccontrol/tasks/__init__.py b/maloja/proccontrol/tasks/__init__.py index b86008f..cd5be4c 100644 --- a/maloja/proccontrol/tasks/__init__.py +++ b/maloja/proccontrol/tasks/__init__.py @@ -11,7 +11,7 @@ def loadexternal(filename): print("File could not be found.") return - from .importer import import_scrobbles + from .import_scrobbles import import_scrobbles result = import_scrobbles(filename) msg = f"Successfully imported {result['CONFIDENT_IMPORT'] + result['UNCERTAIN_IMPORT']} scrobbles" diff --git a/maloja/proccontrol/tasks/importer.py b/maloja/proccontrol/tasks/import_scrobbles.py similarity index 97% rename from maloja/proccontrol/tasks/importer.py rename to maloja/proccontrol/tasks/import_scrobbles.py index 0290fdf..0d22df0 100644 --- a/maloja/proccontrol/tasks/importer.py +++ b/maloja/proccontrol/tasks/import_scrobbles.py @@ -12,7 +12,8 @@ c = CleanerAgent() outputs = { "CONFIDENT_IMPORT": lambda msg: None, "UNCERTAIN_IMPORT": lambda msg: print(col['orange'](msg)), - "CONFIDENT_SKIP": lambda msg: print(col['ffcba4'](msg)), + #"CONFIDENT_SKIP": lambda msg: print(col['ffcba4'](msg)), + "CONFIDENT_SKIP": lambda msg: None, "UNCERTAIN_SKIP": lambda msg: print(col['orange'](msg)), "FAIL": lambda msg: print(col['red'](msg)), } @@ -145,6 +146,8 @@ def parse_spotify_lite(inputf): yield ('FAIL',None,f"{entry} could not be parsed. Scrobble not imported. ({repr(e)})") continue + print() + def parse_spotify_full(inputf): @@ -212,7 +215,7 @@ def parse_spotify_full(inputf): ts_group = int(timestamp/10) - relevant_ts_groups = [ts_group-2,ts_group-1,ts_group,ts_group+1,ts_group+2] + relevant_ts_groups = [ts_group-3,ts_group-2,ts_group-1,ts_group,ts_group+1,ts_group+2,ts_group+3] similar_scrobbles = [scrob for tsg in relevant_ts_groups for scrob in inaccurate_timestamps.get(tsg,[])] scrobble_describe = (timestamp,entry['spotify_track_uri'],entry['ms_played'])