From 400c920958b332b80f0b69fe7a7834bf2eb3019d Mon Sep 17 00:00:00 2001 From: Krateng Date: Mon, 8 Apr 2019 17:32:31 +0200 Subject: [PATCH] Associated artists now only show up if relevant --- cleanup.py | 47 +++++++++++++++++++++++++++++++++-------------- database.py | 25 +++++++++++++++++-------- 2 files changed, 50 insertions(+), 22 deletions(-) diff --git a/cleanup.py b/cleanup.py index c8660b3..3012914 100644 --- a/cleanup.py +++ b/cleanup.py @@ -43,10 +43,12 @@ class CleanerAgent: confirmed = self.rules_belongtogether + [self.rules_replaceartist[r] for r in self.rules_replaceartist] return (a in confirmed) - delimiters_feat = ["ft.","ft","feat.","feat","featuring","Ft.","Ft","Feat.","Feat","Featuring"] #Delimiters used for extra artists, even when in the title field - delimiters = ["vs.","vs","&"] #Delimiters in informal artist strings, spaces expected around them - delimiters_formal = ["; ",";","/"] #Delimiters used specifically to tag multiple artists when only one tag field is available, no spaces used - + #Delimiters used for extra artists, even when in the title field + delimiters_feat = ["ft.","ft","feat.","feat","featuring","Ft.","Ft","Feat.","Feat","Featuring"] + #Delimiters in informal artist strings, spaces expected around them + delimiters = ["vs.","vs","&"] + #Delimiters used specifically to tag multiple artists when only one tag field is available, no spaces used + delimiters_formal = ["; ",";","/"] def parseArtists(self,a): @@ -68,7 +70,8 @@ class CleanerAgent: for d in self.delimiters_feat: if re.match(r"(.*) \(" + d + " (.*)\)",a) is not None: - return self.parseArtists(re.sub(r"(.*) \(" + d + " (.*)\)",r"\1",a)) + self.parseArtists(re.sub(r"(.*) \(" + d + " (.*)\)",r"\2",a)) + return self.parseArtists(re.sub(r"(.*) \(" + d + " (.*)\)",r"\1",a)) + \ + self.parseArtists(re.sub(r"(.*) \(" + d + " (.*)\)",r"\2",a)) for d in self.delimiters_formal: if (d in a): @@ -127,41 +130,57 @@ class CollectorAgent: def __init__(self): self.updateRules() + # rules_countas dict: real artist -> credited artist + # rules_countas_id dict: real artist ID -> credited artist ID + # rules_include dict: credited artist -> all real artists + def updateRules(self): raw = tsv.parse_all("rules","string","string","string") self.rules_countas = {b:c for [a,b,c] in raw if a=="countas"} - self.rules_include = {} #Twice the memory, double the performance! (Yes, we're saving redundant information here, but it's not unelegant if it's within a closed object!) + self.rules_countas_id = {} + self.rules_include = {} #Twice the memory, double the performance! + # (Yes, we're saving redundant information here, but it's not unelegant if it's within a closed object!) for a in self.rules_countas: self.rules_include[self.rules_countas[a]] = self.rules_include.setdefault(self.rules_countas[a],[]) + [a] - # this agent needs to be aware of the current id assignment in the main program. unelegant, but the best way i can think of + # this agent needs to be aware of the current id assignment in the main program + # unelegant, but the best way i can think of def updateIDs(self,artistlist): - self.rules_countas_id = {artistlist.index(a):artistlist.index(self.rules_countas[a]) for a in self.rules_countas} + self.rules_countas_id = {artistlist.index(a):artistlist.index(self.rules_countas[a]) for a in self.rules_countas if a in artistlist} #self.rules_include_id = {artistlist.index(a):artistlist.index(self.rules_include[a]) for a in self.rules_include} #this needs to take lists into account + + # get who is credited for this artist def getCredited(self,artist): - if artist in self.rules_countas_id: - return self.rules_countas_id[artist] if artist in self.rules_countas: return self.rules_countas[artist] + if artist in self.rules_countas_id: + return self.rules_countas_id[artist] + else: return artist - + # get all credited artists for the artists given def getCreditedList(self,artists): updatedArtists = [] for artist in artists: updatedArtists.append(self.getCredited(artist)) return list(set(updatedArtists)) + # get artists who the given artist is given credit for def getAllAssociated(self,artist): return self.rules_include.get(artist,[]) - # this function is there to check for artists that we should include in the database even though they never have any scrobble. important to avoid bugs when - # countas rules are declared preemptively + # this function is there to check for artists that we should include in the + # database even though they never have any scrobble. def getAllArtists(self): - return list(set([a for a in self.rules_countas] + [self.rules_countas[a] for a in self.rules_countas])) + return list(set([self.rules_countas[a] for a in self.rules_countas])) + # artists that count can be nonexisting (counting HyunA as 4Minute even + # though 4Minute has never been listened to) + # but artists that are counted as someone else are only relevant if they + # exist (so we can preemptively declare lots of rules just in case) + #return list(set([a for a in self.rules_countas] + [self.rules_countas[a] for a in self.rules_countas])) diff --git a/database.py b/database.py index 556ddfc..0672b9f 100644 --- a/database.py +++ b/database.py @@ -135,6 +135,13 @@ def getArtistID(name): ARTISTS.append(obj) ARTIST_SET.add(objlower) ARTISTS_LOWER.append(objlower) + + # with a new artist added, we might also get new artists that they are credited as + cr = coa.getCredited(name) + getArtistID(cr) + + coa.updateIDs(ARTISTS) + return i def getTrackID(artists,title): @@ -473,7 +480,7 @@ def artistInfo(artist): scrobbles = len(db_query(artists=[artist])) #we cant take the scrobble number from the charts because that includes all countas scrobbles try: c = [e for e in charts if e["artist"] == artist][0] - others = coa.getAllAssociated(artist) + others = [a for a in coa.getAllAssociated(artist) if a in ARTISTS] position = c["rank"] return {"scrobbles":scrobbles,"position":position,"associated":others,"medals":MEDALS.get(artist)} except: @@ -785,12 +792,14 @@ def build_db(): # inform malojatime module about earliest scrobble register_scrobbletime(STAMPS[0]) - # get extra artists with zero scrobbles from countas rules - for artist in coa.getAllArtists(): - if artist not in ARTISTS: - ARTISTS.append(artist) - - coa.updateIDs(ARTISTS) + # NOT NEEDED BECAUSE WE DO THAT ON ADDING EVERY ARTIST ANYWAY + # get extra artists with no real scrobbles from countas rules + #for artist in coa.getAllArtists(): + #for artist in coa.getCreditedList(ARTISTS): + # if artist not in ARTISTS: + # log(artist + " is added to database because of countas rules",module="debug") + # ARTISTS.append(artist) + # coa.updateIDs(ARTISTS) #start regular tasks update_medals() @@ -968,7 +977,7 @@ def db_aggregate_full(by=None,since=None,to=None,within=None,artist=None): # this either creates the new entry or increments the existing one charts[a] = charts.setdefault(a,0) + 1 - ls = [{"artist":get_artist_dict(ARTISTS[a]),"scrobbles":charts[a],"counting":coa.getAllAssociated(ARTISTS[a])} for a in charts] + ls = [{"artist":get_artist_dict(ARTISTS[a]),"scrobbles":charts[a],"counting":[arti for arti in coa.getAllAssociated(ARTISTS[a]) if arti in ARTISTS]} for a in charts] ls.sort(key=lambda k:k["scrobbles"],reverse=True) # add ranks for rnk in range(len(ls)):