From 17be00f7945d0078988a79a178546f6c4023a7ea Mon Sep 17 00:00:00 2001 From: krateng Date: Tue, 19 Apr 2022 15:22:42 +0200 Subject: [PATCH] Improved parsing of featuring artists in square brackets, fix GH-121 --- dev/releases/3.0.yml | 4 ++++ maloja/cleanup.py | 20 ++++++++++---------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/dev/releases/3.0.yml b/dev/releases/3.0.yml index 1c59d2d..3cb6289 100644 --- a/dev/releases/3.0.yml +++ b/dev/releases/3.0.yml @@ -29,6 +29,10 @@ minor_release_name: "Yeonhee" - "[Bugfix] Fixed native API receiving superfluous keywords" - "[Bugfix] Fixed crash when importing scrobbles with artists with similar names" 3.0.5: + commit: "fe21894c5ecf3a53c9c5c00453abfc7f41c6a83e" notes: - "[Feature] Added notification system for web interface" - "[Bugfix] Fixed crash when encountering error in Lastfm import" +3.0.6: + notes: + - "[Bugfix] Better parsing of featuring artists" diff --git a/maloja/cleanup.py b/maloja/cleanup.py index e94fdae..73bf56a 100644 --- a/maloja/cleanup.py +++ b/maloja/cleanup.py @@ -109,9 +109,9 @@ class CleanerAgent: for d in self.delimiters_feat: - if re.match(r"(.*) \(" + d + " (.*)\)",a) is not None: - return self.parseArtists(re.sub(r"(.*) \(" + d + " (.*)\)",r"\1",a)) + \ - self.parseArtists(re.sub(r"(.*) \(" + d + " (.*)\)",r"\2",a)) + if re.match(r"(.*) [\(\[]" + d + " (.*)[\)\]]",a) is not None: + return self.parseArtists(re.sub(r"(.*) [\(\[]" + d + " (.*)[\)\]]",r"\1",a)) + \ + self.parseArtists(re.sub(r"(.*) [\(\[]" + d + " (.*)[\)\]]",r"\2",a)) @@ -139,11 +139,11 @@ class CleanerAgent: if t.strip().lower() in self.rules_replacetitle: return self.rules_replacetitle[t.strip().lower()] - t = t.replace("[","(").replace("]",")") + #t = t.replace("[","(").replace("]",")") - t = re.sub(r" \(as made famous by .*?\)","",t) - t = re.sub(r" \(originally by .*?\)","",t) - t = re.sub(r" \(.*?Remaster.*?\)","",t) + t = re.sub(r" [\(\[]as made famous by .*?[\)\]]","",t) + t = re.sub(r" [\(\[]originally by .*?[\)\]]","",t) + t = re.sub(r" [\(\[].*?Remaster.*?[\)\]]","",t) for s in malojaconfig["REMOVE_FROM_TITLE"]: if s in t: @@ -156,9 +156,9 @@ class CleanerAgent: def parseTitleForArtists(self,t): for d in self.delimiters_feat: - if re.match(r"(.*) \(" + d + " (.*?)\)",t) is not None: - (title,artists) = self.parseTitleForArtists(re.sub(r"(.*) \(" + d + " (.*?)\)",r"\1",t)) - artists += self.parseArtists(re.sub(r"(.*) \(" + d + " (.*?)\).*",r"\2",t)) + if re.match(r"(.*) [\(\[]" + d + " (.*?)[\)\]]",t) is not None: + (title,artists) = self.parseTitleForArtists(re.sub(r"(.*) [\(\[]" + d + " (.*?)[\)\]]",r"\1",t)) + artists += self.parseArtists(re.sub(r"(.*) [\(\[]" + d + " (.*?)[\)\]].*",r"\2",t)) return (title,artists) if re.match(r"(.*) - " + d + " (.*)",t) is not None: (title,artists) = self.parseTitleForArtists(re.sub(r"(.*) - " + d + " (.*)",r"\1",t))