From 3ede71fc79b5e2907b30a69d5eac595b8cd8ebd4 Mon Sep 17 00:00:00 2001
From: krateng
Date: Thu, 28 Apr 2022 06:08:51 +0200
Subject: [PATCH] Made some parsing rules case insensitive

---
Review notes (this section is not part of the commit message):
- re.sub() takes `count` as its fourth positional parameter, so the flag is
  passed as flags=re.IGNORECASE below. Passed positionally it is read as
  count=2 and the substitution stays case sensitive while the re.match()
  guard in front of it is not; a string such as "Song (Feat. X)" then comes
  back unchanged and is handed to the same method again.
- Line breaks, blank context lines and indentation were restored from a
  whitespace-collapsed copy of this patch; use `git apply --ignore-whitespace`
  if the context does not match. The blob hashes on the index lines predate
  the flags= change.

 maloja/cleanup.py         | 34 ++++++++++++++++++----------------
 maloja/pkg_global/conf.py |  6 +++---
 2 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/maloja/cleanup.py b/maloja/cleanup.py
index 35a5063..3e9c5b5 100644
--- a/maloja/cleanup.py
+++ b/maloja/cleanup.py
@@ -109,9 +109,9 @@ class CleanerAgent:
 
 
 		for d in self.delimiters_feat:
-			if re.match(r"(.*) [\(\[]" + d + " (.*)[\)\]]",a) is not None:
-				return self.parseArtists(re.sub(r"(.*) [\(\[]" + d + " (.*)[\)\]]",r"\1",a)) + \
-						self.parseArtists(re.sub(r"(.*) [\(\[]" + d + " (.*)[\)\]]",r"\2",a))
+			if re.match(r"(.*) [\(\[]" + d + " (.*)[\)\]]",a,re.IGNORECASE) is not None:
+				return self.parseArtists(re.sub(r"(.*) [\(\[]" + d + " (.*)[\)\]]",r"\1",a,flags=re.IGNORECASE)) + \
+						self.parseArtists(re.sub(r"(.*) [\(\[]" + d + " (.*)[\)\]]",r"\2",a,flags=re.IGNORECASE))
 
 
 
@@ -158,17 +158,17 @@ class CleanerAgent:
 
 	def parseTitleForArtists(self,t):
 		for d in self.delimiters_feat:
-			if re.match(r"(.*) [\(\[]" + d + " (.*?)[\)\]]",t) is not None:
-				(title,artists) = self.parseTitleForArtists(re.sub(r"(.*) [\(\[]" + d + " (.*?)[\)\]]",r"\1",t))
-				artists += self.parseArtists(re.sub(r"(.*) [\(\[]" + d + " (.*?)[\)\]].*",r"\2",t))
+			if re.match(r"(.*) [\(\[]" + d + " (.*?)[\)\]]",t,re.IGNORECASE) is not None:
+				(title,artists) = self.parseTitleForArtists(re.sub(r"(.*) [\(\[]" + d + " (.*?)[\)\]]",r"\1",t,flags=re.IGNORECASE))
+				artists += self.parseArtists(re.sub(r"(.*) [\(\[]" + d + " (.*?)[\)\]].*",r"\2",t,flags=re.IGNORECASE))
 				return (title,artists)
-			if re.match(r"(.*) - " + d + " (.*)",t) is not None:
-				(title,artists) = self.parseTitleForArtists(re.sub(r"(.*) - " + d + " (.*)",r"\1",t))
-				artists += self.parseArtists(re.sub(r"(.*) - " + d + " (.*).*",r"\2",t))
+			if re.match(r"(.*) - " + d + " (.*)",t,re.IGNORECASE) is not None:
+				(title,artists) = self.parseTitleForArtists(re.sub(r"(.*) - " + d + " (.*)",r"\1",t,flags=re.IGNORECASE))
+				artists += self.parseArtists(re.sub(r"(.*) - " + d + " (.*).*",r"\2",t,flags=re.IGNORECASE))
 				return (title,artists)
-			if re.match(r"(.*) " + d + " (.*)",t) is not None:
-				(title,artists) = self.parseTitleForArtists(re.sub(r"(.*) " + d + " (.*)",r"\1",t))
-				artists += self.parseArtists(re.sub(r"(.*) " + d + " (.*).*",r"\2",t))
+			if re.match(r"(.*) " + d + " (.*)",t,re.IGNORECASE) is not None:
+				(title,artists) = self.parseTitleForArtists(re.sub(r"(.*) " + d + " (.*)",r"\1",t,flags=re.IGNORECASE))
+				artists += self.parseArtists(re.sub(r"(.*) " + d + " (.*).*",r"\2",t,flags=re.IGNORECASE))
 				return (title,artists)
 
 		artists = []
@@ -176,12 +176,14 @@ class CleanerAgent:
 		if malojaconfig["PARSE_REMIX_ARTISTS"]:
 			for filter in malojaconfig["FILTERS_REMIX"]:
 				# match remix in brackets
-				if re.match(r".*[\(\[](.*)" + filter + "[\)\]]", t):
-					artists += self.parseArtists(re.match(r".*[\(\[](.*)" + filter + "[\)\]]", t)[1])
+				m = re.match(r".*[\(\[](.*)" + filter + "[\)\]]", t, re.IGNORECASE)
+				if m:
+					artists += self.parseArtists(m.groups()[0])
 
 				# match remix split with "-"
-				elif re.match(r".*-(.*)" + filter, t):
-					artists += self.parseArtists(re.match(r".*-(.*)" + filter, t)[1])
+				m = re.match(r".*-(.*)" + filter, t, re.IGNORECASE)
+				if m:
+					artists += self.parseArtists(m.groups()[0])
 
 		for st in self.rules_artistintitle:
 			if st in t.lower(): artists += self.rules_artistintitle[st].split("␟")
diff --git a/maloja/pkg_global/conf.py b/maloja/pkg_global/conf.py
index 7e7d172..fa2e981 100644
--- a/maloja/pkg_global/conf.py
+++ b/maloja/pkg_global/conf.py
@@ -179,10 +179,10 @@ malojaconfig = Configuration(
 		"Database":{
 			"invalid_artists":(tp.Set(tp.String()), "Invalid Artists", ["[Unknown Artist]","Unknown Artist","Spotify"], "Artists that should be discarded immediately"),
 			"remove_from_title":(tp.Set(tp.String()), "Remove from Title", ["(Original Mix)","(Radio Edit)","(Album Version)","(Explicit Version)","(Bonus Track)"], "Phrases that should be removed from song titles"),
-			"delimiters_feat":(tp.Set(tp.String()), "Featuring Delimiters", ["ft.","ft","feat.","feat","featuring","Ft.","Ft","Feat.","Feat","Featuring"], "Delimiters used for extra artists, even when in the title field"),
+			"delimiters_feat":(tp.Set(tp.String()), "Featuring Delimiters", ["ft.","ft","feat.","feat","featuring"], "Delimiters used for extra artists, even when in the title field"),
 			"delimiters_informal":(tp.Set(tp.String()), "Informal Delimiters", ["vs.","vs","&"], "Delimiters in informal artist strings with spaces expected around them"),
-			"delimiters_formal":(tp.Set(tp.String()), "Formal Delimiters", [";","/","|","␝","␞","␟"], "Delimiters used to tag multiple artists when only one tag field is available"),
-			"filters_remix":(tp.Set(tp.String()), "Remix Filters", ["remix", "Remix", "Remix Edit", "remix edit", "Short Mix", "short mix", "Extended Mix", "extended mix", "Soundtrack Version", "soundtrack version"], "Filters used to recognize the remix artists in the title"),
+			"delimiters_formal":(tp.Set(tp.String()), "Formal Delimiters", [";","/","|","␝","␞","␟"], "Delimiters used to tag multiple artists when only one tag field is available"),
+			"filters_remix":(tp.Set(tp.String()), "Remix Filters", ["Remix", "Remix Edit", "Short Mix", "Extended Mix", "Soundtrack Version"], "Filters used to recognize the remix artists in the title"),
 			"parse_remix_artists":(tp.Boolean(), "Parse Remix Artists", False)
 		},
 		"Web Interface":{