1
0
mirror of https://github.com/krateng/maloja.git synced 2023-08-10 21:12:55 +03:00

Made some parsing rules case-insensitive

This commit is contained in:
krateng 2022-04-28 06:08:51 +02:00
parent 77a0a0a41b
commit 3ede71fc79
2 changed files with 21 additions and 19 deletions

View File

@ -109,9 +109,9 @@ class CleanerAgent:
for d in self.delimiters_feat: for d in self.delimiters_feat:
if re.match(r"(.*) [\(\[]" + d + " (.*)[\)\]]",a) is not None: if re.match(r"(.*) [\(\[]" + d + " (.*)[\)\]]",a,re.IGNORECASE) is not None:
return self.parseArtists(re.sub(r"(.*) [\(\[]" + d + " (.*)[\)\]]",r"\1",a)) + \ return self.parseArtists(re.sub(r"(.*) [\(\[]" + d + " (.*)[\)\]]",r"\1",a,re.IGNORECASE)) + \
self.parseArtists(re.sub(r"(.*) [\(\[]" + d + " (.*)[\)\]]",r"\2",a)) self.parseArtists(re.sub(r"(.*) [\(\[]" + d + " (.*)[\)\]]",r"\2",a,re.IGNORECASE))
@ -158,17 +158,17 @@ class CleanerAgent:
def parseTitleForArtists(self,t): def parseTitleForArtists(self,t):
for d in self.delimiters_feat: for d in self.delimiters_feat:
if re.match(r"(.*) [\(\[]" + d + " (.*?)[\)\]]",t) is not None: if re.match(r"(.*) [\(\[]" + d + " (.*?)[\)\]]",t,re.IGNORECASE) is not None:
(title,artists) = self.parseTitleForArtists(re.sub(r"(.*) [\(\[]" + d + " (.*?)[\)\]]",r"\1",t)) (title,artists) = self.parseTitleForArtists(re.sub(r"(.*) [\(\[]" + d + " (.*?)[\)\]]",r"\1",t,re.IGNORECASE))
artists += self.parseArtists(re.sub(r"(.*) [\(\[]" + d + " (.*?)[\)\]].*",r"\2",t)) artists += self.parseArtists(re.sub(r"(.*) [\(\[]" + d + " (.*?)[\)\]].*",r"\2",t,re.IGNORECASE))
return (title,artists) return (title,artists)
if re.match(r"(.*) - " + d + " (.*)",t) is not None: if re.match(r"(.*) - " + d + " (.*)",t,re.IGNORECASE) is not None:
(title,artists) = self.parseTitleForArtists(re.sub(r"(.*) - " + d + " (.*)",r"\1",t)) (title,artists) = self.parseTitleForArtists(re.sub(r"(.*) - " + d + " (.*)",r"\1",t,re.IGNORECASE))
artists += self.parseArtists(re.sub(r"(.*) - " + d + " (.*).*",r"\2",t)) artists += self.parseArtists(re.sub(r"(.*) - " + d + " (.*).*",r"\2",t,re.IGNORECASE))
return (title,artists) return (title,artists)
if re.match(r"(.*) " + d + " (.*)",t) is not None: if re.match(r"(.*) " + d + " (.*)",t,re.IGNORECASE) is not None:
(title,artists) = self.parseTitleForArtists(re.sub(r"(.*) " + d + " (.*)",r"\1",t)) (title,artists) = self.parseTitleForArtists(re.sub(r"(.*) " + d + " (.*)",r"\1",t,re.IGNORECASE))
artists += self.parseArtists(re.sub(r"(.*) " + d + " (.*).*",r"\2",t)) artists += self.parseArtists(re.sub(r"(.*) " + d + " (.*).*",r"\2",t,re.IGNORECASE))
return (title,artists) return (title,artists)
artists = [] artists = []
@ -176,12 +176,14 @@ class CleanerAgent:
if malojaconfig["PARSE_REMIX_ARTISTS"]: if malojaconfig["PARSE_REMIX_ARTISTS"]:
for filter in malojaconfig["FILTERS_REMIX"]: for filter in malojaconfig["FILTERS_REMIX"]:
# match remix in brackets # match remix in brackets
if re.match(r".*[\(\[](.*)" + filter + "[\)\]]", t): m = re.match(r".*[\(\[](.*)" + filter + "[\)\]]", t, re.IGNORECASE)
artists += self.parseArtists(re.match(r".*[\(\[](.*)" + filter + "[\)\]]", t)[1]) if m:
artists += self.parseArtists(m.groups()[0])
# match remix split with "-" # match remix split with "-"
elif re.match(r".*-(.*)" + filter, t): m = re.match(r".*-(.*)" + filter, t, re.IGNORECASE)
artists += self.parseArtists(re.match(r".*-(.*)" + filter, t)[1]) if m:
artists += self.parseArtists(m.groups()[0])
for st in self.rules_artistintitle: for st in self.rules_artistintitle:
if st in t.lower(): artists += self.rules_artistintitle[st].split("") if st in t.lower(): artists += self.rules_artistintitle[st].split("")

View File

@ -179,10 +179,10 @@ malojaconfig = Configuration(
"Database":{ "Database":{
"invalid_artists":(tp.Set(tp.String()), "Invalid Artists", ["[Unknown Artist]","Unknown Artist","Spotify"], "Artists that should be discarded immediately"), "invalid_artists":(tp.Set(tp.String()), "Invalid Artists", ["[Unknown Artist]","Unknown Artist","Spotify"], "Artists that should be discarded immediately"),
"remove_from_title":(tp.Set(tp.String()), "Remove from Title", ["(Original Mix)","(Radio Edit)","(Album Version)","(Explicit Version)","(Bonus Track)"], "Phrases that should be removed from song titles"), "remove_from_title":(tp.Set(tp.String()), "Remove from Title", ["(Original Mix)","(Radio Edit)","(Album Version)","(Explicit Version)","(Bonus Track)"], "Phrases that should be removed from song titles"),
"delimiters_feat":(tp.Set(tp.String()), "Featuring Delimiters", ["ft.","ft","feat.","feat","featuring","Ft.","Ft","Feat.","Feat","Featuring"], "Delimiters used for extra artists, even when in the title field"), "delimiters_feat":(tp.Set(tp.String()), "Featuring Delimiters", ["ft.","ft","feat.","feat","featuring"], "Delimiters used for extra artists, even when in the title field"),
"delimiters_informal":(tp.Set(tp.String()), "Informal Delimiters", ["vs.","vs","&"], "Delimiters in informal artist strings with spaces expected around them"), "delimiters_informal":(tp.Set(tp.String()), "Informal Delimiters", ["vs.","vs","&"], "Delimiters in informal artist strings with spaces expected around them"),
"delimiters_formal":(tp.Set(tp.String()), "Formal Delimiters", [";","/","|","","",""], "Delimiters used to tag multiple artists when only one tag field is available"), "delimiters_formal":(tp.Set(tp.String()), "Formal Delimiters", [";","/","|","","",""], "Delimiters used to tag multiple artists when only one tag field is available"),
"filters_remix":(tp.Set(tp.String()), "Remix Filters", ["remix", "Remix", "Remix Edit", "remix edit", "Short Mix", "short mix", "Extended Mix", "extended mix", "Soundtrack Version", "soundtrack version"], "Filters used to recognize the remix artists in the title"), "filters_remix":(tp.Set(tp.String()), "Remix Filters", ["Remix", "Remix Edit", "Short Mix", "Extended Mix", "Soundtrack Version"], "Filters used to recognize the remix artists in the title"),
"parse_remix_artists":(tp.Boolean(), "Parse Remix Artists", False) "parse_remix_artists":(tp.Boolean(), "Parse Remix Artists", False)
}, },
"Web Interface":{ "Web Interface":{