mirror of
https://github.com/krateng/maloja.git
synced 2023-08-10 21:12:55 +03:00
Implemented custom rules
This commit is contained in:
parent
144198f933
commit
54bffc5642
148
cleanup.py
148
cleanup.py
@ -1,74 +1,112 @@
|
|||||||
import re
|
import re
|
||||||
|
import utilities
|
||||||
|
|
||||||
# need to do this as a class so it can retain loaded settings from file
class CleanerAgent:
    """Normalises raw artist/title strings into a list of artists and a title.

    The agent keeps the custom rules loaded by ``updateRules`` as instance
    attributes so they are parsed once and reused for every cleaned track:

    * ``rules_belongtogether`` - artist strings that must not be split
    * ``rules_notanartist``    - loaded here, but not consulted by this class
    * ``rules_replacetitle``   - exact title -> replacement title
    * ``rules_replaceartist``  - exact artist -> replacement artist
    """

    # Delimiters used for extra artists, even when in the title field
    delimiters_feat = ["ft.","ft","feat.","feat","featuring"]
    # Delimiters in informal titles, spaces expected around them
    delimiters = ["vs.","vs","&"]
    # Delimiters used specifically to tag multiple artists when only one tag
    # field is available, no spaces used
    delimiters_formal = ["; ",";"]

    def __init__(self):
        self.updateRules()

    def updateRules(self):
        """(Re)load all custom rules from the TSV files in the ``rules`` directory."""
        raw = utilities.parseAllTSV("rules","string","string","string")
        self.rules_belongtogether = [key for [kind,key,value] in raw if kind=="belongtogether"]
        self.rules_notanartist = [key for [kind,key,value] in raw if kind=="notanartist"]
        # For the two mappings a later rule with the same key overrides an earlier one.
        self.rules_replacetitle = {key:value for [kind,key,value] in raw if kind=="replacetitle"}
        self.rules_replaceartist = {key:value for [kind,key,value] in raw if kind=="replaceartist"}

    def fullclean(self,artist,title):
        """Clean a raw artist string and a raw title string.

        Returns a tuple ``(artists, title)`` where ``artists`` is a list of
        unique artist names (artists found in the title included) and
        ``title`` is the cleaned title.
        """
        artists = self.parseArtists(self.removespecial(artist))
        title = self.parseTitle(self.removespecial(title))
        (title,moreartists) = self.parseTitleForArtists(title)
        artists += moreartists

        # dict.fromkeys removes duplicates like set() did, but keeps the
        # order of first appearance so the result is reproducible.
        return (list(dict.fromkeys(artists)),title)

    def removespecial(self,s):
        """Return ``s`` without tabs, unit separator symbols (␟) and newlines."""
        return s.replace("\t","").replace("␟","").replace("\n","")

    def parseArtists(self,a):
        """Split one artist string into a list of individual artist names.

        Exact matches (after stripping) against the ``belongtogether`` and
        ``replaceartist`` rules take precedence over any splitting. The
        returned list may contain duplicates.
        """
        name = a.strip()

        if name == "":
            return []

        if name in self.rules_belongtogether:
            return [name]
        if name in self.rules_replaceartist:
            return [self.rules_replaceartist[name]]

        # "Artist (feat. Other)" - the delimiter is escaped so that the dot in
        # "ft." / "feat." only matches a literal dot.
        for d in self.delimiters_feat:
            pattern = r"(.*) \(" + re.escape(d) + r" (.*)\)"
            if re.match(pattern,a) is not None:
                return self.parseArtists(re.sub(pattern,r"\1",a)) + self.parseArtists(re.sub(pattern,r"\2",a))

        # "Artist feat. Other", "Artist & Other", ... (delimiter surrounded by spaces)
        for d in (self.delimiters_feat + self.delimiters):
            separator = " " + d + " "
            if separator in a:
                ls = []
                for part in a.split(separator):
                    ls += self.parseArtists(part)
                return ls

        # "Artist;Other" (formal multi-value tags, no surrounding spaces required)
        for d in self.delimiters_formal:
            if d in a:
                ls = []
                for part in a.split(d):
                    ls += self.parseArtists(part)
                return ls

        return [name]

    def parseTitle(self,t):
        """Return the cleaned title.

        An exact ``replacetitle`` rule match (after stripping) wins outright.
        Otherwise square brackets become parentheses and the
        "(as made famous by ...)" / "(originally by ...)" suffixes are removed.
        """
        if t.strip() in self.rules_replacetitle:
            return self.rules_replacetitle[t.strip()]

        t = t.replace("[","(").replace("]",")")

        t = re.sub(r" \(as made famous by .*?\)","",t)
        t = re.sub(r" \(originally by .*?\)","",t)

        return t.strip()

    def parseTitleForArtists(self,t):
        """Extract "(feat. ...)" style artist credits from a title.

        Returns ``(title, artists)``: the title with the credits removed and
        the list of artists found in them (empty if there were none).
        """
        for d in self.delimiters_feat:
            # Delimiter escaped for the same reason as in parseArtists.
            pattern = r"(.*) \(" + re.escape(d) + r" (.*?)\)"
            if re.match(pattern,t) is not None:
                # Recurse on the remaining title: it may hold further credits.
                (title,artists) = self.parseTitleForArtists(re.sub(pattern,r"\1",t))
                artists += self.parseArtists(re.sub(pattern + r".*",r"\2",t))
                return (title,artists)

        return (t,[])
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def flatten(lis):
    """Flatten one level of nesting and remove duplicates.

    ``lis`` is an iterable whose items are either strings or iterables of
    strings (lists, tuples, sets, ...). Strings are kept as single items,
    everything else is expanded. The result is a list of the unique items in
    order of first appearance.
    """
    newlist = []

    for item in lis:
        if isinstance(item, str):
            newlist.append(item)
        else:
            # extend() accepts any iterable and avoids rebuilding the list
            # on every step like `newlist = newlist + item` did.
            newlist.extend(item)

    # dict.fromkeys de-duplicates like set() but keeps a reproducible order.
    return list(dict.fromkeys(newlist))
|
||||||
|
13
database.py
13
database.py
@ -4,7 +4,7 @@ import urllib
|
|||||||
import waitress
|
import waitress
|
||||||
import os
|
import os
|
||||||
import datetime
|
import datetime
|
||||||
import cleanup
|
from cleanup import *
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
|
||||||
@ -12,6 +12,8 @@ SCROBBLES = [] # Format: tuple(track_ref,timestamp,saved)
|
|||||||
ARTISTS = [] # Format: artist
|
ARTISTS = [] # Format: artist
|
||||||
TRACKS = [] # Format: tuple(frozenset(artist_ref,...),title)
|
TRACKS = [] # Format: tuple(frozenset(artist_ref,...),title)
|
||||||
|
|
||||||
|
c = CleanerAgent()
|
||||||
|
|
||||||
lastsync = 0
|
lastsync = 0
|
||||||
|
|
||||||
|
|
||||||
@ -118,11 +120,12 @@ def post_scrobble():
|
|||||||
#title = urllib.parse.unquote(keys.get("title"))
|
#title = urllib.parse.unquote(keys.get("title"))
|
||||||
artists = keys.get("artist")
|
artists = keys.get("artist")
|
||||||
title = keys.get("title")
|
title = keys.get("title")
|
||||||
time = int(keys.get("time"))
|
try:
|
||||||
(artists,title) = cleanup.fullclean(artists,title)
|
time = int(keys.get("time"))
|
||||||
if time is None:
|
except:
|
||||||
time = int(datetime.datetime.now(tz=datetime.timezone.utc).timestamp())
|
time = int(datetime.datetime.now(tz=datetime.timezone.utc).timestamp())
|
||||||
|
(artists,title) = c.fullclean(artists,title)
|
||||||
|
|
||||||
## this is necessary for localhost testing
|
## this is necessary for localhost testing
|
||||||
response.set_header("Access-Control-Allow-Origin","*")
|
response.set_header("Access-Control-Allow-Origin","*")
|
||||||
|
|
||||||
|
@ -1,9 +1,12 @@
|
|||||||
import sys, os, datetime, re, cleanup
|
import sys, os, datetime, re, cleanup
|
||||||
|
from cleanup import *
|
||||||
|
|
||||||
log = open(sys.argv[1])
|
log = open(sys.argv[1])
|
||||||
|
|
||||||
outputlog = open(sys.argv[2],"a")
|
outputlog = open(sys.argv[2],"a")
|
||||||
|
|
||||||
|
c = CleanerAgent()
|
||||||
|
|
||||||
for l in log:
|
for l in log:
|
||||||
l = l.replace("\n","")
|
l = l.replace("\n","")
|
||||||
data = l.split(",")
|
data = l.split(",")
|
||||||
@ -13,8 +16,8 @@ for l in log:
|
|||||||
title = data[2]
|
title = data[2]
|
||||||
time = data[3]
|
time = data[3]
|
||||||
|
|
||||||
|
|
||||||
(artists,title) = cleanup.fullclean(artist,title)
|
(artists,title) = c.fullclean(artist,title)
|
||||||
|
|
||||||
artistsstr = "␟".join(artists)
|
artistsstr = "␟".join(artists)
|
||||||
|
|
||||||
|
@ -7,8 +7,8 @@
|
|||||||
### countas: defines an artist that should be counted together with another artist for chart statistics etc. This will not change the separation in the database and all effects of this rule will disappear as soon as it is no longer active. Second column is the artist, third column the replacement artist
|
### countas: defines an artist that should be counted together with another artist for chart statistics etc. This will not change the separation in the database and all effects of this rule will disappear as soon as it is no longer active. Second column is the artist, third column the replacement artist
|
||||||
###
|
###
|
||||||
### THE RULES IN THIS EXAMPLE FILE ARE IGNORED
|
### THE RULES IN THIS EXAMPLE FILE ARE IGNORED
|
||||||
notanartist In Dreams
|
#notanartist In Dreams
|
||||||
belongtogether Darth & Vader
|
#belongtogether Darth & Vader
|
||||||
replacetitle 첫 사랑니 (Rum Pum Pum Pum) Rum Pum Pum Pum
|
#replacetitle 첫 사랑니 (Rum Pum Pum Pum) Rum Pum Pum Pum
|
||||||
replaceartist Dal Shabet Dal★Shabet
|
#replaceartist Dal Shabet Dal★Shabet
|
||||||
countas Trouble Maker HyunA
|
#countas Trouble Maker HyunA
|
||||||
|
Can't render this file because it contains an unexpected character in line 3 and column 58.
|
52
utilities.py
Normal file
52
utilities.py
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def parseTSV(filename,*args):
    """Parse a tab-separated file into a list of typed rows.

    Each positional argument names the type of the column at the same
    position: ``"list"`` (split on ␟), ``"string"``, ``"int"`` or ``"bool"``
    (true for "true", "yes", "1", "y", case-insensitive). A missing or
    unparsable field yields the type's empty value (``[]``, ``""``, ``0``,
    ``False``). Any other type name is skipped and adds no value to the row.

    Lines starting with ``#`` and blank lines are ignored; everything after a
    ``#`` inside a line is treated as a comment. Runs of tabs count as one
    separator, so empty fields are only possible at the end of a line.

    Returns a list with one list per data line.
    """
    result = []

    # Rule files contain non-ASCII text (e.g. the ␟ separator), so do not
    # depend on the locale's default encoding. The with-block closes the file
    # even if reading fails.
    with open(filename,encoding="utf-8") as f:
        for line in f:
            if line.startswith("#") or line.strip() == "":
                continue

            line = line.replace("\n","").split("#")[0]
            # Multiple tabs are okay, we don't accept empty fields unless trailing
            data = [field for field in line.split("\t") if field]

            entry = []
            for i, kind in enumerate(args):
                value = data[i] if i < len(data) else None  # None = field missing

                if kind == "list":
                    entry.append([] if value is None else value.split("␟"))
                elif kind == "string":
                    entry.append("" if value is None else value)
                elif kind == "int":
                    try:
                        entry.append(int(value))
                    except (TypeError, ValueError):
                        # missing field (None) or not a number
                        entry.append(0)
                elif kind == "bool":
                    entry.append(value is not None and value.lower() in ["true","yes","1","y"])

            result.append(entry)

    return result
|
||||||
|
|
||||||
|
def parseAllTSV(path,*args):
    """Parse every ``.tsv`` file in directory ``path`` with ``parseTSV``.

    ``args`` are the column types, passed through to ``parseTSV`` unchanged.
    Files are read in sorted name order so that the order of the combined
    rows (and therefore which of two conflicting rows comes last) is the
    same on every run.

    Returns the concatenated rows of all files.
    """
    import os

    result = []
    # os.listdir returns names in arbitrary order, hence the sort.
    for name in sorted(os.listdir(path + "/")):
        # Match the extension only; a substring test would also pick up
        # files such as "rules.tsv.bak".
        if name.endswith(".tsv"):
            result += parseTSV(path + "/" + name,*args)

    return result
|
||||||
|
|
Loading…
Reference in New Issue
Block a user