commit 7b151d60ef2c4b509b89b49b89f14db38b58b4e3
Author: Krateng
Date:   Sat Nov 24 16:29:24 2018 +0100

    Initial commit

diff --git a/cleanup.py b/cleanup.py
new file mode 100644
index 0000000..a758eea
--- /dev/null
+++ b/cleanup.py
@@ -0,0 +1,152 @@
+import re
+
+def cleanup(artiststr):
+
+	if artiststr == "":
+		return []
+
+	artists = [artiststr]
+
+	artistsnew = []
+	for a in artists:
+		artistsnew.append(re.sub(r"(.*) \(ft. (.*)\)",r"\1",a))
+		artistsnew.append(re.sub(r"(.*) \(ft. (.*)\)",r"\2",a))
+
+	artists = artistsnew
+	artistsnew = []
+
+	for a in artists:
+		artistsnew.append(a.split(" vs. "))
+
+	artists = flatten(artistsnew)
+	artistsnew = []
+
+	for a in artists:
+		artistsnew.append(a.split(" vs "))
+
+	artists = flatten(artistsnew)
+	artistsnew = []
+
+
+	for a in artists:
+		artistsnew.append(a.split(" ft. "))
+
+	artists = flatten(artistsnew)
+	artistsnew = []
+
+	for a in artists:
+		artistsnew.append(a.split(" Ft. "))
+
+	artists = flatten(artistsnew)
+	artistsnew = []
+
+
+	for a in artists:
+		artistsnew.append(a.split(" Feat. "))
+
+	artists = flatten(artistsnew)
+	artistsnew = []
+
+	for a in artists:
+		artistsnew.append(a.split(" feat. "))
+
+	artists = flatten(artistsnew)
+	artistsnew = []
+
+
+	for a in artists:
+		artistsnew.append(a.split(" featuring "))
+
+	artists = flatten(artistsnew)
+	artistsnew = []
+
+
+	for a in artists:
+		artistsnew.append(a.split(" Featuring "))
+
+	artists = flatten(artistsnew)
+	artistsnew = []
+
+	for a in artists:
+		artistsnew.append(a.split(" ; "))
+
+	artists = flatten(artistsnew)
+	artistsnew = []
+
+	for a in artists:
+		artistsnew.append(a.split("; "))
+
+	artists = flatten(artistsnew)
+	artistsnew = []
+
+	for a in artists:
+		artistsnew.append(a.split(";"))
+
+	artists = flatten(artistsnew)
+	artistsnew = []
+
+	#if not artists[0] == artiststr:
+	#	print(artiststr + " became " + str(artists))
+
+	return artists
+
+
+def cleantitle(title):
+	title = title.replace("[","(").replace("]",")")
+
+	title = re.sub(r" \(as made famous by .*?\)","",title)
+	title = re.sub(r" \(originally by .*?\)","",title)
+
+	return title
+
+def findartistsintitle(title):
+
+	truetitle = title
+	artists = ""
+
+	newtitle = re.sub(r"(.*) \(ft. (.*?)\)",r"\1",title)
+	if (title != newtitle):
+		artists = re.sub(r"(.*) \(ft. (.*?)\).*",r"\2",title)
+		truetitle = newtitle
+
+	newtitle = re.sub(r"(.*) \(feat. (.*?)\)",r"\1",title)
+	if (title != newtitle):
+		artists = re.sub(r"(.*) \(feat. (.*?)\).*",r"\2",title)
+		truetitle = newtitle
+
+	newtitle = re.sub(r"(.*) \(Feat. (.*?)\)",r"\1",title)
+	if (title != newtitle):
+		artists = re.sub(r"(.*) \(Feat. (.*?)\).*",r"\2",title)
+		truetitle = newtitle
+
+	newtitle = re.sub(r"(.*) \(Ft. (.*?)\)",r"\1",title)
+	if (title != newtitle):
+		artists = re.sub(r"(.*) \(Ft. (.*?)\).*",r"\2",title)
+		truetitle = newtitle
+
+	newtitle = re.sub(r"(.*) \(Featuring (.*?)\)",r"\1",title)
+	if (title != newtitle):
+		artists = re.sub(r"(.*) \(Featuring. (.*?)\).*",r"\2",title)
+		truetitle = newtitle
+
+	newtitle = re.sub(r"(.*) \(featuring (.*?)\)",r"\1",title)
+	if (title != newtitle):
+		artists = re.sub(r"(.*) \(featuring (.*?)\).*",r"\2",title)
+		truetitle = newtitle
+
+
+	artistlist = cleanup(artists)
+
+	return (truetitle,artistlist)
+
+def flatten(lis):
+
+	newlist = []
+
+	for l in lis:
+		if isinstance(l, str):
+			newlist.append(l)
+		else:
+			newlist = newlist + l
+
+	return list(set(newlist))
diff --git a/database.py b/database.py
new file mode 100644
index 0000000..0825309
--- /dev/null
+++ b/database.py
@@ -0,0 +1,186 @@
+from bottle import route, run, template, static_file, request, response
+from importlib.machinery import SourceFileLoader
+import waitress
+import os
+import datetime
+
+DATABASE = []
+
+ARTISTS = []
+TRACKS = []
+
+
+@route("/scrobbles")
+def get_scrobbles():
+	keys = request.query
+	r = db_query(artist=keys.get("artist"))
+	#print(r)
+	response.content_type = "application/json"
+	return {"object":r} ##json can't be a list apparently???
+
+	#r = db_query(artist=keys.get("artist"))
+	#text = ""
+	#for e in r:
+	#	entry = ""
+	#	for a in e["artists"]:
+	#		entry += a + "/"
+	#	entry += " " + e["title"] + "\n"
+	#	text += entry
+	#return text
+
+@route("/tracks")
+def get_tracks():
+	artist = request.query.get("artist")
+
+	ls = [t for t in TRACKS if (artist in t["artists"])]
+	return {"object":ls}
+
+# Starts the server
+def runserver(DATABASE_PORT):
+
+	reload()
+	buildh()
+
+	run(host='0.0.0.0', port=DATABASE_PORT, server='waitress')
+
+
+# builds database of artists and tracks
+# UNUSED as it is very resource-heavy, use buildh() instead
+def build():
+	global ARTISTS
+	global TRACKS
+
+	artistlist = []
+	tracklist = []
+	for t in DATABASE:
+		for a in t["artists"]:
+			if a in artistlist:
+				continue
+			artistlist.append(a)
+
+		# first check if the title exists at all to quickly rule out most titles
+		if (t["title"] in [tr["title"] for tr in tracklist]):
+			#only it same title actually exists do we need to check if the song is the same
+
+
+			if not (set(t["artists"]) in [set(tr["artists"]) for tr in tracklist if tr["title"] == t["title"]]): #wut
+				tracklist.append({"artists":t["artists"],"title":t["title"]})
+
+			### ALRIGHT
+			#foundexisting = False
+			#for track in [tr for tr in tracklist if tr["title"] == t["title"]]: #wtf did I just write
+			#	#print("Check duplicate: " + str(track) + " AND " + str(t))
+			#	if (set(track["artists"]) == set(t["artists"])):
+			#		foundexisting = True
+			#		#print("MATCH!")
+			#		break
+			#	#else:
+			#		#print("NO MATCH!")
+
+			#if not foundexisting:
+			#	tracklist.append({"artists":t["artists"],"title":t["title"]})
+		else:
+			tracklist.append({"artists":t["artists"],"title":t["title"]})
+
+
+	ARTISTS = artistlist
+	TRACKS = tracklist
+
+
+# builds database of artists and tracks
+# uses better data types to quickly find all unique tracks
+def buildh():
+	global ARTISTS
+	global TRACKS
+
+	artistset = set()
+	trackset = set()
+	for t in DATABASE:
+		for a in t["artists"]:
+			if a not in artistset:
+				artistset.add(a)
+
+		# we list the tracks as tupels of frozenset(artists) and track
+		# this way they're hashable and easily comparable, but we need to change them back after we have the list
+		if ((frozenset(t["artists"]),t["title"])) not in trackset:
+			trackset.add((frozenset(t["artists"]),t["title"]))
+
+	print("Done, now converting back!")
+
+	ARTISTS = list(artistset)
+	TRACKS = [{"artists":list(a[0]),"title":a[1]} for a in trackset]
+
+# Rebuilds the database from disk, keeps cached entries
+def reload():
+	newdb = [t for t in DATABASE if not t["saved"]]
+
+	for f in os.listdir("logs/"):
+		#print(f)
+
+		if not (".csv" in f):
+			continue
+
+		logfile = open("logs/" + f)
+		for l in logfile:
+
+			l = l.replace("\n","")
+			data = l.split(",")
+			#print(l)
+
+			artists = data[1].split("/")
+			#album = data[3]
+			title = data[2]
+			time = int(data[0])
+
+			DATABASE.append({"artists":artists,"title":title,"time":time,"saved":True})
+
+# Saves all cached entries to disk
+def flush():
+	for t in DATABASE:
+		if not t["saved"]:
+
+			artistss = "/".join(t["artists"])
+			timestamp = datetime.date.fromtimestamp(t["time"])
+
+			entry = ",".join([str(t["time"]),artistss,t["title"]])
+
+			monthfile = open("logs/" + str(timestamp.year) + "_" + str(timestamp.month) + ".csv","a")
+			monthfile.write(entry)
+			monthfile.write("\n")
+			monthfile.close()
+
+			t["saved"] = True
+
+
+# Queries the database
+def db_query(artist=None,title=None,since=0,to=9999999999):
+	if isinstance(since, str):
+		sdate = [int(x) for x in since.split("/")]
+		date = [1970,1,1,0,0]
+		date[:len(sdate)] = sdate
+		since = int(datetime.datetime(date[0],date[1],date[2],date[3],date[4],tzinfo=datetime.timezone.utc).timestamp())
+	if isinstance(to, str):
+		sdate = [int(x) for x in to.split("/")]
+		date = [1970,1,1,0,0]
+		date[:len(sdate)] = sdate
+		to = int(datetime.datetime(date[0],date[1],date[2],date[3],date[4],tzinfo=datetime.timezone.utc).timestamp())
+
+	thingsweneed = ["artists","title","time"]
+	return [{key:t[key] for key in thingsweneed} for t in DATABASE if (artist in t["artists"] or artist==None) and (t["title"]==title or title==None) and (since < t["time"] < to)]
+
+# Search for strings
+def db_search(query,type=None):
+	if type=="ARTIST":
+		results = []
+		for a in ARTISTS:
+			if query.lower() in a.lower():
+				results.append(a)
+
+	if type=="TRACK":
+		results = []
+		for t in TRACKS:
+			if query.lower() in t[1].lower():
+				results.append(t)
+
+	return results
+
diff --git a/lastfmconverter.py b/lastfmconverter.py
new file mode 100644
index 0000000..8ad3cb3
--- /dev/null
+++ b/lastfmconverter.py
@@ -0,0 +1,39 @@
+import sys, os, datetime, re, cleanup
+
+log = open(sys.argv[1])
+
+outputlog = open(sys.argv[2],"a")
+
+for l in log:
+	l = l.replace("\n","")
+	data = l.split(",")
+
+	artist = data[0]
+	album = data[1]
+	title = data[2]
+	time = data[3]
+
+	title = cleanup.cleantitle(title)
+	artists = cleanup.cleanup(artist)
+	(title,extraartists) = cleanup.findartistsintitle(title)
+	artists = list(set(artists + extraartists))
+
+	artistsstr = "/".join(artists)
+
+
+	timeparts = time.split(" ")
+	(h,m) = timeparts[3].split(":")
+
+	months = {"Jan":1,"Feb":2,"Mar":3,"Apr":4,"May":5,"Jun":6,"Jul":7,"Aug":8,"Sep":9,"Oct":10,"Nov":11,"Dec":12}
+
+	timestamp = int(datetime.datetime(int(timeparts[2]),months[timeparts[1]],int(timeparts[0]),int(h),int(m)).timestamp())
+
+	entry = ",".join([str(timestamp),artistsstr,title,album])
+
+
+	outputlog.write(entry)
+	outputlog.write("\n")
+
+
+
+
diff --git a/logs/.gitignore b/logs/.gitignore
new file mode 100644
index 0000000..afed073
--- /dev/null
+++ b/logs/.gitignore
@@ -0,0 +1 @@
+*.csv
diff --git a/logs/dummy b/logs/dummy
new file mode 100644
index 0000000..e69de29
diff --git a/rules/dummy b/rules/dummy
new file mode 100644
index 0000000..e69de29
diff --git a/server.py b/server.py
new file mode 100755
index 0000000..af80880
--- /dev/null
+++ b/server.py
@@ -0,0 +1,45 @@
+from bottle import route, run, template, static_file, request
+#import os
+from importlib.machinery import SourceFileLoader
+#from serverutil import log, db_remove, createVideoFile
+import _thread
+import waitress
+
+
+MAIN_PORT = 12345
+DATABASE_PORT = 12349
+
+#@route("//")
+#@route("//")
+#@route("//")
+#@route("//")
+#@route("//")
+#@route("//")
+#@route("//")
+@route("/<pth:path>")
+def static(pth):
+
+	return static_file(pth,root="")
+
+
+@route("")
+@route("/")
+def mainpage():
+	keys = request.query
+
+	return SourceFileLoader("mainpage","mainpage.py").load_module().GET(keys)
+
+@route("/xhttp")
+def xhttp():
+	keys = request.query
+
+	return SourceFileLoader("download","download.py").load_module().GET(keys)
+
+
+
+## other programs to always run with the server
+#_thread.start_new_thread(SourceFileLoader("downloader","downloader.py").load_module().loop,())
+_thread.start_new_thread(SourceFileLoader("database","database.py").load_module().runserver,(DATABASE_PORT,))
+
+print("wat")
+run(host='0.0.0.0', port=MAIN_PORT, server='waitress')
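
The database thread that server.py spawns exposes the scrobble log over HTTP, so the quickest way to inspect a running instance is a plain GET against /scrobbles. Below is a minimal client sketch, assuming server.py is running locally so the database thread listens on DATABASE_PORT (12349); fetch_scrobbles and the artist name are placeholders, and the {"object": ...} wrapper is simply what get_scrobbles() in database.py returns because bottle will not serialize a bare list.

import json
import urllib.parse
import urllib.request

def fetch_scrobbles(artist, port=12349):
	# /scrobbles (get_scrobbles() in database.py) answers with
	# {"object": [{"artists": [...], "title": ..., "time": ...}, ...]}
	qs = urllib.parse.urlencode({"artist": artist})
	with urllib.request.urlopen("http://localhost:%d/scrobbles?%s" % (port, qs)) as resp:
		return json.load(resp)["object"]

for s in fetch_scrobbles("Some Artist"):
	print(s["time"], "/".join(s["artists"]), s["title"])

Each returned entry mirrors one CSV line under logs/: a Unix timestamp, the artists joined with "/", and the title. That is the format flush() writes back to disk; lastfmconverter.py appends the album as a fourth field, which reload() currently ignores.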