From 78b1937044715c10522c80d25d968fe3d44e38ac Mon Sep 17 00:00:00 2001 From: Krateng Date: Mon, 17 Dec 2018 15:10:10 +0100 Subject: [PATCH] Better metadata grabbing and caching --- cleanup.py | 6 +++ info/artists/.gitignore | 5 ++ info/artists/default.jpg | Bin 0 -> 6986 bytes info/artists_cache/.gitignore | 4 ++ server.py | 15 ++++-- utilities.py | 88 +++++++++++++++++++++++++++++++++- website/artist.py | 28 ++--------- 7 files changed, 117 insertions(+), 29 deletions(-) create mode 100644 info/artists/.gitignore create mode 100644 info/artists/default.jpg create mode 100644 info/artists_cache/.gitignore diff --git a/cleanup.py b/cleanup.py index cf20b7f..b2d4c76 100644 --- a/cleanup.py +++ b/cleanup.py @@ -38,6 +38,12 @@ class CleanerAgent: if a.strip() == "": return [] + if a.strip() in self.rules_notanartist: + return [] + + if " performing " in a.lower(): + return self.parseArtists(re.split(" [Pp]erforming",a)[0]) + if a.strip() in self.rules_belongtogether: return [a.strip()] if a.strip() in self.rules_replaceartist: diff --git a/info/artists/.gitignore b/info/artists/.gitignore new file mode 100644 index 0000000..9bf0e9e --- /dev/null +++ b/info/artists/.gitignore @@ -0,0 +1,5 @@ +*.png +*.jpg +*.jpeg +*.txt +!default.jpg diff --git a/info/artists/default.jpg b/info/artists/default.jpg new file mode 100644 index 0000000000000000000000000000000000000000..0da19b2ad4e6f3876cd42d04e92215f967df9def GIT binary patch literal 6986 zcmex=SH9mE6~0@A|B$jHpZ#LULR%mNZ74gWvDAjrY6j$s`$qY?v? zAS1INn#iR?;+B1Vl97jh^&Z9FI%bn%0VaZ*teCzqJGgrtbvx}>nyN9P&a7buactm7Wa!P7idPZheaY<=ec|~Pab4zPmdq-#2q{&mJPMbbs z=B!1Fmn>bje8tLDn>KIRx^4T8ox2VlK63Qf@e?OcUAlbb>b2`PZr*zM=<$=M&z`?{ z`Re1R&tJZN`~KtSFOa_&8JHp7Vg$tn$ZKf+5@cXvWMW}qW?={Ui;=0Efsu(>kcCyz zkWI)jkUgSBQ7RaJe9m%sQ``)}>)_?s`Q!ltL|o?E{1mANyO37 zKi(hwAMAgt{IUL#-1-%Fj@lo#7kzQfPB7Mp!zDF+@0FJCg-OZE9M^8>Z z=e+3OX8#!uzMj7+{D^)>RDJ7zhRph#&X3l=x%*&kSjEErN98SlYWe%$mi&}+Wy^Is zWzB6pWt)rMnQJRc%}+5M`uTwQcl4q?=@0nd3VyKuw&F+l!}-1UH?Kdc&-N$jN9d#9 z`NobP*EfD`-MzI;;dEj zt@b}dlU3badEtE`6@C914%u05`H{Qy#jX5Kla*)8%gS!O+qwLu`KIVo`%QvtCkO51 z)Drl(Gu&y-f(HJ_v;Xmx{%2_V{p0ZC>qqiC_kZwxe^dGBVwb;j{>f+iFRr^#FT6*0 z<$U|&x2{Nt&+-voX7*BSb9!R+c&;4_}?%4hhzWuis|Ks9ZlO^T-a7F$L8}?82fy*Tb*_JcIM`6s~)rLQXLOxE0;X;DM8$< z2NdR9{~hw5;h>#;qI^?5$lc)&kH1;?LFv-VKe`V()OTM06TZe(;%bfO`dG8yf5ZM> zV7Tn_gP};N%_7mqU5J74pW*$j@oyA=C;vNqPo@3Pe}=8|-`xF}{2=!2yXB?ni?a5a zeb~~u=+))h$>*ig*2?|1{_60;`_@^lhf|*OaQ-yffA907{6pv4;-&WRudgxsarDFI zgYu&LWGYtOP5h|euur)n>~{3UtsGOYUd+>Z7x-v`_9BBj%Xt>`2=La4{gD61Tk)Tv z$+$xJaox&~%T9kwyHewvTbpiEReo^2^dHwNru{6Lm;D9%qB~4}KFLfeTB{hcV^+qZ z^t&R3n_Q9^{xjIv|4@qmBj)~C{*O@lxBiP?BTQqXkNjuYH2=tbp&QZvF3Gb;_Ixe}=UE9pWF~|GWBO&gUI{w<;UBu7A_xU3=irv=q-Km2!dSzoPy# zG<(@U*mBK|Z{NlG2UGqtY?`~=|lSa4Q80;_X!K9fK4O+@di2OSr{f}7+{PQZRxO#tVEXTY=s%(Qt=Hci{;2;=@9(mG zX8%shi?7*t{3H8=`Xk(L`6K69M<#CH-==mqeZ{R#(U@EJqW12+qH{pJt>QMb>A%VU z89wBn|0ARL+pg|%{igX1bu4wa_itH#xW3JPoByGR*$z9d{4=cBT4MU~U1yG>uW?v+ z(4L#QW{Z`4bf0Y(-h1V8)al}y||E68^?Q@wb6fsw&&xKcJ;Xh-h*@V=*p_anR8H5>Iu zym}9g7ALN}8Ru@h_K2Qkr1CVAQsoIDQvVr(=l`&*|HsYw!TFoU-@boF|4ztBePnBW z9DnQiw-rAESN;e){&4M@Uh5y}hpOH#xi+hB_sS`X3I7>fCzWjNyy2FeJze&~M`{?`3>p&j=h?jODnZ@(9cP#4CV8DZ!f4cu?Xl~#C;O@`;50-ttS$B2*rltQGTK_2iPWj<} z=wjwP)eq-8*R{=GT;p`{E%WS%v|Svo`&M&ppUiva{;o-eBEmL-M@2T)&%XXP@Hcba zE%}XNyGr5ADr= zL_f|Bx}5vvbn&HoHy_T7vo8H7A=={QskK<$YV&`FgK_l_R*SQlP5#H_`@3{y&)j|P zZ=)YYH2!McC;2D)uUF~%A7=j<{&7rSA2r+h{SEntZv>@p>Y2aTXZKNUw*HL2?z8h( zDZhE#@p)r4WBd{MKeCSh8Cd%M&bNOs`@YzJhSc{R_cyU0=CBj_cm6+v^dH96nNrtV zbpC{{ew}ptn3d^;Y1J$BOr>W&>4}ZZnx-*PXLfO-HA_&$2KLWl?8oeHnLpab@MHSN z+mG9it{46GJHPW!_@msScmBV#i>phGxBo7(mV37U^6!7VTmCaN@np$=@bzcS+P~HO ztz+iGSMl5QkIiShc{_8S==SdO@oo2H=Z0l|Id9V-A3XPrcDYKzlKS+B#Byho$^RJs zGaQ^Yf6Mm6_Duh7#Bb4mbMn!B)*q?+6#rEHh|XVKqx@sOS}Swbw@VrRDznzLO?MJD zXnJ?+q28&PVWPPzPa~fM*mF z`kS^-*=3$$9?a{f)i(o7p>NdF0hG{gK%< zF+OyIzS@bcZ*vdqx>78&P^HuEY^vUofZ`TZnt5qicXIRz#M`-syj{4ss{}~Qy z>`~7z{Lp`B`6K(c%&Rix(>`8*Yxz;$uPtBbKD*?+`~Cr3nPoipKfTTUY_%roQS|iQ zlm+}Zc4;hX;6lTe_0o4M79BrWr@!xm-Q4sw6_?H*`qi>e;8LmNp54`J0~LxhdDc$c zDiS^*8*#UB3J^+WuDex4<8& zAKU&jL~QQ6_xPBc(gz!XAG0JTGs8 z;nBM5e}w-tG#~xX@LtO+pY}aZY_Q+BzhnQ0tmS_M<&VqpU9-`D_@Cis@zHqB(po=NF?)eJ*}4mI z8q*^#&oNMadpY=uzOKXsu1D3G;VciBo9v}(QvWlsS=H$7`)Ieh{KNNuT%5sb@0}0>QDL}{o_qKXZeJgVOO-1#3#Y30QL$0PR##M9p3ey;h|ow z{70hkv|H5Xa2F>>2ov7 zDsFG`1AU>}Q+s`yK6_uyciq(V=2hFhUk~S~J&Wk%j&H1&-2Y(a{tp5F8E)P`_)q6Y z_s7%Ul>V-?6RolQU@}p^d!KT}?FW5plOMjdKfA-bA}ZGR!EXak^;r@Z|D0X-Ng>Fa zr=cqSj}WL}N&nBla^&we8^Pa6aW&Tae{B8F!1bTu;69B_n|~B0Ka%)TdNr@qSFhS4 zDEnJma;`{z!DiznMjJmhu^yfl@BW{mY07_wgL(G(`9HMeyMmrS?mx2ZgZ-hodOknm zzg7LPE-qhOVRzzj{(Re=bD!+rrCj*=y~@-Hd%8K=%nYwTUEf&4|LA__KJyRdhwB+K zr24!5lz(JDYE{?yU_JLs!OH1X)7I8nIi_bbJ%2VSa%rzfd79|Iw)zhq`aewEAI@(o z;b*O>`_Is{^l;_l{awpG$-4Xgon*o<;8df%SS!!)@FVTpX=zv0?>^ZbecM-d(UgoQ z**ES^VW{W6zj6H=@!zra2Oa*Y#?+|)olwXAWBp_E?k~3mOCO0{O5MKk(w6QZ>6}~B zR+j68P0RQfy{hul!l~C&67O@>Z@vFR?LWgG$==18lQZIl{xdvSUN5fnQRbpI@2R>o z`#1mFs3WGs*m=b2dZ(mNI*a7PY4`p5l$K~8tvTHF?@68TAK@RyAA}G6XZXEHm%ySKXr4L#rLT$tlOTnGTU%{dByN?euMnx{RiIP zjDHy4R>$%0+_`r&+zKN0^Kn!@T2I@`!UuGKZI6|_&`mwRn??Yy%7)KwX`e11)v zS->^%vtw;yrU83X5gf45M&GFKog5ANl{dD*rQRT>a0`)K-79_fOP+hAYtl>vw8DeE&o9==~e^ z4`$nc2(|LA{2^2_@6`CHZ>{QWlgKpgA-O4V)q zel#vT$@6h`VABSlU*_K)pSZ6vGv{HL&c#CunE#r>2TYI$Pte%fKVm>7ul<7+|LlLb zFR0P|&(KuxhyCNfQ9&gR#nO2qG{&be6x$fSVzHZSg zBs8Z&$;lJGi**xZ{O2Z=cPXsXB3rXR%RK%ulh2 z`8(#HoN`=RCumb~{3iFeqL2JSfn@0EM3Z* z-RMYv;j%1Fp1uw7w|37+Z`k%?sax=+eckIn9{kVHq+EY6z0UYQ!$H;h3|+g3QtkJm zbtmgZYoeFwzg7Ow6kWRGep>HSv&~D|4mJmGYYR^hoTxX=q^+j4&7=N?{lotZ{d>ee zh98*6^JDIZ$47Sg_xzKuarv;dZTo|M@eFgPYF&TNeU0f`b~QPEUpz@u@XB`{4d(TD zI!K_-6Z4;Q`yYIZTz~r?7w>N!N0sZkOOOASj@mnS_bDIkf_p00SkA1zU>MS1C^KQ@ zyrv!bKaBgoS^x0;`2E|*9~*x=*LY2Kx@LO+#_xyk=C1mq_91NHs@9$L03`&ouZchZpMh22@AN(PDhIdhHU6f*anI)F(+q2m zUhmwxWajo{b?r=sgAZ~auUudBpP^Y{AJ?+=AK!m_`*+cjYfBsd$<-)+Sl+a@#%e>V zUVBHWA{_UUsjqg80 zQ)k^Z`Ay$uKm6-|xQ_Ej^~bnh@h#t^wk^qDn$MOcvG+pEUAZ*-o8QA0&Y5)k>eYXx z3HCeef0*k3;|~51e|$gVKe7J|2X*UA?nTyE{doOh+WEk(KUmklx;5=KzwC?8aWQeV zP3pxRTde~Nm2~$N+6XGUPxln4yY!#oU~>I~*7=*h{}DZWu>QluiiLW1;{UGRjSpQA_$Nw36 z>xBNeT&{8cu>SG-1Nt48BeD%{-aDCHyEMD1^jOl)&AWPIqzO6gG*xi-}iV1M>Q7kf8zYl&^%{UNV99&lwRd0p^&Q#Rx%ixcXnoQC2Pf)#>pAyWKaRgW z{{h?TM>BKkO#TF4vXOlh&r#v`+FzUXTk`^0C+?RCJI`I4Wm$SU_rip=8{YdqO#Sq4 z^?!z@gY^fO$qW2v$XS2u{iFD|nZKp}RQ>S&@GZaZpF(Z^qy9tld$;YHx^(XJ4prCR zx!>llxN-ZgO?Rb1>f4xW*ZDr*(g|$=vGq6bhXK2pdD*r>(|5&_G-If0g@^+RL z@sIv99QSoUU|uKvp}cjU$Ch z!TlFiOOIVU{huNA?%bYG$<}=fbtYLI&M^DWkTdm6#qERpB!2WhxG!E~`*B-u`Xha| ziuZ@bf0=jhv0rg~-@X3~K6@J%k#UCRB_a8k") +@route("/info/") +@route("/info/") +def static_image(pth): + return static_file("info/" + pth,root="") + @route("/") @route("/") @route("/") @route("/") @route("/") -def static(name): - +def static(name): return static_file("website/" + name,root="") + + @route("/") def static_html(name): keys = FormsDict.decode(request.query) + + # If a python file exists, it provides the replacement dict for the html file if os.path.exists("website/" + name + ".py"): txt_keys = SourceFileLoader(name,"website/" + name + ".py").load_module().replacedict(keys,DATABASE_PORT) with open("website/" + name + ".html") as htmlfile: @@ -91,7 +100,7 @@ def static_html(name): return html - + # Otherwise, we just serve the html file return static_file("website/" + name + ".html",root="") #set graceful shutdown diff --git a/utilities.py b/utilities.py index 68d6d8d..13f10f0 100644 --- a/utilities.py +++ b/utilities.py @@ -1,5 +1,4 @@ - - +### TSV files def parseTSV(filename,*args): f = open(filename) @@ -56,7 +55,92 @@ def createTSV(filename): if not os.path.exists(filename): open(filename,"w").close() +### Logging def log(msg): print(msg) + # best function ever + +### Media info + +def getArtistInfo(artist): + import re + import os + import urllib + import json + import _thread + + + filename = re.sub("[^a-zA-Z0-9]","",artist) + filepath = "info/artists/" + filename + filepath_cache = "info/artists_cache/" + filename + + # check if custom image exists + if os.path.exists(filepath + ".png"): + imgurl = "/" + filepath + ".png" + elif os.path.exists(filepath + ".jpg"): + imgurl = "/" + filepath + ".jpg" + elif os.path.exists(filepath + ".jpeg"): + imgurl = "/" + filepath + ".jpeg" + + #check if cached image exists + elif os.path.exists(filepath_cache + ".png"): + imgurl = "/" + filepath_cache + ".png" + elif os.path.exists(filepath_cache + ".jpg"): + imgurl = "/" + filepath_cache + ".jpg" + elif os.path.exists(filepath_cache + ".jpeg"): + imgurl = "/" + filepath_cache + ".jpeg" + + + # check if custom desc exists + if os.path.exists(filepath + ".txt"): + with open(filepath + ".txt","r") as descfile: + desc = descfile.read().replace("\n","") + + #check if cached desc exists + elif os.path.exists(filepath_cache + ".txt"): + with open(filepath_cache + ".txt","r") as descfile: + desc = descfile.read().replace("\n","") + + try: + return {"image":imgurl,"info":desc} + except NameError: + pass + #is this pythonic? + + + # if we neither have a custom image nor a cached version, we return the address from lastfm, but cache that image for later use + with open("apikey","r") as keyfile: + apikey = keyfile.read().replace("\n","") + + + try: + url = "https://ws.audioscrobbler.com/2.0/?method=artist.getinfo&artist=" + urllib.parse.quote(artist) + "&api_key=" + apikey + "&format=json" + response = urllib.request.urlopen(url) + lastfm_data = json.loads(response.read()) + try: + imgurl + except NameError: + imgurl = lastfm_data["artist"]["image"][2]["#text"] + _thread.start_new_thread(cacheImage,(imgurl,"info/artists_cache",filename)) + try: + desc + except NameError: + desc = lastfm_data["artist"]["bio"]["summary"] + with open(filepath_cache + ".txt","w") as descfile: + descfile.write(desc) + # this feels so dirty + + + return {"image":imgurl,"info":desc} + except: + return {"image":"/info/artists/default.jpg","info":"No information available"} + + + +def cacheImage(url,path,filename): + import urllib.request + response = urllib.request.urlopen(url) + target = path + "/" + filename + "." + response.info().get_content_subtype() + urllib.request.urlretrieve(url,target) diff --git a/website/artist.py b/website/artist.py index 1083181..fc39c67 100644 --- a/website/artist.py +++ b/website/artist.py @@ -1,34 +1,14 @@ import urllib import json - - -#def page(keys): -# -# txt_keys = replace(keys) -# -# -# with open("website/artist.html","r") as htmlfile: -# html = htmlfile.read() -# -# -# -# for k in txt_keys: -# html = html.replace(k,txt_keys[k]) -# -# return html def replacedict(keys,dbport): + from utilities import getArtistInfo - with open("website/apikey","r") as keyfile: - apikey = keyfile.read().replace("\n","") - - url = "https://ws.audioscrobbler.com/2.0/?method=artist.getinfo&artist=" + urllib.parse.quote(keys["artist"]) + "&api_key=" + apikey + "&format=json" - response = urllib.request.urlopen(url) - lastfm_data = json.loads(response.read()) - imgurl = lastfm_data["artist"]["image"][2]["#text"] - desc = lastfm_data["artist"]["bio"]["summary"] + info = getArtistInfo(keys["artist"]) + imgurl = info.get("image") + desc = info.get("info") response = urllib.request.urlopen("http://localhost:" + str(dbport) + "/artistinfo?artist=" + urllib.parse.quote(keys["artist"])) db_data = json.loads(response.read())