From 78b1937044715c10522c80d25d968fe3d44e38ac Mon Sep 17 00:00:00 2001 From: Krateng Date: Mon, 17 Dec 2018 15:10:10 +0100 Subject: [PATCH] Better metadata grabbing and caching --- cleanup.py | 6 +++ info/artists/.gitignore | 5 ++ info/artists/default.jpg | Bin 0 -> 6986 bytes info/artists_cache/.gitignore | 4 ++ server.py | 15 ++++-- utilities.py | 88 +++++++++++++++++++++++++++++++++- website/artist.py | 28 ++--------- 7 files changed, 117 insertions(+), 29 deletions(-) create mode 100644 info/artists/.gitignore create mode 100644 info/artists/default.jpg create mode 100644 info/artists_cache/.gitignore diff --git a/cleanup.py b/cleanup.py index cf20b7f..b2d4c76 100644 --- a/cleanup.py +++ b/cleanup.py @@ -38,6 +38,12 @@ class CleanerAgent: if a.strip() == "": return [] + if a.strip() in self.rules_notanartist: + return [] + + if " performing " in a.lower(): + return self.parseArtists(re.split(" [Pp]erforming",a)[0]) + if a.strip() in self.rules_belongtogether: return [a.strip()] if a.strip() in self.rules_replaceartist: diff --git a/info/artists/.gitignore b/info/artists/.gitignore new file mode 100644 index 0000000..9bf0e9e --- /dev/null +++ b/info/artists/.gitignore @@ -0,0 +1,5 @@ +*.png +*.jpg +*.jpeg +*.txt +!default.jpg diff --git a/info/artists/default.jpg b/info/artists/default.jpg new file mode 100644 index 0000000000000000000000000000000000000000..0da19b2ad4e6f3876cd42d04e92215f967df9def GIT binary patch literal 6986 zcmbVw2~ZPT_h&^3WsPBVM|KH97Tg8tpf9v0=}hyVufCc2rs~zq|90J~+ezP3=XcIMzjMwNy%Dhh z`9lYs4*+6f03e1ufM^!jhrInCLtK3A6(x=g6iPw@g$xM^DM<+lNn|aFmXwrQ8xoRe zG+GM1wpZ4IvG_p^$i>Kw-^|1-DY8FW&Qgun|+S^4>%okKIC=esJD-=-?6|m zXM=)62%%9Iqc6q8#$8UilA4yDk(pI^?Rrsh$zL~0Z&y{<+^M}=cdx0rrM0d7QAcNA z|G?nT@RO&{#>OWmr>5V`FxZ@hcke%Z{Pg(?cjfz!zj>?B&tGe}!~oR4Ot$8WwecUg z6cAkEi1j7VYq-S3W7a-UkdWNGLrQV42Ra~9NoQx0w6a4%Wg~r^uDK_ua{9ukjH;dm zTc5iI?Z1%yzX41BZz20Pu>XaN0mz}m5a*#302~0rhPShTf6sfDbE|4U{Yrov>hUw5 zFMF2+o4l&6`(o`P+0)v6=$P}_f{Y{kj|LeJ0AdDN@I?_oU_v&vu=L-2tJu`;tu@2| z?(~w`gPAWrs+|nd3z9fIJHA+|14Q1NA0Y%rYZT6&e8yoP9&-ljPGLTAvf$mlJ468e z6Bay7mJ$3uDsTbaS+pfxE}5f6z58|$*N;>AG-a=FVyCp@+2EduE31*G19hdRX3lG@ zddA&0oBLVe&$mJ(e5Ew`447pcBc~!dk!SXCsEc!~2JhI)JWk9NK>{Xk>EchV{Zio$ z&unFFueUjxzL=_uwtgnw_mhR<``_*kW?_~TKwI8j$cIhIVK!3-&7=gUp`#prg!x`B zHRG(F(NW8>Rwo1y2SX_QTy4t$^RM6!YkpLXUDp2^4ty|hx@k@ zF$-nzZ_&thKGeT3JrI#e1KpNqbO);(g*Rbrn0poJHCd_Kq>h3cW=yE7mAWe3vHGmq zoSwF)I{RrQ>DXI?a=|cZ2-%f4TLsIkyC1sIeUZ$!B<_PoBC1Z7g~tzGJ|j=4&=^18 zO|{q?_Wc!w5Do1yHTZV>@8U-2!NA&OkA zxhvPcK6HGR&>UuS)haC7_~1Sp$Me?LJ!K^yYu}vaA3f;wZL z1tzQvgN|u|&;9-ClSF6g2`j$PTlg9ncZGlpI%&TLxH>5UV(<=TD}He^KM?Mp*Gumc z0j-q9Ic^~9#L~Cg=-}I-y;fjL=It-zvwwtw&WJ8dDsd+?2!5wyReCjJ@682p%@TWcW4&knM_Z+fCP&X3fAiqv8{OXiwpQ-e zRr&QKRR{~Ok45Zm9;>uKs^i{y&I_6#?h8){w!0idr)miHPgv$j5XRjVa{V(be!e4| zxzMor$fMGb+re|nkxh<)fC#X|K{|MWs?!2ipcu@{8~xkY@`V3@2xy`VFcshU!(&*q z-&OEsZHe!@sf$ls#I)+e6oMRNvQyo4yjrS;;eIlvo6MqR!6*$B4lopJSGK zQ5f|YHhulTaq3vbwFGf@LNqF`N3+0LqwYrle!)cqNZvs-KLL%y>s!{ffgMp~dMBGvylsJCbI$WI5D6FE)HDclQ>CdmfQ| z6l(83i9!}-WcqGF@WSpWS8(KtUgYa#%Q7!hc0=G&9T$xW0*k_2g%)>7L~PQ$gi_xYCXc{Bm< zh;6-S$^DjjZnK-YceZEuOt@8*j;qOscBP()@ng@U-d`z^783@Iy5%|+Df*V{qZ3h@9`$2B@QN;J6b>u{pF<594~}F$NTXB{ z9Y&W>Wxhe3())YsW!kT;f5Gf5+@Y*w=P{_5|#%gfvb=4s&;A|_cRfp zOjKdJXD~6uvo%ixIZRaS$9^!h^FVEo2q4MUJC9bCnkTJv7Cc6du_c5!S<(k_bP-Vp z@?(>eDYCfpR7MGVx{oO_6bPoL2voRbp5x&^o=1(%&#-Ttw2g+^>$K$0@lE=b+PgszCOi~Pg^&yiOP5fOmDw)8!Lk<>G-gb# zKlR#kx0IRvC3Gr_xTm|gknz1SUJO5g6>QpzoVQ#d1;;95s)~TK%NfijbicH{GCU#z zGz4PxgpJczj0H#Qzn-2Qvax(!9A9T@sd3}o)f0Z9#fAmOWl;gP5}v*f(ce|kLpX*t zrwxEgxU&oCgH#nBKO$4$!wJ9e&7B_>6(4Qh&c9%*^?^3dhu0(dCRr{N3%a>WhSFNC zm-Z4Wzu?=oH+aj-(t4-l*5#S&h~B=T!#E3>WR7#WlJh5ls6XQu2=dXt;9iTd-691+w|ED zZ;rXcP{!a2E!0B?oxREP%uMzjFRri14 z(zd+$tpC)sBB)mB^xJy<4s)zM^Jh4+#TjNWK@VFDw6%S)KMr(XpbXP8m`e6LNUs@^ zB?CB_uN3*{-RBYKo}F7EpEK={8|!?#mv^A6)IT%H>Pya_T2>hcX}b}7ThLe~*j6Wj ziowEzJTRS?0A8xWq?%Y7;@Dw=$f=8jyi}q_t-Xso`Qk0z<2#Kj+-|p*|M|=zv0kr0 z1l&s(D0acpWWF*Y%COrYA`;B&tL5O9Q1=MfAf(XT#l;>@i%?{uH&ig)|BxYEl4Gtc zg@3oXf12Z8l5^%3=1r&J?L?XXn7hatbFsIF=_IpW(7rtQe(cyt->)mZ@?+5(y1U{V zJhY<-at#HoS{@s(6(qXr(E8(fb;26lIdTqGlcXJ6#%qQexU~ahjdOy4yZaVQa2)px zvw)t_R>O~HD~4=0Z1ARQ=Uaq@D;~(dB7L`V5BA$Nyed3_M@rl`?!58wzx{EHY@P?C z&qbKPw>fuyVMt2{w3CNQ)$^0H!yn9MEH5>-$kA;!bsS&PlgDsbAWon{fn`!aru%CP zvLu=AAOg<4RB`}Yd%Cr>z!iA(*W$16nijx#yA#1=+;Q{KqaLa+llEvt4w9w?N(lW8 ziU8Jq5g;qjv$N!)4hjFlh4gsZ7II_1b87pt%dLIi91&A`kVy^*d16(FndyI@`!OI9 z*GS8tfuXem1?;faZ&P+=Y!NUMHHcpqop4xR6(^6R=GR(=CBEY~x9cyBdrg>`sOQUY z;t4(yEL1XXBN53gQZ_`BUaoy}47du*7%YeYxmc_C&l>#<%vD;jtXsTSYn#ElTn@bc z^s8O(P(Y`@gZhfXIVM$xXz%6<*pn6^3S_hau7oE$P_(k@U6gJqHsqUwXlEVG#|PHlz(^uC%juBFA* zqoM6L3-l&h=3?B&8-56nO?`0cUctMg)gDrQ#CNzDEq(1a>lXoS27(PtBpSCu`W)|w z86Q6=l1~uq?r@z6wK>@n$TCx$Ey{6C{_ZjvvA-#K@k`qLFZRtXAAB=q@k!(jG}a9o z_y=M!CLK10vboQBI>I`872+xUu-#oefIWM+Gk* ztY~|>$R%D57AUR}OE7|BK-gxdz(0MGXiI}yk-8fRh+e~{W+*a{qy8g$@_e+LY5eb= zVZ5}!aOH?Uo6W0tHm)Q~_7~%k+?R}0ZuIlGhzMvKHp}zlLf*g~5Z703>I&g`mm%Az zn5tO&Ckqjj-5urcXq~!|-tc+(i$h@ldvPn3Mbl>jm@?#6 zCbUHa2sY$Y5w3;c6_G6Y+x5N59jJs z8x0csv*33mC4mxT4k{s?2ACp{gPg!&7A^VZ*S8dDq5-Iqmu^98X}Z@!JKLP~`)ZM+ zWZSz`l%33vF94U4h;61lUY^0T@o6M!_!Ui-r~$^XeNvXXd(iZg=+UXMN%e&n6`uBS z-}H1ZmMEFOG%8%NZ8T0O{pTVEpuWOD#s>Wn(gI(^n%-pkK0E9}f{A5<0`3e(>#Nn& zh^|@Dy%2`$ui_hj$`pLxn-Dn<~i9EXk}&3NMhXO8Z-N!Vo2J z>{hyqC)k^Vx%GEWy!8z|7M!_gYQg{NcgRJK`zl8>qd))IV|gbBBjJxQfpoXI2++l* zi-1LIf(WScrZF%5U*!6qWcy$5Py{R!Cjty75iQclGdTjJwV3e#gW6%lUWT>~GT$Jj z1P=Tod-n@CgoNf_K5cGcv&|J8dC3NC^ zOnD%1;d_Q^DNp9~EDfigmn*&;k^a7PtF=kbXSW0Xch$ERt2Z1ePK)yp0Z0kRqTxZW zWf?Nvl6-0;v-Cj{*U1&MUh?PG_g&6SvkTW zKk-sm$MEO7OWJaUhNw5>pu9JEyh*&EuG1GSE~xQDom`(8KS8F&>*cfPbk4F8jI&g zorPuA^NsiFO`FsV^EHn3ly^OIHTrX2Z%X1t{p%e^5_f3k*Ht7xih#`Xc(#xmca_J5 z8u|N(hpstHSu)@7+26|V3ATXVmG@FR$nsy!-9tlB9lN6whKd zefoa#_{oH1gQ$#FTaQQ`lio->1zo2SXL-^XvMG?vN~JWd3RL>(WUwH%($roJo}8Bf z&vXX&VHudcF=leoV=p7_gjvPSB-|0+j>Dc;jdd+l*LPc~ro9W}MzMW{@@6nd?Uh4P z6gZ9(_kq*}>SMEvao@1*GxJZw5^BOOM-QFuXzsk4lJis(JU%dcV&ksB)A#T1!Ofdy z3FHyuC*W8QRmq??Ze|N_c{tzyGq%-w@eAW{3g3ZH{H$K;!K&(#kis!K<9H<3=sO|4 zQxf&GVm=yh3*ps}8ibI9wuZ5Qyf6qZyy=f*jbg}`gfZW9MD z9eenzyOAU><<=$#+0UmsUQLzmihdscI=|uJvK#yC3S5skl}@JL!6Nw1Q5nk|{1Sn` z5orcpKwd;Htep_S$?K!!v~?EMl$T`a@8^f}%J06JowLi1+a5Q+?v%T!pPrGf?)}Jw zB7~bZARxk9v5>y|5MGfqAp)@WyJGPi5isoL^w^q&Q~^l0{b_o8Y}B5X?qY+UPV17r z2HlSeOcpZDCDtig%qTY3JBkVYNB1iOK3r?0hePr#eLI;QA&>@r1j;e2<>Z$i2dFbY zOw;)X$bGn)<<1F#murTmo+EAeT=~_b8&Z2KjSFr1Ly(64T+xfzS$eNJl*3}G z5qERja)LQ#QnAbZYQLFYZ0C=l z>J#?%stdv%CMSfkWAe_X(Tp#HSnG>~=^!$~mrgX=u-=2>9~{;DH$AzcIx`GyemD!l`%w literal 0 HcmV?d00001 diff --git a/info/artists_cache/.gitignore b/info/artists_cache/.gitignore new file mode 100644 index 0000000..9169a8f --- /dev/null +++ b/info/artists_cache/.gitignore @@ -0,0 +1,4 @@ +*.png +*.jpg +*.jpeg +*.txt diff --git a/server.py b/server.py index 4118399..868a1cd 100755 --- a/server.py +++ b/server.py @@ -70,18 +70,27 @@ def graceful_exit(sig=None,frame=None): sys.exit() +@route("/info/") +@route("/info/") +@route("/info/") +def static_image(pth): + return static_file("info/" + pth,root="") + @route("/") @route("/") @route("/") @route("/") @route("/") -def static(name): - +def static(name): return static_file("website/" + name,root="") + + @route("/") def static_html(name): keys = FormsDict.decode(request.query) + + # If a python file exists, it provides the replacement dict for the html file if os.path.exists("website/" + name + ".py"): txt_keys = SourceFileLoader(name,"website/" + name + ".py").load_module().replacedict(keys,DATABASE_PORT) with open("website/" + name + ".html") as htmlfile: @@ -91,7 +100,7 @@ def static_html(name): return html - + # Otherwise, we just serve the html file return static_file("website/" + name + ".html",root="") #set graceful shutdown diff --git a/utilities.py b/utilities.py index 68d6d8d..13f10f0 100644 --- a/utilities.py +++ b/utilities.py @@ -1,5 +1,4 @@ - - +### TSV files def parseTSV(filename,*args): f = open(filename) @@ -56,7 +55,92 @@ def createTSV(filename): if not os.path.exists(filename): open(filename,"w").close() +### Logging def log(msg): print(msg) + # best function ever + +### Media info + +def getArtistInfo(artist): + import re + import os + import urllib + import json + import _thread + + + filename = re.sub("[^a-zA-Z0-9]","",artist) + filepath = "info/artists/" + filename + filepath_cache = "info/artists_cache/" + filename + + # check if custom image exists + if os.path.exists(filepath + ".png"): + imgurl = "/" + filepath + ".png" + elif os.path.exists(filepath + ".jpg"): + imgurl = "/" + filepath + ".jpg" + elif os.path.exists(filepath + ".jpeg"): + imgurl = "/" + filepath + ".jpeg" + + #check if cached image exists + elif os.path.exists(filepath_cache + ".png"): + imgurl = "/" + filepath_cache + ".png" + elif os.path.exists(filepath_cache + ".jpg"): + imgurl = "/" + filepath_cache + ".jpg" + elif os.path.exists(filepath_cache + ".jpeg"): + imgurl = "/" + filepath_cache + ".jpeg" + + + # check if custom desc exists + if os.path.exists(filepath + ".txt"): + with open(filepath + ".txt","r") as descfile: + desc = descfile.read().replace("\n","") + + #check if cached desc exists + elif os.path.exists(filepath_cache + ".txt"): + with open(filepath_cache + ".txt","r") as descfile: + desc = descfile.read().replace("\n","") + + try: + return {"image":imgurl,"info":desc} + except NameError: + pass + #is this pythonic? + + + # if we neither have a custom image nor a cached version, we return the address from lastfm, but cache that image for later use + with open("apikey","r") as keyfile: + apikey = keyfile.read().replace("\n","") + + + try: + url = "https://ws.audioscrobbler.com/2.0/?method=artist.getinfo&artist=" + urllib.parse.quote(artist) + "&api_key=" + apikey + "&format=json" + response = urllib.request.urlopen(url) + lastfm_data = json.loads(response.read()) + try: + imgurl + except NameError: + imgurl = lastfm_data["artist"]["image"][2]["#text"] + _thread.start_new_thread(cacheImage,(imgurl,"info/artists_cache",filename)) + try: + desc + except NameError: + desc = lastfm_data["artist"]["bio"]["summary"] + with open(filepath_cache + ".txt","w") as descfile: + descfile.write(desc) + # this feels so dirty + + + return {"image":imgurl,"info":desc} + except: + return {"image":"/info/artists/default.jpg","info":"No information available"} + + + +def cacheImage(url,path,filename): + import urllib.request + response = urllib.request.urlopen(url) + target = path + "/" + filename + "." + response.info().get_content_subtype() + urllib.request.urlretrieve(url,target) diff --git a/website/artist.py b/website/artist.py index 1083181..fc39c67 100644 --- a/website/artist.py +++ b/website/artist.py @@ -1,34 +1,14 @@ import urllib import json - - -#def page(keys): -# -# txt_keys = replace(keys) -# -# -# with open("website/artist.html","r") as htmlfile: -# html = htmlfile.read() -# -# -# -# for k in txt_keys: -# html = html.replace(k,txt_keys[k]) -# -# return html def replacedict(keys,dbport): + from utilities import getArtistInfo - with open("website/apikey","r") as keyfile: - apikey = keyfile.read().replace("\n","") - - url = "https://ws.audioscrobbler.com/2.0/?method=artist.getinfo&artist=" + urllib.parse.quote(keys["artist"]) + "&api_key=" + apikey + "&format=json" - response = urllib.request.urlopen(url) - lastfm_data = json.loads(response.read()) - imgurl = lastfm_data["artist"]["image"][2]["#text"] - desc = lastfm_data["artist"]["bio"]["summary"] + info = getArtistInfo(keys["artist"]) + imgurl = info.get("image") + desc = info.get("info") response = urllib.request.urlopen("http://localhost:" + str(dbport) + "/artistinfo?artist=" + urllib.parse.quote(keys["artist"])) db_data = json.loads(response.read())