maloja/maloja/images.py

310 lines
8.2 KiB
Python
Raw Permalink Normal View History

from .pkg_global.conf import data_dir, malojaconfig
from . import thirdparty
from . import database
2020-09-04 03:42:01 +03:00
from doreah.logging import log
import itertools
import os
import urllib
import random
import base64
import requests
import datauri
import io
from threading import Thread, Timer, BoundedSemaphore
2020-09-04 03:42:01 +03:00
import re
2020-09-04 14:59:04 +03:00
import datetime
2022-02-17 09:35:05 +03:00
import sqlalchemy as sql
2018-12-17 17:10:10 +03:00
2022-02-17 09:35:05 +03:00
# Image cache database: one SQLite table per entity type, both with the same layout.
engine = sql.create_engine(f"sqlite:///{data_dir['cache']('images.sqlite')}", echo = False)
meta = sql.MetaData()

DB = {
    tablename: sql.Table(
        tablename, meta,
        sql.Column('id',sql.Integer,primary_key=True),  # id of the track / artist the image belongs to
        sql.Column('url',sql.String),                    # image location; NULL when "no image" is cached
        sql.Column('expire',sql.Integer),                # unix timestamp after which the entry is ignored
        sql.Column('raw',sql.String)                     # downloaded image as data URI, NULL if not proxied
    )
    for tablename in ('artists','tracks')
}

# create the tables on first start
meta.create_all(engine)
def get_image_from_cache(id,table):
    """Look up the still-valid cache entry for an entity.

    id: primary key of the entity in the given cache table.
    table: key into DB, i.e. 'artists' or 'tracks'.

    Returns None when there is no unexpired entry. Otherwise returns a dict
    {'type':'raw','value':<stored raw data>} if raw data was stored, or
    {'type':'url','value':<url>} if not. In the latter case the value is
    None when the absence of an image was cached.
    """
    cache_table = DB[table]
    current_time = int(datetime.datetime.now().timestamp())
    query = cache_table.select().where(
        cache_table.c.id==id,
        cache_table.c.expire>current_time
    )
    with engine.begin() as conn:
        rows = conn.execute(query).all()

    if not rows:
        return None # no cache entry

    entry = rows[0]
    if entry.raw is None:
        return {'type':'url','value':entry.url} # value is None if nonexistence cached
    return {'type':'raw','value':entry.raw}
2022-02-17 09:35:05 +03:00
def set_image_in_cache(id,table,url):
    """Store (or replace) the cached image information for an entity.

    id: primary key of the entity in the given cache table.
    table: key into DB, i.e. 'artists' or 'tracks'.
    url: image location, or None to remember that no image exists.

    The entry lifetime is taken from CACHE_EXPIRE_NEGATIVE (url is None) or
    CACHE_EXPIRE_POSITIVE (otherwise), both given in days. The result of
    dl_image(url) is stored alongside the url.
    """
    now = int(datetime.datetime.now().timestamp())
    lifetime_setting = "CACHE_EXPIRE_NEGATIVE" if url is None else "CACHE_EXPIRE_POSITIVE"
    expire = now + (malojaconfig[lifetime_setting] * 24 * 3600)

    # download before touching the database so the old entry stays usable meanwhile
    raw = dl_image(url)

    cache_table = DB[table]
    # Replace the row within a single transaction: with separate transactions two
    # concurrent callers could both delete first and then collide on the primary
    # key when inserting.
    with engine.begin() as conn:
        conn.execute(cache_table.delete().where(cache_table.c.id==id))
        conn.execute(cache_table.insert().values(
            id=id,
            url=url,
            expire=expire,
            raw=raw
        ))
2022-02-17 10:42:33 +03:00
def remove_image_from_cache(id,table):
    """Delete the cache entry of the entity with the given id from DB[table]."""
    cache_table = DB[table]
    deletion = cache_table.delete().where(cache_table.c.id==id)
    with engine.begin() as conn:
        conn.execute(deletion)
2022-02-17 09:35:05 +03:00
def dl_image(url,timeout=30):
    """Download an image and return it as a base64 data URI for local caching.

    url: location of the image; None and paths starting with "/" (local
        images) are not downloaded.
    timeout: seconds to wait for the remote server before giving up.

    Returns the data URI, or None when PROXY_IMAGES is disabled, when there
    is nothing to download, or when the download fails.
    """
    if not malojaconfig["PROXY_IMAGES"]: return None
    if url is None: return None
    if url.startswith("/"): return None #local image

    try:
        # without a timeout a stalled server would block the caller indefinitely
        r = requests.get(url,timeout=timeout)
        # an error response must not be stored as if it were the image
        r.raise_for_status()
        mime = r.headers.get('content-type') or 'image/jpg'
        uri = datauri.DataURI.make(mime,charset='ascii',base64=True,data=r.content)
    except Exception:
        # best effort: callers fall back to the plain url when nothing was downloaded
        log(f"Image {url} could not be downloaded for local caching")
        return None

    log(f"Downloaded {url} for local caching")
    return uri
2022-02-17 09:35:05 +03:00
2022-03-26 07:49:30 +03:00
### getting images for any website embedding now ALWAYS returns just the generic link
### even if we have already cached it, we will handle that on request
def get_track_image(track=None,track_id=None):
    """Return the generic image link for a track.

    The track may be given directly by its id; otherwise the id is looked up
    in the database from the track itself. No image is resolved here.
    """
    resolved_id = database.sqldb.get_track_id(track) if track_id is None else track_id
    return f"/image?type=track&id={resolved_id}"
def get_artist_image(artist=None,artist_id=None):
    """Return the generic image link for an artist.

    The artist may be given directly by its id; otherwise the id is looked up
    in the database from the artist itself. No image is resolved here.
    """
    resolved_id = database.sqldb.get_artist_id(artist) if artist_id is None else artist_id
    return f"/image?type=artist&id={resolved_id}"
# Held by resolve_track_image / resolve_artist_image for the whole lookup,
# so at most 8 image resolutions run at the same time.
resolve_semaphore = BoundedSemaphore(8)
2022-03-26 07:49:30 +03:00
def resolve_track_image(track_id):
    """Find the image for a track, using the cache where possible.

    Order of preference: valid cache entry, randomly chosen local image (only
    if USE_LOCAL_IMAGES is set), third party services. Anything that was not
    served from the cache is written to it before returning.

    Returns the cache entry as delivered by get_image_from_cache, or
    {'type':'url','value':<url>} for a freshly resolved image.
    """
    with resolve_semaphore:
        # check cache
        cached = get_image_from_cache(track_id,'tracks')
        if cached is not None:
            return cached

        track = database.sqldb.get_track(track_id)

        # local image
        if malojaconfig["USE_LOCAL_IMAGES"]:
            local_candidates = local_files(artists=track['artists'],title=track['title'])
        else:
            local_candidates = []

        if local_candidates:
            image_url = urllib.parse.quote(random.choice(local_candidates))
        else:
            # third party
            image_url = thirdparty.get_image_track_all((track['artists'],track['title']))

        set_image_in_cache(track_id,'tracks',image_url)
        return {'type':'url','value':image_url}
2022-02-17 09:35:05 +03:00
2022-03-26 07:49:30 +03:00
def resolve_artist_image(artist_id):
    """Find the image for an artist, using the cache where possible.

    Order of preference: valid cache entry, randomly chosen local image (only
    if USE_LOCAL_IMAGES is set), third party services. Anything that was not
    served from the cache is written to it before returning.

    Returns the cache entry as delivered by get_image_from_cache, or
    {'type':'url','value':<url>} for a freshly resolved image.
    """
    with resolve_semaphore:
        # check cache
        cached = get_image_from_cache(artist_id,'artists')
        if cached is not None:
            return cached

        artist = database.sqldb.get_artist(artist_id)

        # local image
        if malojaconfig["USE_LOCAL_IMAGES"]:
            local_candidates = local_files(artist=artist)
        else:
            local_candidates = []

        if local_candidates:
            image_url = urllib.parse.quote(random.choice(local_candidates))
        else:
            # third party
            image_url = thirdparty.get_image_artist_all(artist)

        set_image_in_cache(artist_id,'artists',image_url)
        return {'type':'url','value':image_url}
2022-02-17 09:35:05 +03:00
2019-04-03 17:03:48 +03:00
def clean(name):
    """Reduce a name to its alphanumeric characters.

    Emojis, punctuation and whitespace are dropped; letters and digits of any
    script are kept.
    """
    kept = [char for char in name if char.isalnum()]
    return "".join(kept).strip()
def get_all_possible_filenames(artist=None,artists=None,title=None):
    """List the names under which an image for a track or artist may be stored.

    Pass artists and title for a track, or artist for an artist. The names are
    relative to the images folder ("tracks/..." or "artists/...") and carry no
    file extension.

    Returns a list without duplicates in a stable order: names built from the
    cleaned input first, then names restricted to ASCII letters and digits,
    each directly followed by its lowercase form. Returns an empty list when
    neither a track nor an artist is given.
    """
    # check if we're dealing with a track or artist, then clean up names
    # (only remove non-alphanumeric, allow korean and stuff)
    if title is not None and artists is not None:
        superfolder = "tracks/"
        title, artists = clean(title), [clean(a) for a in artists]
        safetitle = re.sub("[^a-zA-Z0-9]","",title)
        safeartists = [re.sub("[^a-zA-Z0-9]","",a) for a in artists]

        # every artist order for small groups, one sorted order for larger ones
        if len(artists) < 4:
            unsafeperms = itertools.permutations(artists)
            safeperms = itertools.permutations(safeartists)
        else:
            unsafeperms = [sorted(artists)]
            safeperms = [sorted(safeartists)]

        # track names always contain the "_" separator, so none of them is empty
        candidates = ["-".join(artistlist) + "_" + title for artistlist in unsafeperms]
        candidates += ["-".join(artistlist) + "_" + safetitle for artistlist in safeperms]

    elif artist is not None:
        superfolder = "artists/"
        artist = clean(artist)
        safeartist = re.sub("[^a-zA-Z0-9]","",artist)

        candidates = [name for name in (artist,safeartist) if name != ""]
        # nothing usable left after cleaning
        if not candidates:
            candidates = [str(hash(artist))]

    else:
        return []

    # dict.fromkeys removes duplicates but, unlike a set, keeps the order stable
    # between runs - callers that pick the first entry always get the same one
    with_lowercase = itertools.chain.from_iterable((name,name.lower()) for name in candidates)
    return [superfolder + name for name in dict.fromkeys(with_lowercase)]
def local_files(artist=None,artists=None,title=None):
    """Collect the locally stored images for a track or artist.

    Pass artists and title for a track, or artist for an artist. For every
    possible name, both a direct image file (<name>.<ext>) and all images
    inside a folder of that name are considered.

    Returns a list of paths starting with "/images/"; empty if nothing exists.
    """
    extensions = ("png","jpg","jpeg","gif")

    images = []
    for purename in get_all_possible_filenames(artist,artists,title):
        # direct files
        for ext in extensions:
            if os.path.exists(data_dir['images'](purename + "." + ext)):
                images.append("/images/" + purename + "." + ext)

        # folder
        try:
            entries = os.listdir(data_dir['images'](purename))
        except OSError:
            # no folder with this name (or it cannot be read): nothing to add.
            # Other errors are no longer silenced.
            continue
        images.extend(
            "/images/" + purename + "/" + f
            for f in entries
            if f.split(".")[-1] in extensions
        )

    return images
2022-10-19 20:53:13 +03:00
class MalformedB64(Exception):
    """Raised by set_image when the given string is not a base64 image data URI."""
def set_image(b64,**keys):
    """Save an uploaded image for a track or artist and make it the current one.

    b64: the image as data URI of the form "data:image/<type>;base64,<data>".
    keys: 'artists' and 'title' to identify a track, or 'artist' to identify
        an artist.

    The file is written into the first already existing image folder of the
    entity, or into a newly created one, and is entered into the image cache.

    Returns the URL path of the saved image ("/images/<folder>/<filename>").
    Raises MalformedB64 if b64 does not have the expected form or its payload
    cannot be decoded.
    """
    track = "title" in keys
    if track:
        entity = {'artists':keys['artists'],'title':keys['title']}
        id = database.sqldb.get_track_id(entity)
    else:
        entity = keys['artist']
        id = database.sqldb.get_artist_id(entity)

    log("Trying to set image, b64 string: " + str(b64[:30] + "..."),module="debug")

    regex = r"data:image/(\w+);base64,(.+)"
    match = re.fullmatch(regex,b64)
    if not match: raise MalformedB64()

    extension,payload = match.groups()
    try:
        imagedata = base64.b64decode(payload)
    except ValueError as exc:
        # binascii.Error (e.g. incorrect padding) is a ValueError; report it
        # the same way as any other malformed upload
        raise MalformedB64() from exc

    filename = "webupload" + str(int(datetime.datetime.now().timestamp())) + "." + extension

    # use the first folder that already exists, otherwise create the first candidate
    candidates = get_all_possible_filenames(**keys)
    for folder in candidates:
        if os.path.exists(data_dir['images'](folder)):
            break
    else:
        folder = candidates[0]
        os.makedirs(data_dir['images'](folder))

    with open(data_dir['images'](folder,filename),"wb") as f:
        f.write(imagedata)

    log("Saved image as " + data_dir['images'](folder,filename),module="debug")

    # this is a URL path, not a filesystem path: always join with forward slashes
    imageurl = "/".join(("/images",folder,filename))

    # set as current picture in rotation
    set_image_in_cache(id,'tracks' if track else 'artists',imageurl)

    return imageurl