Mirror of https://github.com/krateng/maloja.git (synced 2023-08-10 21:12:55 +03:00)
			
		
		
		
	Completely reworked album parsing
This commit is contained in:
		| @@ -148,7 +148,7 @@ def print_info(): | |||||||
| 		print("Could not determine dependency versions.") | 		print("Could not determine dependency versions.") | ||||||
| 	print() | 	print() | ||||||
|  |  | ||||||
| @mainfunction({"l":"level","v":"version","V":"version"},flags=['version','include_images'],shield=True) | @mainfunction({"l":"level","v":"version","V":"version"},flags=['version','include_images','prefer_existing'],shield=True) | ||||||
| def main(*args,**kwargs): | def main(*args,**kwargs): | ||||||
|  |  | ||||||
| 	actions = { | 	actions = { | ||||||
| @@ -166,7 +166,7 @@ def main(*args,**kwargs): | |||||||
| 		"generate":generate.generate_scrobbles,	# maloja generate 400 | 		"generate":generate.generate_scrobbles,	# maloja generate 400 | ||||||
| 		"export":tasks.export,					# maloja export | 		"export":tasks.export,					# maloja export | ||||||
| 		"apidebug":apidebug.run,				# maloja apidebug | 		"apidebug":apidebug.run,				# maloja apidebug | ||||||
| 		"parsealbums":tasks.parse_albums,		# maloja parsealbums | 		"parsealbums":tasks.parse_albums,		# maloja parsealbums --strategy majority | ||||||
| 		# aux | 		# aux | ||||||
| 		"info":print_info | 		"info":print_info | ||||||
| 	} | 	} | ||||||
|   | |||||||
| @@ -483,14 +483,11 @@ def get_artist_id(artistname,create_new=True,dbconn=None): | |||||||
|  |  | ||||||
| @cached_wrapper | @cached_wrapper | ||||||
| @connection_provider | @connection_provider | ||||||
| def get_album_id(albumdict,create_new=True,dbconn=None): | def get_album_id(albumdict,create_new=True,ignore_albumartists=False,dbconn=None): | ||||||
| 	ntitle = normalize_name(albumdict['albumtitle']) | 	ntitle = normalize_name(albumdict['albumtitle']) | ||||||
| 	artist_ids = [get_artist_id(a,dbconn=dbconn) for a in albumdict.get('artists') or []] | 	artist_ids = [get_artist_id(a,dbconn=dbconn) for a in albumdict.get('artists') or []] | ||||||
| 	artist_ids = list(set(artist_ids)) | 	artist_ids = list(set(artist_ids)) | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| 	op = DB['albums'].select( | 	op = DB['albums'].select( | ||||||
| #		DB['albums'].c.id | #		DB['albums'].c.id | ||||||
| 	).where( | 	).where( | ||||||
| @@ -498,11 +495,14 @@ def get_album_id(albumdict,create_new=True,dbconn=None): | |||||||
| 	) | 	) | ||||||
| 	result = dbconn.execute(op).all() | 	result = dbconn.execute(op).all() | ||||||
| 	for row in result: | 	for row in result: | ||||||
|  | 		if ignore_albumartists: | ||||||
|  | 			return row.id | ||||||
|  | 		else: | ||||||
| 			# check if the artists are the same | 			# check if the artists are the same | ||||||
| 			foundtrackartists = [] | 			foundtrackartists = [] | ||||||
|  |  | ||||||
| 			op = DB['albumartists'].select( | 			op = DB['albumartists'].select( | ||||||
| #			DB['albumartists'].c.artist_id | 	#			DB['albumartists'].c.artist_id | ||||||
| 			).where( | 			).where( | ||||||
| 				DB['albumartists'].c.album_id==row.id | 				DB['albumartists'].c.album_id==row.id | ||||||
| 			) | 			) | ||||||
| @@ -1601,7 +1601,7 @@ def guess_albums(track_ids=None,replace=False,dbconn=None): | |||||||
| 				}} | 				}} | ||||||
| 				if len(artists) == 0: | 				if len(artists) == 0: | ||||||
| 					# for albums without artist, assume track artist | 					# for albums without artist, assume track artist | ||||||
| 					res[track_id]["guess_artists"] = True | 					res[track_id]["guess_artists"] = [] | ||||||
| 			else: | 			else: | ||||||
| 				res[track_id] = {"assigned":False,"reason":"Not enough data"} | 				res[track_id] = {"assigned":False,"reason":"Not enough data"} | ||||||
|  |  | ||||||
| @@ -1610,7 +1610,7 @@ def guess_albums(track_ids=None,replace=False,dbconn=None): | |||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| 	missing_artists = [track_id for track_id in res if res[track_id].get("guess_artists")] | 	missing_artists = [track_id for track_id in res if "guess_artists" in res[track_id]] | ||||||
|  |  | ||||||
| 	#we're pointlessly getting the albumartist names here even though the IDs would be enough | 	#we're pointlessly getting the albumartist names here even though the IDs would be enough | ||||||
| 	#but it's better for function separation I guess | 	#but it's better for function separation I guess | ||||||
| @@ -1627,10 +1627,7 @@ def guess_albums(track_ids=None,replace=False,dbconn=None): | |||||||
| 	result = dbconn.execute(op).all() | 	result = dbconn.execute(op).all() | ||||||
|  |  | ||||||
| 	for row in result: | 	for row in result: | ||||||
| 		res[row.track_id]["assigned"]["artists"].append(row.name) | 		res[row.track_id]["guess_artists"].append(row.name) | ||||||
| 	for track_id in res: |  | ||||||
| 		if res[track_id].get("guess_artists"): |  | ||||||
| 			del res[track_id]["guess_artists"] |  | ||||||
|  |  | ||||||
| 	return res | 	return res | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,19 +1,106 @@ | |||||||
| from doreah.io import col | from doreah.io import col | ||||||
|  |  | ||||||
| def parse_albums(replace=False): | def parse_albums(strategy=None,prefer_existing=False): | ||||||
|  |  | ||||||
|  | 	if strategy not in ("track","none","all","majority","most"): | ||||||
|  | 		print(""" | ||||||
|  | Please specify your album parsing strategy: | ||||||
|  |  | ||||||
|  |     --strategy           Specify what strategy to use when the scrobble contains | ||||||
|  |                          no information about album artists. | ||||||
|  |                          track      Take the track artists. This can lead to | ||||||
|  |                                     separate albums being created for compilation | ||||||
|  |                                     albums or albums that have collaboration tracks. | ||||||
|  |                          none       Merge all albums with the same name and assign | ||||||
|  |                                     'Various Artists' as the album artist. | ||||||
|  |                          all        Merge all albums with the same name and assign | ||||||
|  |                                     every artist that appears on the album as an album | ||||||
|  |                                     artist. | ||||||
|  |                          majority   Merge all albums with the same name and assign | ||||||
|  |                                     artists that appear in at least half the tracks | ||||||
|  |                                     of the album as album artists. [RECOMMENDED] | ||||||
|  |                          most       Merge all albums with the same name and assign | ||||||
|  |                                     the artist that appears most on the album as album | ||||||
|  |                                     artist. | ||||||
|  |     --prefer_existing    If an album with the same name already exists, use it | ||||||
|  |                          without further examination of track artists. | ||||||
|  | 		""") | ||||||
|  | 		return | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| 	from ...database.sqldb import guess_albums, get_album_id, add_track_to_album | 	from ...database.sqldb import guess_albums, get_album_id, add_track_to_album | ||||||
|  |  | ||||||
| 	print("Parsing album information...") | 	print("Parsing album information...") | ||||||
| 	result = guess_albums(replace=replace) | 	result = guess_albums() | ||||||
|  |  | ||||||
| 	result = {track_id:result[track_id] for track_id in result if result[track_id]["assigned"]} | 	result = {track_id:result[track_id] for track_id in result if result[track_id]["assigned"]} | ||||||
| 	print("Adding",len(result),"tracks to albums...") | 	print("Found",col['yellow'](len(result)),"Tracks to assign albums to") | ||||||
|  |  | ||||||
|  | 	result_authorative = {track_id:result[track_id] for track_id in result if result[track_id]["assigned"]["artists"]} | ||||||
|  | 	result_guesswork = {track_id:result[track_id] for track_id in result if not result[track_id]["assigned"]["artists"]} | ||||||
|  |  | ||||||
| 	i = 0 | 	i = 0 | ||||||
| 	for track_id in result: |  | ||||||
| 		album_id = get_album_id(result[track_id]["assigned"]) | 	def countup(i): | ||||||
| 		add_track_to_album(track_id,album_id) | 		i+=1 | ||||||
| 		i += 1 |  | ||||||
| 		if (i % 100) == 0: | 		if (i % 100) == 0: | ||||||
| 			print(i,"of",len(result)) | 			print(f"Added album information for {i} of {len(result)} tracks...") | ||||||
| 	print("Done!") | 		return i | ||||||
|  |  | ||||||
|  | 	for track_id in result_authorative: | ||||||
|  | 		albuminfo = result[track_id]['assigned'] | ||||||
|  | 		album_id = get_album_id(albuminfo) | ||||||
|  | 		add_track_to_album(track_id,album_id) | ||||||
|  | 		i=countup(i) | ||||||
|  |  | ||||||
|  | 	albums = {} | ||||||
|  | 	for track_id in result_guesswork: | ||||||
|  | 		albuminfo = result[track_id]['assigned'] | ||||||
|  |  | ||||||
|  | 		# check if already exists | ||||||
|  | 		if prefer_existing: | ||||||
|  | 			album_id = get_album_id(albuminfo,ignore_albumartists=True,create_new=False) | ||||||
|  | 			if album_id: | ||||||
|  | 				add_track_to_album(track_id,album_id) | ||||||
|  | 				i=countup(i) | ||||||
|  | 				continue | ||||||
|  |  | ||||||
|  | 		if strategy == 'track': | ||||||
|  | 			albuminfo['artists'] = result[track_id]['guess_artists'] | ||||||
|  | 			album_id = get_album_id(albuminfo) | ||||||
|  | 			add_track_to_album(track_id,album_id) | ||||||
|  | 			i=countup(i) | ||||||
|  | 			continue | ||||||
|  |  | ||||||
|  | 		if strategy == 'none': | ||||||
|  | 			albuminfo['artists'] = [] | ||||||
|  | 			album_id = get_album_id(albuminfo) | ||||||
|  | 			add_track_to_album(track_id,album_id) | ||||||
|  | 			i=countup(i) | ||||||
|  | 			continue | ||||||
|  |  | ||||||
|  | 		if strategy in ['all','majority','most']: | ||||||
|  | 			albums.setdefault(albuminfo['albumtitle'],{'track_ids':[],'artists':{}}) | ||||||
|  | 			albums[albuminfo['albumtitle']]['track_ids'].append(track_id) | ||||||
|  | 			for a in result[track_id]['guess_artists']: | ||||||
|  | 				albums[albuminfo['albumtitle']]['artists'].setdefault(a,0) | ||||||
|  | 				albums[albuminfo['albumtitle']]['artists'][a] += 1 | ||||||
|  |  | ||||||
|  |  | ||||||
|  | 	for title in albums: | ||||||
|  | 		artistoptions = albums[title]['artists'] | ||||||
|  | 		track_ids = albums[title]['track_ids'] | ||||||
|  | 		if strategy == 'all': | ||||||
|  | 			artists = [a for a in artistoptions] | ||||||
|  | 		elif strategy == 'majority': | ||||||
|  | 			artists = [a for a in artistoptions if artistoptions[a] >= (len(track_ids) / 2)] | ||||||
|  | 		elif strategy == 'most': | ||||||
|  | 			artists = [max(artistoptions,key=artistoptions.get)] | ||||||
|  |  | ||||||
|  | 		for track_id in track_ids: | ||||||
|  | 			album_id = get_album_id({'albumtitle':title,'artists':artists}) | ||||||
|  | 			add_track_to_album(track_id,album_id) | ||||||
|  | 			i=countup(i) | ||||||
|  |  | ||||||
|  | 	print(col['lawngreen']("Done!")) | ||||||
|   | |||||||
| @@ -90,7 +90,11 @@ | |||||||
| </table> | </table> | ||||||
|  |  | ||||||
|  |  | ||||||
| {% if info["isalbumartist"] %} | {% set albums_info = dbc.get_albums_artist_appears_on(filterkeys,limitkeys) %} | ||||||
|  | {% set ownalbums = albums_info.own_albums %} | ||||||
|  | {% set otheralbums = albums_info.appears_on %} | ||||||
|  |  | ||||||
|  | {% if ownalbums or otheralbums %} | ||||||
|  |  | ||||||
| {% if settings['ALBUM_SHOWCASE'] %} | {% if settings['ALBUM_SHOWCASE'] %} | ||||||
| 	<h2><a href='{{ mlj_uri.create_uri("/charts_albums",filterkeys) }}'>Albums</a></h2> | 	<h2><a href='{{ mlj_uri.create_uri("/charts_albums",filterkeys) }}'>Albums</a></h2> | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 krateng
					krateng