outsourced lyrics fetching

This commit is contained in:
Martin Wagner 2020-09-23 12:01:10 +02:00
parent 0bc5a7d46a
commit 48d54779a5
3 changed files with 134 additions and 70 deletions

View File

@ -23,8 +23,6 @@ gi.require_version('Gtk', '3.0')
gi.require_version('Notify', '0.7')
from gi.repository import Gtk, Gio, Gdk, GdkPixbuf, Pango, GObject, GLib, Notify
from mpd import MPDClient, base as MPDBase
import requests
from bs4 import BeautifulSoup, Comment
import threading
import locale
import gettext
@ -34,6 +32,7 @@ import datetime
import os
import sys
import re
from mpdevil.lyrics import LyricsHelper
# MPRIS modules
import dbus
@ -2420,6 +2419,7 @@ class LyricsWindow(FocusFrame):
self._settings=settings
self._client=client
self._displayed_song_file=None
self._lyrics_helper=LyricsHelper(debug=False)
# text view
self._text_view=Gtk.TextView(
@ -2466,7 +2466,9 @@ class LyricsWindow(FocusFrame):
def _display_lyrics(self, current_song):
    """Look up the lyrics for *current_song* and show them in the text buffer.

    A "searching..." placeholder is shown first; it is replaced by the
    lyrics, or by a "lyrics not found" message when the helper returns None.
    All buffer updates are scheduled through GLib.idle_add rather than
    applied directly.
    NOTE(review): presumably this runs on the update thread started in
    _refresh -- confirm against the thread's target.

    current_song -- mapping providing the "artist" and "title" entries
    """
    GLib.idle_add(self._text_buffer.set_text, _("searching..."), -1)
    # Single lookup through the helper. The former additional call to
    # self._get_lyrics() was dropped: its result was overwritten right away,
    # so it only caused a second, redundant round of web requests.
    text=self._lyrics_helper.get_lyrics(current_song["artist"], current_song["title"])
    if text is None:
        text=_("lyrics not found")
    GLib.idle_add(self._text_buffer.set_text, text, -1)
def _refresh(self, *args):
@ -2482,73 +2484,6 @@ class LyricsWindow(FocusFrame):
)
update_thread.start()
def _get_lyrics_lyriki(self, singer, song):
    """Fetch lyrics from lyriki.com.

    singer -- artist name
    song -- song title

    Returns the lyrics as text with one line per "<br/>" of the page.
    Raises ValueError("Not found") when the page has no <p> element or
    only the "no text in this page" notice.
    """
    replaces=((' ', '_'),('.', '_'),('@', '_'),(',', '_'),(';', '_'),('&', '_'),('\\', '_'),('/', '_'),('"', '_'))
    for char1, char2 in replaces:
        # str.replace returns a new string, so the result has to be rebound;
        # otherwise the URL is built from the unmodified names.
        singer=singer.replace(char1, char2)
        song=song.replace(char1, char2)
    r=requests.get('http://www.lyriki.com/{0}:{1}'.format(singer,song))
    s=BeautifulSoup(r.text)
    lyrics=s.p  # first <p> element of the page
    if lyrics is None:
        raise ValueError("Not found")
    elif str(lyrics).startswith("<p>There is currently no text in this page."):
        raise ValueError("Not found")
    # [3:-4] cuts the enclosing "<p>" and "</p>" off the serialised tag
    output=str(lyrics)[3:-4].replace('\n','').replace('<br/>','\n')
    return output
def _get_lyrics_songlyrics(self, singer, song):
    """Fetch lyrics from songlyrics.com.

    singer -- artist name
    song -- song title

    Returns the lyrics as text with one line per "<br/>" of the page.
    Raises ValueError("Not found") when the page has no element with id
    "songLyricsDiv" or that element only holds the "Sorry, we have no"
    notice.
    """
    replaces=((' ', '-'),('.', '-'),('_', '-'),('@', '-'),(',', '-'),(';', '-'),('&', '-'),('\\', '-'),('/', '-'),('"', '-'))
    for char1, char2 in replaces:
        # str.replace returns a new string, so the result has to be rebound;
        # otherwise the URL is built from the unmodified names.
        singer=singer.replace(char1, char2)
        song=song.replace(char1, char2)
    r=requests.get('https://www.songlyrics.com/{0}/{1}-lyrics/'.format(singer,song))
    s=BeautifulSoup(r.text)
    lyrics=s.find(id="songLyricsDiv")
    if lyrics is None:
        raise ValueError("Not found")
    # [58:-4] presumably strips the element's opening and closing tag -- the
    # fixed offset depends on the exact markup the site emits.
    elif str(lyrics)[58:-4].startswith("Sorry, we have no"):
        raise ValueError("Not found")
    output=str(lyrics)[58:-4].replace('\n','').replace('\r','').replace(' /', '').replace('<br/>','\n')
    return output
def _get_lyrics_letras(self, singer, song):
    """Fetch lyrics from letras.mus.br.

    singer -- artist name
    song -- song title

    Returns the lyrics with paragraphs separated by an empty line, or the
    string "Instrumental" when the page contains no lyric text.
    Raises ValueError("Not found") when the page has no element with id
    "letra-cnt".
    """
    replaces=((' ', '+'),('.', '_'),('@', '_'),(',', '_'),(';', '_'),('&', '_'),('\\', '_'),('/', '_'),('"', '_'))
    for char1, char2 in replaces:
        # str.replace returns a new string, so the result has to be rebound;
        # otherwise the URL is built from the unmodified names.
        singer=singer.replace(char1, char2)
        song=song.replace(char1, char2)
    r=requests.get('https://www.letras.mus.br/winamp.php?musica={1}&artista={0}'.format(singer,song))
    s=BeautifulSoup(r.text)
    s=s.find(id="letra-cnt")
    if s is None:
        raise ValueError("Not found")
    pragraphs=[i for i in s.children][2:-1]  # remove unneeded paragraphs
    lyrics=""
    for p in pragraphs:
        for line in p.stripped_strings:
            lyrics+=line+'\n'
        lyrics+='\n'  # empty line between paragraphs
    output=lyrics[:-2]  # omit last two newlines
    if output != "":  # assume song is instrumental when lyrics are empty
        return output
    else:
        return "Instrumental"
def _get_lyrics(self, singer, song):
    """Return the lyrics from the first provider that has them.

    The providers are asked in the order letras, lyriki, songlyrics. A
    provider signals "no result" by raising ValueError, in which case the
    next one is tried. When every provider fails, the translated message
    "lyrics not found" is returned.
    """
    lookups=(self._get_lyrics_letras, self._get_lyrics_lyriki, self._get_lyrics_songlyrics)
    for lookup in lookups:
        try:
            return lookup(singer, song)
        except ValueError:
            continue
    return _("lyrics not found")
def _on_disconnected(self, *args):
self._displayed_song_file=None
self._text_buffer.set_text("", -1)

20
mpdevil/__init__.py Normal file
View File

@ -0,0 +1,20 @@
#!/usr/bin/python3
# -*- coding: utf-8 -*-
#
# mpdevil - MPD Client.
# Copyright 2020 Martin Wagner <martin.wagner.dev@gmail.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 3 of the License.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA

109
mpdevil/lyrics.py Normal file
View File

@ -0,0 +1,109 @@
#!/usr/bin/python3
# -*- coding: utf-8 -*-
#
# mpdevil - MPD Client.
# Copyright 2020 Martin Wagner <martin.wagner.dev@gmail.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 3 of the License.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
import requests
from bs4 import BeautifulSoup, Comment
class LyricsHelper(object):
    """Look up song lyrics by scraping a fixed list of lyrics websites.

    The providers (letras.mus.br, lyriki.com, songlyrics.com) are queried
    in that order by get_lyrics(); the first one that delivers a result wins.
    """

    # (old, new) character substitutions applied to artist and title before
    # they are inserted into the provider's URL template.
    _LYRIKI_REPLACES = ((' ', '_'), ('.', '_'), ('@', '_'), (',', '_'), (';', '_'), ('&', '_'), ('\\', '_'), ('/', '_'), ('"', '_'))
    _SONGLYRICS_REPLACES = ((' ', '-'), ('.', '-'), ('_', '-'), ('@', '-'), (',', '-'), (';', '-'), ('&', '-'), ('\\', '-'), ('/', '-'), ('"', '-'))
    _LETRAS_REPLACES = ((' ', '+'), ('.', '_'), ('@', '_'), (',', '_'), (';', '_'), ('&', '_'), ('\\', '_'), ('/', '_'), ('"', '_'), ('(', '_'), (')', '_'))

    # URL templates; {0} is the artist, {1} the title.
    _LYRIKI_URL = 'http://www.lyriki.com/{0}:{1}'
    _SONGLYRICS_URL = 'https://www.songlyrics.com/{0}/{1}-lyrics/'
    _LETRAS_URL = 'https://www.letras.mus.br/winamp.php?musica={1}&artista={0}'

    def __init__(self, debug=False, timeout=10):
        """Create a helper.

        debug -- when true, progress information is printed to stdout
        timeout -- seconds to wait for a provider before giving up on it;
                   None waits indefinitely
        """
        self._debug = debug
        self._timeout = timeout

    def _debug_print(self, text):
        """Print *text* if debug output is enabled."""
        if self._debug:
            print(text)

    @staticmethod
    def _build_url(template, replaces, singer, song):
        """Return *template* filled with the sanitised artist and title.

        Every (old, new) pair of *replaces* is applied, in order, to both
        names before they are substituted into the template.
        """
        for old, new in replaces:
            singer = singer.replace(old, new)
            song = song.replace(old, new)
        return template.format(singer, song)

    def _fetch_soup(self, url):
        """Download *url* and return the parsed document."""
        self._debug_print(url)
        # Without a timeout a stalled server would block the lookup forever.
        response = requests.get(url, timeout=self._timeout)
        # NOTE(review): no parser is named, so bs4 picks the best one that is
        # installed. Left unchanged because the string offsets used by the
        # providers depend on the serialised markup.
        return BeautifulSoup(response.text)

    def _get_lyrics_lyriki(self, singer, song):
        """Fetch lyrics from lyriki.com.

        Returns the lyrics with one line per "<br/>" of the page.
        Raises ValueError("Not found") when the page has no <p> element or
        only the "no text in this page" notice.
        """
        self._debug_print("lyriki")
        url = self._build_url(self._LYRIKI_URL, self._LYRIKI_REPLACES, singer, song)
        lyrics = self._fetch_soup(url).p  # first <p> element of the page
        if lyrics is None:
            raise ValueError("Not found")
        if str(lyrics).startswith("<p>There is currently no text in this page."):
            raise ValueError("Not found")
        # Drop a wrapping <tt> element, if there is one, but keep its content.
        if lyrics.tt is not None:
            lyrics.tt.unwrap()
        # [3:-4] cuts the enclosing "<p>" and "</p>" off the serialised tag.
        return str(lyrics)[3:-4].replace('\n', '').replace('<br/>', '\n')

    def _get_lyrics_songlyrics(self, singer, song):
        """Fetch lyrics from songlyrics.com.

        Returns the lyrics with one line per "<br/>" of the page.
        Raises ValueError("Not found") when the page has no element with id
        "songLyricsDiv" or that element only holds the "Sorry, we have no"
        notice.
        """
        self._debug_print("songlyrics")
        url = self._build_url(self._SONGLYRICS_URL, self._SONGLYRICS_REPLACES, singer, song)
        lyrics = self._fetch_soup(url).find(id="songLyricsDiv")
        if lyrics is None:
            raise ValueError("Not found")
        # [58:-4] presumably strips the element's opening and closing tag --
        # the fixed offset depends on the exact markup the site emits.
        if str(lyrics)[58:-4].startswith("Sorry, we have no"):
            raise ValueError("Not found")
        # Drop a wrapping <i> element, if there is one, but keep its content.
        if lyrics.i is not None:
            lyrics.i.unwrap()
        text = str(lyrics)[58:-4]
        return text.replace('\n', '').replace('\r', '').replace(' /', '').replace('<br/>', '\n')

    def _get_lyrics_letras(self, singer, song):
        """Fetch lyrics from letras.mus.br.

        Returns the lyrics with paragraphs separated by an empty line, or
        the string "Instrumental" when the page contains no lyric text.
        Raises ValueError("Not found") when the page has no element with id
        "letra-cnt".
        """
        self._debug_print("letras")
        url = self._build_url(self._LETRAS_URL, self._LETRAS_REPLACES, singer, song)
        container = self._fetch_soup(url).find(id="letra-cnt")
        if container is None:
            raise ValueError("Not found")
        paragraphs = list(container.children)[2:-1]  # remove unneeded paragraphs
        pieces = []
        for paragraph in paragraphs:
            pieces.extend(line + '\n' for line in paragraph.stripped_strings)
            pieces.append('\n')  # empty line between paragraphs
        output = "".join(pieces)[:-2]  # omit last two newlines
        # assume song is instrumental when lyrics are empty
        return output if output else "Instrumental"

    def get_lyrics(self, singer, song):
        """Return the lyrics of *song* by *singer*, or None if unavailable.

        Each provider is tried in turn. A provider that has no result
        (ValueError) or cannot be reached (requests' RequestException,
        including timeouts) is skipped so the remaining ones still get a
        chance.
        """
        self._debug_print("fetching lyrics for '{}' - '{}'".format(singer, song))
        providers = (self._get_lyrics_letras, self._get_lyrics_lyriki, self._get_lyrics_songlyrics)
        for provider in providers:
            try:
                return provider(singer, song)
            except ValueError:
                continue
            except requests.exceptions.RequestException as error:
                self._debug_print("request failed: {}".format(error))
                continue
        return None