replaced bs4 with HTMLParser

This commit is contained in:
Martin Wagner 2022-03-15 19:50:23 +01:00
parent 0c6fc5ac19
commit 721db17d63
3 changed files with 30 additions and 20 deletions

View File

@ -59,7 +59,6 @@ Dependencies:
Python modules: Python modules:
- mpd (python-mpd2 >=1.1) - mpd (python-mpd2 >=1.1)
- gi (Gtk, Gio, Gdk, GdkPixbuf, Pango, GObject, GLib) - gi (Gtk, Gio, Gdk, GdkPixbuf, Pango, GObject, GLib)
- bs4 (beautifulsoup)
Run: Run:
```bash ```bash

View File

@ -1,3 +1,2 @@
beautifulsoup4
python-mpd2 >=1.1 python-mpd2 >=1.1
PyGObject PyGObject

View File

@ -21,8 +21,9 @@ import gi
gi.require_version("Gtk", "3.0") gi.require_version("Gtk", "3.0")
from gi.repository import Gtk, Gio, Gdk, GdkPixbuf, Pango, GObject, GLib from gi.repository import Gtk, Gio, Gdk, GdkPixbuf, Pango, GObject, GLib
from mpd import MPDClient, base as MPDBase from mpd import MPDClient, base as MPDBase
from bs4 import BeautifulSoup from urllib import request
import urllib from urllib.error import URLError
from html.parser import HTMLParser
import threading import threading
import functools import functools
import itertools import itertools
@ -2736,6 +2737,27 @@ class PlaylistWindow(Gtk.Overlay):
# cover and lyrics # # cover and lyrics #
#################### ####################
class LetrasParser(HTMLParser):
    """Collect the text found inside the ``<div id="letra-cnt">`` element.

    Feed the HTML document with ``feed()``; afterwards ``text`` holds every
    text node that appeared inside the container, each followed by a
    newline, with one additional newline appended at every ``</p>``.
    ``text`` stays empty when the container never occurs in the input.
    """

    def __init__(self):
        super().__init__()
        # True while the parser is positioned inside the container element.
        self._found_text = False
        # Number of currently open <div> elements counted from the container
        # itself (1 == only the container is open). Needed so that a nested
        # <div>...</div> does not end the capture prematurely.
        self._div_depth = 0
        # Accumulated text of the container.
        self.text = ""

    def handle_starttag(self, tag, attrs):
        """Start capturing at the container and track nested ``<div>`` tags."""
        if tag != "div":
            return
        if self._found_text:
            # A <div> nested in the container: remember it so that its
            # closing tag is not mistaken for the end of the container.
            self._div_depth += 1
        elif ("id", "letra-cnt") in attrs:
            self._found_text = True
            self._div_depth = 1

    def handle_endtag(self, tag):
        """Separate paragraphs and stop capturing when the container closes."""
        if not self._found_text:
            return
        if tag == "p":
            # Extra newline so that paragraphs are separated by a blank line.
            self.text += "\n"
        elif tag == "div":
            self._div_depth -= 1
            if self._div_depth == 0:
                # The container's own closing tag was reached.
                self._found_text = False

    def handle_data(self, data):
        """Append a text node to ``text`` while inside the container."""
        if self._found_text and data:
            self.text += data + "\n"
class LyricsWindow(Gtk.ScrolledWindow): class LyricsWindow(Gtk.ScrolledWindow):
def __init__(self, client, settings): def __init__(self, client, settings):
super().__init__() super().__init__()
@ -2780,28 +2802,18 @@ class LyricsWindow(Gtk.ScrolledWindow):
for char1, char2 in replaces: for char1, char2 in replaces:
title=title.replace(char1, char2) title=title.replace(char1, char2)
artist=artist.replace(char1, char2) artist=artist.replace(char1, char2)
with urllib.request.urlopen(f"https://www.letras.mus.br/winamp.php?musica={title}&artista={artist}") as response: parser=LetrasParser()
soup=BeautifulSoup(response.read(), "html.parser") with request.urlopen(request.quote(f"https://www.letras.mus.br/winamp.php?musica={title}&artista={artist}", safe=':/')) as response:
soup=soup.find(id="letra-cnt") parser.feed(response.read().decode("utf-8"))
if soup is None: if not parser.text:
raise ValueError("Not found") raise ValueError("Not found")
paragraphs=[i for i in soup.children][1] # remove unneded paragraphs (NavigableString) return parser.text.strip("\n ")
lyrics=""
for paragraph in paragraphs:
for line in paragraph.stripped_strings:
lyrics+=line+"\n"
lyrics+="\n"
output=lyrics[:-2] # omit last two newlines
if output:
return output
else: # assume song is instrumental when lyrics are empty
return "Instrumental"
def _display_lyrics(self, current_song): def _display_lyrics(self, current_song):
idle_add(self._text_buffer.set_text, _("searching…"), -1) idle_add(self._text_buffer.set_text, _("searching…"), -1)
try: try:
text=self._get_lyrics(current_song["title"][0], current_song["artist"][0]) text=self._get_lyrics(current_song["title"][0], current_song["artist"][0])
except urllib.error.URLError: except URLError:
self._displayed_song_file=None self._displayed_song_file=None
text=_("connection error") text=_("connection error")
except ValueError: except ValueError: