Now passing Python's built-in 'html.parser' to BeautifulSoup explicitly

This commit is contained in:
Martin Wagner 2020-09-23 21:36:38 +02:00
parent c8ef1c22f0
commit be1a2446da

View File

@ -19,7 +19,7 @@
# USA # USA
import requests import requests
from bs4 import BeautifulSoup, Comment from bs4 import BeautifulSoup
class LyricsHelper(object): class LyricsHelper(object):
def __init__(self, debug=False): def __init__(self, debug=False):
@ -37,7 +37,7 @@ class LyricsHelper(object):
song=song.replace(char1, char2) song=song.replace(char1, char2)
self._debug_print('http://www.lyriki.com/{0}:{1}'.format(singer,song)) self._debug_print('http://www.lyriki.com/{0}:{1}'.format(singer,song))
r=requests.get('http://www.lyriki.com/{0}:{1}'.format(singer,song)) r=requests.get('http://www.lyriki.com/{0}:{1}'.format(singer,song))
s=BeautifulSoup(r.text) s=BeautifulSoup(r.text, 'html.parser')
lyrics=s.p lyrics=s.p
if lyrics is None: if lyrics is None:
raise ValueError("Not found") raise ValueError("Not found")
@ -58,7 +58,7 @@ class LyricsHelper(object):
song=song.replace(char1, char2) song=song.replace(char1, char2)
self._debug_print('https://www.songlyrics.com/{0}/{1}-lyrics/'.format(singer,song)) self._debug_print('https://www.songlyrics.com/{0}/{1}-lyrics/'.format(singer,song))
r=requests.get('https://www.songlyrics.com/{0}/{1}-lyrics/'.format(singer,song)) r=requests.get('https://www.songlyrics.com/{0}/{1}-lyrics/'.format(singer,song))
s=BeautifulSoup(r.text) s=BeautifulSoup(r.text, 'html.parser')
lyrics=s.find(id="songLyricsDiv") lyrics=s.find(id="songLyricsDiv")
if lyrics is None: if lyrics is None:
raise ValueError("Not found") raise ValueError("Not found")
@ -79,13 +79,13 @@ class LyricsHelper(object):
song=song.replace(char1, char2) song=song.replace(char1, char2)
self._debug_print('https://www.letras.mus.br/winamp.php?musica={1}&artista={0}'.format(singer,song)) self._debug_print('https://www.letras.mus.br/winamp.php?musica={1}&artista={0}'.format(singer,song))
r=requests.get('https://www.letras.mus.br/winamp.php?musica={1}&artista={0}'.format(singer,song)) r=requests.get('https://www.letras.mus.br/winamp.php?musica={1}&artista={0}'.format(singer,song))
s=BeautifulSoup(r.text) s=BeautifulSoup(r.text, 'html.parser')
s=s.find(id="letra-cnt") s=s.find(id="letra-cnt")
if s is None: if s is None:
raise ValueError("Not found") raise ValueError("Not found")
pragraphs=[i for i in s.children][2:-1] # remove unneded pragraphs paragraphs=[i for i in s.children][1] # remove unneded paragraphs (NavigableString)
lyrics="" lyrics=""
for p in pragraphs: for p in paragraphs:
for line in p.stripped_strings: for line in p.stripped_strings:
lyrics+=line+'\n' lyrics+=line+'\n'
lyrics+='\n' lyrics+='\n'