200 lines
8.0 KiB
C++
200 lines
8.0 KiB
C++
/*
|
|
* Cantata
|
|
*
|
|
* Copyright (c) 2011-2014 Craig Drummond <craig.p.drummond@gmail.com>
|
|
*
|
|
* ----
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; see the file COPYING. If not, write to
|
|
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
|
* Boston, MA 02110-1301, USA.
|
|
*/
|
|
|
|
#include "rssparser.h"
|
|
#include <QXmlStreamReader>
|
|
#include <QStringList>
|
|
#include <QSet>
|
|
|
|
static const char * constITunesNameSpace = "http://www.itunes.com/dtds/podcast-1.0.dtd";
|
|
static const char * constMediaNameSpace = "http://search.yahoo.com/mrss/";
|
|
|
|
using namespace RssParser;
|
|
|
|
static bool parseUntil(QXmlStreamReader &reader, const QString &elem)
|
|
{
|
|
while (!reader.atEnd()) {
|
|
reader.readNext();
|
|
if (reader.isStartElement() && reader.name() == elem) {
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
static void consumeCurrentElement(QXmlStreamReader &reader)
|
|
{
|
|
int level = 1;
|
|
while (0!=level && !reader.atEnd()) {
|
|
switch (reader.readNext()) {
|
|
case QXmlStreamReader::StartElement: ++level; break;
|
|
case QXmlStreamReader::EndElement: --level; break;
|
|
default: break;
|
|
}
|
|
}
|
|
}
|
|
|
|
static QDateTime parseRfc822DateTime(const QString& text)
|
|
{
|
|
// This sucks but we need it because some podcasts don't quite follow the
|
|
// spec properly - they might have 1-digit hour numbers for example.
|
|
|
|
QRegExp re("([a-zA-Z]{3}),? (\\d{1,2}) ([a-zA-Z]{3}) (\\d{4}) (\\d{1,2}):(\\d{1,2}):(\\d{1,2})");
|
|
if (-1==re.indexIn(text)) {
|
|
return QDateTime();
|
|
}
|
|
|
|
return QDateTime(QDate::fromString(QString("%1 %2 %3 %4").arg(re.cap(1), re.cap(3), re.cap(2), re.cap(4)), Qt::TextDate),
|
|
QTime(re.cap(5).toInt(), re.cap(6).toInt(), re.cap(7).toInt()));
|
|
}
|
|
|
|
static QUrl parseImage(QXmlStreamReader &reader)
|
|
{
|
|
QUrl url;
|
|
while (!reader.atEnd()) {
|
|
reader.readNext();
|
|
if (reader.isStartElement()) {
|
|
if (QLatin1String("url")==reader.name()) {
|
|
url=QUrl::fromEncoded(reader.readElementText().toLatin1());
|
|
} else {
|
|
consumeCurrentElement(reader);
|
|
}
|
|
} else if (reader.isEndElement()) {
|
|
break;
|
|
}
|
|
}
|
|
return url;
|
|
}
|
|
|
|
static Episode parseEpisode(QXmlStreamReader &reader)
|
|
{
|
|
Episode ep;
|
|
bool isAudio=false;
|
|
QUrl guidUrl;
|
|
|
|
while (!reader.atEnd()) {
|
|
reader.readNext();
|
|
const QStringRef name = reader.name();
|
|
if (reader.isStartElement()) {
|
|
if (QLatin1String("title")==name) {
|
|
ep.name=reader.readElementText();
|
|
} else if (QLatin1String("duration")==name && constITunesNameSpace==reader.namespaceUri()) {
|
|
QStringList parts = reader.readElementText().split(':');
|
|
if (2==parts.count()) {
|
|
ep.duration=(parts[0].toInt() * 60) + parts[1].toInt();
|
|
} else if (parts.count()>=3) {
|
|
ep.duration=(parts[0].toInt() * 60*60) + (parts[1].toInt() * 60) + parts[2].toInt();
|
|
}
|
|
} else if (0==ep.duration && QLatin1String("content")==name && constMediaNameSpace==reader.namespaceUri()) {
|
|
ep.duration=reader.attributes().value(QLatin1String("duration")).toString().toUInt();
|
|
consumeCurrentElement(reader);
|
|
} else if (QLatin1String("enclosure")==name) {
|
|
static QSet<QString> audioFormats;
|
|
if (audioFormats.isEmpty()) {
|
|
audioFormats.insert(QLatin1String("mp3")); audioFormats.insert(QLatin1String("MP3"));
|
|
audioFormats.insert(QLatin1String("ogg")); audioFormats.insert(QLatin1String("OGG"));
|
|
audioFormats.insert(QLatin1String("wma")); audioFormats.insert(QLatin1String("WMA"));
|
|
}
|
|
QString type=reader.attributes().value(QLatin1String("type")).toString();
|
|
if (type.startsWith(QLatin1String("audio/")) || audioFormats.contains(type)) {
|
|
isAudio=true;
|
|
ep.url=QUrl::fromEncoded(reader.attributes().value(QLatin1String("url")).toString().toLatin1());
|
|
} else if (type.startsWith(QLatin1String("video/")) ) {
|
|
// At least one broken feed (BUG: 588) has the audio podcast listed as video/mp4,
|
|
// ...but the path ends in .mp3 !!!
|
|
QUrl url=QUrl::fromEncoded(reader.attributes().value(QLatin1String("url")).toString().toLatin1());
|
|
QString path=url.path();
|
|
if (path.endsWith(QLatin1String(".mp3"), Qt::CaseInsensitive) ||
|
|
path.endsWith(QLatin1String(".ogg"), Qt::CaseInsensitive) ||
|
|
path.endsWith(QLatin1String(".wma"), Qt::CaseInsensitive)) {
|
|
ep.url=url;
|
|
} else {
|
|
ep.video=true;
|
|
}
|
|
}
|
|
consumeCurrentElement(reader);
|
|
} else if (QLatin1String("guid")==name) {
|
|
guidUrl=QUrl(reader.readElementText());
|
|
} else if (QLatin1String("pubDate")==name) {
|
|
ep.publicationDate=parseRfc822DateTime(reader.readElementText());
|
|
} else {
|
|
consumeCurrentElement(reader);
|
|
}
|
|
} else if (reader.isEndElement()) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Sometimes the url entry in 'enclusure' is empty, but there is a url in 'guid' - so use
|
|
// that if present (BUG: 602)
|
|
if (isAudio && ep.url.isEmpty() && !guidUrl.isEmpty()) {
|
|
ep.url=guidUrl;
|
|
}
|
|
return ep;
|
|
}
|
|
|
|
Channel RssParser::parse(QIODevice *dev, bool getEpisodes, bool getDescription)
|
|
{
|
|
Channel ch;
|
|
QXmlStreamReader reader(dev);
|
|
if (parseUntil(reader, QLatin1String("rss")) && parseUntil(reader, QLatin1String("channel"))) {
|
|
while (!reader.atEnd()) {
|
|
reader.readNext();
|
|
|
|
if (reader.isStartElement()) {
|
|
const QStringRef name = reader.name();
|
|
if (ch.name.isEmpty() && QLatin1String("title")==name) {
|
|
ch.name=reader.readElementText();
|
|
} else if (QLatin1String("image")==name && ch.image.isEmpty()) {
|
|
if (constITunesNameSpace==reader.namespaceUri()) {
|
|
ch.image=reader.attributes().value(QLatin1String("href")).toString();
|
|
consumeCurrentElement(reader);
|
|
} else {
|
|
ch.image=parseImage(reader);
|
|
}
|
|
} else if (getEpisodes && QLatin1String("item")==name) {
|
|
Episode ep=parseEpisode(reader);
|
|
if (!ep.name.isEmpty() && !ep.url.isEmpty()) {
|
|
ch.episodes.append(ep);
|
|
} else if (ep.video) {
|
|
ch.video=true;
|
|
}
|
|
} else if (getDescription && QLatin1String("description")==name && ch.description.isEmpty()) {
|
|
ch.description=reader.readElementText();
|
|
} else if (getDescription && QLatin1String("summary")==name && ch.description.isEmpty() && constITunesNameSpace==reader.namespaceUri()) {
|
|
ch.description=reader.readElementText();
|
|
} else {
|
|
consumeCurrentElement(reader);
|
|
}
|
|
} else if (reader.isEndElement()) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (ch.video && !ch.episodes.isEmpty()) {
|
|
ch.video=false;
|
|
}
|
|
return ch;
|
|
}
|