diff options
author | tpearson <tpearson@283d02a7-25f6-0310-bc7c-ecb5cbfe19da> | 2010-03-01 19:17:32 +0000 |
---|---|---|
committer | tpearson <tpearson@283d02a7-25f6-0310-bc7c-ecb5cbfe19da> | 2010-03-01 19:17:32 +0000 |
commit | e38d2351b83fa65c66ccde443777647ef5cb6cff (patch) | |
tree | 1897fc20e9f73a81c520a5b9f76f8ed042124883 /src/fetch/animenfofetcher.cpp | |
download | tellico-e38d2351b83fa65c66ccde443777647ef5cb6cff.tar.gz tellico-e38d2351b83fa65c66ccde443777647ef5cb6cff.zip |
Added KDE3 version of Tellico
git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/applications/tellico@1097620 283d02a7-25f6-0310-bc7c-ecb5cbfe19da
Diffstat (limited to 'src/fetch/animenfofetcher.cpp')
-rw-r--r-- | src/fetch/animenfofetcher.cpp | 378 |
1 files changed, 378 insertions, 0 deletions
diff --git a/src/fetch/animenfofetcher.cpp b/src/fetch/animenfofetcher.cpp new file mode 100644 index 0000000..728c583 --- /dev/null +++ b/src/fetch/animenfofetcher.cpp @@ -0,0 +1,378 @@ +/*************************************************************************** + copyright : (C) 2006 by Robby Stephenson + email : [email protected] + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "animenfofetcher.h" +#include "messagehandler.h" +#include "../tellico_kernel.h" +#include "../tellico_utils.h" +#include "../collections/videocollection.h" +#include "../entry.h" +#include "../filehandler.h" +#include "../latin1literal.h" +#include "../imagefactory.h" +#include "../tellico_debug.h" + +#include <klocale.h> +#include <kconfig.h> +#include <kio/job.h> + +#include <qregexp.h> +#include <qlayout.h> +#include <qlabel.h> +#include <qfile.h> + +//#define ANIMENFO_TEST + +namespace { + static const char* ANIMENFO_BASE_URL = "http://www.animenfo.com/search.php"; +} + +using Tellico::Fetch::AnimeNfoFetcher; + +AnimeNfoFetcher::AnimeNfoFetcher(QObject* parent_, const char* name_ /*=0*/) + : Fetcher(parent_, name_), m_started(false) { +} + +QString AnimeNfoFetcher::defaultName() { + return QString::fromLatin1("AnimeNfo.com"); +} + +QString AnimeNfoFetcher::source() const { + return m_name.isEmpty() ? defaultName() : m_name; +} + +bool AnimeNfoFetcher::canFetch(int type) const { + return type == Data::Collection::Video; +} + +void AnimeNfoFetcher::readConfigHook(const KConfigGroup& config_) { + Q_UNUSED(config_); +} + +void AnimeNfoFetcher::search(FetchKey key_, const QString& value_) { + m_started = true; + m_matches.clear(); + +#ifdef ANIMENFO_TEST + KURL u = KURL::fromPathOrURL(QString::fromLatin1("/home/robby/animenfo.html")); +#else + KURL u(QString::fromLatin1(ANIMENFO_BASE_URL)); + u.addQueryItem(QString::fromLatin1("action"), QString::fromLatin1("Go")); + u.addQueryItem(QString::fromLatin1("option"), QString::fromLatin1("keywords")); + u.addQueryItem(QString::fromLatin1("queryin"), QString::fromLatin1("anime_titles")); + + if(!canFetch(Kernel::self()->collectionType())) { + message(i18n("%1 does not allow searching for this collection type.").arg(source()), MessageHandler::Warning); + stop(); + return; + } + + switch(key_) { + case Keyword: + u.addQueryItem(QString::fromLatin1("query"), value_); + break; + + default: + kdWarning() << "AnimeNfoFetcher::search() - key not recognized: " << key_ << endl; + stop(); + return; + } +#endif +// myDebug() << "AnimeNfoFetcher::search() - url: " << u.url() << endl; + + m_job = KIO::get(u, false, false); + connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)), + SLOT(slotData(KIO::Job*, const QByteArray&))); + connect(m_job, SIGNAL(result(KIO::Job*)), + SLOT(slotComplete(KIO::Job*))); +} + +void AnimeNfoFetcher::stop() { + if(!m_started) { + return; + } + + if(m_job) { + m_job->kill(); + m_job = 0; + } + m_data.truncate(0); + m_started = false; + emit signalDone(this); +} + +void AnimeNfoFetcher::slotData(KIO::Job*, const QByteArray& data_) { + QDataStream stream(m_data, IO_WriteOnly | IO_Append); + stream.writeRawBytes(data_.data(), data_.size()); +} + +void AnimeNfoFetcher::slotComplete(KIO::Job* job_) { +// myDebug() << "AnimeNfoFetcher::slotComplete()" << endl; + // since the fetch is done, don't worry about holding the job pointer + m_job = 0; + + if(job_->error()) { + job_->showErrorDialog(Kernel::self()->widget()); + stop(); + return; + } + + if(m_data.isEmpty()) { + myDebug() << "AnimeNfoFetcher::slotComplete() - no data" << endl; + stop(); + return; + } + + QString s = Tellico::decodeHTML(QString(m_data)); + + QRegExp infoRx(QString::fromLatin1("<td\\s+[^>]*class\\s*=\\s*[\"']anime_info[\"'][^>]*>(.*)</td>"), false); + infoRx.setMinimal(true); + QRegExp anchorRx(QString::fromLatin1("<a\\s+[^>]*href\\s*=\\s*[\"'](.*)[\"'][^>]*>(.*)</a>"), false); + anchorRx.setMinimal(true); + QRegExp yearRx(QString::fromLatin1("\\d{4}"), false); + + // search page comes in groups of threes + int n = 0; + QString u, t, y; + + for(int pos = infoRx.search(s); m_started && pos > -1; pos = infoRx.search(s, pos+1)) { + if(n == 0 && !u.isEmpty()) { + SearchResult* r = new SearchResult(this, t, y, QString()); + emit signalResultFound(r); + +#ifdef ANIMENFO_TEST + KURL url = KURL::fromPathOrURL(QString::fromLatin1("/home/robby/animetitle.html")); +#else + KURL url(QString::fromLatin1(ANIMENFO_BASE_URL), u); + url.setQuery(QString::null); +#endif + m_matches.insert(r->uid, url); + + u.truncate(0); + t.truncate(0); + y.truncate(0); + } + switch(n) { + case 0: // title and url + { + int pos2 = anchorRx.search(infoRx.cap(1)); + if(pos2 > -1) { + u = anchorRx.cap(1); + t = anchorRx.cap(2); + } + } + break; + case 1: // don't case + break; + case 2: + if(yearRx.exactMatch(infoRx.cap(1))) { + y = infoRx.cap(1); + } + break; + } + + n = (n+1)%3; + } + + // grab last response +#ifndef ANIMENFO_TEST + if(!u.isEmpty()) { + SearchResult* r = new SearchResult(this, t, y, QString()); + emit signalResultFound(r); + KURL url(QString::fromLatin1(ANIMENFO_BASE_URL), u); + url.setQuery(QString::null); + m_matches.insert(r->uid, url); + } +#endif + stop(); +} + +Tellico::Data::EntryPtr AnimeNfoFetcher::fetchEntry(uint uid_) { + // if we already grabbed this one, then just pull it out of the dict + Data::EntryPtr entry = m_entries[uid_]; + if(entry) { + return entry; + } + + KURL url = m_matches[uid_]; + if(url.isEmpty()) { + kdWarning() << "AnimeNfoFetcher::fetchEntry() - no url in map" << endl; + return 0; + } + + QString results = Tellico::decodeHTML(FileHandler::readTextFile(url, true)); + if(results.isEmpty()) { + myDebug() << "AnimeNfoFetcher::fetchEntry() - no text results" << endl; + return 0; + } + +#if 0 + kdWarning() << "Remove debug from animenfofetcher.cpp" << endl; + QFile f(QString::fromLatin1("/tmp/test.html")); + if(f.open(IO_WriteOnly)) { + QTextStream t(&f); + t.setEncoding(QTextStream::UnicodeUTF8); + t << results; + } + f.close(); +#endif + + entry = parseEntry(results); + if(!entry) { + myDebug() << "AnimeNfoFetcher::fetchEntry() - error in processing entry" << endl; + return 0; + } + m_entries.insert(uid_, entry); // keep for later + return entry; +} + +Tellico::Data::EntryPtr AnimeNfoFetcher::parseEntry(const QString& str_) { + // myDebug() << "AnimeNfoFetcher::parseEntry()" << endl; + // class might be anime_info_top + QRegExp infoRx(QString::fromLatin1("<td\\s+[^>]*class\\s*=\\s*[\"']anime_info[^>]*>(.*)</td>"), false); + infoRx.setMinimal(true); + QRegExp tagRx(QString::fromLatin1("<.*>")); + tagRx.setMinimal(true); + QRegExp anchorRx(QString::fromLatin1("<a\\s+[^>]*href\\s*=\\s*[\"'](.*)[\"'][^>]*>(.*)</a>"), false); + anchorRx.setMinimal(true); + QRegExp jsRx(QString::fromLatin1("<script.*</script>"), false); + jsRx.setMinimal(true); + + QString s = str_; + s.remove(jsRx); + + Data::CollPtr coll = new Data::VideoCollection(true); + + // add new fields + Data::FieldPtr f = new Data::Field(QString::fromLatin1("origtitle"), i18n("Original Title")); + coll->addField(f); + + f = new Data::Field(QString::fromLatin1("alttitle"), i18n("Alternative Titles"), Data::Field::Table); + f->setFormatFlag(Data::Field::FormatTitle); + coll->addField(f); + + f = new Data::Field(QString::fromLatin1("distributor"), i18n("Distributor")); + f->setCategory(i18n("Other People")); + f->setFlags(Data::Field::AllowCompletion | Data::Field::AllowMultiple | Data::Field::AllowGrouped); + f->setFormatFlag(Data::Field::FormatPlain); + coll->addField(f); + + f = new Data::Field(QString::fromLatin1("episodes"), i18n("Episodes"), Data::Field::Number); + f->setCategory(i18n("Features")); + coll->addField(f); + + // map captions in HTML to field names + QMap<QString, QString> fieldMap; + fieldMap.insert(QString::fromLatin1("Title"), QString::fromLatin1("title")); + fieldMap.insert(QString::fromLatin1("Japanese Title"), QString::fromLatin1("origtitle")); + fieldMap.insert(QString::fromLatin1("Total Episodes"), QString::fromLatin1("episodes")); + fieldMap.insert(QString::fromLatin1("Genres"), QString::fromLatin1("genre")); + fieldMap.insert(QString::fromLatin1("Year Published"), QString::fromLatin1("year")); + fieldMap.insert(QString::fromLatin1("Studio"), QString::fromLatin1("studio")); + fieldMap.insert(QString::fromLatin1("US Distribution"), QString::fromLatin1("distributor")); + + Data::EntryPtr entry = new Data::Entry(coll); + + int n = 0; + QString key, value; + int oldpos = -1; + for(int pos = infoRx.search(s); pos > -1; pos = infoRx.search(s, pos+1)) { + if(n == 0 && !key.isEmpty()) { + if(fieldMap.contains(key)) { + value = value.simplifyWhiteSpace(); + if(value.length() > 2) { // might be "-" + if(key == Latin1Literal("Genres")) { + entry->setField(fieldMap[key], QStringList::split(QRegExp(QString::fromLatin1("\\s*,\\s*")), + value).join(QString::fromLatin1("; "))); + } else { + entry->setField(fieldMap[key], value); + } + } + } + key.truncate(0); + value.truncate(0); + } + switch(n) { + case 0: + key = infoRx.cap(1).remove(tagRx); + break; + case 1: + value = infoRx.cap(1).remove(tagRx); + break; + } + n = (n+1)%2; + oldpos = pos; + } + + // image + QRegExp imgRx(QString::fromLatin1("<img\\s+[^>]*src\\s*=\\s*[\"']([^>]*)[\"']\\s+[^>]*alt\\s*=\\s*[\"']%1[\"']") + .arg(entry->field(QString::fromLatin1("title"))), false); + imgRx.setMinimal(true); + int pos = imgRx.search(s); + if(pos > -1) { + KURL imgURL(QString::fromLatin1(ANIMENFO_BASE_URL), imgRx.cap(1)); + QString id = ImageFactory::addImage(imgURL, true); + if(!id.isEmpty()) { + entry->setField(QString::fromLatin1("cover"), id); + } + } + + // now look for alternative titles and plot + const QString a = QString::fromLatin1("Alternative titles"); + pos = s.find(a, oldpos+1, false); + if(pos > -1) { + pos += a.length(); + } + int pos2 = -1; + if(pos > -1) { + pos2 = s.find(QString::fromLatin1("Description"), pos+1, true); + if(pos2 > -1) { + value = s.mid(pos, pos2-pos).remove(tagRx).simplifyWhiteSpace(); + entry->setField(QString::fromLatin1("alttitle"), value); + } + } + QRegExp descRx(QString::fromLatin1("class\\s*=\\s*[\"']description[\"'][^>]*>(.*)<"), false); + descRx.setMinimal(true); + pos = descRx.search(s, QMAX(pos, pos2)); + if(pos > -1) { + entry->setField(QString::fromLatin1("plot"), descRx.cap(1).simplifyWhiteSpace()); + } + + return entry; +} + +void AnimeNfoFetcher::updateEntry(Data::EntryPtr entry_) { + QString t = entry_->field(QString::fromLatin1("title")); + if(!t.isEmpty()) { + search(Fetch::Keyword, t); + return; + } + emit signalDone(this); // always need to emit this if not continuing with the search +} + +Tellico::Fetch::ConfigWidget* AnimeNfoFetcher::configWidget(QWidget* parent_) const { + return new AnimeNfoFetcher::ConfigWidget(parent_); +} + +AnimeNfoFetcher::ConfigWidget::ConfigWidget(QWidget* parent_) + : Fetch::ConfigWidget(parent_) { + QVBoxLayout* l = new QVBoxLayout(optionsWidget()); + l->addWidget(new QLabel(i18n("This source has no options."), optionsWidget())); + l->addStretch(); +} + +QString AnimeNfoFetcher::ConfigWidget::preferredName() const { + return AnimeNfoFetcher::defaultName(); +} + +#include "animenfofetcher.moc" |