/***************************************************************************
    copyright            : (C) 2006 by Robby Stephenson
    email                : robby@periapsis.org
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of version 2 of the GNU General Public License as  *
 *   published by the Free Software Foundation;                            *
 *                                                                         *
 ***************************************************************************/

#include "animenfofetcher.h"
#include "messagehandler.h"
#include "../tellico_kernel.h"
#include "../tellico_utils.h"
#include "../collections/videocollection.h"
#include "../entry.h"
#include "../filehandler.h"
#include "../latin1literal.h"
#include "../imagefactory.h"
#include "../tellico_debug.h"

#include <klocale.h>
#include <kconfig.h>
#include <kio/job.h>

#include <tqregexp.h>
#include <tqlabel.h>
#include <tqlayout.h>
#include <tqfile.h>

//#define ANIMENFO_TEST

namespace {
  // Search endpoint; also used as the base when resolving relative links
  // found in the result pages.
  static const char* ANIMENFO_BASE_URL = "http://www.animenfo.com/search.php";
}

using Tellico::Fetch::AnimeNfoFetcher;

// Creates an idle fetcher; a search only begins once search() is called.
AnimeNfoFetcher::AnimeNfoFetcher(TQObject* tqparent_, const char* name_ /*=0*/)
    : Fetcher(tqparent_, name_), m_started(false) {
}

// Name used for this source when the user has not configured one.
TQString AnimeNfoFetcher::defaultName() {
  return TQString::tqfromLatin1("AnimeNfo.com");
}

// Returns the configured source name, falling back to defaultName() when empty.
TQString AnimeNfoFetcher::source() const {
  return m_name.isEmpty() ? defaultName() : m_name;
}

// Only video collections can be searched with this fetcher.
bool AnimeNfoFetcher::canFetch(int type) const {
  return type == Data::Collection::Video;
}

// This source has no configuration options, so nothing is read.
void AnimeNfoFetcher::readConfigHook(const KConfigGroup& config_) {
  Q_UNUSED(config_);
}

// Starts an asynchronous search. Only FetchKey Keyword is supported; any other
// key, or a collection type rejected by canFetch(), ends the search right away
// through stop(). Incoming data is accumulated by slotData() and the results
// are parsed in slotComplete().
void AnimeNfoFetcher::search(FetchKey key_, const TQString& value_) {
  m_started = true;
  m_matches.clear();

#ifdef ANIMENFO_TEST
  KURL u = KURL::fromPathOrURL(TQString::tqfromLatin1("/home/robby/animenfo.html"));
#else
  KURL u(TQString::tqfromLatin1(ANIMENFO_BASE_URL));
  u.addQueryItem(TQString::tqfromLatin1("action"), TQString::tqfromLatin1("Go"));
  u.addQueryItem(TQString::tqfromLatin1("option"), TQString::tqfromLatin1("keywords"));
  u.addQueryItem(TQString::tqfromLatin1("queryin"), TQString::tqfromLatin1("anime_titles"));

  if(!canFetch(Kernel::self()->collectionType())) {
    message(i18n("%1 does not allow searching for this collection type.").tqarg(source()), MessageHandler::Warning);
    stop();
    return;
  }

  switch(key_) {
    case Keyword:
      u.addQueryItem(TQString::tqfromLatin1("query"), value_);
      break;

    default:
      kdWarning() << "AnimeNfoFetcher::search() - key not recognized: " << key_ << endl;
      stop();
      return;
  }
#endif
//  myDebug() << "AnimeNfoFetcher::search() - url: " << u.url() << endl;

  m_job = KIO::get(u, false, false);
  connect(m_job, TQT_SIGNAL(data(KIO::Job*, const TQByteArray&)),
          TQT_SLOT(slotData(KIO::Job*, const TQByteArray&)));
  connect(m_job, TQT_SIGNAL(result(KIO::Job*)),
          TQT_SLOT(slotComplete(KIO::Job*)));
}

// Ends the current search: kills any running job, discards buffered data and
// emits signalDone(). Does nothing when no search is in progress.
void AnimeNfoFetcher::stop() {
  if(!m_started) {
    return;
  }

  if(m_job) {
    m_job->kill();
    m_job = 0;
  }
  m_data.truncate(0);
  m_started = false;
  emit signalDone(this);
}

// Appends a chunk of downloaded data to the internal buffer.
void AnimeNfoFetcher::slotData(KIO::Job*, const TQByteArray& data_) {
  TQDataStream stream(m_data, IO_WriteOnly | IO_Append);
  stream.writeRawBytes(data_.data(), data_.size());
}

// Parses the downloaded search page, emits signalResultFound() for every hit
// and remembers each hit's detail URL in m_matches, keyed by the result uid.
// Always finishes by calling stop().
void AnimeNfoFetcher::slotComplete(KIO::Job* job_) {
//  myDebug() << "AnimeNfoFetcher::slotComplete()" << endl;

  // since the fetch is done, don't worry about holding the job pointer
  m_job = 0;

  if(job_->error()) {
    job_->showErrorDialog(Kernel::self()->widget());
    stop();
    return;
  }

  if(m_data.isEmpty()) {
    myDebug() << "AnimeNfoFetcher::slotComplete() - no data" << endl;
    stop();
    return;
  }

  TQString s = Tellico::decodeHTML(TQString(m_data));

  TQRegExp infoRx(TQString::tqfromLatin1("<td\\s+[^>]*class\\s*=\\s*[\"']anime_info[\"'][^>]*>(.*)</td>"), false);
  infoRx.setMinimal(true);
  TQRegExp anchorRx(TQString::tqfromLatin1("<a\\s+[^>]*href\\s*=\\s*[\"'](.*)[\"'][^>]*>(.*)</a>"), false);
  anchorRx.setMinimal(true);
  TQRegExp yearRx(TQString::tqfromLatin1("\\d{4}"), false);

  // search page comes in groups of threes
  int n = 0;
  TQString u, t, y;

  for(int pos = infoRx.search(s); m_started && pos > -1; pos = infoRx.search(s, pos+1)) {
    // a new group is starting, so publish the one that was just completed
    if(n == 0 && !u.isEmpty()) {
      SearchResult* r = new SearchResult(this, t, y, TQString());
      emit signalResultFound(r);

#ifdef ANIMENFO_TEST
      KURL url = KURL::fromPathOrURL(TQString::tqfromLatin1("/home/robby/animetitle.html"));
#else
      KURL url(TQString::tqfromLatin1(ANIMENFO_BASE_URL), u);
      url.setQuery(TQString());
#endif
      m_matches.insert(r->uid, url);

      u.truncate(0);
      t.truncate(0);
      y.truncate(0);
    }
    switch(n) {
      case 0: // title and url
        {
          int pos2 = anchorRx.search(infoRx.cap(1));
          if(pos2 > -1) {
            u = anchorRx.cap(1);
            t = anchorRx.cap(2);
          }
        }
        break;
      case 1: // don't care
        break;
      case 2:
        if(yearRx.exactMatch(infoRx.cap(1))) {
          y = infoRx.cap(1);
        }
        break;
    }

    n = (n+1)%3;
  }

  // grab last response
#ifndef ANIMENFO_TEST
  if(!u.isEmpty()) {
    SearchResult* r = new SearchResult(this, t, y, TQString());
    emit signalResultFound(r);
    KURL url(TQString::tqfromLatin1(ANIMENFO_BASE_URL), u);
    url.setQuery(TQString());
    m_matches.insert(r->uid, url);
  }
#endif
  stop();
}

// Returns the entry for a previously reported search result. The detail page
// is read and parsed on first request and the entry is kept in m_entries for
// later calls. Returns a null pointer when the uid is unknown, the page is
// empty, or parsing fails.
Tellico::Data::EntryPtr AnimeNfoFetcher::fetchEntry(uint uid_) {
  // if we already grabbed this one, then just pull it out of the dict
  Data::EntryPtr entry = m_entries[uid_];
  if(entry) {
    return entry;
  }

  KURL url = m_matches[uid_];
  if(url.isEmpty()) {
    kdWarning() << "AnimeNfoFetcher::fetchEntry() - no url in map" << endl;
    return 0;
  }

  TQString results = Tellico::decodeHTML(FileHandler::readTextFile(url, true));
  if(results.isEmpty()) {
    myDebug() << "AnimeNfoFetcher::fetchEntry() - no text results" << endl;
    return 0;
  }

#if 0
  kdWarning() << "Remove debug from animenfofetcher.cpp" << endl;
  TQFile f(TQString::tqfromLatin1("/tmp/test.html"));
  if(f.open(IO_WriteOnly)) {
    TQTextStream t(&f);
    t.setEncoding(TQTextStream::UnicodeUTF8);
    t << results;
  }
  f.close();
#endif

  entry = parseEntry(results);
  if(!entry) {
    myDebug() << "AnimeNfoFetcher::fetchEntry() - error in processing entry" << endl;
    return 0;
  }
  m_entries.insert(uid_, entry); // keep for later
  return entry;
}

// Builds an entry in a fresh video collection from the HTML of a detail page.
// Caption/value cells are mapped onto collection fields; the cover image,
// alternative titles and plot are then looked up separately.
Tellico::Data::EntryPtr AnimeNfoFetcher::parseEntry(const TQString& str_) {
//  myDebug() << "AnimeNfoFetcher::parseEntry()" << endl;
  // class might be anime_info_top
  TQRegExp infoRx(TQString::tqfromLatin1("<td\\s+[^>]*class\\s*=\\s*[\"']anime_info[^>]*>(.*)</td>"), false);
  infoRx.setMinimal(true);
  TQRegExp tagRx(TQString::tqfromLatin1("<.*>"));
  tagRx.setMinimal(true);
  TQRegExp anchorRx(TQString::tqfromLatin1("<a\\s+[^>]*href\\s*=\\s*[\"'](.*)[\"'][^>]*>(.*)</a>"), false);
  anchorRx.setMinimal(true);
  TQRegExp jsRx(TQString::tqfromLatin1("<script.*</script>"), false);
  jsRx.setMinimal(true);

  // drop embedded scripts before looking for anything else
  TQString s = str_;
  s.remove(jsRx);

  Data::CollPtr coll = new Data::VideoCollection(true);

  // add new fields
  Data::FieldPtr f = new Data::Field(TQString::tqfromLatin1("origtitle"), i18n("Original Title"));
  coll->addField(f);

  f = new Data::Field(TQString::tqfromLatin1("alttitle"), i18n("Alternative Titles"), Data::Field::Table);
  f->setFormatFlag(Data::Field::FormatTitle);
  coll->addField(f);

  f = new Data::Field(TQString::tqfromLatin1("distributor"), i18n("Distributor"));
  f->setCategory(i18n("Other People"));
  f->setFlags(Data::Field::AllowCompletion | Data::Field::AllowMultiple | Data::Field::AllowGrouped);
  f->setFormatFlag(Data::Field::FormatPlain);
  coll->addField(f);

  f = new Data::Field(TQString::tqfromLatin1("episodes"), i18n("Episodes"), Data::Field::Number);
  f->setCategory(i18n("Features"));
  coll->addField(f);

  // map captions in HTML to field names
  TQMap<TQString, TQString> fieldMap;
  fieldMap.insert(TQString::tqfromLatin1("Title"), TQString::tqfromLatin1("title"));
  fieldMap.insert(TQString::tqfromLatin1("Japanese Title"), TQString::tqfromLatin1("origtitle"));
  fieldMap.insert(TQString::tqfromLatin1("Total Episodes"), TQString::tqfromLatin1("episodes"));
  fieldMap.insert(TQString::tqfromLatin1("Genres"), TQString::tqfromLatin1("genre"));
  fieldMap.insert(TQString::tqfromLatin1("Year Published"), TQString::tqfromLatin1("year"));
  fieldMap.insert(TQString::tqfromLatin1("Studio"), TQString::tqfromLatin1("studio"));
  fieldMap.insert(TQString::tqfromLatin1("US Distribution"), TQString::tqfromLatin1("distributor"));

  Data::EntryPtr entry = new Data::Entry(coll);

  // built once here instead of once per genre row
  const TQRegExp commaRx(TQString::tqfromLatin1("\\s*,\\s*"));

  // cells alternate between a caption and its value
  int n = 0;
  TQString key, value;
  int oldpos = -1;
  for(int pos = infoRx.search(s); pos > -1; pos = infoRx.search(s, pos+1)) {
    if(n == 0) {
      key = infoRx.cap(1).remove(tagRx);
    } else {
      value = infoRx.cap(1).remove(tagRx);
      // store the pair as soon as it is complete, so that the final pair on
      // the page is not lost waiting for a following caption cell
      if(fieldMap.tqcontains(key)) {
        value = value.simplifyWhiteSpace();
        // NOTE(review): this also skips one- and two-character values
        // (e.g. an episode count of "26") -- confirm that is intended
        if(value.length() > 2) { // might be "-"
          if(key == Latin1Literal("Genres")) {
            entry->setField(fieldMap[key], TQStringList::split(commaRx, value).join(TQString::tqfromLatin1("; ")));
          } else {
            entry->setField(fieldMap[key], value);
          }
        }
      }
      key.truncate(0);
      value.truncate(0);
    }
    n = (n+1)%2;
    oldpos = pos;
  }

  // image
  // the title goes into a regular expression, so it has to be escaped or
  // titles containing characters like '(' or '?' would never match
  TQRegExp imgRx(TQString::tqfromLatin1("<img\\s+[^>]*src\\s*=\\s*[\"']([^>]*)[\"']\\s+[^>]*alt\\s*=\\s*[\"']%1[\"']")
                 .tqarg(TQRegExp::escape(entry->field(TQString::tqfromLatin1("title")))), false);
  imgRx.setMinimal(true);
  int pos = imgRx.search(s);
  if(pos > -1) {
    KURL imgURL(TQString::tqfromLatin1(ANIMENFO_BASE_URL), imgRx.cap(1));
    TQString id = ImageFactory::addImage(imgURL, true);
    if(!id.isEmpty()) {
      entry->setField(TQString::tqfromLatin1("cover"), id);
    }
  }

  // now look for alternative titles and plot
  const TQString a = TQString::tqfromLatin1("Alternative titles");
  pos = s.tqfind(a, oldpos+1, false);
  if(pos > -1) {
    pos += a.length();
  }
  int pos2 = -1;
  if(pos > -1) {
    pos2 = s.tqfind(TQString::tqfromLatin1("Description"), pos+1, true);
    if(pos2 > -1) {
      value = s.mid(pos, pos2-pos).remove(tagRx).simplifyWhiteSpace();
      entry->setField(TQString::tqfromLatin1("alttitle"), value);
    }
  }

  TQRegExp descRx(TQString::tqfromLatin1("class\\s*=\\s*[\"']description[\"'][^>]*>(.*)<"), false);
  descRx.setMinimal(true);
  // a negative offset would make the search start from the end of the text,
  // so fall back to the beginning when neither marker above was found
  pos = descRx.search(s, TQMAX(0, TQMAX(pos, pos2)));
  if(pos > -1) {
    entry->setField(TQString::tqfromLatin1("plot"), descRx.cap(1).simplifyWhiteSpace());
  }

  return entry;
}

// Re-runs a keyword search using the entry's title; when the entry has no
// title there is nothing to search for and signalDone() is emitted directly.
void AnimeNfoFetcher::updateEntry(Data::EntryPtr entry_) {
  TQString t = entry_->field(TQString::tqfromLatin1("title"));
  if(!t.isEmpty()) {
    search(Fetch::Keyword, t);
    return;
  }
  emit signalDone(this); // always need to emit this if not continuing with the search
}

// Creates the configuration widget for this source.
Tellico::Fetch::ConfigWidget* AnimeNfoFetcher::configWidget(TQWidget* tqparent_) const {
  return new AnimeNfoFetcher::ConfigWidget(tqparent_);
}

// The widget only shows a label saying that there is nothing to configure.
AnimeNfoFetcher::ConfigWidget::ConfigWidget(TQWidget* tqparent_)
    : Fetch::ConfigWidget(tqparent_) {
  TQVBoxLayout* l = new TQVBoxLayout(optionsWidget());
  l->addWidget(new TQLabel(i18n("This source has no options."), optionsWidget()));
  l->addStretch();
}

// Name suggested for a newly configured source of this type.
TQString AnimeNfoFetcher::ConfigWidget::preferredName() const {
  return AnimeNfoFetcher::defaultName();
}

#include "animenfofetcher.moc"