diff options
author | tpearson <tpearson@283d02a7-25f6-0310-bc7c-ecb5cbfe19da> | 2010-03-01 19:17:32 +0000 |
---|---|---|
committer | tpearson <tpearson@283d02a7-25f6-0310-bc7c-ecb5cbfe19da> | 2010-03-01 19:17:32 +0000 |
commit | e38d2351b83fa65c66ccde443777647ef5cb6cff (patch) | |
tree | 1897fc20e9f73a81c520a5b9f76f8ed042124883 /src/fetch | |
download | tellico-e38d2351b83fa65c66ccde443777647ef5cb6cff.tar.gz tellico-e38d2351b83fa65c66ccde443777647ef5cb6cff.zip |
Added KDE3 version of Tellico
git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/applications/tellico@1097620 283d02a7-25f6-0310-bc7c-ecb5cbfe19da
Diffstat (limited to 'src/fetch')
56 files changed, 14021 insertions, 0 deletions
diff --git a/src/fetch/Makefile.am b/src/fetch/Makefile.am new file mode 100644 index 0000000..fbf2ea1 --- /dev/null +++ b/src/fetch/Makefile.am @@ -0,0 +1,46 @@ +####### kdevelop will overwrite this part!!! (begin)########## +noinst_LIBRARIES = libfetch.a + +AM_CPPFLAGS = $(all_includes) $(LIBXML_CFLAGS) $(LIBXSLT_CFLAGS) $(YAZ_CFLAGS) + +libfetch_a_METASOURCES = AUTO + +libfetch_a_SOURCES = amazonfetcher.cpp animenfofetcher.cpp arxivfetcher.cpp \ + bibsonomyfetcher.cpp citebasefetcher.cpp configwidget.cpp crossreffetcher.cpp \ + discogsfetcher.cpp entrezfetcher.cpp execexternalfetcher.cpp fetcher.cpp fetchmanager.cpp \ + gcstarpluginfetcher.cpp googlescholarfetcher.cpp ibsfetcher.cpp imdbfetcher.cpp \ + isbndbfetcher.cpp messagehandler.cpp srufetcher.cpp yahoofetcher.cpp z3950connection.cpp \ + z3950fetcher.cpp + +####### kdevelop will overwrite this part!!! (end)############ + +SUBDIRS = scripts + +CLEANFILES = *~ + +KDE_OPTIONS = noautodist + +EXTRA_DIST = \ +fetcher.h fetcher.cpp fetchmanager.h fetchmanager.cpp \ +amazonfetcher.h amazonfetcher.cpp z3950fetcher.h z3950fetcher.cpp \ +imdbfetcher.h imdbfetcher.cpp fetch.h configwidget.h configwidget.cpp \ +entrezfetcher.h entrezfetcher.cpp \ +execexternalfetcher.h execexternalfetcher.cpp \ +messagehandler.h messagehandler.cpp \ +z3950connection.h z3950connection.cpp \ +yahoofetcher.h yahoofetcher.cpp \ +animenfofetcher.h animenfofetcher.cpp \ +ibsfetcher.h ibsfetcher.cpp \ +srufetcher.h srufetcher.cpp \ +isbndbfetcher.h isbndbfetcher.cpp \ +gcstarpluginfetcher.h gcstarpluginfetcher.cpp \ +crossreffetcher.h crossreffetcher.cpp \ +arxivfetcher.h arxivfetcher.cpp \ +citebasefetcher.h citebasefetcher.cpp \ +bibsonomyfetcher.h bibsonomyfetcher.cpp \ +googlescholarfetcher.h googlescholarfetcher.cpp \ +discogsfetcher.h discogsfetcher.cpp \ +z3950-servers.cfg + +appdir = $(kde_datadir)/tellico +app_DATA = z3950-servers.cfg diff --git a/src/fetch/amazonfetcher.cpp b/src/fetch/amazonfetcher.cpp new file mode 100644 index 
0000000..36c009f --- /dev/null +++ b/src/fetch/amazonfetcher.cpp @@ -0,0 +1,937 @@ +/*************************************************************************** + copyright : (C) 2004-2006 by Robby Stephenson + email : [email protected] + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "amazonfetcher.h" +#include "messagehandler.h" +#include "../translators/xslthandler.h" +#include "../translators/tellicoimporter.h" +#include "../imagefactory.h" +#include "../tellico_kernel.h" +#include "../latin1literal.h" +#include "../collection.h" +#include "../document.h" +#include "../entry.h" +#include "../field.h" +#include "../tellico_utils.h" +#include "../tellico_debug.h" +#include "../isbnvalidator.h" +#include "../gui/combobox.h" + +#include <klocale.h> +#include <kio/job.h> +#include <kstandarddirs.h> +#include <kconfig.h> +#include <klineedit.h> +#include <kseparator.h> +#include <kcombobox.h> +#include <kaccelmanager.h> + +#include <qdom.h> +#include <qlayout.h> +#include <qlabel.h> +#include <qwhatsthis.h> +#include <qcheckbox.h> +#include <qfile.h> +#include <qtextcodec.h> + +namespace { + static const int AMAZON_RETURNS_PER_REQUEST = 10; + static const int AMAZON_MAX_RETURNS_TOTAL = 20; + static const char* AMAZON_ACCESS_KEY = "0834VQ4S71KYPVSYQD02"; + static const char* AMAZON_ASSOC_TOKEN = "tellico-20"; + // need to have these in the translation file + static const char* linkText = I18N_NOOP("Amazon Link"); +} + +using Tellico::Fetch::AmazonFetcher; + +// static +const AmazonFetcher::SiteData& AmazonFetcher::siteData(int site_) { + static SiteData 
dataVector[6] = { + { + i18n("Amazon (US)"), + "http://webservices.amazon.com/onca/xml" + }, { + i18n("Amazon (UK)"), + "http://webservices.amazon.co.uk/onca/xml" + }, { + i18n("Amazon (Germany)"), + "http://webservices.amazon.de/onca/xml" + }, { + i18n("Amazon (Japan)"), + "http://webservices.amazon.co.jp/onca/xml" + }, { + i18n("Amazon (France)"), + "http://webservices.amazon.fr/onca/xml" + }, { + i18n("Amazon (Canada)"), + "http://webservices.amazon.ca/onca/xml" + } + }; + + return dataVector[site_]; +} + +AmazonFetcher::AmazonFetcher(Site site_, QObject* parent_, const char* name_) + : Fetcher(parent_, name_), m_xsltHandler(0), m_site(site_), m_imageSize(MediumImage), + m_access(QString::fromLatin1(AMAZON_ACCESS_KEY)), + m_assoc(QString::fromLatin1(AMAZON_ASSOC_TOKEN)), m_addLinkField(true), m_limit(AMAZON_MAX_RETURNS_TOTAL), + m_countOffset(0), m_page(1), m_total(-1), m_numResults(0), m_job(0), m_started(false) { + m_name = siteData(site_).title; +} + +AmazonFetcher::~AmazonFetcher() { + delete m_xsltHandler; + m_xsltHandler = 0; +} + +QString AmazonFetcher::defaultName() { + return i18n("Amazon.com Web Services"); +} + +QString AmazonFetcher::source() const { + return m_name.isEmpty() ? 
defaultName() : m_name; +} + +bool AmazonFetcher::canFetch(int type) const { + return type == Data::Collection::Book + || type == Data::Collection::ComicBook + || type == Data::Collection::Bibtex + || type == Data::Collection::Album + || type == Data::Collection::Video + || type == Data::Collection::Game; +} + +void AmazonFetcher::readConfigHook(const KConfigGroup& config_) { + QString s = config_.readEntry("AccessKey"); + if(!s.isEmpty()) { + m_access = s; + } + s = config_.readEntry("AssocToken"); + if(!s.isEmpty()) { + m_assoc = s; + } + int imageSize = config_.readNumEntry("Image Size", -1); + if(imageSize > -1) { + m_imageSize = static_cast<ImageSize>(imageSize); + } + m_fields = config_.readListEntry("Custom Fields", QString::fromLatin1("keyword")); +} + +void AmazonFetcher::search(FetchKey key_, const QString& value_) { + m_key = key_; + m_value = value_.stripWhiteSpace(); + m_started = true; + m_page = 1; + m_total = -1; + m_countOffset = 0; + m_numResults = 0; + doSearch(); +} + +void AmazonFetcher::continueSearch() { + m_started = true; + m_limit += AMAZON_MAX_RETURNS_TOTAL; + doSearch(); +} + +void AmazonFetcher::doSearch() { + m_data.truncate(0); + +// myDebug() << "AmazonFetcher::doSearch() - value = " << m_value << endl; +// myDebug() << "AmazonFetcher::doSearch() - getting page " << m_page << endl; + + const SiteData& data = siteData(m_site); + KURL u = data.url; + u.addQueryItem(QString::fromLatin1("Service"), QString::fromLatin1("AWSECommerceService")); + u.addQueryItem(QString::fromLatin1("AssociateTag"), m_assoc); + u.addQueryItem(QString::fromLatin1("AWSAccessKeyId"), m_access); + u.addQueryItem(QString::fromLatin1("Operation"), QString::fromLatin1("ItemSearch")); + u.addQueryItem(QString::fromLatin1("ResponseGroup"), QString::fromLatin1("Large")); + u.addQueryItem(QString::fromLatin1("ItemPage"), QString::number(m_page)); + u.addQueryItem(QString::fromLatin1("Version"), QString::fromLatin1("2007-10-29")); + + const int type = 
Kernel::self()->collectionType(); + switch(type) { + case Data::Collection::Book: + case Data::Collection::ComicBook: + case Data::Collection::Bibtex: + u.addQueryItem(QString::fromLatin1("SearchIndex"), QString::fromLatin1("Books")); + u.addQueryItem(QString::fromLatin1("SortIndex"), QString::fromLatin1("relevancerank")); + break; + + case Data::Collection::Album: + u.addQueryItem(QString::fromLatin1("SearchIndex"), QString::fromLatin1("Music")); + break; + + case Data::Collection::Video: + u.addQueryItem(QString::fromLatin1("SearchIndex"), QString::fromLatin1("Video")); + u.addQueryItem(QString::fromLatin1("SortIndex"), QString::fromLatin1("relevancerank")); + break; + + case Data::Collection::Game: + u.addQueryItem(QString::fromLatin1("SearchIndex"), QString::fromLatin1("VideoGames")); + break; + + case Data::Collection::Coin: + case Data::Collection::Stamp: + case Data::Collection::Wine: + case Data::Collection::Base: + case Data::Collection::Card: + default: + message(i18n("%1 does not allow searching for this collection type.").arg(source()), MessageHandler::Warning); + stop(); + return; + } + + // I have not been able to find any documentation about what character set to use + // when URL encoding the search term in the Amazon REST interface. But I do know + // that utf8 DOES NOT WORK. So I'm arbitrarily using iso-8859-1, except for JP. + // Why different for JP? Well, I've not received any bug reports from that direction yet + +// QString value = KURL::decode_string(value_, 106); +// QString value = QString::fromLocal8Bit(value_.utf8()); + QString value = m_value; + // a mibenum of 106 is utf-8, 4 is iso-8859-1, 0 means use user's locale, + int mib = m_site == AmazonFetcher::JP ? 
106 : 4; + + switch(m_key) { + case Title: + u.addQueryItem(QString::fromLatin1("Title"), value, mib); + break; + + case Person: + if(type == Data::Collection::Video) { + u.addQueryItem(QString::fromLatin1("Actor"), value, mib); + u.addQueryItem(QString::fromLatin1("Director"), value, mib); + } else if(type == Data::Collection::Album) { + u.addQueryItem(QString::fromLatin1("Artist"), value, mib); + } else if(type == Data::Collection::Game) { + u.addQueryItem(QString::fromLatin1("Manufacturer"), value, mib); + } else { // books and bibtex + QString s = QString::fromLatin1("author:%1 or publisher:%2").arg(value, value); +// u.addQueryItem(QString::fromLatin1("Author"), value, mib); +// u.addQueryItem(QString::fromLatin1("Publisher"), value, mib); + u.addQueryItem(QString::fromLatin1("Power"), s, mib); + } + break; + + case ISBN: + { + u.removeQueryItem(QString::fromLatin1("Operation")); + u.addQueryItem(QString::fromLatin1("Operation"), QString::fromLatin1("ItemLookup")); + + QString s = m_value; // not encValue!!! + s.remove('-'); + // ISBN only get digits or 'X', and multiple values are connected with "; " + QStringList isbns = QStringList::split(QString::fromLatin1("; "), s); + // Amazon isbn13 search is still very flaky, so if possible, we're going to convert + // all of them to isbn10. 
If we run into a 979 isbn13, then we're forced to do an + // isbn13 search + bool isbn13 = false; + for(QStringList::Iterator it = isbns.begin(); it != isbns.end(); ) { + if((*it).startsWith(QString::fromLatin1("979"))) { // test each ISBN, not the unsplit search string + if(m_site == JP) { // never works for JP + kdWarning() << "AmazonFetcher:doSearch() - ISBN-13 searching not implemented for Japan" << endl; + it = isbns.remove(it); // remove() invalidates 'it' and returns the next item + continue; + } + isbn13 = true; + break; + } + ++it; + } + // if we want isbn10, then convert all + if(!isbn13) { + for(QStringList::Iterator it = isbns.begin(); it != isbns.end(); ++it) { + if((*it).length() > 12) { + (*it) = ISBNValidator::isbn10(*it); + (*it).remove('-'); + } + } + // the default search is by ASIN, which prohibits SearchIndex + u.removeQueryItem(QString::fromLatin1("SearchIndex")); + } + // limit to first 10 + while(isbns.size() > 10) { + isbns.pop_back(); + } + u.addQueryItem(QString::fromLatin1("ItemId"), isbns.join(QString::fromLatin1(","))); + if(isbn13) { + u.addQueryItem(QString::fromLatin1("IdType"), QString::fromLatin1("EAN")); + } + } + break; + + case UPC: + { + u.removeQueryItem(QString::fromLatin1("Operation")); + u.addQueryItem(QString::fromLatin1("Operation"), QString::fromLatin1("ItemLookup")); + // US allows UPC, all others are EAN + if(m_site == US) { + u.addQueryItem(QString::fromLatin1("IdType"), QString::fromLatin1("UPC")); + } else { + u.addQueryItem(QString::fromLatin1("IdType"), QString::fromLatin1("EAN")); + } + QString s = m_value; // not encValue!!! 
+ s.remove('-'); + // limit to first 10 + s.replace(QString::fromLatin1("; "), QString::fromLatin1(",")); + s = s.section(',', 0, 9); + u.addQueryItem(QString::fromLatin1("ItemId"), s); + } + break; + + case Keyword: + u.addQueryItem(QString::fromLatin1("Keywords"), m_value, mib); + break; + + case Raw: + { + QString key = value.section('=', 0, 0).stripWhiteSpace(); + QString str = value.section('=', 1).stripWhiteSpace(); + u.addQueryItem(key, str, mib); + } + break; + + default: + kdWarning() << "AmazonFetcher::search() - key not recognized: " << m_key << endl; + stop(); + return; + } +// myDebug() << "AmazonFetcher::search() - url: " << u.url() << endl; + + m_job = KIO::get(u, false, false); + connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)), + SLOT(slotData(KIO::Job*, const QByteArray&))); + connect(m_job, SIGNAL(result(KIO::Job*)), + SLOT(slotComplete(KIO::Job*))); +} + +void AmazonFetcher::stop() { + if(!m_started) { + return; + } +// myDebug() << "AmazonFetcher::stop()" << endl; + if(m_job) { + m_job->kill(); + m_job = 0; + } + m_data.truncate(0); + m_started = false; + emit signalDone(this); +} + +void AmazonFetcher::slotData(KIO::Job*, const QByteArray& data_) { + QDataStream stream(m_data, IO_WriteOnly | IO_Append); + stream.writeRawBytes(data_.data(), data_.size()); +} + +void AmazonFetcher::slotComplete(KIO::Job* job_) { +// myDebug() << "AmazonFetcher::slotComplete()" << endl; + + // since the fetch is done, don't worry about holding the job pointer + m_job = 0; + + if(job_->error()) { + job_->showErrorDialog(Kernel::self()->widget()); + stop(); + return; + } + + if(m_data.isEmpty()) { + myDebug() << "AmazonFetcher::slotComplete() - no data" << endl; + stop(); + return; + } + +#if 0 + kdWarning() << "Remove debug from amazonfetcher.cpp" << endl; + QFile f(QString::fromLatin1("/tmp/test%1.xml").arg(m_page)); + if(f.open(IO_WriteOnly)) { + QTextStream t(&f); + t.setEncoding(QTextStream::UnicodeUTF8); + t << QCString(m_data, m_data.size()+1); + } 
+ f.close(); +#endif + + QStringList errors; + if(m_total == -1) { + QDomDocument dom; + if(!dom.setContent(m_data, false)) { + kdWarning() << "AmazonFetcher::slotComplete() - server did not return valid XML." << endl; + stop(); + return; + } + // find TotalResults element + // it's in the first level under the root element + //ItemSearchResponse/Items/TotalResults + QDomNode n = dom.documentElement().namedItem(QString::fromLatin1("Items")) + .namedItem(QString::fromLatin1("TotalResults")); + QDomElement e = n.toElement(); + if(!e.isNull()) { + m_total = e.text().toInt(); + } + n = dom.documentElement().namedItem(QString::fromLatin1("Items")) + .namedItem(QString::fromLatin1("Request")) + .namedItem(QString::fromLatin1("Errors")); + e = n.toElement(); + if(!e.isNull()) { + QDomNodeList nodes = e.elementsByTagName(QString::fromLatin1("Error")); + for(uint i = 0; i < nodes.count(); ++i) { + e = nodes.item(i).toElement().namedItem(QString::fromLatin1("Code")).toElement(); + if(!e.isNull() && e.text() == Latin1Literal("AWS.ECommerceService.NoExactMatches")) { + // no exact match, not a real error, so skip + continue; + } + // for some reason, Amazon will return an error simply when a valid ISBN is not found + // I really want to ignore that, so check the IsValid element in the Request element + QDomNode isValidNode = n.parentNode().namedItem(QString::fromLatin1("IsValid")); + if(m_key == ISBN && isValidNode.toElement().text().lower() == Latin1Literal("true")) { + continue; + } + e = nodes.item(i).toElement().namedItem(QString::fromLatin1("Message")).toElement(); + if(!e.isNull()) { + errors << e.text(); + } + } + } + } + + if(!m_xsltHandler) { + initXSLTHandler(); + if(!m_xsltHandler) { // probably an error somewhere in the stylesheet loading + stop(); + return; + } + } + +// QRegExp stripHTML(QString::fromLatin1("<.*>"), true); +// stripHTML.setMinimal(true); + + // assume amazon is always utf-8 + QString str = m_xsltHandler->applyStylesheet(QString::fromUtf8(m_data, 
m_data.size())); + Import::TellicoImporter imp(str); + Data::CollPtr coll = imp.collection(); + if(!coll) { + myDebug() << "AmazonFetcher::slotComplete() - no collection pointer" << endl; + stop(); + return; + } + + if(!m_addLinkField) { + // remove amazon field if it's not to be added + coll->removeField(QString::fromLatin1("amazon")); + } + + Data::EntryVec entries = coll->entries(); + if(entries.isEmpty() && !errors.isEmpty()) { + for(QStringList::ConstIterator it = errors.constBegin(); it != errors.constEnd(); ++it) { + myDebug() << "AmazonFetcher::" << *it << endl; + } + message(errors[0], MessageHandler::Error); + stop(); + return; + } + + int count = 0; + for(Data::EntryVec::Iterator entry = entries.begin(); + m_numResults < m_limit && entry != entries.end(); + ++entry, ++count) { + if(count < m_countOffset) { + continue; + } + if(!m_started) { + // might get aborted + break; + } + + // special case book author + // amazon is really bad about not putting spaces after periods + if(coll->type() == Data::Collection::Book) { + QRegExp rx(QString::fromLatin1("\\.([^\\s])")); + QStringList values = entry->fields(QString::fromLatin1("author"), false); + for(QStringList::Iterator it = values.begin(); it != values.end(); ++it) { + (*it).replace(rx, QString::fromLatin1(". 
\\1")); + } + entry->setField(QString::fromLatin1("author"), values.join(QString::fromLatin1("; "))); + } + + // UK puts the year in the title for some reason + if(m_site == UK && coll->type() == Data::Collection::Video) { + QRegExp rx(QString::fromLatin1("\\[(\\d{4})\\]")); + QString t = entry->title(); + if(t.find(rx) > -1) { + QString y = rx.cap(1); + t = t.remove(rx).simplifyWhiteSpace(); // simplifyWhiteSpace() returns a copy, so assign it back + entry->setField(QString::fromLatin1("title"), t); + if(entry->field(QString::fromLatin1("year")).isEmpty()) { + entry->setField(QString::fromLatin1("year"), y); + } + } + } + + QString desc; + switch(coll->type()) { + case Data::Collection::Book: + case Data::Collection::ComicBook: + case Data::Collection::Bibtex: + desc = entry->field(QString::fromLatin1("author")) + + QChar('/') + entry->field(QString::fromLatin1("publisher")); + if(!entry->field(QString::fromLatin1("cr_year")).isEmpty()) { + desc += QChar('/') + entry->field(QString::fromLatin1("cr_year")); + } else if(!entry->field(QString::fromLatin1("pub_year")).isEmpty()){ + desc += QChar('/') + entry->field(QString::fromLatin1("pub_year")); + } + break; + + case Data::Collection::Video: + desc = entry->field(QString::fromLatin1("studio")) + + QChar('/') + + entry->field(QString::fromLatin1("director")) + + QChar('/') + + entry->field(QString::fromLatin1("year")) + + QChar('/') + + entry->field(QString::fromLatin1("medium")); + break; + + case Data::Collection::Album: + desc = entry->field(QString::fromLatin1("artist")) + + QChar('/') + + entry->field(QString::fromLatin1("label")) + + QChar('/') + + entry->field(QString::fromLatin1("year")); + break; + + case Data::Collection::Game: + desc = entry->field(QString::fromLatin1("platform")) + + QChar('/') + + entry->field(QString::fromLatin1("year")); + break; + + default: + break; + } + + // strip HTML from comments, or plot in movies + // tentatively don't do this, looks like ECS 4 cleaned everything up +/* + if(coll->type() == Data::Collection::Video) { + QString plot = 
entry->field(QString::fromLatin1("plot")); + plot.remove(stripHTML); + entry->setField(QString::fromLatin1("plot"), plot); + } else if(coll->type() == Data::Collection::Game) { + QString desc = entry->field(QString::fromLatin1("description")); + desc.remove(stripHTML); + entry->setField(QString::fromLatin1("description"), desc); + } else { + QString comments = entry->field(QString::fromLatin1("comments")); + comments.remove(stripHTML); + entry->setField(QString::fromLatin1("comments"), comments); + } +*/ +// myDebug() << "AmazonFetcher::slotComplete() - " << entry->title() << endl; + SearchResult* r = new SearchResult(this, entry->title(), desc, entry->field(QString::fromLatin1("isbn"))); + m_entries.insert(r->uid, Data::EntryPtr(entry)); + emit signalResultFound(r); + ++m_numResults; + } + + // we might have gotten aborted + if(!m_started) { + return; + } + + // are there any additional results to get? + m_hasMoreResults = m_page * AMAZON_RETURNS_PER_REQUEST < m_total; + + const int currentTotal = QMIN(m_total, m_limit); + if(m_page * AMAZON_RETURNS_PER_REQUEST < currentTotal) { + int foundCount = (m_page-1) * AMAZON_RETURNS_PER_REQUEST + coll->entryCount(); + message(i18n("Results from %1: %2/%3").arg(source()).arg(foundCount).arg(m_total), MessageHandler::Status); + ++m_page; + m_countOffset = 0; + doSearch(); + } else if(m_value.contains(';') > 9) { + search(m_key, m_value.section(';', 10)); + } else { + m_countOffset = m_entries.count() % AMAZON_RETURNS_PER_REQUEST; + if(m_countOffset == 0) { + ++m_page; // need to go to next page + } + stop(); + } +} + +Tellico::Data::EntryPtr AmazonFetcher::fetchEntry(uint uid_) { + Data::EntryPtr entry = m_entries[uid_]; + if(!entry) { + kdWarning() << "AmazonFetcher::fetchEntry() - no entry in dict" << endl; + return 0; + } + + QStringList defaultFields = customFields().keys(); + for(QStringList::Iterator it = defaultFields.begin(); it != defaultFields.end(); ++it) { + if(!m_fields.contains(*it)) { + entry->setField(*it, 
QString::null); + } + } + + // do what we can to remove useless keywords + const int type = Kernel::self()->collectionType(); + switch(type) { + case Data::Collection::Book: + case Data::Collection::ComicBook: + case Data::Collection::Bibtex: + { + const QString keywords = QString::fromLatin1("keyword"); + QStringList oldWords = entry->fields(keywords, false); + StringSet words; + for(QStringList::Iterator it = oldWords.begin(); it != oldWords.end(); ++it) { + // the amazon2tellico stylesheet separates keywords with '/' + QStringList nodes = QStringList::split('/', *it); + for(QStringList::Iterator it2 = nodes.begin(); it2 != nodes.end(); ++it2) { + if(*it2 == Latin1Literal("General") || + *it2 == Latin1Literal("Subjects") || + *it2 == Latin1Literal("Par prix") || // french stuff + *it2 == Latin1Literal("Divers") || // french stuff + (*it2).startsWith(QChar('(')) || + (*it2).startsWith(QString::fromLatin1("Authors"))) { + continue; + } + words.add(*it2); + } + } + entry->setField(keywords, words.toList().join(QString::fromLatin1("; "))); + } + entry->setField(QString::fromLatin1("comments"), Tellico::decodeHTML(entry->field(QString::fromLatin1("comments")))); + break; + + case Data::Collection::Video: + { + const QString genres = QString::fromLatin1("genre"); + QStringList oldWords = entry->fields(genres, false); + StringSet words; + // only care about genres that have "Genres" in the amazon response + // and take the first word after that + for(QStringList::Iterator it = oldWords.begin(); it != oldWords.end(); ++it) { + if((*it).find(QString::fromLatin1("Genres")) == -1) { + continue; + } + + // the amazon2tellico stylesheet separates words with '/' + QStringList nodes = QStringList::split('/', *it); + for(QStringList::Iterator it2 = nodes.begin(); it2 != nodes.end(); ++it2) { + if(*it2 != Latin1Literal("Genres")) { + continue; + } + ++it2; + if(it2 != nodes.end() && *it2 != Latin1Literal("General")) { + words.add(*it2); + } + break; // we're done + } + } + 
entry->setField(genres, words.toList().join(QString::fromLatin1("; "))); + // language tracks get duplicated, too + QStringList langs = entry->fields(QString::fromLatin1("language"), false); + words.clear(); + for(QStringList::ConstIterator it = langs.begin(); it != langs.end(); ++it) { + words.add(*it); + } + entry->setField(QString::fromLatin1("language"), words.toList().join(QString::fromLatin1("; "))); + } + entry->setField(QString::fromLatin1("plot"), Tellico::decodeHTML(entry->field(QString::fromLatin1("plot")))); + break; + + case Data::Collection::Album: + { + const QString genres = QString::fromLatin1("genre"); + QStringList oldWords = entry->fields(genres, false); + StringSet words; + // only care about genres that have "Styles" in the amazon response + // and take the first word after that + for(QStringList::Iterator it = oldWords.begin(); it != oldWords.end(); ++it) { + if((*it).find(QString::fromLatin1("Styles")) == -1) { + continue; + } + + // the amazon2tellico stylesheet separates words with '/' + QStringList nodes = QStringList::split('/', *it); + bool isStyle = false; + for(QStringList::Iterator it2 = nodes.begin(); it2 != nodes.end(); ++it2) { + if(!isStyle) { + if(*it2 == Latin1Literal("Styles")) { + isStyle = true; + } + continue; + } + if(*it2 != Latin1Literal("General")) { + words.add(*it2); + } + } + } + entry->setField(genres, words.toList().join(QString::fromLatin1("; "))); + } + entry->setField(QString::fromLatin1("comments"), Tellico::decodeHTML(entry->field(QString::fromLatin1("comments")))); + break; + + case Data::Collection::Game: + entry->setField(QString::fromLatin1("description"), Tellico::decodeHTML(entry->field(QString::fromLatin1("description")))); + break; + } + + // clean up the title + parseTitle(entry, type); + + // also sometimes table fields have rows but no values + Data::FieldVec fields = entry->collection()->fields(); + QRegExp blank(QString::fromLatin1("[\\s:;]+")); // only white space, column separators and row 
separators + for(Data::FieldVec::Iterator fIt = fields.begin(); fIt != fields.end(); ++fIt) { + if(fIt->type() != Data::Field::Table) { + continue; + } + if(blank.exactMatch(entry->field(fIt))) { + entry->setField(fIt, QString::null); + } + } + + KURL imageURL; + switch(m_imageSize) { + case SmallImage: + imageURL = entry->field(QString::fromLatin1("small-image")); + break; + case MediumImage: + imageURL = entry->field(QString::fromLatin1("medium-image")); + break; + case LargeImage: + imageURL = entry->field(QString::fromLatin1("large-image")); + break; + case NoImage: + default: + break; + } +// myDebug() << "AmazonFetcher::fetchEntry() - grabbing " << imageURL.prettyURL() << endl; + if(!imageURL.isEmpty()) { + QString id = ImageFactory::addImage(imageURL, true); + // FIXME: need to add cover image field to bibtex collection + if(id.isEmpty()) { + message(i18n("The cover image could not be loaded."), MessageHandler::Warning); + } else { // amazon serves up 1x1 gifs occasionally, but that's caught in the image constructor + // all relevant collection types have cover fields + entry->setField(QString::fromLatin1("cover"), id); + } + } + + // don't want to show image urls in the fetch dialog + entry->setField(QString::fromLatin1("small-image"), QString::null); + entry->setField(QString::fromLatin1("medium-image"), QString::null); + entry->setField(QString::fromLatin1("large-image"), QString::null); + return entry; +} + +void AmazonFetcher::initXSLTHandler() { + QString xsltfile = locate("appdata", QString::fromLatin1("amazon2tellico.xsl")); + if(xsltfile.isEmpty()) { + kdWarning() << "AmazonFetcher::initXSLTHandler() - can not locate amazon2tellico.xsl." << endl; + return; + } + + KURL u; + u.setPath(xsltfile); + + delete m_xsltHandler; + m_xsltHandler = new XSLTHandler(u); + if(!m_xsltHandler->isValid()) { + kdWarning() << "AmazonFetcher::initXSLTHandler() - error in amazon2tellico.xsl." 
<< endl; + delete m_xsltHandler; + m_xsltHandler = 0; + return; + } +} + +void AmazonFetcher::updateEntry(Data::EntryPtr entry_) { +// myDebug() << "AmazonFetcher::updateEntry()" << endl; + + int type = entry_->collection()->type(); + if(type == Data::Collection::Book || type == Data::Collection::ComicBook || type == Data::Collection::Bibtex) { + QString isbn = entry_->field(QString::fromLatin1("isbn")); + if(!isbn.isEmpty()) { + m_limit = 5; // no need for more + search(Fetch::ISBN, isbn); + return; + } + } else if(type == Data::Collection::Album) { + QString a = entry_->field(QString::fromLatin1("artist")); + if(!a.isEmpty()) { + search(Fetch::Person, a); + return; + } + } + + // optimistically try searching for title and rely on Collection::sameEntry() to figure things out + QString t = entry_->field(QString::fromLatin1("title")); + if(!t.isEmpty()) { + search(Fetch::Title, t); + return; + } + + myDebug() << "AmazonFetcher::updateEntry() - insufficient info to search" << endl; + emit signalDone(this); // always need to emit this if not continuing with the search +} + +void AmazonFetcher::parseTitle(Data::EntryPtr entry, int collType) { + Q_UNUSED(collType); + // assume that everything in brackets or parentheses is extra + QRegExp rx(QString::fromLatin1("[\\(\\[](.*)[\\)\\]]")); + rx.setMinimal(true); + QString title = entry->field(QString::fromLatin1("title")); + int pos = rx.search(title); + while(pos > -1) { + if(parseTitleToken(entry, rx.cap(1))) { + title.remove(pos, rx.matchedLength()); + --pos; // search again there + } + pos = rx.search(title, pos+1); + } + entry->setField(QString::fromLatin1("title"), title.stripWhiteSpace()); +} + +bool AmazonFetcher::parseTitleToken(Data::EntryPtr entry, const QString& token) { + // if res = true, then the token gets removed from the title + bool res = false; + if(token.find(QString::fromLatin1("widescreen"), 0, false /* case-insensitive*/) > -1 || + token.find(i18n("Widescreen"), 0, false) > -1) { + 
entry->setField(QString::fromLatin1("widescreen"), QString::fromLatin1("true")); + // res = true; leave it in the title + } else if(token.find(QString::fromLatin1("full screen"), 0, false) > -1) { + // skip, but go ahead and remove from title + res = true; + } + if(token.find(QString::fromLatin1("blu-ray"), 0, false) > -1) { + entry->setField(QString::fromLatin1("medium"), i18n("Blu-ray")); + res = true; + } else if(token.find(QString::fromLatin1("hd dvd"), 0, false) > -1) { + entry->setField(QString::fromLatin1("medium"), i18n("HD DVD")); + res = true; + } + if(token.find(QString::fromLatin1("director's cut"), 0, false) > -1 || + token.find(i18n("Director's Cut"), 0, false) > -1) { + entry->setField(QString::fromLatin1("directors-cut"), QString::fromLatin1("true")); + // res = true; leave it in the title + } + return res; +} + +Tellico::Fetch::ConfigWidget* AmazonFetcher::configWidget(QWidget* parent_) const { + return new AmazonFetcher::ConfigWidget(parent_, this); +} + +AmazonFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const AmazonFetcher* fetcher_/*=0*/) + : Fetch::ConfigWidget(parent_) { + QGridLayout* l = new QGridLayout(optionsWidget(), 4, 2); + l->setSpacing(4); + l->setColStretch(1, 10); + + int row = -1; + QLabel* label = new QLabel(i18n("Co&untry: "), optionsWidget()); + l->addWidget(label, ++row, 0); + m_siteCombo = new GUI::ComboBox(optionsWidget()); + m_siteCombo->insertItem(i18n("United States"), US); + m_siteCombo->insertItem(i18n("United Kingdom"), UK); + m_siteCombo->insertItem(i18n("Germany"), DE); + m_siteCombo->insertItem(i18n("Japan"), JP); + m_siteCombo->insertItem(i18n("France"), FR); + m_siteCombo->insertItem(i18n("Canada"), CA); + connect(m_siteCombo, SIGNAL(activated(int)), SLOT(slotSetModified())); + connect(m_siteCombo, SIGNAL(activated(int)), SLOT(slotSiteChanged())); + l->addWidget(m_siteCombo, row, 1); + QString w = i18n("Amazon.com provides data from several different localized sites. 
Choose the one " + "you wish to use for this data source."); + QWhatsThis::add(label, w); + QWhatsThis::add(m_siteCombo, w); + label->setBuddy(m_siteCombo); + + label = new QLabel(i18n("&Image size: "), optionsWidget()); + l->addWidget(label, ++row, 0); + m_imageCombo = new GUI::ComboBox(optionsWidget()); + m_imageCombo->insertItem(i18n("Small Image"), SmallImage); + m_imageCombo->insertItem(i18n("Medium Image"), MediumImage); + m_imageCombo->insertItem(i18n("Large Image"), LargeImage); + m_imageCombo->insertItem(i18n("No Image"), NoImage); + connect(m_imageCombo, SIGNAL(activated(int)), SLOT(slotSetModified())); + l->addWidget(m_imageCombo, row, 1); + w = i18n("The cover image may be downloaded as well. However, too many large images in the " + "collection may degrade performance."); + QWhatsThis::add(label, w); + QWhatsThis::add(m_imageCombo, w); + label->setBuddy(m_imageCombo); + + label = new QLabel(i18n("&Associate's ID: "), optionsWidget()); + l->addWidget(label, ++row, 0); + m_assocEdit = new KLineEdit(optionsWidget()); + connect(m_assocEdit, SIGNAL(textChanged(const QString&)), SLOT(slotSetModified())); + l->addWidget(m_assocEdit, row, 1); + w = i18n("The associate's id identifies the person accessing the Amazon.com Web Services, and is included " + "in any links to the Amazon.com site."); + QWhatsThis::add(label, w); + QWhatsThis::add(m_assocEdit, w); + label->setBuddy(m_assocEdit); + + l->setRowStretch(++row, 10); + + if(fetcher_) { + m_siteCombo->setCurrentData(fetcher_->m_site); + m_assocEdit->setText(fetcher_->m_assoc); + m_imageCombo->setCurrentData(fetcher_->m_imageSize); + } else { // defaults + m_assocEdit->setText(QString::fromLatin1(AMAZON_ASSOC_TOKEN)); + m_imageCombo->setCurrentData(MediumImage); + } + + addFieldsWidget(AmazonFetcher::customFields(), fetcher_ ? 
fetcher_->m_fields : QStringList()); + + KAcceleratorManager::manage(optionsWidget()); +} + +void AmazonFetcher::ConfigWidget::saveConfig(KConfigGroup& config_) { + int n = m_siteCombo->currentData().toInt(); + config_.writeEntry("Site", n); + QString s = m_assocEdit->text().stripWhiteSpace(); + if(!s.isEmpty()) { + config_.writeEntry("AssocToken", s); + } + n = m_imageCombo->currentData().toInt(); + config_.writeEntry("Image Size", n); + + saveFieldsConfig(config_); + slotSetModified(false); +} + +QString AmazonFetcher::ConfigWidget::preferredName() const { + return AmazonFetcher::siteData(m_siteCombo->currentData().toInt()).title; +} + +void AmazonFetcher::ConfigWidget::slotSiteChanged() { + emit signalName(preferredName()); +} + +//static +Tellico::StringMap AmazonFetcher::customFields() { + StringMap map; + map[QString::fromLatin1("keyword")] = i18n("Keywords"); + return map; +} + +#include "amazonfetcher.moc" diff --git a/src/fetch/amazonfetcher.h b/src/fetch/amazonfetcher.h new file mode 100644 index 0000000..05df8d7 --- /dev/null +++ b/src/fetch/amazonfetcher.h @@ -0,0 +1,158 @@ +/*************************************************************************** + copyright : (C) 2004-2006 by Robby Stephenson + email : [email protected] + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef AMAZONFETCHER_H +#define AMAZONFETCHER_H + +#include "fetcher.h" +#include "configwidget.h" +#include "../datavectors.h" + +#include <kurl.h> + +#include <qcstring.h> // for QByteArray +#include <qguardedptr.h> + +class KLineEdit; + +class QCheckBox; +class QLabel; + +namespace KIO 
{ + class Job; +} + +namespace Tellico { + + class XSLTHandler; + namespace GUI { + class ComboBox; + } + + namespace Fetch { + +/** + * A fetcher for Amazon.com. + * + * @author Robby Stephenson + */ +class AmazonFetcher : public Fetcher { +Q_OBJECT + +public: + enum Site { + Unknown = -1, + US = 0, + UK = 1, + DE = 2, + JP = 3, + FR = 4, + CA = 5 + }; + + enum ImageSize { + SmallImage=0, + MediumImage=1, + LargeImage=2, + NoImage=3 + }; + + AmazonFetcher(Site site, QObject* parent, const char* name = 0); + virtual ~AmazonFetcher(); + + virtual QString source() const; + virtual bool isSearching() const { return m_started; } + virtual void search(FetchKey key, const QString& value); + virtual void continueSearch(); + // amazon can search title, person, isbn, or keyword. No Raw for now. + virtual bool canSearch(FetchKey k) const { return k == Title || k == Person || k == ISBN || k == UPC || k == Keyword; } + virtual void stop(); + virtual Data::EntryPtr fetchEntry(uint uid); + virtual Type type() const { return Amazon; } + virtual bool canFetch(int type) const; + virtual void readConfigHook(const KConfigGroup& config); + + virtual void updateEntry(Data::EntryPtr entry); + + struct SiteData { + QString title; + KURL url; + }; + static const SiteData& siteData(int site); + + /** + * Returns a widget for modifying the fetcher's config. 
+ */ + virtual Fetch::ConfigWidget* configWidget(QWidget* parent) const ; + + static StringMap customFields(); + + class ConfigWidget; + friend class ConfigWidget; + + static QString defaultName(); + +private slots: + void slotData(KIO::Job* job, const QByteArray& data); + void slotComplete(KIO::Job* job); + +private: + void initXSLTHandler(); + void doSearch(); + void parseTitle(Data::EntryPtr entry, int collType); + bool parseTitleToken(Data::EntryPtr entry, const QString& token); + + XSLTHandler* m_xsltHandler; + Site m_site; + ImageSize m_imageSize; + + QString m_access; + QString m_assoc; + bool m_addLinkField; + int m_limit; + int m_countOffset; + + QByteArray m_data; + int m_page; + int m_total; + int m_numResults; + QMap<int, Data::EntryPtr> m_entries; // they get modified after collection is created, so can't be const + QGuardedPtr<KIO::Job> m_job; + + FetchKey m_key; + QString m_value; + bool m_started; + QStringList m_fields; +}; + +class AmazonFetcher::ConfigWidget : public Fetch::ConfigWidget { +Q_OBJECT + +public: + ConfigWidget(QWidget* parent_, const AmazonFetcher* fetcher = 0); + + virtual void saveConfig(KConfigGroup& config); + virtual QString preferredName() const; + +private slots: + void slotSiteChanged(); + +private: + KLineEdit* m_assocEdit; + GUI::ComboBox* m_siteCombo; + GUI::ComboBox* m_imageCombo; +}; + + } // end namespace +} // end namespace +#endif diff --git a/src/fetch/animenfofetcher.cpp b/src/fetch/animenfofetcher.cpp new file mode 100644 index 0000000..728c583 --- /dev/null +++ b/src/fetch/animenfofetcher.cpp @@ -0,0 +1,378 @@ +/*************************************************************************** + copyright : (C) 2006 by Robby Stephenson + email : [email protected] + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the 
terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "animenfofetcher.h" +#include "messagehandler.h" +#include "../tellico_kernel.h" +#include "../tellico_utils.h" +#include "../collections/videocollection.h" +#include "../entry.h" +#include "../filehandler.h" +#include "../latin1literal.h" +#include "../imagefactory.h" +#include "../tellico_debug.h" + +#include <klocale.h> +#include <kconfig.h> +#include <kio/job.h> + +#include <qregexp.h> +#include <qlayout.h> +#include <qlabel.h> +#include <qfile.h> + +//#define ANIMENFO_TEST + +namespace { + static const char* ANIMENFO_BASE_URL = "http://www.animenfo.com/search.php"; +} + +using Tellico::Fetch::AnimeNfoFetcher; + +AnimeNfoFetcher::AnimeNfoFetcher(QObject* parent_, const char* name_ /*=0*/) + : Fetcher(parent_, name_), m_started(false) { +} + +QString AnimeNfoFetcher::defaultName() { + return QString::fromLatin1("AnimeNfo.com"); +} + +QString AnimeNfoFetcher::source() const { + return m_name.isEmpty() ? 
defaultName() : m_name; +} + +bool AnimeNfoFetcher::canFetch(int type) const { + return type == Data::Collection::Video; +} + +void AnimeNfoFetcher::readConfigHook(const KConfigGroup& config_) { + Q_UNUSED(config_); +} + +void AnimeNfoFetcher::search(FetchKey key_, const QString& value_) { + m_started = true; + m_matches.clear(); + +#ifdef ANIMENFO_TEST + KURL u = KURL::fromPathOrURL(QString::fromLatin1("/home/robby/animenfo.html")); +#else + KURL u(QString::fromLatin1(ANIMENFO_BASE_URL)); + u.addQueryItem(QString::fromLatin1("action"), QString::fromLatin1("Go")); + u.addQueryItem(QString::fromLatin1("option"), QString::fromLatin1("keywords")); + u.addQueryItem(QString::fromLatin1("queryin"), QString::fromLatin1("anime_titles")); + + if(!canFetch(Kernel::self()->collectionType())) { + message(i18n("%1 does not allow searching for this collection type.").arg(source()), MessageHandler::Warning); + stop(); + return; + } + + switch(key_) { + case Keyword: + u.addQueryItem(QString::fromLatin1("query"), value_); + break; + + default: + kdWarning() << "AnimeNfoFetcher::search() - key not recognized: " << key_ << endl; + stop(); + return; + } +#endif +// myDebug() << "AnimeNfoFetcher::search() - url: " << u.url() << endl; + + m_job = KIO::get(u, false, false); + connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)), + SLOT(slotData(KIO::Job*, const QByteArray&))); + connect(m_job, SIGNAL(result(KIO::Job*)), + SLOT(slotComplete(KIO::Job*))); +} + +void AnimeNfoFetcher::stop() { + if(!m_started) { + return; + } + + if(m_job) { + m_job->kill(); + m_job = 0; + } + m_data.truncate(0); + m_started = false; + emit signalDone(this); +} + +void AnimeNfoFetcher::slotData(KIO::Job*, const QByteArray& data_) { + QDataStream stream(m_data, IO_WriteOnly | IO_Append); + stream.writeRawBytes(data_.data(), data_.size()); +} + +void AnimeNfoFetcher::slotComplete(KIO::Job* job_) { +// myDebug() << "AnimeNfoFetcher::slotComplete()" << endl; + // since the fetch is done, don't worry about 
holding the job pointer + m_job = 0; + + if(job_->error()) { + job_->showErrorDialog(Kernel::self()->widget()); + stop(); + return; + } + + if(m_data.isEmpty()) { + myDebug() << "AnimeNfoFetcher::slotComplete() - no data" << endl; + stop(); + return; + } + + QString s = Tellico::decodeHTML(QString(m_data)); + + QRegExp infoRx(QString::fromLatin1("<td\\s+[^>]*class\\s*=\\s*[\"']anime_info[\"'][^>]*>(.*)</td>"), false); + infoRx.setMinimal(true); + QRegExp anchorRx(QString::fromLatin1("<a\\s+[^>]*href\\s*=\\s*[\"'](.*)[\"'][^>]*>(.*)</a>"), false); + anchorRx.setMinimal(true); + QRegExp yearRx(QString::fromLatin1("\\d{4}"), false); + + // search page comes in groups of threes + int n = 0; + QString u, t, y; + + for(int pos = infoRx.search(s); m_started && pos > -1; pos = infoRx.search(s, pos+1)) { + if(n == 0 && !u.isEmpty()) { + SearchResult* r = new SearchResult(this, t, y, QString()); + emit signalResultFound(r); + +#ifdef ANIMENFO_TEST + KURL url = KURL::fromPathOrURL(QString::fromLatin1("/home/robby/animetitle.html")); +#else + KURL url(QString::fromLatin1(ANIMENFO_BASE_URL), u); + url.setQuery(QString::null); +#endif + m_matches.insert(r->uid, url); + + u.truncate(0); + t.truncate(0); + y.truncate(0); + } + switch(n) { + case 0: // title and url + { + int pos2 = anchorRx.search(infoRx.cap(1)); + if(pos2 > -1) { + u = anchorRx.cap(1); + t = anchorRx.cap(2); + } + } + break; + case 1: // don't case + break; + case 2: + if(yearRx.exactMatch(infoRx.cap(1))) { + y = infoRx.cap(1); + } + break; + } + + n = (n+1)%3; + } + + // grab last response +#ifndef ANIMENFO_TEST + if(!u.isEmpty()) { + SearchResult* r = new SearchResult(this, t, y, QString()); + emit signalResultFound(r); + KURL url(QString::fromLatin1(ANIMENFO_BASE_URL), u); + url.setQuery(QString::null); + m_matches.insert(r->uid, url); + } +#endif + stop(); +} + +Tellico::Data::EntryPtr AnimeNfoFetcher::fetchEntry(uint uid_) { + // if we already grabbed this one, then just pull it out of the dict + 
Data::EntryPtr entry = m_entries[uid_]; + if(entry) { + return entry; + } + + KURL url = m_matches[uid_]; + if(url.isEmpty()) { + kdWarning() << "AnimeNfoFetcher::fetchEntry() - no url in map" << endl; + return 0; + } + + QString results = Tellico::decodeHTML(FileHandler::readTextFile(url, true)); + if(results.isEmpty()) { + myDebug() << "AnimeNfoFetcher::fetchEntry() - no text results" << endl; + return 0; + } + +#if 0 + kdWarning() << "Remove debug from animenfofetcher.cpp" << endl; + QFile f(QString::fromLatin1("/tmp/test.html")); + if(f.open(IO_WriteOnly)) { + QTextStream t(&f); + t.setEncoding(QTextStream::UnicodeUTF8); + t << results; + } + f.close(); +#endif + + entry = parseEntry(results); + if(!entry) { + myDebug() << "AnimeNfoFetcher::fetchEntry() - error in processing entry" << endl; + return 0; + } + m_entries.insert(uid_, entry); // keep for later + return entry; +} + +Tellico::Data::EntryPtr AnimeNfoFetcher::parseEntry(const QString& str_) { + // myDebug() << "AnimeNfoFetcher::parseEntry()" << endl; + // class might be anime_info_top + QRegExp infoRx(QString::fromLatin1("<td\\s+[^>]*class\\s*=\\s*[\"']anime_info[^>]*>(.*)</td>"), false); + infoRx.setMinimal(true); + QRegExp tagRx(QString::fromLatin1("<.*>")); + tagRx.setMinimal(true); + QRegExp anchorRx(QString::fromLatin1("<a\\s+[^>]*href\\s*=\\s*[\"'](.*)[\"'][^>]*>(.*)</a>"), false); + anchorRx.setMinimal(true); + QRegExp jsRx(QString::fromLatin1("<script.*</script>"), false); + jsRx.setMinimal(true); + + QString s = str_; + s.remove(jsRx); + + Data::CollPtr coll = new Data::VideoCollection(true); + + // add new fields + Data::FieldPtr f = new Data::Field(QString::fromLatin1("origtitle"), i18n("Original Title")); + coll->addField(f); + + f = new Data::Field(QString::fromLatin1("alttitle"), i18n("Alternative Titles"), Data::Field::Table); + f->setFormatFlag(Data::Field::FormatTitle); + coll->addField(f); + + f = new Data::Field(QString::fromLatin1("distributor"), i18n("Distributor")); + 
f->setCategory(i18n("Other People")); + f->setFlags(Data::Field::AllowCompletion | Data::Field::AllowMultiple | Data::Field::AllowGrouped); + f->setFormatFlag(Data::Field::FormatPlain); + coll->addField(f); + + f = new Data::Field(QString::fromLatin1("episodes"), i18n("Episodes"), Data::Field::Number); + f->setCategory(i18n("Features")); + coll->addField(f); + + // map captions in HTML to field names + QMap<QString, QString> fieldMap; + fieldMap.insert(QString::fromLatin1("Title"), QString::fromLatin1("title")); + fieldMap.insert(QString::fromLatin1("Japanese Title"), QString::fromLatin1("origtitle")); + fieldMap.insert(QString::fromLatin1("Total Episodes"), QString::fromLatin1("episodes")); + fieldMap.insert(QString::fromLatin1("Genres"), QString::fromLatin1("genre")); + fieldMap.insert(QString::fromLatin1("Year Published"), QString::fromLatin1("year")); + fieldMap.insert(QString::fromLatin1("Studio"), QString::fromLatin1("studio")); + fieldMap.insert(QString::fromLatin1("US Distribution"), QString::fromLatin1("distributor")); + + Data::EntryPtr entry = new Data::Entry(coll); + + int n = 0; + QString key, value; + int oldpos = -1; + for(int pos = infoRx.search(s); pos > -1; pos = infoRx.search(s, pos+1)) { + if(n == 0 && !key.isEmpty()) { + if(fieldMap.contains(key)) { + value = value.simplifyWhiteSpace(); + if(value.length() > 2) { // might be "-" + if(key == Latin1Literal("Genres")) { + entry->setField(fieldMap[key], QStringList::split(QRegExp(QString::fromLatin1("\\s*,\\s*")), + value).join(QString::fromLatin1("; "))); + } else { + entry->setField(fieldMap[key], value); + } + } + } + key.truncate(0); + value.truncate(0); + } + switch(n) { + case 0: + key = infoRx.cap(1).remove(tagRx); + break; + case 1: + value = infoRx.cap(1).remove(tagRx); + break; + } + n = (n+1)%2; + oldpos = pos; + } + + // image + QRegExp imgRx(QString::fromLatin1("<img\\s+[^>]*src\\s*=\\s*[\"']([^>]*)[\"']\\s+[^>]*alt\\s*=\\s*[\"']%1[\"']") + 
.arg(entry->field(QString::fromLatin1("title"))), false); + imgRx.setMinimal(true); + int pos = imgRx.search(s); + if(pos > -1) { + KURL imgURL(QString::fromLatin1(ANIMENFO_BASE_URL), imgRx.cap(1)); + QString id = ImageFactory::addImage(imgURL, true); + if(!id.isEmpty()) { + entry->setField(QString::fromLatin1("cover"), id); + } + } + + // now look for alternative titles and plot + const QString a = QString::fromLatin1("Alternative titles"); + pos = s.find(a, oldpos+1, false); + if(pos > -1) { + pos += a.length(); + } + int pos2 = -1; + if(pos > -1) { + pos2 = s.find(QString::fromLatin1("Description"), pos+1, true); + if(pos2 > -1) { + value = s.mid(pos, pos2-pos).remove(tagRx).simplifyWhiteSpace(); + entry->setField(QString::fromLatin1("alttitle"), value); + } + } + QRegExp descRx(QString::fromLatin1("class\\s*=\\s*[\"']description[\"'][^>]*>(.*)<"), false); + descRx.setMinimal(true); + pos = descRx.search(s, QMAX(pos, pos2)); + if(pos > -1) { + entry->setField(QString::fromLatin1("plot"), descRx.cap(1).simplifyWhiteSpace()); + } + + return entry; +} + +void AnimeNfoFetcher::updateEntry(Data::EntryPtr entry_) { + QString t = entry_->field(QString::fromLatin1("title")); + if(!t.isEmpty()) { + search(Fetch::Keyword, t); + return; + } + emit signalDone(this); // always need to emit this if not continuing with the search +} + +Tellico::Fetch::ConfigWidget* AnimeNfoFetcher::configWidget(QWidget* parent_) const { + return new AnimeNfoFetcher::ConfigWidget(parent_); +} + +AnimeNfoFetcher::ConfigWidget::ConfigWidget(QWidget* parent_) + : Fetch::ConfigWidget(parent_) { + QVBoxLayout* l = new QVBoxLayout(optionsWidget()); + l->addWidget(new QLabel(i18n("This source has no options."), optionsWidget())); + l->addStretch(); +} + +QString AnimeNfoFetcher::ConfigWidget::preferredName() const { + return AnimeNfoFetcher::defaultName(); +} + +#include "animenfofetcher.moc" diff --git a/src/fetch/animenfofetcher.h b/src/fetch/animenfofetcher.h new file mode 100644 index 
0000000..7e4028e --- /dev/null +++ b/src/fetch/animenfofetcher.h @@ -0,0 +1,86 @@ +/*************************************************************************** + copyright : (C) 2006 by Robby Stephenson + email : [email protected] + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef TELLICO_FETCH_ANIMENFOFETCHER_H +#define TELLICO_FETCH_ANIMENFOFETCHER_H + +#include "fetcher.h" +#include "configwidget.h" + +#include <qcstring.h> // for QByteArray +#include <qguardedptr.h> + +namespace KIO { + class Job; +} + +namespace Tellico { + namespace Fetch { + +/** + * A fetcher for animenfo.com + * + * @author Robby Stephenson + */ +class AnimeNfoFetcher : public Fetcher { +Q_OBJECT + +public: + AnimeNfoFetcher(QObject* parent, const char* name = 0); + virtual ~AnimeNfoFetcher() {} + + virtual QString source() const; + virtual bool isSearching() const { return m_started; } + virtual void search(FetchKey key, const QString& value); + // only keyword search + virtual bool canSearch(FetchKey k) const { return k == Keyword; } + virtual void stop(); + virtual Data::EntryPtr fetchEntry(uint uid); + virtual Type type() const { return AnimeNfo; } + virtual bool canFetch(int type) const; + virtual void readConfigHook(const KConfigGroup& config); + + virtual void updateEntry(Data::EntryPtr entry); + + virtual Fetch::ConfigWidget* configWidget(QWidget* parent) const; + + class ConfigWidget : public Fetch::ConfigWidget { + public: + ConfigWidget(QWidget* parent_); + virtual void saveConfig(KConfigGroup&) {} + virtual QString preferredName() const; + }; + friend class ConfigWidget; + + 
static QString defaultName(); + +private slots: + void slotData(KIO::Job* job, const QByteArray& data); + void slotComplete(KIO::Job* job); + +private: + Data::EntryPtr parseEntry(const QString& str); + + QByteArray m_data; + int m_total; + QMap<int, Data::EntryPtr> m_entries; + QMap<int, KURL> m_matches; + QGuardedPtr<KIO::Job> m_job; + + bool m_started; +// QStringList m_fields; +}; + + } // end namespace +} // end namespace +#endif diff --git a/src/fetch/arxivfetcher.cpp b/src/fetch/arxivfetcher.cpp new file mode 100644 index 0000000..442ef30 --- /dev/null +++ b/src/fetch/arxivfetcher.cpp @@ -0,0 +1,366 @@ +/*************************************************************************** + copyright : (C) 2007 by Robby Stephenson + email : [email protected] + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "arxivfetcher.h" +#include "messagehandler.h" +#include "../translators/xslthandler.h" +#include "../translators/tellicoimporter.h" +#include "../tellico_kernel.h" +#include "../tellico_utils.h" +#include "../collection.h" +#include "../entry.h" +#include "../core/netaccess.h" +#include "../imagefactory.h" +#include "../tellico_debug.h" + +#include <klocale.h> +#include <kstandarddirs.h> +#include <kconfig.h> + +#include <qdom.h> +#include <qlabel.h> +#include <qlayout.h> + +//#define ARXIV_TEST + +namespace { + static const int ARXIV_RETURNS_PER_REQUEST = 20; + static const char* ARXIV_BASE_URL = "http://export.arxiv.org/api/query"; +} + +using Tellico::Fetch::ArxivFetcher; + +ArxivFetcher::ArxivFetcher(QObject* parent_) + : Fetcher(parent_), m_xsltHandler(0), 
m_start(0), m_job(0), m_started(false) { +} + +ArxivFetcher::~ArxivFetcher() { + delete m_xsltHandler; + m_xsltHandler = 0; +} + +QString ArxivFetcher::defaultName() { + return i18n("arXiv.org"); +} + +QString ArxivFetcher::source() const { + return m_name.isEmpty() ? defaultName() : m_name; +} + +bool ArxivFetcher::canFetch(int type) const { + return type == Data::Collection::Bibtex; +} + +void ArxivFetcher::readConfigHook(const KConfigGroup&) { +} + +void ArxivFetcher::search(FetchKey key_, const QString& value_) { + m_key = key_; + m_value = value_.stripWhiteSpace(); + m_started = true; + m_start = 0; + m_total = -1; + doSearch(); +} + +void ArxivFetcher::continueSearch() { + m_started = true; + doSearch(); +} + +void ArxivFetcher::doSearch() { + if(!canFetch(Kernel::self()->collectionType())) { + message(i18n("%1 does not allow searching for this collection type.").arg(source()), MessageHandler::Warning); + stop(); + return; + } + + m_data.truncate(0); + +// myDebug() << "ArxivFetcher::search() - value = " << value_ << endl; + + KURL u = searchURL(m_key, m_value); + if(u.isEmpty()) { + stop(); + return; + } + + m_job = KIO::get(u, false, false); + connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)), + SLOT(slotData(KIO::Job*, const QByteArray&))); + connect(m_job, SIGNAL(result(KIO::Job*)), + SLOT(slotComplete(KIO::Job*))); +} + +void ArxivFetcher::stop() { + if(!m_started) { + return; + } +// myDebug() << "ArxivFetcher::stop()" << endl; + if(m_job) { + m_job->kill(); + m_job = 0; + } + m_data.truncate(0); + m_started = false; + emit signalDone(this); +} + +void ArxivFetcher::slotData(KIO::Job*, const QByteArray& data_) { + QDataStream stream(m_data, IO_WriteOnly | IO_Append); + stream.writeRawBytes(data_.data(), data_.size()); +} + +void ArxivFetcher::slotComplete(KIO::Job* job_) { +// myDebug() << "ArxivFetcher::slotComplete()" << endl; + // since the fetch is done, don't worry about holding the job pointer + m_job = 0; + + if(job_->error()) { + 
job_->showErrorDialog(Kernel::self()->widget()); + stop(); + return; + } + + if(m_data.isEmpty()) { + myDebug() << "ArxivFetcher::slotComplete() - no data" << endl; + stop(); + return; + } + +#if 0 + kdWarning() << "Remove debug from arxivfetcher.cpp" << endl; + QFile f(QString::fromLatin1("/tmp/test.xml")); + if(f.open(IO_WriteOnly)) { + QTextStream t(&f); + t.setEncoding(QTextStream::UnicodeUTF8); + t << QCString(m_data, m_data.size()+1); + } + f.close(); +#endif + + if(!m_xsltHandler) { + initXSLTHandler(); + if(!m_xsltHandler) { // probably an error somewhere in the stylesheet loading + stop(); + return; + } + } + + if(m_total == -1) { + QDomDocument dom; + if(!dom.setContent(m_data, true /*namespace*/)) { + kdWarning() << "ArxivFetcher::slotComplete() - server did not return valid XML." << endl; + return; + } + // total is top level element, with attribute totalResultsAvailable + QDomNodeList list = dom.elementsByTagNameNS(QString::fromLatin1("http://a9.com/-/spec/opensearch/1.1/"), + QString::fromLatin1("totalResults")); + if(list.count() > 0) { + m_total = list.item(0).toElement().text().toInt(); + } + } + + // assume result is always utf-8 + QString str = m_xsltHandler->applyStylesheet(QString::fromUtf8(m_data, m_data.size())); + Import::TellicoImporter imp(str); + Data::CollPtr coll = imp.collection(); + + if(!coll) { + myDebug() << "ArxivFetcher::slotComplete() - no valid result" << endl; + stop(); + return; + } + + Data::EntryVec entries = coll->entries(); + for(Data::EntryVec::Iterator entry = entries.begin(); entry != entries.end(); ++entry) { + if(!m_started) { + // might get aborted + break; + } + QString desc = entry->field(QString::fromLatin1("author")) + + QChar('/') + entry->field(QString::fromLatin1("publisher")); + if(!entry->field(QString::fromLatin1("year")).isEmpty()) { + desc += QChar('/') + entry->field(QString::fromLatin1("year")); + } + + SearchResult* r = new SearchResult(this, entry->title(), desc, 
entry->field(QString::fromLatin1("isbn"))); + m_entries.insert(r->uid, Data::EntryPtr(entry)); + emit signalResultFound(r); + } + + m_start = m_entries.count(); + m_hasMoreResults = m_start < m_total; + stop(); // required +} + +Tellico::Data::EntryPtr ArxivFetcher::fetchEntry(uint uid_) { + Data::EntryPtr entry = m_entries[uid_]; + // if URL but no cover image, fetch it + if(!entry->field(QString::fromLatin1("url")).isEmpty()) { + Data::CollPtr coll = entry->collection(); + Data::FieldPtr field = coll->fieldByName(QString::fromLatin1("cover")); + if(!field && !coll->imageFields().isEmpty()) { + field = coll->imageFields().front(); + } else if(!field) { + field = new Data::Field(QString::fromLatin1("cover"), i18n("Front Cover"), Data::Field::Image); + coll->addField(field); + } + if(entry->field(field).isEmpty()) { + QPixmap pix = NetAccess::filePreview(entry->field(QString::fromLatin1("url"))); + if(!pix.isNull()) { + QString id = ImageFactory::addImage(pix, QString::fromLatin1("PNG")); + if(!id.isEmpty()) { + entry->setField(field, id); + } + } + } + } + return entry; +} + +void ArxivFetcher::initXSLTHandler() { + QString xsltfile = locate("appdata", QString::fromLatin1("arxiv2tellico.xsl")); + if(xsltfile.isEmpty()) { + kdWarning() << "ArxivFetcher::initXSLTHandler() - can not locate arxiv2tellico.xsl." << endl; + return; + } + + KURL u; + u.setPath(xsltfile); + + delete m_xsltHandler; + m_xsltHandler = new XSLTHandler(u); + if(!m_xsltHandler->isValid()) { + kdWarning() << "ArxivFetcher::initXSLTHandler() - error in arxiv2tellico.xsl." 
<< endl; + delete m_xsltHandler; + m_xsltHandler = 0; + return; + } +} + +KURL ArxivFetcher::searchURL(FetchKey key_, const QString& value_) const { + KURL u(QString::fromLatin1(ARXIV_BASE_URL)); + u.addQueryItem(QString::fromLatin1("start"), QString::number(m_start)); + u.addQueryItem(QString::fromLatin1("max_results"), QString::number(ARXIV_RETURNS_PER_REQUEST)); + + // quotes should be used if spaces are present, just use all the time + QString quotedValue = '"' + value_ + '"'; + switch(key_) { + case Title: + u.addQueryItem(QString::fromLatin1("search_query"), QString::fromLatin1("ti:%1").arg(quotedValue)); + break; + + case Person: + u.addQueryItem(QString::fromLatin1("search_query"), QString::fromLatin1("au:%1").arg(quotedValue)); + break; + + case Keyword: + // keyword gets to use all the words without being quoted + u.addQueryItem(QString::fromLatin1("search_query"), QString::fromLatin1("all:%1").arg(value_)); + break; + + case ArxivID: + { + // remove prefix and/or version number + QString value = value_; + value.remove(QRegExp(QString::fromLatin1("^arxiv:"), false)); + value.remove(QRegExp(QString::fromLatin1("v\\d+$"))); + u.addQueryItem(QString::fromLatin1("search_query"), QString::fromLatin1("id:%1").arg(value)); + } + break; + + default: + kdWarning() << "ArxivFetcher::search() - key not recognized: " << m_key << endl; + return KURL(); + } + +#ifdef ARXIV_TEST + u = KURL::fromPathOrURL(QString::fromLatin1("/home/robby/arxiv.xml")); +#endif + myDebug() << "ArxivFetcher::search() - url: " << u.url() << endl; + return u; +} + +void ArxivFetcher::updateEntry(Data::EntryPtr entry_) { + QString id = entry_->field(QString::fromLatin1("arxiv")); + if(!id.isEmpty()) { + search(Fetch::ArxivID, id); + return; + } + + // optimistically try searching for title and rely on Collection::sameEntry() to figure things out + QString t = entry_->field(QString::fromLatin1("title")); + if(!t.isEmpty()) { + search(Fetch::Title, t); + return; + } + + myDebug() << 
"ArxivFetcher::updateEntry() - insufficient info to search" << endl; + emit signalDone(this); // always need to emit this if not continuing with the search +} + +void ArxivFetcher::updateEntrySynchronous(Data::EntryPtr entry) { + if(!entry) { + return; + } + QString arxiv = entry->field(QString::fromLatin1("arxiv")); + if(arxiv.isEmpty()) { + return; + } + + KURL u = searchURL(ArxivID, arxiv); + QString xml = FileHandler::readTextFile(u, true, true); + if(xml.isEmpty()) { + return; + } + + if(!m_xsltHandler) { + initXSLTHandler(); + if(!m_xsltHandler) { // probably an error somewhere in the stylesheet loading + return; + } + } + + // assume result is always utf-8 + QString str = m_xsltHandler->applyStylesheet(xml); + Import::TellicoImporter imp(str); + Data::CollPtr coll = imp.collection(); + if(coll && coll->entryCount() > 0) { + myLog() << "ArxivFetcher::updateEntrySynchronous() - found Arxiv result, merging" << endl; + Data::Collection::mergeEntry(entry, coll->entries().front(), false /*overwrite*/); + // the arxiv id might have a version# + entry->setField(QString::fromLatin1("arxiv"), + coll->entries().front()->field(QString::fromLatin1("arxiv"))); + } +} + +Tellico::Fetch::ConfigWidget* ArxivFetcher::configWidget(QWidget* parent_) const { + return new ArxivFetcher::ConfigWidget(parent_, this); +} + +ArxivFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const ArxivFetcher*) + : Fetch::ConfigWidget(parent_) { + QVBoxLayout* l = new QVBoxLayout(optionsWidget()); + l->addWidget(new QLabel(i18n("This source has no options."), optionsWidget())); + l->addStretch(); +} + +void ArxivFetcher::ConfigWidget::saveConfig(KConfigGroup&) { +} + +QString ArxivFetcher::ConfigWidget::preferredName() const { + return ArxivFetcher::defaultName(); +} + +#include "arxivfetcher.moc" diff --git a/src/fetch/arxivfetcher.h b/src/fetch/arxivfetcher.h new file mode 100644 index 0000000..bce5f9d --- /dev/null +++ b/src/fetch/arxivfetcher.h @@ -0,0 +1,93 @@ 
+/*************************************************************************** + copyright : (C) 2007 by Robby Stephenson + email : [email protected] + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef TELLICO_FETCH_ARXIVFETCHER_H +#define TELLICO_FETCH_ARXIVFETCHER_H + +#include "fetcher.h" +#include "configwidget.h" +#include "../datavectors.h" + +#include <kio/job.h> + +#include <qcstring.h> // for QByteArray +#include <qguardedptr.h> + +namespace Tellico { + + class XSLTHandler; + + namespace Fetch { + +/** + * @author Robby Stephenson + */ +class ArxivFetcher : public Fetcher { +Q_OBJECT + +public: + ArxivFetcher(QObject* parent); + ~ArxivFetcher(); + + virtual QString source() const; + virtual bool isSearching() const { return m_started; } + virtual void search(FetchKey key, const QString& value); + virtual void continueSearch(); + + virtual bool canSearch(FetchKey k) const { return k == Title || k == Person || k == Keyword || k == ArxivID; } + virtual void stop(); + virtual Data::EntryPtr fetchEntry(uint uid); + virtual Type type() const { return Arxiv; } + virtual bool canFetch(int type) const; + virtual void readConfigHook(const KConfigGroup& config); + + virtual void updateEntry(Data::EntryPtr entry); + virtual void updateEntrySynchronous(Data::EntryPtr entry); + + virtual Fetch::ConfigWidget* configWidget(QWidget* parent) const; + + class ConfigWidget : public Fetch::ConfigWidget { + public: + ConfigWidget(QWidget* parent_, const ArxivFetcher* fetcher = 0); + virtual void saveConfig(KConfigGroup& config); + virtual QString preferredName() const; + }; + 
friend class ConfigWidget; + + static QString defaultName(); + +private slots: + void slotData(KIO::Job* job, const QByteArray& data); + void slotComplete(KIO::Job* job); + +private: + void initXSLTHandler(); + KURL searchURL(FetchKey key, const QString& value) const; + void doSearch(); + + XSLTHandler* m_xsltHandler; + int m_start; + int m_total; + + QByteArray m_data; + QMap<int, Data::EntryPtr> m_entries; + QGuardedPtr<KIO::Job> m_job; + + FetchKey m_key; + QString m_value; + bool m_started; +}; + + } +} +#endif diff --git a/src/fetch/bibsonomyfetcher.cpp b/src/fetch/bibsonomyfetcher.cpp new file mode 100644 index 0000000..faa48a4 --- /dev/null +++ b/src/fetch/bibsonomyfetcher.cpp @@ -0,0 +1,209 @@ +/*************************************************************************** + copyright : (C) 2007 by Robby Stephenson + email : [email protected] + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "bibsonomyfetcher.h" +#include "messagehandler.h" +#include "../translators/bibteximporter.h" +#include "../tellico_kernel.h" +#include "../tellico_utils.h" +#include "../collection.h" +#include "../entry.h" +#include "../core/netaccess.h" +#include "../filehandler.h" +#include "../tellico_debug.h" + +#include <klocale.h> + +#include <qlabel.h> +#include <qlayout.h> + +namespace { + // always bibtex + static const char* BIBSONOMY_BASE_URL = "http://bibsonomy.org"; + static const int BIBSONOMY_MAX_RESULTS = 20; +} + +using Tellico::Fetch::BibsonomyFetcher; + +BibsonomyFetcher::BibsonomyFetcher(QObject* parent_) + : Fetcher(parent_), m_job(0), m_started(false) { +} + 
+BibsonomyFetcher::~BibsonomyFetcher() { +} + +QString BibsonomyFetcher::defaultName() { + return QString::fromLatin1("Bibsonomy"); +} + +QString BibsonomyFetcher::source() const { + return m_name.isEmpty() ? defaultName() : m_name; +} + +bool BibsonomyFetcher::canFetch(int type) const { + return type == Data::Collection::Bibtex; +} + +void BibsonomyFetcher::readConfigHook(const KConfigGroup&) { +} + +void BibsonomyFetcher::search(FetchKey key_, const QString& value_) { + m_key = key_; + m_value = value_.stripWhiteSpace(); + m_started = true; + + if(!canFetch(Kernel::self()->collectionType())) { + message(i18n("%1 does not allow searching for this collection type.").arg(source()), MessageHandler::Warning); + stop(); + return; + } + + m_data.truncate(0); + +// myDebug() << "BibsonomyFetcher::search() - value = " << value_ << endl; + + KURL u = searchURL(m_key, m_value); + if(u.isEmpty()) { + stop(); + return; + } + + m_job = KIO::get(u, false, false); + connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)), + SLOT(slotData(KIO::Job*, const QByteArray&))); + connect(m_job, SIGNAL(result(KIO::Job*)), + SLOT(slotComplete(KIO::Job*))); +} + +void BibsonomyFetcher::stop() { + if(!m_started) { + return; + } +// myDebug() << "BibsonomyFetcher::stop()" << endl; + if(m_job) { + m_job->kill(); + m_job = 0; + } + m_data.truncate(0); + m_started = false; + emit signalDone(this); +} + +void BibsonomyFetcher::slotData(KIO::Job*, const QByteArray& data_) { + QDataStream stream(m_data, IO_WriteOnly | IO_Append); + stream.writeRawBytes(data_.data(), data_.size()); +} + +void BibsonomyFetcher::slotComplete(KIO::Job* job_) { +// myDebug() << "BibsonomyFetcher::slotComplete()" << endl; + // since the fetch is done, don't worry about holding the job pointer + m_job = 0; + + if(job_->error()) { + job_->showErrorDialog(Kernel::self()->widget()); + stop(); + return; + } + + if(m_data.isEmpty()) { + myDebug() << "BibsonomyFetcher::slotComplete() - no data" << endl; + stop(); + return; + 
} + + Import::BibtexImporter imp(QString::fromUtf8(m_data, m_data.size())); + Data::CollPtr coll = imp.collection(); + + if(!coll) { + myDebug() << "BibsonomyFetcher::slotComplete() - no valid result" << endl; + stop(); + return; + } + + Data::EntryVec entries = coll->entries(); + for(Data::EntryVec::Iterator entry = entries.begin(); entry != entries.end(); ++entry) { + if(!m_started) { + // might get aborted + break; + } + QString desc = entry->field(QString::fromLatin1("author")) + + QChar('/') + entry->field(QString::fromLatin1("publisher")); + if(!entry->field(QString::fromLatin1("year")).isEmpty()) { + desc += QChar('/') + entry->field(QString::fromLatin1("year")); + } + + SearchResult* r = new SearchResult(this, entry->title(), desc, entry->field(QString::fromLatin1("isbn"))); + m_entries.insert(r->uid, Data::EntryPtr(entry)); + emit signalResultFound(r); + } + + stop(); // required +} + +Tellico::Data::EntryPtr BibsonomyFetcher::fetchEntry(uint uid_) { + return m_entries[uid_]; +} + +KURL BibsonomyFetcher::searchURL(FetchKey key_, const QString& value_) const { + KURL u(QString::fromLatin1(BIBSONOMY_BASE_URL)); + u.setPath(QString::fromLatin1("/bib/")); + + switch(key_) { + case Person: + u.addPath(QString::fromLatin1("author/%1").arg(value_)); + break; + + case Keyword: + u.addPath(QString::fromLatin1("search/%1").arg(value_)); + break; + + default: + kdWarning() << "BibsonomyFetcher::search() - key not recognized: " << m_key << endl; + return KURL(); + } + + u.addQueryItem(QString::fromLatin1("items"), QString::number(BIBSONOMY_MAX_RESULTS)); + myDebug() << "BibsonomyFetcher::search() - url: " << u.url() << endl; + return u; +} + +void BibsonomyFetcher::updateEntry(Data::EntryPtr entry_) { + QString title = entry_->field(QString::fromLatin1("title")); + if(!title.isEmpty()) { + search(Fetch::Keyword, title); + return; + } + + myDebug() << "BibsonomyFetcher::updateEntry() - insufficient info to search" << endl; + emit signalDone(this); // always need to 
emit this if not continuing with the search +} + +Tellico::Fetch::ConfigWidget* BibsonomyFetcher::configWidget(QWidget* parent_) const { + return new BibsonomyFetcher::ConfigWidget(parent_, this); +} + +BibsonomyFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const BibsonomyFetcher*) + : Fetch::ConfigWidget(parent_) { + QVBoxLayout* l = new QVBoxLayout(optionsWidget()); + l->addWidget(new QLabel(i18n("This source has no options."), optionsWidget())); + l->addStretch(); +} + +void BibsonomyFetcher::ConfigWidget::saveConfig(KConfigGroup&) { +} + +QString BibsonomyFetcher::ConfigWidget::preferredName() const { + return BibsonomyFetcher::defaultName(); +} + +#include "bibsonomyfetcher.moc" diff --git a/src/fetch/bibsonomyfetcher.h b/src/fetch/bibsonomyfetcher.h new file mode 100644 index 0000000..fc59928 --- /dev/null +++ b/src/fetch/bibsonomyfetcher.h @@ -0,0 +1,82 @@ +/*************************************************************************** + copyright : (C) 2007 by Robby Stephenson + email : [email protected] + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef TELLICO_FETCH_BIBSONOMYFETCHER_H +#define TELLICO_FETCH_BIBSONOMYFETCHER_H + +#include "fetcher.h" +#include "configwidget.h" +#include "../datavectors.h" + +#include <kio/job.h> + +#include <qcstring.h> // for QByteArray +#include <qguardedptr.h> + +namespace Tellico { + namespace Fetch { + +/** + * @author Robby Stephenson + */ +class BibsonomyFetcher : public Fetcher { +Q_OBJECT + +public: + BibsonomyFetcher(QObject* parent); + ~BibsonomyFetcher(); + + virtual QString source() const; + virtual bool 
isSearching() const { return m_started; } + virtual void search(FetchKey key, const QString& value); + + virtual bool canSearch(FetchKey k) const { return k == Person || k == Keyword; } + virtual void stop(); + virtual Data::EntryPtr fetchEntry(uint uid); + virtual Type type() const { return Bibsonomy; } + virtual bool canFetch(int type) const; + virtual void readConfigHook(const KConfigGroup& config); + + virtual void updateEntry(Data::EntryPtr entry); + + virtual Fetch::ConfigWidget* configWidget(QWidget* parent) const; + + class ConfigWidget : public Fetch::ConfigWidget { + public: + ConfigWidget(QWidget* parent_, const BibsonomyFetcher* fetcher = 0); + virtual void saveConfig(KConfigGroup& config); + virtual QString preferredName() const; + }; + friend class ConfigWidget; + + static QString defaultName(); + +private slots: + void slotData(KIO::Job* job, const QByteArray& data); + void slotComplete(KIO::Job* job); + +private: + KURL searchURL(FetchKey key, const QString& value) const; + + QByteArray m_data; + QMap<int, Data::EntryPtr> m_entries; + QGuardedPtr<KIO::Job> m_job; + + FetchKey m_key; + QString m_value; + bool m_started; +}; + + } +} +#endif diff --git a/src/fetch/citebasefetcher.cpp b/src/fetch/citebasefetcher.cpp new file mode 100644 index 0000000..798d690 --- /dev/null +++ b/src/fetch/citebasefetcher.cpp @@ -0,0 +1,248 @@ +/*************************************************************************** + copyright : (C) 2007 by Robby Stephenson + email : [email protected] + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "citebasefetcher.h" 
+#include "messagehandler.h" +#include "../translators/bibteximporter.h" +#include "../tellico_kernel.h" +#include "../tellico_utils.h" +#include "../collection.h" +#include "../entry.h" +#include "../core/netaccess.h" +#include "../filehandler.h" +#include "../tellico_debug.h" + +#include <klocale.h> + +#include <qlabel.h> +#include <qlayout.h> + +// #define CITEBASE_TEST + +namespace { + // always bibtex + static const char* CITEBASE_BASE_URL = "http://www.citebase.org/openurl/?url_ver=Z39.88-2004&svc_id=bibtex"; +} + +using Tellico::Fetch::CitebaseFetcher; + +CitebaseFetcher::CitebaseFetcher(QObject* parent_) + : Fetcher(parent_), m_job(0), m_started(false) { +} + +CitebaseFetcher::~CitebaseFetcher() { +} + +QString CitebaseFetcher::defaultName() { + return QString::fromLatin1("Citebase"); +} + +QString CitebaseFetcher::source() const { + return m_name.isEmpty() ? defaultName() : m_name; +} + +bool CitebaseFetcher::canFetch(int type) const { + return type == Data::Collection::Bibtex; +} + +void CitebaseFetcher::readConfigHook(const KConfigGroup&) { +} + +void CitebaseFetcher::search(FetchKey key_, const QString& value_) { + m_key = key_; + m_value = value_.stripWhiteSpace(); + m_started = true; + + if(!canFetch(Kernel::self()->collectionType())) { + message(i18n("%1 does not allow searching for this collection type.").arg(source()), MessageHandler::Warning); + stop(); + return; + } + + m_data.truncate(0); + +// myDebug() << "CitebaseFetcher::search() - value = " << value_ << endl; + + KURL u = searchURL(m_key, m_value); + if(u.isEmpty()) { + stop(); + return; + } + + m_job = KIO::get(u, false, false); + connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)), + SLOT(slotData(KIO::Job*, const QByteArray&))); + connect(m_job, SIGNAL(result(KIO::Job*)), + SLOT(slotComplete(KIO::Job*))); +} + +void CitebaseFetcher::stop() { + if(!m_started) { + return; + } +// myDebug() << "CitebaseFetcher::stop()" << endl; + if(m_job) { + m_job->kill(); + m_job = 0; + } + 
m_data.truncate(0); + m_started = false; + emit signalDone(this); +} + +void CitebaseFetcher::slotData(KIO::Job*, const QByteArray& data_) { + QDataStream stream(m_data, IO_WriteOnly | IO_Append); + stream.writeRawBytes(data_.data(), data_.size()); +} + +void CitebaseFetcher::slotComplete(KIO::Job* job_) { +// myDebug() << "CitebaseFetcher::slotComplete()" << endl; + // since the fetch is done, don't worry about holding the job pointer + m_job = 0; + + if(job_->error()) { + job_->showErrorDialog(Kernel::self()->widget()); + stop(); + return; + } + + if(m_data.isEmpty()) { + myDebug() << "CitebaseFetcher::slotComplete() - no data" << endl; + stop(); + return; + } + +#if 0 + kdWarning() << "Remove debug from citebasefetcher.cpp" << endl; + QFile f(QString::fromLatin1("/tmp/test.bib")); + if(f.open(IO_WriteOnly)) { + QTextStream t(&f); + t.setEncoding(QTextStream::UnicodeUTF8); + t << QCString(m_data, m_data.size()+1); + } + f.close(); +#endif + + Import::BibtexImporter imp(QString::fromUtf8(m_data, m_data.size())); + Data::CollPtr coll = imp.collection(); + + if(!coll) { + myDebug() << "CitebaseFetcher::slotComplete() - no valid result" << endl; + stop(); + return; + } + + Data::EntryVec entries = coll->entries(); + for(Data::EntryVec::Iterator entry = entries.begin(); entry != entries.end(); ++entry) { + if(!m_started) { + // might get aborted + break; + } + QString desc = entry->field(QString::fromLatin1("author")) + + QChar('/') + entry->field(QString::fromLatin1("publisher")); + if(!entry->field(QString::fromLatin1("year")).isEmpty()) { + desc += QChar('/') + entry->field(QString::fromLatin1("year")); + } + + SearchResult* r = new SearchResult(this, entry->title(), desc, entry->field(QString::fromLatin1("isbn"))); + m_entries.insert(r->uid, Data::EntryPtr(entry)); + emit signalResultFound(r); + } + + stop(); // required +} + +Tellico::Data::EntryPtr CitebaseFetcher::fetchEntry(uint uid_) { + return m_entries[uid_]; +} + +KURL CitebaseFetcher::searchURL(FetchKey 
key_, const QString& value_) const { + KURL u(QString::fromLatin1(CITEBASE_BASE_URL)); + + switch(key_) { + case ArxivID: + { + // remove prefix and/or version number + QString value = value_; + value.remove(QRegExp(QString::fromLatin1("^arxiv:"), false)); + value.remove(QRegExp(QString::fromLatin1("v\\d+$"))); + u.addQueryItem(QString::fromLatin1("rft_id"), QString::fromLatin1("oai:arXiv.org:%1").arg(value)); + } + break; + + default: + kdWarning() << "CitebaseFetcher::search() - key not recognized: " << m_key << endl; + return KURL(); + } + +#ifdef CITEBASE_TEST + u = KURL::fromPathOrURL(QString::fromLatin1("/home/robby/citebase.bib")); +#endif + myDebug() << "CitebaseFetcher::search() - url: " << u.url() << endl; + return u; +} + +void CitebaseFetcher::updateEntry(Data::EntryPtr entry_) { + QString arxiv = entry_->field(QString::fromLatin1("arxiv")); + if(!arxiv.isEmpty()) { + search(Fetch::ArxivID, arxiv); + return; + } + + myDebug() << "CitebaseFetcher::updateEntry() - insufficient info to search" << endl; + emit signalDone(this); // always need to emit this if not continuing with the search +} + +void CitebaseFetcher::updateEntrySynchronous(Data::EntryPtr entry) { + if(!entry) { + return; + } + QString arxiv = entry->field(QString::fromLatin1("arxiv")); + if(arxiv.isEmpty()) { + return; + } + + KURL u = searchURL(ArxivID, arxiv); + QString bibtex = FileHandler::readTextFile(u, true); + if(bibtex.isEmpty()) { + return; + } + + // assume result is always utf-8 + Import::BibtexImporter imp(bibtex); + Data::CollPtr coll = imp.collection(); + if(coll && coll->entryCount() > 0) { + myLog() << "CitebaseFetcher::updateEntrySynchronous() - found arxiv result, merging" << endl; + Data::Collection::mergeEntry(entry, coll->entries().front(), false /*overwrite*/); + } +} + +Tellico::Fetch::ConfigWidget* CitebaseFetcher::configWidget(QWidget* parent_) const { + return new CitebaseFetcher::ConfigWidget(parent_, this); +} + 
/**
 * Configuration page for the Citebase source. There is nothing to
 * configure, so the page only shows an explanatory label.
 */
CitebaseFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const CitebaseFetcher*)
    : Fetch::ConfigWidget(parent_) {
  QVBoxLayout* l = new QVBoxLayout(optionsWidget());
  l->addWidget(new QLabel(i18n("This source has no options."), optionsWidget()));
  l->addStretch();
}

/** Nothing to save. */
void CitebaseFetcher::ConfigWidget::saveConfig(KConfigGroup&) {
}

/** The preferred name is the fetcher's default name. */
QString CitebaseFetcher::ConfigWidget::preferredName() const {
  return CitebaseFetcher::defaultName();
}

#include "citebasefetcher.moc"
diff --git a/src/fetch/citebasefetcher.h b/src/fetch/citebasefetcher.h
new file mode 100644
index 0000000..a292107
--- /dev/null
+++ b/src/fetch/citebasefetcher.h
@@ -0,0 +1,83 @@
/***************************************************************************
    copyright            : (C) 2007 by Robby Stephenson
    email                : [email protected]
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of version 2 of the GNU General Public License as  *
 *   published by the Free Software Foundation;                            *
 *                                                                         *
 ***************************************************************************/

#ifndef TELLICO_FETCH_CITEBASEFETCHER_H
#define TELLICO_FETCH_CITEBASEFETCHER_H

#include "fetcher.h"
#include "configwidget.h"
#include "../datavectors.h"

#include <kio/job.h>

#include <qcstring.h> // for QByteArray
#include <qguardedptr.h>

namespace Tellico {
  namespace Fetch {

/**
 * Fetcher subclass that reports its type as Citebase and accepts only
 * ArxivID searches.
 *
 * @author Robby Stephenson
 */
class CitebaseFetcher : public Fetcher {
Q_OBJECT

public:
  CitebaseFetcher(QObject* parent);
  ~CitebaseFetcher();

  virtual QString source() const;
  /** A search counts as running for as long as m_started is set. */
  virtual bool isSearching() const { return m_started; }
  virtual void search(FetchKey key, const QString& value);

  /** ArxivID is the only accepted search key. */
  virtual bool canSearch(FetchKey k) const { return k == ArxivID; }
  virtual void stop();
  virtual Data::EntryPtr fetchEntry(uint uid);
  virtual Type type() const { return Citebase; }
  virtual bool canFetch(int type) const;
  virtual void readConfigHook(const KConfigGroup& config);

  virtual void updateEntry(Data::EntryPtr entry);
  virtual void updateEntrySynchronous(Data::EntryPtr entry);

  virtual Fetch::ConfigWidget* configWidget(QWidget* parent) const;

  /** Configuration page; this source has no options. */
  class ConfigWidget : public Fetch::ConfigWidget {
  public:
    ConfigWidget(QWidget* parent_, const CitebaseFetcher* fetcher = 0);
    virtual void saveConfig(KConfigGroup& config);
    virtual QString preferredName() const;
  };
  friend class ConfigWidget;

  static QString defaultName();

private slots:
  // signatures match KIO::Job's data() and result() signals
  void slotData(KIO::Job* job, const QByteArray& data);
  void slotComplete(KIO::Job* job);

private:
  KURL searchURL(FetchKey key, const QString& value) const;

  QByteArray m_data;
  QMap<int, Data::EntryPtr> m_entries;
  // guarded pointer: resets itself to null if the job object is destroyed
  QGuardedPtr<KIO::Job> m_job;

  FetchKey m_key;
  QString m_value;
  bool m_started;
};

  }
}
#endif
diff --git a/src/fetch/configwidget.cpp b/src/fetch/configwidget.cpp
new file mode 100644
index 0000000..c7b3b59
--- /dev/null
+++ b/src/fetch/configwidget.cpp
@@ -0,0 +1,66 @@
/***************************************************************************
    copyright            : (C) 2003-2006 by Robby Stephenson
    email                : [email protected]
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of version 2 of the GNU General Public License as  *
 *   published by the Free Software Foundation;                            *
 *                                                                         *
 ***************************************************************************/

#include "configwidget.h"

#include <kconfig.h>
#include <klocale.h>
#include <kaccelmanager.h>

#include <qvgroupbox.h>
#include <qlayout.h>

using Tellico::Fetch::ConfigWidget;

+ConfigWidget::ConfigWidget(QWidget* parent_) : QWidget(parent_), m_modified(false), m_accepted(false) { + QHBoxLayout* boxLayout = new QHBoxLayout(this); + boxLayout->setSpacing(10); + + QGroupBox* vbox = new QVGroupBox(i18n("Source Options"), this); + boxLayout->addWidget(vbox, 10 /*stretch*/); + + m_optionsWidget = new QWidget(vbox); +} + +void ConfigWidget::addFieldsWidget(const StringMap& customFields_, const QStringList& fieldsToAdd_) { + if(customFields_.isEmpty()) { + return; + } + + QVGroupBox* box = new QVGroupBox(i18n("Available Fields"), this); + static_cast<QBoxLayout*>(layout())->addWidget(box); + for(StringMap::ConstIterator it = customFields_.begin(); it != customFields_.end(); ++it) { + QCheckBox* cb = new QCheckBox(it.data(), box); + m_fields.insert(it.key(), cb); + if(fieldsToAdd_.contains(it.key())) { + cb->setChecked(true); + } + connect(cb, SIGNAL(clicked()), SLOT(slotSetModified())); + } + + KAcceleratorManager::manage(this); + + return; +} + +void ConfigWidget::saveFieldsConfig(KConfigGroup& config_) const { + QStringList fields; + for(QDictIterator<QCheckBox> it(m_fields); it.current(); ++it) { + if(it.current()->isChecked()) { + fields << it.currentKey(); + } + } + config_.writeEntry(QString::fromLatin1("Custom Fields"), fields); +} + +#include "configwidget.moc" diff --git a/src/fetch/configwidget.h b/src/fetch/configwidget.h new file mode 100644 index 0000000..9f18f83 --- /dev/null +++ b/src/fetch/configwidget.h @@ -0,0 +1,78 @@ +/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : [email protected] + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * 
 *                                                                         *
 ***************************************************************************/

#ifndef FETCHCONFIGWIDGET_H
#define FETCHCONFIGWIDGET_H

#include "../datavectors.h"

#include <qwidget.h>
#include <qdict.h>
#include <qcheckbox.h>

class KConfigGroup;
class QStringList;

namespace Tellico {
  namespace Fetch {

/**
 * Base class for the per-source configuration pages. Subclasses place
 * their controls on optionsWidget() and implement saveConfig() and
 * preferredName().
 *
 * @author Robby Stephenson
 */
class ConfigWidget : public QWidget {
Q_OBJECT

public:
  ConfigWidget(QWidget* parent);
  virtual ~ConfigWidget() {}

  void setAccepted(bool accepted_) { m_accepted = accepted_; }
  /** Saving is only wanted when the page was both modified and accepted. */
  bool shouldSave() const { return m_modified && m_accepted; }
  /**
   * Saves any configuration options. The config group must be
   * set before calling this function.
   *
   * @param config The config group to write the options to
   */
  virtual void saveConfig(KConfigGroup& config) = 0;
  /**
   * Called when a fetcher data source is removed. Useful for any cleanup work necessary.
   * The ExecExternalFetcher might need to remove the script, for example.
   * Because of the way the ConfigDialog is setup, easier to have that in the ConfigWidget
   * class than in the Fetcher class itself
   */
  virtual void removed() {}
  virtual QString preferredName() const = 0;

signals:
  void signalName(const QString& name);

public slots:
  /** Sets the modified flag; connected signals without arguments set it to true. */
  void slotSetModified(bool modified_ = true) { m_modified = modified_; }

protected:
  /** Widget inside the "Source Options" group that holds the subclass' controls. */
  QWidget* optionsWidget() { return m_optionsWidget; }
  void addFieldsWidget(const StringMap& customFields, const QStringList& fieldsToAdd);
  void saveFieldsConfig(KConfigGroup& config) const;

private:
  bool m_modified;
  bool m_accepted;
  QWidget* m_optionsWidget;
  // check boxes created by addFieldsWidget(), keyed by custom field key
  QDict<QCheckBox> m_fields;
};

  }
}

#endif
diff --git a/src/fetch/crossreffetcher.cpp b/src/fetch/crossreffetcher.cpp
new file mode 100644
index 0000000..8c5d303
--- /dev/null
+++ b/src/fetch/crossreffetcher.cpp
@@ -0,0 +1,392 @@
/***************************************************************************
    copyright            : (C) 2007 by Robby Stephenson
email : [email protected] + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "crossreffetcher.h" +#include "messagehandler.h" +#include "../translators/xslthandler.h" +#include "../translators/tellicoimporter.h" +#include "../tellico_kernel.h" +#include "../tellico_utils.h" +#include "../collection.h" +#include "../entry.h" +#include "../core/netaccess.h" +#include "../imagefactory.h" +#include "../tellico_debug.h" + +#include <klocale.h> +#include <kstandarddirs.h> +#include <kconfig.h> +#include <klineedit.h> +#include <kactivelabel.h> + +#include <qlabel.h> +#include <qwhatsthis.h> +#include <qlayout.h> +#include <qfile.h> + +// #define CROSSREF_TEST + +#define CROSSREF_USE_UNIXREF + +namespace { + static const char* CROSSREF_BASE_URL = "http://www.crossref.org/openurl/?url_ver=Z39.88-2004&noredirect=true"; +} + +using Tellico::Fetch::CrossRefFetcher; + +CrossRefFetcher::CrossRefFetcher(QObject* parent_) + : Fetcher(parent_), m_xsltHandler(0), m_job(0), m_started(false) { +} + +CrossRefFetcher::~CrossRefFetcher() { + delete m_xsltHandler; + m_xsltHandler = 0; +} + +QString CrossRefFetcher::defaultName() { + return QString::fromLatin1("CrossRef"); +} + +QString CrossRefFetcher::source() const { + return m_name.isEmpty() ? 
defaultName() : m_name; +} + +bool CrossRefFetcher::canFetch(int type) const { + return type == Data::Collection::Bibtex; +} + +void CrossRefFetcher::readConfigHook(const KConfigGroup& config_) { + QString s = config_.readEntry("User"); + if(!s.isEmpty()) { + m_user = s; + } + s = config_.readEntry("Password"); + if(!s.isEmpty()) { + m_password = s; + } +} + +void CrossRefFetcher::search(FetchKey key_, const QString& value_) { + m_key = key_; + m_value = value_.stripWhiteSpace(); + m_started = true; + + if(m_user.isEmpty() || m_password.isEmpty()) { + message(i18n("%1 requires a username and password.").arg(source()), MessageHandler::Warning); + stop(); + return; + } + + if(!canFetch(Kernel::self()->collectionType())) { + message(i18n("%1 does not allow searching for this collection type.").arg(source()), MessageHandler::Warning); + stop(); + return; + } + + m_data.truncate(0); + +// myDebug() << "CrossRefFetcher::search() - value = " << value_ << endl; + + KURL u = searchURL(m_key, m_value); + if(u.isEmpty()) { + stop(); + return; + } + + m_job = KIO::get(u, false, false); + connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)), + SLOT(slotData(KIO::Job*, const QByteArray&))); + connect(m_job, SIGNAL(result(KIO::Job*)), + SLOT(slotComplete(KIO::Job*))); +} + +void CrossRefFetcher::stop() { + if(!m_started) { + return; + } +// myDebug() << "CrossRefFetcher::stop()" << endl; + if(m_job) { + m_job->kill(); + m_job = 0; + } + m_data.truncate(0); + m_started = false; + emit signalDone(this); +} + +void CrossRefFetcher::slotData(KIO::Job*, const QByteArray& data_) { + QDataStream stream(m_data, IO_WriteOnly | IO_Append); + stream.writeRawBytes(data_.data(), data_.size()); +} + +void CrossRefFetcher::slotComplete(KIO::Job* job_) { +// myDebug() << "CrossRefFetcher::slotComplete()" << endl; + // since the fetch is done, don't worry about holding the job pointer + m_job = 0; + + if(job_->error()) { + job_->showErrorDialog(Kernel::self()->widget()); + stop(); + return; 
+ } + + if(m_data.isEmpty()) { + myDebug() << "CrossRefFetcher::slotComplete() - no data" << endl; + stop(); + return; + } + +#if 0 + kdWarning() << "Remove debug from crossreffetcher.cpp" << endl; + QFile f(QString::fromLatin1("/tmp/test.xml")); + if(f.open(IO_WriteOnly)) { + QTextStream t(&f); + t.setEncoding(QTextStream::UnicodeUTF8); + t << QCString(m_data, m_data.size()+1); + } + f.close(); +#endif + + if(!m_xsltHandler) { + initXSLTHandler(); + if(!m_xsltHandler) { // probably an error somewhere in the stylesheet loading + stop(); + return; + } + } + + // assume result is always utf-8 + QString str = m_xsltHandler->applyStylesheet(QString::fromUtf8(m_data, m_data.size())); + Import::TellicoImporter imp(str); + Data::CollPtr coll = imp.collection(); + + if(!coll) { + myDebug() << "CrossRefFetcher::slotComplete() - no valid result" << endl; + stop(); + return; + } + + Data::EntryVec entries = coll->entries(); + for(Data::EntryVec::Iterator entry = entries.begin(); entry != entries.end(); ++entry) { + if(!m_started) { + // might get aborted + break; + } + QString desc = entry->field(QString::fromLatin1("author")) + + QChar('/') + entry->field(QString::fromLatin1("publisher")); + if(!entry->field(QString::fromLatin1("year")).isEmpty()) { + desc += QChar('/') + entry->field(QString::fromLatin1("year")); + } + + SearchResult* r = new SearchResult(this, entry->title(), desc, entry->field(QString::fromLatin1("isbn"))); + m_entries.insert(r->uid, Data::EntryPtr(entry)); + emit signalResultFound(r); + } + + stop(); // required +} + +Tellico::Data::EntryPtr CrossRefFetcher::fetchEntry(uint uid_) { + Data::EntryPtr entry = m_entries[uid_]; + // if URL but no cover image, fetch it + if(!entry->field(QString::fromLatin1("url")).isEmpty()) { + Data::CollPtr coll = entry->collection(); + Data::FieldPtr field = coll->fieldByName(QString::fromLatin1("cover")); + if(!field && !coll->imageFields().isEmpty()) { + field = coll->imageFields().front(); + } else if(!field) { + field 
= new Data::Field(QString::fromLatin1("cover"), i18n("Front Cover"), Data::Field::Image); + coll->addField(field); + } + if(entry->field(field).isEmpty()) { + QPixmap pix = NetAccess::filePreview(entry->field(QString::fromLatin1("url"))); + if(!pix.isNull()) { + QString id = ImageFactory::addImage(pix, QString::fromLatin1("PNG")); + if(!id.isEmpty()) { + entry->setField(field, id); + } + } + } + } + return entry; +} + +void CrossRefFetcher::initXSLTHandler() { +#ifdef CROSSREF_USE_UNIXREF + QString xsltfile = locate("appdata", QString::fromLatin1("unixref2tellico.xsl")); +#else + QString xsltfile = locate("appdata", QString::fromLatin1("crossref2tellico.xsl")); +#endif + if(xsltfile.isEmpty()) { + kdWarning() << "CrossRefFetcher::initXSLTHandler() - can not locate xslt file." << endl; + return; + } + + KURL u; + u.setPath(xsltfile); + + delete m_xsltHandler; + m_xsltHandler = new XSLTHandler(u); + if(!m_xsltHandler->isValid()) { + kdWarning() << "CrossRefFetcher::initXSLTHandler() - error in crossref2tellico.xsl." 
<< endl; + delete m_xsltHandler; + m_xsltHandler = 0; + return; + } +} + +KURL CrossRefFetcher::searchURL(FetchKey key_, const QString& value_) const { + KURL u(QString::fromLatin1(CROSSREF_BASE_URL)); +#ifdef CROSSREF_USE_UNIXREF + u.addQueryItem(QString::fromLatin1("format"), QString::fromLatin1("unixref")); +#endif + u.addQueryItem(QString::fromLatin1("req_dat"), QString::fromLatin1("ourl_%1:%2").arg(m_user, m_password)); + + switch(key_) { + case DOI: + u.addQueryItem(QString::fromLatin1("rft_id"), QString::fromLatin1("info:doi/%1").arg(value_)); + break; + + default: + kdWarning() << "CrossRefFetcher::search() - key not recognized: " << m_key << endl; + return KURL(); + } + +#ifdef CROSSREF_TEST + u = KURL::fromPathOrURL(QString::fromLatin1("/home/robby/crossref.xml")); +#endif + myDebug() << "CrossRefFetcher::search() - url: " << u.url() << endl; + return u; +} + +void CrossRefFetcher::updateEntry(Data::EntryPtr entry_) { + QString doi = entry_->field(QString::fromLatin1("doi")); + if(!doi.isEmpty()) { + search(Fetch::DOI, doi); + return; + } + +#if 0 + // optimistically try searching for title and rely on Collection::sameEntry() to figure things out + QString t = entry_->field(QString::fromLatin1("title")); + if(!t.isEmpty()) { + m_limit = 10; // raise limit so more possibility of match + search(Fetch::Title, t); + return; + } +#endif + + myDebug() << "CrossRefFetcher::updateEntry() - insufficient info to search" << endl; + emit signalDone(this); // always need to emit this if not continuing with the search +} + +void CrossRefFetcher::updateEntrySynchronous(Data::EntryPtr entry) { + if(!entry) { + return; + } + if(m_user.isEmpty() || m_password.isEmpty()) { + myDebug() << "CrossRefFetcher::updateEntrySynchronous() - username and password is required" << endl; + return; + } + QString doi = entry->field(QString::fromLatin1("doi")); + if(doi.isEmpty()) { + return; + } + + KURL u = searchURL(DOI, doi); + QString xml = FileHandler::readTextFile(u, true, true); + 
if(xml.isEmpty()) { + return; + } + + if(!m_xsltHandler) { + initXSLTHandler(); + if(!m_xsltHandler) { // probably an error somewhere in the stylesheet loading + return; + } + } + + // assume result is always utf-8 + QString str = m_xsltHandler->applyStylesheet(xml); + Import::TellicoImporter imp(str); + Data::CollPtr coll = imp.collection(); + if(coll && coll->entryCount() > 0) { + myLog() << "CrossRefFetcher::updateEntrySynchronous() - found DOI result, merging" << endl; + Data::Collection::mergeEntry(entry, coll->entries().front(), false /*overwrite*/); + } +} + +Tellico::Fetch::ConfigWidget* CrossRefFetcher::configWidget(QWidget* parent_) const { + return new CrossRefFetcher::ConfigWidget(parent_, this); +} + +CrossRefFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const CrossRefFetcher* fetcher_) + : Fetch::ConfigWidget(parent_) { + QGridLayout* l = new QGridLayout(optionsWidget(), 4, 2); + l->setSpacing(4); + l->setColStretch(1, 10); + + int row = 0; + + KActiveLabel* al = new KActiveLabel(i18n("CrossRef requires an account for access. " + "Please read the terms and conditions and " + "<a href='http://www.crossref.org/requestaccount/'>" + "request an account</a>. Enter your OpenURL " + "account information below."), + optionsWidget()); + ++row; + l->addMultiCellWidget(al, row, row, 0, 1); + // richtext gets weird with size + al->setMinimumWidth(al->sizeHint().width()); + + QLabel* label = new QLabel(i18n("&Username: "), optionsWidget()); + l->addWidget(label, ++row, 0); + m_userEdit = new KLineEdit(optionsWidget()); + connect(m_userEdit, SIGNAL(textChanged(const QString&)), SLOT(slotSetModified())); + l->addWidget(m_userEdit, row, 1); + QString w = i18n("A username and password is required to access the CrossRef service. 
The password is " + "stored as plain text in the Tellico configuration file."); + QWhatsThis::add(label, w); + QWhatsThis::add(m_userEdit, w); + label->setBuddy(m_userEdit); + + label = new QLabel(i18n("&Password: "), optionsWidget()); + l->addWidget(label, ++row, 0); + m_passEdit = new KLineEdit(optionsWidget()); + connect(m_passEdit, SIGNAL(textChanged(const QString&)), SLOT(slotSetModified())); + l->addWidget(m_passEdit, row, 1); + QWhatsThis::add(label, w); + QWhatsThis::add(m_passEdit, w); + label->setBuddy(m_passEdit); + + if(fetcher_) { + m_userEdit->setText(fetcher_->m_user); + m_passEdit->setText(fetcher_->m_password); + } +} + +void CrossRefFetcher::ConfigWidget::saveConfig(KConfigGroup& config_) { + QString s = m_userEdit->text(); + config_.writeEntry("User", s); + + s = m_passEdit->text(); + config_.writeEntry("Password", s); + + slotSetModified(false); +} + +QString CrossRefFetcher::ConfigWidget::preferredName() const { + return CrossRefFetcher::defaultName(); +} + +#include "crossreffetcher.moc" diff --git a/src/fetch/crossreffetcher.h b/src/fetch/crossreffetcher.h new file mode 100644 index 0000000..392d46a --- /dev/null +++ b/src/fetch/crossreffetcher.h @@ -0,0 +1,97 @@ +/*************************************************************************** + copyright : (C) 2007 by Robby Stephenson + email : [email protected] + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef TELLICO_FETCH_CROSSREFFETCHER_H +#define TELLICO_FETCH_CROSSREFFETCHER_H + +#include "fetcher.h" +#include "configwidget.h" +#include "../datavectors.h" + +#include <kio/job.h> + 
+#include <qcstring.h> // for QByteArray +#include <qguardedptr.h> + +class KLineEdit; + +namespace Tellico { + + class XSLTHandler; + + namespace Fetch { + +/** + * @author Robby Stephenson + */ +class CrossRefFetcher : public Fetcher { +Q_OBJECT + +public: + CrossRefFetcher(QObject* parent); + ~CrossRefFetcher(); + + virtual QString source() const; + virtual bool isSearching() const { return m_started; } + virtual void search(FetchKey key, const QString& value); + + virtual bool canSearch(FetchKey k) const { return k == DOI; } + virtual void stop(); + virtual Data::EntryPtr fetchEntry(uint uid); + virtual Type type() const { return CrossRef; } + virtual bool canFetch(int type) const; + virtual void readConfigHook(const KConfigGroup& config); + + virtual void updateEntry(Data::EntryPtr entry); + virtual void updateEntrySynchronous(Data::EntryPtr entry); + + virtual Fetch::ConfigWidget* configWidget(QWidget* parent) const; + + class ConfigWidget : public Fetch::ConfigWidget { + public: + ConfigWidget(QWidget* parent_, const CrossRefFetcher* fetcher = 0); + virtual void saveConfig(KConfigGroup& config); + virtual QString preferredName() const; + private: + KLineEdit* m_userEdit; + KLineEdit* m_passEdit; + }; + friend class ConfigWidget; + + static QString defaultName(); + +private slots: + void slotData(KIO::Job* job, const QByteArray& data); + void slotComplete(KIO::Job* job); + +private: + void initXSLTHandler(); + KURL searchURL(FetchKey key, const QString& value) const; + + XSLTHandler* m_xsltHandler; + + QString m_user; + QString m_password; + + QByteArray m_data; + QMap<int, Data::EntryPtr> m_entries; + QGuardedPtr<KIO::Job> m_job; + + FetchKey m_key; + QString m_value; + bool m_started; +}; + + } +} +#endif diff --git a/src/fetch/discogsfetcher.cpp b/src/fetch/discogsfetcher.cpp new file mode 100644 index 0000000..31a8bab --- /dev/null +++ b/src/fetch/discogsfetcher.cpp @@ -0,0 +1,413 @@ 
+/*************************************************************************** + copyright : (C) 2008 by Robby Stephenson + email : [email protected] + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "discogsfetcher.h" +#include "messagehandler.h" +#include "../translators/xslthandler.h" +#include "../translators/tellicoimporter.h" +#include "../imagefactory.h" +#include "../tellico_kernel.h" +#include "../tellico_utils.h" +#include "../collection.h" +#include "../entry.h" +#include "../tellico_debug.h" + +#include <klocale.h> +#include <kstandarddirs.h> +#include <kconfig.h> +#include <kio/job.h> + +#include <qlabel.h> +#include <qlayout.h> +#include <qfile.h> +#include <qwhatsthis.h> + +//#define DISCOGS_TEST + +namespace { + static const int DISCOGS_MAX_RETURNS_TOTAL = 20; + static const char* DISCOGS_API_URL = "http://www.discogs.com"; + static const char* DISCOGS_API_KEY = "de6cb96534"; +} + +using Tellico::Fetch::DiscogsFetcher; + +DiscogsFetcher::DiscogsFetcher(QObject* parent_, const char* name_) + : Fetcher(parent_, name_), m_xsltHandler(0), + m_limit(DISCOGS_MAX_RETURNS_TOTAL), m_job(0), m_started(false), + m_apiKey(QString::fromLatin1(DISCOGS_API_KEY)) { +} + +DiscogsFetcher::~DiscogsFetcher() { + delete m_xsltHandler; + m_xsltHandler = 0; +} + +QString DiscogsFetcher::defaultName() { + return i18n("Discogs Audio Search"); +} + +QString DiscogsFetcher::source() const { + return m_name.isEmpty() ? 
defaultName() : m_name; +} + +bool DiscogsFetcher::canFetch(int type) const { + return type == Data::Collection::Album; +} + +void DiscogsFetcher::readConfigHook(const KConfigGroup& config_) { + QString k = config_.readEntry("API Key"); + if(!k.isEmpty()) { + m_apiKey = k; + } + m_fetchImages = config_.readBoolEntry("Fetch Images", true); + m_fields = config_.readListEntry("Custom Fields"); +} + +void DiscogsFetcher::search(FetchKey key_, const QString& value_) { + m_key = key_; + m_value = value_; + m_started = true; + m_start = 1; + m_total = -1; + doSearch(); +} + +void DiscogsFetcher::continueSearch() { + m_started = true; + doSearch(); +} + +void DiscogsFetcher::doSearch() { + KURL u(QString::fromLatin1(DISCOGS_API_URL)); + u.addQueryItem(QString::fromLatin1("f"), QString::fromLatin1("xml")); + u.addQueryItem(QString::fromLatin1("api_key"), m_apiKey); + + if(!canFetch(Kernel::self()->collectionType())) { + message(i18n("%1 does not allow searching for this collection type.").arg(source()), MessageHandler::Warning); + stop(); + return; + } + + switch(m_key) { + case Title: + u.setPath(QString::fromLatin1("/search")); + u.addQueryItem(QString::fromLatin1("q"), m_value); + u.addQueryItem(QString::fromLatin1("type"), QString::fromLatin1("release")); + break; + + case Person: + u.setPath(QString::fromLatin1("/artist/%1").arg(m_value)); + break; + + case Keyword: + u.setPath(QString::fromLatin1("/search")); + u.addQueryItem(QString::fromLatin1("q"), m_value); + u.addQueryItem(QString::fromLatin1("type"), QString::fromLatin1("all")); + break; + + default: + kdWarning() << "DiscogsFetcher::search() - key not recognized: " << m_key << endl; + stop(); + return; + } + +#ifdef DISCOGS_TEST + u = KURL(QString::fromLatin1("/home/robby/discogs-results.xml")); +#endif +// myDebug() << "DiscogsFetcher::search() - url: " << u.url() << endl; + + m_job = KIO::get(u, false, false); + connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)), + SLOT(slotData(KIO::Job*, const 
QByteArray&))); + connect(m_job, SIGNAL(result(KIO::Job*)), + SLOT(slotComplete(KIO::Job*))); +} + +void DiscogsFetcher::stop() { + if(!m_started) { + return; + } + if(m_job) { + m_job->kill(); + m_job = 0; + } + m_data.truncate(0); + m_started = false; + emit signalDone(this); +} + +void DiscogsFetcher::slotData(KIO::Job*, const QByteArray& data_) { + QDataStream stream(m_data, IO_WriteOnly | IO_Append); + stream.writeRawBytes(data_.data(), data_.size()); +} + +void DiscogsFetcher::slotComplete(KIO::Job* job_) { +// myDebug() << "DiscogsFetcher::slotComplete()" << endl; + if(job_->error()) { + job_->showErrorDialog(Kernel::self()->widget()); + stop(); + return; + } + + if(m_data.isEmpty()) { + myDebug() << "DiscogsFetcher::slotComplete() - no data" << endl; + stop(); + return; + } + +#if 0 + kdWarning() << "Remove debug from discogsfetcher.cpp" << endl; + QFile f(QString::fromLatin1("/tmp/test.xml")); + if(f.open(IO_WriteOnly)) { + QTextStream t(&f); + t.setEncoding(QTextStream::UnicodeUTF8); + t << QCString(m_data, m_data.size()+1); + } + f.close(); +#endif + + if(!m_xsltHandler) { + initXSLTHandler(); + if(!m_xsltHandler) { // probably an error somewhere in the stylesheet loading + stop(); + return; + } + } + + if(m_total == -1) { + QDomDocument dom; + if(!dom.setContent(m_data, false)) { + kdWarning() << "DiscogsFetcher::slotComplete() - server did not return valid XML." 
<< endl; + stop(); /* every exit from slotComplete() must go through stop() so signalDone is emitted and m_started is reset */ + return; + } + // total is /resp/searchresults/@numResults + /* start from the document node: documentElement() is already the root element, so asking it for a child named "resp" would not follow the path above */ + QDomNode n = dom.namedItem(QString::fromLatin1("resp")) + .namedItem(QString::fromLatin1("searchresults")); + QDomElement e = n.toElement(); + if(!e.isNull()) { + m_total = e.attribute(QString::fromLatin1("numResults")).toInt(); + myDebug() << "total = " << m_total << endl; + } + } + + // assume discogs is always utf-8 + QString str = m_xsltHandler->applyStylesheet(QString::fromUtf8(m_data, m_data.size())); + Import::TellicoImporter imp(str); + Data::CollPtr coll = imp.collection(); + if(!coll) { + myDebug() << "DiscogsFetcher::slotComplete() - no collection pointer" << endl; + stop(); + return; + } + + int count = 0; + Data::EntryVec entries = coll->entries(); + for(Data::EntryVec::Iterator entry = entries.begin(); count < m_limit && entry != entries.end(); ++entry, ++count) { + if(!m_started) { + // might get aborted + break; + } + QString desc = entry->field(QString::fromLatin1("artist")) + + QChar('/') + + entry->field(QString::fromLatin1("label")); + + SearchResult* r = new SearchResult(this, entry->title(), desc, QString()); + m_entries.insert(r->uid, Data::EntryPtr(entry)); + emit signalResultFound(r); + } + m_start = m_entries.count() + 1; + // not sure how to specify start in the REST url + // m_hasMoreResults = m_start <= m_total; + + stop(); // required +} + +Tellico::Data::EntryPtr DiscogsFetcher::fetchEntry(uint uid_) { + Data::EntryPtr entry = m_entries[uid_]; + if(!entry) { + kdWarning() << "DiscogsFetcher::fetchEntry() - no entry in dict" << endl; + return 0; + } + // one way we tell if this entry has been fully initialized is to + // check for a cover image + if(!entry->field(QString::fromLatin1("cover")).isEmpty()) { + myLog() << "DiscogsFetcher::fetchEntry() - already downloaded " << entry->title() << endl; + return entry; + } + + QString release = entry->field(QString::fromLatin1("discogs-id")); + if(release.isEmpty()) { + myDebug() << "DiscogsFetcher::fetchEntry() - 
no discogs release found" << endl; + return entry; + } + +#ifdef DISCOGS_TEST + KURL u(QString::fromLatin1("/home/robby/discogs-release.xml")); +#else + KURL u(QString::fromLatin1(DISCOGS_API_URL)); + u.setPath(QString::fromLatin1("/release/%1").arg(release)); + u.addQueryItem(QString::fromLatin1("f"), QString::fromLatin1("xml")); + u.addQueryItem(QString::fromLatin1("api_key"), m_apiKey); +#endif +// myDebug() << "DiscogsFetcher::fetchEntry() - url: " << u << endl; + + // quiet, utf8, allowCompressed + QString output = FileHandler::readTextFile(u, true, true, true); +#if 0 + kdWarning() << "Remove output debug from discogsfetcher.cpp" << endl; + QFile f(QString::fromLatin1("/tmp/test.xml")); + if(f.open(IO_WriteOnly)) { + QTextStream t(&f); + t.setEncoding(QTextStream::UnicodeUTF8); + t << output; + } + f.close(); +#endif + + /* same lazy-init guard as slotComplete(): never dereference a null stylesheet handler */ + if(!m_xsltHandler) { + initXSLTHandler(); + if(!m_xsltHandler) { + return entry; + } + } + + Import::TellicoImporter imp(m_xsltHandler->applyStylesheet(output)); + Data::CollPtr coll = imp.collection(); +// getTracks(entry); + if(!coll) { + kdWarning() << "DiscogsFetcher::fetchEntry() - no collection pointer" << endl; + return entry; + } + + /* front() below must not be called on an empty entry list; fall back to the summary entry like the other failure paths */ + if(coll->entryCount() == 0) { + myDebug() << "DiscogsFetcher::fetchEntry() - no entries in collection" << endl; + return entry; + } + + if(coll->entryCount() > 1) { + myDebug() << "DiscogsFetcher::fetchEntry() - weird, more than one entry found" << endl; + } + + const StringMap customFields = this->customFields(); + for(StringMap::ConstIterator it = customFields.begin(); it != customFields.end(); ++it) { + if(!m_fields.contains(it.key())) { + coll->removeField(it.key()); + } + } + + // don't want to include id + coll->removeField(QString::fromLatin1("discogs-id")); + + entry = coll->entries().front(); + m_entries.replace(uid_, entry); + return entry; +} + +void DiscogsFetcher::initXSLTHandler() { + QString xsltfile = locate("appdata", QString::fromLatin1("discogs2tellico.xsl")); + if(xsltfile.isEmpty()) { + kdWarning() << "DiscogsFetcher::initXSLTHandler() - can not locate discogs2tellico.xsl." 
<< endl; + return; + } + + KURL u; + u.setPath(xsltfile); + + delete m_xsltHandler; + m_xsltHandler = new XSLTHandler(u); + if(!m_xsltHandler->isValid()) { + kdWarning() << "DiscogsFetcher::initXSLTHandler() - error in discogs2tellico.xsl." << endl; + delete m_xsltHandler; + m_xsltHandler = 0; + return; + } +} + +void DiscogsFetcher::updateEntry(Data::EntryPtr entry_) { +// myDebug() << "DiscogsFetcher::updateEntry()" << endl; + + QString value; + QString title = entry_->field(QString::fromLatin1("title")); + if(!title.isEmpty()) { + search(Title, value); + return; + } + + QString artist = entry_->field(QString::fromLatin1("artist")); + if(!artist.isEmpty()) { + search(Person, artist); + return; + } + + myDebug() << "DiscogsFetcher::updateEntry() - insufficient info to search" << endl; + emit signalDone(this); // always need to emit this if not continuing with the search +} + +Tellico::Fetch::ConfigWidget* DiscogsFetcher::configWidget(QWidget* parent_) const { + return new DiscogsFetcher::ConfigWidget(parent_, this); +} + +DiscogsFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const DiscogsFetcher* fetcher_) + : Fetch::ConfigWidget(parent_) { + QGridLayout* l = new QGridLayout(optionsWidget(), 2, 2); + l->setSpacing(4); + l->setColStretch(1, 10); + + int row = -1; + QLabel* label = new QLabel(i18n("API &key: "), optionsWidget()); + l->addWidget(label, ++row, 0); + + m_apiKeyEdit = new KLineEdit(optionsWidget()); + connect(m_apiKeyEdit, SIGNAL(textChanged(const QString&)), SLOT(slotSetModified())); + l->addWidget(m_apiKeyEdit, row, 1); + QString w = i18n("With your discogs.com account you receive an API key for the usage of their XML-based interface " + "(See http://www.discogs.com/help/api)."); + QWhatsThis::add(label, w); + QWhatsThis::add(m_apiKeyEdit, w); + label->setBuddy(m_apiKeyEdit); + + m_fetchImageCheck = new QCheckBox(i18n("Download cover &image"), optionsWidget()); + connect(m_fetchImageCheck, SIGNAL(clicked()), SLOT(slotSetModified())); + ++row; + 
l->addMultiCellWidget(m_fetchImageCheck, row, row, 0, 1); + w = i18n("The cover image may be downloaded as well. However, too many large images in the " + "collection may degrade performance."); + QWhatsThis::add(m_fetchImageCheck, w); + + l->setRowStretch(++row, 10); + + // now add additional fields widget + addFieldsWidget(DiscogsFetcher::customFields(), fetcher_ ? fetcher_->m_fields : QStringList()); + + if(fetcher_) { + m_apiKeyEdit->setText(fetcher_->m_apiKey); + m_fetchImageCheck->setChecked(fetcher_->m_fetchImages); + } else { + m_apiKeyEdit->setText(QString::fromLatin1(DISCOGS_API_KEY)); + m_fetchImageCheck->setChecked(true); + } +} + +void DiscogsFetcher::ConfigWidget::saveConfig(KConfigGroup& config_) { + QString apiKey = m_apiKeyEdit->text().stripWhiteSpace(); + if(!apiKey.isEmpty()) { + config_.writeEntry("API Key", apiKey); + } + config_.writeEntry("Fetch Images", m_fetchImageCheck->isChecked()); + + saveFieldsConfig(config_); + slotSetModified(false); +} + +QString DiscogsFetcher::ConfigWidget::preferredName() const { + return DiscogsFetcher::defaultName(); +} + +Tellico::StringMap DiscogsFetcher::customFields() { + StringMap map; + map[QString::fromLatin1("producer")] = i18n("Producer"); + map[QString::fromLatin1("nationality")] = i18n("Nationality"); + map[QString::fromLatin1("discogs")] = i18n("Discogs Link"); + return map; +} + +#include "discogsfetcher.moc" diff --git a/src/fetch/discogsfetcher.h b/src/fetch/discogsfetcher.h new file mode 100644 index 0000000..ac8c1b8 --- /dev/null +++ b/src/fetch/discogsfetcher.h @@ -0,0 +1,117 @@ +/*************************************************************************** + copyright : (C) 2008 by Robby Stephenson + email : [email protected] + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 
2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef DISCOGSFETCHER_H +#define DISCOGSFETCHER_H + +namespace Tellico { + class XSLTHandler; +} + +#include "fetcher.h" +#include "configwidget.h" +#include "../datavectors.h" +#include <klineedit.h> + +#include <qdom.h> +#include <qcstring.h> // for QByteArray +#include <qguardedptr.h> + +namespace KIO { + class Job; +} + +namespace Tellico { + namespace Fetch { + +/** + * A fetcher for discogs.com + * + * @author Robby Stephenson + */ +class DiscogsFetcher : public Fetcher { +Q_OBJECT + +public: + /** + */ + DiscogsFetcher(QObject* parent, const char* name = 0); + /** + */ + virtual ~DiscogsFetcher(); + + /** + */ + virtual QString source() const; + virtual bool isSearching() const { return m_started; } + virtual void search(FetchKey key, const QString& value); + virtual void continueSearch(); + // amazon can search title or person + virtual bool canSearch(FetchKey k) const { return k == Title || k == Person || k == Keyword; } + virtual void stop(); + virtual Data::EntryPtr fetchEntry(uint uid); + virtual Type type() const { return Discogs; } + virtual bool canFetch(int type) const; + virtual void readConfigHook(const KConfigGroup& config); + + virtual void updateEntry(Data::EntryPtr entry); + + /** + * Returns a widget for modifying the fetcher's config. 
+ */ + virtual Fetch::ConfigWidget* configWidget(QWidget* parent) const; + + static StringMap customFields(); + + class ConfigWidget : public Fetch::ConfigWidget { + public: + ConfigWidget(QWidget* parent_, const DiscogsFetcher* fetcher = 0); + virtual void saveConfig(KConfigGroup&); + virtual QString preferredName() const; + private: + KLineEdit *m_apiKeyEdit; + QCheckBox* m_fetchImageCheck; + }; + friend class ConfigWidget; + + static QString defaultName(); + +private slots: + void slotData(KIO::Job* job, const QByteArray& data); + void slotComplete(KIO::Job* job); + +private: + void initXSLTHandler(); + void doSearch(); + + XSLTHandler* m_xsltHandler; + int m_limit; + int m_start; + int m_total; + + QByteArray m_data; + QMap<int, Data::EntryPtr> m_entries; + QGuardedPtr<KIO::Job> m_job; + + FetchKey m_key; + QString m_value; + bool m_started; + + bool m_fetchImages; + QString m_apiKey; + QStringList m_fields; +}; + + } // end namespace +} // end namespace +#endif diff --git a/src/fetch/entrezfetcher.cpp b/src/fetch/entrezfetcher.cpp new file mode 100644 index 0000000..14b9e20 --- /dev/null +++ b/src/fetch/entrezfetcher.cpp @@ -0,0 +1,498 @@ +/*************************************************************************** + copyright : (C) 2005-2006 by Robby Stephenson + email : [email protected] + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "entrezfetcher.h" +#include "../tellico_kernel.h" +#include "../latin1literal.h" +#include "../collection.h" +#include "../entry.h" +#include "../filehandler.h" +#include "../translators/xslthandler.h" +#include 
"../translators/tellicoimporter.h" +#include "../tellico_debug.h" + +#include <klocale.h> +#include <kconfig.h> +#include <kstandarddirs.h> +#include <kio/job.h> + +#include <qdom.h> +#include <qlabel.h> +#include <qlayout.h> +#include <qfile.h> + +//#define ENTREZ_TEST + +namespace { + static const int ENTREZ_MAX_RETURNS_TOTAL = 25; + static const char* ENTREZ_BASE_URL = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/"; + static const char* ENTREZ_SEARCH_CGI = "esearch.fcgi"; + static const char* ENTREZ_SUMMARY_CGI = "esummary.fcgi"; + static const char* ENTREZ_FETCH_CGI = "efetch.fcgi"; + static const char* ENTREZ_LINK_CGI = "elink.fcgi"; + static const char* ENTREZ_DEFAULT_DATABASE = "pubmed"; +} + +using Tellico::Fetch::EntrezFetcher; + +EntrezFetcher::EntrezFetcher(QObject* parent_, const char* name_) : Fetcher(parent_, name_), m_xsltHandler(0), + m_step(Begin), m_started(false) { +} + +/* Releases the stylesheet handler that initXSLTHandler() allocates with new; nothing else in this file frees it once it is valid. */ +EntrezFetcher::~EntrezFetcher() { + delete m_xsltHandler; + m_xsltHandler = 0; +} + +QString EntrezFetcher::defaultName() { + return i18n("Entrez Database"); +} + +QString EntrezFetcher::source() const { + return m_name.isEmpty() ? 
defaultName() : m_name; +} + +bool EntrezFetcher::canFetch(int type) const { + return type == Data::Collection::Bibtex; +} + +void EntrezFetcher::readConfigHook(const KConfigGroup& config_) { + QString s = config_.readEntry("Database", QString::fromLatin1(ENTREZ_DEFAULT_DATABASE)); // default to pubmed + if(!s.isEmpty()) { + m_dbname = s; + } + m_fields = config_.readListEntry("Custom Fields"); +} + +void EntrezFetcher::search(FetchKey key_, const QString& value_) { + m_started = true; + m_start = 1; + m_total = -1; + +// only search if current collection is a bibliography + if(!canFetch(Kernel::self()->collectionType())) { + myDebug() << "EntrezFetcher::search() - collection type mismatch, stopping" << endl; + stop(); + return; + } + if(m_dbname.isEmpty()) { + m_dbname = QString::fromLatin1(ENTREZ_DEFAULT_DATABASE); + } + +#ifdef ENTREZ_TEST + KURL u = KURL::fromPathOrURL(QString::fromLatin1("/home/robby/esearch.xml")); +#else + KURL u(QString::fromLatin1(ENTREZ_BASE_URL)); + u.addPath(QString::fromLatin1(ENTREZ_SEARCH_CGI)); + u.addQueryItem(QString::fromLatin1("tool"), QString::fromLatin1("Tellico")); + u.addQueryItem(QString::fromLatin1("retmode"), QString::fromLatin1("xml")); + u.addQueryItem(QString::fromLatin1("usehistory"), QString::fromLatin1("y")); + u.addQueryItem(QString::fromLatin1("retmax"), QString::fromLatin1("1")); // we're just getting the count + u.addQueryItem(QString::fromLatin1("db"), m_dbname); + u.addQueryItem(QString::fromLatin1("term"), value_); + switch(key_) { + case Title: + u.addQueryItem(QString::fromLatin1("field"), QString::fromLatin1("titl")); + break; + + case Person: + u.addQueryItem(QString::fromLatin1("field"), QString::fromLatin1("auth")); + break; + + case Keyword: + // for Tellico Keyword searches basically mean search for any field matching +// u.addQueryItem(QString::fromLatin1("field"), QString::fromLatin1("word")); + break; + + case PubmedID: + u.addQueryItem(QString::fromLatin1("field"), QString::fromLatin1("pmid")); + 
break; + + case DOI: + case Raw: + u.setQuery(u.query() + '&' + value_); + break; + + default: + kdWarning() << "EntrezFetcher::search() - FetchKey not supported" << endl; + stop(); + return; + } +#endif + + m_step = Search; +// myLog() << "EntrezFetcher::doSearch() - url: " << u.url() << endl; + m_job = KIO::get(u, false, false); + connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)), + SLOT(slotData(KIO::Job*, const QByteArray&))); + connect(m_job, SIGNAL(result(KIO::Job*)), + SLOT(slotComplete(KIO::Job*))); +} + +void EntrezFetcher::continueSearch() { + m_started = true; + doSummary(); +} + +void EntrezFetcher::stop() { + if(!m_started) { + return; + } + if(m_job) { + m_job->kill(); + m_job = 0; + } + m_data.truncate(0); + m_started = false; + m_step = Begin; + emit signalDone(this); +} + +void EntrezFetcher::slotData(KIO::Job*, const QByteArray& data_) { + QDataStream stream(m_data, IO_WriteOnly | IO_Append); + stream.writeRawBytes(data_.data(), data_.size()); +} + +void EntrezFetcher::slotComplete(KIO::Job* job_) { + // since the fetch is done, don't worry about holding the job pointer + m_job = 0; + + if(job_->error()) { + job_->showErrorDialog(Kernel::self()->widget()); + stop(); + return; + } + + if(m_data.isEmpty()) { + myDebug() << "EntrezFetcher::slotComplete() - no data" << endl; + stop(); + return; + } + +#if 0 + kdWarning() << "Remove debug from entrezfetcher.cpp: " << __LINE__ << endl; + QFile f(QString::fromLatin1("/tmp/test.xml")); + if(f.open(IO_WriteOnly)) { + QTextStream t(&f); + t.setEncoding(QTextStream::UnicodeUTF8); + t << QCString(m_data, m_data.size()+1); + } + f.close(); +#endif + + switch(m_step) { + case Search: + searchResults(); + break; + case Summary: + summaryResults(); + break; + case Begin: + case Fetch: + default: + myLog() << "EntrezFetcher::slotComplete() - wrong step = " << m_step << endl; + stop(); + break; + } +} + +void EntrezFetcher::searchResults() { + QDomDocument dom; + if(!dom.setContent(m_data, false)) { + 
kdWarning() << "EntrezFetcher::searchResults() - server did not return valid XML." << endl; + stop(); + return; + } + // find Count, QueryKey, and WebEnv elements + int count = 0; + for(QDomNode n = dom.documentElement().firstChild(); !n.isNull(); n = n.nextSibling()) { + QDomElement e = n.toElement(); + if(e.isNull()) { + continue; + } + if(e.tagName() == Latin1Literal("Count")) { + m_total = e.text().toInt(); + ++count; + } else if(e.tagName() == Latin1Literal("QueryKey")) { + m_queryKey = e.text(); + ++count; + } else if(e.tagName() == Latin1Literal("WebEnv")) { + m_webEnv = e.text(); + ++count; + } + if(count >= 3) { + break; // found them all + } + } + + m_data.truncate(0); + doSummary(); +} + +void EntrezFetcher::doSummary() { +#ifdef ENTREZ_TEST + KURL u = KURL::fromPathOrURL(QString::fromLatin1("/home/robby/esummary.xml")); +#else + KURL u(QString::fromLatin1(ENTREZ_BASE_URL)); + u.addPath(QString::fromLatin1(ENTREZ_SUMMARY_CGI)); + u.addQueryItem(QString::fromLatin1("tool"), QString::fromLatin1("Tellico")); + u.addQueryItem(QString::fromLatin1("retmode"), QString::fromLatin1("xml")); + u.addQueryItem(QString::fromLatin1("retstart"), QString::number(m_start)); + u.addQueryItem(QString::fromLatin1("retmax"), QString::number(QMIN(m_total-m_start-1, ENTREZ_MAX_RETURNS_TOTAL))); + u.addQueryItem(QString::fromLatin1("usehistory"), QString::fromLatin1("y")); + u.addQueryItem(QString::fromLatin1("db"), m_dbname); + u.addQueryItem(QString::fromLatin1("query_key"), m_queryKey); + u.addQueryItem(QString::fromLatin1("WebEnv"), m_webEnv); +#endif + + m_step = Summary; +// myLog() << "EntrezFetcher::searchResults() - url: " << u.url() << endl; + m_job = KIO::get(u, false, false); + connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)), + SLOT(slotData(KIO::Job*, const QByteArray&))); + connect(m_job, SIGNAL(result(KIO::Job*)), + SLOT(slotComplete(KIO::Job*))); +} + +void EntrezFetcher::summaryResults() { + QDomDocument dom; + if(!dom.setContent(m_data, false)) { + 
kdWarning() << "EntrezFetcher::summaryResults() - server did not return valid XML." << endl; + stop(); + return; + } + // top child is eSummaryResult + // all children are DocSum + for(QDomNode n = dom.documentElement().firstChild(); !n.isNull(); n = n.nextSibling()) { + QDomElement e = n.toElement(); + if(e.isNull() || e.tagName() != Latin1Literal("DocSum")) { + continue; + } + QDomNodeList nodes = e.elementsByTagName(QString::fromLatin1("Id")); + if(nodes.count() == 0) { + myDebug() << "EntrezFetcher::summaryResults() - no Id elements" << endl; + continue; + } + int id = nodes.item(0).toElement().text().toInt(); + QString title, pubdate, authors; + nodes = e.elementsByTagName(QString::fromLatin1("Item")); + for(uint j = 0; j < nodes.count(); ++j) { + if(nodes.item(j).toElement().attribute(QString::fromLatin1("Name")) == Latin1Literal("Title")) { + title = nodes.item(j).toElement().text(); + } else if(nodes.item(j).toElement().attribute(QString::fromLatin1("Name")) == Latin1Literal("PubDate")) { + pubdate = nodes.item(j).toElement().text(); + } else if(nodes.item(j).toElement().attribute(QString::fromLatin1("Name")) == Latin1Literal("AuthorList")) { + QStringList list; + for(QDomNode aNode = nodes.item(j).firstChild(); !aNode.isNull(); aNode = aNode.nextSibling()) { + // lazy, assume all children Items are authors + if(aNode.nodeName() == Latin1Literal("Item")) { + list << aNode.toElement().text(); + } + } + authors = list.join(QString::fromLatin1("; ")); + } + if(!title.isEmpty() && !pubdate.isEmpty() && !authors.isEmpty()) { + break; // done now + } + } + SearchResult* r = new SearchResult(this, title, pubdate + '/' + authors, QString()); + m_matches.insert(r->uid, id); + emit signalResultFound(r); + } + m_start = m_matches.count() + 1; + m_hasMoreResults = m_start <= m_total; + stop(); // done searching +} + +Tellico::Data::EntryPtr EntrezFetcher::fetchEntry(uint uid_) { + // if we already grabbed this one, then just pull it out of the dict + Data::EntryPtr 
entry = m_entries[uid_]; + if(entry) { + return entry; + } + + if(!m_matches.contains(uid_)) { + return 0; + } + + if(!m_xsltHandler) { + initXSLTHandler(); + if(!m_xsltHandler) { // probably an error somewhere in the stylesheet loading + stop(); + return 0; + } + } + + int id = m_matches[uid_]; +#ifdef ENTREZ_TEST + KURL u = KURL::fromPathOrURL(QString::fromLatin1("/home/robby/pubmed.xml")); +#else + KURL u(QString::fromLatin1(ENTREZ_BASE_URL)); + u.addPath(QString::fromLatin1(ENTREZ_FETCH_CGI)); + u.addQueryItem(QString::fromLatin1("tool"), QString::fromLatin1("Tellico")); + u.addQueryItem(QString::fromLatin1("retmode"), QString::fromLatin1("xml")); + u.addQueryItem(QString::fromLatin1("rettype"), QString::fromLatin1("abstract")); + u.addQueryItem(QString::fromLatin1("db"), m_dbname); + u.addQueryItem(QString::fromLatin1("id"), QString::number(id)); +#endif + // now it's sychronous, and we know that it's utf8 + QString xmlOutput = FileHandler::readTextFile(u, false /*quiet*/, true /*utf8*/); + if(xmlOutput.isEmpty()) { + kdWarning() << "EntrezFetcher::fetchEntry() - unable to download " << u << endl; + return 0; + } +#if 0 + kdWarning() << "EntrezFetcher::fetchEntry() - turn me off!" 
<< endl; + QFile f1(QString::fromLatin1("/tmp/test-entry.xml")); + if(f1.open(IO_WriteOnly)) { + QTextStream t(&f1); + t.setEncoding(QTextStream::UnicodeUTF8); + t << xmlOutput; + } + f1.close(); +#endif + QString str = m_xsltHandler->applyStylesheet(xmlOutput); + Import::TellicoImporter imp(str); + Data::CollPtr coll = imp.collection(); + if(!coll) { + kdWarning() << "EntrezFetcher::fetchEntry() - invalid collection" << endl; + return 0; + } + if(coll->entryCount() == 0) { + myDebug() << "EntrezFetcher::fetchEntry() - no entries in collection" << endl; + return 0; + } else if(coll->entryCount() > 1) { + myDebug() << "EntrezFetcher::fetchEntry() - collection has multiple entries, taking first one" << endl; + } + + Data::EntryPtr e = coll->entries().front(); + + // try to get a link, but only if necessary + if(m_fields.contains(QString::fromLatin1("url"))) { + KURL link(QString::fromLatin1(ENTREZ_BASE_URL)); + link.addPath(QString::fromLatin1(ENTREZ_LINK_CGI)); + link.addQueryItem(QString::fromLatin1("tool"), QString::fromLatin1("Tellico")); + link.addQueryItem(QString::fromLatin1("cmd"), QString::fromLatin1("llinks")); + link.addQueryItem(QString::fromLatin1("db"), m_dbname); + link.addQueryItem(QString::fromLatin1("dbfrom"), m_dbname); + link.addQueryItem(QString::fromLatin1("id"), QString::number(id)); + + QDomDocument linkDom = FileHandler::readXMLFile(link, false /* namespace */, true /* quiet */); + // need eLinkResult/LinkSet/IdUrlList/IdUrlSet/ObjUrl/Url + QDomNode linkNode = linkDom.namedItem(QString::fromLatin1("eLinkResult")) + .namedItem(QString::fromLatin1("LinkSet")) + .namedItem(QString::fromLatin1("IdUrlList")) + .namedItem(QString::fromLatin1("IdUrlSet")) + .namedItem(QString::fromLatin1("ObjUrl")) + .namedItem(QString::fromLatin1("Url")); + if(!linkNode.isNull()) { + QString u = linkNode.toElement().text(); +// myDebug() << u << endl; + if(!u.isEmpty()) { + if(!coll->hasField(QString::fromLatin1("url"))) { + Data::FieldPtr field = new 
Data::Field(QString::fromLatin1("url"), i18n("URL"), Data::Field::URL); + field->setCategory(i18n("Miscellaneous")); + coll->addField(field); + } + e->setField(QString::fromLatin1("url"), u); + } + } + } + + const StringMap customFields = EntrezFetcher::customFields(); + for(StringMap::ConstIterator it = customFields.begin(); it != customFields.end(); ++it) { + if(!m_fields.contains(it.key())) { + coll->removeField(it.key()); + } + } + + m_entries.insert(uid_, e); + return e; +} + +void EntrezFetcher::initXSLTHandler() { + QString xsltfile = locate("appdata", QString::fromLatin1("pubmed2tellico.xsl")); + if(xsltfile.isEmpty()) { + kdWarning() << "EntrezFetcher::initXSLTHandler() - can not locate pubmed2tellico.xsl." << endl; + return; + } + + KURL u; + u.setPath(xsltfile); + + if(!m_xsltHandler) { + m_xsltHandler = new XSLTHandler(u); + } + if(!m_xsltHandler->isValid()) { + kdWarning() << "EntrezFetcher::initXSLTHandler() - error in pubmed2tellico.xsl." << endl; + delete m_xsltHandler; + m_xsltHandler = 0; + return; + } +} + +void EntrezFetcher::updateEntry(Data::EntryPtr entry_) { +// myDebug() << "EntrezFetcher::updateEntry()" << endl; + QString s = entry_->field(QString::fromLatin1("pmid")); + if(!s.isEmpty()) { + search(PubmedID, s); + return; + } + + s = entry_->field(QString::fromLatin1("doi")); + if(!s.isEmpty()) { + search(DOI, s); + return; + } + + s = entry_->field(QString::fromLatin1("title")); + if(!s.isEmpty()) { + search(Title, s); + return; + } + + myDebug() << "EntrezFetcher::updateEntry() - insufficient info to search" << endl; + emit signalDone(this); // always need to emit this if not continuing with the search +} + +Tellico::Fetch::ConfigWidget* EntrezFetcher::configWidget(QWidget* parent_) const { + return new EntrezFetcher::ConfigWidget(parent_, this); +} + +EntrezFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const EntrezFetcher* fetcher_/*=0*/) + : Fetch::ConfigWidget(parent_) { + QVBoxLayout* l = new QVBoxLayout(optionsWidget()); + 
l->addWidget(new QLabel(i18n("This source has no options."), optionsWidget())); + l->addStretch(); + + // now add additional fields widget + addFieldsWidget(EntrezFetcher::customFields(), fetcher_ ? fetcher_->m_fields : QStringList()); +} + +void EntrezFetcher::ConfigWidget::saveConfig(KConfigGroup& config_) { + saveFieldsConfig(config_); + slotSetModified(false); +} + +QString EntrezFetcher::ConfigWidget::preferredName() const { + return EntrezFetcher::defaultName(); +} + +//static +Tellico::StringMap EntrezFetcher::customFields() { + StringMap map; + map[QString::fromLatin1("institution")] = i18n("Institution"); + map[QString::fromLatin1("abstract")] = i18n("Abstract"); + map[QString::fromLatin1("url")] = i18n("URL"); + return map; +} + +#include "entrezfetcher.moc" diff --git a/src/fetch/entrezfetcher.h b/src/fetch/entrezfetcher.h new file mode 100644 index 0000000..c8aac49 --- /dev/null +++ b/src/fetch/entrezfetcher.h @@ -0,0 +1,113 @@ +/*************************************************************************** + copyright : (C) 2005-2006 by Robby Stephenson + email : [email protected] + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef TELLICO_ENTREZFETCHER_H +#define TELLICO_ENTREZFETCHER_H + +namespace Tellico { + class XSLTHandler; +} + +#include "fetcher.h" +#include "configwidget.h" +#include "../datavectors.h" + +#include <qcstring.h> // for QByteArray +#include <qguardedptr.h> + +namespace KIO { + class Job; +} + +namespace Tellico { + namespace Fetch { + +/** + * @author Robby Stephenson + */ +class EntrezFetcher : public Fetcher { +Q_OBJECT + 
+public: + EntrezFetcher(QObject* parent, const char* name=0); + /** + */ + virtual ~EntrezFetcher(); + + virtual QString source() const; + virtual bool isSearching() const { return m_started; } + // pubmed can search title, person, and keyword + virtual bool canSearch(FetchKey k) const { return k == Title || k == Person || k == Keyword || k == Raw || k == PubmedID || k == DOI; } + virtual void search(FetchKey key, const QString& value); + virtual void continueSearch(); + virtual void stop(); + virtual Data::EntryPtr fetchEntry(uint uid); + virtual Type type() const { return Entrez; } + virtual bool canFetch(int type) const; + virtual void readConfigHook(const KConfigGroup& config); + virtual void updateEntry(Data::EntryPtr entry); + virtual Fetch::ConfigWidget* configWidget(QWidget* parent) const; + + static StringMap customFields(); + + class ConfigWidget : public Fetch::ConfigWidget { + public: + ConfigWidget(QWidget* parent_, const EntrezFetcher* fetcher=0); + virtual void saveConfig(KConfigGroup& config); + virtual QString preferredName() const; + }; + friend class ConfigWidget; + + static QString defaultName(); + +private slots: + void slotData(KIO::Job* job, const QByteArray& data); + void slotComplete(KIO::Job* job); + +private: + void initXSLTHandler(); + void doSummary(); + + void searchResults(); + void summaryResults(); + + enum Step { + Begin, + Search, + Summary, + Fetch + }; + + XSLTHandler* m_xsltHandler; + QString m_dbname; + + int m_start; + int m_total; + + QByteArray m_data; + QMap<int, Data::EntryPtr> m_entries; // map from search result id to entry + QMap<int, int> m_matches; // search result id to pubmed id + QGuardedPtr<KIO::Job> m_job; + + QString m_queryKey; + QString m_webEnv; + Step m_step; + + bool m_started; + QStringList m_fields; +}; + + } // end namespace +} // end namespace + +#endif diff --git a/src/fetch/execexternalfetcher.cpp b/src/fetch/execexternalfetcher.cpp new file mode 100644 index 0000000..07b99d8 --- /dev/null +++ 
b/src/fetch/execexternalfetcher.cpp @@ -0,0 +1,561 @@ +/*************************************************************************** + copyright : (C) 2005-2006 by Robby Stephenson + email : [email protected] + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "execexternalfetcher.h" +#include "messagehandler.h" +#include "fetchmanager.h" +#include "../collection.h" +#include "../entry.h" +#include "../importdialog.h" +#include "../translators/tellicoimporter.h" +#include "../tellico_debug.h" +#include "../gui/combobox.h" +#include "../gui/lineedit.h" +#include "../gui/collectiontypecombo.h" +#include "../tellico_utils.h" +#include "../newstuff/manager.h" + +#include <klocale.h> +#include <kconfig.h> +#include <kprocess.h> +#include <kurlrequester.h> +#include <kaccelmanager.h> + +#include <qlayout.h> +#include <qlabel.h> +#include <qwhatsthis.h> +#include <qregexp.h> +#include <qvgroupbox.h> +#include <qfile.h> // needed for QFile::remove + +using Tellico::Fetch::ExecExternalFetcher; + +QStringList ExecExternalFetcher::parseArguments(const QString& str_) { + // matching escaped quotes is too hard... 
:( +// QRegExp quotes(QString::fromLatin1("[^\\\\](['\"])(.*[^\\\\])\\1")); + QRegExp quotes(QString::fromLatin1("(['\"])(.*)\\1")); + quotes.setMinimal(true); + QRegExp spaces(QString::fromLatin1("\\s+")); + spaces.setMinimal(true); + + QStringList args; + int pos = 0; + for(int nextPos = quotes.search(str_); nextPos > -1; pos = nextPos+1, nextPos = quotes.search(str_, pos)) { + // a non-quotes arguments runs from pos to nextPos + args += QStringList::split(spaces, str_.mid(pos, nextPos-pos)); + // move nextpos marker to end of match + pos = quotes.pos(2); // skip quotation mark + nextPos += quotes.matchedLength(); + args += str_.mid(pos, nextPos-pos-1); + } + // catch the end stuff + args += QStringList::split(spaces, str_.mid(pos)); + +#if 0 + for(QStringList::ConstIterator it = args.begin(); it != args.end(); ++it) { + myDebug() << *it << endl; + } +#endif + + return args; +} + +ExecExternalFetcher::ExecExternalFetcher(QObject* parent_, const char* name_/*=0*/) : Fetcher(parent_, name_), + m_started(false), m_collType(-1), m_formatType(-1), m_canUpdate(false), m_process(0), m_deleteOnRemove(false) { +} + +ExecExternalFetcher::~ExecExternalFetcher() { + stop(); +} + +QString ExecExternalFetcher::defaultName() { + return i18n("External Application"); +} + +QString ExecExternalFetcher::source() const { + return m_name; +} + +bool ExecExternalFetcher::canFetch(int type_) const { + return m_collType == -1 ? 
false : m_collType == type_; +} + +void ExecExternalFetcher::readConfigHook(const KConfigGroup& config_) { + QString s = config_.readPathEntry("ExecPath"); + if(!s.isEmpty()) { + m_path = s; + } + QValueList<int> il; + if(config_.hasKey("ArgumentKeys")) { + il = config_.readIntListEntry("ArgumentKeys"); + } else { + il.append(Keyword); + } + QStringList sl = config_.readListEntry("Arguments"); + if(il.count() != sl.count()) { + kdWarning() << "ExecExternalFetcher::readConfig() - unequal number of arguments and keys" << endl; + } + int n = QMIN(il.count(), sl.count()); + for(int i = 0; i < n; ++i) { + m_args[static_cast<FetchKey>(il[i])] = sl[i]; + } + if(config_.hasKey("UpdateArgs")) { + m_canUpdate = true; + m_updateArgs = config_.readEntry("UpdateArgs"); + } else { + m_canUpdate = false; + } + m_collType = config_.readNumEntry("CollectionType", -1); + m_formatType = config_.readNumEntry("FormatType", -1); + m_deleteOnRemove = config_.readBoolEntry("DeleteOnRemove", false); + m_newStuffName = config_.readEntry("NewStuffName"); +} + +void ExecExternalFetcher::search(FetchKey key_, const QString& value_) { + m_started = true; + + if(!m_args.contains(key_)) { + stop(); + return; + } + + // should KProcess::quote() be used? 
+ // %1 gets replaced by the search value, but since the arguments are going to be split + // the search value needs to be enclosed in quotation marks + // but first check to make sure the user didn't do that already + // AND the "%1" wasn't used in the settings + QString value = value_; + if(key_ == ISBN) { + value.remove('-'); // remove hyphens from isbn values + // shouldn't hurt and might keep from confusing stupid search sources + } + QRegExp rx1(QString::fromLatin1("['\"].*\\1")); + if(!rx1.exactMatch(value)) { + value.prepend('"').append('"'); + } + QString args = m_args[key_]; + QRegExp rx2(QString::fromLatin1("['\"]%1\\1")); + args.replace(rx2, QString::fromLatin1("%1")); + startSearch(parseArguments(args.arg(value))); // replace %1 with search value +} + +void ExecExternalFetcher::startSearch(const QStringList& args_) { + if(m_path.isEmpty()) { + stop(); + return; + } + +#if 0 + myDebug() << m_path << endl; + for(QStringList::ConstIterator it = args_.begin(); it != args_.end(); ++it) { + myDebug() << " " << *it << endl; + } +#endif + + m_process = new KProcess(); + connect(m_process, SIGNAL(receivedStdout(KProcess*, char*, int)), SLOT(slotData(KProcess*, char*, int))); + connect(m_process, SIGNAL(receivedStderr(KProcess*, char*, int)), SLOT(slotError(KProcess*, char*, int))); + connect(m_process, SIGNAL(processExited(KProcess*)), SLOT(slotProcessExited(KProcess*))); + *m_process << m_path << args_; + if(!m_process->start(KProcess::NotifyOnExit, KProcess::AllOutput)) { + myDebug() << "ExecExternalFetcher::startSearch() - process failed to start" << endl; + stop(); + } +} + +void ExecExternalFetcher::stop() { + if(!m_started) { + return; + } + if(m_process) { + m_process->kill(); + delete m_process; + m_process = 0; + } + m_data.truncate(0); + m_started = false; + m_errors.clear(); + emit signalDone(this); +} + +void ExecExternalFetcher::slotData(KProcess*, char* buffer_, int len_) { + QDataStream stream(m_data, IO_WriteOnly | IO_Append); + 
stream.writeRawBytes(buffer_, len_); +} + +void ExecExternalFetcher::slotError(KProcess*, char* buffer_, int len_) { + GUI::CursorSaver cs(Qt::arrowCursor); + QString msg = QString::fromLocal8Bit(buffer_, len_); + msg.prepend(source() + QString::fromLatin1(": ")); + if(msg.endsWith(QChar('\n'))) { + msg.truncate(msg.length()-1); + } + myDebug() << "ExecExternalFetcher::slotError() - " << msg << endl; + m_errors << msg; +} + +void ExecExternalFetcher::slotProcessExited(KProcess*) { +// myDebug() << "ExecExternalFetcher::slotProcessExited()" << endl; + if(!m_process->normalExit() || m_process->exitStatus()) { + myDebug() << "ExecExternalFetcher::slotProcessExited() - "<< source() << ": process did not exit successfully" << endl; + if(!m_errors.isEmpty()) { + message(m_errors.join(QChar('\n')), MessageHandler::Error); + } + stop(); + return; + } + if(!m_errors.isEmpty()) { + message(m_errors.join(QChar('\n')), MessageHandler::Warning); + } + + if(m_data.isEmpty()) { + myDebug() << "ExecExternalFetcher::slotProcessExited() - "<< source() << ": no data" << endl; + stop(); + return; + } + + Import::Format format = static_cast<Import::Format>(m_formatType > -1 ? 
m_formatType : Import::TellicoXML); + Import::Importer* imp = ImportDialog::importer(format, KURL::List()); + if(!imp) { + stop(); + return; + } + + imp->setText(QString::fromUtf8(m_data, m_data.size())); + Data::CollPtr coll = imp->collection(); + if(!coll) { + if(!imp->statusMessage().isEmpty()) { + message(imp->statusMessage(), MessageHandler::Status); + } + myDebug() << "ExecExternalFetcher::slotProcessExited() - "<< source() << ": no collection pointer" << endl; + delete imp; + stop(); + return; + } + + delete imp; + if(coll->entryCount() == 0) { +// myDebug() << "ExecExternalFetcher::slotProcessExited() - no results" << endl; + stop(); + return; + } + + Data::EntryVec entries = coll->entries(); + for(Data::EntryVec::Iterator entry = entries.begin(); entry != entries.end(); ++entry) { + QString desc; + switch(coll->type()) { + case Data::Collection::Book: + case Data::Collection::Bibtex: + desc = entry->field(QString::fromLatin1("author")) + + QChar('/') + + entry->field(QString::fromLatin1("publisher")); + if(!entry->field(QString::fromLatin1("cr_year")).isEmpty()) { + desc += QChar('/') + entry->field(QString::fromLatin1("cr_year")); + } else if(!entry->field(QString::fromLatin1("pub_year")).isEmpty()){ + desc += QChar('/') + entry->field(QString::fromLatin1("pub_year")); + } + break; + + case Data::Collection::Video: + desc = entry->field(QString::fromLatin1("studio")) + + QChar('/') + + entry->field(QString::fromLatin1("director")) + + QChar('/') + + entry->field(QString::fromLatin1("year")) + + QChar('/') + + entry->field(QString::fromLatin1("medium")); + break; + + case Data::Collection::Album: + desc = entry->field(QString::fromLatin1("artist")) + + QChar('/') + + entry->field(QString::fromLatin1("label")) + + QChar('/') + + entry->field(QString::fromLatin1("year")); + break; + + case Data::Collection::Game: + desc = entry->field(QString::fromLatin1("platform")); + break; + + case Data::Collection::ComicBook: + desc = 
entry->field(QString::fromLatin1("publisher")) + + QChar('/') + + entry->field(QString::fromLatin1("pub_year")); + break; + + case Data::Collection::BoardGame: + desc = entry->field(QString::fromLatin1("designer")) + + QChar('/') + + entry->field(QString::fromLatin1("publisher")) + + QChar('/') + + entry->field(QString::fromLatin1("year")); + break; + + default: + break; + } + SearchResult* r = new SearchResult(this, entry->title(), desc, entry->field(QString::fromLatin1("isbn"))); + m_entries.insert(r->uid, entry); + emit signalResultFound(r); + } + stop(); // be sure to call this +} + +Tellico::Data::EntryPtr ExecExternalFetcher::fetchEntry(uint uid_) { + return m_entries[uid_]; +} + +void ExecExternalFetcher::updateEntry(Data::EntryPtr entry_) { + if(!m_canUpdate) { + emit signalDone(this); // must do this + } + + m_started = true; + + Data::ConstEntryPtr e(entry_.data()); + QStringList args = parseArguments(m_updateArgs); + for(QStringList::Iterator it = args.begin(); it != args.end(); ++it) { + *it = Data::Entry::dependentValue(e, *it, false); + } + startSearch(args); +} + +Tellico::Fetch::ConfigWidget* ExecExternalFetcher::configWidget(QWidget* parent_) const { + return new ExecExternalFetcher::ConfigWidget(parent_, this); +} + +ExecExternalFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const ExecExternalFetcher* fetcher_/*=0*/) + : Fetch::ConfigWidget(parent_), m_deleteOnRemove(false) { + QGridLayout* l = new QGridLayout(optionsWidget(), 5, 2); + l->setSpacing(4); + l->setColStretch(1, 10); + + int row = -1; + + QLabel* label = new QLabel(i18n("Collection &type:"), optionsWidget()); + l->addWidget(label, ++row, 0); + m_collCombo = new GUI::CollectionTypeCombo(optionsWidget()); + connect(m_collCombo, SIGNAL(activated(int)), SLOT(slotSetModified())); + l->addWidget(m_collCombo, row, 1); + QString w = i18n("Set the collection type of the data returned from the external application."); + QWhatsThis::add(label, w); + QWhatsThis::add(m_collCombo, w); + 
label->setBuddy(m_collCombo); + + label = new QLabel(i18n("&Result type: "), optionsWidget()); + l->addWidget(label, ++row, 0); + m_formatCombo = new GUI::ComboBox(optionsWidget()); + Import::FormatMap formatMap = ImportDialog::formatMap(); + for(Import::FormatMap::Iterator it = formatMap.begin(); it != formatMap.end(); ++it) { + if(ImportDialog::formatImportsText(it.key())) { + m_formatCombo->insertItem(it.data(), it.key()); + } + } + connect(m_formatCombo, SIGNAL(activated(int)), SLOT(slotSetModified())); + l->addWidget(m_formatCombo, row, 1); + w = i18n("Set the result type of the data returned from the external application."); + QWhatsThis::add(label, w); + QWhatsThis::add(m_formatCombo, w); + label->setBuddy(m_formatCombo); + + label = new QLabel(i18n("Application &path: "), optionsWidget()); + l->addWidget(label, ++row, 0); + m_pathEdit = new KURLRequester(optionsWidget()); + connect(m_pathEdit, SIGNAL(textChanged(const QString&)), SLOT(slotSetModified())); + l->addWidget(m_pathEdit, row, 1); + w = i18n("Set the path of the application to run that should output a valid Tellico data file."); + QWhatsThis::add(label, w); + QWhatsThis::add(m_pathEdit, w); + label->setBuddy(m_pathEdit); + + w = i18n("Select the search keys supported by the data source."); + QString w2 = i18n("Add any arguments that may be needed. 
<b>%1</b> will be replaced by the search term."); + QVGroupBox* box = new QVGroupBox(i18n("Arguments"), optionsWidget()); + ++row; + l->addMultiCellWidget(box, row, row, 0, 1); + QWidget* grid = new QWidget(box); + QGridLayout* gridLayout = new QGridLayout(grid); + gridLayout->setSpacing(2); + row = -1; + const Fetch::KeyMap keyMap = Fetch::Manager::self()->keyMap(); + for(Fetch::KeyMap::ConstIterator it = keyMap.begin(); it != keyMap.end(); ++it) { + FetchKey key = it.key(); + if(key == Raw) { + continue; + } + QCheckBox* cb = new QCheckBox(it.data(), grid); + gridLayout->addWidget(cb, ++row, 0); + m_cbDict.insert(key, cb); + GUI::LineEdit* le = new GUI::LineEdit(grid); + le->setHint(QString::fromLatin1("%1")); // for example + le->completionObject()->addItem(QString::fromLatin1("%1")); + gridLayout->addWidget(le, row, 1); + m_leDict.insert(key, le); + if(fetcher_ && fetcher_->m_args.contains(key)) { + cb->setChecked(true); + le->setEnabled(true); + le->setText(fetcher_->m_args[key]); + } else { + cb->setChecked(false); + le->setEnabled(false); + } + connect(cb, SIGNAL(toggled(bool)), le, SLOT(setEnabled(bool))); + QWhatsThis::add(cb, w); + QWhatsThis::add(le, w2); + } + m_cbUpdate = new QCheckBox(i18n("Update"), grid); + gridLayout->addWidget(m_cbUpdate, ++row, 0); + m_leUpdate = new GUI::LineEdit(grid); + m_leUpdate->setHint(QString::fromLatin1("%{title}")); // for example + m_leUpdate->completionObject()->addItem(QString::fromLatin1("%{title}")); + m_leUpdate->completionObject()->addItem(QString::fromLatin1("%{isbn}")); + gridLayout->addWidget(m_leUpdate, row, 1); + /* TRANSLATORS: Do not translate %{author}. */ + w2 = i18n("<p>Enter the arguments which should be used to search for available updates to an entry.</p><p>" + "The format is the same as for <i>Dependent</i> fields, where field values " + "are contained inside braces, such as <i>%{author}</i>. 
See the documentation for details.</p>"); + QWhatsThis::add(m_cbUpdate, w); + QWhatsThis::add(m_leUpdate, w2); + if(fetcher_ && fetcher_->m_canUpdate) { + m_cbUpdate->setChecked(true); + m_leUpdate->setEnabled(true); + m_leUpdate->setText(fetcher_->m_updateArgs); + } else { + m_cbUpdate->setChecked(false); + m_leUpdate->setEnabled(false); + } + connect(m_cbUpdate, SIGNAL(toggled(bool)), m_leUpdate, SLOT(setEnabled(bool))); + + l->setRowStretch(++row, 1); + + if(fetcher_) { + m_pathEdit->setURL(fetcher_->m_path); + m_newStuffName = fetcher_->m_newStuffName; + } + if(fetcher_ && fetcher_->m_collType > -1) { + m_collCombo->setCurrentType(fetcher_->m_collType); + } else { + m_collCombo->setCurrentType(Data::Collection::Book); + } + if(fetcher_ && fetcher_->m_formatType > -1) { + m_formatCombo->setCurrentItem(formatMap[static_cast<Import::Format>(fetcher_->m_formatType)]); + } else { + m_formatCombo->setCurrentItem(formatMap[Import::TellicoXML]); + } + m_deleteOnRemove = fetcher_ && fetcher_->m_deleteOnRemove; + KAcceleratorManager::manage(optionsWidget()); +} + +ExecExternalFetcher::ConfigWidget::~ConfigWidget() { +} + +void ExecExternalFetcher::ConfigWidget::readConfig(KConfig* config_) { + m_pathEdit->setURL(config_->readPathEntry("ExecPath")); + QValueList<int> argKeys = config_->readIntListEntry("ArgumentKeys"); + QStringList argValues = config_->readListEntry("Arguments"); + if(argKeys.count() != argValues.count()) { + kdWarning() << "ExecExternalFetcher::ConfigWidget::readConfig() - unequal number of arguments and keys" << endl; + } + int n = QMIN(argKeys.count(), argValues.count()); + QMap<FetchKey, QString> args; + for(int i = 0; i < n; ++i) { + args[static_cast<FetchKey>(argKeys[i])] = argValues[i]; + } + for(QValueList<int>::Iterator it = argKeys.begin(); it != argKeys.end(); ++it) { + if(*it == Raw) { + continue; + } + FetchKey key = static_cast<FetchKey>(*it); + QCheckBox* cb = m_cbDict[key]; + KLineEdit* le = m_leDict[key]; + if(cb && le) { + 
if(args.contains(key)) { + cb->setChecked(true); + le->setEnabled(true); + le->setText(args[key]); + } else { + cb->setChecked(false); + le->setEnabled(false); + le->clear(); + } + } + } + + if(config_->hasKey("UpdateArgs")) { + m_cbUpdate->setChecked(true); + m_leUpdate->setEnabled(true); + m_leUpdate->setText(config_->readEntry("UpdateArgs")); + } else { + m_cbUpdate->setChecked(false); + m_leUpdate->setEnabled(false); + m_leUpdate->clear(); + } + + int collType = config_->readNumEntry("CollectionType"); + m_collCombo->setCurrentType(collType); + + Import::FormatMap formatMap = ImportDialog::formatMap(); + int formatType = config_->readNumEntry("FormatType"); + m_formatCombo->setCurrentItem(formatMap[static_cast<Import::Format>(formatType)]); + m_deleteOnRemove = config_->readBoolEntry("DeleteOnRemove", false); + m_name = config_->readEntry("Name"); + m_newStuffName = config_->readEntry("NewStuffName"); +} + +void ExecExternalFetcher::ConfigWidget::saveConfig(KConfigGroup& config_) { + QString s = m_pathEdit->url(); + if(!s.isEmpty()) { + config_.writePathEntry("ExecPath", s); + } + QValueList<int> keys; + QStringList args; + for(QIntDictIterator<QCheckBox> it(m_cbDict); it.current(); ++it) { + if(it.current()->isChecked()) { + keys << it.currentKey(); + args << m_leDict[it.currentKey()]->text(); + } + } + config_.writeEntry("ArgumentKeys", keys); + config_.writeEntry("Arguments", args); + + if(m_cbUpdate->isChecked()) { + config_.writeEntry("UpdateArgs", m_leUpdate->text()); + } else { + config_.deleteEntry("UpdateArgs"); + } + + config_.writeEntry("CollectionType", m_collCombo->currentType()); + config_.writeEntry("FormatType", m_formatCombo->currentData().toInt()); + config_.writeEntry("DeleteOnRemove", m_deleteOnRemove); + if(!m_newStuffName.isEmpty()) { + config_.writeEntry("NewStuffName", m_newStuffName); + } + slotSetModified(false); +} + +void ExecExternalFetcher::ConfigWidget::removed() { + if(!m_deleteOnRemove) { + return; + } + 
if(!m_newStuffName.isEmpty()) { + NewStuff::Manager man(this); + man.removeScript(m_newStuffName); + } +} + +QString ExecExternalFetcher::ConfigWidget::preferredName() const { + return m_name.isEmpty() ? ExecExternalFetcher::defaultName() : m_name; +} + +#include "execexternalfetcher.moc" diff --git a/src/fetch/execexternalfetcher.h b/src/fetch/execexternalfetcher.h new file mode 100644 index 0000000..bdc2a40 --- /dev/null +++ b/src/fetch/execexternalfetcher.h @@ -0,0 +1,118 @@ +/*************************************************************************** + copyright : (C) 2005-2006 by Robby Stephenson + email : [email protected] + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef TELLICO_EXECEXTERNALFETCHER_H +#define TELLICO_EXECEXTERNALFETCHER_H + +#include "fetcher.h" +#include "configwidget.h" +#include "../datavectors.h" + +#include <qintdict.h> + +class KProcess; +class KURLRequester; +class KLineEdit; +class KComboBox; + +class QCheckBox; + +namespace Tellico { + namespace GUI { + class ComboBox; + class LineEdit; + class CollectionTypeCombo; + } + namespace Fetch { + +/** + * @author Robby Stephenson + */ +class ExecExternalFetcher : public Fetcher { +Q_OBJECT + +public: + ExecExternalFetcher(QObject* parent, const char* name=0); + /** + */ + virtual ~ExecExternalFetcher(); + + virtual QString source() const; + virtual bool isSearching() const { return m_started; } + virtual bool canSearch(FetchKey k) const { return m_args.contains(k); } + virtual bool canUpdate() const { return m_canUpdate; } + virtual void search(FetchKey key, const QString& value); + 
virtual void updateEntry(Data::EntryPtr entry); + virtual void stop(); + virtual Data::EntryPtr fetchEntry(uint uid); + virtual Type type() const { return ExecExternal; } + virtual bool canFetch(int type) const; + virtual void readConfigHook(const KConfigGroup& config); + virtual Fetch::ConfigWidget* configWidget(QWidget* parent) const; + + const QString& execPath() const { return m_path; } + + class ConfigWidget : public Fetch::ConfigWidget { + public: + ConfigWidget(QWidget* parent = 0, const ExecExternalFetcher* fetcher = 0); + ~ConfigWidget(); + + void readConfig(KConfig* config); + virtual void saveConfig(KConfigGroup& config); + virtual void removed(); + virtual QString preferredName() const; + + private: + bool m_deleteOnRemove : 1; + QString m_name, m_newStuffName; + KURLRequester* m_pathEdit; + GUI::CollectionTypeCombo* m_collCombo; + GUI::ComboBox* m_formatCombo; + QIntDict<QCheckBox> m_cbDict; + QIntDict<GUI::LineEdit> m_leDict; + QCheckBox* m_cbUpdate; + GUI::LineEdit* m_leUpdate; + }; + friend class ConfigWidget; + + static QString defaultName(); + +private slots: + void slotData(KProcess* proc, char* buffer, int len); + void slotError(KProcess* proc, char* buffer, int len); + void slotProcessExited(KProcess* proc); + +private: + static QStringList parseArguments(const QString& str); + + void startSearch(const QStringList& args); + + bool m_started; + int m_collType; + int m_formatType; + QString m_path; + QMap<FetchKey, QString> m_args; + bool m_canUpdate : 1; + QString m_updateArgs; + KProcess* m_process; + QByteArray m_data; + QMap<int, Data::EntryPtr> m_entries; // map from search result id to entry + QStringList m_errors; + bool m_deleteOnRemove : 1; + QString m_newStuffName; +}; + + } // end namespace +} // end namespace + +#endif diff --git a/src/fetch/fetch.h b/src/fetch/fetch.h new file mode 100644 index 0000000..0cdb726 --- /dev/null +++ b/src/fetch/fetch.h @@ -0,0 +1,64 @@ 
+/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : [email protected] + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef TELLICO_FETCH_H +#define TELLICO_FETCH_H + +namespace Tellico { + namespace Fetch { + +/** + * FetchFirst must be first, and the rest must follow consecutively in value. + * FetchLast must be last! + */ +enum FetchKey { + FetchFirst = 0, + Title, + Person, + ISBN, + UPC, + Keyword, + DOI, + ArxivID, + PubmedID, + LCCN, + Raw, + FetchLast +}; + +// real ones must start at 0! +enum Type { + Unknown = -1, + Amazon = 0, + IMDB, + Z3950, + SRU, + Entrez, + ExecExternal, + Yahoo, + AnimeNfo, + IBS, + ISBNdb, + GCstarPlugin, + CrossRef, + Citebase, + Arxiv, + Bibsonomy, + GoogleScholar, + Discogs +}; + + } +} + +#endif diff --git a/src/fetch/fetcher.cpp b/src/fetch/fetcher.cpp new file mode 100644 index 0000000..3bc7749 --- /dev/null +++ b/src/fetch/fetcher.cpp @@ -0,0 +1,61 @@ +/*************************************************************************** + copyright : (C) 2005-2006 by Robby Stephenson + email : [email protected] + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ 
+ +#include "fetcher.h" +#include "messagehandler.h" +#include "../entry.h" + +#include <kglobal.h> +#include <kconfig.h> + +using Tellico::Fetch::Fetcher; +using Tellico::Fetch::SearchResult; + +Fetcher::~Fetcher() { + KConfigGroup config(KGlobal::config(), m_configGroup); + saveConfigHook(config); +} + +void Fetcher::readConfig(const KConfigGroup& config_, const QString& groupName_) { + m_configGroup = groupName_; + + QString s = config_.readEntry("Name"); + if(!s.isEmpty()) { + m_name = s; + } + m_updateOverwrite = config_.readBoolEntry("UpdateOverwrite", false); + // be sure to read config for subclass + readConfigHook(config_); +} + +void Fetcher::message(const QString& message_, int type_) const { + if(m_messager) { + m_messager->send(message_, static_cast<MessageHandler::Type>(type_)); + } +} + +void Fetcher::infoList(const QString& message_, const QStringList& list_) const { + if(m_messager) { + m_messager->infoList(message_, list_); + } +} + +void Fetcher::updateEntry(Data::EntryPtr) { + emit signalDone(this); +} + +Tellico::Data::EntryPtr SearchResult::fetchEntry() { + return fetcher->fetchEntry(uid); +} + +#include "fetcher.moc" diff --git a/src/fetch/fetcher.h b/src/fetch/fetcher.h new file mode 100644 index 0000000..0d2496e --- /dev/null +++ b/src/fetch/fetcher.h @@ -0,0 +1,151 @@ +/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : [email protected] + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef FETCHER_H +#define FETCHER_H + +#include "fetch.h" +#include 
/**
 * The top-level abstract class for fetching data.
 *
 * A fetcher is reference counted (KShared) and is normally handled through
 * the Ptr and CPtr typedefs below.
 *
 * @author Robby Stephenson
 */
class Fetcher : public QObject, public KShared {
Q_OBJECT

public:
  typedef KSharedPtr<Fetcher> Ptr;
  typedef KSharedPtr<const Fetcher> CPtr;

  /**
   * Creates a fetcher with both flags cleared and no message handler.
   *
   * @param parent The parent object, passed on to QObject
   * @param name The object name, passed on to QObject
   */
  Fetcher(QObject* parent, const char* name = 0) : QObject(parent, name), KShared(),
                                                   m_updateOverwrite(false), m_hasMoreResults(false),
                                                   m_messager(0) {}
  /**
   * Defined in fetcher.cpp, where it passes the fetcher's config group to
   * saveConfigHook().
   */
  virtual ~Fetcher();

  /**
   * Returns true if the fetcher might return entries from a certain collection type.
   */
  virtual bool canFetch(int type) const = 0;
  /**
   * Returns true if the fetcher can search using a certain key.
   */
  virtual bool canSearch(FetchKey key) const = 0;
  /**
   * Returns true if the fetcher can be used to update an existing entry.
   * The default is true.
   */
  virtual bool canUpdate() const { return true; }

  /**
   * Returns the type of the data source.
   */
  virtual Type type() const = 0;
  /**
   * Returns the name of the data source, as defined by the user.
   */
  virtual QString source() const = 0;
  /**
   * Returns whether the fetcher will overwrite existing info when updating
   */
  bool updateOverwrite() const { return m_updateOverwrite; }
  /**
   * Starts a search, using a key and value.
   */
  virtual void search(FetchKey key, const QString& value) = 0;
  /**
   * Continues a previous search. The default implementation does nothing.
   */
  virtual void continueSearch() {}
  /**
   * Updates an entry. The default implementation, in fetcher.cpp, ignores the
   * entry and emits signalDone() right away.
   */
  virtual void updateEntry(Data::EntryPtr);
  // most fetchers won't support this. it's particularly useful for text fetchers
  virtual void updateEntrySynchronous(Data::EntryPtr) {}
  /**
   * Returns true if the fetcher is currently searching.
   */
  virtual bool isSearching() const = 0;
  /**
   * Returns true if the fetcher can continue and fetch more results
   * The fetcher is responsible for remembering state.
   */
  virtual bool hasMoreResults() const { return m_hasMoreResults; }
  /**
   * Stops the fetcher.
   */
  virtual void stop() = 0;
  /**
   * Fetches an entry, given the uid of the search result.
   */
  virtual Data::EntryPtr fetchEntry(uint uid) = 0;

  /**
   * Sets the handler that message() and infoList() forward to. The pointer is
   * only stored here.
   */
  void setMessageHandler(MessageHandler* handler) { m_messager = handler; }
  MessageHandler* messageHandler() const { return m_messager; }
  /**
   * Sends a message to the message handler, if one is set.
   * The type is cast to MessageHandler::Type.
   */
  void message(const QString& message, int type) const;
  /**
   * Sends a message and a list of strings to the message handler, if one is set.
   */
  void infoList(const QString& message, const QStringList& list) const;

  /**
   * Reads the config for the widget, given a config group.
   */
  void readConfig(const KConfigGroup& config, const QString& groupName);
  /**
   * Returns a widget for modifying the fetcher's config.
   */
  virtual ConfigWidget* configWidget(QWidget* parent) const = 0;

signals:
//  void signalStatus(const QString& status);
  void signalResultFound(Tellico::Fetch::SearchResult* result);
  void signalDone(Tellico::Fetch::Fetcher::Ptr);

protected:
  // the "Name" config entry, when it is not empty
  QString m_name;
  // the "UpdateOverwrite" config entry
  bool m_updateOverwrite : 1;
  // the value returned by the default hasMoreResults()
  bool m_hasMoreResults : 1;

private:
  // called at the end of readConfig() so the subclass can read its own settings
  virtual void readConfigHook(const KConfigGroup&) = 0;
  // called from the destructor; the default does nothing
  virtual void saveConfigHook(KConfigGroup&) {}

  MessageHandler* m_messager;
  // name of the config group given to readConfig()
  QString m_configGroup;
};

/**
 * A single search hit. It holds the fetcher it came from and a random uid,
 * which fetchEntry() passes back to that fetcher to get the full entry.
 */
class SearchResult {
public:
  SearchResult(Fetcher::Ptr f, const QString& t, const QString& d, const QString& i)
   : uid(KApplication::random()), fetcher(f), title(t), desc(d), isbn(i) {}
  Data::EntryPtr fetchEntry();
  uint uid;
  Fetcher::Ptr fetcher;
  QString title;
  QString desc;
  QString isbn;
};
/**
 * Loads an icon through the global KDE icon loader.
 *
 * @param name  The icon name
 * @param group The icon group as an int; it is cast to KIcon::Group
 * @param size  The icon size handed to loadIcon()
 *
 * The macro used to ignore its own size parameter and expand to a variable
 * called size_, so it only compiled where a local of that exact name was in
 * scope. Each argument is now used as passed.
 */
#define LOAD_ICON(name, group, size) \
  KGlobal::iconLoader()->loadIcon((name), static_cast<KIcon::Group>(group), (size))
m_keyMap.insert(Person, i18n("Person")); + m_keyMap.insert(ISBN, i18n("ISBN")); + m_keyMap.insert(UPC, i18n("UPC/EAN")); + m_keyMap.insert(Keyword, i18n("Keyword")); + m_keyMap.insert(DOI, i18n("DOI")); + m_keyMap.insert(ArxivID, i18n("arXiv ID")); + m_keyMap.insert(PubmedID, i18n("Pubmed ID")); + // to keep from having a new i18n string, just remove octothorpe + m_keyMap.insert(LCCN, i18n("LCCN#").remove('#')); + m_keyMap.insert(Raw, i18n("Raw Query")); +// m_keyMap.insert(FetchLast, QString::null); +} + +Manager::~Manager() { + delete m_messager; +} + +void Manager::loadFetchers() { +// myDebug() << "Manager::loadFetchers()" << endl; + m_fetchers.clear(); + m_configMap.clear(); + + KConfig* config = KGlobal::config(); + if(config->hasGroup(QString::fromLatin1("Data Sources"))) { + KConfigGroup configGroup(config, QString::fromLatin1("Data Sources")); + int nSources = configGroup.readNumEntry("Sources Count", 0); + for(int i = 0; i < nSources; ++i) { + QString group = QString::fromLatin1("Data Source %1").arg(i); + Fetcher::Ptr f = createFetcher(config, group); + if(f) { + m_configMap.insert(f, group); + m_fetchers.append(f); + f->setMessageHandler(m_messager); + } + } + m_loadDefaults = false; + } else { // add default sources + m_fetchers = defaultFetchers(); + m_loadDefaults = true; + } +} + +Tellico::Fetch::FetcherVec Manager::fetchers(int type_) { + FetcherVec vec; + for(FetcherVec::Iterator it = m_fetchers.begin(); it != m_fetchers.end(); ++it) { + if(it->canFetch(type_)) { + vec.append(it.data()); + } + } + return vec; +} + +Tellico::Fetch::KeyMap Manager::keyMap(const QString& source_) const { + // an empty string means return all + if(source_.isEmpty()) { + return m_keyMap; + } + + // assume there's only one fetcher match + KSharedPtr<const Fetcher> f = 0; + for(FetcherVec::ConstIterator it = m_fetchers.constBegin(); it != m_fetchers.constEnd(); ++it) { + if(source_ == it->source()) { + f = it.data(); + break; + } + } + if(!f) { + kdWarning() << 
"Manager::keyMap() - no fetcher found!" << endl; + return KeyMap(); + } + + KeyMap map; + for(KeyMap::ConstIterator it = m_keyMap.begin(); it != m_keyMap.end(); ++it) { + if(f->canSearch(it.key())) { + map.insert(it.key(), it.data()); + } + } + return map; +} + +void Manager::startSearch(const QString& source_, FetchKey key_, const QString& value_) { + if(value_.isEmpty()) { + emit signalDone(); + return; + } + + // assume there's only one fetcher match + int i = 0; + m_currentFetcherIndex = -1; + for(FetcherVec::Iterator it = m_fetchers.begin(); it != m_fetchers.end(); ++it, ++i) { + if(source_ == it->source()) { + ++m_count; // Fetcher::search() might emit done(), so increment before calling search() + connect(it.data(), SIGNAL(signalResultFound(Tellico::Fetch::SearchResult*)), + SIGNAL(signalResultFound(Tellico::Fetch::SearchResult*))); + connect(it.data(), SIGNAL(signalDone(Tellico::Fetch::Fetcher::Ptr)), + SLOT(slotFetcherDone(Tellico::Fetch::Fetcher::Ptr))); + it->search(key_, value_); + m_currentFetcherIndex = i; + break; + } + } +} + +void Manager::continueSearch() { + if(m_currentFetcherIndex < 0 || m_currentFetcherIndex >= static_cast<int>(m_fetchers.count())) { + myDebug() << "Manager::continueSearch() - can't continue!" 
<< endl; + emit signalDone(); + return; + } + Fetcher::Ptr f = m_fetchers[m_currentFetcherIndex]; + if(f && f->hasMoreResults()) { + ++m_count; + connect(f, SIGNAL(signalResultFound(Tellico::Fetch::SearchResult*)), + SIGNAL(signalResultFound(Tellico::Fetch::SearchResult*))); + connect(f, SIGNAL(signalDone(Tellico::Fetch::Fetcher::Ptr)), + SLOT(slotFetcherDone(Tellico::Fetch::Fetcher::Ptr))); + f->continueSearch(); + } else { + emit signalDone(); + } +} + +bool Manager::hasMoreResults() const { + if(m_currentFetcherIndex < 0 || m_currentFetcherIndex >= static_cast<int>(m_fetchers.count())) { + return false; + } + Fetcher::Ptr f = m_fetchers[m_currentFetcherIndex]; + return f && f->hasMoreResults(); +} + +void Manager::stop() { +// myDebug() << "Manager::stop()" << endl; + for(FetcherVec::Iterator it = m_fetchers.begin(); it != m_fetchers.end(); ++it) { + if(it->isSearching()) { + it->stop(); + } + } +#ifndef NDEBUG + if(m_count != 0) { + myDebug() << "Manager::stop() - count should be 0!" << endl; + } +#endif + m_count = 0; +} + +void Manager::slotFetcherDone(Fetcher::Ptr fetcher_) { +// myDebug() << "Manager::slotFetcherDone() - " << (fetcher_ ? 
fetcher_->source() : QString::null) +// << " :" << m_count << endl; + fetcher_->disconnect(); // disconnect all signals + --m_count; + if(m_count <= 0) { + emit signalDone(); + } +} + +bool Manager::canFetch() const { + for(FetcherVec::ConstIterator it = m_fetchers.constBegin(); it != m_fetchers.constEnd(); ++it) { + if(it->canFetch(Kernel::self()->collectionType())) { + return true; + } + } + return false; +} + +Tellico::Fetch::Fetcher::Ptr Manager::createFetcher(KConfig* config_, const QString& group_) { + if(!config_->hasGroup(group_)) { + myDebug() << "Manager::createFetcher() - no config group for " << group_ << endl; + return 0; + } + + KConfigGroup config(config_, group_); + + int fetchType = config.readNumEntry("Type", Fetch::Unknown); + if(fetchType == Fetch::Unknown) { + myDebug() << "Manager::createFetcher() - unknown type " << fetchType << ", skipping" << endl; + return 0; + } + + Fetcher::Ptr f = 0; + switch(fetchType) { + case Amazon: +#ifdef AMAZON_SUPPORT + { + int site = config.readNumEntry("Site", AmazonFetcher::Unknown); + if(site == AmazonFetcher::Unknown) { + myDebug() << "Manager::createFetcher() - unknown amazon site " << site << ", skipping" << endl; + } else { + f = new AmazonFetcher(static_cast<AmazonFetcher::Site>(site), this); + } + } +#endif + break; + + case IMDB: +#ifdef IMDB_SUPPORT + f = new IMDBFetcher(this); +#endif + break; + + case Z3950: +#ifdef HAVE_YAZ + f = new Z3950Fetcher(this); +#endif + break; + + case SRU: + f = new SRUFetcher(this); + break; + + case Entrez: + f = new EntrezFetcher(this); + break; + + case ExecExternal: + f = new ExecExternalFetcher(this); + break; + + case Yahoo: + f = new YahooFetcher(this); + break; + + case AnimeNfo: + f = new AnimeNfoFetcher(this); + break; + + case IBS: + f = new IBSFetcher(this); + break; + + case ISBNdb: + f = new ISBNdbFetcher(this); + break; + + case GCstarPlugin: + f = new GCstarPluginFetcher(this); + break; + + case CrossRef: + f = new CrossRefFetcher(this); + break; + + 
case Arxiv: + f = new ArxivFetcher(this); + break; + + case Citebase: + f = new CitebaseFetcher(this); + break; + + case Bibsonomy: + f = new BibsonomyFetcher(this); + break; + + case GoogleScholar: + f = new GoogleScholarFetcher(this); + break; + + case Discogs: + f = new DiscogsFetcher(this); + break; + + case Unknown: + default: + break; + } + if(f) { + f->readConfig(config, group_); + } + return f; +} + +// static +Tellico::Fetch::FetcherVec Manager::defaultFetchers() { + FetcherVec vec; +#ifdef AMAZON_SUPPORT + vec.append(new AmazonFetcher(AmazonFetcher::US, this)); +#endif +#ifdef IMDB_SUPPORT + vec.append(new IMDBFetcher(this)); +#endif + vec.append(SRUFetcher::libraryOfCongress(this)); + vec.append(new ISBNdbFetcher(this)); + vec.append(new YahooFetcher(this)); + vec.append(new AnimeNfoFetcher(this)); + vec.append(new ArxivFetcher(this)); + vec.append(new GoogleScholarFetcher(this)); + vec.append(new DiscogsFetcher(this)); +// only add IBS if user includes italian + if(KGlobal::locale()->languagesTwoAlpha().contains(QString::fromLatin1("it"))) { + vec.append(new IBSFetcher(this)); + } + return vec; +} + +Tellico::Fetch::FetcherVec Manager::createUpdateFetchers(int collType_) { + if(m_loadDefaults) { + return defaultFetchers(); + } + + FetcherVec vec; + KConfigGroup config(KGlobal::config(), "Data Sources"); + int nSources = config.readNumEntry("Sources Count", 0); + for(int i = 0; i < nSources; ++i) { + QString group = QString::fromLatin1("Data Source %1").arg(i); + // needs the KConfig* + Fetcher::Ptr f = createFetcher(KGlobal::config(), group); + if(f && f->canFetch(collType_) && f->canUpdate()) { + vec.append(f); + } + } + return vec; +} + +Tellico::Fetch::FetcherVec Manager::createUpdateFetchers(int collType_, FetchKey key_) { + FetcherVec fetchers; + // creates new fetchers + FetcherVec allFetchers = createUpdateFetchers(collType_); + for(Fetch::FetcherVec::Iterator it = allFetchers.begin(); it != allFetchers.end(); ++it) { + if(it->canSearch(key_)) { 
+ fetchers.append(it); + } + } + return fetchers; +} + +Tellico::Fetch::Fetcher::Ptr Manager::createUpdateFetcher(int collType_, const QString& source_) { + Fetcher::Ptr fetcher = 0; + // creates new fetchers + FetcherVec fetchers = createUpdateFetchers(collType_); + for(Fetch::FetcherVec::Iterator it = fetchers.begin(); it != fetchers.end(); ++it) { + if(it->source() == source_) { + fetcher = it; + break; + } + } + return fetcher; +} + +void Manager::updateStatus(const QString& message_) { + emit signalStatus(message_); +} + +Tellico::Fetch::TypePairList Manager::typeList() { + Fetch::TypePairList list; +#ifdef AMAZON_SUPPORT + list.append(TypePair(AmazonFetcher::defaultName(), Amazon)); +#endif +#ifdef IMDB_SUPPORT + list.append(TypePair(IMDBFetcher::defaultName(), IMDB)); +#endif +#ifdef HAVE_YAZ + list.append(TypePair(Z3950Fetcher::defaultName(), Z3950)); +#endif + list.append(TypePair(SRUFetcher::defaultName(), SRU)); + list.append(TypePair(EntrezFetcher::defaultName(), Entrez)); + list.append(TypePair(ExecExternalFetcher::defaultName(), ExecExternal)); + list.append(TypePair(YahooFetcher::defaultName(), Yahoo)); + list.append(TypePair(AnimeNfoFetcher::defaultName(), AnimeNfo)); + list.append(TypePair(IBSFetcher::defaultName(), IBS)); + list.append(TypePair(ISBNdbFetcher::defaultName(), ISBNdb)); + list.append(TypePair(GCstarPluginFetcher::defaultName(), GCstarPlugin)); + list.append(TypePair(CrossRefFetcher::defaultName(), CrossRef)); + list.append(TypePair(ArxivFetcher::defaultName(), Arxiv)); + list.append(TypePair(CitebaseFetcher::defaultName(), Citebase)); + list.append(TypePair(BibsonomyFetcher::defaultName(), Bibsonomy)); + list.append(TypePair(GoogleScholarFetcher::defaultName(),GoogleScholar)); + list.append(TypePair(DiscogsFetcher::defaultName(), Discogs)); + + // now find all the scripts distributed with tellico + QStringList files = KGlobal::dirs()->findAllResources("appdata", QString::fromLatin1("data-sources/*.spec"), + false, true); + 
/**
 * Creates the config widget for a fetcher type. Called when creating a new fetcher.
 *
 * @param parent_ The parent of the new widget
 * @param type_ The fetcher type to create a widget for
 * @param name_ For ExecExternal only: the name of a bundled script, as
 *              registered in m_scriptMap by typeList(), whose spec file
 *              should pre-fill the widget
 * @return The new widget, or 0 if the type has no case in the switch (for
 *         example Unknown, or a type whose support is compiled out)
 */
Tellico::Fetch::ConfigWidget* Manager::configWidget(QWidget* parent_, Type type_, const QString& name_) {
  ConfigWidget* w = 0;
  switch(type_) {
#ifdef AMAZON_SUPPORT
    case Amazon:
      w = new AmazonFetcher::ConfigWidget(parent_);
      break;
#endif
#ifdef IMDB_SUPPORT
    case IMDB:
      w = new IMDBFetcher::ConfigWidget(parent_);
      break;
#endif
#ifdef HAVE_YAZ
    case Z3950:
      w = new Z3950Fetcher::ConfigWidget(parent_);
      break;
#endif
    case SRU:
      w = new SRUConfigWidget(parent_);
      break;
    case Entrez:
      w = new EntrezFetcher::ConfigWidget(parent_);
      break;
    case ExecExternal:
      w = new ExecExternalFetcher::ConfigWidget(parent_);
      if(!name_.isEmpty() && m_scriptMap.contains(name_)) {
        // bundledScriptHasExecPath() actually needs to write the exec path
        // back to the config so the configWidget can read it. But if the spec file
        // is not writable, that doesn't work. So work around it with a copy to a temp file
        // tmpFile deletes the copy when it goes out of scope at the end of this
        // block, so spec must be used before then
        KTempFile tmpFile;
        tmpFile.setAutoDelete(true);
        KURL from, to;
        from.setPath(m_scriptMap[name_]);
        to.setPath(tmpFile.name());
        // have to overwrite since KTempFile already created it
        if(!KIO::NetAccess::file_copy(from, to, -1, true /*overwrite*/)) {
          // a failed copy is only logged; the Name check below then decides
          // whether the spec is used
          myDebug() << KIO::NetAccess::lastErrorString() << endl;
        }
        KConfig spec(to.path(), false, false);
        // pass actual location of spec file
        if(name_ == spec.readEntry("Name") && bundledScriptHasExecPath(m_scriptMap[name_], &spec)) {
          static_cast<ExecExternalFetcher::ConfigWidget*>(w)->readConfig(&spec);
        } else {
          kdWarning() << "Fetch::Manager::configWidget() - Can't read config file for " << to.path() << endl;
        }
      }
      break;
    case Yahoo:
      w = new YahooFetcher::ConfigWidget(parent_);
      break;
    case AnimeNfo:
      w = new AnimeNfoFetcher::ConfigWidget(parent_);
      break;
    case IBS:
      w = new IBSFetcher::ConfigWidget(parent_);
      break;
    case ISBNdb:
      w = new ISBNdbFetcher::ConfigWidget(parent_);
      break;
    case GCstarPlugin:
      w = new GCstarPluginFetcher::ConfigWidget(parent_);
      break;
    case CrossRef:
      w = new CrossRefFetcher::ConfigWidget(parent_);
      break;
    case Arxiv:
      w = new ArxivFetcher::ConfigWidget(parent_);
      break;
    case Citebase:
      w = new CitebaseFetcher::ConfigWidget(parent_);
      break;
    case Bibsonomy:
      w = new BibsonomyFetcher::ConfigWidget(parent_);
      break;
    case GoogleScholar:
      w = new GoogleScholarFetcher::ConfigWidget(parent_);
      break;
    case Discogs:
      w = new DiscogsFetcher::ConfigWidget(parent_);
      break;
    case Unknown:
      kdWarning() << "Fetch::Manager::configWidget() - no widget defined for type = " << type_ << endl;
  }
  return w;
}
Z3950Fetcher::defaultName(); +#endif + case SRU: return SRUFetcher::defaultName(); + case Entrez: return EntrezFetcher::defaultName(); + case ExecExternal: return ExecExternalFetcher::defaultName(); + case Yahoo: return YahooFetcher::defaultName(); + case AnimeNfo: return AnimeNfoFetcher::defaultName(); + case IBS: return IBSFetcher::defaultName(); + case ISBNdb: return ISBNdbFetcher::defaultName(); + case GCstarPlugin: return GCstarPluginFetcher::defaultName(); + case CrossRef: return CrossRefFetcher::defaultName(); + case Arxiv: return ArxivFetcher::defaultName(); + case Citebase: return CitebaseFetcher::defaultName(); + case Bibsonomy: return BibsonomyFetcher::defaultName(); + case GoogleScholar: return GoogleScholarFetcher::defaultName(); + case Discogs: return DiscogsFetcher::defaultName(); + case Unknown: break; + } + myWarning() << "Manager::typeName() - none found for " << type_ << endl; + return QString::null; +} + +QPixmap Manager::fetcherIcon(Fetch::Fetcher::CPtr fetcher_, int group_, int size_) { +#ifdef HAVE_YAZ + if(fetcher_->type() == Fetch::Z3950) { + const Fetch::Z3950Fetcher* f = static_cast<const Fetch::Z3950Fetcher*>(fetcher_.data()); + KURL u; + u.setProtocol(QString::fromLatin1("http")); + u.setHost(f->host()); + QString icon = favIcon(u); + if(u.isValid() && !icon.isEmpty()) { + return LOAD_ICON(icon, group_, size_); + } + } else +#endif + if(fetcher_->type() == Fetch::ExecExternal) { + const Fetch::ExecExternalFetcher* f = static_cast<const Fetch::ExecExternalFetcher*>(fetcher_.data()); + const QString p = f->execPath(); + KURL u; + if(p.find(QString::fromLatin1("allocine")) > -1) { + u = QString::fromLatin1("http://www.allocine.fr"); + } else if(p.find(QString::fromLatin1("ministerio_de_cultura")) > -1) { + u = QString::fromLatin1("http://www.mcu.es"); + } else if(p.find(QString::fromLatin1("dark_horse_comics")) > -1) { + u = QString::fromLatin1("http://www.darkhorse.com"); + } else if(p.find(QString::fromLatin1("boardgamegeek")) > -1) { + 
u = QString::fromLatin1("http://www.boardgamegeek.com"); + } else if(f->source().find(QString::fromLatin1("amarok"), 0, false /*case-sensitive*/) > -1) { + return LOAD_ICON(QString::fromLatin1("amarok"), group_, size_); + } + if(!u.isEmpty() && u.isValid()) { + QString icon = favIcon(u); + if(!icon.isEmpty()) { + return LOAD_ICON(icon, group_, size_); + } + } + } + return fetcherIcon(fetcher_->type(), group_); +} + +QPixmap Manager::fetcherIcon(Fetch::Type type_, int group_, int size_) { + QString name; + switch(type_) { + case Amazon: + name = favIcon("http://amazon.com"); break; + case IMDB: + name = favIcon("http://imdb.com"); break; + case Z3950: + name = QString::fromLatin1("network"); break; // rather arbitrary + case SRU: + name = QString::fromLatin1("network_local"); break; // just to be different than z3950 + case Entrez: + name = favIcon("http://www.ncbi.nlm.nih.gov"); break; + case ExecExternal: + name = QString::fromLatin1("exec"); break; + case Yahoo: + name = favIcon("http://yahoo.com"); break; + case AnimeNfo: + name = favIcon("http://animenfo.com"); break; + case IBS: + name = favIcon("http://internetbookshop.it"); break; + case ISBNdb: + name = favIcon("http://isbndb.com"); break; + case GCstarPlugin: + name = QString::fromLatin1("gcstar"); break; + case CrossRef: + name = favIcon("http://crossref.org"); break; + case Arxiv: + name = favIcon("http://arxiv.org"); break; + case Citebase: + name = favIcon("http://citebase.org"); break; + case Bibsonomy: + name = favIcon("http://bibsonomy.org"); break; + case GoogleScholar: + name = favIcon("http://scholar.google.com"); break; + case Discogs: + name = favIcon("http://www.discogs.com"); break; + case Unknown: + kdWarning() << "Fetch::Manager::fetcherIcon() - no pixmap defined for type = " << type_ << endl; + } + + return name.isEmpty() ? 
QPixmap() : LOAD_ICON(name, group_, size_); +} + +QString Manager::favIcon(const KURL& url_) { + DCOPRef kded("kded", "favicons"); + DCOPReply reply = kded.call("iconForURL(KURL)", url_); + QString iconName = reply.isValid() ? reply : QString(); + if(!iconName.isEmpty()) { + return iconName; + } else { + // go ahead and try to download it for later + kded.call("downloadHostIcon(KURL)", url_); + } + return KMimeType::iconForURL(url_); +} + +bool Manager::bundledScriptHasExecPath(const QString& specFile_, KConfig* config_) { + // make sure ExecPath is set and executable + // for the bundled scripts, either the exec name is not set, in which case it is the + // name of the spec file, minus the .spec, or the exec is set, and is local to the dir + // if not, look for it + QString exec = config_->readPathEntry("ExecPath"); + QFileInfo specInfo(specFile_), execInfo(exec); + if(exec.isEmpty() || !execInfo.exists()) { + exec = specInfo.dirPath(true) + QDir::separator() + specInfo.baseName(true); // remove ".spec" + } else if(execInfo.isRelative()) { + exec = specInfo.dirPath(true) + exec; + } else if(!execInfo.isExecutable()) { + kdWarning() << "Fetch::Manager::execPathForBundledScript() - not executable: " << specFile_ << endl; + return false; + } + execInfo.setFile(exec); + if(!execInfo.exists() || !execInfo.isExecutable()) { + kdWarning() << "Fetch::Manager::execPathForBundledScript() - no exec file for " << specFile_ << endl; + kdWarning() << "exec = " << exec << endl; + return false; // we're not ok + } + + config_->writePathEntry("ExecPath", exec); + config_->sync(); // might be readonly, but that's ok + return true; +} + +#include "fetchmanager.moc" diff --git a/src/fetch/fetchmanager.h b/src/fetch/fetchmanager.h new file mode 100644 index 0000000..7036d71 --- /dev/null +++ b/src/fetch/fetchmanager.h @@ -0,0 +1,108 @@ +/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : [email 
protected] + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef FETCHMANAGER_H +#define FETCHMANAGER_H + +namespace Tellico { + namespace Fetch { + class SearchResult; + class ConfigWidget; + class ManagerMessage; + } +} + +#include "fetcher.h" +#include "../ptrvector.h" + +#include <ksortablevaluelist.h> + +#include <qobject.h> +#include <qmap.h> + +namespace Tellico { + namespace Fetch { + +typedef KSortableItem<Type, QString> TypePair; // fetcher info, type and name of type +typedef KSortableValueList<Type, QString> TypePairList; +typedef QMap<FetchKey, QString> KeyMap; // map key type to name of key +typedef Vector<Fetcher> FetcherVec; + +/** + * A manager for handling all the different classes of Fetcher. 
/**
 * A manager for handling all the different classes of Fetcher.
 *
 * There is a single instance, created on first use by self().
 *
 * @author Robby Stephenson
 */
class Manager : public QObject {
Q_OBJECT

public:
  /**
   * Returns the single instance, creating it on the first call.
   */
  static Manager* self() { if(!s_self) s_self = new Manager(); return s_self; }

  ~Manager();

  /**
   * Returns the search keys and their translated names. An empty source
   * returns all of them; otherwise only the keys the matching fetcher can search.
   */
  KeyMap keyMap(const QString& source = QString::null) const;
  /**
   * Starts a search on the fetcher whose source() matches.
   */
  void startSearch(const QString& source, FetchKey key, const QString& value);
  /**
   * Continues the search on the fetcher used by the last startSearch(), or
   * emits signalDone() if that is not possible.
   */
  void continueSearch();
  /**
   * Stops every fetcher that is currently searching.
   */
  void stop();
  /**
   * Returns true if any fetcher can fetch the current collection type.
   */
  bool canFetch() const;
  bool hasMoreResults() const;
  /**
   * Rebuilds the fetcher list from the config, or from the defaults if no
   * data sources are configured.
   */
  void loadFetchers();
  const FetcherVec& fetchers() const { return m_fetchers; }
  /**
   * Returns the fetchers that can fetch the given collection type.
   */
  FetcherVec fetchers(int type);
  /**
   * Returns the sorted list of available fetcher types and their names,
   * including the scripts bundled with the application.
   */
  TypePairList typeList();
  ConfigWidget* configWidget(QWidget* parent, Type type, const QString& name);

  // create fetcher for updating an entry
  FetcherVec createUpdateFetchers(int collType);
  FetcherVec createUpdateFetchers(int collType, FetchKey key);
  Fetcher::Ptr createUpdateFetcher(int collType, const QString& source);

  static QString typeName(Type type);
  static QPixmap fetcherIcon(Fetch::Type type, int iconGroup=3 /*Small*/, int size=0 /* default */);
  static QPixmap fetcherIcon(Fetch::Fetcher::CPtr ptr, int iconGroup=3 /*Small*/, int size=0 /* default*/);

signals:
  void signalStatus(const QString& status);
  void signalResultFound(Tellico::Fetch::SearchResult* result);
  /**
   * Emitted when no fetcher search is left running, and when a search could
   * not be started or continued.
   */
  void signalDone();

private slots:
  void slotFetcherDone(Tellico::Fetch::Fetcher::Ptr);

private:
  friend class ManagerMessage;
  static Manager* s_self;

  Manager();
  // returns 0 if the group is missing or the type is unknown or not compiled in
  Fetcher::Ptr createFetcher(KConfig* config, const QString& configGroup);
  // not static: the new fetchers are created with this manager as parent
  FetcherVec defaultFetchers();
  void updateStatus(const QString& message);

  static QString favIcon(const KURL& url);
  static bool bundledScriptHasExecPath(const QString& specFile, KConfig* config);

  FetcherVec m_fetchers;
  // index into m_fetchers of the fetcher used by the last search, or -1
  int m_currentFetcherIndex;
  KeyMap m_keyMap;
  typedef QMap<Fetcher::Ptr, QString> ConfigMap;
  // config group name of each configured fetcher
  ConfigMap m_configMap;
  // bundled script name to the path of its spec file, filled by typeList()
  StringMap m_scriptMap;
  ManagerMessage* m_messager;
  // number of searches started and not yet reported done; unsigned
  uint m_count;
  // true when the fetcher list came from defaultFetchers()
  bool m_loadDefaults : 1;
};
+#endif diff --git a/src/fetch/gcstarpluginfetcher.cpp b/src/fetch/gcstarpluginfetcher.cpp new file mode 100644 index 0000000..4bffed7 --- /dev/null +++ b/src/fetch/gcstarpluginfetcher.cpp @@ -0,0 +1,486 @@ +/*************************************************************************** + copyright : (C) 2005-2006 by Robby Stephenson + email : [email protected] + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "gcstarpluginfetcher.h" +#include "messagehandler.h" +#include "fetchmanager.h" +#include "../collection.h" +#include "../entry.h" +#include "../translators/tellicoimporter.h" +#include "../gui/combobox.h" +#include "../gui/collectiontypecombo.h" +#include "../filehandler.h" +#include "../tellico_kernel.h" +#include "../tellico_debug.h" +#include "../latin1literal.h" +#include "../tellico_utils.h" + +#include <kconfig.h> +#include <kprocess.h> +#include <kprocio.h> +#include <kstandarddirs.h> +#include <kaccelmanager.h> + +#include <qdir.h> +#include <qlayout.h> +#include <qlabel.h> +#include <qwhatsthis.h> + +using Tellico::Fetch::GCstarPluginFetcher; + +GCstarPluginFetcher::PluginMap GCstarPluginFetcher::pluginMap; +GCstarPluginFetcher::PluginParse GCstarPluginFetcher::pluginParse = NotYet; + +//static +GCstarPluginFetcher::PluginList GCstarPluginFetcher::plugins(int collType_) { + if(!pluginMap.contains(collType_)) { + GUI::CursorSaver cs; + QString gcstar = KStandardDirs::findExe(QString::fromLatin1("gcstar")); + + if(pluginParse == NotYet) { + KProcIO proc; + proc << gcstar << QString::fromLatin1("--version"); + // wait 5 seconds at most, just a sanity 
thing, never want to block completely + if(proc.start(KProcess::Block) && proc.wait(5)) { + QString output; + proc.readln(output); + if(!output.isEmpty()) { + // always going to be x.y[.z] ? + QRegExp versionRx(QString::fromLatin1("(\\d+)\\.(\\d+)(?:\\.(\\d+))?")); + if(versionRx.search(output) > -1) { + int x = versionRx.cap(1).toInt(); + int y = versionRx.cap(2).toInt(); + int z = versionRx.cap(3).toInt(); // ok to be empty + myDebug() << QString::fromLatin1("GCstarPluginFetcher() - found %1.%2.%3").arg(x).arg(y).arg(z) << endl; + // --list-plugins argument was added for 1.3 release + pluginParse = (x >= 1 && y >=3) ? New : Old; + } + } + } + // if still zero, then we should use old in future + if(pluginParse == NotYet) { + pluginParse = Old; + } + } + + if(pluginParse == New) { + readPluginsNew(collType_, gcstar); + } else { + readPluginsOld(collType_, gcstar); + } + } + + return pluginMap.contains(collType_) ? pluginMap[collType_] : GCstarPluginFetcher::PluginList(); +} + +void GCstarPluginFetcher::readPluginsNew(int collType_, const QString& gcstar_) { + PluginList plugins; + + QString gcstarCollection = gcstarType(collType_); + if(gcstarCollection.isEmpty()) { + pluginMap.insert(collType_, plugins); + return; + } + + KProcIO proc; + proc << gcstar_ + << QString::fromLatin1("-x") + << QString::fromLatin1("--list-plugins") + << QString::fromLatin1("--collection") << gcstarCollection; + + if(!proc.start(KProcess::Block)) { + myWarning() << "GCstarPluginFetcher::readPluginsNew() - can't start" << endl; + return; + } + + bool hasName = false; + PluginInfo info; + QString line; + for(int length = 0; length > -1; length = proc.readln(line)) { + if(line.isEmpty()) { + if(hasName) { + plugins << info; + } + hasName = false; + info.clear(); + } else { + // authors have \t at beginning + line = line.stripWhiteSpace(); + if(!hasName) { + info.insert(QString::fromLatin1("name"), line); + hasName = true; + } else { + info.insert(QString::fromLatin1("author"), line); + } 
+// myDebug() << line << endl; + } + } + + pluginMap.insert(collType_, plugins); +} + +void GCstarPluginFetcher::readPluginsOld(int collType_, const QString& gcstar_) { + QDir dir(gcstar_, QString::fromLatin1("GC*.pm")); + dir.cd(QString::fromLatin1("../../lib/gcstar/GCPlugins/")); + + QRegExp rx(QString::fromLatin1("get(Name|Author|Lang)\\s*\\{\\s*return\\s+['\"](.+)['\"]")); + rx.setMinimal(true); + + PluginList plugins; + + QString dirName = gcstarType(collType_); + if(dirName.isEmpty()) { + pluginMap.insert(collType_, plugins); + return; + } + + QStringList files = dir.entryList(); + for(QStringList::ConstIterator file = files.begin(); file != files.end(); ++file) { + KURL u; + u.setPath(dir.filePath(*file)); + PluginInfo info; + QString text = FileHandler::readTextFile(u); + for(int pos = rx.search(text); + pos > -1; + pos = rx.search(text, pos+rx.matchedLength())) { + info.insert(rx.cap(1).lower(), rx.cap(2)); + } + // only add if it has a name + if(info.contains(QString::fromLatin1("name"))) { + plugins << info; + } + } + // inserting empty map is ok + pluginMap.insert(collType_, plugins); +} + +QString GCstarPluginFetcher::gcstarType(int collType_) { + switch(collType_) { + case Data::Collection::Book: return QString::fromLatin1("GCbooks"); + case Data::Collection::Video: return QString::fromLatin1("GCfilms"); + case Data::Collection::Game: return QString::fromLatin1("GCgames"); + case Data::Collection::Album: return QString::fromLatin1("GCmusics"); + case Data::Collection::Coin: return QString::fromLatin1("GCcoins"); + case Data::Collection::Wine: return QString::fromLatin1("GCwines"); + case Data::Collection::BoardGame: return QString::fromLatin1("GCboardgames"); + default: break; + } + return QString(); +} + +GCstarPluginFetcher::GCstarPluginFetcher(QObject* parent_, const char* name_/*=0*/) : Fetcher(parent_, name_), + m_started(false), m_collType(-1), m_process(0) { +} + +GCstarPluginFetcher::~GCstarPluginFetcher() { + stop(); +} + +QString 
GCstarPluginFetcher::defaultName() { + return i18n("GCstar Plugin"); +} + +QString GCstarPluginFetcher::source() const { + return m_name; +} + +bool GCstarPluginFetcher::canFetch(int type_) const { + return m_collType == -1 ? false : m_collType == type_; +} + +void GCstarPluginFetcher::readConfigHook(const KConfigGroup& config_) { + m_collType = config_.readNumEntry("CollectionType", -1); + m_plugin = config_.readEntry("Plugin"); +} + +void GCstarPluginFetcher::search(FetchKey key_, const QString& value_) { + m_started = true; + m_data.truncate(0); + + if(key_ != Fetch::Title) { + myDebug() << "GCstarPluginFetcher::search() - only Title searches are supported" << endl; + stop(); + return; + } + + QString gcstar = KStandardDirs::findExe(QString::fromLatin1("gcstar")); + if(gcstar.isEmpty()) { + myWarning() << "GCstarPluginFetcher::search() - gcstar not found!" << endl; + stop(); + return; + } + + QString gcstarCollection = gcstarType(m_collType); + + if(m_plugin.isEmpty()) { + myWarning() << "GCstarPluginFetcher::search() - no plugin name! 
" << endl; + stop(); + return; + } + + m_process = new KProcess(); + connect(m_process, SIGNAL(receivedStdout(KProcess*, char*, int)), SLOT(slotData(KProcess*, char*, int))); + connect(m_process, SIGNAL(receivedStderr(KProcess*, char*, int)), SLOT(slotError(KProcess*, char*, int))); + connect(m_process, SIGNAL(processExited(KProcess*)), SLOT(slotProcessExited(KProcess*))); + QStringList args; + args << gcstar << QString::fromLatin1("-x") + << QString::fromLatin1("--collection") << gcstarCollection + << QString::fromLatin1("--export") << QString::fromLatin1("Tellico") + << QString::fromLatin1("--website") << m_plugin + << QString::fromLatin1("--download") << KProcess::quote(value_); + myLog() << "GCstarPluginFetcher::search() - " << args.join(QChar(' ')) << endl; + *m_process << args; + if(!m_process->start(KProcess::NotifyOnExit, KProcess::AllOutput)) { + myDebug() << "GCstarPluginFetcher::startSearch() - process failed to start" << endl; + stop(); + } +} + +void GCstarPluginFetcher::stop() { + if(!m_started) { + return; + } + if(m_process) { + m_process->kill(); + delete m_process; + m_process = 0; + } + m_data.truncate(0); + m_started = false; + m_errors.clear(); + emit signalDone(this); +} + +void GCstarPluginFetcher::slotData(KProcess*, char* buffer_, int len_) { + QDataStream stream(m_data, IO_WriteOnly | IO_Append); + stream.writeRawBytes(buffer_, len_); +} + +void GCstarPluginFetcher::slotError(KProcess*, char* buffer_, int len_) { + QString msg = QString::fromLocal8Bit(buffer_, len_); + msg.prepend(source() + QString::fromLatin1(": ")); + myDebug() << "GCstarPluginFetcher::slotError() - " << msg << endl; + m_errors << msg; +} + +void GCstarPluginFetcher::slotProcessExited(KProcess*) { +// myDebug() << "GCstarPluginFetcher::slotProcessExited()" << endl; + if(!m_process->normalExit() || m_process->exitStatus()) { + myDebug() << "GCstarPluginFetcher::slotProcessExited() - "<< source() << ": process did not exit successfully" << endl; + if(!m_errors.isEmpty()) 
{ + message(m_errors.join(QChar('\n')), MessageHandler::Error); + } + stop(); + return; + } + if(!m_errors.isEmpty()) { + message(m_errors.join(QChar('\n')), MessageHandler::Warning); + } + + if(m_data.isEmpty()) { + myDebug() << "GCstarPluginFetcher::slotProcessExited() - "<< source() << ": no data" << endl; + stop(); + return; + } + + Import::TellicoImporter imp(QString::fromUtf8(m_data, m_data.size())); + + Data::CollPtr coll = imp.collection(); + if(!coll) { + if(!imp.statusMessage().isEmpty()) { + message(imp.statusMessage(), MessageHandler::Status); + } + myDebug() << "GCstarPluginFetcher::slotProcessExited() - "<< source() << ": no collection pointer" << endl; + stop(); + return; + } + + Data::EntryVec entries = coll->entries(); + for(Data::EntryVec::Iterator entry = entries.begin(); entry != entries.end(); ++entry) { + QString desc; + switch(coll->type()) { + case Data::Collection::Book: + case Data::Collection::Bibtex: + desc = entry->field(QString::fromLatin1("author")) + + QChar('/') + + entry->field(QString::fromLatin1("publisher")); + if(!entry->field(QString::fromLatin1("cr_year")).isEmpty()) { + desc += QChar('/') + entry->field(QString::fromLatin1("cr_year")); + } else if(!entry->field(QString::fromLatin1("pub_year")).isEmpty()){ + desc += QChar('/') + entry->field(QString::fromLatin1("pub_year")); + } + break; + + case Data::Collection::Video: + desc = entry->field(QString::fromLatin1("studio")) + + QChar('/') + + entry->field(QString::fromLatin1("director")) + + QChar('/') + + entry->field(QString::fromLatin1("year")) + + QChar('/') + + entry->field(QString::fromLatin1("medium")); + break; + + case Data::Collection::Album: + desc = entry->field(QString::fromLatin1("artist")) + + QChar('/') + + entry->field(QString::fromLatin1("label")) + + QChar('/') + + entry->field(QString::fromLatin1("year")); + break; + + case Data::Collection::Game: + desc = entry->field(QString::fromLatin1("platform")); + break; + + case Data::Collection::ComicBook: + desc = 
entry->field(QString::fromLatin1("publisher")) + + QChar('/') + + entry->field(QString::fromLatin1("pub_year")); + break; + + case Data::Collection::BoardGame: + desc = entry->field(QString::fromLatin1("designer")) + + QChar('/') + + entry->field(QString::fromLatin1("publisher")) + + QChar('/') + + entry->field(QString::fromLatin1("year")); + break; + + default: + break; + } + SearchResult* r = new SearchResult(this, entry->title(), desc, entry->field(QString::fromLatin1("isbn"))); + m_entries.insert(r->uid, entry); + emit signalResultFound(r); + } + stop(); // be sure to call this +} + +Tellico::Data::EntryPtr GCstarPluginFetcher::fetchEntry(uint uid_) { + return m_entries[uid_]; +} + +void GCstarPluginFetcher::updateEntry(Data::EntryPtr entry_) { + // ry searching for title and rely on Collection::sameEntry() to figure things out + QString t = entry_->field(QString::fromLatin1("title")); + if(!t.isEmpty()) { + search(Fetch::Title, t); + return; + } + + myDebug() << "GCstarPluginFetcher::updateEntry() - insufficient info to search" << endl; + emit signalDone(this); // always need to emit this if not continuing with the search +} + +Tellico::Fetch::ConfigWidget* GCstarPluginFetcher::configWidget(QWidget* parent_) const { + return new GCstarPluginFetcher::ConfigWidget(parent_, this); +} + +GCstarPluginFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const GCstarPluginFetcher* fetcher_/*=0*/) + : Fetch::ConfigWidget(parent_), m_needPluginList(true) { + QGridLayout* l = new QGridLayout(optionsWidget(), 3, 4); + l->setSpacing(4); + l->setColStretch(1, 10); + + int row = -1; + + QLabel* label = new QLabel(i18n("Collection &type:"), optionsWidget()); + l->addWidget(label, ++row, 0); + m_collCombo = new GUI::CollectionTypeCombo(optionsWidget()); + connect(m_collCombo, SIGNAL(activated(int)), SLOT(slotSetModified())); + connect(m_collCombo, SIGNAL(activated(int)), SLOT(slotTypeChanged())); + l->addMultiCellWidget(m_collCombo, row, row, 1, 3); + QString w = i18n("Set 
the collection type of the data returned from the plugin."); + QWhatsThis::add(label, w); + QWhatsThis::add(m_collCombo, w); + label->setBuddy(m_collCombo); + + label = new QLabel(i18n("&Plugin: "), optionsWidget()); + l->addWidget(label, ++row, 0); + m_pluginCombo = new GUI::ComboBox(optionsWidget()); + connect(m_pluginCombo, SIGNAL(activated(int)), SLOT(slotSetModified())); + connect(m_pluginCombo, SIGNAL(activated(int)), SLOT(slotPluginChanged())); + l->addMultiCellWidget(m_pluginCombo, row, row, 1, 3); + w = i18n("Select the GCstar plugin used for the data source."); + QWhatsThis::add(label, w); + QWhatsThis::add(m_pluginCombo, w); + label->setBuddy(m_pluginCombo); + + label = new QLabel(i18n("Author: "), optionsWidget()); + l->addWidget(label, ++row, 0); + m_authorLabel = new QLabel(optionsWidget()); + l->addWidget(m_authorLabel, row, 1); + +// label = new QLabel(i18n("Language: "), optionsWidget()); +// l->addWidget(label, row, 2); +// m_langLabel = new QLabel(optionsWidget()); +// l->addWidget(m_langLabel, row, 3); + + if(fetcher_ && fetcher_->m_collType > -1) { + m_collCombo->setCurrentType(fetcher_->m_collType); + } else { + m_collCombo->setCurrentType(Kernel::self()->collectionType()); + } + + if(fetcher_) { + m_originalPluginName = fetcher_->m_plugin; + } + + KAcceleratorManager::manage(optionsWidget()); +} + +GCstarPluginFetcher::ConfigWidget::~ConfigWidget() { +} + +void GCstarPluginFetcher::ConfigWidget::saveConfig(KConfigGroup& config_) { + config_.writeEntry("CollectionType", m_collCombo->currentType()); + config_.writeEntry("Plugin", m_pluginCombo->currentText()); +} + +QString GCstarPluginFetcher::ConfigWidget::preferredName() const { + return QString::fromLatin1("GCstar - ") + m_pluginCombo->currentText(); +} + +void GCstarPluginFetcher::ConfigWidget::slotTypeChanged() { + int collType = m_collCombo->currentType(); + m_pluginCombo->clear(); + QStringList pluginNames; + GCstarPluginFetcher::PluginList list = GCstarPluginFetcher::plugins(collType); 
+ for(GCstarPluginFetcher::PluginList::ConstIterator it = list.begin(); it != list.end(); ++it) { + pluginNames << (*it)[QString::fromLatin1("name")].toString(); + m_pluginCombo->insertItem(pluginNames.last(), *it); + } + slotPluginChanged(); + emit signalName(preferredName()); +} + +void GCstarPluginFetcher::ConfigWidget::slotPluginChanged() { + PluginInfo info = m_pluginCombo->currentData().toMap(); + m_authorLabel->setText(info[QString::fromLatin1("author")].toString()); +// m_langLabel->setText(info[QString::fromLatin1("lang")].toString()); + emit signalName(preferredName()); +} + +void GCstarPluginFetcher::ConfigWidget::showEvent(QShowEvent*) { + if(m_needPluginList) { + m_needPluginList = false; + slotTypeChanged(); // update plugin combo box + if(!m_originalPluginName.isEmpty()) { + m_pluginCombo->setCurrentText(m_originalPluginName); + slotPluginChanged(); + } + } +} + +#include "gcstarpluginfetcher.moc" diff --git a/src/fetch/gcstarpluginfetcher.h b/src/fetch/gcstarpluginfetcher.h new file mode 100644 index 0000000..1994b58 --- /dev/null +++ b/src/fetch/gcstarpluginfetcher.h @@ -0,0 +1,121 @@ +/*************************************************************************** + copyright : (C) 2007 by Robby Stephenson + email : [email protected] + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef TELLICO_GCSTARPLUGINFETCHER_H +#define TELLICO_GCSTARPLUGINFETCHER_H + +#include "fetcher.h" +#include "configwidget.h" +#include "../datavectors.h" + +#include <qintdict.h> + +class QLabel; +class KProcess; + +namespace Tellico { + namespace GUI { + class ComboBox; + 
/**
 * A fetcher which runs the external gcstar program with one of its website
 * plugins and imports the exported results.
 *
 * @author Robby Stephenson
 */
class GCstarPluginFetcher : public Fetcher {
Q_OBJECT

public:

  GCstarPluginFetcher(QObject* parent, const char* name=0);
  /**
   * Stops any search still in progress.
   */
  virtual ~GCstarPluginFetcher();

  virtual QString source() const;
  virtual bool isSearching() const { return m_started; }
  // only title searches are supported
  virtual bool canSearch(FetchKey k) const { return k == Title; }

  virtual void search(FetchKey key, const QString& value);
  virtual void updateEntry(Data::EntryPtr entry);
  virtual void stop();
  virtual Data::EntryPtr fetchEntry(uint uid);
  virtual Type type() const { return GCstarPlugin; }
  // true only for the collection type read from the config
  virtual bool canFetch(int type) const;
  // reads the "CollectionType" and "Plugin" entries
  virtual void readConfigHook(const KConfigGroup& config);
  virtual Fetch::ConfigWidget* configWidget(QWidget* parent) const;

  class ConfigWidget;
  friend class ConfigWidget;

  static QString defaultName();

private slots:
  // appends the process' stdout to m_data
  void slotData(KProcess* proc, char* buffer, int len);
  // collects the process' stderr in m_errors
  void slotError(KProcess* proc, char* buffer, int len);
  // imports m_data and emits one search result per entry
  void slotProcessExited(KProcess* proc);

private:
  // info about one plugin, keyed by "name", "author", "lang", etc...
  typedef QMap<QString, QVariant> PluginInfo;
  typedef QValueList<PluginInfo> PluginList;
  // map collection type to all available plugins
  typedef QMap<int, PluginList> PluginMap;
  // cache filled by plugins()
  static PluginMap pluginMap;
  static PluginList plugins(int collType);
  // we need to keep track if we've searched for plugins yet and by what method
  enum PluginParse {NotYet, Old, New};
  static PluginParse pluginParse;
  // New: ask gcstar itself via --list-plugins
  static void readPluginsNew(int collType, const QString& exe);
  // Old: scan the GC*.pm plugin files relative to the executable
  static void readPluginsOld(int collType, const QString& exe);
  // GCstar's collection name for a collection type, null if there is none
  static QString gcstarType(int collType);

  bool m_started;
  int m_collType; // -1 until read from the config
  QString m_plugin; // name of the GCstar plugin to use
  KProcess* m_process; // the running gcstar process, 0 when idle
  QByteArray m_data; // raw stdout of the process
  QMap<int, Data::EntryPtr> m_entries; // map from search result id to entry
  QStringList m_errors; // stderr messages of the current search
};
and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "googlescholarfetcher.h" +#include "messagehandler.h" +#include "../filehandler.h" +#include "../translators/bibteximporter.h" +#include "../collection.h" +#include "../entry.h" +#include "../tellico_kernel.h" +#include "../tellico_debug.h" + +#include <klocale.h> +#include <kconfig.h> +#include <kio/job.h> + +#include <qlabel.h> +#include <qlayout.h> + +namespace { + static const int GOOGLE_MAX_RETURNS_TOTAL = 20; + static const char* SCHOLAR_BASE_URL = "http://scholar.google.com/scholar"; +} + +using Tellico::Fetch::GoogleScholarFetcher; + +GoogleScholarFetcher::GoogleScholarFetcher(QObject* parent_, const char* name_) + : Fetcher(parent_, name_), + m_limit(GOOGLE_MAX_RETURNS_TOTAL), m_start(0), m_job(0), m_started(false), + m_cookieIsSet(false) { + m_bibtexRx = QRegExp(QString::fromLatin1("<a\\s.*href\\s*=\\s*\"([^>]*scholar\\.bib[^>]*)\"")); + m_bibtexRx.setMinimal(true); +} + +GoogleScholarFetcher::~GoogleScholarFetcher() { +} + +QString GoogleScholarFetcher::defaultName() { + // no i18n + return QString::fromLatin1("Google Scholar"); +} + +QString GoogleScholarFetcher::source() const { + return m_name.isEmpty() ? 
defaultName() : m_name; +} + +bool GoogleScholarFetcher::canFetch(int type) const { + return type == Data::Collection::Bibtex; +} + +void GoogleScholarFetcher::readConfigHook(const KConfigGroup& config_) { + Q_UNUSED(config_); +} + +void GoogleScholarFetcher::search(FetchKey key_, const QString& value_) { + if(!m_cookieIsSet) { + // have to set preferences to have bibtex output + FileHandler::readTextFile(QString::fromLatin1("http://scholar.google.com/scholar_setprefs?num=100&scis=yes&scisf=4&submit=Save+Preferences"), true); + m_cookieIsSet = true; + } + m_key = key_; + m_value = value_; + m_started = true; + m_start = 0; + m_total = -1; + doSearch(); +} + +void GoogleScholarFetcher::continueSearch() { + m_started = true; + doSearch(); +} + +void GoogleScholarFetcher::doSearch() { +// myDebug() << "GoogleScholarFetcher::search() - value = " << value_ << endl; + + if(!canFetch(Kernel::self()->collectionType())) { + message(i18n("%1 does not allow searching for this collection type.").arg(source()), MessageHandler::Warning); + stop(); + return; + } + + KURL u(QString::fromLatin1(SCHOLAR_BASE_URL)); + u.addQueryItem(QString::fromLatin1("start"), QString::number(m_start)); + + switch(m_key) { + case Title: + u.addQueryItem(QString::fromLatin1("q"), QString::fromLatin1("allintitle:%1").arg(m_value)); + break; + + case Keyword: + u.addQueryItem(QString::fromLatin1("q"), m_value); + break; + + case Person: + u.addQueryItem(QString::fromLatin1("q"), QString::fromLatin1("author:%1").arg(m_value)); + break; + + default: + kdWarning() << "GoogleScholarFetcher::search() - key not recognized: " << m_key << endl; + stop(); + return; + } +// myDebug() << "GoogleScholarFetcher::search() - url: " << u.url() << endl; + + m_job = KIO::get(u, false, false); + connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)), + SLOT(slotData(KIO::Job*, const QByteArray&))); + connect(m_job, SIGNAL(result(KIO::Job*)), + SLOT(slotComplete(KIO::Job*))); +} + +void GoogleScholarFetcher::stop() { 
+ if(!m_started) { + return; + } + if(m_job) { + m_job->kill(); + m_job = 0; + } + m_data.truncate(0); + m_started = false; + emit signalDone(this); +} + +void GoogleScholarFetcher::slotData(KIO::Job*, const QByteArray& data_) { + QDataStream stream(m_data, IO_WriteOnly | IO_Append); + stream.writeRawBytes(data_.data(), data_.size()); +} + +void GoogleScholarFetcher::slotComplete(KIO::Job* job_) { +// myDebug() << "GoogleScholarFetcher::slotComplete()" << endl; + // since the fetch is done, don't worry about holding the job pointer + m_job = 0; + + if(job_->error()) { + job_->showErrorDialog(Kernel::self()->widget()); + stop(); + return; + } + + if(m_data.isEmpty()) { + myDebug() << "GoogleScholarFetcher::slotComplete() - no data" << endl; + stop(); + return; + } + + QString text = QString::fromUtf8(m_data, m_data.size()); + QString bibtex; + int count = 0; + for(int pos = text.find(m_bibtexRx); count < m_limit && pos > -1; pos = text.find(m_bibtexRx, pos+m_bibtexRx.matchedLength()), ++count) { + KURL bibtexUrl(QString::fromLatin1(SCHOLAR_BASE_URL), m_bibtexRx.cap(1)); +// myDebug() << bibtexUrl << endl; + bibtex += FileHandler::readTextFile(bibtexUrl, true); + } + + Import::BibtexImporter imp(bibtex); + Data::CollPtr coll = imp.collection(); + if(!coll) { + myDebug() << "GoogleScholarFetcher::slotComplete() - no collection pointer" << endl; + stop(); + return; + } + + count = 0; + Data::EntryVec entries = coll->entries(); + for(Data::EntryVec::Iterator entry = entries.begin(); count < m_limit && entry != entries.end(); ++entry, ++count) { + if(!m_started) { + // might get aborted + break; + } + QString desc = entry->field(QString::fromLatin1("author")) + + QChar('/') + entry->field(QString::fromLatin1("publisher")); + if(!entry->field(QString::fromLatin1("year")).isEmpty()) { + desc += QChar('/') + entry->field(QString::fromLatin1("year")); + } + + SearchResult* r = new SearchResult(this, entry->title(), desc, entry->field(QString::fromLatin1("isbn"))); + 
m_entries.insert(r->uid, Data::EntryPtr(entry)); + emit signalResultFound(r); + } + m_start = m_entries.count(); +// m_hasMoreResults = m_start <= m_total; + m_hasMoreResults = false; // for now, no continued searches + + stop(); // required +} + +Tellico::Data::EntryPtr GoogleScholarFetcher::fetchEntry(uint uid_) { + return m_entries[uid_]; +} + +void GoogleScholarFetcher::updateEntry(Data::EntryPtr entry_) { +// myDebug() << "GoogleScholarFetcher::updateEntry()" << endl; + // limit to top 5 results + m_limit = 5; + + QString title = entry_->field(QString::fromLatin1("title")); + if(!title.isEmpty()) { + search(Title, title); + return; + } + + myDebug() << "GoogleScholarFetcher::updateEntry() - insufficient info to search" << endl; + emit signalDone(this); // always need to emit this if not continuing with the search +} + +Tellico::Fetch::ConfigWidget* GoogleScholarFetcher::configWidget(QWidget* parent_) const { + return new GoogleScholarFetcher::ConfigWidget(parent_, this); +} + +GoogleScholarFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const GoogleScholarFetcher*/*=0*/) + : Fetch::ConfigWidget(parent_) { + QVBoxLayout* l = new QVBoxLayout(optionsWidget()); + l->addWidget(new QLabel(i18n("This source has no options."), optionsWidget())); + l->addStretch(); +} + +QString GoogleScholarFetcher::ConfigWidget::preferredName() const { + return GoogleScholarFetcher::defaultName(); +} + +#include "googlescholarfetcher.moc" diff --git a/src/fetch/googlescholarfetcher.h b/src/fetch/googlescholarfetcher.h new file mode 100644 index 0000000..4e15475 --- /dev/null +++ b/src/fetch/googlescholarfetcher.h @@ -0,0 +1,103 @@ +/*************************************************************************** + copyright : (C) 2008 by Robby Stephenson + email : [email protected] + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you 
/**
 * A fetcher for Google Scholar
 *
 * @author Robby Stephenson
 */
class GoogleScholarFetcher : public Fetcher {
Q_OBJECT

public:
  /**
   */
  GoogleScholarFetcher(QObject* parent, const char* name = 0);
  /**
   */
  virtual ~GoogleScholarFetcher();

  /**
   * Returns the configured name, or defaultName() if none is set.
   */
  virtual QString source() const;
  virtual bool isSearching() const { return m_started; }
  virtual void search(FetchKey key, const QString& value);
  virtual void continueSearch();
  // Google Scholar can search by title, person or keyword
  virtual bool canSearch(FetchKey k) const { return k == Title || k == Person || k == Keyword; }
  virtual void stop();
  virtual Data::EntryPtr fetchEntry(uint uid);
  virtual Type type() const { return GoogleScholar; }
  // only bibtex collections are supported
  virtual bool canFetch(int type) const;
  virtual void readConfigHook(const KConfigGroup& config);

  // searches for the entry's title, limited to the top 5 results
  virtual void updateEntry(Data::EntryPtr entry);

  /**
   * Returns a widget for modifying the fetcher's config.
   */
  virtual Fetch::ConfigWidget* configWidget(QWidget* parent) const;

  // this source has no options, so there is nothing to save
  class ConfigWidget : public Fetch::ConfigWidget {
  public:
    ConfigWidget(QWidget* parent_, const GoogleScholarFetcher* fetcher = 0);
    virtual void saveConfig(KConfigGroup&) {}
    virtual QString preferredName() const;
  };
  friend class ConfigWidget;

  static QString defaultName();

private slots:
  // appends the downloaded data to m_data
  void slotData(KIO::Job* job, const QByteArray& data);
  // parses the result page and emits the search results
  void slotComplete(KIO::Job* job);

private:
  // starts the download job for m_key, m_value and m_start
  void doSearch();

  int m_limit; // maximum number of results handled per search
  int m_start; // value of the "start" query item
  int m_total; // reset to -1 by search()

  QByteArray m_data; // raw data of the result page
  QMap<int, Data::EntryPtr> m_entries; // map from search result id to entry
  QGuardedPtr<KIO::Job> m_job;

  FetchKey m_key;
  QString m_value;
  bool m_started;

  QRegExp m_bibtexRx; // matches the bibtex export links in the result page
  bool m_cookieIsSet; // whether the bibtex output preference was requested yet
};
+#include <qlabel.h> +#include <qfile.h> + +//#define IBS_TEST + +namespace { + static const char* IBS_BASE_URL = "http://www.internetbookshop.it/ser/serpge.asp"; +} + +using Tellico::Fetch::IBSFetcher; + +IBSFetcher::IBSFetcher(QObject* parent_, const char* name_ /*=0*/) + : Fetcher(parent_, name_), m_started(false) { +} + +QString IBSFetcher::defaultName() { + return i18n("Internet Bookshop (ibs.it)"); +} + +QString IBSFetcher::source() const { + return m_name.isEmpty() ? defaultName() : m_name; +} + +bool IBSFetcher::canFetch(int type) const { + return type == Data::Collection::Book || type == Data::Collection::Bibtex; +} + +void IBSFetcher::readConfigHook(const KConfigGroup& config_) { + Q_UNUSED(config_); +} + +void IBSFetcher::search(FetchKey key_, const QString& value_) { + m_started = true; + m_matches.clear(); + +#ifdef IBS_TEST + KURL u = KURL::fromPathOrURL(QString::fromLatin1("/home/robby/ibs.html")); +#else + KURL u(QString::fromLatin1(IBS_BASE_URL)); + + if(!canFetch(Kernel::self()->collectionType())) { + message(i18n("%1 does not allow searching for this collection type.").arg(source()), MessageHandler::Warning); + stop(); + return; + } + + switch(key_) { + case Title: + u.addQueryItem(QString::fromLatin1("Type"), QString::fromLatin1("keyword")); + u.addQueryItem(QString::fromLatin1("T"), value_); + break; + + case Person: + u.addQueryItem(QString::fromLatin1("Type"), QString::fromLatin1("keyword")); + u.addQueryItem(QString::fromLatin1("A"), value_); + break; + + case ISBN: + { + QString s = value_; + s.remove('-'); + // limit to first isbn + s = s.section(';', 0, 0); + u.setFileName(QString::fromLatin1("serdsp.asp")); + u.addQueryItem(QString::fromLatin1("isbn"), s); + } + break; + + case Keyword: + u.addQueryItem(QString::fromLatin1("Type"), QString::fromLatin1("keyword")); + u.addQueryItem(QString::fromLatin1("S"), value_); + break; + + default: + kdWarning() << "IBSFetcher::search() - key not recognized: " << key_ << endl; + stop(); + return; + 
} +#endif +// myDebug() << "IBSFetcher::search() - url: " << u.url() << endl; + + m_job = KIO::get(u, false, false); + connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)), + SLOT(slotData(KIO::Job*, const QByteArray&))); + if(key_ == ISBN) { + connect(m_job, SIGNAL(result(KIO::Job*)), SLOT(slotCompleteISBN(KIO::Job*))); + } else { + connect(m_job, SIGNAL(result(KIO::Job*)), SLOT(slotComplete(KIO::Job*))); + } +} + +void IBSFetcher::stop() { + if(!m_started) { + return; + } + + if(m_job) { + m_job->kill(); + m_job = 0; + } + m_data.truncate(0); + m_started = false; + emit signalDone(this); +} + +void IBSFetcher::slotData(KIO::Job*, const QByteArray& data_) { + QDataStream stream(m_data, IO_WriteOnly | IO_Append); + stream.writeRawBytes(data_.data(), data_.size()); +} + +void IBSFetcher::slotComplete(KIO::Job* job_) { + // since the fetch is done, don't worry about holding the job pointer + m_job = 0; + + if(job_->error()) { + job_->showErrorDialog(Kernel::self()->widget()); + stop(); + return; + } + + if(m_data.isEmpty()) { + myDebug() << "IBSFetcher::slotComplete() - no data" << endl; + stop(); + return; + } + + QString s = Tellico::decodeHTML(QString(m_data)); + // really specific regexp + QString pat = QString::fromLatin1("http://www.internetbookshop.it/code/"); + QRegExp anchorRx(QString::fromLatin1("<a\\s+[^>]*href\\s*=\\s*[\"'](") + + QRegExp::escape(pat) + + QString::fromLatin1("[^\"]*)\"[^>]*><b>([^<]+)<"), false); + anchorRx.setMinimal(true); + QRegExp tagRx(QString::fromLatin1("<.*>")); + tagRx.setMinimal(true); + + QString u, t, d; + int pos2; + for(int pos = anchorRx.search(s); m_started && pos > -1; pos = anchorRx.search(s, pos+anchorRx.matchedLength())) { + if(!u.isEmpty()) { + SearchResult* r = new SearchResult(this, t, d, QString()); + emit signalResultFound(r); + +#ifdef IBS_TEST + KURL url = KURL::fromPathOrURL(QString::fromLatin1("/home/robby/ibs2.html")); +#else + // the url probable contains & so be careful + KURL url = 
u.replace(QString::fromLatin1("&"), QChar('&')); +#endif + m_matches.insert(r->uid, url); + + u.truncate(0); + t.truncate(0); + d.truncate(0); + } + u = anchorRx.cap(1); + t = anchorRx.cap(2); + pos2 = s.find(QString::fromLatin1("<br>"), pos, false); + if(pos2 > -1) { + int pos3 = s.find(QString::fromLatin1("<br>"), pos2+1, false); + if(pos3 > -1) { + d = s.mid(pos2, pos3-pos2).remove(tagRx).simplifyWhiteSpace(); + } + } + } +#ifndef IBS_TEST + if(!u.isEmpty()) { + SearchResult* r = new SearchResult(this, t, d, QString()); + emit signalResultFound(r); + m_matches.insert(r->uid, u.replace(QString::fromLatin1("&"), QChar('&'))); + } +#endif + + stop(); +} + +void IBSFetcher::slotCompleteISBN(KIO::Job* job_) { + // since the fetch is done, don't worry about holding the job pointer + m_job = 0; + + if(job_->error()) { + job_->showErrorDialog(Kernel::self()->widget()); + stop(); + return; + } + + if(m_data.isEmpty()) { + myDebug() << "IBSFetcher::slotCompleteISBN() - no data" << endl; + stop(); + return; + } + + QString str = Tellico::decodeHTML(QString(m_data)); + if(str.find(QString::fromLatin1("Libro non presente"), 0, false /* cas-sensitive */) > -1) { + stop(); + return; + } + Data::EntryPtr entry = parseEntry(str); + if(entry) { + QString desc = entry->field(QString::fromLatin1("author")) + + '/' + entry->field(QString::fromLatin1("publisher")); + SearchResult* r = new SearchResult(this, entry->title(), desc, entry->field(QString::fromLatin1("isbn"))); + emit signalResultFound(r); + m_matches.insert(r->uid, static_cast<KIO::TransferJob*>(job_)->url().url()); + } + + stop(); +} + +Tellico::Data::EntryPtr IBSFetcher::fetchEntry(uint uid_) { + // if we already grabbed this one, then just pull it out of the dict + Data::EntryPtr entry = m_entries[uid_]; + if(entry) { + return entry; + } + + KURL url = m_matches[uid_]; + if(url.isEmpty()) { + kdWarning() << "IBSFetcher::fetchEntry() - no url in map" << endl; + return 0; + } + + QString results = 
Tellico::decodeHTML(FileHandler::readTextFile(url, true)); + if(results.isEmpty()) { + myDebug() << "IBSFetcher::fetchEntry() - no text results" << endl; + return 0; + } + +// myDebug() << url.url() << endl; +#if 0 + kdWarning() << "Remove debug from ibsfetcher.cpp" << endl; + QFile f(QString::fromLatin1("/tmp/test.html")); + if(f.open(IO_WriteOnly)) { + QTextStream t(&f); + t.setEncoding(QTextStream::UnicodeUTF8); + t << results; + } + f.close(); +#endif + + entry = parseEntry(results); + if(!entry) { + myDebug() << "IBSFetcher::fetchEntry() - error in processing entry" << endl; + return 0; + } + m_entries.insert(uid_, entry); // keep for later + return entry; +} + +Tellico::Data::EntryPtr IBSFetcher::parseEntry(const QString& str_) { + // myDebug() << "IBSFetcher::parseEntry()" << endl; + // %1 is a field caption; skip the HTML tags that follow it and capture the text value after them + QString pat = QString::fromLatin1("%1(?:<[^>]+>)+([^<>\\s][^<>]+)"); + + QRegExp isbnRx(QString::fromLatin1("isbn=([\\dxX]{13})"), false); + QString isbn; + int pos = isbnRx.search(str_); + if(pos > -1) { + isbn = isbnRx.cap(1); + } + + Data::CollPtr coll = new Data::BookCollection(true); + + // map captions in HTML to the names of the entry fields they fill + QMap<QString, QString> fieldMap; + fieldMap.insert(QString::fromLatin1("Titolo"), QString::fromLatin1("title")); + fieldMap.insert(QString::fromLatin1("Autore"), QString::fromLatin1("author")); + fieldMap.insert(QString::fromLatin1("Anno"), QString::fromLatin1("pub_year")); + fieldMap.insert(QString::fromLatin1("Categoria"), QString::fromLatin1("genre")); + fieldMap.insert(QString::fromLatin1("Rilegatura"), QString::fromLatin1("binding")); + fieldMap.insert(QString::fromLatin1("Editore"), QString::fromLatin1("publisher")); + fieldMap.insert(QString::fromLatin1("Dati"), QString::fromLatin1("edition")); + + QRegExp pagesRx(QString::fromLatin1("(\\d+) p\\.(\\s*,\\s*)?")); + Data::EntryPtr entry = new Data::Entry(coll); + + for(QMap<QString, QString>::Iterator it = fieldMap.begin(); it != fieldMap.end(); ++it) { + 
QRegExp infoRx(pat.arg(it.key())); + pos = infoRx.search(str_); + if(pos > -1) { + if(it.data() == Latin1Literal("edition")) { + int pos2 = pagesRx.search(infoRx.cap(1)); + if(pos2 > -1) { + entry->setField(QString::fromLatin1("pages"), pagesRx.cap(1)); + entry->setField(it.data(), infoRx.cap(1).remove(pagesRx)); + } else { + entry->setField(it.data(), infoRx.cap(1)); + } + } else { + entry->setField(it.data(), infoRx.cap(1)); + } + } + } + + // image: the cover url is built directly from the isbn + if(!isbn.isEmpty()) { + entry->setField(QString::fromLatin1("isbn"), isbn); +#if 1 + QString imgURL = QString::fromLatin1("http://giotto.ibs.it/cop/copt13.asp?f=%1").arg(isbn); + myLog() << "IBSFetcher() - cover = " << imgURL << endl; + QString id = ImageFactory::addImage(imgURL, true, QString::fromLatin1("http://internetbookshop.it")); + if(!id.isEmpty()) { + entry->setField(QString::fromLatin1("cover"), id); + } +#else + QRegExp imgRx(QString::fromLatin1("<img\\s+[^>]*\\s*src\\s*=\\s*\"(http://[^/]*\\.ibs\\.it/[^\"]+e=%1)").arg(isbn)); + imgRx.setMinimal(true); + pos = imgRx.search(str_); + if(pos > -1) { + myLog() << "IBSFetcher() - cover = " << imgRx.cap(1) << endl; + QString id = ImageFactory::addImage(imgRx.cap(1), true, QString::fromLatin1("http://internetbookshop.it")); + if(!id.isEmpty()) { + entry->setField(QString::fromLatin1("cover"), id); + } + } +#endif + } + + // now look for description under "Descrizione", falling back to "In sintesi" + QRegExp descRx(QString::fromLatin1("Descrizione(?:<[^>]+>)+([^<>\\s].+)</span>"), false); + descRx.setMinimal(true); + pos = descRx.search(str_); + if(pos == -1) { + descRx.setPattern(QString::fromLatin1("In sintesi(?:<[^>]+>)+([^<>\\s].+)</span>")); + pos = descRx.search(str_); + } + if(pos > -1) { + Data::FieldPtr f = new Data::Field(QString::fromLatin1("plot"), i18n("Plot Summary"), Data::Field::Para); + coll->addField(f); + entry->setField(f, descRx.cap(1).simplifyWhiteSpace()); + } + + // IBS switches the given name and family name of the author (family name comes first) + QStringList names = entry->fields(QString::fromLatin1("author"), 
false); + if(!names.isEmpty() && !names[0].isEmpty()) { + for(QStringList::Iterator it = names.begin(); it != names.end(); ++it) { + if((*it).find(',') > -1) { + continue; // skip if it has a comma + } + QStringList words = QStringList::split(' ', *it); + if(words.isEmpty()) { + continue; + } + // put first word in back + words.append(words[0]); + words.pop_front(); + *it = words.join(QChar(' ')); + } + entry->setField(QString::fromLatin1("author"), names.join(QString::fromLatin1("; "))); + } + return entry; +} + +void IBSFetcher::updateEntry(Data::EntryPtr entry_) { + QString isbn = entry_->field(QString::fromLatin1("isbn")); + if(!isbn.isEmpty()) { + search(Fetch::ISBN, isbn); + return; + } + QString t = entry_->field(QString::fromLatin1("title")); + if(!t.isEmpty()) { + search(Fetch::Title, t); + return; + } + + myDebug() << "IBSFetcher::updateEntry() - insufficient info to search" << endl; + emit signalDone(this); // always need to emit this if not continuing with the search +} + +Tellico::Fetch::ConfigWidget* IBSFetcher::configWidget(QWidget* parent_) const { + return new IBSFetcher::ConfigWidget(parent_); +} + +IBSFetcher::ConfigWidget::ConfigWidget(QWidget* parent_) + : Fetch::ConfigWidget(parent_) { + QVBoxLayout* l = new QVBoxLayout(optionsWidget()); + l->addWidget(new QLabel(i18n("This source has no options."), optionsWidget())); + l->addStretch(); +} + +QString IBSFetcher::ConfigWidget::preferredName() const { + return IBSFetcher::defaultName(); +} + +#include "ibsfetcher.moc" diff --git a/src/fetch/ibsfetcher.h b/src/fetch/ibsfetcher.h new file mode 100644 index 0000000..39326b2 --- /dev/null +++ b/src/fetch/ibsfetcher.h @@ -0,0 +1,87 @@ +/*************************************************************************** + copyright : (C) 2006 by Robby Stephenson + email : [email protected] + ***************************************************************************/ + +/*************************************************************************** + * * + * 
This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef TELLICO_FETCH_IBSFETCHER_H +#define TELLICO_FETCH_IBSFETCHER_H + +#include "fetcher.h" +#include "configwidget.h" + +#include <qcstring.h> // for QByteArray +#include <qguardedptr.h> + +namespace KIO { + class Job; +} + +namespace Tellico { + namespace Fetch { + +/** + * A fetcher for internetbookshop.it (IBS) + * + * @author Robby Stephenson + */ +class IBSFetcher : public Fetcher { +Q_OBJECT + +public: + IBSFetcher(QObject* parent, const char* name = 0); + virtual ~IBSFetcher() {} + + virtual QString source() const; + virtual bool isSearching() const { return m_started; } + virtual void search(FetchKey key, const QString& value); + // can search title, person, isbn, or keyword. No UPC or Raw for now. + virtual bool canSearch(FetchKey k) const { return k == Title || k == Person || k == ISBN || k == Keyword; } + virtual void stop(); + virtual Data::EntryPtr fetchEntry(uint uid); + virtual Type type() const { return IBS; } + virtual bool canFetch(int type) const; + virtual void readConfigHook(const KConfigGroup& config); + + virtual void updateEntry(Data::EntryPtr entry); + + virtual Fetch::ConfigWidget* configWidget(QWidget* parent) const; + + class ConfigWidget : public Fetch::ConfigWidget { + public: + ConfigWidget(QWidget* parent_); + virtual void saveConfig(KConfigGroup&) {} + virtual QString preferredName() const; + }; + friend class ConfigWidget; + + static QString defaultName(); + +private slots: + void slotData(KIO::Job* job, const QByteArray& data); + void slotComplete(KIO::Job* job); + void slotCompleteISBN(KIO::Job* job); + +private: + Data::EntryPtr parseEntry(const QString& str); + + QByteArray m_data; + int m_total; + QMap<int, Data::EntryPtr> m_entries; + QMap<int, KURL> 
m_matches; + QGuardedPtr<KIO::Job> m_job; + + bool m_started; +// QStringList m_fields; +}; + + } // end namespace +} // end namespace +#endif diff --git a/src/fetch/imdbfetcher.cpp b/src/fetch/imdbfetcher.cpp new file mode 100644 index 0000000..1066177 --- /dev/null +++ b/src/fetch/imdbfetcher.cpp @@ -0,0 +1,1208 @@ +/*************************************************************************** + copyright : (C) 2004-2006 by Robby Stephenson + email : [email protected] + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "imdbfetcher.h" +#include "../tellico_kernel.h" +#include "../collections/videocollection.h" +#include "../entry.h" +#include "../field.h" +#include "../filehandler.h" +#include "../latin1literal.h" +#include "../imagefactory.h" +#include "../tellico_utils.h" +#include "../gui/listboxtext.h" +#include "../tellico_debug.h" + +#include <klocale.h> +#include <kdialogbase.h> +#include <kconfig.h> +#include <klineedit.h> +#include <knuminput.h> + +#include <qregexp.h> +#include <qfile.h> +#include <qmap.h> +#include <qvbox.h> +#include <qlabel.h> +#include <qlistbox.h> +#include <qwhatsthis.h> +#include <qlayout.h> +#include <qcheckbox.h> +#include <qvgroupbox.h> + +//#define IMDB_TEST + +namespace { + static const char* IMDB_SERVER = "akas.imdb.com"; + static const uint IMDB_MAX_RESULTS = 20; + static const QString sep = QString::fromLatin1("; "); +} + +using Tellico::Fetch::IMDBFetcher; + +QRegExp* IMDBFetcher::s_tagRx = 0; +QRegExp* IMDBFetcher::s_anchorRx = 0; +QRegExp* IMDBFetcher::s_anchorTitleRx = 0; +QRegExp* IMDBFetcher::s_anchorNameRx = 0; 
+QRegExp* IMDBFetcher::s_titleRx = 0; + +// static +void IMDBFetcher::initRegExps() { + s_tagRx = new QRegExp(QString::fromLatin1("<.*>")); + s_tagRx->setMinimal(true); + + s_anchorRx = new QRegExp(QString::fromLatin1("<a\\s+[^>]*href\\s*=\\s*\"([^\"]*)\"[^<]*>([^<]*)</a>"), false); + s_anchorRx->setMinimal(true); + + s_anchorTitleRx = new QRegExp(QString::fromLatin1("<a\\s+[^>]*href\\s*=\\s*\"([^\"]*/title/[^\"]*)\"[^<]*>([^<]*)</a>"), false); + s_anchorTitleRx->setMinimal(true); + + s_anchorNameRx = new QRegExp(QString::fromLatin1("<a\\s+[^>]*href\\s*=\\s*\"([^\"]*/name/[^\"]*)\"[^<]*>([^<]*)</a>"), false); + s_anchorNameRx->setMinimal(true); + + s_titleRx = new QRegExp(QString::fromLatin1("<title>(.*)</title>"), false); + s_titleRx->setMinimal(true); +} + +IMDBFetcher::IMDBFetcher(QObject* parent_, const char* name_) : Fetcher(parent_, name_), + m_job(0), m_started(false), m_fetchImages(true), m_host(QString::fromLatin1(IMDB_SERVER)), + m_limit(IMDB_MAX_RESULTS), m_countOffset(0) { + if(!s_tagRx) { + initRegExps(); + } +} + +IMDBFetcher::~IMDBFetcher() { +} + +QString IMDBFetcher::defaultName() { + return i18n("Internet Movie Database"); +} + +QString IMDBFetcher::source() const { + return m_name.isEmpty() ? 
defaultName() : m_name; +} + +bool IMDBFetcher::canFetch(int type) const { + return type == Data::Collection::Video; +} + +void IMDBFetcher::readConfigHook(const KConfigGroup& config_) { + QString h = config_.readEntry("Host"); + if(!h.isEmpty()) { + m_host = h; + } + m_numCast = config_.readNumEntry("Max Cast", 10); + m_fetchImages = config_.readBoolEntry("Fetch Images", true); + m_fields = config_.readListEntry("Custom Fields"); +} + +// multiple values not supported +void IMDBFetcher::search(FetchKey key_, const QString& value_) { + m_key = key_; + m_value = value_; + m_started = true; + m_redirected = false; + m_data.truncate(0); + m_matches.clear(); + m_popularTitles.truncate(0); + m_exactTitles.truncate(0); + m_partialTitles.truncate(0); + m_currentTitleBlock = Unknown; + m_countOffset = 0; + +// only search if current collection is a video collection + if(Kernel::self()->collectionType() != Data::Collection::Video) { + myDebug() << "IMDBFetcher::search() - collection type mismatch, stopping" << endl; + stop(); + return; + } + +#ifdef IMDB_TEST + if(m_key == Title) { + m_url = KURL::fromPathOrURL(QString::fromLatin1("/home/robby/imdb-title.html")); + m_redirected = false; + } else { + m_url = KURL::fromPathOrURL(QString::fromLatin1("/home/robby/imdb-name.html")); + m_redirected = true; + } +#else + m_url = KURL(); + m_url.setProtocol(QString::fromLatin1("http")); + m_url.setHost(m_host.isEmpty() ? 
QString::fromLatin1(IMDB_SERVER) : m_host); + m_url.setPath(QString::fromLatin1("/find")); + + switch(key_) { + case Title: + m_url.addQueryItem(QString::fromLatin1("s"), QString::fromLatin1("tt")); + break; + + case Person: + m_url.addQueryItem(QString::fromLatin1("s"), QString::fromLatin1("nm")); + break; + + default: + kdWarning() << "IMDBFetcher::search() - FetchKey not supported" << endl; + stop(); + return; + } + + // as far as I can tell, the url encoding should always be iso-8859-1 + // not utf-8 + m_url.addQueryItem(QString::fromLatin1("q"), value_, 4 /* iso-8859-1 */); + +// myDebug() << "IMDBFetcher::search() url = " << m_url << endl; +#endif + + m_job = KIO::get(m_url, false, false); + connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)), + SLOT(slotData(KIO::Job*, const QByteArray&))); + connect(m_job, SIGNAL(result(KIO::Job*)), + SLOT(slotComplete(KIO::Job*))); + connect(m_job, SIGNAL(redirection(KIO::Job *, const KURL&)), + SLOT(slotRedirection(KIO::Job*, const KURL&))); +} + +void IMDBFetcher::continueSearch() { + m_started = true; + m_limit += IMDB_MAX_RESULTS; + + if(m_currentTitleBlock == Popular) { + parseTitleBlock(m_popularTitles); + // if the offset is 0, then we need to be looking at the next block + m_currentTitleBlock = m_countOffset == 0 ? Exact : Popular; + } + + // current title block might have changed + if(m_currentTitleBlock == Exact) { + parseTitleBlock(m_exactTitles); + m_currentTitleBlock = m_countOffset == 0 ? Partial : Exact; + } + + if(m_currentTitleBlock == Partial) { + parseTitleBlock(m_partialTitles); + m_currentTitleBlock = m_countOffset == 0 ? 
Unknown : Partial; + } + + if(m_currentTitleBlock == SinglePerson) { + parseSingleNameResult(); + } + + stop(); +} + +void IMDBFetcher::stop() { + if(!m_started) { + return; + } +// myLog() << "IMDBFetcher::stop()" << endl; + if(m_job) { + m_job->kill(); + m_job = 0; + } + + m_started = false; + m_redirected = false; + + emit signalDone(this); +} + +void IMDBFetcher::slotData(KIO::Job*, const QByteArray& data_) { + QDataStream stream(m_data, IO_WriteOnly | IO_Append); + stream.writeRawBytes(data_.data(), data_.size()); +} + +void IMDBFetcher::slotRedirection(KIO::Job*, const KURL& toURL_) { + m_url = toURL_; + m_redirected = true; +} + +void IMDBFetcher::slotComplete(KIO::Job* job_) { + // since the fetch is done, don't worry about holding the job pointer + m_job = 0; + + if(job_->error()) { + job_->showErrorDialog(Kernel::self()->widget()); + stop(); + return; + } + + if(m_data.isEmpty()) { + stop(); + return; + } + + // a single result was found if we got redirected + if(m_key == Title) { + if(m_redirected) { + parseSingleTitleResult(); + } else { + parseMultipleTitleResults(); + } + } else { + if(m_redirected) { + parseSingleNameResult(); + } else { + parseMultipleNameResults(); + } + } +} + +void IMDBFetcher::parseSingleTitleResult() { +// myDebug() << "IMDBFetcher::parseSingleTitleResult()" << endl; + s_titleRx->search(Tellico::decodeHTML(QString(m_data))); + // split title at parenthesis + const QString cap1 = s_titleRx->cap(1); + int pPos = cap1.find('('); + // FIXME: maybe remove parentheses here? + SearchResult* r = new SearchResult(this, + pPos == -1 ? cap1 : cap1.left(pPos), + pPos == -1 ? 
QString::null : cap1.mid(pPos), + QString()); + m_matches.insert(r->uid, m_url); + emit signalResultFound(r); + + m_hasMoreResults = false; + stop(); +} + +void IMDBFetcher::parseMultipleTitleResults() { +// myDebug() << "IMDBFetcher::parseMultipleTitleResults()" << endl; + QString output = Tellico::decodeHTML(QString(m_data)); + + // IMDb can return three title lists, popular, exact, and partial + // the popular titles are in the first table, after the "Popular Titles" text + int pos_popular = output.find(QString::fromLatin1("Popular Titles"), 0, false); + int pos_exact = output.find(QString::fromLatin1("Exact Matches"), QMAX(pos_popular, 0), false); + int pos_partial = output.find(QString::fromLatin1("Partial Matches"), QMAX(pos_exact, 0), false); + int end_popular = pos_exact; // popular block ends where the exact (or else partial) block starts + if(end_popular == -1) { + end_popular = pos_partial == -1 ? output.length() : pos_partial; + } + int end_exact = pos_partial; // exact block ends where the partial block starts + if(end_exact == -1) { + end_exact = output.length(); + } + + // if found popular matches + if(pos_popular > -1) { + m_popularTitles = output.mid(pos_popular, end_popular-pos_popular); + } + // if found exact matches + if(pos_exact > -1) { + m_exactTitles = output.mid(pos_exact, end_exact-pos_exact); + } + if(pos_partial > -1) { + m_partialTitles = output.mid(pos_partial); + } + + parseTitleBlock(m_popularTitles); + // if the offset is 0, then we need to be looking at the next block + m_currentTitleBlock = m_countOffset == 0 ? Exact : Popular; + + if(m_matches.size() < m_limit) { + parseTitleBlock(m_exactTitles); + m_currentTitleBlock = m_countOffset == 0 ? Partial : Exact; + } + + if(m_matches.size() < m_limit) { + parseTitleBlock(m_partialTitles); + m_currentTitleBlock = m_countOffset == 0 ? Unknown : Partial; + } + +#ifndef NDEBUG + if(m_matches.size() == 0) { + myDebug() << "IMDBFetcher::parseMultipleTitleResults() - no matches found." 
<< endl; + } +#endif + + stop(); +} + +void IMDBFetcher::parseTitleBlock(const QString& str_) { + if(str_.isEmpty()) { + m_countOffset = 0; + return; + } +// myDebug() << "IMDBFetcher::parseTitleBlock() - " << m_currentTitleBlock << endl; + + QRegExp akaRx(QString::fromLatin1("aka (.*)(</li>|<br)"), false); + akaRx.setMinimal(true); + + m_hasMoreResults = false; + + int count = 0; + int start = s_anchorTitleRx->search(str_); + while(m_started && start > -1) { + // split title at parenthesis + const QString cap1 = s_anchorTitleRx->cap(1); // the anchor url + const QString cap2 = s_anchorTitleRx->cap(2).stripWhiteSpace(); // the anchor text + start += s_anchorTitleRx->matchedLength(); + int pPos = cap2.find('('); // if it has parentheses, use that for description + QString desc; + if(pPos > -1) { + int pPos2 = cap2.find(')', pPos+1); + if(pPos2 > -1) { + desc = cap2.mid(pPos+1, pPos2-pPos-1); + } + } else { + // parenthesis might be outside anchor tag + int end = s_anchorTitleRx->search(str_, start); + if(end == -1) { + end = str_.length(); + } + QString text = str_.mid(start, end-start); + pPos = text.find('('); + if(pPos > -1) { + int pNewLine = text.find(QString::fromLatin1("<br")); + if(pNewLine == -1 || pPos < pNewLine) { + int pPos2 = text.find(')', pPos); + desc = text.mid(pPos+1, pPos2-pPos-1); + } + pPos = -1; + } + } + // multiple matches might have 'aka' info + int end = s_anchorTitleRx->search(str_, start+1); + if(end == -1) { + end = str_.length(); + } + int akaPos = akaRx.search(str_, start+1); + if(akaPos > -1 && akaPos < end) { + // limit to 50 chars + desc += QChar(' ') + akaRx.cap(1).stripWhiteSpace().remove(*s_tagRx); + if(desc.length() > 50) { + desc = desc.left(50) + QString::fromLatin1("..."); + } + } + + start = s_anchorTitleRx->search(str_, start); + + if(count < m_countOffset) { + ++count; + continue; + } + + // if we got this far, then there is a valid result + if(m_matches.size() >= m_limit) { + m_hasMoreResults = true; + break; + } + + 
SearchResult* r = new SearchResult(this, pPos == -1 ? cap2 : cap2.left(pPos), desc, QString()); + KURL u(m_url, cap1); + u.setQuery(QString::null); + m_matches.insert(r->uid, u); + emit signalResultFound(r); + ++count; + } + if(!m_hasMoreResults && m_currentTitleBlock != Partial) { + m_hasMoreResults = true; + } + m_countOffset = m_matches.size() < m_limit ? 0 : count; +} + +void IMDBFetcher::parseSingleNameResult() { +// myDebug() << "IMDBFetcher::parseSingleNameResult()" << endl; + + m_currentTitleBlock = SinglePerson; + + QString output = Tellico::decodeHTML(QString(m_data)); + + int pos = s_anchorTitleRx->search(output); + if(pos == -1) { + stop(); + return; + } + + QRegExp tvRegExp(QString::fromLatin1("TV\\sEpisode"), false); + + int len = 0; + int count = 0; + QString desc; + for( ; m_started && pos > -1; pos = s_anchorTitleRx->search(output, pos+len)) { + desc.truncate(0); + bool isEpisode = false; + len = s_anchorTitleRx->cap(0).length(); + // split title at parenthesis + const QString cap2 = s_anchorTitleRx->cap(2).stripWhiteSpace(); + int pPos = cap2.find('('); + if(pPos > -1) { + desc = cap2.mid(pPos); + } else { + // look until the next <a + int aPos = output.find(QString::fromLatin1("<a"), pos+len, false); + if(aPos == -1) { + aPos = output.length(); + } + QString tmp = output.mid(pos+len, aPos-pos-len); + if(tmp.find(tvRegExp) > -1) { + isEpisode = true; + } + pPos = tmp.find('('); + if(pPos > -1) { + int pNewLine = tmp.find(QString::fromLatin1("<br")); + if(pNewLine == -1 || pPos < pNewLine) { + int pEnd = tmp.find(')', pPos+1); + desc = tmp.mid(pPos+1, pEnd-pPos-1).remove(*s_tagRx); + } + // but need to indicate it wasn't found initially + pPos = -1; + } + } + + ; + + if(count < m_countOffset) { + ++count; + continue; + } + + ++count; + if(isEpisode) { + continue; + } + + // if we got this far, then there is a valid result + if(m_matches.size() >= m_limit) { + m_hasMoreResults = true; + break; + } + + // FIXME: maybe remove parentheses here? 
+ SearchResult* r = new SearchResult(this, pPos == -1 ? cap2 : cap2.left(pPos), desc, QString()); + KURL u(m_url, s_anchorTitleRx->cap(1)); // relative URL constructor + u.setQuery(QString::null); + m_matches.insert(r->uid, u); +// myDebug() << u.prettyURL() << endl; +// myDebug() << cap2 << endl; + emit signalResultFound(r); + } + if(pos == -1) { + m_hasMoreResults = false; + } + m_countOffset = count - 1; + + stop(); +} + +void IMDBFetcher::parseMultipleNameResults() { +// myDebug() << "IMDBFetcher::parseMultipleNameResults()" << endl; + + // the exact results are in the first table after the "exact results" text + QString output = Tellico::decodeHTML(QString(m_data)); + int pos = output.find(QString::fromLatin1("Popular Results"), 0, false); + if(pos == -1) { + pos = output.find(QString::fromLatin1("Exact Matches"), 0, false); + } + + // find beginning of partial matches + int end = output.find(QString::fromLatin1("Other Results"), QMAX(pos, 0), false); + if(end == -1) { + end = output.find(QString::fromLatin1("Partial Matches"), QMAX(pos, 0), false); + if(end == -1) { + end = output.find(QString::fromLatin1("Approx Matches"), QMAX(pos, 0), false); + if(end == -1) { + end = output.length(); + } + } + } + + QMap<QString, KURL> map; + QMap<QString, int> nameMap; + + QString s; + // if found exact matches + if(pos > -1) { + pos = s_anchorNameRx->search(output, pos+13); + while(pos > -1 && pos < end && m_matches.size() < m_limit) { + KURL u(m_url, s_anchorNameRx->cap(1)); + s = s_anchorNameRx->cap(2).stripWhiteSpace() + ' '; + // if more than one exact, add parentheses + if(nameMap.contains(s) && nameMap[s] > 0) { + // fix the first one that didn't have a number + if(nameMap[s] == 1) { + KURL u2 = map[s]; + map.remove(s); + map.insert(s + "(1) ", u2); + } + nameMap.insert(s, nameMap[s] + 1); + // check for duplicate names + s += QString::fromLatin1("(%1) ").arg(nameMap[s]); + } else { + nameMap.insert(s, 1); + } + map.insert(s, u); + pos = 
s_anchorNameRx->search(output, pos+s_anchorNameRx->cap(0).length()); + } + } + + // go ahead and search for partial matches + pos = s_anchorNameRx->search(output, end); + while(pos > -1 && m_matches.size() < m_limit) { + KURL u(m_url, s_anchorNameRx->cap(1)); // relative URL + s = s_anchorNameRx->cap(2).stripWhiteSpace(); + if(nameMap.contains(s) && nameMap[s] > 0) { + // fix the first one that didn't have a number + if(nameMap[s] == 1) { + KURL u2 = map[s]; + map.remove(s); + map.insert(s + " (1)", u2); + } + nameMap.insert(s, nameMap[s] + 1); + // check for duplicate names + s += QString::fromLatin1(" (%1)").arg(nameMap[s]); + } else { + nameMap.insert(s, 1); + } + map.insert(s, u); + pos = s_anchorNameRx->search(output, pos+s_anchorNameRx->cap(0).length()); + } + + if(map.count() == 0) { + stop(); + return; + } + + KDialogBase* dlg = new KDialogBase(Kernel::self()->widget(), "imdb dialog", + true, i18n("Select IMDB Result"), KDialogBase::Ok|KDialogBase::Cancel); + QVBox* box = new QVBox(dlg); + box->setSpacing(10); + (void) new QLabel(i18n("<qt>Your search returned multiple matches. 
Please select one below.</qt>"), box); + + QListBox* listBox = new QListBox(box); + listBox->setMinimumWidth(400); + listBox->setColumnMode(QListBox::FitToWidth); + const QStringList values = map.keys(); + for(QStringList::ConstIterator it = values.begin(); it != values.end(); ++it) { + if((*it).endsWith(QChar(' '))) { + GUI::ListBoxText* box = new GUI::ListBoxText(listBox, *it, 0); + box->setColored(true); + } else { + (void) new GUI::ListBoxText(listBox, *it); + } + } + listBox->setSelected(0, true); + QWhatsThis::add(listBox, i18n("<qt>Select a search result.</qt>")); + + dlg->setMainWidget(box); + if(dlg->exec() != QDialog::Accepted || listBox->currentText().isEmpty()) { + dlg->delayedDestruct(); + stop(); + return; + } + + m_url = map[listBox->currentText()]; + dlg->delayedDestruct(); + + // redirected is true since that's how I tell if an exact match has been found + m_redirected = true; + m_data.truncate(0); + m_job = KIO::get(m_url, false, false); + connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)), + SLOT(slotData(KIO::Job*, const QByteArray&))); + connect(m_job, SIGNAL(result(KIO::Job*)), + SLOT(slotComplete(KIO::Job*))); + connect(m_job, SIGNAL(redirection(KIO::Job *, const KURL&)), + SLOT(slotRedirection(KIO::Job*, const KURL&))); + + // do not stop() here +} + +Tellico::Data::EntryPtr IMDBFetcher::fetchEntry(uint uid_) { + // if we already grabbed this one, then just pull it out of the dict + Data::EntryPtr entry = m_entries[uid_]; + if(entry) { + return entry; + } + + KURL url = m_matches[uid_]; + if(url.isEmpty()) { + myDebug() << "IMDBFetcher::fetchEntry() - no url found" << endl; + return 0; + } + + KURL origURL = m_url; // keep to switch back + QString results; + // if the url matches the current one, no need to redownload it + if(url == m_url) { +// myDebug() << "IMDBFetcher::fetchEntry() - matches previous URL, no downloading needed." 
<< endl; + results = Tellico::decodeHTML(QString(m_data)); + } else { + // now it's synchronous +#ifdef IMDB_TEST + KURL u = KURL::fromPathOrURL(QString::fromLatin1("/home/robby/imdb-title-result.html")); + results = Tellico::decodeHTML(FileHandler::readTextFile(u)); +#else + // be quiet about failure + results = Tellico::decodeHTML(FileHandler::readTextFile(url, true)); + m_url = url; // needed for processing +#endif + } + if(results.isEmpty()) { + myDebug() << "IMDBFetcher::fetchEntry() - no text results" << endl; + m_url = origURL; + return 0; + } + + entry = parseEntry(results); + m_url = origURL; + if(!entry) { + myDebug() << "IMDBFetcher::fetchEntry() - error in processing entry" << endl; + return 0; + } + m_entries.insert(uid_, entry); // keep for later + return entry; +} + +Tellico::Data::EntryPtr IMDBFetcher::parseEntry(const QString& str_) { + Data::CollPtr coll = new Data::VideoCollection(true); + Data::EntryPtr entry = new Data::Entry(coll); + + doTitle(str_, entry); + doRunningTime(str_, entry); + doAspectRatio(str_, entry); + doAlsoKnownAs(str_, entry); + doPlot(str_, entry, m_url); + doLists(str_, entry); + doPerson(str_, entry, QString::fromLatin1("Director"), QString::fromLatin1("director")); + doPerson(str_, entry, QString::fromLatin1("Writer"), QString::fromLatin1("writer")); + doRating(str_, entry); + doCast(str_, entry, m_url); + if(m_fetchImages) { + // needs base URL + doCover(str_, entry, m_url); + } + + const QString imdb = QString::fromLatin1("imdb"); + if(!coll->hasField(imdb) && m_fields.findIndex(imdb) > -1) { + Data::FieldPtr field = new Data::Field(imdb, i18n("IMDB Link"), Data::Field::URL); + field->setCategory(i18n("General")); + coll->addField(field); + } + if(coll->hasField(imdb) && coll->fieldByName(imdb)->type() == Data::Field::URL) { + m_url.setQuery(QString::null); + entry->setField(imdb, m_url.url()); + } + return entry; +} + +void IMDBFetcher::doTitle(const QString& str_, Data::EntryPtr entry_) { + if(s_titleRx->search(str_) 
> -1) { + const QString cap1 = s_titleRx->cap(1); + // titles always have parentheses; the title proper is the text before the first one + int pPos = cap1.find('('); + QString title = cap1.left(pPos).stripWhiteSpace(); + // remove first and last quotes if there + if(title.startsWith(QChar('"')) && title.endsWith(QChar('"'))) { + title = title.mid(1, title.length()-2); + } + entry_->setField(QString::fromLatin1("title"), title); + // the year is the run of digits right after the opening parenthesis + uint pPos2 = pPos+1; + while(pPos2 < cap1.length() && cap1[pPos2].isDigit()) { + ++pPos2; + } + QString year = cap1.mid(pPos+1, pPos2-pPos-1); + if(!year.isEmpty()) { + entry_->setField(QString::fromLatin1("year"), year); + } + } +} + +void IMDBFetcher::doRunningTime(const QString& str_, Data::EntryPtr entry_) { + // running time + QRegExp runtimeRx(QString::fromLatin1("runtime:.*(\\d+)\\s+min"), false); + runtimeRx.setMinimal(true); + + if(runtimeRx.search(str_) > -1) { +// myDebug() << "running-time = " << runtimeRx.cap(1) << endl; + entry_->setField(QString::fromLatin1("running-time"), runtimeRx.cap(1)); + } +} + +void IMDBFetcher::doAspectRatio(const QString& str_, Data::EntryPtr entry_) { + QRegExp rx(QString::fromLatin1("aspect ratio:.*([\\d\\.]+\\s*:\\s*[\\d\\.]+)"), false); + rx.setMinimal(true); + + if(rx.search(str_) > -1) { +// myDebug() << "aspect ratio = " << rx.cap(1) << endl; + entry_->setField(QString::fromLatin1("aspect-ratio"), rx.cap(1).stripWhiteSpace()); + } +} + +void IMDBFetcher::doAlsoKnownAs(const QString& str_, Data::EntryPtr entry_) { + if(m_fields.findIndex(QString::fromLatin1("alttitle")) == -1) { + return; + } + + // match until next b or div tag +// QRegExp akaRx(QString::fromLatin1("also known as(.*)<b(?:\\s.*)?>")); + QRegExp akaRx(QString::fromLatin1("also known as(.*)<(b[>\\s/]|div)"), false); + akaRx.setMinimal(true); + + if(akaRx.search(str_) > -1 && !akaRx.cap(1).isEmpty()) { + Data::FieldPtr f = entry_->collection()->fieldByName(QString::fromLatin1("alttitle")); + if(!f) { + f = new Data::Field(QString::fromLatin1("alttitle"), 
i18n("Alternative Titles"), Data::Field::Table); + f->setFormatFlag(Data::Field::FormatTitle); + entry_->collection()->addField(f); + } + + // split by <br>, remembering it could become valid xhtml! + QRegExp brRx(QString::fromLatin1("<br[\\s/]*>"), false); + brRx.setMinimal(true); + QStringList list = QStringList::split(brRx, akaRx.cap(1)); + // lang could be included with [fr] +// const QRegExp parRx(QString::fromLatin1("\\(.+\\)")); + const QRegExp brackRx(QString::fromLatin1("\\[\\w+\\]")); + QStringList values; + for(QStringList::Iterator it = list.begin(); it != list.end(); ++it) { + QString s = *it; + // sometimes, the word "more" gets linked to the releaseinfo page, check that + if(s.find(QString::fromLatin1("releaseinfo")) > -1) { + continue; + } + s.remove(*s_tagRx); + s.remove(brackRx); + s = s.stripWhiteSpace(); + // the first value ends up being or starting with the colon after "Also know as" + // I'm too lazy to figure out a better regexp + if(s.startsWith(QChar(':'))) { + s = s.mid(1); + } + if(!s.isEmpty()) { + values += s; + } + } + if(!values.isEmpty()) { + entry_->setField(QString::fromLatin1("alttitle"), values.join(sep)); + } + } +} + +void IMDBFetcher::doPlot(const QString& str_, Data::EntryPtr entry_, const KURL& baseURL_) { + // plot summaries provided by users are on a separate page + // should those be preferred? 
+ + bool useUserSummary = false; + + QString thisPlot; + // match until next opening tag + QRegExp plotRx(QString::fromLatin1("plot (?:outline|summary):(.*)<[^/].*</"), false); + plotRx.setMinimal(true); + QRegExp plotURLRx(QString::fromLatin1("<a\\s+.*href\\s*=\\s*\".*/title/.*/plotsummary\""), false); + plotURLRx.setMinimal(true); + if(plotRx.search(str_) > -1) { + thisPlot = plotRx.cap(1); + thisPlot.remove(*s_tagRx); // remove HTML tags + entry_->setField(QString::fromLatin1("plot"), thisPlot); + // if thisPlot ends with (more) or contains + // a url that ends with plotsummary, then we'll grab it, otherwise not + if(plotRx.cap(0).endsWith(QString::fromLatin1("(more)</")) || plotURLRx.search(plotRx.cap(0)) > -1) { + useUserSummary = true; + } + } + + if(useUserSummary) { + QRegExp idRx(QString::fromLatin1("title/(tt\\d+)")); + idRx.search(baseURL_.path()); + KURL plotURL = baseURL_; + plotURL.setPath(QString::fromLatin1("/title/") + idRx.cap(1) + QString::fromLatin1("/plotsummary")); + // be quiet about failure + QString plotPage = FileHandler::readTextFile(plotURL, true); + + if(!plotPage.isEmpty()) { + QRegExp plotRx(QString::fromLatin1("<p\\s+class\\s*=\\s*\"plotpar\">(.*)</p")); + plotRx.setMinimal(true); + if(plotRx.search(plotPage) > -1) { + QString userPlot = plotRx.cap(1); + userPlot.remove(*s_tagRx); // remove HTML tags + entry_->setField(QString::fromLatin1("plot"), Tellico::decodeHTML(userPlot)); + } + } + } +} + +void IMDBFetcher::doPerson(const QString& str_, Data::EntryPtr entry_, + const QString& imdbHeader_, const QString& fieldName_) { + QRegExp br2Rx(QString::fromLatin1("<br[\\s/]*>\\s*<br[\\s/]*>"), false); + br2Rx.setMinimal(true); + QRegExp divRx(QString::fromLatin1("<[/]*div"), false); + divRx.setMinimal(true); + QString name = QString::fromLatin1("/name/"); + + StringSet people; + for(int pos = str_.find(imdbHeader_); pos > 0; pos = str_.find(imdbHeader_, pos)) { + // loop until repeated <br> tags or </div> tag + const int endPos1 = 
str_.find(br2Rx, pos); + const int endPos2 = str_.find(divRx, pos); + const int endPos = QMIN(endPos1, endPos2); // ok to be -1 + pos = s_anchorRx->search(str_, pos+1); + while(pos > -1 && pos < endPos) { + if(s_anchorRx->cap(1).find(name) > -1) { + people.add(s_anchorRx->cap(2).stripWhiteSpace()); + } + pos = s_anchorRx->search(str_, pos+1); + } + } + if(!people.isEmpty()) { + entry_->setField(fieldName_, people.toList().join(sep)); + } +} + +void IMDBFetcher::doCast(const QString& str_, Data::EntryPtr entry_, const KURL& baseURL_) { + // the extended cast list is on a separate page + // that's usually a lot of people + // but since it can be in billing order, the main actors might not + // be in the short list + QRegExp idRx(QString::fromLatin1("title/(tt\\d+)")); + idRx.search(baseURL_.path()); +#ifdef IMDB_TEST + KURL castURL = KURL::fromPathOrURL(QString::fromLatin1("/home/robby/imdb-title-fullcredits.html")); +#else + KURL castURL = baseURL_; + castURL.setPath(QString::fromLatin1("/title/") + idRx.cap(1) + QString::fromLatin1("/fullcredits")); +#endif + // be quiet about failure and be sure to translate entities + QString castPage = Tellico::decodeHTML(FileHandler::readTextFile(castURL, true)); + + int pos = -1; + // the text to search, depends on which page is being read + QString castText = castPage; + if(castText.isEmpty()) { + // fall back to short list + castText = str_; + pos = castText.find(QString::fromLatin1("cast overview"), 0, false); + if(pos == -1) { + pos = castText.find(QString::fromLatin1("credited cast"), 0, false); + } + } else { + // first look for anchor + QRegExp castAnchorRx(QString::fromLatin1("<a\\s+name\\s*=\\s*\"cast\""), false); + pos = castText.find(castAnchorRx); + if(pos < 0) { + QRegExp tableClassRx(QString::fromLatin1("<table\\s+class\\s*=\\s*\"cast\""), false); + pos = castText.find(tableClassRx); + if(pos < 0) { + // fragile, the word "cast" appears in the title, but need to find + // the one right above the actual cast table 
+ // for TV shows, there's a link on the sidebar for "episodes cast" + // so need to not match that one + pos = castText.find(QString::fromLatin1("cast</"), 0, false); + if(pos > 9) { + // back up 9 places + if(castText.mid(pos-9, 9).startsWith(QString::fromLatin1("episodes"))) { + // find next cast list + pos = castText.find(QString::fromLatin1("cast</"), pos+6, false); + } + } + } + } + } + if(pos == -1) { // no cast list found + myDebug() << "IMDBFetcher::doCast() - no cast list found" << endl; + return; + } + + const QString name = QString::fromLatin1("/name/"); + QRegExp tdRx(QString::fromLatin1("<td[^>]*>(.*)</td>"), false); + tdRx.setMinimal(true); + + QStringList cast; + // loop until closing table tag + const int endPos = castText.find(QString::fromLatin1("</table"), pos, false); + pos = s_anchorRx->search(castText, pos+1); + while(pos > -1 && pos < endPos && static_cast<int>(cast.count()) < m_numCast) { + if(s_anchorRx->cap(1).find(name) > -1) { + // now search for <td> item with character name + // there's a column with ellipses then the character + const int pos2 = tdRx.search(castText, pos); + if(pos2 > -1 && tdRx.search(castText, pos2+1) > -1) { + cast += s_anchorRx->cap(2).stripWhiteSpace() + + QString::fromLatin1("::") + tdRx.cap(1).simplifyWhiteSpace().remove(*s_tagRx); + } else { + cast += s_anchorRx->cap(2).stripWhiteSpace(); + } + } + pos = s_anchorRx->search(castText, pos+1); + } + + if(!cast.isEmpty()) { + entry_->setField(QString::fromLatin1("cast"), cast.join(sep)); + } +} + +void IMDBFetcher::doRating(const QString& str_, Data::EntryPtr entry_) { + if(m_fields.findIndex(QString::fromLatin1("imdb-rating")) == -1) { + return; + } + + // don't add a colon, since there's a <br> at the end + // some of the imdb images use /10.gif in their path, so require whitespace or an angle bracket on both sides of the rating + QRegExp rx(QString::fromLatin1("[>\\s](\\d+.?\\d*)/10[<\\s]"), false); + rx.setMinimal(true); + + if(rx.search(str_) > -1 && !rx.cap(1).isEmpty()) { + Data::FieldPtr f = 
entry_->collection()->fieldByName(QString::fromLatin1("imdb-rating")); + if(!f) { + f = new Data::Field(QString::fromLatin1("imdb-rating"), i18n("IMDB Rating"), Data::Field::Rating); + f->setCategory(i18n("General")); + f->setProperty(QString::fromLatin1("maximum"), QString::fromLatin1("10")); + entry_->collection()->addField(f); + } + + bool ok; + float value = rx.cap(1).toFloat(&ok); + if(ok) { + entry_->setField(QString::fromLatin1("imdb-rating"), QString::number(value)); + } + } +} + +void IMDBFetcher::doCover(const QString& str_, Data::EntryPtr entry_, const KURL& baseURL_) { + // cover is the img with the "cover" alt text + QRegExp imgRx(QString::fromLatin1("<img\\s+[^>]*src\\s*=\\s*\"([^\"]*)\"[^>]*>"), false); + imgRx.setMinimal(true); + + QRegExp posterRx(QString::fromLatin1("<a\\s+[^>]*name\\s*=\\s*\"poster\"[^>]*>(.*)</a>"), false); + posterRx.setMinimal(true); + + const QString cover = QString::fromLatin1("cover"); + + int pos = posterRx.search(str_); + while(pos > -1) { + if(imgRx.search(posterRx.cap(1)) > -1) { + KURL u(baseURL_, imgRx.cap(1)); + QString id = ImageFactory::addImage(u, true); + if(!id.isEmpty()) { + entry_->setField(cover, id); + } + return; + } + pos = posterRx.search(str_, pos+1); + } + + // didn't find the cover, IMDb also used to put "cover" inside the url + pos = imgRx.search(str_); + while(pos > -1) { + if(imgRx.cap(0).find(cover, 0, false) > -1) { + KURL u(baseURL_, imgRx.cap(1)); + QString id = ImageFactory::addImage(u, true); + if(!id.isEmpty()) { + entry_->setField(cover, id); + } + return; + } + pos = imgRx.search(str_, pos+1); + } +} + +// end up reparsing whole string, but it's not really that slow +// loook at every anchor tag in the string +void IMDBFetcher::doLists(const QString& str_, Data::EntryPtr entry_) { + const QString genre = QString::fromLatin1("/Genres/"); + const QString country = QString::fromLatin1("/Countries/"); + const QString lang = QString::fromLatin1("/Languages/"); + const QString colorInfo = 
QString::fromLatin1("color-info"); + const QString cert = QString::fromLatin1("certificates="); + const QString soundMix = QString::fromLatin1("sound-mix="); + const QString year = QString::fromLatin1("/Years/"); + const QString company = QString::fromLatin1("/company/"); + + // IIMdb also has links with the word "sections" in them, remove that + // for genres and nationalities + + QStringList genres, countries, langs, certs, tracks, studios; + for(int pos = s_anchorRx->search(str_); pos > -1; pos = s_anchorRx->search(str_, pos+1)) { + const QString cap1 = s_anchorRx->cap(1); + if(cap1.find(genre) > -1) { + if(s_anchorRx->cap(2).find(QString::fromLatin1(" section"), 0, false) == -1) { + genres += s_anchorRx->cap(2).stripWhiteSpace(); + } + } else if(cap1.find(country) > -1) { + if(s_anchorRx->cap(2).find(QString::fromLatin1(" section"), 0, false) == -1) { + countries += s_anchorRx->cap(2).stripWhiteSpace(); + } + } else if(cap1.find(lang) > -1) { + langs += s_anchorRx->cap(2).stripWhiteSpace(); + } else if(cap1.find(colorInfo) > -1) { + // change "black and white" to "black & white" + entry_->setField(QString::fromLatin1("color"), + s_anchorRx->cap(2).replace(QString::fromLatin1("and"), QChar('&')).stripWhiteSpace()); + } else if(cap1.find(cert) > -1) { + certs += s_anchorRx->cap(2).stripWhiteSpace(); + } else if(cap1.find(soundMix) > -1) { + tracks += s_anchorRx->cap(2).stripWhiteSpace(); + } else if(cap1.find(company) > -1) { + studios += s_anchorRx->cap(2).stripWhiteSpace(); + // if year field wasn't set before, do it now + } else if(entry_->field(QString::fromLatin1("year")).isEmpty() && cap1.find(year) > -1) { + entry_->setField(QString::fromLatin1("year"), s_anchorRx->cap(2).stripWhiteSpace()); + } + } + + entry_->setField(QString::fromLatin1("genre"), genres.join(sep)); + entry_->setField(QString::fromLatin1("nationality"), countries.join(sep)); + entry_->setField(QString::fromLatin1("language"), langs.join(sep)); + 
entry_->setField(QString::fromLatin1("audio-track"), tracks.join(sep)); + entry_->setField(QString::fromLatin1("studio"), studios.join(sep)); + if(!certs.isEmpty()) { + // first try to set default certification + const QStringList& certsAllowed = entry_->collection()->fieldByName(QString::fromLatin1("certification"))->allowed(); + for(QStringList::ConstIterator it = certs.begin(); it != certs.end(); ++it) { + QString country = (*it).section(':', 0, 0); + QString cert = (*it).section(':', 1, 1); + if(cert == Latin1Literal("Unrated")) { + cert = QChar('U'); + } + cert += QString::fromLatin1(" (") + country + ')'; + if(certsAllowed.findIndex(cert) > -1) { + entry_->setField(QString::fromLatin1("certification"), cert); + break; + } + } + + // now add new field for all certifications + const QString allc = QString::fromLatin1("allcertification"); + if(m_fields.findIndex(allc) > -1) { + Data::FieldPtr f = entry_->collection()->fieldByName(allc); + if(!f) { + f = new Data::Field(allc, i18n("Certifications"), Data::Field::Table); + f->setFlags(Data::Field::AllowGrouped); + entry_->collection()->addField(f); + } + entry_->setField(QString::fromLatin1("allcertification"), certs.join(sep)); + } + } +} + +void IMDBFetcher::updateEntry(Data::EntryPtr entry_) { +// myLog() << "IMDBFetcher::updateEntry() - " << entry_->title() << endl; + // only take first 5 + m_limit = 5; + QString t = entry_->field(QString::fromLatin1("title")); + KURL link = entry_->field(QString::fromLatin1("imdb")); + if(!link.isEmpty() && link.isValid()) { + // check if we want a different host + if(link.host() != m_host) { +// myLog() << "IMDBFetcher::updateEntry() - switching hosts to " << m_host << endl; + link.setHost(m_host); + } + m_key = Fetch::Title; + m_value = t; + m_started = true; + m_data.truncate(0); + m_matches.clear(); + m_url = link; + m_redirected = true; // m_redirected is used as a flag later to tell if we get a single result + m_job = KIO::get(m_url, false, false); + connect(m_job, 
SIGNAL(data(KIO::Job*, const QByteArray&)), + SLOT(slotData(KIO::Job*, const QByteArray&))); + connect(m_job, SIGNAL(result(KIO::Job*)), + SLOT(slotComplete(KIO::Job*))); + connect(m_job, SIGNAL(redirection(KIO::Job *, const KURL&)), + SLOT(slotRedirection(KIO::Job*, const KURL&))); + return; + } + // optimistically try searching for title and rely on Collection::sameEntry() to figure things out + if(!t.isEmpty()) { + search(Fetch::Title, t); + return; + } + emit signalDone(this); // always need to emit this if not continuing with the search +} + +Tellico::Fetch::ConfigWidget* IMDBFetcher::configWidget(QWidget* parent_) const { + return new IMDBFetcher::ConfigWidget(parent_, this); +} + +IMDBFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const IMDBFetcher* fetcher_/*=0*/) + : Fetch::ConfigWidget(parent_) { + QGridLayout* l = new QGridLayout(optionsWidget(), 4, 2); + l->setSpacing(4); + l->setColStretch(1, 10); + + int row = -1; + QLabel* label = new QLabel(i18n("Hos&t: "), optionsWidget()); + l->addWidget(label, ++row, 0); + m_hostEdit = new KLineEdit(optionsWidget()); + connect(m_hostEdit, SIGNAL(textChanged(const QString&)), SLOT(slotSetModified())); + l->addWidget(m_hostEdit, row, 1); + QString w = i18n("The Internet Movie Database uses several different servers. Choose the one " + "you wish to use."); + QWhatsThis::add(label, w); + QWhatsThis::add(m_hostEdit, w); + label->setBuddy(m_hostEdit); + + label = new QLabel(i18n("&Maximum cast: "), optionsWidget()); + l->addWidget(label, ++row, 0); + m_numCast = new KIntSpinBox(0, 99, 1, 10, 10, optionsWidget()); + connect(m_numCast, SIGNAL(valueChanged(const QString&)), SLOT(slotSetModified())); + l->addWidget(m_numCast, row, 1); + w = i18n("The list of cast members may include many people. 
Set the maximum number returned from the search."); + QWhatsThis::add(label, w); + QWhatsThis::add(m_numCast, w); + label->setBuddy(m_numCast); + + m_fetchImageCheck = new QCheckBox(i18n("Download cover &image"), optionsWidget()); + connect(m_fetchImageCheck, SIGNAL(clicked()), SLOT(slotSetModified())); + ++row; + l->addMultiCellWidget(m_fetchImageCheck, row, row, 0, 1); + w = i18n("The cover image may be downloaded as well. However, too many large images in the " + "collection may degrade performance."); + QWhatsThis::add(m_fetchImageCheck, w); + + l->setRowStretch(++row, 10); + + // now add additional fields widget + addFieldsWidget(IMDBFetcher::customFields(), fetcher_ ? fetcher_->m_fields : QStringList()); + + if(fetcher_) { + m_hostEdit->setText(fetcher_->m_host); + m_numCast->setValue(fetcher_->m_numCast); + m_fetchImageCheck->setChecked(fetcher_->m_fetchImages); + } else { //defaults + m_hostEdit->setText(QString::fromLatin1(IMDB_SERVER)); + m_numCast->setValue(10); + m_fetchImageCheck->setChecked(true); + } +} + +void IMDBFetcher::ConfigWidget::saveConfig(KConfigGroup& config_) { + QString host = m_hostEdit->text().stripWhiteSpace(); + if(!host.isEmpty()) { + config_.writeEntry("Host", host); + } + config_.writeEntry("Max Cast", m_numCast->value()); + config_.writeEntry("Fetch Images", m_fetchImageCheck->isChecked()); + + saveFieldsConfig(config_); + slotSetModified(false); +} + +QString IMDBFetcher::ConfigWidget::preferredName() const { + return IMDBFetcher::defaultName(); +} + +//static +Tellico::StringMap IMDBFetcher::customFields() { + StringMap map; + map[QString::fromLatin1("imdb")] = i18n("IMDB Link"); + map[QString::fromLatin1("imdb-rating")] = i18n("IMDB Rating"); + map[QString::fromLatin1("alttitle")] = i18n("Alternative Titles"); + map[QString::fromLatin1("allcertification")] = i18n("Certifications"); + return map; +} + +#include "imdbfetcher.moc" diff --git a/src/fetch/imdbfetcher.h b/src/fetch/imdbfetcher.h new file mode 100644 index 
0000000..3dc19f2 --- /dev/null +++ b/src/fetch/imdbfetcher.h @@ -0,0 +1,141 @@ +/*************************************************************************** + copyright : (C) 2004-2006 by Robby Stephenson + email : [email protected] + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef IMDBFETCHER_H +#define IMDBFETCHER_H + +#include "fetcher.h" +#include "configwidget.h" +#include "../datavectors.h" + +#include <kurl.h> +#include <kio/job.h> + +#include <qcstring.h> // for QByteArray +#include <qmap.h> +#include <qguardedptr.h> + +class KLineEdit; +class KIntSpinBox; +class QCheckBox; +class QRegExp; // for the static regexp pointer members + +namespace Tellico { + namespace Fetch { + +/** + * @author Robby Stephenson + */ +class IMDBFetcher : public Fetcher { +Q_OBJECT + +public: + IMDBFetcher(QObject* parent, const char* name=0); + /** + */ + virtual ~IMDBFetcher(); + + virtual QString source() const; + virtual bool isSearching() const { return m_started; } + virtual void search(FetchKey key, const QString& value); + virtual void continueSearch(); + // imdb can search title, person + virtual bool canSearch(FetchKey k) const { return k == Title || k == Person; } + virtual void stop(); + virtual Data::EntryPtr fetchEntry(uint uid); + virtual Type type() const { return IMDB; } + virtual bool canFetch(int type) const; + virtual void readConfigHook(const KConfigGroup& config); + + virtual void updateEntry(Data::EntryPtr entry); + + virtual Fetch::ConfigWidget* configWidget(QWidget* parent) const; + + static StringMap customFields(); + + class ConfigWidget : public Fetch::ConfigWidget { + public: + 
ConfigWidget(QWidget* parent_, const IMDBFetcher* fetcher = 0); + virtual void saveConfig(KConfigGroup& config); + virtual QString preferredName() const; + + private: + KLineEdit* m_hostEdit; + QCheckBox* m_fetchImageCheck; + KIntSpinBox* m_numCast; + }; + friend class ConfigWidget; + + static QString defaultName(); + +private slots: + void slotData(KIO::Job* job, const QByteArray& data); + void slotComplete(KIO::Job* job); + void slotRedirection(KIO::Job* job, const KURL& toURL); + +private: + static void initRegExps(); + static QRegExp* s_tagRx; + static QRegExp* s_anchorRx; + static QRegExp* s_anchorTitleRx; + static QRegExp* s_anchorNameRx; + static QRegExp* s_titleRx; + + void doTitle(const QString& s, Data::EntryPtr e); + void doRunningTime(const QString& s, Data::EntryPtr e); + void doAspectRatio(const QString& s, Data::EntryPtr e); + void doAlsoKnownAs(const QString& s, Data::EntryPtr e); + void doPlot(const QString& s, Data::EntryPtr e, const KURL& baseURL_); + void doPerson(const QString& s, Data::EntryPtr e, + const QString& imdbHeader, const QString& fieldName); + void doCast(const QString& s, Data::EntryPtr e, const KURL& baseURL_); + void doLists(const QString& s, Data::EntryPtr e); + void doRating(const QString& s, Data::EntryPtr e); + void doCover(const QString& s, Data::EntryPtr e, const KURL& baseURL); + + void parseSingleTitleResult(); + void parseSingleNameResult(); + void parseMultipleTitleResults(); + void parseTitleBlock(const QString& str); + void parseMultipleNameResults(); + Data::EntryPtr parseEntry(const QString& str); + + QByteArray m_data; + QMap<int, Data::EntryPtr> m_entries; + QMap<int, KURL> m_matches; + QGuardedPtr<KIO::Job> m_job; + + FetchKey m_key; + QString m_value; + bool m_started; + bool m_fetchImages; + + QString m_host; + int m_numCast; + KURL m_url; + bool m_redirected; + uint m_limit; + QStringList m_fields; + + QString m_popularTitles; + QString m_exactTitles; + QString m_partialTitles; + enum TitleBlock { Unknown = 0, 
Popular = 1, Exact = 2, Partial = 3, SinglePerson = 4}; + TitleBlock m_currentTitleBlock; + int m_countOffset; +}; + + } // end namespace +} // end namespace + +#endif diff --git a/src/fetch/isbndbfetcher.cpp b/src/fetch/isbndbfetcher.cpp new file mode 100644 index 0000000..5ffc379 --- /dev/null +++ b/src/fetch/isbndbfetcher.cpp @@ -0,0 +1,350 @@ +/*************************************************************************** + copyright : (C) 2006 by Robby Stephenson + email : [email protected] + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "isbndbfetcher.h" +#include "messagehandler.h" +#include "../translators/xslthandler.h" +#include "../translators/tellicoimporter.h" +#include "../tellico_kernel.h" +#include "../tellico_utils.h" +#include "../collection.h" +#include "../entry.h" +#include "../tellico_debug.h" + +#include <klocale.h> +#include <kstandarddirs.h> +#include <kconfig.h> + +#include <qdom.h> +#include <qlabel.h> +#include <qlayout.h> +#include <qfile.h> + +namespace { + static const int ISBNDB_RETURNS_PER_REQUEST = 10; + static const int ISBNDB_MAX_RETURNS_TOTAL = 25; + static const char* ISBNDB_BASE_URL = "http://isbndb.com/api/books.xml"; + static const char* ISBNDB_APP_ID = "3B9S3BQS"; +} + +using Tellico::Fetch::ISBNdbFetcher; + +ISBNdbFetcher::ISBNdbFetcher(QObject* parent_, const char* name_) + : Fetcher(parent_, name_), m_xsltHandler(0), + m_limit(ISBNDB_MAX_RETURNS_TOTAL), m_page(1), m_total(-1), m_countOffset(0), + m_job(0), m_started(false) { +} + +ISBNdbFetcher::~ISBNdbFetcher() { + delete m_xsltHandler; + m_xsltHandler = 0; +} + 
+QString ISBNdbFetcher::defaultName() { + return i18n("ISBNdb.com"); +} + +QString ISBNdbFetcher::source() const { + return m_name.isEmpty() ? defaultName() : m_name; +} + +bool ISBNdbFetcher::canFetch(int type) const { + return type == Data::Collection::Book || type == Data::Collection::ComicBook || type == Data::Collection::Bibtex; +} + +void ISBNdbFetcher::readConfigHook(const KConfigGroup& config_) { + Q_UNUSED(config_); +} + +void ISBNdbFetcher::search(FetchKey key_, const QString& value_) { + m_key = key_; + m_value = value_.stripWhiteSpace(); + m_started = true; + m_page = 1; + m_total = -1; + m_numResults = 0; + m_countOffset = 0; + + if(!canFetch(Kernel::self()->collectionType())) { + message(i18n("%1 does not allow searching for this collection type.").arg(source()), MessageHandler::Warning); + stop(); + return; + } + doSearch(); +} + +void ISBNdbFetcher::continueSearch() { + m_started = true; + m_limit += ISBNDB_MAX_RETURNS_TOTAL; + doSearch(); +} + +void ISBNdbFetcher::doSearch() { + m_data.truncate(0); + +// myDebug() << "ISBNdbFetcher::search() - value = " << value_ << endl; + + KURL u(QString::fromLatin1(ISBNDB_BASE_URL)); + u.addQueryItem(QString::fromLatin1("access_key"), QString::fromLatin1(ISBNDB_APP_ID)); + u.addQueryItem(QString::fromLatin1("results"), QString::fromLatin1("details,authors,subjects,texts")); + u.addQueryItem(QString::fromLatin1("page_number"), QString::number(m_page)); + + switch(m_key) { + case Title: + u.addQueryItem(QString::fromLatin1("index1"), QString::fromLatin1("title")); + u.addQueryItem(QString::fromLatin1("value1"), m_value); + break; + + case Person: + // yes, this also queries titles, too, it's a limitation of the isbndb api service + u.addQueryItem(QString::fromLatin1("index1"), QString::fromLatin1("combined")); + u.addQueryItem(QString::fromLatin1("value1"), m_value); + break; + + case Keyword: + u.addQueryItem(QString::fromLatin1("index1"), QString::fromLatin1("full")); + 
u.addQueryItem(QString::fromLatin1("value1"), m_value); + break; + + case ISBN: + u.addQueryItem(QString::fromLatin1("index1"), QString::fromLatin1("isbn")); + { + // only grab first value + QString v = m_value.section(QChar(';'), 0); + v.remove('-'); + u.addQueryItem(QString::fromLatin1("value1"), v); + } + break; + + default: + kdWarning() << "ISBNdbFetcher::search() - key not recognized: " << m_key << endl; + stop(); + return; + } +// myDebug() << "ISBNdbFetcher::search() - url: " << u.url() << endl; + + m_job = KIO::get(u, false, false); + connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)), + SLOT(slotData(KIO::Job*, const QByteArray&))); + connect(m_job, SIGNAL(result(KIO::Job*)), + SLOT(slotComplete(KIO::Job*))); +} + +void ISBNdbFetcher::stop() { + if(!m_started) { + return; + } +// myDebug() << "ISBNdbFetcher::stop()" << endl; + if(m_job) { + m_job->kill(); + m_job = 0; + } + m_data.truncate(0); + m_started = false; + emit signalDone(this); +} + +void ISBNdbFetcher::slotData(KIO::Job*, const QByteArray& data_) { + QDataStream stream(m_data, IO_WriteOnly | IO_Append); + stream.writeRawBytes(data_.data(), data_.size()); +} + +void ISBNdbFetcher::slotComplete(KIO::Job* job_) { +// myDebug() << "ISBNdbFetcher::slotComplete()" << endl; + // since the fetch is done, don't worry about holding the job pointer + m_job = 0; + + if(job_->error()) { + job_->showErrorDialog(Kernel::self()->widget()); + stop(); + return; + } + + if(m_data.isEmpty()) { + myDebug() << "ISBNdbFetcher::slotComplete() - no data" << endl; + stop(); + return; + } + +#if 0 + kdWarning() << "Remove debug from isbndbfetcher.cpp" << endl; + QFile f(QString::fromLatin1("/tmp/test.xml")); + if(f.open(IO_WriteOnly)) { + QTextStream t(&f); + t.setEncoding(QTextStream::UnicodeUTF8); + t << QCString(m_data, m_data.size()+1); + } + f.close(); +#endif + + QDomDocument dom; + if(!dom.setContent(m_data, false)) { + kdWarning() << "ISBNdbFetcher::slotComplete() - server did not return valid XML." 
<< endl; + return; + } + + if(m_total == -1) { + QDomNode n = dom.documentElement().namedItem(QString::fromLatin1("BookList")); + QDomElement e = n.toElement(); + if(!e.isNull()) { + m_total = e.attribute(QString::fromLatin1("total_results"), QString::number(-1)).toInt(); + } + } + + if(!m_xsltHandler) { + initXSLTHandler(); + if(!m_xsltHandler) { // probably an error somewhere in the stylesheet loading + stop(); + return; + } + } + + // assume result is always utf-8 + QString str = m_xsltHandler->applyStylesheet(QString::fromUtf8(m_data, m_data.size())); + Import::TellicoImporter imp(str); + Data::CollPtr coll = imp.collection(); + + int count = 0; + Data::EntryVec entries = coll->entries(); + for(Data::EntryVec::Iterator entry = entries.begin(); m_numResults < m_limit && entry != entries.end(); ++entry, ++count) { + if(count < m_countOffset) { + continue; + } + if(!m_started) { + // might get aborted + break; + } + QString desc = entry->field(QString::fromLatin1("author")) + + QChar('/') + entry->field(QString::fromLatin1("publisher")); + if(!entry->field(QString::fromLatin1("cr_year")).isEmpty()) { + desc += QChar('/') + entry->field(QString::fromLatin1("cr_year")); + } else if(!entry->field(QString::fromLatin1("pub_year")).isEmpty()){ + desc += QChar('/') + entry->field(QString::fromLatin1("pub_year")); + } + + SearchResult* r = new SearchResult(this, entry->title(), desc, entry->field(QString::fromLatin1("isbn"))); + m_entries.insert(r->uid, Data::EntryPtr(entry)); + emit signalResultFound(r); + ++m_numResults; + } + + // are there any additional results to get? 
+ m_hasMoreResults = m_page * ISBNDB_RETURNS_PER_REQUEST < m_total; + + const int currentTotal = QMIN(m_total, m_limit); + if(m_page * ISBNDB_RETURNS_PER_REQUEST < currentTotal) { + int foundCount = (m_page-1) * ISBNDB_RETURNS_PER_REQUEST + coll->entryCount(); + message(i18n("Results from %1: %2/%3").arg(source()).arg(foundCount).arg(m_total), MessageHandler::Status); + ++m_page; + m_countOffset = 0; + doSearch(); + } else { + m_countOffset = m_entries.count() % ISBNDB_RETURNS_PER_REQUEST; + if(m_countOffset == 0) { + ++m_page; // need to go to next page + } + stop(); // required + } +} + +Tellico::Data::EntryPtr ISBNdbFetcher::fetchEntry(uint uid_) { + Data::EntryPtr entry = m_entries[uid_]; + if(!entry) { + kdWarning() << "ISBNdbFetcher::fetchEntry() - no entry in dict" << endl; + return 0; + } + + // if the publisher id is set, then we need to grab the real publisher name + const QString id = entry->field(QString::fromLatin1("pub_id")); + if(!id.isEmpty()) { + KURL u(QString::fromLatin1(ISBNDB_BASE_URL)); + u.setFileName(QString::fromLatin1("publishers.xml")); + u.addQueryItem(QString::fromLatin1("access_key"), QString::fromLatin1(ISBNDB_APP_ID)); + u.addQueryItem(QString::fromLatin1("index1"), QString::fromLatin1("publisher_id")); + u.addQueryItem(QString::fromLatin1("value1"), id); + + QDomDocument dom = FileHandler::readXMLFile(u, true); + if(!dom.isNull()) { + QString pub = dom.documentElement().namedItem(QString::fromLatin1("PublisherList")) + .namedItem(QString::fromLatin1("PublisherData")) + .namedItem(QString::fromLatin1("Name")) + .toElement().text(); + if(!pub.isEmpty()) { + entry->setField(QString::fromLatin1("publisher"), pub); + } + } + entry->setField(QString::fromLatin1("pub_id"), QString()); + } + + return entry; +} + +void ISBNdbFetcher::initXSLTHandler() { + QString xsltfile = locate("appdata", QString::fromLatin1("isbndb2tellico.xsl")); + if(xsltfile.isEmpty()) { + kdWarning() << "ISBNdbFetcher::initXSLTHandler() - can not locate 
isbndb2tellico.xsl." << endl; + return; + } + + KURL u; + u.setPath(xsltfile); + + delete m_xsltHandler; + m_xsltHandler = new XSLTHandler(u); + if(!m_xsltHandler->isValid()) { + kdWarning() << "ISBNdbFetcher::initXSLTHandler() - error in isbndb2tellico.xsl." << endl; + delete m_xsltHandler; + m_xsltHandler = 0; + return; + } +} + +void ISBNdbFetcher::updateEntry(Data::EntryPtr entry_) { +// myDebug() << "ISBNdbFetcher::updateEntry()" << endl; + // limit to top 5 results + m_limit = 5; + + QString isbn = entry_->field(QString::fromLatin1("isbn")); + if(!isbn.isEmpty()) { + search(Fetch::ISBN, isbn); + return; + } + + // optimistically try searching for title and rely on Collection::sameEntry() to figure things out + QString t = entry_->field(QString::fromLatin1("title")); + if(!t.isEmpty()) { + m_limit = 10; // raise limit so more possibility of match + search(Fetch::Title, t); + return; + } + + myDebug() << "ISBNdbFetcher::updateEntry() - insufficient info to search" << endl; + emit signalDone(this); // always need to emit this if not continuing with the search +} + +Tellico::Fetch::ConfigWidget* ISBNdbFetcher::configWidget(QWidget* parent_) const { + return new ISBNdbFetcher::ConfigWidget(parent_, this); +} + +ISBNdbFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const ISBNdbFetcher*/*=0*/) + : Fetch::ConfigWidget(parent_) { + QVBoxLayout* l = new QVBoxLayout(optionsWidget()); + l->addWidget(new QLabel(i18n("This source has no options."), optionsWidget())); + l->addStretch(); +} + +QString ISBNdbFetcher::ConfigWidget::preferredName() const { + return ISBNdbFetcher::defaultName(); +} + +#include "isbndbfetcher.moc" diff --git a/src/fetch/isbndbfetcher.h b/src/fetch/isbndbfetcher.h new file mode 100644 index 0000000..e49246a --- /dev/null +++ b/src/fetch/isbndbfetcher.h @@ -0,0 +1,94 @@ +/*************************************************************************** + copyright : (C) 2006 by Robby Stephenson + email : [email protected] + 
***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef TELLICO_FETCH_ISBNDBFETCHER_H +#define TELLICO_FETCH_ISBNDBFETCHER_H + +namespace Tellico { + class XSLTHandler; +} + +#include "fetcher.h" +#include "configwidget.h" +#include "../datavectors.h" + +#include <kio/job.h> + +#include <qcstring.h> // for QByteArray +#include <qguardedptr.h> + +namespace Tellico { + namespace Fetch { + +/** + * @author Robby Stephenson + */ +class ISBNdbFetcher : public Fetcher { +Q_OBJECT + +public: + ISBNdbFetcher(QObject* parent = 0, const char* name = 0); + ~ISBNdbFetcher(); + + virtual QString source() const; + virtual bool isSearching() const { return m_started; } + virtual void search(FetchKey key, const QString& value); + virtual void continueSearch(); + virtual bool canSearch(FetchKey k) const { return k == Title || k == Person || k == Keyword || k == ISBN; } + virtual void stop(); + virtual Data::EntryPtr fetchEntry(uint uid); + virtual Type type() const { return ISBNdb; } + virtual bool canFetch(int type) const; + virtual void readConfigHook(const KConfigGroup& config); + + virtual void updateEntry(Data::EntryPtr entry); + + virtual Fetch::ConfigWidget* configWidget(QWidget* parent) const; + + class ConfigWidget : public Fetch::ConfigWidget { + public: + ConfigWidget(QWidget* parent_, const ISBNdbFetcher* fetcher = 0); + virtual void saveConfig(KConfigGroup&) {} + virtual QString preferredName() const; + }; + friend class ConfigWidget; + + static QString defaultName(); + +private slots: + void slotData(KIO::Job* job, const QByteArray& data); + void slotComplete(KIO::Job* 
job); + +private: + void initXSLTHandler(); + void doSearch(); + + XSLTHandler* m_xsltHandler; + int m_limit; + int m_page; + int m_total; + int m_numResults; + int m_countOffset; + + QByteArray m_data; + QMap<int, Data::EntryPtr> m_entries; + QGuardedPtr<KIO::Job> m_job; + + FetchKey m_key; + QString m_value; + bool m_started; +}; + + } +} +#endif diff --git a/src/fetch/messagehandler.cpp b/src/fetch/messagehandler.cpp new file mode 100644 index 0000000..f3c36a1 --- /dev/null +++ b/src/fetch/messagehandler.cpp @@ -0,0 +1,35 @@ +/*************************************************************************** + copyright : (C) 2005-2006 by Robby Stephenson + email : [email protected] + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "messagehandler.h" +#include "fetchmanager.h" +#include "../tellico_kernel.h" + +#include <kmessagebox.h> + +using Tellico::Fetch::ManagerMessage; + +// all messages go to manager +void ManagerMessage::send(const QString& message_, Type type_) { + Fetch::Manager::self()->updateStatus(message_); + // plus errors get a message box + if(type_ == Error) { + KMessageBox::sorry(Kernel::self()->widget(), message_); + } else if(type_ == Warning) { + KMessageBox::information(Kernel::self()->widget(), message_); + } +} + +void ManagerMessage::infoList(const QString& message_, const QStringList& list_) { + KMessageBox::informationList(Kernel::self()->widget(), message_, list_); +} diff --git a/src/fetch/messagehandler.h b/src/fetch/messagehandler.h new file mode 100644 index 0000000..0ec9269 --- /dev/null +++ b/src/fetch/messagehandler.h @@ -0,0 
+1,49 @@ +/*************************************************************************** + copyright : (C) 2005-2006 by Robby Stephenson + email : [email protected] + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef TELLICO_FETCH_MESSAGEHANDLER_H +#define TELLICO_FETCH_MESSAGEHANDLER_H + +class QString; +class QStringList; + +namespace Tellico { + namespace Fetch { + +/** + * @author Robby Stephenson + */ +class MessageHandler { +public: + enum Type { Status, Warning, Error, ListError }; + + MessageHandler() {} + virtual ~MessageHandler() {} + + virtual void send(const QString& message, Type type) = 0; + virtual void infoList(const QString& message, const QStringList& list) = 0; +}; + +class ManagerMessage : public MessageHandler { +public: + ManagerMessage() : MessageHandler() {} + virtual ~ManagerMessage() {} + + virtual void send(const QString& message, Type type); + virtual void infoList(const QString& message, const QStringList& list); +}; + + } // end namespace +} // end namespace + +#endif diff --git a/src/fetch/scripts/Makefile.am b/src/fetch/scripts/Makefile.am new file mode 100644 index 0000000..050c460 --- /dev/null +++ b/src/fetch/scripts/Makefile.am @@ -0,0 +1,30 @@ +####### kdevelop will overwrite this part!!! (begin)########## + +EXTRA_DIST = \ +fr.allocine.py fr.allocine.py.spec \ +ministerio_de_cultura.py ministerio_de_cultura.py.spec \ +dark_horse_comics.py dark_horse_comics.py.spec \ +boardgamegeek.rb boardgamegeek.rb.spec + +####### kdevelop will overwrite this part!!! 
(end)############ + +scriptdir = $(kde_datadir)/tellico/data-sources +script_SCRIPTS = \ +fr.allocine.py \ +ministerio_de_cultura.py \ +dark_horse_comics.py \ +boardgamegeek.rb + +script_DATA = \ +fr.allocine.py.spec \ +ministerio_de_cultura.py.spec \ +dark_horse_comics.py.spec \ +boardgamegeek.rb.spec + +KDE_OPTIONS = noautodist + +CLEANFILES = *~ + +# probably a better way to do this +uninstall-hook: + -if [ -d $(scriptdir) ]; then rmdir $(scriptdir); fi diff --git a/src/fetch/scripts/boardgamegeek.rb b/src/fetch/scripts/boardgamegeek.rb new file mode 100644 index 0000000..b3cf4f3 --- /dev/null +++ b/src/fetch/scripts/boardgamegeek.rb @@ -0,0 +1,235 @@ +#!/usr/bin/env ruby +# +# *************************************************************************** +# copyright : (C) 2006 by Steve Beattie +# : (C) 2008 by Sven Werlen +# email : [email protected] +# : [email protected] +# *************************************************************************** +# +# *************************************************************************** +# * * +# * This program is free software; you can redistribute it and/or modify * +# * it under the terms of version 2 of the GNU General Public License as * +# * published by the Free Software Foundation; * +# * * +# *************************************************************************** + +# $Id: boardgamegeek.rb 313 2006-10-02 22:17:11Z steve $ + +# This program is expected to be invoked from tellico +# (http://periapsis.org/tellico) as an external data source. It provides +# searches for boardgames from the boardgamegeek.com website, via +# boardgamegeek's xmlapi interface +# (http://www.boardgamegeek.com/xmlapi/) +# +# It only allows searches via name; the boardgamegeek xmlapi is not yet +# rich enough to support queries by designer, publisher, category, or +# mechanism. I'd like to add support for querying by boardgamegeek id, +# but that needs additional support in tellico. 
+# +# Sven Werlen: 03 Feb 2008: script has been extended to retrieve cover +# images (/thumbnail from xmlapi). Images are retrieved from the website +# and base64 is generated on-the-fly. +# +require 'rexml/document' +require 'net/http' +require 'cgi' +require "base64" +include REXML + +$my_version = '$Rev: 313 $' + +class Game + attr_writer :year + attr_writer :description + attr_writer :cover + attr_writer :image + + def initialize(name, id) + @name = name + @id = id + @publishers = [] + @designers = [] + @players = [] + end + + def add_publisher(publisher) + @publishers << publisher + end + + def add_designer(designer) + @designers << designer + end + + def add_players(players) + @players << players + end + + def to_s() + "@name (#@id #@publishers #@year)" + end + + def toXML() + element = Element.new 'entry' + element.add_element Element.new('title').add_text(@name) + element.add_element Element.new('description').add_text(@description) if @description + element.add_element Element.new('year').add_text(@year) if @year + element.add_element Element.new('boardgamegeek-link').add_text("http://www.boardgamegeek/game/#{@id}") if @id + element.add_element Element.new('bggid').add_text(@id) if @id + element.add_element Element.new('cover').add_text(@cover) if @cover + + if @publishers.length > 0 + pub_elements = Element.new('publishers') + @publishers.each {|p| pub_elements.add_element Element.new('publisher').add_text(p)} + element.add_element pub_elements + end + if @designers.length > 0 + des_elements = Element.new('designers') + @designers.each {|d| des_elements.add_element Element.new('designer').add_text(d)} + element.add_element des_elements + end + if @players.length > 0 + players_elements = Element.new('num-players') + @players.each {|n| players_elements.add_element Element.new('num-player').add_text(n.to_s)} + element.add_element players_elements + end + return element + end + + def image() + image = Element.new 'image' + image.add_attribute('format', 
'JPEG') + image.add_attribute('id', @id + ".jpg") + image.add_text(@image) + return image + end +end + +def getGameList(query) + #puts("Query is #{query}") + + search_result = nil + Net::HTTP.start('www.boardgamegeek.com', 80) do + |http| search_result = (http.get("/xmlapi/search?search=#{CGI.escape(query)}", + {"User-Agent" => "BoardGameGeek plugin for Tellico #{$my_version}"}).body) + http.finish + end + doc = REXML::Document.new(search_result) + + games = XPath.match(doc, "//game") + #games.each {|g| puts g.elements['name'].text+g.attributes['gameid']} + ids = [] + games.each {|g| ids << g.attributes['gameid']} + return ids +end + +def getGameDetails(ids) + #ids.each {|id| puts id} + + query = "/xmlapi/game/#{ids.join(',')}" + #puts query + search_result = nil + Net::HTTP.start('www.boardgamegeek.com', 80) do |http| + search_result = http.get(query, {"User-Agent" => "BoardGameGeek plugin for Tellico #{$my_version}"}) + http.finish + end + games = [] + case search_result + when Net::HTTPOK then + doc = REXML::Document.new(search_result.body) + + games_xml = XPath.match(doc, "//game") + games_xml.each do |g| + if( g.elements['name'] != nil ) + game = Game.new(g.elements['name'].text, g.attributes['gameid']) + game.year = g.elements['yearpublished'].text + game.description = g.elements['description'].text + g.elements.each('publisher'){|p| game.add_publisher p.elements['name'].text} + g.elements.each('designer'){|d| game.add_designer d.elements['name'].text} + minp = Integer(g.elements['minplayers'].text) + maxp = Integer(g.elements['maxplayers'].text) + minp.upto(maxp) {|n| game.add_players(n)} + + # retrieve cover + coverurl = g.elements['thumbnail'] != nil ? g.elements['thumbnail'].text : nil + if( coverurl =~ /files.boardgamegeek.com(.*)$/ ) + # puts "downloading... 
" + $1 + cover = nil + Net::HTTP.start('files.boardgamegeek.com', 80) do |http| + cover = (http.get($1, {"User-Agent" => "BoardGameGeek plugin for Tellico #{$my_version}"})) + end + case cover + when Net::HTTPOK then + game.cover = g.attributes['gameid'] + ".jpg"; + game.image = Base64.encode64(cover.body); + end + else + # puts "invalid cover: " + coverurl + end + games << game + end + end + end + return games +end + +def listToXML(gameList) + doc = REXML::Document.new + doc << REXML::DocType.new('tellico PUBLIC', '"-//Robby Stephenson/DTD Tellico V10.0//EN" "http://periapsis.org/tellico/dtd/v10/tellico.dtd"') + doc << XMLDecl.new + tellico = Element.new 'tellico' + tellico.add_attribute('xmlns', 'http://periapsis.org/tellico/') + tellico.add_attribute('syntaxVersion', '10') + collection = Element.new 'collection' + collection.add_attribute('title', 'My Collection') + collection.add_attribute('type', '13') + + fields = Element.new 'fields' + field = Element.new 'field' + field.add_attribute('name', '_default') + fields.add_element(field) + field = Element.new 'field' + field.add_attribute('name', 'bggid') + field.add_attribute('title', 'BoardGameGeek ID') + field.add_attribute('category', 'General') + field.add_attribute('flags', '0') + field.add_attribute('format', '4') + field.add_attribute('type', '6') + field.add_attribute('i18n', 'true') + fields.add_element(field) + collection.add_element(fields) + + images = Element.new 'images' + + id = 0 + gameList.each do + |g| element = g.toXML() + element.add_attribute('id', id) + id = id + 1 + collection.add_element(element) + images.add_element(g.image()); + end + collection.add_element(images); + tellico.add_element(collection) + doc.add_element(tellico) + doc.write($stdout, 0) + puts "" +end + +if __FILE__ == $0 + + def showUsage + warn "usage: #{__FILE__} game_query" + exit 1 + end + + showUsage unless ARGV.length == 1 + + idList = getGameList(ARGV.shift) + if idList + gameList = getGameDetails(idList) + end + + 
listToXML(gameList) +end diff --git a/src/fetch/scripts/boardgamegeek.rb.spec b/src/fetch/scripts/boardgamegeek.rb.spec new file mode 100644 index 0000000..6e0aab0 --- /dev/null +++ b/src/fetch/scripts/boardgamegeek.rb.spec @@ -0,0 +1,7 @@ +Name=BoardGameGeek +Type=data-source +ArgumentKeys=1 +Arguments=%1 +CollectionType=13 +FormatType=0 +UpdateArgs=%{title} diff --git a/src/fetch/scripts/dark_horse_comics.py b/src/fetch/scripts/dark_horse_comics.py new file mode 100644 index 0000000..4f3b651 --- /dev/null +++ b/src/fetch/scripts/dark_horse_comics.py @@ -0,0 +1,399 @@ +#!/usr/bin/env python +# -*- coding: iso-8859-1 -*- + +# *************************************************************************** +# copyright : (C) 2006 by Mathias Monnerville +# email : [email protected] +# *************************************************************************** +# +# *************************************************************************** +# * * +# * This program is free software; you can redistribute it and/or modify * +# * it under the terms of version 2 of the GNU General Public License as * +# * published by the Free Software Foundation; * +# * * +# *************************************************************************** + +# $Id: comics_darkhorsecomics.py 123 2006-03-24 08:47:48Z mathias $ + +""" +This script has to be used with tellico (http://periapsis.org/tellico) as an external data source program. +It allows searching through the Dark Horse Comics web database. + +Related info and cover are fetched automatically. It takes only one argument (comic title). 
+ +Tellico data source setup: +- source name: Dark Horse Comics (US) (or whatever you want :) +- Collection type: comics collection +- Result type: tellico +- Path: /path/to/script/comics_darkhorsecomics.py +- Arguments: +Title (checked) = %1 +Update (checked) = %{title} +""" + +import sys, os, re, md5, random, string +import urllib, urllib2, time, base64 +import xml.dom.minidom + +XML_HEADER = """<?xml version="1.0" encoding="UTF-8"?>""" +DOCTYPE = """<!DOCTYPE tellico PUBLIC "-//Robby Stephenson/DTD Tellico V9.0//EN" "http://periapsis.org/tellico/dtd/v9/tellico.dtd">""" +NULLSTRING = '' + +VERSION = "0.2" + + +def genMD5(): + """ + Generates and returns a random md5 string. Its main purpose is to allow random + image file name generation. + """ + obj = md5.new() + float = random.random() + obj.update(str(float)) + return obj.hexdigest() + +class BasicTellicoDOM: + """ + This class manages tellico's XML data model (DOM) + """ + def __init__(self): + self.__doc = xml.dom.minidom.Document() + self.__root = self.__doc.createElement('tellico') + self.__root.setAttribute('xmlns', 'http://periapsis.org/tellico/') + self.__root.setAttribute('syntaxVersion', '9') + + self.__collection = self.__doc.createElement('collection') + self.__collection.setAttribute('title', 'My Comics') + self.__collection.setAttribute('type', '6') + + self.__images = self.__doc.createElement('images') + + self.__root.appendChild(self.__collection) + self.__doc.appendChild(self.__root) + + # Current movie id. See entry's id attribute in self.addEntry() + self.__currentId = 0 + + + def addEntry(self, movieData): + """ + Add a comic entry. 
+ Returns an entry node instance + """ + d = movieData + entryNode = self.__doc.createElement('entry') + entryNode.setAttribute('id', str(self.__currentId)) + + titleNode = self.__doc.createElement('title') + titleNode.appendChild(self.__doc.createTextNode(unicode(d['title'], 'latin-1').encode('utf-8'))) + + yearNode = self.__doc.createElement('pub_year') + yearNode.appendChild(self.__doc.createTextNode(d['pub_year'])) + + countryNode = self.__doc.createElement('country') + countryNode.appendChild(self.__doc.createTextNode(d['country'])) + pubNode = self.__doc.createElement('publisher') + pubNode.appendChild(self.__doc.createTextNode(d['publisher'])) + langNode = self.__doc.createElement('language') + langNode.appendChild(self.__doc.createTextNode(d['language'])) + + writersNode = self.__doc.createElement('writers') + for g in d['writer']: + writerNode = self.__doc.createElement('writer') + writerNode.appendChild(self.__doc.createTextNode(unicode(g, 'latin-1').encode('utf-8'))) + writersNode.appendChild(writerNode) + + genresNode = self.__doc.createElement('genres') + for g in d['genre']: + genreNode = self.__doc.createElement('genre') + genreNode.appendChild(self.__doc.createTextNode(unicode(g, 'latin-1').encode('utf-8'))) + genresNode.appendChild(genreNode) + + commentsNode = self.__doc.createElement('comments') + #for g in d['comments']: + # commentsNode.appendChild(self.__doc.createTextNode(unicode("%s\n\n" % g, 'latin-1').encode('utf-8'))) + commentsData = string.join(d['comments'], '\n\n') + commentsNode.appendChild(self.__doc.createTextNode(unicode(commentsData, 'latin-1').encode('utf-8'))) + + artistsNode = self.__doc.createElement('artists') + for k, v in d['artist'].iteritems(): + artistNode = self.__doc.createElement('artist') + artistNode.appendChild(self.__doc.createTextNode(unicode(v, 'latin-1').encode('utf-8'))) + artistsNode.appendChild(artistNode) + + pagesNode = self.__doc.createElement('pages') + 
pagesNode.appendChild(self.__doc.createTextNode(d['pages'])) + + issueNode = self.__doc.createElement('issue') + issueNode.appendChild(self.__doc.createTextNode(d['issue'])) + + if d['image']: + imageNode = self.__doc.createElement('image') + imageNode.setAttribute('format', 'JPEG') + imageNode.setAttribute('id', d['image'][0]) + imageNode.appendChild(self.__doc.createTextNode(unicode(d['image'][1], 'latin-1').encode('utf-8'))) + + coverNode = self.__doc.createElement('cover') + coverNode.appendChild(self.__doc.createTextNode(d['image'][0])) + + for name in ( 'writersNode', 'genresNode', 'artistsNode', 'pagesNode', 'yearNode', + 'titleNode', 'issueNode', 'commentsNode', 'pubNode', 'langNode', + 'countryNode' ): + entryNode.appendChild(eval(name)) + + if d['image']: + entryNode.appendChild(coverNode) + self.__images.appendChild(imageNode) + + self.__collection.appendChild(entryNode) + + self.__currentId += 1 + return entryNode + + def printEntry(self, nEntry): + """ + Prints entry's XML content to stdout + """ + try: + print nEntry.toxml() + except: + print sys.stderr, "Error while outputing XML content from entry to Tellico" + + def printXMLTree(self): + """ + Outputs XML content to stdout + """ + self.__collection.appendChild(self.__images) + print XML_HEADER; print DOCTYPE + print self.__root.toxml() + + +class DarkHorseParser: + def __init__(self): + self.__baseURL = 'http://www.darkhorse.com' + self.__basePath = '/profile/profile.php?sku=' + self.__searchURL = '/search/search.php?frompage=userinput&sstring=%s&x=0&y=0' + self.__coverPath = 'http://images.darkhorse.com/covers/' + self.__movieURL = self.__baseURL + self.__basePath + + # Define some regexps + self.__regExps = { 'title' : '<font size="\+2"><b>(?P<title>.*?)</b></font>', + 'pub_date' : '<b>Pub.* Date:</b> *<a.*>(?P<pub_date>.*)</a>', + 'desc' : '<p>(?P<desc>.*?)<br>', + 'writer' : '<b>Writer: *</b> *<a.*?>(?P<writer>.*)</a>', + 'cover_artist' : '<b>Cover Artist: *</b> *<a.*>(?P<cover_artist>.*)</a>', 
+ 'penciller' : '<b>Penciller: *</b> *<a.*>(?P<penciller>.*)</a>', + 'inker' : '<b>Inker: *</b> *<a.*>(?P<inker>.*)</a>', + 'letterer' : '<b>Letterer: *</b> *<a.*>(?P<letterer>.*)</a>', + 'colorist' : '<b>Colorist: *</b> *<a.*>(?P<colorist>.*)</a>', + 'genre' : '<b>Genre: *</b> *<a.*?>(?P<genre>.*?)</a><br>', + 'format' : '<b>Format: *</b> *(?P<format>.*?)<br>', + } + + # Compile patterns objects + self.__regExpsPO = {} + for k, pattern in self.__regExps.iteritems(): + self.__regExpsPO[k] = re.compile(pattern) + + self.__domTree = BasicTellicoDOM() + + def run(self, title): + """ + Runs the allocine.fr parser: fetch movie related links, then fills and prints the DOM tree + to stdout (in tellico format) so that tellico can use it. + """ + self.__getMovie(title) + # Print results to stdout + self.__domTree.printXMLTree() + + def __getHTMLContent(self, url): + """ + Fetch HTML data from url + """ + u = urllib2.urlopen(url) + self.__data = u.read() + u.close() + + def __fetchMovieLinks(self): + """ + Retrieve all links related to the search. self.__data contains HTML content fetched by self.__getHTMLContent() + that need to be parsed. + """ + matchList = re.findall("""<a *href="%s(?P<page>.*?)">(?P<title>.*?)</a>""" % self.__basePath.replace('?', '\?'), self.__data) + if not matchList: return None + + return matchList + + def __fetchCover(self, path, delete = True): + """ + Fetch cover to /tmp. Returns base64 encoding of data. 
+ The image is deleted if delete is True + """ + md5 = genMD5() + imObj = urllib2.urlopen(path.strip()) + img = imObj.read() + imObj.close() + imgPath = "/tmp/%s.jpeg" % md5 + try: + f = open(imgPath, 'w') + f.write(img) + f.close() + except: + print sys.stderr, "Error: could not write image into /tmp" + + b64data = (md5 + '.jpeg', base64.encodestring(img)) + + # Delete temporary image + if delete: + try: + os.remove(imgPath) + except: + print sys.stderr, "Error: could not delete temporary image /tmp/%s.jpeg" % md5 + + return b64data + + def __fetchMovieInfo(self, url): + """ + Looks for movie information + """ + self.__getHTMLContent(url) + + # First grab picture data + imgMatch = re.search("""<img src="%s(?P<imgpath>.*?)".*>""" % self.__coverPath, self.__data) + if imgMatch: + imgPath = self.__coverPath + imgMatch.group('imgpath') + # Fetch cover and gets its base64 encoded data + b64img = self.__fetchCover(imgPath) + else: + b64img = None + + # Now isolate data between <div class="bodytext">...</div> elements + # re.S sets DOTALL; it makes the "." 
special character match any character at all, including a newline + m = re.search("""<div class="bodytext">(?P<part>.*)</div>""", self.__data, re.S) + self.__data = m.group('part') + + matches = {} + data = {} + data['comments'] = [] + data['artist'] = {} + + # Default values + data['publisher'] = 'Dark Horse Comics' + data['language'] = 'English' + data['country'] = 'USA' + + data['image'] = b64img + data['pub_year'] = NULLSTRING + + for name, po in self.__regExpsPO.iteritems(): + data[name] = NULLSTRING + if name == 'desc': + matches[name] = re.findall(self.__regExps[name], self.__data, re.S | re.I) + else: + matches[name] = po.search(self.__data) + + if matches[name]: + if name == 'title': + title = matches[name].group('title').strip() + data[name] = title + # Look for issue information + m = re.search("#(?P<issue>[0-9]+)", title) + if m: + data['issue'] = m.group('issue') + else: + data['issue'] = '' + + elif name == 'pub_date': + pub_date = matches[name].group('pub_date').strip() + data['pub_year'] = pub_date[-4:] + # Add this to comments field + data['comments'].insert(0, "Pub. 
Date: %s" % pub_date) + + elif name == 'desc': + # Find biggest size + max = 0 + for i in range(len(matches[name])): + if len(matches[name][i]) > len(matches[name][max]): + max = i + data['comments'].append(matches[name][max].strip()) + + elif name == 'writer': + # We may find several writers + data[name] = [] + writersList = re.sub('</?a.*?>', '', matches[name].group('writer')).split(',') + for d in writersList: + data[name].append(d.strip()) + + elif name == 'cover_artist': + data['artist']['Cover Artist'] = matches[name].group('cover_artist').strip() + + elif name == 'penciller': + data['artist']['Penciller'] = matches[name].group('penciller').strip() + + elif name == 'inker': + data['artist']['Inker'] = matches[name].group('inker').strip() + + elif name == 'colorist': + data['artist']['Colorist'] = matches[name].group('colorist').strip() + + elif name == 'letterer': + data['artist']['Letterer'] = matches[name].group('letterer').strip() + + elif name == 'genre': + # We may find several genres + data[name] = [] + genresList = re.sub('</?a.*?>', '', matches[name].group('genre')).split(',') + for d in genresList: + data[name].append(d.strip()) + + elif name == 'format': + format = matches[name].group('format').strip() + data['comments'].insert(1, format) + m = re.search("(?P<pages>[0-9]+)", format) + if m: + data['pages'] = m.group('pages') + else: + data['pages'] = '' + + return data + + + def __getMovie(self, title): + if not len(title): return + + self.__title = title + self.__getHTMLContent("%s%s" % (self.__baseURL, self.__searchURL % urllib.quote(self.__title))) + + # Get all links + links = self.__fetchMovieLinks() + + # Now retrieve infos + if links: + for entry in links: + data = self.__fetchMovieInfo( url = self.__movieURL + entry[0] ) + # Add DC link (custom field) + data['darkhorse'] = "%s%s" % (self.__movieURL, entry[0]) + node = self.__domTree.addEntry(data) + # Print entries on-the-fly + #self.__domTree.printEntry(node) + else: + return None + +def 
halt(): + print "HALT." + sys.exit(0) + +def showUsage(): + print "Usage: %s comic" % sys.argv[0] + sys.exit(1) + +def main(): + if len(sys.argv) < 2: + showUsage() + + parser = DarkHorseParser() + parser.run(sys.argv[1]) + +if __name__ == '__main__': + main() diff --git a/src/fetch/scripts/dark_horse_comics.py.spec b/src/fetch/scripts/dark_horse_comics.py.spec new file mode 100644 index 0000000..9481dc8 --- /dev/null +++ b/src/fetch/scripts/dark_horse_comics.py.spec @@ -0,0 +1,7 @@ +Name=Dark Horse Comics +Type=data-source +ArgumentKeys=1 +Arguments=%1 +CollectionType=6 +FormatType=0 +UpdateArgs=%{title} diff --git a/src/fetch/scripts/fr.allocine.py b/src/fetch/scripts/fr.allocine.py new file mode 100755 index 0000000..97a2247 --- /dev/null +++ b/src/fetch/scripts/fr.allocine.py @@ -0,0 +1,335 @@ +#!/usr/bin/env python +# -*- coding: iso-8859-1 -*- + +# *************************************************************************** +# copyright : (C) 2006 by Mathias Monnerville +# email : [email protected] +# *************************************************************************** +# +# *************************************************************************** +# * * +# * This program is free software; you can redistribute it and/or modify * +# * it under the terms of version 2 of the GNU General Public License as * +# * published by the Free Software Foundation; * +# * * +# *************************************************************************** + +# Version 0.4: 2007-08-27 +# * Fixed parsing errors: some fields in allocine's HTML pages have changed recently. Multiple actors and genres +# could not be retrieved. Fixed bad http request error due to some changes in HTML code. +# +# Version 0.3: +# * Fixed parsing: some fields in allocine's HTML pages have changed. Movie's image could not be fetched anymore. Fixed. +# +# Version 0.2: +# * Fixed parsing: allocine's HTML pages have changed. Movie's image could not be fetched anymore. 
+# +# Version 0.1: +# * Initial release. + +import sys, os, re, md5, random +import urllib, urllib2, time, base64 +import xml.dom.minidom + +XML_HEADER = """<?xml version="1.0" encoding="UTF-8"?>""" +DOCTYPE = """<!DOCTYPE tellico PUBLIC "-//Robby Stephenson/DTD Tellico V9.0//EN" "http://periapsis.org/tellico/dtd/v9/tellico.dtd">""" + +VERSION = "0.4" + +def genMD5(): + obj = md5.new() + float = random.random() + obj.update(str(float)) + return obj.hexdigest() + +class BasicTellicoDOM: + def __init__(self): + self.__doc = xml.dom.minidom.Document() + self.__root = self.__doc.createElement('tellico') + self.__root.setAttribute('xmlns', 'http://periapsis.org/tellico/') + self.__root.setAttribute('syntaxVersion', '9') + + self.__collection = self.__doc.createElement('collection') + self.__collection.setAttribute('title', 'My Movies') + self.__collection.setAttribute('type', '3') + + self.__fields = self.__doc.createElement('fields') + # Add all default (standard) fields + self.__dfltField = self.__doc.createElement('field') + self.__dfltField.setAttribute('name', '_default') + + # Add a custom 'Collection' field + self.__customField = self.__doc.createElement('field') + self.__customField.setAttribute('name', 'titre-original') + self.__customField.setAttribute('title', 'Original Title') + self.__customField.setAttribute('flags', '8') + self.__customField.setAttribute('category', 'General') + self.__customField.setAttribute('format', '1') + self.__customField.setAttribute('type', '1') + self.__customField.setAttribute('i18n', 'yes') + + self.__fields.appendChild(self.__dfltField) + self.__fields.appendChild(self.__customField) + self.__collection.appendChild(self.__fields) + + self.__images = self.__doc.createElement('images') + + self.__root.appendChild(self.__collection) + self.__doc.appendChild(self.__root) + + # Current movie id + self.__currentId = 0 + + + def addEntry(self, movieData): + """ + Add a movie entry + """ + d = movieData + entryNode = 
self.__doc.createElement('entry') + entryNode.setAttribute('id', str(self.__currentId)) + + titleNode = self.__doc.createElement('title') + titleNode.appendChild(self.__doc.createTextNode(unicode(d['title'], 'latin-1').encode('utf-8'))) + + otitleNode = self.__doc.createElement('titre-original') + otitleNode.appendChild(self.__doc.createTextNode(unicode(d['otitle'], 'latin-1').encode('utf-8'))) + + yearNode = self.__doc.createElement('year') + yearNode.appendChild(self.__doc.createTextNode(unicode(d['year'], 'latin-1').encode('utf-8'))) + + genresNode = self.__doc.createElement('genres') + for g in d['genres']: + genreNode = self.__doc.createElement('genre') + genreNode.appendChild(self.__doc.createTextNode(unicode(g, 'latin-1').encode('utf-8'))) + genresNode.appendChild(genreNode) + + natsNode = self.__doc.createElement('nationalitys') + natNode = self.__doc.createElement('nat') + natNode.appendChild(self.__doc.createTextNode(unicode(d['nat'], 'latin-1').encode('utf-8'))) + natsNode.appendChild(natNode) + + castsNode = self.__doc.createElement('casts') + for g in d['actors']: + castNode = self.__doc.createElement('cast') + col1Node = self.__doc.createElement('column') + col2Node = self.__doc.createElement('column') + col1Node.appendChild(self.__doc.createTextNode(unicode(g, 'latin-1').encode('utf-8'))) + castNode.appendChild(col1Node) + castNode.appendChild(col2Node) + castsNode.appendChild(castNode) + + dirsNode = self.__doc.createElement('directors') + for g in d['dirs']: + dirNode = self.__doc.createElement('director') + dirNode.appendChild(self.__doc.createTextNode(unicode(g, 'latin-1').encode('utf-8'))) + dirsNode.appendChild(dirNode) + + timeNode = self.__doc.createElement('running-time') + timeNode.appendChild(self.__doc.createTextNode(unicode(d['time'], 'latin-1').encode('utf-8'))) + + allocineNode = self.__doc.createElement(unicode('allocin�-link', 'latin-1').encode('utf-8')) + allocineNode.appendChild(self.__doc.createTextNode(unicode(d['allocine'], 
'latin-1').encode('utf-8'))) + + plotNode = self.__doc.createElement('plot') + plotNode.appendChild(self.__doc.createTextNode(unicode(d['plot'], 'latin-1').encode('utf-8'))) + + if d['image']: + imageNode = self.__doc.createElement('image') + imageNode.setAttribute('format', 'JPEG') + imageNode.setAttribute('id', d['image'][0]) + imageNode.setAttribute('width', '120') + imageNode.setAttribute('height', '160') + imageNode.appendChild(self.__doc.createTextNode(unicode(d['image'][1], 'latin-1').encode('utf-8'))) + + coverNode = self.__doc.createElement('cover') + coverNode.appendChild(self.__doc.createTextNode(d['image'][0])) + + for name in ( 'titleNode', 'otitleNode', 'yearNode', 'genresNode', 'natsNode', + 'castsNode', 'dirsNode', 'timeNode', 'allocineNode', 'plotNode' ): + entryNode.appendChild(eval(name)) + + if d['image']: + entryNode.appendChild(coverNode) + self.__images.appendChild(imageNode) + + self.__collection.appendChild(entryNode) + + self.__currentId += 1 + + def printXML(self): + """ + Outputs XML content to stdout + """ + self.__collection.appendChild(self.__images) + print XML_HEADER; print DOCTYPE + print self.__root.toxml() + + +class AlloCineParser: + def __init__(self): + self.__baseURL = 'http://www.allocine.fr' + self.__basePath = '/film/fichefilm_gen_cfilm' + self.__searchURL= 'http://www.allocine.fr/recherche/?motcle=%s&f=3&rub=1' + self.__movieURL = self.__baseURL + self.__basePath + + # Define some regexps + self.__regExps = { 'title' : '<title>(?P<title>.+?)</title>', + 'dirs' : 'R�alis� par <a.*?>(?P<step1>.+?)</a>.*?</h4>', + 'actors' : '<h4>Avec *<a.*?>(?P<step1>.+)</a> ', + 'nat' : '<h4>Film *(?P<nat>.+?)[,\.]', + 'genres' : '<h4>Genre *: *<a.*?>(?P<step1>.+?)</a></h4>', + 'time' : '<h4>Dur�e *: *(?P<hours>[0-9])?h *(?P<mins>[0-9]{1,2})min', + 'year' : 'Ann�e de production *: *(?P<year>[0-9]{4})', + # Original movie title + 'otitle' : 'Titre original *: *<i>(?P<otitle>.+?)</i>', + 'plot' : """(?s)<td valign="top" style="padding:10 0 0 
0"><div align="justify"><h4> *(?P<plot>.+?) *</h4>""", + 'image' : """<td valign="top" width="120".*?<img src="(?P<image>.+?)" border"""} + + + self.__domTree = BasicTellicoDOM() + + def run(self, title): + """ + Runs the allocine.fr parser: fetch movie related links, then fills and prints the DOM tree + to stdout (in tellico format) so that tellico can use it. + """ + self.__getMovie(title) + # Print results to stdout + self.__domTree.printXML() + + def __getHTMLContent(self, url): + """ + Fetch HTML data from url + """ + + u = urllib2.urlopen(url) + self.__data = u.read() + u.close() + + def __fetchMovieLinks(self): + """ + Retrieve all links related to movie + """ + matchList = re.findall("""<h4><a *href="%s=(?P<page>.*?\.html?)" *class="link1">(?P<title>.*?)</a>""" % self.__basePath, self.__data) + if not matchList: return None + + return matchList + + def __fetchMovieInfo(self, url): + """ + Looks for movie information + """ + self.__getHTMLContent(url) + + matches = data = {} + + for name, regexp in self.__regExps.iteritems(): + if name == 'image': + matches[name] = re.findall(self.__regExps[name], self.__data, re.S | re.I) + else: + matches[name] = re.search(regexp, self.__data) + + if matches[name]: + if name == 'title': + data[name] = matches[name].group('title').strip() + elif name == 'dirs': + dirsList = re.sub('</?a.*?>', '', matches[name].group('step1')).split(',') + data[name] = [] + for d in dirsList: + data[name].append(d.strip()) + + elif name == 'actors': + actorsList = re.sub('</?a.*?>', '', matches[name].group('step1')).split(',') + data[name] = [] + for d in actorsList: + data[name].append(d.strip()) + + elif name == 'nat': + data[name] = matches[name].group('nat').strip() + + elif name == 'genres': + genresList = re.sub('</?a.*?>', '', matches[name].group('step1')).split(',') + data[name] = [] + for d in genresList: + data[name].append(d.strip()) + + elif name == 'time': + h, m = matches[name].group('hours'), matches[name].group('mins') + 
totmin = int(h)*60+int(m) + data[name] = str(totmin) + + elif name == 'year': + data[name] = matches[name].group('year').strip() + + elif name == 'otitle': + data[name] = matches[name].group('otitle').strip() + + elif name == 'plot': + data[name] = matches[name].group('plot').strip() + + # Image path + elif name == 'image': + # Save image to a temporary folder + md5 = genMD5() + imObj = urllib2.urlopen(matches[name][0].strip()) + img = imObj.read() + imObj.close() + imgPath = "/tmp/%s.jpeg" % md5 + try: + f = open(imgPath, 'w') + f.write(img) + f.close() + except: + # Could be great if we can pass exit code and some message + # to tellico in case of failure... + pass + + data[name] = (md5 + '.jpeg', base64.encodestring(img)) + # Delete temporary image + try: + os.remove(imgPath) + except: + # Could be great if we can pass exit code and some msg + # to tellico in case of failure... + pass + else: + matches[name] = '' + + return data + + + def __getMovie(self, title): + if not len(title): return + + self.__title = title + self.__getHTMLContent(self.__searchURL % urllib.quote(self.__title)) + + # Get all links + links = self.__fetchMovieLinks() + + # Now retrieve infos + if links: + for entry in links: + data = self.__fetchMovieInfo( url = "%s=%s" % (self.__movieURL, entry[0]) ) + # Add allocine link (custom field) + data['allocine'] = "%s=%s" % (self.__movieURL, entry[0]) + self.__domTree.addEntry(data) + else: + return None + + + +def showUsage(): + print "Usage: %s movietitle" % sys.argv[0] + sys.exit(1) + +def main(): + if len(sys.argv) < 2: + showUsage() + + parser = AlloCineParser() + parser.run(sys.argv[1]) + +if __name__ == '__main__': + main() diff --git a/src/fetch/scripts/fr.allocine.py.spec b/src/fetch/scripts/fr.allocine.py.spec new file mode 100644 index 0000000..773b951 --- /dev/null +++ b/src/fetch/scripts/fr.allocine.py.spec @@ -0,0 +1,7 @@ +Name=Allocine.fr +Type=data-source +ArgumentKeys=1 +Arguments=%1 +CollectionType=3 +FormatType=0 
+UpdateArgs=%{title} diff --git a/src/fetch/scripts/ministerio_de_cultura.py b/src/fetch/scripts/ministerio_de_cultura.py new file mode 100644 index 0000000..8a768f9 --- /dev/null +++ b/src/fetch/scripts/ministerio_de_cultura.py @@ -0,0 +1,595 @@ +#!/usr/bin/env python +# -*- coding: iso-8859-1 -*- + +# *************************************************************************** +# copyright : (C) 2006-2008 by Mathias Monnerville +# email : [email protected] +# *************************************************************************** +# +# *************************************************************************** +# * * +# * This program is free software; you can redistribute it and/or modify * +# * it under the terms of version 2 of the GNU General Public License as * +# * published by the Free Software Foundation; * +# * * +# *************************************************************************** + +# $Id: books_ministerio_de_cultura.py 428 2007-03-07 13:17:17Z mathias $ + +""" +This script has to be used with tellico (http://periapsis.org/tellico) as an external data source program. +It allows searching for books in Spanish Ministry of Culture's database (at http://www.mcu.es/bases/spa/isbn/ISBN.html). + +Multiple ISBN/UPC searching is supported through the -m option: + ./books_ministerio_de_cultura.py -m filename +where filename holds one ISBN or UPC per line. + +Tellico data source setup: +- Source type: External Application +- Source name: Ministerio de Cultura (ES) (or whatever you want :) +- Collection type: Book Collection +- Result type: Tellico +- Path: /path/to/script/books_ministerio_de_cultura.py +- Arguments: +Title (checked) = -t %1 +Person (checked) = -a %1 +ISBN (checked) = -i %1 +UPC (checked) = -i %1 +Update (checked) = %{title} + +** Please note that this script is also part of the Tellico's distribution. 
+** You will always find the latest version in the SVN trunk of Tellico + +SVN Version: + * Removes translators for Authors List + * Adds translators to translator field + * Change from "Collection" to "Series" + * Process "Series Number" + * Adds in comments "ed.lit." authors + * If there isn't connection to Spanish Ministry of Culture + shows a nice error message (timeout: 5 seconds) + * Removed "translated from/to" from Comments field as already + exists in "Publishing" field + * Removed "Collection" field as I moved to Series/Series Number + +Version 0.3.2: + * Now find 'notas' field related information + * search URL modified to fetch information of exhausted books too + +Version 0.3.1: +Bug Fixes: + * The 'tr.' string does not appear among authors anymore + * Fixed an AttributeError exception related to a regexp matching the number of pages + +Version 0.3: +Bug Fixes: + * URL of the search engine has changed: + http://www.mcu.es/bases/spa/isbn/ISBN.html is now http://www.mcu.es/comun/bases/isbn/ISBN.html + * All the regexps have been rewritten to match the new site's content + +Version 0.2: +New features: + * Support for multiple ISBN/UPC searching (support from command line with -m option) + * Default books collection enhanced with a new custom field 'Collection' + * Search extended for both available and exhausted books + * Hyphens are stripped out in the ISBN (or UPC) search + +Bug Fixes: + * Publication year now holds only the year + * ISBN regexp fix + * Fix for publisher field (values were inverted) + * -i parameter works for both ISBN and UPC based search + +Version 0.1: + * Initial Release +""" + +import sys, os, re, md5, random, string +import urllib, urllib2, time, base64 +import xml.dom.minidom, types +import socket + +XML_HEADER = """<?xml version="1.0" encoding="UTF-8"?>""" +DOCTYPE = """<!DOCTYPE tellico PUBLIC "-//Robby Stephenson/DTD Tellico V9.0//EN" "http://periapsis.org/tellico/dtd/v9/tellico.dtd">""" +NULLSTRING = '' + +VERSION = "0.3.2" + 
+ISBN, AUTHOR, TITLE = range(3) + +TRANSLATOR_STR = "tr." +EDLIT_STR = "ed. lit." + +class EngineError(Exception): pass + +class BasicTellicoDOM: + """ + This class manages tellico's XML data model (DOM) + """ + def __init__(self): + self.__doc = xml.dom.minidom.Document() + self.__root = self.__doc.createElement('tellico') + self.__root.setAttribute('xmlns', 'http://periapsis.org/tellico/') + self.__root.setAttribute('syntaxVersion', '9') + + self.__collection = self.__doc.createElement('collection') + self.__collection.setAttribute('title', 'My Books') + self.__collection.setAttribute('type', '2') + + self.__fields = self.__doc.createElement('fields') + # Add all default (standard) fields + self.__dfltField = self.__doc.createElement('field') + self.__dfltField.setAttribute('name', '_default') + + # Add a custom 'Collection' field (Left by reference for + # the future) + #self.__customCollectionField = self.__doc.createElement('field') + #self.__customCollectionField.setAttribute('name', 'book_collection') + #self.__customCollectionField.setAttribute('title', 'Collection') + #self.__customCollectionField.setAttribute('flags', '7') + #self.__customCollectionField.setAttribute('category', 'Classification') + #self.__customCollectionField.setAttribute('format', '0') + #self.__customCollectionField.setAttribute('type', '1') + #self.__customCollectionField.setAttribute('i18n', 'yes') + + + self.__fields.appendChild(self.__dfltField) + #self.__fields.appendChild(self.__customCollectionField) + self.__collection.appendChild(self.__fields) + + self.__root.appendChild(self.__collection) + self.__doc.appendChild(self.__root) + + # Current movie id. See entry's id attribute in self.addEntry() + self.__currentId = 0 + + + def addEntry(self, movieData): + """ + Add a comic entry. 
+ Returns an entry node instance + """ + + d = movieData + + # Convert all strings to UTF-8 + for i in d.keys(): + if type(d[i]) == types.ListType: + d[i] = [unicode(d[i][j], 'latin-1').encode('utf-8') for j in range(len(d[i]))] + elif type(d[i]) == types.StringType: + d[i] = unicode(d[i], 'latin-1').encode('utf-8') + + entryNode = self.__doc.createElement('entry') + entryNode.setAttribute('id', str(self.__currentId)) + + titleNode = self.__doc.createElement('title') + titleNode.appendChild(self.__doc.createTextNode(d['title'])) + + yearNode = self.__doc.createElement('pub_year') + yearNode.appendChild(self.__doc.createTextNode(d['pub_year'])) + + pubNode = self.__doc.createElement('publisher') + pubNode.appendChild(self.__doc.createTextNode(d['publisher'])) + + langsNode = self.__doc.createElement('languages') + for l in d['language']: + langNode = self.__doc.createElement('language') + langNode.appendChild(self.__doc.createTextNode(l)) + langsNode.appendChild(langNode) + + keywordsNode = self.__doc.createElement('keywords') + keywordNode = self.__doc.createElement('keyword') + keywordNode.appendChild(self.__doc.createTextNode(d['keyword'])) + keywordsNode.appendChild(keywordNode) + + edNode = self.__doc.createElement('edition') + edNode.appendChild(self.__doc.createTextNode(d['edition'])) + + writersNode = self.__doc.createElement('authors') + for g in d['author']: + writerNode = self.__doc.createElement('author') + writerNode.appendChild(self.__doc.createTextNode(g)) + writersNode.appendChild(writerNode) + + commentsNode = self.__doc.createElement('comments') + commentsData = string.join(d['comments'], '<br/>') + commentsNode.appendChild(self.__doc.createTextNode(commentsData)) + + pagesNode = self.__doc.createElement('pages') + pagesNode.appendChild(self.__doc.createTextNode(d['pages'])) + + isbnNode = self.__doc.createElement('isbn') + isbnNode.appendChild(self.__doc.createTextNode(d['isbn'])) + + priceNode = self.__doc.createElement('pur_price') + 
priceNode.appendChild(self.__doc.createTextNode(d['pur_price'])) + + seriesNode = self.__doc.createElement('series') + seriesNode.appendChild(self.__doc.createTextNode(d['series'])) + + seriesNumNode = self.__doc.createElement('series_num') + seriesNumNode.appendChild(self.__doc.createTextNode(d['series_num'])) + + translatorNode = self.__doc.createElement('translator') + translatorNode.appendChild(self.__doc.createTextNode(d['translator'])) + + for name in ( 'title', 'year', 'pub', 'langs', 'keyword', 'ed', 'writers', + 'comments', 'pages', 'isbn', 'price', 'series', 'seriesNum', 'translator' ): + entryNode.appendChild(eval(name + 'Node')) + + self.__collection.appendChild(entryNode) + self.__currentId += 1 + + return entryNode + + def printEntry(self, nEntry): + """ + Prints entry's XML content to stdout + """ + + try: + print nEntry.toxml() + except: + print sys.stderr, "Error while outputing XML content from entry to Tellico" + + def printXMLTree(self): + """ + Outputs XML content to stdout + """ + + print XML_HEADER; print DOCTYPE + print self.__root.toxml() + + +class MinisterioCulturaParser: + def __init__(self): + # Search form is at http://www.mcu.es/comun/bases/isbn/ISBN.html + self.__baseURL = 'http://www.mcu.es' + self.__searchURL = '/cgi-brs/BasesHTML/isbn/BRSCGI?CMD=VERLST&BASE=ISBN&DOCS=1-15&CONF=AEISPA.cnf&OPDEF=AND&SEPARADOR=' + \ + '&WDIS-C=DISPONIBLE+or+AGOTADO&WGEN-C=&WISB-C=%s&WAUT-C=%s&WTIT-C=%s&WMAT-C=&WEDI-C=&' + + self.__suffixURL = 'WFEP-C=&%40T353-GE=&%40T353-LE=&WSER-C=&WLUG-C=&WLEN-C=&WCLA-C=&WSOP-C=' + + # Define some regexps + self.__regExps = { 'author' : '<th scope="row">Autor:.*?<td>(?P<author>.*?)</td>', + 'isbn' : '<span class="cabTitulo">ISBN.*?<strong>(?P<isbn>.*?)</strong>', # Matches ISBN 13 + 'title' : '<th scope="row">Título:.*?<td>(?P<title>.*?)</td>', + 'language' : '<th scope="row">Lengua:.*?<td>(?P<language>.*?)</td>', + 'edition' : '<th scope="row">Edición:.*?<td>.*?<span>(?P<edition>.*?)</span>', + 'pur_price' : '<th 
scope="row">Precio:.*?<td>.*?<span>(?P<pur_price>.*?)€</span>', + 'desc' : '<th scope="row">Descripción:.*?<td>.*?<span>(?P<desc>.*?)</span>', + 'publication' : '<th scope="row">Publicación:.*?<td>.*?<span>(?P<publication>.*?)</span>', + 'keyword' : '<th scope="row">Materias:.*?<td>.*?<span>(?P<keywords>.*?)</span>', + 'notas' : '<th scope="row">Notas:.*?<td>.*?<span>(?P<notas>.*?)</span>', + 'cdu' : '<th scope="row">CDU:.*?<td><span>(?P<cdu>.*?)</span></td>', + 'encuadernacion': '<th scope="row">Encuadernación:.*?<td>.*?<span>(?P<encuadernacion>.*?)</span>', + 'series' : '<th scope="row">Colección:.*?<td>.*?<span>(?P<series>.*?)</span>' + } + + # Compile patterns objects + self.__regExpsPO = {} + for k, pattern in self.__regExps.iteritems(): + self.__regExpsPO[k] = re.compile(pattern) + + self.__domTree = BasicTellicoDOM() + + def run(self, criteria, kind): + """ + Runs the parser: fetch book related links, then fills and prints the DOM tree + to stdout (in tellico format) so that tellico can use it. + """ + + # Strip out hyphens if kind is ISBN + if kind == ISBN: + criteria = criteria.replace('-', NULLSTRING) + # Support for multiple search + isbnList = criteria.split(';') + for n in isbnList: + self.__getBook(n, kind) + else: + self.__getBook(criteria, kind) + + # Print results to stdout + self.__domTree.printXMLTree() + + def __getHTMLContent(self, url): + """ + Fetch HTML data from url + """ + + try: + u = urllib2.urlopen(url) + except Exception, e: + u.close() + sys.exit(""" +Network error while getting HTML content. +Tellico cannot connect to: http://www.mcu.es/comun/bases/isbn/ISBN.htm webpage: +'%s'""" % e) + + + self.__data = u.read() + u.close() + + def __fetchBookLinks(self): + """ + Retrieve all links related to the search. self.__data contains HTML content fetched by self.__getHTMLContent() + that need to be parsed. 
+ """ + + matchList = re.findall("""<div class="isbnResDescripcion">.*?<p>.*?<A target="_top" HREF="(?P<url>.*?)">""", self.__data, re.S) + + if not matchList: return None + return matchList + + def __fetchBookInfo(self, url): + """ + Looks for book information + """ + + self.__getHTMLContent(url) + + matches = {} + data = {} + + data['comments'] = [] + # Empty string if series not available + data['series_num'] = NULLSTRING + data['translator'] = NULLSTRING + + for name, po in self.__regExpsPO.iteritems(): + data[name] = NULLSTRING + matches[name] = re.search(self.__regExps[name], self.__data, re.S | re.I) + + + if matches[name]: + if name == 'title': + d = matches[name].group('title').strip() + d = re.sub('<.?strong>', NULLSTRING, d) + d = re.sub('\n', NULLSTRING, d) + data['title'] = d + + elif name == 'isbn': + data['isbn'] = matches[name].group('isbn').strip() + + elif name == 'edition': + data['edition'] = matches[name].group('edition').strip() + + elif name == 'pur_price': + d = matches[name].group('pur_price') + data['pur_price'] = d.strip() + ' EUR' + + elif name == 'publication': + d = matches[name].group('publication') + for p in ('</?[Aa].*?>', ' ', ':', ','): + d = re.sub(p, NULLSTRING, d) + + d = d.split('\n') + # d[1] is an empty string + data['publisher'] = "%s (%s)" % (d[2], d[0]) + data['pub_year'] = re.sub('\d{2}\/', NULLSTRING, d[3]) + del data['publication'] + + elif name == 'desc': + d = matches[name].group('desc') + m = re.search('\d+ ', d) + # When not available + data['pages'] = NULLSTRING + if m: + data['pages'] = m.group(0).strip() + m = re.search('; (?P<format>.*cm)', d) + if m: + data['comments'].append('Format: ' + m.group('format').strip()) + del data['desc'] + + elif name == 'encuadernacion': + data['comments'].append(matches[name].group('encuadernacion').strip()) + + elif name == 'keyword': + d = matches[name].group('keywords') + d = re.sub('</?[Aa].*?>', NULLSTRING, d) + data['keyword'] = d.strip() + + elif name == 'cdu': + 
data['comments'].append('CDU: ' + matches[name].group('cdu').strip()) + + elif name == 'notas': + data['comments'].append(matches[name].group('notas').strip()) + + elif name == 'series': + d = matches[name].group('series').strip() + d = re.sub(' ', ' ', d) + data[name] = d + # data[name] can contain something like 'Byblos, 162/24' + + # Maybe better to add the reg exp to get seriesNum in self.__regExps + p = re.compile('[0-9]+$') + s = re.search(p, data[name]) + + if s: + # if series ends with a number, it seems that is a + # number of the book inside the series. We save in seriesNum + data['series_num'] = s.group() + + # it removes lasts digits (plus one because is space or /) from + # data['series'] + l = len(data['series_num']) + 1 + data[name] = data[name][0:-l] + data[name] = data[name].rstrip(",") # remove the , between series and series_num + + elif name == 'author': + # We may find several authors + data[name] = [] + authorsList = re.findall('<a.*?>(?P<author>.*?)</a>', matches[name].group('author'), re.S | re.I) + if not authorsList: + # No href links + authors = re.search('<li>(?P<author>.*?)</li>', matches[name].group('author'), re.S | re.I) + try: + results = authors.group('author').strip().split(',') + except AttributeError: + results = [] + results = [r.strip() for r in results] + data[name] = results + else: + for d in authorsList: + # Sometimes, the search engine outputs some image between a elements + if d.strip()[:4] != '<img': + data[name].append(d.strip()) + + # Move tr authors (translators) to translators list + translator = self.__getSpecialRol(data[name], TRANSLATOR_STR) + edlit = self.__getSpecialRol(data[name], EDLIT_STR) + data[name] = self.__removeSpecialsFromAuthors(data[name], translator, TRANSLATOR_STR) + data[name] = self.__removeSpecialsFromAuthors(data[name], edlit, EDLIT_STR) + + if len(translator) > 0: + data['translator'] = self.__formatSpecials(translator, NULLSTRING) + + if len(edlit) > 0: + 
data['comments'].append(self.__formatSpecials(edlit, "Editor Literario: ")) + + elif name == 'language': + # We may find several languages + d = matches[name].group('language') + d = re.sub('\n', NULLSTRING, d) + d = d.split('<span>') + a = [] + for lg in d: + if len(lg): + lg = re.sub('</span>', NULLSTRING, lg) + # Because HTML is not interpreted in the 'language' field of Tellico + lg = re.sub('ó', 'o', lg) + a.append(lg.strip()) + # Removes that word so that only the language name remains. + a[0] = re.sub('publicacion: ', NULLSTRING, a[0]) + data['language'] = a + # Add other language related info to the 'comments' field too + #for lg in a[1:]: + #data['comments'].append(lg) + + return data + + + def __getBook(self, data, kind = ISBN): + if not len(data): + raise EngineError, "No data given. Unable to proceed." + + if kind == ISBN: + self.__getHTMLContent("%s%s%s" % (self.__baseURL, self.__searchURL % \ + (urllib.quote(data), # ISBN + NULLSTRING, # AUTHOR + NULLSTRING), # TITLE + self.__suffixURL) + ) + elif kind == AUTHOR: + self.__getHTMLContent("%s%s%s" % (self.__baseURL, self.__searchURL % \ + (NULLSTRING, # ISBN + urllib.quote(data), # AUTHOR + NULLSTRING), # TITLE + self.__suffixURL) + ) + + elif kind == TITLE: + self.__getHTMLContent("%s%s%s" % (self.__baseURL, self.__searchURL % \ + (NULLSTRING, # ISBN + NULLSTRING, # AUTHOR + urllib.quote(data)), # TITLE + self.__suffixURL) + ) + + # Get all links + links = self.__fetchBookLinks() + + # Now retrieve infos + if links: + for entry in links: + data = self.__fetchBookInfo( url = self.__baseURL + entry.replace(' ', '%20') ) + node = self.__domTree.addEntry(data) + else: + return None + + def __getSpecialRol(self, authors, special): + """ + Receives a list like ['Stephen King','Lorenzo Cortina','tr.', + 'Rosal�a V�zquez','tr.'] and returns a list with special names + """ + + j = 0; max = len(authors) + special_rol = [] + while j < max: + if authors[j] == special: + special_rol.append(authors[j-1]) + j += 1 + 
+ return special_rol + + def __removeSpecialsFromAuthors(self, authors, specials, string): + """ + Receives a list with authors+translators and removes 'tr.' and + authors from there. Example: + authors: ['Stephen King','Lorenzo Cortina','tr.','Rosal�a V�zquez','tr.'] + translators: ['Lorenzo Cortina','Rosal�a V�zquez'] + returns: ['Stephen King'] + + (We could also guess string value because is the next position + in authors list) + """ + + newauthors = authors[:] + + for t in specials: + newauthors.remove(t) + newauthors.remove(string) + + return newauthors + + def __formatSpecials(self, translators, prefix): + """ + Receives a list with translators and returns a string + (authors are handled different: each author in a different node) + """ + + return prefix + string.join(translators, '; ') + +def halt(): + print "HALT." + sys.exit(0) + +def showUsage(): + print """Usage: %s options +Where options are: + -t title + -i (ISBN|UPC) + -a author + -m filename (support for multiple ISBN/UPC search)""" % sys.argv[0] + sys.exit(1) + +def main(): + if len(sys.argv) < 3: + showUsage() + + socket.setdefaulttimeout(5) + + # ;-separated ISBNs string + isbnStringList = NULLSTRING + + opts = {'-t' : TITLE, '-i' : ISBN, '-a' : AUTHOR, '-m' : isbnStringList} + if sys.argv[1] not in opts.keys(): + showUsage() + + if sys.argv[1] == '-m': + try: + f = open(sys.argv[2], 'r') + data = f.readlines() + # remove trailing \n + sys.argv[2] = string.join([d[:-1] for d in data], ';') + sys.argv[1] = '-i' + f.close() + except IOError, e: + print "Error: %s" % e + sys.exit(1) + + parser = MinisterioCulturaParser() + parser.run(sys.argv[2], opts[sys.argv[1]]) + +if __name__ == '__main__': + main() diff --git a/src/fetch/scripts/ministerio_de_cultura.py.spec b/src/fetch/scripts/ministerio_de_cultura.py.spec new file mode 100644 index 0000000..ef24ac5 --- /dev/null +++ b/src/fetch/scripts/ministerio_de_cultura.py.spec @@ -0,0 +1,7 @@ +Name=Spanish Ministry of Culture +Type=data-source 
+ArgumentKeys=1,2,3,4 +Arguments=-t %1,-a %1,-i %1,-i %1 +CollectionType=2 +FormatType=0 +UpdateArgs=-t %{title} diff --git a/src/fetch/srufetcher.cpp b/src/fetch/srufetcher.cpp new file mode 100644 index 0000000..1d7289b --- /dev/null +++ b/src/fetch/srufetcher.cpp @@ -0,0 +1,541 @@ +/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : [email protected] + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "srufetcher.h" +#include "messagehandler.h" +#include "../field.h" +#include "../collection.h" +#include "../translators/tellico_xml.h" +#include "../translators/xslthandler.h" +#include "../translators/tellicoimporter.h" +#include "../translators/dcimporter.h" +#include "../tellico_kernel.h" +#include "../tellico_debug.h" +#include "../gui/lineedit.h" +#include "../gui/combobox.h" +#include "../latin1literal.h" +#include "../tellico_utils.h" +#include "../lccnvalidator.h" + +#include <klocale.h> +#include <kio/job.h> +#include <kstandarddirs.h> +#include <kconfig.h> +#include <kcombobox.h> +#include <kaccelmanager.h> +#include <knuminput.h> + +#include <qlabel.h> +#include <qlayout.h> +#include <qwhatsthis.h> + +//#define SRU_DEBUG + +namespace { + // 7090 was the old default port, but that wa sjust because LoC used it + // let's use default HTTP port of 80 now + static const int SRU_DEFAULT_PORT = 80; + static const int SRU_MAX_RECORDS = 25; +} + +using Tellico::Fetch::SRUFetcher; +using Tellico::Fetch::SRUConfigWidget; + +SRUFetcher::SRUFetcher(QObject* parent_, const char* 
name_) + : Fetcher(parent_, name_), m_job(0), m_MARCXMLHandler(0), m_MODSHandler(0), m_started(false) { +} + +SRUFetcher::SRUFetcher(const QString& name_, const QString& host_, uint port_, const QString& path_, + QObject* parent_) : Fetcher(parent_), + m_host(host_), m_port(port_), m_path(path_), + m_job(0), m_MARCXMLHandler(0), m_MODSHandler(0), m_started(false) { + m_name = name_; // m_name is protected in super class +} + +SRUFetcher::~SRUFetcher() { + delete m_MARCXMLHandler; + m_MARCXMLHandler = 0; + delete m_MODSHandler; + m_MODSHandler = 0; +} + +QString SRUFetcher::defaultName() { + return i18n("SRU Server"); +} + +QString SRUFetcher::source() const { + return m_name.isEmpty() ? defaultName() : m_name; +} + +bool SRUFetcher::canFetch(int type) const { + return type == Data::Collection::Book || type == Data::Collection::Bibtex; +} + +void SRUFetcher::readConfigHook(const KConfigGroup& config_) { + m_host = config_.readEntry("Host"); + int p = config_.readNumEntry("Port", SRU_DEFAULT_PORT); + if(p > 0) { + m_port = p; + } + m_path = config_.readEntry("Path"); + // used to be called Database + if(m_path.isEmpty()) { + m_path = config_.readEntry("Database"); + } + if(!m_path.startsWith(QChar('/'))) { + m_path.prepend('/'); + } + m_format = config_.readEntry("Format", QString::fromLatin1("mods")); + m_fields = config_.readListEntry("Custom Fields"); +} + +void SRUFetcher::search(FetchKey key_, const QString& value_) { + if(m_host.isEmpty() || m_path.isEmpty()) { + myDebug() << "SRUFetcher::search() - settings are not set!" 
<< endl; + stop(); + return; + } + + m_started = true; + +#ifdef SRU_DEBUG + KURL u = KURL::fromPathOrURL(QString::fromLatin1("/home/robby/sru.xml")); +#else + KURL u; + u.setProtocol(QString::fromLatin1("http")); + u.setHost(m_host); + u.setPort(m_port); + u.setPath(m_path); + + u.addQueryItem(QString::fromLatin1("operation"), QString::fromLatin1("searchRetrieve")); + u.addQueryItem(QString::fromLatin1("version"), QString::fromLatin1("1.1")); + u.addQueryItem(QString::fromLatin1("maximumRecords"), QString::number(SRU_MAX_RECORDS)); + u.addQueryItem(QString::fromLatin1("recordSchema"), m_format); + + const int type = Kernel::self()->collectionType(); + QString str = QChar('"') + value_ + QChar('"'); + switch(key_) { + case Title: + u.addQueryItem(QString::fromLatin1("query"), QString::fromLatin1("dc.title=") + str); + break; + + case Person: + { + QString s; + if(type == Data::Collection::Book || type == Data::Collection::Bibtex) { + s = QString::fromLatin1("author=") + str + QString::fromLatin1(" or dc.author=") + str; + } else { + s = QString::fromLatin1("dc.creator=") + str + QString::fromLatin1(" or dc.editor=") + str; + } + u.addQueryItem(QString::fromLatin1("query"), s); + } + break; + + case ISBN: + // no validation here + str.remove('-'); + // limit to first isbn + str = str.section(';', 0, 0); + u.addQueryItem(QString::fromLatin1("query"), QString::fromLatin1("bath.isbn=") + str); + break; + + case LCCN: + { + // limit to first lccn + str.remove('-'); + str = str.section(';', 0, 0); + // also try formalized lccn + QString lccn = LCCNValidator::formalize(str); + u.addQueryItem(QString::fromLatin1("query"), + QString::fromLatin1("bath.lccn=") + str + + QString::fromLatin1(" or bath.lccn=") + lccn + ); + } + break; + + case Keyword: + u.addQueryItem(QString::fromLatin1("query"), str); + break; + + case Raw: + { + QString key = value_.section('=', 0, 0).stripWhiteSpace(); + QString str = value_.section('=', 1).stripWhiteSpace(); + u.addQueryItem(key, str); + } 
+      break;
+
+    default:
+      kdWarning() << "SRUFetcher::search() - key not recognized: " << key_ << endl;
+      stop();
+      break;
+  }
+#endif
+//  myDebug() << u.prettyURL() << endl;
+
+  m_job = KIO::get(u, false, false);
+  connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)),
+          SLOT(slotData(KIO::Job*, const QByteArray&)));
+  connect(m_job, SIGNAL(result(KIO::Job*)),
+          SLOT(slotComplete(KIO::Job*)));
+}
+
+/**
+ * Ends the current search. Kills the pending KIO job if there is one, drops
+ * the buffered response and emits signalDone(). Does nothing when no search
+ * has been started.
+ */
+void SRUFetcher::stop() {
+  if(!m_started) {
+    return;
+  }
+  if(m_job) {
+    m_job->kill();
+    m_job = 0;
+  }
+  m_data.truncate(0);
+  m_started = false;
+  emit signalDone(this);
+}
+
+/**
+ * Appends one chunk of the HTTP response to the m_data buffer.
+ */
+void SRUFetcher::slotData(KIO::Job*, const QByteArray& data_) {
+  QDataStream stream(m_data, IO_WriteOnly | IO_Append);
+  stream.writeRawBytes(data_.data(), data_.size());
+}
+
+/**
+ * Handles the finished transfer: collects SRU diagnostics, converts the
+ * response into a Tellico collection according to m_format ("mods",
+ * "marcxml" or "dc"), and emits one signalResultFound() per entry.
+ * Every exit path goes through stop() so that signalDone() is emitted.
+ */
+void SRUFetcher::slotComplete(KIO::Job* job_) {
+  // since the fetch is done, don't worry about holding the job pointer
+  m_job = 0;
+
+  if(job_->error()) {
+    job_->showErrorDialog(Kernel::self()->widget());
+    stop();
+    return;
+  }
+
+  if(m_data.isEmpty()) {
+    stop();
+    return;
+  }
+
+  Data::CollPtr coll;
+  QString msg;
+
+  const QString result = QString::fromUtf8(m_data, m_data.size());
+
+  // first check for SRU errors
+  const QString& diag = XML::nsZingDiag;
+  Import::XMLImporter xmlImporter(result);
+  QDomDocument dom = xmlImporter.domDocument();
+
+  // every <diagnostic> may carry a <message> and an optional <details>;
+  // they are concatenated, one per line, into msg
+  QDomNodeList diagList = dom.elementsByTagNameNS(diag, QString::fromLatin1("diagnostic"));
+  for(uint i = 0; i < diagList.count(); ++i) {
+    QDomElement elem = diagList.item(i).toElement();
+    QDomNodeList nodeList1 = elem.elementsByTagNameNS(diag, QString::fromLatin1("message"));
+    QDomNodeList nodeList2 = elem.elementsByTagNameNS(diag, QString::fromLatin1("details"));
+    for(uint j = 0; j < nodeList1.count(); ++j) {
+      QString d = nodeList1.item(j).toElement().text();
+      if(!d.isEmpty()) {
+        QString d2 = nodeList2.item(j).toElement().text();
+        if(!d2.isEmpty()) {
+          d += " (" + d2 + ')';
+        }
+        myDebug() << "SRUFetcher::slotComplete() - " << d << endl;
+        if(!msg.isEmpty()) msg += '\n';
+        msg += d;
+      }
+    }
+  }
+
+  // MARCXML is first transformed to MODS, MODS is then transformed to Tellico XML
+  QString modsResult;
+  if(m_format == Latin1Literal("mods")) {
+    modsResult = result;
+  } else if(m_format == Latin1Literal("marcxml") && initMARCXMLHandler()) {
+    modsResult = m_MARCXMLHandler->applyStylesheet(result);
+  }
+  if(!modsResult.isEmpty() && initMODSHandler()) {
+    Import::TellicoImporter imp(m_MODSHandler->applyStylesheet(modsResult));
+    coll = imp.collection();
+    if(!msg.isEmpty()) msg += '\n';
+    msg += imp.statusMessage();
+  } else if(m_format == Latin1Literal("dc")) {
+    Import::DCImporter imp(dom);
+    coll = imp.collection();
+    if(!msg.isEmpty()) msg += '\n';
+    msg += imp.statusMessage();
+  } else {
+    myDebug() << "SRUFetcher::slotComplete() - unrecognized format: " << m_format << endl;
+    stop();
+    return;
+  }
+
+  if(coll && !msg.isEmpty()) {
+    message(msg, coll->entryCount() == 0 ? MessageHandler::Warning : MessageHandler::Status);
+  }
+
+  if(!coll) {
+    myDebug() << "SRUFetcher::slotComplete() - no collection pointer" << endl;
+    if(!msg.isEmpty()) {
+      message(msg, MessageHandler::Error);
+    }
+    stop();
+    return;
+  }
+
+  // drop the optional fields the user did not ask for
+  const StringMap customFields = SRUFetcher::customFields();
+  for(StringMap::ConstIterator it = customFields.begin(); it != customFields.end(); ++it) {
+    if(!m_fields.contains(it.key())) {
+      coll->removeField(it.key());
+    }
+  }
+
+  Data::EntryVec entries = coll->entries();
+  for(Data::EntryVec::Iterator entry = entries.begin(); entry != entries.end(); ++entry) {
+    // short description shown next to the title in the result list
+    QString desc;
+    switch(coll->type()) {
+      case Data::Collection::Book:
+        desc = entry->field(QString::fromLatin1("author"))
+               + QChar('/')
+               + entry->field(QString::fromLatin1("publisher"));
+        if(!entry->field(QString::fromLatin1("cr_year")).isEmpty()) {
+          desc += QChar('/') + entry->field(QString::fromLatin1("cr_year"));
+        } else if(!entry->field(QString::fromLatin1("pub_year")).isEmpty()){
+          desc += QChar('/') + entry->field(QString::fromLatin1("pub_year"));
+        }
+        break;
+
+      case Data::Collection::Video:
+        desc = entry->field(QString::fromLatin1("studio"))
+               + QChar('/')
+               + entry->field(QString::fromLatin1("director"))
+               + QChar('/')
+               + entry->field(QString::fromLatin1("year"));
+        break;
+
+      case Data::Collection::Album:
+        desc = entry->field(QString::fromLatin1("artist"))
+               + QChar('/')
+               + entry->field(QString::fromLatin1("label"))
+               + QChar('/')
+               + entry->field(QString::fromLatin1("year"));
+        break;
+
+      default:
+        break;
+    }
+    SearchResult* r = new SearchResult(this, entry->title(), desc, entry->field(QString::fromLatin1("isbn")));
+    m_entries.insert(r->uid, entry);
+    emit signalResultFound(r);
+  }
+  stop();
+}
+
+/**
+ * Returns the entry that was stored for the search result with id @p uid_.
+ */
+Tellico::Data::EntryPtr SRUFetcher::fetchEntry(uint uid_) {
+  return m_entries[uid_];
+}
+
+/**
+ * Starts a search meant to refresh @p entry_, using the first non-empty
+ * field among ISBN, LCCN and title. Emits signalDone() right away when none
+ * of them is set.
+ */
+void SRUFetcher::updateEntry(Data::EntryPtr entry_) {
+//  myDebug() << "SRUFetcher::updateEntry() - " << source() << ": " << entry_->title() << endl;
+  QString isbn = entry_->field(QString::fromLatin1("isbn"));
+  if(!isbn.isEmpty()) {
+    search(Fetch::ISBN, isbn);
+    return;
+  }
+
+  QString lccn = entry_->field(QString::fromLatin1("lccn"));
+  if(!lccn.isEmpty()) {
+    search(Fetch::LCCN, lccn);
+    return;
+  }
+
+  // optimistically try searching for title and rely on Collection::sameEntry() to figure things out
+  QString t = entry_->field(QString::fromLatin1("title"));
+  if(!t.isEmpty()) {
+    search(Fetch::Title, t);
+    return;
+  }
+
+  myDebug() << "SRUFetcher::updateEntry() - insufficient info to search" << endl;
+  emit signalDone(this); // always need to emit this if not continuing with the search
+}
+
+/**
+ * Lazily creates the MARCXML -> MODS stylesheet handler.
+ * @return true when m_MARCXMLHandler is usable, false when the stylesheet
+ *         could not be located or is invalid
+ */
+bool SRUFetcher::initMARCXMLHandler() {
+  if(m_MARCXMLHandler) {
+    return true;
+  }
+
+  QString xsltfile = locate("appdata", QString::fromLatin1("MARC21slim2MODS3.xsl"));
+  if(xsltfile.isEmpty()) {
+    kdWarning() << "SRUFetcher::initMARCXMLHandler() - can not locate MARC21slim2MODS3.xsl." << endl;
+    return false;
+  }
+
+  KURL u;
+  u.setPath(xsltfile);
+
+  m_MARCXMLHandler = new XSLTHandler(u);
+  if(!m_MARCXMLHandler->isValid()) {
+    kdWarning() << "SRUFetcher::initMARCXMLHandler() - error in MARC21slim2MODS3.xsl." << endl;
+    delete m_MARCXMLHandler;
+    m_MARCXMLHandler = 0;
+    return false;
+  }
+  return true;
+}
+
+/**
+ * Lazily creates the MODS -> Tellico stylesheet handler.
+ * @return true when m_MODSHandler is usable, false when the stylesheet
+ *         could not be located or is invalid
+ */
+bool SRUFetcher::initMODSHandler() {
+  if(m_MODSHandler) {
+    return true;
+  }
+
+  QString xsltfile = locate("appdata", QString::fromLatin1("mods2tellico.xsl"));
+  if(xsltfile.isEmpty()) {
+    kdWarning() << "SRUFetcher::initMODSHandler() - can not locate mods2tellico.xsl." << endl;
+    return false;
+  }
+
+  KURL u;
+  u.setPath(xsltfile);
+
+  m_MODSHandler = new XSLTHandler(u);
+  if(!m_MODSHandler->isValid()) {
+    kdWarning() << "SRUFetcher::initMODSHandler() - error in mods2tellico.xsl." << endl;
+    delete m_MODSHandler;
+    m_MODSHandler = 0;
+    return false;
+  }
+  return true;
+}
+
+/**
+ * Creates a fetcher preconfigured for the Library of Congress SRU server.
+ */
+Tellico::Fetch::Fetcher::Ptr SRUFetcher::libraryOfCongress(QObject* parent_) {
+  return new SRUFetcher(i18n("Library of Congress (US)"), QString::fromLatin1("z3950.loc.gov"), 7090,
+                        QString::fromLatin1("voyager"), parent_);
+}
+
+// static
+// Maps the name of every optional field to its translated title.
+Tellico::StringMap SRUFetcher::customFields() {
+  StringMap map;
+  map[QString::fromLatin1("address")]  = i18n("Address");
+  map[QString::fromLatin1("abstract")] = i18n("Abstract");
+  return map;
+}
+
+Tellico::Fetch::ConfigWidget* SRUFetcher::configWidget(QWidget* parent_) const {
+  return new SRUConfigWidget(parent_, this);
+}
+
+/**
+ * Builds the host / port / path / format editors and, when @p fetcher_ is
+ * given, fills them with that fetcher's current settings.
+ */
+SRUConfigWidget::SRUConfigWidget(QWidget* parent_, const SRUFetcher* fetcher_ /*=0*/)
+    : ConfigWidget(parent_) {
+  QGridLayout* l = new QGridLayout(optionsWidget(), 4, 2);
+  l->setSpacing(4);
+  l->setColStretch(1, 10);
+
+  int row = -1;
+  QLabel* label = new QLabel(i18n("Hos&t: "), optionsWidget());
+  l->addWidget(label, ++row, 0);
+  m_hostEdit = new GUI::LineEdit(optionsWidget());
+  connect(m_hostEdit, SIGNAL(textChanged(const QString&)), SLOT(slotSetModified()));
+  connect(m_hostEdit, SIGNAL(textChanged(const QString&)), SIGNAL(signalName(const QString&)));
+  connect(m_hostEdit, SIGNAL(textChanged(const QString&)), SLOT(slotCheckHost()));
+  l->addWidget(m_hostEdit, row, 1);
+  QString w = i18n("Enter the host name of the server.");
+  QWhatsThis::add(label, w);
+  QWhatsThis::add(m_hostEdit, w);
+  label->setBuddy(m_hostEdit);
+
+  label = new QLabel(i18n("&Port: "), optionsWidget());
+  l->addWidget(label, ++row, 0);
+  // TCP port numbers are 16 bit, so do not offer anything above 65535
+  m_portSpinBox = new KIntSpinBox(0, 65535, 1, SRU_DEFAULT_PORT, 10, optionsWidget());
+  connect(m_portSpinBox, SIGNAL(valueChanged(int)), SLOT(slotSetModified()));
+  l->addWidget(m_portSpinBox, row, 1);
+  w = i18n("Enter the port number of the server. The default is %1.").arg(SRU_DEFAULT_PORT);
+  QWhatsThis::add(label, w);
+  QWhatsThis::add(m_portSpinBox, w);
+  label->setBuddy(m_portSpinBox);
+
+  label = new QLabel(i18n("Path: "), optionsWidget());
+  l->addWidget(label, ++row, 0);
+  m_pathEdit = new GUI::LineEdit(optionsWidget());
+  connect(m_pathEdit, SIGNAL(textChanged(const QString&)), SLOT(slotSetModified()));
+  l->addWidget(m_pathEdit, row, 1);
+  w = i18n("Enter the path to the database used by the server.");
+  QWhatsThis::add(label, w);
+  QWhatsThis::add(m_pathEdit, w);
+  label->setBuddy(m_pathEdit);
+
+  label = new QLabel(i18n("Format: "), optionsWidget());
+  l->addWidget(label, ++row, 0);
+  m_formatCombo = new GUI::ComboBox(optionsWidget());
+  m_formatCombo->insertItem(QString::fromLatin1("MODS"), QString::fromLatin1("mods"));
+  m_formatCombo->insertItem(QString::fromLatin1("MARCXML"), QString::fromLatin1("marcxml"));
+  m_formatCombo->insertItem(QString::fromLatin1("Dublin Core"), QString::fromLatin1("dc"));
+  connect(m_formatCombo, SIGNAL(activated(int)), SLOT(slotSetModified()));
+  l->addWidget(m_formatCombo, row, 1);
+  w = i18n("Enter the result format used by the server.");
+  QWhatsThis::add(label, w);
+  QWhatsThis::add(m_formatCombo, w);
+  label->setBuddy(m_formatCombo);
+
+  l->setRowStretch(++row, 1);
+
+  // now add additional fields widget
+  addFieldsWidget(SRUFetcher::customFields(), fetcher_ ? fetcher_->m_fields : QStringList());
+
+  if(fetcher_) {
+    m_hostEdit->setText(fetcher_->m_host);
+    m_portSpinBox->setValue(fetcher_->m_port);
+    m_pathEdit->setText(fetcher_->m_path);
+    m_formatCombo->setCurrentData(fetcher_->m_format);
+  }
+  KAcceleratorManager::manage(optionsWidget());
+}
+
+/**
+ * Writes the non-empty settings to @p config_ ("Host", "Port", "Path",
+ * "Format"), saves the optional-field selection and clears the modified flag.
+ */
+void SRUConfigWidget::saveConfig(KConfigGroup& config_) {
+  QString s = m_hostEdit->text().stripWhiteSpace();
+  if(!s.isEmpty()) {
+    config_.writeEntry("Host", s);
+  }
+  int port = m_portSpinBox->value();
+  if(port > 0) {
+    config_.writeEntry("Port", port);
+  }
+  s = m_pathEdit->text().stripWhiteSpace();
+  if(!s.isEmpty()) {
+    config_.writeEntry("Path", s);
+  }
+  s = m_formatCombo->currentData().toString();
+  if(!s.isEmpty()) {
+    config_.writeEntry("Format", s);
+  }
+  saveFieldsConfig(config_);
+  slotSetModified(false);
+}
+
+/**
+ * The host name as typed, or the fetcher's default name when it is empty.
+ */
+QString SRUConfigWidget::preferredName() const {
+  QString s = m_hostEdit->text();
+  return s.isEmpty() ? SRUFetcher::defaultName() : s;
+}
+
+/**
+ * Splits a full URL typed or pasted into the host field into host, port and
+ * path. Text that parses as a URL without a host part (for example
+ * "host:7090", where KURL presumably reads "host" as the scheme) is left
+ * alone, so the field is never wiped while the user is still typing.
+ */
+void SRUConfigWidget::slotCheckHost() {
+  QString s = m_hostEdit->text();
+  // someone might be pasting a full URL, check that
+  if(s.find(':') > -1 || s.find('/') > -1) {
+    KURL u(s);
+    if(u.isValid() && !u.host().isEmpty()) {
+      m_hostEdit->setText(u.host());
+      if(u.port() > 0) {
+        m_portSpinBox->setValue(u.port());
+      }
+      if(!u.path().isEmpty()) {
+        m_pathEdit->setText(u.path());
+      }
+    }
+  }
+}
+
+#include "srufetcher.moc"
diff --git a/src/fetch/srufetcher.h b/src/fetch/srufetcher.h
new file mode 100644
index 0000000..fd07323
--- /dev/null
+++ b/src/fetch/srufetcher.h
@@ -0,0 +1,131 @@
+/***************************************************************************
+    copyright            : (C) 2003-2006 by Robby Stephenson
+    email                : [email protected]
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it
under the terms of version 2 of the GNU General Public License as  *
+ *   published by the Free Software Foundation;                            *
+ *                                                                         *
+ ***************************************************************************/
+
+#ifndef TELLICO_SRUFETCHER_H
+#define TELLICO_SRUFETCHER_H
+
+namespace Tellico {
+  class XSLTHandler;
+  namespace GUI {
+    class LineEdit;
+    class ComboBox;
+  }
+}
+
+class KIntSpinBox;
+class KComboBox;
+namespace KIO {
+  class Job;
+}
+
+#include "fetcher.h"
+#include "configwidget.h"
+#include "../datavectors.h"
+
+#include <qcstring.h> // for QByteArray
+#include <qguardedptr.h>
+
+namespace Tellico {
+  namespace Fetch {
+
+class SRUConfigWidget;
+
+/**
+ * A fetcher for SRU servers.
+ * The response may be requested as MODS, as MARCXML (which is converted to
+ * MODS through an XSLT stylesheet first) or as Dublin Core.
+ *
+ * @author Robby Stephenson
+ */
+class SRUFetcher : public Fetcher {
+Q_OBJECT
+
+friend class SRUConfigWidget;
+
+public:
+  /**
+   * Constructors. The second form takes the display name and the server
+   * location (host, port, database name) directly.
+   */
+  SRUFetcher(QObject* parent, const char* name = 0);
+  SRUFetcher(const QString& name, const QString& host, uint port, const QString& dbname,
+             QObject* parent);
+  /**
+   * Destructor.
+   */
+  virtual ~SRUFetcher();
+
+  /**
+   * Fetcher interface.
+   */
+  virtual QString source() const;
+  // true between the start of a search and the matching stop()
+  virtual bool isSearching() const { return m_started; }
+  virtual void search(FetchKey key, const QString& value);
+  // only search title, person, isbn, keyword, or lccn. No Raw for now.
+  virtual bool canSearch(FetchKey k) const { return k == Title || k == Person || k == ISBN || k == Keyword || k == LCCN; }
+  // aborts the running job, if any, and emits signalDone()
+  virtual void stop();
+  // returns the entry stored for the search result with the given uid
+  virtual Data::EntryPtr fetchEntry(uint uid);
+  virtual Type type() const { return SRU; }
+  virtual bool canFetch(int type) const;
+  virtual void readConfigHook(const KConfigGroup& config);
+
+  // searches by ISBN, LCCN or title, whichever is set first, to refresh an entry
+  virtual void updateEntry(Data::EntryPtr entry);
+
+  // optional fields (name -> translated title) the user may enable
+  static StringMap customFields();
+
+  virtual ConfigWidget* configWidget(QWidget* parent) const;
+
+  static QString defaultName();
+
+  // convenience factory for the Library of Congress server
+  static Fetcher::Ptr libraryOfCongress(QObject* parent);
+
+private slots:
+  // appends a chunk of the response to m_data
+  void slotData(KIO::Job* job, const QByteArray& data);
+  // parses the complete response and emits the results
+  void slotComplete(KIO::Job* job);
+
+private:
+  // lazily create the XSLT handlers; return false when the stylesheet is unusable
+  bool initMARCXMLHandler();
+  bool initMODSHandler();
+
+  QString m_host;
+  uint m_port;
+  QString m_path;
+  // record format: "mods", "marcxml" or "dc"
+  QString m_format;
+
+  // raw response, accumulated by slotData()
+  QByteArray m_data;
+  // entries found so far, keyed by search result uid
+  QMap<int, Data::EntryPtr> m_entries;
+  QGuardedPtr<KIO::Job> m_job;
+  XSLTHandler* m_MARCXMLHandler;
+  XSLTHandler* m_MODSHandler;
+  bool m_started;
+  // names of the optional fields (see customFields()) to keep in the results
+  QStringList m_fields;
+};
+
+/**
+ * Configuration widget for SRUFetcher: host, port, path and record format.
+ */
+class SRUConfigWidget : public ConfigWidget {
+Q_OBJECT
+
+friend class SRUFetcher;
+
+public:
+  SRUConfigWidget(QWidget* parent_, const SRUFetcher* fetcher = 0);
+  virtual void saveConfig(KConfigGroup& config);
+  // the host name, or the fetcher's default name when no host is entered
+  virtual QString preferredName() const;
+
+private slots:
+  // splits a full URL entered in the host field into host, port and path
+  void slotCheckHost();
+
+private:
+  GUI::LineEdit* m_hostEdit;
+  KIntSpinBox* m_portSpinBox;
+  GUI::LineEdit* m_pathEdit;
+  GUI::ComboBox* m_formatCombo;
+};
+
+  } // end namespace
+} // end namespace
+#endif
diff --git a/src/fetch/yahoofetcher.cpp b/src/fetch/yahoofetcher.cpp
new file mode 100644
index 0000000..002b63b
--- /dev/null
+++ b/src/fetch/yahoofetcher.cpp
@@ -0,0 +1,400 @@
+/***************************************************************************
+    copyright            : (C) 2006 by Robby Stephenson
+    email                : [email protected]
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of version 2 of the GNU General Public License as  *
+ *   published by the Free Software Foundation;                            *
+ *                                                                         *
+ ***************************************************************************/
+
+#include "yahoofetcher.h"
+#include "messagehandler.h"
+#include "../translators/xslthandler.h"
+#include "../translators/tellicoimporter.h"
+#include "../imagefactory.h"
+#include "../tellico_kernel.h"
+#include "../tellico_utils.h"
+#include "../collection.h"
+#include "../entry.h"
+#include "../tellico_debug.h"
+
+#include <klocale.h>
+#include <kstandarddirs.h>
+#include <kconfig.h>
+#include <kio/job.h>
+
+#include <qdom.h>
+#include <qlabel.h>
+#include <qlayout.h>
+#include <qfile.h>
+
+namespace {
+  // number of results requested per call, and default cap on results reported
+  static const int YAHOO_MAX_RETURNS_TOTAL = 20;
+  static const char* YAHOO_BASE_URL = "http://search.yahooapis.com/AudioSearchService/V1/albumSearch";
+  static const char* YAHOO_APP_ID = "tellico-robby";
+}
+
+using Tellico::Fetch::YahooFetcher;
+
+YahooFetcher::YahooFetcher(QObject* parent_, const char* name_)
+    : Fetcher(parent_, name_), m_xsltHandler(0),
+      m_limit(YAHOO_MAX_RETURNS_TOTAL), m_job(0), m_started(false) {
+}
+
+YahooFetcher::~YahooFetcher() {
+  delete m_xsltHandler;
+  m_xsltHandler = 0;
+}
+
+QString YahooFetcher::defaultName() {
+  return i18n("Yahoo! Audio Search");
+}
+
+QString YahooFetcher::source() const {
+  return m_name.isEmpty() ? defaultName() : m_name;
+}
+
+// Only album collections can be searched.
+bool YahooFetcher::canFetch(int type) const {
+  return type == Data::Collection::Album;
+}
+
+void YahooFetcher::readConfigHook(const KConfigGroup& config_) {
+  Q_UNUSED(config_);
+}
+
+/**
+ * Starts a new search: remembers key and value, resets the paging state
+ * (first result, total unknown) and sends the first request.
+ */
+void YahooFetcher::search(FetchKey key_, const QString& value_) {
+  m_key = key_;
+  m_value = value_;
+  m_started = true;
+  m_start = 1;
+  m_total = -1;
+  doSearch();
+}
+
+/**
+ * Requests the next page of the previous search, beginning at m_start.
+ */
+void YahooFetcher::continueSearch() {
+  m_started = true;
+  doSearch();
+}
+
+/**
+ * Builds the request URL from m_key / m_value / m_start and starts the
+ * asynchronous download. Calls stop() when the current collection type or
+ * the search key is not supported.
+ */
+void YahooFetcher::doSearch() {
+//  myDebug() << "YahooFetcher::search() - value = " << value_ << endl;
+
+  KURL u(QString::fromLatin1(YAHOO_BASE_URL));
+  u.addQueryItem(QString::fromLatin1("appid"),   QString::fromLatin1(YAHOO_APP_ID));
+  u.addQueryItem(QString::fromLatin1("type"),    QString::fromLatin1("all"));
+  u.addQueryItem(QString::fromLatin1("output"),  QString::fromLatin1("xml"));
+  u.addQueryItem(QString::fromLatin1("start"),   QString::number(m_start));
+  u.addQueryItem(QString::fromLatin1("results"), QString::number(YAHOO_MAX_RETURNS_TOTAL));
+
+  if(!canFetch(Kernel::self()->collectionType())) {
+    message(i18n("%1 does not allow searching for this collection type.").arg(source()), MessageHandler::Warning);
+    stop();
+    return;
+  }
+
+  switch(m_key) {
+    case Title:
+      u.addQueryItem(QString::fromLatin1("album"), m_value);
+      break;
+
+    case Person:
+      u.addQueryItem(QString::fromLatin1("artist"), m_value);
+      break;
+
+    // raw is used for the entry updates
+    // the value must already be an encoded "name=value&name=value" string
+    case Raw:
+//      u.removeQueryItem(QString::fromLatin1("type"));
+//      u.addQueryItem(QString::fromLatin1("type"), QString::fromLatin1("phrase"));
+      u.setQuery(u.query() + '&' + m_value);
+      break;
+
+    default:
+      kdWarning() << "YahooFetcher::search() - key not recognized: " << m_key << endl;
+      stop();
+      return;
+  }
+//  myDebug() << "YahooFetcher::search() - url: " << u.url() << endl;
+
+  m_job = KIO::get(u, false, false);
+  connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)),
+          SLOT(slotData(KIO::Job*, const QByteArray&)));
+  connect(m_job, SIGNAL(result(KIO::Job*)),
+          SLOT(slotComplete(KIO::Job*)));
+}
+
+/**
+ * Ends the current search. Kills the pending job if there is one, drops the
+ * buffered response and emits signalDone(). Does nothing when no search has
+ * been started.
+ */
+void YahooFetcher::stop() {
+  if(!m_started) {
+    return;
+  }
+  if(m_job) {
+    m_job->kill();
+    m_job = 0;
+  }
+  m_data.truncate(0);
+  m_started = false;
+  emit signalDone(this);
+}
+
+// Appends one chunk of the response to the m_data buffer.
+void YahooFetcher::slotData(KIO::Job*, const QByteArray& data_) {
+  QDataStream stream(m_data, IO_WriteOnly | IO_Append);
+  stream.writeRawBytes(data_.data(), data_.size());
+}
+
+/**
+ * Handles the finished download: reads the total result count on the first
+ * page, converts the XML to a Tellico collection through the stylesheet and
+ * emits signalResultFound() for at most m_limit entries. Every exit path
+ * goes through stop() so that signalDone() is always emitted.
+ */
+void YahooFetcher::slotComplete(KIO::Job* job_) {
+//  myDebug() << "YahooFetcher::slotComplete()" << endl;
+  // since the fetch is done, don't worry about holding the job pointer
+  m_job = 0;
+
+  if(job_->error()) {
+    job_->showErrorDialog(Kernel::self()->widget());
+    stop();
+    return;
+  }
+
+  if(m_data.isEmpty()) {
+    myDebug() << "YahooFetcher::slotComplete() - no data" << endl;
+    stop();
+    return;
+  }
+
+#if 0
+  kdWarning() << "Remove debug from yahoofetcher.cpp" << endl;
+  QFile f(QString::fromLatin1("/tmp/test.xml"));
+  if(f.open(IO_WriteOnly)) {
+    QTextStream t(&f);
+    t.setEncoding(QTextStream::UnicodeUTF8);
+    t << QCString(m_data, m_data.size()+1);
+  }
+  f.close();
+#endif
+
+  if(!m_xsltHandler) {
+    initXSLTHandler();
+    if(!m_xsltHandler) { // probably an error somewhere in the stylesheet loading
+      stop();
+      return;
+    }
+  }
+
+  if(m_total == -1) {
+    QDomDocument dom;
+    if(!dom.setContent(m_data, false)) {
+      kdWarning() << "YahooFetcher::slotComplete() - server did not return valid XML." << endl;
+      // without stop() the fetcher would stay in the "searching" state
+      // forever and signalDone() would never be emitted
+      stop();
+      return;
+    }
+    // total is top level element, with attribute totalResultsAvailable
+    QDomElement e = dom.documentElement();
+    if(!e.isNull()) {
+      m_total = e.attribute(QString::fromLatin1("totalResultsAvailable")).toInt();
+    }
+  }
+
+  // assume yahoo is always utf-8
+  QString str = m_xsltHandler->applyStylesheet(QString::fromUtf8(m_data, m_data.size()));
+  Import::TellicoImporter imp(str);
+  Data::CollPtr coll = imp.collection();
+  if(!coll) {
+    myDebug() << "YahooFetcher::slotComplete() - no collection pointer" << endl;
+    stop();
+    return;
+  }
+
+  int count = 0;
+  Data::EntryVec entries = coll->entries();
+  for(Data::EntryVec::Iterator entry = entries.begin(); count < m_limit && entry != entries.end(); ++entry, ++count) {
+    if(!m_started) {
+      // might get aborted
+      break;
+    }
+    QString desc = entry->field(QString::fromLatin1("artist"))
+                 + QChar('/')
+                 + entry->field(QString::fromLatin1("label"))
+                 + QChar('/')
+                 + entry->field(QString::fromLatin1("year"));
+
+    SearchResult* r = new SearchResult(this, entry->title(), desc, entry->field(QString::fromLatin1("isbn")));
+    m_entries.insert(r->uid, Data::EntryPtr(entry));
+    emit signalResultFound(r);
+  }
+  // the next page starts after everything collected so far
+  m_start = m_entries.count() + 1;
+  m_hasMoreResults = m_start <= m_total;
+  stop(); // required
+}
+
+/**
+ * Returns the entry stored for result @p uid_, after loading its cover image
+ * and track list. The helper fields "image" and "yahoo" are cleared before
+ * the entry is handed out.
+ * @return the entry, or 0 when @p uid_ is unknown
+ */
+Tellico::Data::EntryPtr YahooFetcher::fetchEntry(uint uid_) {
+  Data::EntryPtr entry = m_entries[uid_];
+  if(!entry) {
+    kdWarning() << "YahooFetcher::fetchEntry() - no entry in dict" << endl;
+    return 0;
+  }
+
+  KURL imageURL = entry->field(QString::fromLatin1("image"));
+  if(!imageURL.isEmpty()) {
+    QString id = ImageFactory::addImage(imageURL, true);
+    if(id.isEmpty()) {
+    // rich text causes layout issues
+//      emit signalStatus(i18n("<qt>The cover image for <i>%1</i> could not be loaded.</qt>").arg(
+//                              entry->field(QString::fromLatin1("title"))));
+      message(i18n("The cover image could not be loaded."), MessageHandler::Warning);
+    } else {
+      entry->setField(QString::fromLatin1("cover"), id);
+    }
+  }
+
+  getTracks(entry);
+
+  // don't want to show image urls in the fetch dialog
+  entry->setField(QString::fromLatin1("image"), QString::null);
+  // no need for album id now ?
+  entry->setField(QString::fromLatin1("yahoo"), QString::null);
+  return entry;
+}
+
+/**
+ * (Re)creates m_xsltHandler from yahoo2tellico.xsl. On failure the handler
+ * is left null and a warning is logged.
+ */
+void YahooFetcher::initXSLTHandler() {
+  QString xsltfile = locate("appdata", QString::fromLatin1("yahoo2tellico.xsl"));
+  if(xsltfile.isEmpty()) {
+    kdWarning() << "YahooFetcher::initXSLTHandler() - can not locate yahoo2tellico.xsl." << endl;
+    return;
+  }
+
+  KURL u;
+  u.setPath(xsltfile);
+
+  delete m_xsltHandler;
+  m_xsltHandler = new XSLTHandler(u);
+  if(!m_xsltHandler->isValid()) {
+    kdWarning() << "YahooFetcher::initXSLTHandler() - error in yahoo2tellico.xsl." << endl;
+    delete m_xsltHandler;
+    m_xsltHandler = 0;
+    return;
+  }
+}
+
+/**
+ * Downloads the song list of the album identified by the entry's "yahoo"
+ * field and stores it in the "track" field, one "title::artist[::length]"
+ * value per track number. Unlike the album search, this download is
+ * performed through FileHandler::readXMLFile().
+ */
+void YahooFetcher::getTracks(Data::EntryPtr entry_) {
+  // get album id
+  if(!entry_ || entry_->field(QString::fromLatin1("yahoo")).isEmpty()) {
+    return;
+  }
+
+  const QString albumid = entry_->field(QString::fromLatin1("yahoo"));
+
+  KURL u(QString::fromLatin1(YAHOO_BASE_URL));
+  u.setFileName(QString::fromLatin1("songSearch"));
+  u.addQueryItem(QString::fromLatin1("appid"),   QString::fromLatin1(YAHOO_APP_ID));
+  u.addQueryItem(QString::fromLatin1("type"),    QString::fromLatin1("all"));
+  u.addQueryItem(QString::fromLatin1("output"),  QString::fromLatin1("xml"));
+  // go ahead and ask for all results, since there might well be more than 10 songs on the CD
+  u.addQueryItem(QString::fromLatin1("results"), QString::number(50));
+  u.addQueryItem(QString::fromLatin1("albumid"), albumid);
+
+//  myDebug() << "YahooFetcher::getTracks() - url: " << u.url() << endl;
+  QDomDocument dom = FileHandler::readXMLFile(u, false /*no namespace*/, true /*quiet*/);
+  if(dom.isNull()) {
+    myDebug() << "YahooFetcher::getTracks() - null dom returned" << endl;
+    return;
+  }
+
+#if 0
+  kdWarning() << "Remove debug from yahoofetcher.cpp" << endl;
+  QFile f(QString::fromLatin1("/tmp/test.xml"));
+  if(f.open(IO_WriteOnly)) {
+    QTextStream t(&f);
+    t.setEncoding(QTextStream::UnicodeUTF8);
+    t << dom.toString();
+  }
+  f.close();
+#endif
+
+  const QString track = QString::fromLatin1("track");
+
+  QDomNodeList nodes = dom.documentElement().childNodes();
+  for(uint i = 0; i < nodes.count(); ++i) {
+    QDomElement e = nodes.item(i).toElement();
+    if(e.isNull()) {
+      continue;
+    }
+    QString t = e.namedItem(QString::fromLatin1("Title")).toElement().text();
+    QString n = e.namedItem(QString::fromLatin1("Track")).toElement().text();
+    bool ok;
+    int trackNum = Tellico::toUInt(n, &ok);
+    // trackNum might be 0
+    if(t.isEmpty() || !ok || trackNum < 1) {
+      continue;
+    }
+    QString a = e.namedItem(QString::fromLatin1("Artist")).toElement().text();
+    QString l = e.namedItem(QString::fromLatin1("Length")).toElement().text();
+
+    int len = Tellico::toUInt(l, &ok);
+    QString value = t + "::" + a;
+    if(ok && len > 0) {
+      value += "::" + Tellico::minutes(len);
+    }
+    entry_->setField(track, insertValue(entry_->field(track), value, trackNum));
+  }
+}
+
+/**
+ * Puts @p value_ at the 1-based position @p pos_ of the "; "-separated list
+ * @p str_, padding the list with empty values when it is too short.
+ * An existing value is only replaced by one that carries a length part.
+ * @return the new list; @p str_ unchanged when @p pos_ is 0
+ */
+// not zero-based
+QString YahooFetcher::insertValue(const QString& str_, const QString& value_, uint pos_) {
+  if(pos_ == 0) {
+    // pos_ is unsigned, so pos_-1 below would wrap around and index far out of range
+    return str_;
+  }
+  QStringList list = Data::Field::split(str_, true);
+  for(uint i = list.count(); i < pos_; ++i) {
+    list += QString::null;
+  }
+  bool write = true;
+  if(!list[pos_-1].isNull()) {
+    // for some reason, some songs are repeated from yahoo, with 0 length, don't overwrite that
+    if(value_.contains(QString::fromLatin1("::")) < 2) { // means no length value
+      write = false;
+    }
+  }
+  if(!value_.isEmpty() && write) {
+    list[pos_-1] = value_;
+  }
+  return list.join(QString::fromLatin1("; "));
+}
+
+/**
+ * Starts a raw search by album title and/or artist to refresh @p entry_.
+ * Emits signalDone() right away when neither field is set.
+ */
+void YahooFetcher::updateEntry(Data::EntryPtr entry_) {
+//  myDebug() << "YahooFetcher::updateEntry()" << endl;
+  // limit to top 5 results
+  // NOTE(review): m_limit is not restored anywhere in this file, so later
+  // searches on the same fetcher object are capped at 5 as well - confirm intent
+  m_limit = 5;
+
+  // The value is appended verbatim to the URL query in doSearch(), so the
+  // user data has to be percent-encoded here; otherwise a '&', '=', '#' or
+  // '%' in a title or artist name would corrupt the request.
+  QString value;
+  QString title = entry_->field(QString::fromLatin1("title"));
+  if(!title.isEmpty()) {
+    value += QString::fromLatin1("album=") + KURL::encode_string(title);
+  }
+  QString artist = entry_->field(QString::fromLatin1("artist"));
+  if(!artist.isEmpty()) {
+    if(!value.isEmpty()) {
+      value += '&';
+    }
+    value += QString::fromLatin1("artist=") + KURL::encode_string(artist);
+  }
+  if(!value.isEmpty()) {
+    search(Fetch::Raw, value);
+    return;
+  }
+
+  myDebug() << "YahooFetcher::updateEntry() - insufficient info to search" << endl;
+  emit signalDone(this); // always need to emit this if not continuing with the search
+}
+
+Tellico::Fetch::ConfigWidget* YahooFetcher::configWidget(QWidget* parent_) const {
+  return new YahooFetcher::ConfigWidget(parent_, this);
+}
+
+// This source has nothing to configure; the widget only shows a label.
+YahooFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const YahooFetcher*/*=0*/)
+    : Fetch::ConfigWidget(parent_) {
+  QVBoxLayout* l = new QVBoxLayout(optionsWidget());
+  l->addWidget(new QLabel(i18n("This source has no options."), optionsWidget()));
+  l->addStretch();
+}
+
+QString YahooFetcher::ConfigWidget::preferredName() const {
+  return YahooFetcher::defaultName();
+}
+
+#include "yahoofetcher.moc"
diff --git a/src/fetch/yahoofetcher.h b/src/fetch/yahoofetcher.h
new file mode 100644
index 0000000..7ff5733
--- /dev/null
+++ b/src/fetch/yahoofetcher.h
@@ -0,0 +1,105 @@
+/***************************************************************************
+    copyright            : (C) 2006 by Robby Stephenson
+    email                : [email protected]
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of version 2 of the GNU General Public License as  *
+ *   published by the Free Software Foundation;                            *
+ *                                                                         *
+ ***************************************************************************/
+
+#ifndef YAHOOFETCHER_H
+#define YAHOOFETCHER_H
+
+namespace Tellico {
+  class XSLTHandler;
+}
+
+#include "fetcher.h"
+#include "configwidget.h"
+#include "../datavectors.h"
+
+#include <qcstring.h> // for QByteArray
+#include <qguardedptr.h>
+
+namespace KIO {
+  class Job;
+}
+
+namespace Tellico {
+  namespace Fetch {
+
+/**
+ * A fetcher that looks up music albums through the Yahoo! Audio Search
+ * web service.
+ *
+ * @author Robby Stephenson
+ */
+class YahooFetcher : public Fetcher {
+Q_OBJECT
+
+public:
+  /**
+   * Constructor.
+   */
+  YahooFetcher(QObject* parent, const char* name = 0);
+  /**
+   * Destructor; deletes the XSLT handler.
+   */
+  virtual ~YahooFetcher();
+
+  /**
+   * Fetcher interface.
+   */
+  virtual QString source() const;
+  // true between the start of a search and the matching stop()
+  virtual bool isSearching() const { return m_started; }
+  // starts a new search at the first result
+  virtual void search(FetchKey key, const QString& value);
+  // requests the next page of the previous search
+  virtual void continueSearch();
+  // only album title and artist searches are offered
+  virtual bool canSearch(FetchKey k) const { return k == Title || k == Person; }
+  // aborts the running job, if any, and emits signalDone()
+  virtual void stop();
+  // returns the stored entry after loading its cover image and track list
+  virtual Data::EntryPtr fetchEntry(uint uid);
+  virtual Type type() const { return Yahoo; }
+  // only album collections are supported
+  virtual bool canFetch(int type) const;
+  virtual void readConfigHook(const KConfigGroup& config);
+
+  // searches by album title and/or artist to refresh an existing entry
+  virtual void updateEntry(Data::EntryPtr entry);
+
+  /**
+   * Returns a widget for modifying the fetcher's config.
+   */
+  virtual Fetch::ConfigWidget* configWidget(QWidget* parent) const;
+
+  // this source has no options, so nothing is ever saved
+  class ConfigWidget : public Fetch::ConfigWidget {
+  public:
+    ConfigWidget(QWidget* parent_, const YahooFetcher* fetcher = 0);
+    virtual void saveConfig(KConfigGroup&) {}
+    virtual QString preferredName() const;
+  };
+  friend class ConfigWidget;
+
+  static QString defaultName();
+
+private slots:
+  // appends a chunk of the response to m_data
+  void slotData(KIO::Job* job, const QByteArray& data);
+  // parses the complete response and emits the results
+  void slotComplete(KIO::Job* job);
+
+private:
+  void initXSLTHandler();
+  // builds the request from m_key / m_value / m_start and starts the download
+  void doSearch();
+  // fills the entry's "track" field from the album's song list
+  void getTracks(Data::EntryPtr entry);
+  // sets the value at 1-based position pos in the "; "-separated list str
+  QString insertValue(const QString& str, const QString& value, uint pos);
+
+  XSLTHandler* m_xsltHandler;
+  // maximum number of results reported per response
+  int m_limit;
+  // 1-based index of the first result to request
+  int m_start;
+  // total number of results reported by the server, -1 while unknown
+  int m_total;
+
+  // raw response, accumulated by slotData()
+  QByteArray m_data;
+  QMap<int, Data::EntryPtr> m_entries; // they get modified after collection is created, so can't be const
+  QGuardedPtr<KIO::Job> m_job;
+
+  FetchKey m_key;
+  QString m_value;
+  bool m_started;
+};
+
+  } // end namespace
+} // end namespace
+#endif
diff --git a/src/fetch/z3950-servers.cfg b/src/fetch/z3950-servers.cfg
new file mode 100644
index 0000000..f4f6157
---
/dev/null +++ b/src/fetch/z3950-servers.cfg @@ -0,0 +1,106 @@ +[loc] +Charset=marc8 +Database=Voyager +Host=z3950.loc.gov +Locale=en +Name=Library of Congress (US) +Port=7090 +Syntax=mods + +[blzcat] +Host=3950cat.bl.uk +Port=9909 +Database=BLAC +Name=The British Library +Charset=marc-8 +Locale=en_GB + +[sudoc] +Host=carmin.sudoc.abes.fr +Port=210 +Database=ABES-Z39-PUBLIC +Name=Sudoc (France) +Charset=iso-5426 +Locale=fr +Syntax=usmarc + +[bibsys] +Host=z3950.bibsys.no +Port=2100 +Database=BIBSYS +Name=BIBSYS (Norway) +Charset=iso-8859-1 +Locale=no +Syntax=usmarc + +[sbn] +Host=opac.sbn.it +Port=3950 +Database=nopac +Name=Italian National Library +Charset=iso-8859-1 +Locale=it +Syntax=unimarc + +[porbase] +Host=z3950.bn.pt +Port=210 +Database=bnd +Name=Portuguese National Library +Charset=iso-8859-1 +Locale=pt +Syntax=unimarc + +[nlp] +Host=alpha.bn.org.pl +Port=210 +Database=INNOPAC +Name=National Library of Poland +Charset=iso6937 +Locale=pl +Syntax=usmarc + +[amicus] +Host=amicus.collectionscanada.ca +Port=210 +Database=NL +Name=National Library of Canada +Charset=iso-8859-1 +Locale=ca +Syntax=marc21 + +[iul] +Host=libnet.ac.il +Port=9991 +Database=ULI02 +Name=Israel Union List +Charset=utf-8 +Locale=il +Syntax=marc21 + +[naul] +Host=catalogue.nla.gov.au +Port=7090 +Database=Voyager +Name=National Library of Australia +Charset=utf-8 +Locale=au +Syntax=marc21 + +[libis] +Host=z3950.libis.lt +Port=210 +Database=knygos +Name=National Library of Lithuania +Charset=utf-8 +Syntax=unimarc +Locale=lt + +[copac] +Host=z3950.copac.ac.uk +Port=210 +Database=COPAC +Name=Copac (UK and Ireland) +Charset=utf-8 +Locale=uk,ie,en +Syntax=mods diff --git a/src/fetch/z3950connection.cpp b/src/fetch/z3950connection.cpp new file mode 100644 index 0000000..27efe51 --- /dev/null +++ b/src/fetch/z3950connection.cpp @@ -0,0 +1,503 @@ +/*************************************************************************** + copyright : (C) 2005-2006 by Robby Stephenson + email : $EMAIL + 
***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of version 2 of the GNU General Public License as  *
+ *   published by the Free Software Foundation;                            *
+ *                                                                         *
+ ***************************************************************************/
+
+#include "z3950connection.h"
+#include "z3950fetcher.h"
+#include "messagehandler.h"
+#include "../latin1literal.h"
+#include "../tellico_debug.h"
+#include "../iso5426converter.h"
+#include "../iso6937converter.h"
+
+#include <config.h>
+
+#ifdef HAVE_YAZ
+extern "C" {
+#include <yaz/zoom.h>
+#include <yaz/marcdisp.h>
+#include <yaz/yaz-version.h>
+}
+#endif
+
+#include <klocale.h>
+
+#include <qfile.h>
+
+namespace {
+  static const size_t Z3950_DEFAULT_MAX_RECORDS = 20;
+}
+
+using Tellico::Fetch::Z3950ResultFound;
+using Tellico::Fetch::Z3950Connection;
+
+// Event carrying one result record. The string is deep-copied, and the number
+// of live events is tracked in Z3950Connection::resultsLeft.
+Z3950ResultFound::Z3950ResultFound(const QString& s) : QCustomEvent(uid())
+    , m_result(QDeepCopy<QString>(s)) {
+  ++Z3950Connection::resultsLeft;
+}
+
+Z3950ResultFound::~Z3950ResultFound() {
+  --Z3950Connection::resultsLeft;
+}
+
+// Holds the YAZ handles so that the header does not need the YAZ includes.
+class Z3950Connection::Private {
+public:
+#ifdef HAVE_YAZ
+  // Both handles start out null. The destructor can run before any
+  // connection was made, and destroying uninitialized handles is undefined.
+  Private() : conn_opt(0), conn(0) {}
+  ~Private() {
+    if(conn_opt) {
+      ZOOM_options_destroy(conn_opt);
+    }
+    if(conn) {
+      ZOOM_connection_destroy(conn);
+    }
+  }
+
+  ZOOM_options conn_opt;
+  ZOOM_connection conn;
+#else
+  Private() {}
+#endif
+};
+
+int Z3950Connection::resultsLeft = 0;
+
+// since the character set goes into a yaz api call
+// I'm paranoid about user insertions, so just grab 64
+// characters at most
+// All strings are deep-copied before being stored in this QThread object.
+Z3950Connection::Z3950Connection(Z3950Fetcher* fetcher,
+                                 const QString& host,
+                                 uint port,
+                                 const QString& dbname,
+                                 const QString& sourceCharSet,
+                                 const QString& syntax,
+                                 const QString& esn)
+    : QThread()
+    , d(new Private())
+    , m_connected(false)
+    , m_aborted(false)
+    , m_fetcher(fetcher)
+    , m_host(QDeepCopy<QString>(host))
+    , m_port(port)
+    , m_dbname(QDeepCopy<QString>(dbname))
+    , m_sourceCharSet(QDeepCopy<QString>(sourceCharSet.left(64)))
+    , m_syntax(QDeepCopy<QString>(syntax))
+    , m_esn(QDeepCopy<QString>(esn))
+    , m_start(0)
+    , m_limit(Z3950_DEFAULT_MAX_RECORDS)
+    , m_hasMore(false) {
+}
+
+Z3950Connection::~Z3950Connection() {
+  m_connected = false;
+  delete d;
+  d = 0;
+}
+
+// Resets the paging window to the first Z3950_DEFAULT_MAX_RECORDS records.
+void Z3950Connection::reset() {
+  m_start = 0;
+  m_limit = Z3950_DEFAULT_MAX_RECORDS;
+}
+
+// Stores a deep copy of the prefix query that run() will execute.
+void Z3950Connection::setQuery(const QString& query_) {
+  m_pqn = QDeepCopy<QString>(query_);
+}
+
+// Stores deep copies of the user name and password.
+void Z3950Connection::setUserPassword(const QString& user_, const QString& pword_) {
+  m_user = QDeepCopy<QString>(user_);
+  m_password = QDeepCopy<QString>(pword_);
+}
+
+void Z3950Connection::run() {
+//  myDebug() << "Z3950Connection::run() - " << m_fetcher->source() << endl;
+  m_aborted = false;
+  m_hasMore = false;
+  resultsLeft = 0;
+#ifdef HAVE_YAZ
+
+  if(!makeConnection()) {
+    done();
+    return;
+  }
+
+  ZOOM_query query = ZOOM_query_create();
+  myLog() << "Z3950Connection::run() - pqn = " << toCString(m_pqn) << endl;
+  int errcode = ZOOM_query_prefix(query, toCString(m_pqn));
+  if(errcode != 0) {
+    myDebug() << "Z3950Connection::run() - query error: " << m_pqn << endl;
+    ZOOM_query_destroy(query);
+    QString s = i18n("Query error!");
+    s += ' ' + m_pqn;
+    done(s, MessageHandler::Error);
+    return;
+  }
+
+  ZOOM_resultset resultSet = ZOOM_connection_search(d->conn, query);
+
+  // check abort status
+  if(m_aborted) {
+    done();
+    return;
+  }
+
+  // I know the LOC wants the syntax = "xml" and esn = "mods"
+  // to get MODS data, that seems a bit odd...
+ // esn only makes sense for marc and grs-1 + // if syntax is mods, set esn to mods too + QCString type = "raw"; + if(m_syntax == Latin1Literal("mods")) { + m_syntax = QString::fromLatin1("xml"); + ZOOM_resultset_option_set(resultSet, "elementSetName", "mods"); + type = "xml"; + } else { + ZOOM_resultset_option_set(resultSet, "elementSetName", m_esn.latin1()); + } + ZOOM_resultset_option_set(resultSet, "start", QCString().setNum(m_start)); + ZOOM_resultset_option_set(resultSet, "count", QCString().setNum(m_limit-m_start)); + // search in default syntax, unless syntax is already set + if(!m_syntax.isEmpty()) { + ZOOM_resultset_option_set(resultSet, "preferredRecordSyntax", m_syntax.latin1()); + } + + const char* errmsg; + const char* addinfo; + errcode = ZOOM_connection_error(d->conn, &errmsg, &addinfo); + if(errcode != 0) { + ZOOM_resultset_destroy(resultSet); + ZOOM_query_destroy(query); + m_connected = false; + + QString s = i18n("Connection search error %1: %2").arg(errcode).arg(toString(errmsg)); + if(!QCString(addinfo).isEmpty()) { + s += " (" + toString(addinfo) + ")"; + } + myDebug() << "Z3950Connection::run() - " << s << endl; + done(s, MessageHandler::Error); + return; + } + + const size_t numResults = ZOOM_resultset_size(resultSet); + + QString newSyntax = m_syntax; + if(numResults > 0) { + myLog() << "Z3950Connection::run() - current syntax is " << m_syntax << " (" << numResults << " results)" << endl; + // so now we know that results exist, might have to check syntax + int len; + ZOOM_record rec = ZOOM_resultset_record(resultSet, 0); + // want raw unless it's mods + ZOOM_record_get(rec, type, &len); + if(len > 0 && m_syntax.isEmpty()) { + newSyntax = QString::fromLatin1(ZOOM_record_get(rec, "syntax", &len)).lower(); + myLog() << "Z3950Connection::run() - syntax guess is " << newSyntax << endl; + if(newSyntax == Latin1Literal("mods") || newSyntax == Latin1Literal("xml")) { + m_syntax = QString::fromLatin1("xml"); + ZOOM_resultset_option_set(resultSet, 
"elementSetName", "mods"); + } else if(newSyntax == Latin1Literal("grs-1")) { + // if it's defaulting to grs-1, go ahead and change it to try to get a marc + // record since grs-1 is a last resort for us + newSyntax.truncate(0); + } + } + // right now, we just understand mods, unimarc, marc21/usmarc, and grs-1 + if(newSyntax != Latin1Literal("xml") && + newSyntax != Latin1Literal("usmarc") && + newSyntax != Latin1Literal("marc21") && + newSyntax != Latin1Literal("unimarc") && + newSyntax != Latin1Literal("grs-1")) { + myLog() << "Z3950Connection::run() - changing z39.50 syntax to MODS" << endl; + newSyntax = QString::fromLatin1("xml"); + ZOOM_resultset_option_set(resultSet, "elementSetName", "mods"); + ZOOM_resultset_option_set(resultSet, "preferredRecordSyntax", newSyntax.latin1()); + rec = ZOOM_resultset_record(resultSet, 0); + ZOOM_record_get(rec, "xml", &len); + if(len == 0) { + // change set name back + ZOOM_resultset_option_set(resultSet, "elementSetName", m_esn.latin1()); + newSyntax = QString::fromLatin1("usmarc"); // try usmarc + myLog() << "Z3950Connection::run() - changing z39.50 syntax to USMARC" << endl; + ZOOM_resultset_option_set(resultSet, "preferredRecordSyntax", newSyntax.latin1()); + rec = ZOOM_resultset_record(resultSet, 0); + ZOOM_record_get(rec, "raw", &len); + } + if(len == 0) { + newSyntax = QString::fromLatin1("marc21"); // try marc21 + myLog() << "Z3950Connection::run() - changing z39.50 syntax to MARC21" << endl; + ZOOM_resultset_option_set(resultSet, "preferredRecordSyntax", newSyntax.latin1()); + rec = ZOOM_resultset_record(resultSet, 0); + ZOOM_record_get(rec, "raw", &len); + } + if(len == 0) { + newSyntax = QString::fromLatin1("unimarc"); // try unimarc + myLog() << "Z3950Connection::run() - changing z39.50 syntax to UNIMARC" << endl; + ZOOM_resultset_option_set(resultSet, "preferredRecordSyntax", newSyntax.latin1()); + rec = ZOOM_resultset_record(resultSet, 0); + ZOOM_record_get(rec, "raw", &len); + } + if(len == 0) { + newSyntax = 
QString::fromLatin1("grs-1"); // try grs-1 + myLog() << "Z3950Connection::run() - changing z39.50 syntax to GRS-1" << endl; + ZOOM_resultset_option_set(resultSet, "preferredRecordSyntax", newSyntax.latin1()); + rec = ZOOM_resultset_record(resultSet, 0); + ZOOM_record_get(rec, "raw", &len); + } + if(len == 0) { + myLog() << "Z3950Connection::run() - giving up" << endl; + ZOOM_resultset_destroy(resultSet); + ZOOM_query_destroy(query); + done(i18n("Record syntax error"), MessageHandler::Error); + return; + } + myLog() << "Z3950Connection::run() - final syntax is " << newSyntax << endl; + } + } + + // go back to fooling ourselves and calling it mods + if(m_syntax == Latin1Literal("xml")) { + m_syntax = QString::fromLatin1("mods"); + } + if(newSyntax == Latin1Literal("xml")) { + newSyntax = QString::fromLatin1("mods"); + } + // save syntax change for next time + if(m_syntax != newSyntax) { + kapp->postEvent(m_fetcher, new Z3950SyntaxChange(newSyntax)); + m_syntax = newSyntax; + } + + if(m_sourceCharSet.isEmpty()) { + m_sourceCharSet = QString::fromLatin1("marc-8"); + } + + const size_t realLimit = QMIN(numResults, m_limit); + + for(size_t i = m_start; i < realLimit && !m_aborted; ++i) { + myLog() << "Z3950Connection::run() - grabbing index " << i << endl; + ZOOM_record rec = ZOOM_resultset_record(resultSet, i); + if(!rec) { + myDebug() << "Z3950Connection::run() - no record returned for index " << i << endl; + continue; + } + int len; + QString data; + if(m_syntax == Latin1Literal("mods")) { + data = toString(ZOOM_record_get(rec, "xml", &len)); + } else if(m_syntax == Latin1Literal("grs-1")) { // grs-1 + // we're going to parse the rendered data, very ugly... 
+ data = toString(ZOOM_record_get(rec, "render", &len)); + } else { +#if 0 + kdWarning() << "Remove debug from z3950connection.cpp" << endl; + { + QFile f1(QString::fromLatin1("/tmp/z3950.raw")); + if(f1.open(IO_WriteOnly)) { + QDataStream t(&f1); + t << ZOOM_record_get(rec, "raw", &len); + } + f1.close(); + } +#endif + data = toXML(ZOOM_record_get(rec, "raw", &len), m_sourceCharSet); + } + Z3950ResultFound* ev = new Z3950ResultFound(data); + QApplication::postEvent(m_fetcher, ev); + } + + ZOOM_resultset_destroy(resultSet); + ZOOM_query_destroy(query); + + m_hasMore = m_limit < numResults; + if(m_hasMore) { + m_start = m_limit; + m_limit += Z3950_DEFAULT_MAX_RECORDS; + } +#endif + done(); +} + +bool Z3950Connection::makeConnection() { + if(m_connected) { + return true; + } +// myDebug() << "Z3950Connection::makeConnection() - " << m_fetcher->source() << endl; +// I don't know what to do except assume database, user, and password are in locale encoding +#ifdef HAVE_YAZ + d->conn_opt = ZOOM_options_create(); + ZOOM_options_set(d->conn_opt, "implementationName", "Tellico"); + ZOOM_options_set(d->conn_opt, "databaseName", toCString(m_dbname)); + ZOOM_options_set(d->conn_opt, "user", toCString(m_user)); + ZOOM_options_set(d->conn_opt, "password", toCString(m_password)); + + d->conn = ZOOM_connection_create(d->conn_opt); + ZOOM_connection_connect(d->conn, m_host.latin1(), m_port); + + int errcode; + const char* errmsg; // unused: carries same info as 'errcode' + const char* addinfo; + errcode = ZOOM_connection_error(d->conn, &errmsg, &addinfo); + if(errcode != 0) { + ZOOM_options_destroy(d->conn_opt); + ZOOM_connection_destroy(d->conn); + m_connected = false; + + QString s = i18n("Connection error %1: %2").arg(errcode).arg(toString(errmsg)); + if(!QCString(addinfo).isEmpty()) { + s += " (" + toString(addinfo) + ")"; + } + myDebug() << "Z3950Connection::makeConnection() - " << s << endl; + done(s, MessageHandler::Error); + return false; + } +#endif + m_connected = true; + 
return true; +} + +void Z3950Connection::done() { + checkPendingEvents(); + kapp->postEvent(m_fetcher, new Z3950ConnectionDone(m_hasMore)); +} + +void Z3950Connection::done(const QString& msg_, int type_) { + checkPendingEvents(); + if(m_aborted) { + kapp->postEvent(m_fetcher, new Z3950ConnectionDone(m_hasMore)); + } else { + kapp->postEvent(m_fetcher, new Z3950ConnectionDone(m_hasMore, msg_, type_)); + } +} + +void Z3950Connection::checkPendingEvents() { + // if there's still some pending result events, go ahead and just wait 1 second + if(resultsLeft > 0) { + sleep(1); + } +} + +inline +QCString Z3950Connection::toCString(const QString& text_) { + return iconvRun(text_.utf8(), QString::fromLatin1("utf-8"), m_sourceCharSet); +} + +inline +QString Z3950Connection::toString(const QCString& text_) { + return QString::fromUtf8(iconvRun(text_, m_sourceCharSet, QString::fromLatin1("utf-8"))); +} + +// static +QCString Z3950Connection::iconvRun(const QCString& text_, const QString& fromCharSet_, const QString& toCharSet_) { +#ifdef HAVE_YAZ + if(text_.isEmpty()) { + return text_; + } + + if(fromCharSet_ == toCharSet_) { + return text_; + } + + yaz_iconv_t cd = yaz_iconv_open(toCharSet_.latin1(), fromCharSet_.latin1()); + if(!cd) { + // maybe it's iso 5426, which we sorta support + QString charSetLower = fromCharSet_.lower(); + charSetLower.remove('-').remove(' '); + if(charSetLower == Latin1Literal("iso5426")) { + return iconvRun(Iso5426Converter::toUtf8(text_).utf8(), QString::fromLatin1("utf-8"), toCharSet_); + } else if(charSetLower == Latin1Literal("iso6937")) { + return iconvRun(Iso6937Converter::toUtf8(text_).utf8(), QString::fromLatin1("utf-8"), toCharSet_); + } + kdWarning() << "Z3950Connection::iconvRun() - conversion from " << fromCharSet_ + << " to " << toCharSet_ << " is unsupported" << endl; + return text_; + } + + const char* input = text_; + size_t inlen = text_.length(); + + size_t outlen = 2 * inlen; // this is enough, right? 
+ QMemArray<char> result0(outlen); + char* result = result0.data(); + + int r = yaz_iconv(cd, const_cast<char**>(&input), &inlen, &result, &outlen); + if(r <= 0) { + myDebug() << "Z3950Connection::iconvRun() - can't decode buffer" << endl; + return text_; + } + // bug in yaz, need to flush buffer to catch last character + yaz_iconv(cd, 0, 0, &result, &outlen); + + // length is pointer difference + size_t len = result - result0; + + QCString output = QCString(result0, len+1); +// myDebug() << "-------------------------------------------" << endl; +// myDebug() << output << endl; +// myDebug() << "-------------------------------------------" << endl; + yaz_iconv_close(cd); + return output; +#endif + return text_; +} + +QString Z3950Connection::toXML(const QCString& marc_, const QString& charSet_) { +#ifdef HAVE_YAZ + if(marc_.isEmpty()) { + myDebug() << "Z3950Connection::toXML() - empty string" << endl; + return QString::null; + } + + yaz_iconv_t cd = yaz_iconv_open("utf-8", charSet_.latin1()); + if(!cd) { + // maybe it's iso 5426, which we sorta support + QString charSetLower = charSet_.lower(); + charSetLower.remove('-').remove(' '); + if(charSetLower == Latin1Literal("iso5426")) { + return toXML(Iso5426Converter::toUtf8(marc_).utf8(), QString::fromLatin1("utf-8")); + } else if(charSetLower == Latin1Literal("iso6937")) { + return toXML(Iso6937Converter::toUtf8(marc_).utf8(), QString::fromLatin1("utf-8")); + } + kdWarning() << "Z3950Connection::toXML() - conversion from " << charSet_ << " is unsupported" << endl; + return QString::null; + } + + yaz_marc_t mt = yaz_marc_create(); + yaz_marc_iconv(mt, cd); + yaz_marc_xml(mt, YAZ_MARC_MARCXML); + + // first 5 bytes are length + bool ok; +#if YAZ_VERSIONL < 0x030000 + int len = marc_.left(5).toInt(&ok); +#else + size_t len = marc_.left(5).toInt(&ok); +#endif + if(ok && (len < 25 || len > 100000)) { + myDebug() << "Z3950Connection::toXML() - bad length: " << (ok ? 
len : -1) << endl; + return QString::null; + } + +#if YAZ_VERSIONL < 0x030000 + char* result; +#else + const char* result; +#endif + int r = yaz_marc_decode_buf(mt, marc_, -1, &result, &len); + if(r <= 0) { + myDebug() << "Z3950Connection::toXML() - can't decode buffer" << endl; + return QString::null; + } + + QString output = QString::fromLatin1("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"); + output += QString::fromUtf8(QCString(result, len+1), len+1); +// myDebug() << QCString(result) << endl; +// myDebug() << "-------------------------------------------" << endl; +// myDebug() << output << endl; + yaz_iconv_close(cd); + yaz_marc_destroy(mt); + + return output; +#else // no yaz + return QString::null; +#endif +} diff --git a/src/fetch/z3950connection.h b/src/fetch/z3950connection.h new file mode 100644 index 0000000..0929cb7 --- /dev/null +++ b/src/fetch/z3950connection.h @@ -0,0 +1,126 @@ +/*************************************************************************** + copyright : (C) 2005-2006 by Robby Stephenson + email : [email protected] + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef TELLICO_FETCH_Z3950CONNECTION_H +#define TELLICO_FETCH_Z3950CONNECTION_H + +#include <qthread.h> +#include <qevent.h> +#include <qdeepcopy.h> + +#include <ksharedptr.h> + +namespace Tellico { + namespace Fetch { + class Z3950Fetcher; + +class Z3950ResultFound : public QCustomEvent { +public: + Z3950ResultFound(const QString& s); + ~Z3950ResultFound(); + const QString& result() const { return m_result; } + + static int uid() { return User + 11111; } + +private: + QString 
m_result; +}; + +class Z3950ConnectionDone : public QCustomEvent { +public: + Z3950ConnectionDone(bool more) : QCustomEvent(uid()), m_type(-1), m_hasMore(more) {} + Z3950ConnectionDone(bool more, const QString& s, int t) : QCustomEvent(uid()), m_msg(QDeepCopy<QString>(s)), m_type(t), m_hasMore(more) {} + + const QString& message() const { return m_msg; } + int messageType() const { return m_type; } + bool hasMoreResults() const { return m_hasMore; } + + static int uid() { return User + 22222; } + +private: + QString m_msg; + int m_type; + bool m_hasMore; +}; + +class Z3950SyntaxChange : public QCustomEvent { +public: + Z3950SyntaxChange(const QString& s) : QCustomEvent(uid()), m_syntax(QDeepCopy<QString>(s)) {} + const QString& syntax() const { return m_syntax; } + + static int uid() { return User + 33333; } + +private: + QString m_syntax; +}; + +/** + * @author Robby Stephenson + */ +class Z3950Connection : public QThread { +public: + Z3950Connection(Z3950Fetcher* fetcher, + const QString& host, + uint port, + const QString& dbname, + const QString& sourceCharSet, + const QString& syntax, + const QString& esn); + ~Z3950Connection(); + + void reset(); + void setQuery(const QString& query); + void setUserPassword(const QString& user, const QString& pword); + void run(); + + void abort() { m_aborted = true; } + +private: + static QCString iconvRun(const QCString& text, const QString& fromCharSet, const QString& toCharSet); + static QString toXML(const QCString& marc, const QString& fromCharSet); + + bool makeConnection(); + void done(); + void done(const QString& message, int type); + QCString toCString(const QString& text); + QString toString(const QCString& text); + void checkPendingEvents(); + + class Private; + Private* d; + + bool m_connected; + bool m_aborted; + + KSharedPtr<Z3950Fetcher> m_fetcher; + QString m_host; + uint m_port; + QString m_dbname; + QString m_user; + QString m_password; + QString m_sourceCharSet; + QString m_syntax; + QString m_pqn; + 
QString m_esn; + size_t m_start; + size_t m_limit; + bool m_hasMore; + + friend class Z3950ResultFound; + static int resultsLeft; +}; + + } // end namespace +} // end namespace + +#endif diff --git a/src/fetch/z3950fetcher.cpp b/src/fetch/z3950fetcher.cpp new file mode 100644 index 0000000..5e045cf --- /dev/null +++ b/src/fetch/z3950fetcher.cpp @@ -0,0 +1,782 @@ +/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : [email protected] + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + * In addition, as a special exception, the author gives permission to * + * link the code of this program with the OpenSSL library released by * + * the OpenSSL Project (or with modified versions of OpenSSL that use * + * the same license as OpenSSL), and distribute linked combinations * + * including the two. You must obey the GNU General Public License in * + * all respects for all of the code used other than OpenSSL. If you * + * modify this file, you may extend this exception to your version of * + * the file, but you are not obligated to do so. If you do not wish to * + * do so, delete this exception statement from your version. 
* + * * + ***************************************************************************/ + +#include <config.h> + +#include "z3950fetcher.h" +#include "z3950connection.h" +#include "messagehandler.h" +#include "../collection.h" +#include "../latin1literal.h" +#include "../translators/xslthandler.h" +#include "../translators/tellicoimporter.h" +#include "../translators/grs1importer.h" +#include "../tellico_debug.h" +#include "../gui/lineedit.h" +#include "../gui/combobox.h" +#include "../isbnvalidator.h" +#include "../lccnvalidator.h" + +#include <klocale.h> +#include <kstandarddirs.h> +#include <kapplication.h> +#include <knuminput.h> +#include <kconfig.h> +#include <kcombobox.h> +#include <kaccelmanager.h> +#include <kseparator.h> + +#include <qfile.h> +#include <qlayout.h> +#include <qlabel.h> +#include <qwhatsthis.h> +#include <qdom.h> + +namespace { + static const int Z3950_DEFAULT_PORT = 210; + static const QString Z3950_DEFAULT_ESN = QString::fromLatin1("F"); +} + +using Tellico::Fetch::Z3950Fetcher; + +Z3950Fetcher::Z3950Fetcher(QObject* parent_, const char* name_) + : Fetcher(parent_, name_), m_conn(0), m_port(Z3950_DEFAULT_PORT), m_esn(Z3950_DEFAULT_ESN), + m_started(false), m_done(true), m_MARC21XMLHandler(0), + m_UNIMARCXMLHandler(0), m_MODSHandler(0) { +} + +Z3950Fetcher::~Z3950Fetcher() { + delete m_MARC21XMLHandler; + m_MARC21XMLHandler = 0; + delete m_UNIMARCXMLHandler; + m_UNIMARCXMLHandler = 0; + delete m_MODSHandler; + m_MODSHandler = 0; + delete m_conn; + m_conn = 0; +} + +QString Z3950Fetcher::defaultName() { + return i18n("z39.50 Server"); +} + +QString Z3950Fetcher::source() const { + return m_name.isEmpty() ? 
defaultName() : m_name; +} + +bool Z3950Fetcher::canFetch(int type) const { + return type == Data::Collection::Book || type == Data::Collection::Bibtex; +} + +void Z3950Fetcher::readConfigHook(const KConfigGroup& config_) { + QString preset = config_.readEntry("Preset"); + if(preset.isEmpty()) { + m_host = config_.readEntry("Host"); + int p = config_.readNumEntry("Port", Z3950_DEFAULT_PORT); + if(p > 0) { + m_port = p; + } + m_dbname = config_.readEntry("Database"); + m_sourceCharSet = config_.readEntry("Charset"); + m_syntax = config_.readEntry("Syntax"); + m_user = config_.readEntry("User"); + m_password = config_.readEntry("Password"); + } else { + m_preset = preset; + QString serverFile = locate("appdata", QString::fromLatin1("z3950-servers.cfg")); + if(!serverFile.isEmpty()) { + KConfig cfg(serverFile, true /* read-only */, false /* read KDE */); + const QStringList servers = cfg.groupList(); + for(QStringList::ConstIterator server = servers.begin(); server != servers.end(); ++server) { + cfg.setGroup(*server); + + const QString id = *server; + if(id == preset) { + const QString name = cfg.readEntry("Name"); + m_host = cfg.readEntry("Host"); + m_port = cfg.readNumEntry("Port", Z3950_DEFAULT_PORT); + m_dbname = cfg.readEntry("Database"); + m_sourceCharSet = cfg.readEntry("Charset"); + m_syntax = cfg.readEntry("Syntax"); + m_user = cfg.readEntry("User"); + m_password = cfg.readEntry("Password"); + } + } + } + } + + m_fields = config_.readListEntry("Custom Fields"); +} + +void Z3950Fetcher::saveConfigHook(KConfigGroup& config_) { + config_.writeEntry("Syntax", m_syntax); + config_.sync(); +} + +void Z3950Fetcher::search(FetchKey key_, const QString& value_) { +#ifdef HAVE_YAZ + m_started = true; + m_done = false; + if(m_host.isEmpty() || m_dbname.isEmpty()) { + myDebug() << "Z3950Fetcher::search() - settings are not set!" 
<< endl; + stop(); + return; + } + m_key = key_; + m_value = value_; + m_started = true; + + QString svalue = m_value; + QRegExp rx1(QString::fromLatin1("['\"].*\\1")); + if(!rx1.exactMatch(svalue)) { + svalue.prepend('"').append('"'); + } + + switch(key_) { + case Title: + m_pqn = QString::fromLatin1("@attr 1=4 ") + svalue; + break; + case Person: +// m_pqn = QString::fromLatin1("@or "); +// m_pqn += QString::fromLatin1("@attr 1=1 \"") + m_value + '"'; + m_pqn = QString::fromLatin1(" @attr 1=1003 ") + svalue; + break; + case ISBN: + { + m_pqn.truncate(0); + QString s = m_value; + s.remove('-'); + QStringList isbnList = QStringList::split(QString::fromLatin1("; "), s); + // also going to search for isbn10 values + for(QStringList::Iterator it = isbnList.begin(); it != isbnList.end(); ++it) { + if((*it).startsWith(QString::fromLatin1("978"))) { + QString isbn10 = ISBNValidator::isbn10(*it); + isbn10.remove('-'); + isbnList.insert(it, isbn10); + } + } + const int count = isbnList.count(); + if(count > 1) { + m_pqn = QString::fromLatin1("@or "); + } + for(int i = 0; i < count; ++i) { + m_pqn += QString::fromLatin1(" @attr 1=7 ") + isbnList[i]; + if(i < count-2) { + m_pqn += QString::fromLatin1(" @or"); + } + } + } + break; + case LCCN: + { + m_pqn.truncate(0); + QString s = m_value; + s.remove('-'); + QStringList lccnList = QStringList::split(QString::fromLatin1("; "), s); + while(!lccnList.isEmpty()) { + m_pqn += QString::fromLatin1(" @or @attr 1=9 ") + lccnList.front(); + if(lccnList.count() > 1) { + m_pqn += QString::fromLatin1(" @or"); + } + m_pqn += QString::fromLatin1(" @attr 1=9 ") + LCCNValidator::formalize(lccnList.front()); + lccnList.pop_front(); + } + } + break; + case Keyword: + m_pqn = QString::fromLatin1("@attr 1=1016 ") + svalue; + break; + case Raw: + m_pqn = m_value; + break; + default: + kdWarning() << "Z3950Fetcher::search() - key not recognized: " << key_ << endl; + stop(); + return; + } +// m_pqn = QString::fromLatin1("@attr 1=7 0253333490"); + 
myLog() << "Z3950Fetcher::search() - PQN query = " << m_pqn << endl; + + if(m_conn) { + m_conn->reset(); // reset counts + } + + process(); +#else // HAVE_YAZ + Q_UNUSED(key_); + Q_UNUSED(value_); + stop(); + return; +#endif +} + +void Z3950Fetcher::continueSearch() { +#ifdef HAVE_YAZ + m_started = true; + process(); +#endif +} + +void Z3950Fetcher::stop() { + if(!m_started) { + return; + } +// myDebug() << "Z3950Fetcher::stop()" << endl; + m_started = false; + if(m_conn) { + // give it a second to cleanup + m_conn->abort(); + m_conn->wait(1000); + } + emit signalDone(this); +} + +bool Z3950Fetcher::initMARC21Handler() { + if(m_MARC21XMLHandler) { + return true; + } + + QString xsltfile = locate("appdata", QString::fromLatin1("MARC21slim2MODS3.xsl")); + if(xsltfile.isEmpty()) { + kdWarning() << "Z3950Fetcher::initHandlers() - can not locate MARC21slim2MODS3.xsl." << endl; + return false; + } + + KURL u; + u.setPath(xsltfile); + + m_MARC21XMLHandler = new XSLTHandler(u); + if(!m_MARC21XMLHandler->isValid()) { + kdWarning() << "Z3950Fetcher::initHandlers() - error in MARC21slim2MODS3.xsl." << endl; + delete m_MARC21XMLHandler; + m_MARC21XMLHandler = 0; + return false; + } + return true; +} + +bool Z3950Fetcher::initUNIMARCHandler() { + if(m_UNIMARCXMLHandler) { + return true; + } + + QString xsltfile = locate("appdata", QString::fromLatin1("UNIMARC2MODS3.xsl")); + if(xsltfile.isEmpty()) { + kdWarning() << "Z3950Fetcher::initHandlers() - can not locate UNIMARC2MODS3.xsl." << endl; + return false; + } + + KURL u; + u.setPath(xsltfile); + + m_UNIMARCXMLHandler = new XSLTHandler(u); + if(!m_UNIMARCXMLHandler->isValid()) { + kdWarning() << "Z3950Fetcher::initHandlers() - error in UNIMARC2MODS3.xsl." 
<< endl; + delete m_UNIMARCXMLHandler; + m_UNIMARCXMLHandler = 0; + return false; + } + return true; +} + +bool Z3950Fetcher::initMODSHandler() { + if(m_MODSHandler) { + return true; + } + + QString xsltfile = locate("appdata", QString::fromLatin1("mods2tellico.xsl")); + if(xsltfile.isEmpty()) { + kdWarning() << "Z3950Fetcher::initHandlers() - can not locate mods2tellico.xsl." << endl; + return false; + } + + KURL u; + u.setPath(xsltfile); + + m_MODSHandler = new XSLTHandler(u); + if(!m_MODSHandler->isValid()) { + kdWarning() << "Z3950Fetcher::initHandlers() - error in mods2tellico.xsl." << endl; + delete m_MODSHandler; + m_MODSHandler = 0; + // no use in keeping the MARC handlers now + delete m_MARC21XMLHandler; + m_MARC21XMLHandler = 0; + delete m_UNIMARCXMLHandler; + m_UNIMARCXMLHandler = 0; + return false; + } + return true; +} + +void Z3950Fetcher::process() { + if(m_conn) { + m_conn->wait(); + } else { + m_conn = new Z3950Connection(this, m_host, m_port, m_dbname, m_sourceCharSet, m_syntax, m_esn); + if(!m_user.isEmpty()) { + m_conn->setUserPassword(m_user, m_password); + } + } + + m_conn->setQuery(m_pqn); + m_conn->start(); +} + +void Z3950Fetcher::handleResult(const QString& result_) { + if(result_.isEmpty()) { + myDebug() << "Z3950Fetcher::handleResult() - empty record found, maybe the character encoding or record format is wrong?" 
<< endl; + return; + } + +#if 0 + kdWarning() << "Remove debug from z3950fetcher.cpp" << endl; + { + QFile f1(QString::fromLatin1("/tmp/marc.xml")); + if(f1.open(IO_WriteOnly)) { +// if(f1.open(IO_WriteOnly | IO_Append)) { + QTextStream t(&f1); + t.setEncoding(QTextStream::UnicodeUTF8); + t << result_; + } + f1.close(); + } +#endif + // assume always utf-8 + QString str, msg; + Data::CollPtr coll = 0; + // not marc, has to be grs-1 + if(m_syntax == Latin1Literal("grs-1")) { + Import::GRS1Importer imp(result_); + coll = imp.collection(); + msg = imp.statusMessage(); + } else { // now the MODS stuff + if(m_syntax == Latin1Literal("mods")) { + str = result_; + } else if(m_syntax == Latin1Literal("unimarc") && initUNIMARCHandler()) { + str = m_UNIMARCXMLHandler->applyStylesheet(result_); + } else if(initMARC21Handler()) { // got to be usmarc/marc21 + str = m_MARC21XMLHandler->applyStylesheet(result_); + } + if(str.isEmpty() || !initMODSHandler()) { + myDebug() << "Z3950Fetcher::handleResult() - empty string or can't init" << endl; + stop(); + return; + } +#if 0 + kdWarning() << "Remove debug from z3950fetcher.cpp" << endl; + { + QFile f2(QString::fromLatin1("/tmp/mods.xml")); +// if(f2.open(IO_WriteOnly)) { + if(f2.open(IO_WriteOnly | IO_Append)) { + QTextStream t(&f2); + t.setEncoding(QTextStream::UnicodeUTF8); + t << str; + } + f2.close(); + } +#endif + Import::TellicoImporter imp(m_MODSHandler->applyStylesheet(str)); + imp.setOptions(imp.options() & ~Import::ImportProgress); // no progress needed + coll = imp.collection(); + msg = imp.statusMessage(); + } + + if(!coll) { + if(!msg.isEmpty()) { + message(msg, MessageHandler::Warning); + } + myDebug() << "Z3950Fetcher::handleResult() - no collection pointer: " << msg << endl; + return; + } + + if(coll->entryCount() == 0) { +// myDebug() << "Z3950Fetcher::handleResult() - no Tellico entry in result" << endl; + return; + } + + const StringMap customFields = Z3950Fetcher::customFields(); + for(StringMap::ConstIterator it 
= customFields.begin(); it != customFields.end(); ++it) { + if(!m_fields.contains(it.key())) { + coll->removeField(it.key()); + } + } + + Data::EntryVec entries = coll->entries(); + for(Data::EntryVec::Iterator entry = entries.begin(); entry != entries.end(); ++entry) { + QString desc = entry->field(QString::fromLatin1("author")) + '/' + + entry->field(QString::fromLatin1("publisher")); + if(!entry->field(QString::fromLatin1("cr_year")).isEmpty()) { + desc += QChar('/') + entry->field(QString::fromLatin1("cr_year")); + } else if(!entry->field(QString::fromLatin1("pub_year")).isEmpty()){ + desc += QChar('/') + entry->field(QString::fromLatin1("pub_year")); + } + SearchResult* r = new SearchResult(this, entry->title(), desc, entry->field(QString::fromLatin1("isbn"))); + m_entries.insert(r->uid, entry); + emit signalResultFound(r); + } +} + +void Z3950Fetcher::done() { + m_done = true; + stop(); +} + +Tellico::Data::EntryPtr Z3950Fetcher::fetchEntry(uint uid_) { + return m_entries[uid_]; +} + +void Z3950Fetcher::customEvent(QCustomEvent* event_) { + if(!m_conn) { + return; + } + + if(event_->type() == Z3950ResultFound::uid()) { + if(m_done) { + kdWarning() << "Z3950Fetcher::customEvent() - result returned after done signal!" << endl; + } + Z3950ResultFound* e = static_cast<Z3950ResultFound*>(event_); + handleResult(e->result()); + } else if(event_->type() == Z3950ConnectionDone::uid()) { + Z3950ConnectionDone* e = static_cast<Z3950ConnectionDone*>(event_); + if(e->messageType() > -1) { + message(e->message(), e->messageType()); + } + m_hasMoreResults = e->hasMoreResults(); + m_conn->wait(); + done(); + } else if(event_->type() == Z3950SyntaxChange::uid()) { + if(m_done) { + kdWarning() << "Z3950Fetcher::customEvent() - syntax changed after done signal!" 
<< endl; + } + Z3950SyntaxChange* e = static_cast<Z3950SyntaxChange*>(event_); + if(m_syntax != e->syntax()) { + m_syntax = e->syntax(); + // it gets saved when saveConfigHook() get's called from the Fetcher() d'tor + } + } else { + kdWarning() << "Z3950Fetcher::customEvent() - weird type: " << event_->type() << endl; + } +} + +void Z3950Fetcher::updateEntry(Data::EntryPtr entry_) { +// myDebug() << "Z3950Fetcher::updateEntry() - " << source() << ": " << entry_->title() << endl; + QString isbn = entry_->field(QString::fromLatin1("isbn")); + if(!isbn.isEmpty()) { + search(Fetch::ISBN, isbn); + return; + } + + QString lccn = entry_->field(QString::fromLatin1("lccn")); + if(!lccn.isEmpty()) { + search(Fetch::LCCN, lccn); + return; + } + + // optimistically try searching for title and rely on Collection::sameEntry() to figure things out + QString t = entry_->field(QString::fromLatin1("title")); + if(!t.isEmpty()) { + search(Fetch::Title, t); + return; + } + + myDebug() << "Z3950Fetcher::updateEntry() - insufficient info to search" << endl; + emit signalDone(this); // always need to emit this if not continuing with the search +} + +Tellico::Fetch::ConfigWidget* Z3950Fetcher::configWidget(QWidget* parent_) const { + return new Z3950Fetcher::ConfigWidget(parent_, this); +} + +Z3950Fetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const Z3950Fetcher* fetcher_/*=0*/) + : Fetch::ConfigWidget(parent_) { + QGridLayout* l = new QGridLayout(optionsWidget(), 7, 2); + l->setSpacing(4); + l->setColStretch(1, 10); + + int row = -1; + + m_usePreset = new QCheckBox(i18n("Use preset &server:"), optionsWidget()); + l->addWidget(m_usePreset, ++row, 0); + connect(m_usePreset, SIGNAL(toggled(bool)), SLOT(slotTogglePreset(bool))); + m_serverCombo = new GUI::ComboBox(optionsWidget()); + connect(m_serverCombo, SIGNAL(activated(int)), SLOT(slotPresetChanged())); + l->addWidget(m_serverCombo, row, 1); + ++row; + l->addMultiCellWidget(new KSeparator(optionsWidget()), row, row, 0, 1); + 
l->setRowSpacing(row, 10);
+
+  QLabel* label = new QLabel(i18n("Hos&t: "), optionsWidget());
+  l->addWidget(label, ++row, 0);
+  m_hostEdit = new GUI::LineEdit(optionsWidget());
+  connect(m_hostEdit, SIGNAL(textChanged(const QString&)), SLOT(slotSetModified()));
+  connect(m_hostEdit, SIGNAL(textChanged(const QString&)), SIGNAL(signalName(const QString&)));
+  l->addWidget(m_hostEdit, row, 1);
+  QString w = i18n("Enter the host name of the server.");
+  QWhatsThis::add(label, w);
+  QWhatsThis::add(m_hostEdit, w);
+  label->setBuddy(m_hostEdit);
+
+  label = new QLabel(i18n("&Port: "), optionsWidget());
+  l->addWidget(label, ++row, 0);
+  m_portSpinBox = new KIntSpinBox(0, 999999, 1, Z3950_DEFAULT_PORT, 10, optionsWidget());
+  connect(m_portSpinBox, SIGNAL(valueChanged(int)), SLOT(slotSetModified()));
+  l->addWidget(m_portSpinBox, row, 1);
+  w = i18n("Enter the port number of the server. The default is %1.").arg(Z3950_DEFAULT_PORT);
+  QWhatsThis::add(label, w);
+  QWhatsThis::add(m_portSpinBox, w);
+  label->setBuddy(m_portSpinBox);
+
+  label = new QLabel(i18n("&Database: "), optionsWidget());
+  l->addWidget(label, ++row, 0);
+  m_databaseEdit = new GUI::LineEdit(optionsWidget());
+  connect(m_databaseEdit, SIGNAL(textChanged(const QString&)), SLOT(slotSetModified()));
+  l->addWidget(m_databaseEdit, row, 1);
+  w = i18n("Enter the database name used by the server.");
+  QWhatsThis::add(label, w);
+  QWhatsThis::add(m_databaseEdit, w);
+  label->setBuddy(m_databaseEdit);
+
+  label = new QLabel(i18n("Ch&aracter set: "), optionsWidget());
+  l->addWidget(label, ++row, 0);
+  m_charSetCombo = new KComboBox(true, optionsWidget());
+  m_charSetCombo->insertItem(QString::null);
+  m_charSetCombo->insertItem(QString::fromLatin1("marc8"));
+  m_charSetCombo->insertItem(QString::fromLatin1("iso-8859-1"));
+  m_charSetCombo->insertItem(QString::fromLatin1("utf-8"));
+  connect(m_charSetCombo, SIGNAL(textChanged(const QString&)), SLOT(slotSetModified()));
+  l->addWidget(m_charSetCombo, row, 1);
+  w = i18n("Enter the character set encoding used by the z39.50 server. The most likely choice "
+           "is MARC-8, although ISO-8859-1 is common as well.");
+  QWhatsThis::add(label, w);
+  QWhatsThis::add(m_charSetCombo, w);
+  label->setBuddy(m_charSetCombo);
+
+  label = new QLabel(i18n("&Format: "), optionsWidget());
+  l->addWidget(label, ++row, 0);
+  m_syntaxCombo = new GUI::ComboBox(optionsWidget());
+  m_syntaxCombo->insertItem(i18n("Auto-detect"), QString());
+  m_syntaxCombo->insertItem(QString::fromLatin1("MODS"), QString::fromLatin1("mods"));
+  m_syntaxCombo->insertItem(QString::fromLatin1("MARC21"), QString::fromLatin1("marc21"));
+  m_syntaxCombo->insertItem(QString::fromLatin1("UNIMARC"), QString::fromLatin1("unimarc"));
+  m_syntaxCombo->insertItem(QString::fromLatin1("USMARC"), QString::fromLatin1("usmarc"));
+  m_syntaxCombo->insertItem(QString::fromLatin1("GRS-1"), QString::fromLatin1("grs-1"));
+  connect(m_syntaxCombo, SIGNAL(textChanged(const QString&)), SLOT(slotSetModified()));
+  l->addWidget(m_syntaxCombo, row, 1);
+  w = i18n("Enter the data format used by the z39.50 server. Tellico will attempt to "
+           "automatically detect the best setting if <i>auto-detect</i> is selected.");
+  QWhatsThis::add(label, w);
+  QWhatsThis::add(m_syntaxCombo, w);
+  label->setBuddy(m_syntaxCombo);
+
+  label = new QLabel(i18n("&User: "), optionsWidget());
+  l->addWidget(label, ++row, 0);
+  m_userEdit = new GUI::LineEdit(optionsWidget());
+  m_userEdit->setHint(i18n("Optional"));
+  connect(m_userEdit, SIGNAL(textChanged(const QString&)), SLOT(slotSetModified()));
+  l->addWidget(m_userEdit, row, 1);
+  w = i18n("Enter the authentication user name used by the z39.50 database. Most servers "
+           "do not need one.");
+  QWhatsThis::add(label, w);
+  QWhatsThis::add(m_userEdit, w);
+  label->setBuddy(m_userEdit);
+
+  label = new QLabel(i18n("Pass&word: "), optionsWidget());
+  l->addWidget(label, ++row, 0);
+  m_passwordEdit = new GUI::LineEdit(optionsWidget());
+  m_passwordEdit->setHint(i18n("Optional"));
+  m_passwordEdit->setEchoMode(QLineEdit::Password);
+  connect(m_passwordEdit, SIGNAL(textChanged(const QString&)), SLOT(slotSetModified()));
+  l->addWidget(m_passwordEdit, row, 1);
+  w = i18n("Enter the authentication password used by the z39.50 database. Most servers "
+           "do not need one. The password will be saved in plain text in the Tellico "
+           "configuration file.");
+  QWhatsThis::add(label, w);
+  QWhatsThis::add(m_passwordEdit, w);
+  label->setBuddy(m_passwordEdit);
+
+  l->setRowStretch(++row, 1);
+
+  // now add additional fields widget
+  addFieldsWidget(Z3950Fetcher::customFields(), fetcher_ ? fetcher_->m_fields : QStringList());
+
+  loadPresets(fetcher_ ? fetcher_->m_preset : QString::null);
+  if(fetcher_) {
+    m_hostEdit->setText(fetcher_->m_host);
+    m_portSpinBox->setValue(fetcher_->m_port);
+    m_databaseEdit->setText(fetcher_->m_dbname);
+    m_userEdit->setText(fetcher_->m_user);
+    m_passwordEdit->setText(fetcher_->m_password);
+    m_charSetCombo->setCurrentText(fetcher_->m_sourceCharSet);
+    // the syntax is detected automatically by the fetcher
+    // since the config group gets deleted in the config file,
+    // the value needs to be retained here
+    m_syntax = fetcher_->m_syntax;
+    m_syntaxCombo->setCurrentData(m_syntax);
+  }
+  KAcceleratorManager::manage(optionsWidget());
+
+  // start with presets turned off
+  m_usePreset->setChecked(fetcher_ && !fetcher_->m_preset.isEmpty());
+
+  slotTogglePreset(m_usePreset->isChecked());
+}
+
+Z3950Fetcher::ConfigWidget::~ConfigWidget() {
+}
+
+// Writes the state of the widget into config_.
+// When a preset is checked only its id is stored under "Preset"; otherwise
+// the "Preset" entry is removed and the manually entered, non-empty values
+// are written instead.
+void Z3950Fetcher::ConfigWidget::saveConfig(KConfigGroup& config_) {
+  if(m_usePreset->isChecked()) {
+    const QString presetID = m_serverCombo->currentData().toString();
+    config_.writeEntry("Preset", presetID);
+  } else {
+    config_.deleteEntry("Preset");
+
+    QString s = m_hostEdit->text().stripWhiteSpace();
+    if(!s.isEmpty()) {
+      config_.writeEntry("Host", s);
+    }
+    const int port = m_portSpinBox->value();
+    if(port > 0) {
+      config_.writeEntry("Port", port);
+    }
+    s = m_databaseEdit->text().stripWhiteSpace();
+    if(!s.isEmpty()) {
+      config_.writeEntry("Database", s);
+    }
+    s = m_charSetCombo->currentText();
+    if(!s.isEmpty()) {
+      config_.writeEntry("Charset", s);
+    }
+    s = m_userEdit->text();
+    if(!s.isEmpty()) {
+      config_.writeEntry("User", s);
+    }
+    s = m_passwordEdit->text();
+    if(!s.isEmpty()) {
+      config_.writeEntry("Password", s);
+    }
+    // the "Auto-detect" item carries an empty value; in that case keep the
+    // syntax that was remembered from the fetcher
+    s = m_syntaxCombo->currentData().toString();
+    if(!s.isEmpty()) {
+      m_syntax = s;
+    }
+    config_.writeEntry("Syntax", m_syntax);
+  }
+
+  // slotTogglePreset() never disables the additional fields widget, so the
+  // field selection has to be saved, and the modified flag reset, for
+  // presets as well as for manual settings
+  saveFieldsConfig(config_);
+  slotSetModified(false);
+}
+
+// static
+// Returns the optional fields offered in the config widget, as a map of
+// field name to translated title.
+Tellico::StringMap Z3950Fetcher::customFields() {
+  StringMap map;
+  map[QString::fromLatin1("address")]     = i18n("Address");
+  map[QString::fromLatin1("abstract")]    = i18n("Abstract");
+  map[QString::fromLatin1("illustrator")] = i18n("Illustrator");
+  return map;
+}
+
+// Switches between the preset server combo (on) and the manual server
+// settings (off), then announces the name matching the active choice.
+void Z3950Fetcher::ConfigWidget::slotTogglePreset(bool on) {
+  m_serverCombo->setEnabled(on);
+  m_hostEdit->setEnabled(!on);
+  m_portSpinBox->setEnabled(!on);
+  m_databaseEdit->setEnabled(!on);
+  m_userEdit->setEnabled(!on);
+  m_passwordEdit->setEnabled(!on);
+  m_charSetCombo->setEnabled(!on);
+  m_syntaxCombo->setEnabled(!on);
+
+  // emit the name exactly once; an empty host name is not announced
+  if(on) {
+    emit signalName(m_serverCombo->currentText());
+  } else if(!m_hostEdit->text().isEmpty()) {
+    emit signalName(m_hostEdit->text());
+  }
+}
+
+// Announces the text of the currently selected preset as the name.
+void Z3950Fetcher::ConfigWidget::slotPresetChanged() {
+  emit signalName(m_serverCombo->currentText());
+}
+
+// Fills the preset combo from z3950-servers.cfg and selects current_ when it
+// is given; otherwise the first server (in name order) whose "Locale" entry
+// matches the user's language is selected.
+void Z3950Fetcher::ConfigWidget::loadPresets(const QString& current_) {
+  // QValueList::first() is undefined for an empty list, so check first
+  const QStringList languages = KGlobal::locale()->languageList();
+  QString lang;
+  QString lang2A;
+  if(!languages.isEmpty()) {
+    lang = languages.first();
+    QString dummy;
+    KGlobal::locale()->splitLocale(lang, lang2A, dummy, dummy);
+  }
+
+  QString serverFile = locate("appdata", QString::fromLatin1("z3950-servers.cfg"));
+  if(serverFile.isEmpty()) {
+    kdWarning() << "Z3950Fetcher::loadPresets() - no z3950 servers file found" << endl;
+    return;
+  }
+
+  int idx = -1;
+
+  KConfig cfg(serverFile, true /* read-only */, false /* read KDE */);
+  const QStringList servers = cfg.groupList();
+  // I want the list of servers sorted by name
+  QMap<QString, QString> serverNameMap;
+  for(QStringList::ConstIterator server = servers.constBegin(); server != servers.constEnd(); ++server) {
+    if((*server).isEmpty()) {
+      myDebug() << "Z3950Fetcher::ConfigWidget::loadPresets() - empty id" << endl;
+      continue;
+    }
+    cfg.setGroup(*server);
+    const QString name = cfg.readEntry("Name");
+    if(!name.isEmpty()) {
+      serverNameMap.insert(name, *server);
+    }
+  }
+  for(QMap<QString, QString>::ConstIterator it = serverNameMap.constBegin(); it != serverNameMap.constEnd(); ++it) {
+    const QString name = it.key();
+    const QString id = it.data();
+    cfg.setGroup(id);
+
+    m_serverCombo->insertItem(i18n(name.utf8()), id);
+    if(current_.isEmpty() && idx == -1) {
+      // set the initial selection to something depending on the language
+      const QStringList locales = cfg.readListEntry("Locale");
+      if(!lang.isEmpty() && (locales.findIndex(lang) > -1 || locales.findIndex(lang2A) > -1)) {
+        idx = m_serverCombo->count() - 1;
+      }
+    } else if(id == current_) {
+      idx = m_serverCombo->count() - 1;
+    }
+  }
+  if(idx > -1) {
+    m_serverCombo->setCurrentItem(idx);
+  }
+}
+
+// Returns the preset's text when a preset is used, otherwise the host name,
+// falling back to a generic label when the host is empty.
+QString Z3950Fetcher::ConfigWidget::preferredName() const {
+  if(m_usePreset->isChecked()) {
+    return m_serverCombo->currentText();
+  }
+  QString s = m_hostEdit->text();
+  return s.isEmpty() ? i18n("z39.50 Server") : s;
+}
+
+#include "z3950fetcher.moc"
diff --git a/src/fetch/z3950fetcher.h b/src/fetch/z3950fetcher.h
new file mode 100644
index 0000000..ec6dca0
--- /dev/null
+++ b/src/fetch/z3950fetcher.h
@@ -0,0 +1,158 @@
+/***************************************************************************
+    copyright            : (C) 2003-2006 by Robby Stephenson
+    email                : [email protected]
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of version 2 of the GNU General Public License as  *
+ *   published by the Free Software Foundation;                            *
+ *                                                                         *
+ *   In addition, as a special exception, the author gives permission to   *
+ *   link the code of this program with the OpenSSL library released by    *
+ *   the OpenSSL Project (or with modified versions of OpenSSL that use    *
+ *   the same license as OpenSSL), and distribute linked combinations      *
+ *   including the two. You must obey the GNU General Public License in    *
+ *   all respects for all of the code used other than OpenSSL. If you      *
+ *   modify this file, you may extend this exception to your version of    *
+ *   the file, but you are not obligated to do so. If you do not wish to   *
+ *   do so, delete this exception statement from your version.             *
+ *                                                                         *
+ ***************************************************************************/
+
+#ifndef TELLICO_Z3950FETCHER_H
+#define TELLICO_Z3950FETCHER_H
+
+namespace Tellico {
+  class XSLTHandler;
+  namespace GUI {
+    class LineEdit;
+    class ComboBox;
+  }
+}
+
+class KIntSpinBox;
+class KComboBox;
+
+#include "fetcher.h"
+#include "configwidget.h"
+#include "../datavectors.h"
+
+#include <qguardedptr.h>
+
+namespace Tellico {
+  namespace Fetch {
+    class Z3950Connection;
+
+/**
+ * Fetcher for z39.50 servers; see canSearch() for the supported search keys.
+ *
+ * @author Robby Stephenson
+ */
+class Z3950Fetcher : public Fetcher {
+Q_OBJECT
+
+public:
+  Z3950Fetcher(QObject* parent, const char* name = 0);
+
+  virtual ~Z3950Fetcher();
+
+  virtual QString source() const;
+  virtual bool isSearching() const { return m_started; }
+  virtual void search(FetchKey key, const QString& value);
+  virtual void continueSearch();
+  // can search title, person, isbn, keyword, or lccn. No UPC or Raw for now.
+  virtual bool canSearch(FetchKey k) const { return k == Title || k == Person || k == ISBN || k == Keyword || k == LCCN; }
+  virtual void stop();
+  virtual Data::EntryPtr fetchEntry(uint uid);
+  virtual Type type() const { return Z3950; }
+  virtual bool canFetch(int type) const;
+  virtual void readConfigHook(const KConfigGroup& config);
+  virtual void saveConfigHook(KConfigGroup& config);
+
+  virtual void updateEntry(Data::EntryPtr entry);
+  const QString& host() const { return m_host; }
+
+  static StringMap customFields();
+
+  virtual Fetch::ConfigWidget* configWidget(QWidget* parent) const;
+
+  class ConfigWidget;
+  friend class ConfigWidget;
+
+  static QString defaultName();
+
+protected:
+  virtual void customEvent(QCustomEvent* event);
+
+private:
+  bool initMARC21Handler();
+  bool initUNIMARCHandler();
+  bool initMODSHandler();
+  void process();
+  void handleResult(const QString& result);
+  void done();
+
+  Z3950Connection* m_conn;
+
+  QString m_host;
+  uint m_port;
+  QString m_dbname;
+  QString m_user;
+  QString m_password;
+  QString m_sourceCharSet;
+  QString m_syntax;
+  QString m_pqn; // prefix query notation
+  QString m_esn; // element set name
+
+  FetchKey m_key;
+  QString m_value;
+  QMap<int, Data::EntryPtr> m_entries;
+  bool m_started;
+  bool m_done;
+  QString m_preset;
+
+  XSLTHandler* m_MARC21XMLHandler;
+  XSLTHandler* m_UNIMARCXMLHandler;
+  XSLTHandler* m_MODSHandler;
+  QStringList m_fields;
+
+  friend class Z3950Connection;
+};
+
+/**
+ * Config widget for a Z3950Fetcher: either a preset server or manual settings.
+ */
+class Z3950Fetcher::ConfigWidget : public Fetch::ConfigWidget {
+Q_OBJECT
+
+public:
+  ConfigWidget(QWidget* parent, const Z3950Fetcher* fetcher = 0);
+  virtual ~ConfigWidget();
+  virtual void saveConfig(KConfigGroup& config_);
+  virtual QString preferredName() const;
+
+private slots:
+  void slotTogglePreset(bool on);
+  void slotPresetChanged();
+
+private:
+  void loadPresets(const QString& current);
+
+  QCheckBox* m_usePreset;
+  GUI::ComboBox* m_serverCombo;
+  GUI::LineEdit* m_hostEdit;
+  KIntSpinBox* m_portSpinBox;
+  GUI::LineEdit* m_databaseEdit;
+  GUI::LineEdit* m_userEdit;
+  GUI::LineEdit* m_passwordEdit;
+  KComboBox* m_charSetCombo;
+  GUI::ComboBox* m_syntaxCombo;
+  // have to remember syntax
+  QString m_syntax;
+};
+
+  } // end namespace
+} // end namespace
+#endif |