diff options
Diffstat (limited to 'src/fetch/z3950connection.cpp')
-rw-r--r-- | src/fetch/z3950connection.cpp | 503 |
1 files changed, 503 insertions, 0 deletions
diff --git a/src/fetch/z3950connection.cpp b/src/fetch/z3950connection.cpp new file mode 100644 index 0000000..27efe51 --- /dev/null +++ b/src/fetch/z3950connection.cpp @@ -0,0 +1,503 @@ +/*************************************************************************** + copyright : (C) 2005-2006 by Robby Stephenson + email : $EMAIL + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "z3950connection.h" +#include "z3950fetcher.h" +#include "messagehandler.h" +#include "../latin1literal.h" +#include "../tellico_debug.h" +#include "../iso5426converter.h" +#include "../iso6937converter.h" + +#include <config.h> + +#ifdef HAVE_YAZ +extern "C" { +#include <yaz/zoom.h> +#include <yaz/marcdisp.h> +#include <yaz/yaz-version.h> +} +#endif + +#include <klocale.h> + +#include <qfile.h> + +namespace { + static const size_t Z3950_DEFAULT_MAX_RECORDS = 20; +} + +using Tellico::Fetch::Z3950ResultFound; +using Tellico::Fetch::Z3950Connection; + +Z3950ResultFound::Z3950ResultFound(const QString& s) : QCustomEvent(uid()) + , m_result(QDeepCopy<QString>(s)) { + ++Z3950Connection::resultsLeft; +} + +Z3950ResultFound::~Z3950ResultFound() { + --Z3950Connection::resultsLeft; +} + +class Z3950Connection::Private { +public: + Private() {} +#ifdef HAVE_YAZ + ~Private() { + ZOOM_options_destroy(conn_opt); + ZOOM_connection_destroy(conn); + }; + + ZOOM_options conn_opt; + ZOOM_connection conn; +#endif +}; + +int Z3950Connection::resultsLeft = 0; + +// since the character set goes into a yaz api call +// I'm paranoid about user insertions, so just grab 64 +// characters at most +Z3950Connection::Z3950Connection(Z3950Fetcher* fetcher, + const QString& host, + uint port, + const QString& dbname, + const QString& sourceCharSet, + const QString& syntax, + const QString& esn) + : QThread() + , d(new Private()) + , m_connected(false) + , m_aborted(false) + , m_fetcher(fetcher) + , m_host(QDeepCopy<QString>(host)) + , m_port(port) + , m_dbname(QDeepCopy<QString>(dbname)) + , m_sourceCharSet(QDeepCopy<QString>(sourceCharSet.left(64))) + , m_syntax(QDeepCopy<QString>(syntax)) + , m_esn(QDeepCopy<QString>(esn)) + , m_start(0) + , m_limit(Z3950_DEFAULT_MAX_RECORDS) + , m_hasMore(false) { +} + +Z3950Connection::~Z3950Connection() { + m_connected = false; + delete d; + d = 0; +} + +void Z3950Connection::reset() { + m_start = 0; + m_limit = Z3950_DEFAULT_MAX_RECORDS; +} + +void Z3950Connection::setQuery(const QString& query_) { + m_pqn = QDeepCopy<QString>(query_); +} + +void Z3950Connection::setUserPassword(const QString& user_, const QString& pword_) { + m_user = QDeepCopy<QString>(user_); + m_password = QDeepCopy<QString>(pword_); +} + +void Z3950Connection::run() { +// myDebug() << "Z3950Connection::run() - " << m_fetcher->source() << endl; + m_aborted = false; + m_hasMore = false; + resultsLeft = 0; +#ifdef HAVE_YAZ + + if(!makeConnection()) { + done(); + return; + } + + ZOOM_query query = ZOOM_query_create(); + myLog() << "Z3950Connection::run() - pqn = " << toCString(m_pqn) << endl; + int errcode = ZOOM_query_prefix(query, toCString(m_pqn)); + if(errcode != 0) { + myDebug() << "Z3950Connection::run() - query error: " << m_pqn << endl; + ZOOM_query_destroy(query); + QString s = i18n("Query error!"); + s += ' ' + m_pqn; + done(s, MessageHandler::Error); + return; + } + + ZOOM_resultset resultSet = ZOOM_connection_search(d->conn, query); + + // check abort status + if(m_aborted) { + done(); + return; + } + + // I know the LOC wants the syntax = "xml" and esn = "mods" + // to get MODS data, that seems a bit odd... + // esn only makes sense for marc and grs-1 + // if syntax is mods, set esn to mods too + QCString type = "raw"; + if(m_syntax == Latin1Literal("mods")) { + m_syntax = QString::fromLatin1("xml"); + ZOOM_resultset_option_set(resultSet, "elementSetName", "mods"); + type = "xml"; + } else { + ZOOM_resultset_option_set(resultSet, "elementSetName", m_esn.latin1()); + } + ZOOM_resultset_option_set(resultSet, "start", QCString().setNum(m_start)); + ZOOM_resultset_option_set(resultSet, "count", QCString().setNum(m_limit-m_start)); + // search in default syntax, unless syntax is already set + if(!m_syntax.isEmpty()) { + ZOOM_resultset_option_set(resultSet, "preferredRecordSyntax", m_syntax.latin1()); + } + + const char* errmsg; + const char* addinfo; + errcode = ZOOM_connection_error(d->conn, &errmsg, &addinfo); + if(errcode != 0) { + ZOOM_resultset_destroy(resultSet); + ZOOM_query_destroy(query); + m_connected = false; + + QString s = i18n("Connection search error %1: %2").arg(errcode).arg(toString(errmsg)); + if(!QCString(addinfo).isEmpty()) { + s += " (" + toString(addinfo) + ")"; + } + myDebug() << "Z3950Connection::run() - " << s << endl; + done(s, MessageHandler::Error); + return; + } + + const size_t numResults = ZOOM_resultset_size(resultSet); + + QString newSyntax = m_syntax; + if(numResults > 0) { + myLog() << "Z3950Connection::run() - current syntax is " << m_syntax << " (" << numResults << " results)" << endl; + // so now we know that results exist, might have to check syntax + int len; + ZOOM_record rec = ZOOM_resultset_record(resultSet, 0); + // want raw unless it's mods + ZOOM_record_get(rec, type, &len); + if(len > 0 && m_syntax.isEmpty()) { + newSyntax = QString::fromLatin1(ZOOM_record_get(rec, "syntax", &len)).lower(); + myLog() << "Z3950Connection::run() - syntax guess is " << newSyntax << endl; + if(newSyntax == Latin1Literal("mods") || newSyntax == Latin1Literal("xml")) { + m_syntax = QString::fromLatin1("xml"); + ZOOM_resultset_option_set(resultSet, "elementSetName", "mods"); + } else if(newSyntax == Latin1Literal("grs-1")) { + // if it's defaulting to grs-1, go ahead and change it to try to get a marc + // record since grs-1 is a last resort for us + newSyntax.truncate(0); + } + } + // right now, we just understand mods, unimarc, marc21/usmarc, and grs-1 + if(newSyntax != Latin1Literal("xml") && + newSyntax != Latin1Literal("usmarc") && + newSyntax != Latin1Literal("marc21") && + newSyntax != Latin1Literal("unimarc") && + newSyntax != Latin1Literal("grs-1")) { + myLog() << "Z3950Connection::run() - changing z39.50 syntax to MODS" << endl; + newSyntax = QString::fromLatin1("xml"); + ZOOM_resultset_option_set(resultSet, "elementSetName", "mods"); + ZOOM_resultset_option_set(resultSet, "preferredRecordSyntax", newSyntax.latin1()); + rec = ZOOM_resultset_record(resultSet, 0); + ZOOM_record_get(rec, "xml", &len); + if(len == 0) { + // change set name back + ZOOM_resultset_option_set(resultSet, "elementSetName", m_esn.latin1()); + newSyntax = QString::fromLatin1("usmarc"); // try usmarc + myLog() << "Z3950Connection::run() - changing z39.50 syntax to USMARC" << endl; + ZOOM_resultset_option_set(resultSet, "preferredRecordSyntax", newSyntax.latin1()); + rec = ZOOM_resultset_record(resultSet, 0); + ZOOM_record_get(rec, "raw", &len); + } + if(len == 0) { + newSyntax = QString::fromLatin1("marc21"); // try marc21 + myLog() << "Z3950Connection::run() - changing z39.50 syntax to MARC21" << endl; + ZOOM_resultset_option_set(resultSet, "preferredRecordSyntax", newSyntax.latin1()); + rec = ZOOM_resultset_record(resultSet, 0); + ZOOM_record_get(rec, "raw", &len); + } + if(len == 0) { + newSyntax = QString::fromLatin1("unimarc"); // try unimarc + myLog() << "Z3950Connection::run() - changing z39.50 syntax to UNIMARC" << endl; + ZOOM_resultset_option_set(resultSet, "preferredRecordSyntax", newSyntax.latin1()); + rec = ZOOM_resultset_record(resultSet, 0); + ZOOM_record_get(rec, "raw", &len); + } + if(len == 0) { + newSyntax = QString::fromLatin1("grs-1"); // try grs-1 + myLog() << "Z3950Connection::run() - changing z39.50 syntax to GRS-1" << endl; + ZOOM_resultset_option_set(resultSet, "preferredRecordSyntax", newSyntax.latin1()); + rec = ZOOM_resultset_record(resultSet, 0); + ZOOM_record_get(rec, "raw", &len); + } + if(len == 0) { + myLog() << "Z3950Connection::run() - giving up" << endl; + ZOOM_resultset_destroy(resultSet); + ZOOM_query_destroy(query); + done(i18n("Record syntax error"), MessageHandler::Error); + return; + } + myLog() << "Z3950Connection::run() - final syntax is " << newSyntax << endl; + } + } + + // go back to fooling ourselves and calling it mods + if(m_syntax == Latin1Literal("xml")) { + m_syntax = QString::fromLatin1("mods"); + } + if(newSyntax == Latin1Literal("xml")) { + newSyntax = QString::fromLatin1("mods"); + } + // save syntax change for next time + if(m_syntax != newSyntax) { + kapp->postEvent(m_fetcher, new Z3950SyntaxChange(newSyntax)); + m_syntax = newSyntax; + } + + if(m_sourceCharSet.isEmpty()) { + m_sourceCharSet = QString::fromLatin1("marc-8"); + } + + const size_t realLimit = QMIN(numResults, m_limit); + + for(size_t i = m_start; i < realLimit && !m_aborted; ++i) { + myLog() << "Z3950Connection::run() - grabbing index " << i << endl; + ZOOM_record rec = ZOOM_resultset_record(resultSet, i); + if(!rec) { + myDebug() << "Z3950Connection::run() - no record returned for index " << i << endl; + continue; + } + int len; + QString data; + if(m_syntax == Latin1Literal("mods")) { + data = toString(ZOOM_record_get(rec, "xml", &len)); + } else if(m_syntax == Latin1Literal("grs-1")) { // grs-1 + // we're going to parse the rendered data, very ugly... + data = toString(ZOOM_record_get(rec, "render", &len)); + } else { +#if 0 + kdWarning() << "Remove debug from z3950connection.cpp" << endl; + { + QFile f1(QString::fromLatin1("/tmp/z3950.raw")); + if(f1.open(IO_WriteOnly)) { + QDataStream t(&f1); + t << ZOOM_record_get(rec, "raw", &len); + } + f1.close(); + } +#endif + data = toXML(ZOOM_record_get(rec, "raw", &len), m_sourceCharSet); + } + Z3950ResultFound* ev = new Z3950ResultFound(data); + QApplication::postEvent(m_fetcher, ev); + } + + ZOOM_resultset_destroy(resultSet); + ZOOM_query_destroy(query); + + m_hasMore = m_limit < numResults; + if(m_hasMore) { + m_start = m_limit; + m_limit += Z3950_DEFAULT_MAX_RECORDS; + } +#endif + done(); +} + +bool Z3950Connection::makeConnection() { + if(m_connected) { + return true; + } +// myDebug() << "Z3950Connection::makeConnection() - " << m_fetcher->source() << endl; +// I don't know what to do except assume database, user, and password are in locale encoding +#ifdef HAVE_YAZ + d->conn_opt = ZOOM_options_create(); + ZOOM_options_set(d->conn_opt, "implementationName", "Tellico"); + ZOOM_options_set(d->conn_opt, "databaseName", toCString(m_dbname)); + ZOOM_options_set(d->conn_opt, "user", toCString(m_user)); + ZOOM_options_set(d->conn_opt, "password", toCString(m_password)); + + d->conn = ZOOM_connection_create(d->conn_opt); + ZOOM_connection_connect(d->conn, m_host.latin1(), m_port); + + int errcode; + const char* errmsg; // unused: carries same info as 'errcode' + const char* addinfo; + errcode = ZOOM_connection_error(d->conn, &errmsg, &addinfo); + if(errcode != 0) { + ZOOM_options_destroy(d->conn_opt); + ZOOM_connection_destroy(d->conn); + m_connected = false; + + QString s = i18n("Connection error %1: %2").arg(errcode).arg(toString(errmsg)); + if(!QCString(addinfo).isEmpty()) { + s += " (" + toString(addinfo) + ")"; + } + myDebug() << "Z3950Connection::makeConnection() - " << s << endl; + done(s, MessageHandler::Error); + return false; + } +#endif + m_connected = true; + return true; +} + +void Z3950Connection::done() { + checkPendingEvents(); + kapp->postEvent(m_fetcher, new Z3950ConnectionDone(m_hasMore)); +} + +void Z3950Connection::done(const QString& msg_, int type_) { + checkPendingEvents(); + if(m_aborted) { + kapp->postEvent(m_fetcher, new Z3950ConnectionDone(m_hasMore)); + } else { + kapp->postEvent(m_fetcher, new Z3950ConnectionDone(m_hasMore, msg_, type_)); + } +} + +void Z3950Connection::checkPendingEvents() { + // if there's still some pending result events, go ahead and just wait 1 second + if(resultsLeft > 0) { + sleep(1); + } +} + +inline +QCString Z3950Connection::toCString(const QString& text_) { + return iconvRun(text_.utf8(), QString::fromLatin1("utf-8"), m_sourceCharSet); +} + +inline +QString Z3950Connection::toString(const QCString& text_) { + return QString::fromUtf8(iconvRun(text_, m_sourceCharSet, QString::fromLatin1("utf-8"))); +} + +// static +QCString Z3950Connection::iconvRun(const QCString& text_, const QString& fromCharSet_, const QString& toCharSet_) { +#ifdef HAVE_YAZ + if(text_.isEmpty()) { + return text_; + } + + if(fromCharSet_ == toCharSet_) { + return text_; + } + + yaz_iconv_t cd = yaz_iconv_open(toCharSet_.latin1(), fromCharSet_.latin1()); + if(!cd) { + // maybe it's iso 5426, which we sorta support + QString charSetLower = fromCharSet_.lower(); + charSetLower.remove('-').remove(' '); + if(charSetLower == Latin1Literal("iso5426")) { + return iconvRun(Iso5426Converter::toUtf8(text_).utf8(), QString::fromLatin1("utf-8"), toCharSet_); + } else if(charSetLower == Latin1Literal("iso6937")) { + return iconvRun(Iso6937Converter::toUtf8(text_).utf8(), QString::fromLatin1("utf-8"), toCharSet_); + } + kdWarning() << "Z3950Connection::iconvRun() - conversion from " << fromCharSet_ + << " to " << toCharSet_ << " is unsupported" << endl; + return text_; + } + + const char* input = text_; + size_t inlen = text_.length(); + + size_t outlen = 2 * inlen; // this is enough, right? + QMemArray<char> result0(outlen); + char* result = result0.data(); + + int r = yaz_iconv(cd, const_cast<char**>(&input), &inlen, &result, &outlen); + if(r <= 0) { + myDebug() << "Z3950Connection::iconvRun() - can't decode buffer" << endl; + return text_; + } + // bug in yaz, need to flush buffer to catch last character + yaz_iconv(cd, 0, 0, &result, &outlen); + + // length is pointer difference + size_t len = result - result0; + + QCString output = QCString(result0, len+1); +// myDebug() << "-------------------------------------------" << endl; +// myDebug() << output << endl; +// myDebug() << "-------------------------------------------" << endl; + yaz_iconv_close(cd); + return output; +#endif + return text_; +} + +QString Z3950Connection::toXML(const QCString& marc_, const QString& charSet_) { +#ifdef HAVE_YAZ + if(marc_.isEmpty()) { + myDebug() << "Z3950Connection::toXML() - empty string" << endl; + return QString::null; + } + + yaz_iconv_t cd = yaz_iconv_open("utf-8", charSet_.latin1()); + if(!cd) { + // maybe it's iso 5426, which we sorta support + QString charSetLower = charSet_.lower(); + charSetLower.remove('-').remove(' '); + if(charSetLower == Latin1Literal("iso5426")) { + return toXML(Iso5426Converter::toUtf8(marc_).utf8(), QString::fromLatin1("utf-8")); + } else if(charSetLower == Latin1Literal("iso6937")) { + return toXML(Iso6937Converter::toUtf8(marc_).utf8(), QString::fromLatin1("utf-8")); + } + kdWarning() << "Z3950Connection::toXML() - conversion from " << charSet_ << " is unsupported" << endl; + return QString::null; + } + + yaz_marc_t mt = yaz_marc_create(); + yaz_marc_iconv(mt, cd); + yaz_marc_xml(mt, YAZ_MARC_MARCXML); + + // first 5 bytes are length + bool ok; +#if YAZ_VERSIONL < 0x030000 + int len = marc_.left(5).toInt(&ok); +#else + size_t len = marc_.left(5).toInt(&ok); +#endif + if(ok && (len < 25 || len > 100000)) { + myDebug() << "Z3950Connection::toXML() - bad length: " << (ok ? len : -1) << endl; + return QString::null; + } + +#if YAZ_VERSIONL < 0x030000 + char* result; +#else + const char* result; +#endif + int r = yaz_marc_decode_buf(mt, marc_, -1, &result, &len); + if(r <= 0) { + myDebug() << "Z3950Connection::toXML() - can't decode buffer" << endl; + return QString::null; + } + + QString output = QString::fromLatin1("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"); + output += QString::fromUtf8(QCString(result, len+1), len+1); +// myDebug() << QCString(result) << endl; +// myDebug() << "-------------------------------------------" << endl; +// myDebug() << output << endl; + yaz_iconv_close(cd); + yaz_marc_destroy(mt); + + return output; +#else // no yaz + return QString::null; +#endif +} |