/***************************************************************************
    copyright            : (C) 2005-2006 by Robby Stephenson
    email                : $EMAIL
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This file has been modified to match the requirements of KBibTeX.     *
 *   In case of problems or bugs arising from this implementation, please  *
 *   contact the KBibTeX team first.                                       *
 *                             Thomas Fischer <fischer@unix-ag.uni-kl.de>  *
 *                                                                         *
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of version 2 of the GNU General Public License as  *
 *   published by the Free Software Foundation;                            *
 *                                                                         *
 ***************************************************************************/

#include "z3950connection.h"
// #include "z3950fetcher.h"
#include "messagehandler.h"
#include "latin1literal.h"
#include <kdebug.h>
#include "iso5426converter.h"
#include "iso6937converter.h"

#include <tqapplication.h>

#include <config.h>

#ifdef HAVE_YAZ
extern "C"
{
#include <yaz/zoom.h>
#include <yaz/marcdisp.h>
#include <yaz/yaz-version.h>
}
#endif

#include <tdelocale.h>

#include <tqfile.h>

namespace
{
    // Page size for record retrieval: the starting value of m_limit, the cap
    // applied in setQuery(), and the step by which the window grows in run()
    // when more results remain.
    const size_t Z3950_DEFAULT_MAX_RECORDS = 20;
}

using KBibTeX::Z3950ResultFound;
using KBibTeX::Z3950Connection;

// Event carrying one fetched record. A deep copy of the record text is kept,
// and the shared count of live result events goes up by one.
Z3950ResultFound::Z3950ResultFound( const TQString& s )
        : TQCustomEvent( uid() ),
        m_result( TQDeepCopy<TQString>( s ) )
{
    Z3950Connection::resultsLeft += 1;
}

// Destroying a result event lowers the shared count of live result events.
Z3950ResultFound::~Z3950ResultFound()
{
    Z3950Connection::resultsLeft -= 1;
}

// Holder for the YAZ/ZOOM handles owned by a Z3950Connection.
// Without HAVE_YAZ the class is empty.
class Z3950Connection::Private
{
public:
#ifdef HAVE_YAZ
    // Start with null handles so the destructor is safe even when
    // makeConnection() was never called.
    Private() : conn_opt( 0 ), conn( 0 ) {}

    // Releases whichever handles are currently held.
    ~Private()
    {
        if ( conn_opt )
        {
            ZOOM_options_destroy( conn_opt );
            conn_opt = 0;
        }
        if ( conn )
        {
            ZOOM_connection_destroy( conn );
            conn = 0;
        }
    }

    ZOOM_options conn_opt;   // options the connection was created with
    ZOOM_connection conn;    // the ZOOM connection itself
#else
    Private() {}
#endif
};

// Number of Z3950ResultFound events constructed but not yet destroyed.
// Reset to 0 at the start of run() and checked by checkPendingEvents().
// NOTE(review): plain int, presumably touched from both the worker thread and
// the thread that consumes the events - confirm whether this needs protection.
int Z3950Connection::resultsLeft = 0;

// Sets up a connection object for one server/database. No network activity
// happens here; the connection is opened from run() via makeConnection().
// Every string argument is stored as a deep copy, and the record window starts
// at 0 with the default page size.
Z3950Connection::Z3950Connection( TQObject* fetcher,
                                  const TQString& host,
                                  uint port,
                                  const TQString& dbname,
                                  const TQString& sourceCharSet,
                                  const TQString& syntax,
                                  const TQString& esn )
        : TQThread(),
        d( new Private() ),
        m_connected( false ),
        m_aborted( false ),
        m_fetcher( fetcher ),
        m_host( TQDeepCopy<TQString>( host ) ),
        m_port( port ),
        m_dbname( TQDeepCopy<TQString>( dbname ) ),
        // the character set name ends up in a yaz api call, so out of
        // paranoia about user input only the first 64 characters are kept
        m_sourceCharSet( TQDeepCopy<TQString>( sourceCharSet.left( 64 ) ) ),
        m_syntax( TQDeepCopy<TQString>( syntax ) ),
        m_esn( TQDeepCopy<TQString>( esn ) ),
        m_start( 0 ),
        m_limit( Z3950_DEFAULT_MAX_RECORDS ),
        m_hasMore( false )
{
}

// Marks the connection as closed and frees the private data (whose own
// destructor releases the YAZ handles when built with HAVE_YAZ).
Z3950Connection::~Z3950Connection()
{
    Private* const impl = d;
    d = 0;
    m_connected = false;
    delete impl;
}

// Rewinds the record window to the first page: start at index 0 with the
// default page size as the upper limit.
void Z3950Connection::reset()
{
    m_limit = Z3950_DEFAULT_MAX_RECORDS;
    m_start = 0;
}

// Stores a deep copy of the query string for the next run() and sets the
// record limit to numHits, capped at the default page size.
void Z3950Connection::setQuery( const TQString& query_, unsigned int numHits )
{
    if ( Z3950_DEFAULT_MAX_RECORDS < numHits )
    {
        m_limit = Z3950_DEFAULT_MAX_RECORDS;
    }
    else
    {
        m_limit = numHits;
    }
    m_pqn = TQDeepCopy<TQString>( query_ );
}

void Z3950Connection::setUserPassword( const TQString& user_, const TQString& pword_ )
{
    m_user = TQDeepCopy<TQString>( user_ );
    m_password = TQDeepCopy<TQString>( pword_ );
}

// Thread body: runs the stored prefix query against the server and reports the
// outcome to m_fetcher through posted events.
//
// With HAVE_YAZ the steps are:
//  1. connect if necessary (makeConnection()),
//  2. parse m_pqn as a prefix query and start the search,
//  3. settle on a record syntax, probing the first record when no syntax is
//     set or the current one is not among those understood here,
//  4. post one Z3950ResultFound per record in [m_start, min(numResults, m_limit)),
//  5. move the m_start/m_limit window forward when more results remain.
// In every case a Z3950ConnectionDone event is posted through done().
// Without HAVE_YAZ only that final event is posted.
void Z3950Connection::run()
{
//  kdDebug() << "Z3950Connection::run() - " << m_fetcher->source() << endl;
    m_aborted = false;
    m_hasMore = false;
    resultsLeft = 0;
#ifdef HAVE_YAZ

    if ( !makeConnection() )
    {
        // NOTE(review): makeConnection() has already posted a done event with
        // the error message, so the fetcher receives two done events here -
        // confirm the receiver tolerates that.
        done();
        return;
    }

    ZOOM_query query = ZOOM_query_create();
    kdDebug() << "Z3950Connection::run() - pqn = " << toCString( m_pqn ) << endl;
    int errcode = ZOOM_query_prefix( query, toCString( m_pqn ) );
    if ( errcode != 0 )
    {
        kdDebug() << "Z3950Connection::run() - query error: " << m_pqn << endl;
        ZOOM_query_destroy( query );
        TQString s = i18n( "Query error!" );
        s += ' ' + m_pqn;
        done( s, MessageHandler::Error );
        return;
    }

    ZOOM_resultset resultSet = ZOOM_connection_search( d->conn, query );

    // check abort status
    if ( m_aborted )
    {
        // release the search objects before bailing out, like every other exit path
        ZOOM_resultset_destroy( resultSet );
        ZOOM_query_destroy( query );
        done();
        return;
    }

    // I know the LOC wants the syntax = "xml" and esn = "mods"
    // to get MODS data, that seems a bit odd...
    // esn only makes sense for marc and grs-1
    // if syntax is mods, set esn to mods too
    TQCString type = "raw";
    if ( m_syntax == Latin1Literal( "mods" ) )
    {
        m_syntax = TQString::fromLatin1( "xml" );
        ZOOM_resultset_option_set( resultSet, "elementSetName", "mods" );
        type = "xml";
    }
    else
    {
        ZOOM_resultset_option_set( resultSet, "elementSetName", m_esn.latin1() );
    }
    ZOOM_resultset_option_set( resultSet, "start", TQCString().setNum( m_start ) );
    ZOOM_resultset_option_set( resultSet, "count", TQCString().setNum( m_limit - m_start ) );
    // search in default syntax, unless syntax is already set
    if ( !m_syntax.isEmpty() )
    {
        ZOOM_resultset_option_set( resultSet, "preferredRecordSyntax", m_syntax.latin1() );
    }

    const char* errmsg = 0;
    const char* addinfo = 0;
    errcode = ZOOM_connection_error( d->conn, &errmsg, &addinfo );
    if ( errcode != 0 )
    {
        ZOOM_resultset_destroy( resultSet );
        ZOOM_query_destroy( query );
        m_connected = false;

        // build the message while the connection is still alive, since the
        // error strings may be owned by it
        TQString s = i18n( "Connection search error %1: %2" ).arg( errcode ).arg( toString( errmsg ) );
        if ( !TQCString( addinfo ).isEmpty() )
        {
            s += " (" + toString( addinfo ) + ")";
        }

        // With m_connected cleared, the next makeConnection() creates fresh
        // handles and would overwrite these ones; release them now so they
        // are not leaked. Nulling them keeps later destruction harmless.
        ZOOM_connection_destroy( d->conn );
        d->conn = 0;
        ZOOM_options_destroy( d->conn_opt );
        d->conn_opt = 0;

        kdDebug() << "Z3950Connection::run() - " << s << endl;
        done( s, MessageHandler::Error );
        return;
    }

    const size_t numResults = ZOOM_resultset_size( resultSet );

    TQString newSyntax = m_syntax;
    if ( numResults > 0 )
    {
        kdDebug() << "Z3950Connection::run() - current syntax is " << m_syntax << " (" << numResults << " results)" << endl;
        // so now we know that results exist, might have to check syntax
        int len = 0;
        ZOOM_record rec = ZOOM_resultset_record( resultSet, 0 );
        // want raw unless it's mods
        ZOOM_record_get( rec, type, &len );
        if ( len > 0 && m_syntax.isEmpty() )
        {
            newSyntax = TQString::fromLatin1( ZOOM_record_get( rec, "syntax", &len ) ).lower();
            kdDebug() << "Z3950Connection::run() - syntax guess is " << newSyntax << endl;
            if ( newSyntax == Latin1Literal( "mods" ) || newSyntax == Latin1Literal( "xml" ) )
            {
                m_syntax = TQString::fromLatin1( "xml" );
                ZOOM_resultset_option_set( resultSet, "elementSetName", "mods" );
            }
            else if ( newSyntax == Latin1Literal( "grs-1" ) )
            {
                // if it's defaulting to grs-1, go ahead and change it to try to get a marc
                // record since grs-1 is a last resort for us
                newSyntax.truncate( 0 );
            }
        }
        // right now, we just understand mods, unimarc, marc21/usmarc, and grs-1
        if ( newSyntax != Latin1Literal( "xml" ) &&
                newSyntax != Latin1Literal( "usmarc" ) &&
                newSyntax != Latin1Literal( "marc21" ) &&
                newSyntax != Latin1Literal( "unimarc" ) &&
                newSyntax != Latin1Literal( "grs-1" ) )
        {
            // Probe the first record with each syntax in turn (MODS, USMARC,
            // MARC21, UNIMARC, GRS-1) until one yields data.
            kdDebug() << "Z3950Connection::run() - changing z39.50 syntax to MODS" << endl;
            newSyntax = TQString::fromLatin1( "xml" );
            ZOOM_resultset_option_set( resultSet, "elementSetName", "mods" );
            ZOOM_resultset_option_set( resultSet, "preferredRecordSyntax", newSyntax.latin1() );
            rec = ZOOM_resultset_record( resultSet, 0 );
            ZOOM_record_get( rec, "xml", &len );
            if ( len == 0 )
            {
                // change set name back
                ZOOM_resultset_option_set( resultSet, "elementSetName", m_esn.latin1() );
                newSyntax = TQString::fromLatin1( "usmarc" ); // try usmarc
                kdDebug() << "Z3950Connection::run() - changing z39.50 syntax to USMARC" << endl;
                ZOOM_resultset_option_set( resultSet, "preferredRecordSyntax", newSyntax.latin1() );
                rec = ZOOM_resultset_record( resultSet, 0 );
                ZOOM_record_get( rec, "raw", &len );
            }
            if ( len == 0 )
            {
                newSyntax = TQString::fromLatin1( "marc21" ); // try marc21
                kdDebug() << "Z3950Connection::run() - changing z39.50 syntax to MARC21" << endl;
                ZOOM_resultset_option_set( resultSet, "preferredRecordSyntax", newSyntax.latin1() );
                rec = ZOOM_resultset_record( resultSet, 0 );
                ZOOM_record_get( rec, "raw", &len );
            }
            if ( len == 0 )
            {
                newSyntax = TQString::fromLatin1( "unimarc" ); // try unimarc
                kdDebug() << "Z3950Connection::run() - changing z39.50 syntax to UNIMARC" << endl;
                ZOOM_resultset_option_set( resultSet, "preferredRecordSyntax", newSyntax.latin1() );
                rec = ZOOM_resultset_record( resultSet, 0 );
                ZOOM_record_get( rec, "raw", &len );
            }
            if ( len == 0 )
            {
                newSyntax = TQString::fromLatin1( "grs-1" ); // try grs-1
                kdDebug() << "Z3950Connection::run() - changing z39.50 syntax to GRS-1" << endl;
                ZOOM_resultset_option_set( resultSet, "preferredRecordSyntax", newSyntax.latin1() );
                rec = ZOOM_resultset_record( resultSet, 0 );
                ZOOM_record_get( rec, "raw", &len );
            }
            if ( len == 0 )
            {
                kdDebug() << "Z3950Connection::run() - giving up" << endl;
                ZOOM_resultset_destroy( resultSet );
                ZOOM_query_destroy( query );
                done( i18n( "Record syntax error" ), MessageHandler::Error );
                return;
            }
            kdDebug() << "Z3950Connection::run() - final syntax is " << newSyntax << endl;
        }
    }

    // go back to fooling ourselves and calling it mods
    if ( m_syntax == Latin1Literal( "xml" ) )
    {
        m_syntax = TQString::fromLatin1( "mods" );
    }
    if ( newSyntax == Latin1Literal( "xml" ) )
    {
        newSyntax = TQString::fromLatin1( "mods" );
    }
    // save syntax change for next time
    if ( m_syntax != newSyntax )
    {
        tqApp->postEvent( m_fetcher, new Z3950SyntaxChange( newSyntax ) );
        m_syntax = newSyntax;
    }

    // fall back to marc-8 when no source character set was configured
    if ( m_sourceCharSet.isEmpty() )
    {
        m_sourceCharSet = TQString::fromLatin1( "marc-8" );
    }

    const size_t realLimit = TQMIN( numResults, m_limit );

    for ( size_t i = m_start; i < realLimit && !m_aborted; ++i )
    {
        kdDebug() << "Z3950Connection::run() - grabbing index " << i << endl;
        ZOOM_record rec = ZOOM_resultset_record( resultSet, i );
        if ( !rec )
        {
            kdDebug() << "Z3950Connection::run() - no record returned for index " << i << endl;
            continue;
        }
        int len = 0;
        TQString data;
        if ( m_syntax == Latin1Literal( "mods" ) )
        {
            data = toString( ZOOM_record_get( rec, "xml", &len ) );
        }
        else if ( m_syntax == Latin1Literal( "grs-1" ) )   // grs-1
        {
            // we're going to parse the rendered data, very ugly...
            data = toString( ZOOM_record_get( rec, "render", &len ) );
        }
        else
        {
#if 0
            kdWarning() << "Remove debug from z3950connection.cpp" << endl;
            {
                TQFile f1( TQString::fromLatin1( "/tmp/z3950.raw" ) );
                if ( f1.open( IO_WriteOnly ) )
                {
                    TQDataStream t( &f1 );
                    t << ZOOM_record_get( rec, "raw", &len );
                }
                f1.close();
            }
#endif
            // MARC records are converted to MARCXML text
            data = toXML( ZOOM_record_get( rec, "raw", &len ), m_sourceCharSet );
        }
        Z3950ResultFound* ev = new Z3950ResultFound( data );
        TQApplication::postEvent( m_fetcher, ev );
    }

    ZOOM_resultset_destroy( resultSet );
    ZOOM_query_destroy( query );

    // slide the window forward so the next run() continues after this page
    m_hasMore = m_limit < numResults;
    if ( m_hasMore )
    {
        m_start = m_limit;
        m_limit += Z3950_DEFAULT_MAX_RECORDS;
    }
#endif
    done();
}

// Opens the ZOOM connection unless one is already marked as connected.
//
// Returns true when connected (or when built without HAVE_YAZ). On failure the
// handles are released, a Z3950ConnectionDone event carrying the error text is
// posted through done(), and false is returned.
bool Z3950Connection::makeConnection()
{
    if ( m_connected )
    {
        return true;
    }
//  kdDebug() << "Z3950Connection::makeConnection() - " << m_fetcher->source() << endl;
// I don't know what to do except assume database, user, and password are in locale encoding
#ifdef HAVE_YAZ
    d->conn_opt = ZOOM_options_create();
    ZOOM_options_set( d->conn_opt, "implementationName", "KBibTeX" );
    ZOOM_options_set( d->conn_opt, "databaseName",       toCString( m_dbname ) );
    ZOOM_options_set( d->conn_opt, "user",               toCString( m_user ) );
    ZOOM_options_set( d->conn_opt, "password",           toCString( m_password ) );

    d->conn = ZOOM_connection_create( d->conn_opt );
    ZOOM_connection_connect( d->conn, m_host.latin1(), m_port );

    const char* errmsg = 0;   // textual form of 'errcode'
    const char* addinfo = 0;  // optional extra detail, may be empty
    const int errcode = ZOOM_connection_error( d->conn, &errmsg, &addinfo );
    if ( errcode != 0 )
    {
        // Build the message before tearing the connection down: the error
        // strings may be owned by the connection object.
        TQString s = i18n( "Connection error %1: %2" ).arg( errcode ).arg( toString( errmsg ) );
        if ( !TQCString( addinfo ).isEmpty() )
        {
            s += " (" + toString( addinfo ) + ")";
        }

        // Null the handles after releasing them so that Private's destructor
        // does not destroy them a second time.
        ZOOM_options_destroy( d->conn_opt );
        d->conn_opt = 0;
        ZOOM_connection_destroy( d->conn );
        d->conn = 0;
        m_connected = false;

        kdDebug() << "Z3950Connection::makeConnection() - " << s << endl;
        done( s, MessageHandler::Error );
        return false;
    }
#endif
    m_connected = true;
    return true;
}

void Z3950Connection::done()
{
    checkPendingEvents();
    tqApp->postEvent( m_fetcher, new Z3950ConnectionDone( m_hasMore ) );
}

void Z3950Connection::done( const TQString& msg_, int type_ )
{
    checkPendingEvents();
    if ( m_aborted )
    {
        tqApp->postEvent( m_fetcher, new Z3950ConnectionDone( m_hasMore ) );
    }
    else
    {
        tqApp->postEvent( m_fetcher, new Z3950ConnectionDone( m_hasMore, msg_, type_ ) );
    }
}

// Gives outstanding result events a chance to be consumed: when any are still
// alive, simply wait one second. Returns immediately otherwise.
void Z3950Connection::checkPendingEvents()
{
    if ( resultsLeft <= 0 )
    {
        return;
    }
    sleep( 1 );
}

// Encodes text_ in the source character set by converting its UTF-8 form
// through iconvRun().
inline
TQCString Z3950Connection::toCString( const TQString& text_ )
{
    const TQCString asUtf8 = text_.utf8();
    const TQString utf8Name = TQString::fromLatin1( "utf-8" );
    return iconvRun( asUtf8, utf8Name, m_sourceCharSet );
}

// Decodes bytes in the source character set: converts them to UTF-8 through
// iconvRun() and builds a TQString from the result.
inline
TQString Z3950Connection::toString( const TQCString& text_ )
{
    const TQString utf8Name = TQString::fromLatin1( "utf-8" );
    const TQCString asUtf8 = iconvRun( text_, m_sourceCharSet, utf8Name );
    return TQString::fromUtf8( asUtf8 );
}

// static
// Converts text_ from fromCharSet_ to toCharSet_ using yaz_iconv.
//
// The input is returned unchanged when it is empty, when both character sets
// are equal, when the conversion is unsupported, when decoding fails, or when
// built without HAVE_YAZ. If yaz cannot open the conversion, ISO 5426 and
// ISO 6937 sources are first turned into UTF-8 by the bundled converters and
// then converted from there.
TQCString Z3950Connection::iconvRun( const TQCString& text_, const TQString& fromCharSet_, const TQString& toCharSet_ )
{
#ifdef HAVE_YAZ
    if ( text_.isEmpty() )
    {
        return text_;
    }

    if ( fromCharSet_ == toCharSet_ )
    {
        return text_;
    }

    yaz_iconv_t cd = yaz_iconv_open( toCharSet_.latin1(), fromCharSet_.latin1() );
    if ( !cd )
    {
        // maybe it's iso 5426, which we sorta support
        TQString charSetLower = fromCharSet_.lower();
        charSetLower.remove( '-' ).remove( ' ' );
        if ( charSetLower == Latin1Literal( "iso5426" ) )
        {
            return iconvRun( Iso5426Converter::toUtf8( text_ ).utf8(), TQString::fromLatin1( "utf-8" ), toCharSet_ );
        }
        else if ( charSetLower == Latin1Literal( "iso6937" ) )
        {
            return iconvRun( Iso6937Converter::toUtf8( text_ ).utf8(), TQString::fromLatin1( "utf-8" ), toCharSet_ );
        }
        kdWarning() << "Z3950Connection::iconvRun() - conversion from " << fromCharSet_
        << " to " << toCharSet_ << " is unsupported" << endl;
        return text_;
    }

    const char* input = text_;
    size_t inlen = text_.length();

    // A single input byte can become a multi-byte sequence in the target
    // encoding (up to 4 bytes for UTF-8), so twice the input size is not
    // always enough; leave some slack for the final flush as well.
    size_t outlen = 4 * inlen + 16;
    TQMemArray<char> result0( outlen );
    char* result = result0.data();

    // yaz_iconv advances 'result' past the bytes it wrote
    int r = yaz_iconv( cd, const_cast<char**>( &input ), &inlen, &result, &outlen );
    if ( r <= 0 )
    {
        kdDebug() << "Z3950Connection::iconvRun() - can't decode buffer" << endl;
        // release the conversion descriptor on the failure path too
        yaz_iconv_close( cd );
        return text_;
    }
    // bug in yaz, need to flush buffer to catch last character
    yaz_iconv( cd, 0, 0, &result, &outlen );
    yaz_iconv_close( cd );

    // length is pointer difference
    const size_t len = result - result0.data();

    // the size argument counts the terminating '\0', hence len + 1
    return TQCString( result0.data(), len + 1 );
#else // no yaz
    return text_;
#endif
}

// Converts one raw MARC record (marc_, encoded in charSet_) to MARCXML text
// prefixed with an XML declaration.
//
// Returns TQString::null for empty input, an unsupported character set, an
// implausible record length, a decode failure, or when built without HAVE_YAZ.
// If yaz cannot convert from charSet_, ISO 5426 and ISO 6937 input is first
// turned into UTF-8 by the bundled converters and the function calls itself
// with that.
TQString Z3950Connection::toXML( const TQCString& marc_, const TQString& charSet_ )
{
#ifdef HAVE_YAZ
    if ( marc_.isEmpty() )
    {
        kdDebug() << "Z3950Connection::toXML() - empty string" << endl;
        return TQString::null;
    }

    yaz_iconv_t cd = yaz_iconv_open( "utf-8", charSet_.latin1() );
    if ( !cd )
    {
        // maybe it's iso 5426, which we sorta support
        TQString charSetLower = charSet_.lower();
        charSetLower.remove( '-' ).remove( ' ' );
        if ( charSetLower == Latin1Literal( "iso5426" ) )
        {
            return toXML( Iso5426Converter::toUtf8( marc_ ).utf8(), TQString::fromLatin1( "utf-8" ) );
        }
        else if ( charSetLower == Latin1Literal( "iso6937" ) )
        {
            return toXML( Iso6937Converter::toUtf8( marc_ ).utf8(), TQString::fromLatin1( "utf-8" ) );
        }
        kdWarning() << "Z3950Connection::toXML() - conversion from " << charSet_ << " is unsupported" << endl;
        return TQString::null;
    }

    yaz_marc_t mt = yaz_marc_create();
    yaz_marc_iconv( mt, cd );
    yaz_marc_xml( mt, YAZ_MARC_MARCXML );

    // first 5 bytes are length
    bool ok;
#if YAZ_VERSIONL < 0x030000
    int len = marc_.left( 5 ).toInt( &ok );
#else
    size_t len = marc_.left( 5 ).toInt( &ok );
#endif
    // only a successfully parsed length is range-checked; an unparsable
    // prefix is left for the decoder to reject
    if ( ok && ( len < 25 || len > 100000 ) )
    {
        kdDebug() << "Z3950Connection::toXML() - bad length: " << len << endl;
        // release the yaz objects on this early exit as well
        yaz_iconv_close( cd );
        yaz_marc_destroy( mt );
        return TQString::null;
    }

#if YAZ_VERSIONL < 0x030000
    char* result;
#else
    const char* result;
#endif
    int r = yaz_marc_decode_buf( mt, marc_, -1, &result, &len );
    if ( r <= 0 )
    {
        kdDebug() << "Z3950Connection::toXML() - can't decode buffer" << endl;
        // release the yaz objects on this early exit as well
        yaz_iconv_close( cd );
        yaz_marc_destroy( mt );
        return TQString::null;
    }

    // copy the decoded text out before the yaz objects are destroyed
    TQString output = TQString::fromLatin1( "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" );
    output += TQString::fromUtf8( TQCString( result, len + 1 ), len + 1 );
//  kdDebug() << TQCString(result) << endl;
//  kdDebug() << "-------------------------------------------" << endl;
//  kdDebug() << output << endl;
    yaz_iconv_close( cd );
    yaz_marc_destroy( mt );

    return output;
#else // no yaz
    return TQString::null;
#endif
}