/***************************************************************************
    copyright            : (C) 2005-2006 by Robby Stephenson
    email                : robby@periapsis.org
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of version 2 of the GNU General Public License as  *
 *   published by the Free Software Foundation;                            *
 *                                                                         *
 ***************************************************************************/

#include "entrezfetcher.h"
#include "../tellico_kernel.h"
#include "../latin1literal.h"
#include "../collection.h"
#include "../entry.h"
#include "../filehandler.h"
#include "../translators/xslthandler.h"
#include "../translators/tellicoimporter.h"
#include "../tellico_debug.h"

#include <klocale.h>
#include <kconfig.h>
#include <kstandarddirs.h>
#include <kio/job.h>

#include <tqdom.h>
#include <tqlabel.h>
#include <tqlayout.h>
#include <tqfile.h>

//#define ENTREZ_TEST

namespace {
  static const int ENTREZ_MAX_RETURNS_TOTAL = 25;
  static const char* ENTREZ_BASE_URL = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/";
  static const char* ENTREZ_SEARCH_CGI = "esearch.fcgi";
  static const char* ENTREZ_SUMMARY_CGI = "esummary.fcgi";
  static const char* ENTREZ_FETCH_CGI = "efetch.fcgi";
  static const char* ENTREZ_LINK_CGI = "elink.fcgi";
  static const char* ENTREZ_DEFAULT_DATABASE = "pubmed";
}

using Tellico::Fetch::EntrezFetcher;

EntrezFetcher::EntrezFetcher(TQObject* parent_, const char* name_) : Fetcher(parent_, name_), m_xsltHandler(0),
    m_step(Begin), m_started(false) {
}

EntrezFetcher::~EntrezFetcher() {
}

TQString EntrezFetcher::defaultName() {
  return i18n("Entrez Database");
}

TQString EntrezFetcher::source() const {
  return m_name.isEmpty() ? defaultName() : m_name;
}

bool EntrezFetcher::canFetch(int type) const {
  return type == Data::Collection::Bibtex;
}

void EntrezFetcher::readConfigHook(const KConfigGroup& config_) {
  TQString s = config_.readEntry("Database", TQString::fromLatin1(ENTREZ_DEFAULT_DATABASE)); // default to pubmed
  if(!s.isEmpty()) {
    m_dbname = s;
  }
  m_fields = config_.readListEntry("Custom Fields");
}

void EntrezFetcher::search(FetchKey key_, const TQString& value_) {
  m_started = true;
  m_start = 1;
  m_total = -1;

// only search if current collection is a bibliography
  if(!canFetch(Kernel::self()->collectionType())) {
    myDebug() << "EntrezFetcher::search() - collection type mismatch, stopping" << endl;
    stop();
    return;
  }
  if(m_dbname.isEmpty()) {
    m_dbname = TQString::fromLatin1(ENTREZ_DEFAULT_DATABASE);
  }

#ifdef ENTREZ_TEST
  KURL u = KURL::fromPathOrURL(TQString::fromLatin1("/home/robby/esearch.xml"));
#else
  KURL u(TQString::fromLatin1(ENTREZ_BASE_URL));
  u.addPath(TQString::fromLatin1(ENTREZ_SEARCH_CGI));
  u.addQueryItem(TQString::fromLatin1("tool"),       TQString::fromLatin1("Tellico"));
  u.addQueryItem(TQString::fromLatin1("retmode"),    TQString::fromLatin1("xml"));
  u.addQueryItem(TQString::fromLatin1("usehistory"), TQString::fromLatin1("y"));
  u.addQueryItem(TQString::fromLatin1("retmax"),     TQString::fromLatin1("1")); // we're just getting the count
  u.addQueryItem(TQString::fromLatin1("db"),         m_dbname);
  u.addQueryItem(TQString::fromLatin1("term"),       value_);
  switch(key_) {
    case Title:
      u.addQueryItem(TQString::fromLatin1("field"), TQString::fromLatin1("titl"));
      break;

    case Person:
      u.addQueryItem(TQString::fromLatin1("field"), TQString::fromLatin1("auth"));
      break;

    case Keyword:
      // for Tellico Keyword searches basically mean search for any field matching
//      u.addQueryItem(TQString::fromLatin1("field"), TQString::fromLatin1("word"));
      break;

    case PubmedID:
      u.addQueryItem(TQString::fromLatin1("field"), TQString::fromLatin1("pmid"));
      break;

    case DOI:
    case Raw:
      u.setQuery(u.query() + '&' + value_);
      break;

    default:
      kdWarning() << "EntrezFetcher::search() - FetchKey not supported" << endl;
      stop();
      return;
  }
#endif

  m_step = Search;
//  myLog() << "EntrezFetcher::doSearch() - url: " << u.url() << endl;
  m_job = KIO::get(u, false, false);
  connect(m_job, TQT_SIGNAL(data(KIO::Job*, const TQByteArray&)),
          TQT_SLOT(slotData(KIO::Job*, const TQByteArray&)));
  connect(m_job, TQT_SIGNAL(result(KIO::Job*)),
          TQT_SLOT(slotComplete(KIO::Job*)));
}

void EntrezFetcher::continueSearch() {
  m_started = true;
  doSummary();
}

void EntrezFetcher::stop() {
  if(!m_started) {
    return;
  }
  if(m_job) {
    m_job->kill();
    m_job = 0;
  }
  m_data.truncate(0);
  m_started = false;
  m_step = Begin;
  emit signalDone(this);
}

void EntrezFetcher::slotData(KIO::Job*, const TQByteArray& data_) {
  TQDataStream stream(m_data, IO_WriteOnly | IO_Append);
  stream.writeRawBytes(data_.data(), data_.size());
}

void EntrezFetcher::slotComplete(KIO::Job* job_) {
  // since the fetch is done, don't worry about holding the job pointer
  m_job = 0;

  if(job_->error()) {
    job_->showErrorDialog(Kernel::self()->widget());
    stop();
    return;
  }

  if(m_data.isEmpty()) {
    myDebug() << "EntrezFetcher::slotComplete() - no data" << endl;
    stop();
    return;
  }

#if 0
  kdWarning() << "Remove debug from entrezfetcher.cpp: " << __LINE__ << endl;
  TQFile f(TQString::fromLatin1("/tmp/test.xml"));
  if(f.open(IO_WriteOnly)) {
    TQTextStream t(&f);
    t.setEncoding(TQTextStream::UnicodeUTF8);
    t << TQCString(m_data, m_data.size()+1);
  }
  f.close();
#endif

  switch(m_step) {
    case Search:
      searchResults();
      break;
    case Summary:
      summaryResults();
      break;
    case Begin:
    case Fetch:
    default:
      myLog() << "EntrezFetcher::slotComplete() - wrong step = " << m_step << endl;
      stop();
      break;
  }
}

void EntrezFetcher::searchResults() {
  TQDomDocument dom;
  if(!dom.setContent(m_data, false)) {
    kdWarning() << "EntrezFetcher::searchResults() - server did not return valid XML." << endl;
    stop();
    return;
  }
  // find Count, QueryKey, and WebEnv elements
  int count = 0;
  for(TQDomNode n = dom.documentElement().firstChild(); !n.isNull(); n = n.nextSibling()) {
    TQDomElement e = n.toElement();
    if(e.isNull()) {
      continue;
    }
    if(e.tagName() == Latin1Literal("Count")) {
      m_total = e.text().toInt();
      ++count;
    } else if(e.tagName() == Latin1Literal("QueryKey")) {
      m_queryKey = e.text();
      ++count;
    } else if(e.tagName() == Latin1Literal("WebEnv")) {
      m_webEnv = e.text();
      ++count;
    }
    if(count >= 3) {
      break; // found them all
    }
  }

  m_data.truncate(0);
  doSummary();
}

void EntrezFetcher::doSummary() {
#ifdef ENTREZ_TEST
  KURL u = KURL::fromPathOrURL(TQString::fromLatin1("/home/robby/esummary.xml"));
#else
  KURL u(TQString::fromLatin1(ENTREZ_BASE_URL));
  u.addPath(TQString::fromLatin1(ENTREZ_SUMMARY_CGI));
  u.addQueryItem(TQString::fromLatin1("tool"),       TQString::fromLatin1("Tellico"));
  u.addQueryItem(TQString::fromLatin1("retmode"),    TQString::fromLatin1("xml"));
  u.addQueryItem(TQString::fromLatin1("retstart"),   TQString::number(m_start));
  u.addQueryItem(TQString::fromLatin1("retmax"),     TQString::number(TQMIN(m_total-m_start-1, ENTREZ_MAX_RETURNS_TOTAL)));
  u.addQueryItem(TQString::fromLatin1("usehistory"), TQString::fromLatin1("y"));
  u.addQueryItem(TQString::fromLatin1("db"),         m_dbname);
  u.addQueryItem(TQString::fromLatin1("query_key"),  m_queryKey);
  u.addQueryItem(TQString::fromLatin1("WebEnv"),     m_webEnv);
#endif

  m_step = Summary;
//  myLog() << "EntrezFetcher::searchResults() - url: " << u.url() << endl;
  m_job = KIO::get(u, false, false);
  connect(m_job, TQT_SIGNAL(data(KIO::Job*, const TQByteArray&)),
          TQT_SLOT(slotData(KIO::Job*, const TQByteArray&)));
  connect(m_job, TQT_SIGNAL(result(KIO::Job*)),
          TQT_SLOT(slotComplete(KIO::Job*)));
}

void EntrezFetcher::summaryResults() {
  TQDomDocument dom;
  if(!dom.setContent(m_data, false)) {
    kdWarning() << "EntrezFetcher::summaryResults() - server did not return valid XML." << endl;
    stop();
    return;
  }
  // top child is eSummaryResult
  // all children are DocSum
  for(TQDomNode n = dom.documentElement().firstChild(); !n.isNull(); n = n.nextSibling()) {
    TQDomElement e = n.toElement();
    if(e.isNull() || e.tagName() != Latin1Literal("DocSum")) {
      continue;
    }
    TQDomNodeList nodes = e.elementsByTagName(TQString::fromLatin1("Id"));
    if(nodes.count() == 0) {
      myDebug() << "EntrezFetcher::summaryResults() - no Id elements" << endl;
      continue;
    }
    int id = nodes.item(0).toElement().text().toInt();
    TQString title, pubdate, authors;
    nodes = e.elementsByTagName(TQString::fromLatin1("Item"));
    for(uint j = 0; j < nodes.count(); ++j) {
      if(nodes.item(j).toElement().attribute(TQString::fromLatin1("Name")) == Latin1Literal("Title")) {
        title = nodes.item(j).toElement().text();
      } else if(nodes.item(j).toElement().attribute(TQString::fromLatin1("Name")) == Latin1Literal("PubDate")) {
        pubdate = nodes.item(j).toElement().text();
      } else if(nodes.item(j).toElement().attribute(TQString::fromLatin1("Name")) == Latin1Literal("AuthorList")) {
        TQStringList list;
        for(TQDomNode aNode = nodes.item(j).firstChild(); !aNode.isNull(); aNode = aNode.nextSibling()) {
          // lazy, assume all children Items are authors
          if(aNode.nodeName() == Latin1Literal("Item")) {
            list << aNode.toElement().text();
          }
        }
        authors = list.join(TQString::fromLatin1("; "));
      }
      if(!title.isEmpty() && !pubdate.isEmpty() && !authors.isEmpty()) {
        break; // done now
      }
    }
    SearchResult* r = new SearchResult(this, title, pubdate + '/' + authors, TQString());
    m_matches.insert(r->uid, id);
    emit signalResultFound(r);
  }
  m_start = m_matches.count() + 1;
  m_hasMoreResults = m_start <= m_total;
  stop(); // done searching
}

Tellico::Data::EntryPtr EntrezFetcher::fetchEntry(uint uid_) {
  // if we already grabbed this one, then just pull it out of the dict
  Data::EntryPtr entry = m_entries[uid_];
  if(entry) {
    return entry;
  }

  if(!m_matches.contains(uid_)) {
    return 0;
  }

  if(!m_xsltHandler) {
    initXSLTHandler();
    if(!m_xsltHandler) { // probably an error somewhere in the stylesheet loading
      stop();
      return 0;
    }
  }

  int id = m_matches[uid_];
#ifdef ENTREZ_TEST
  KURL u = KURL::fromPathOrURL(TQString::fromLatin1("/home/robby/pubmed.xml"));
#else
  KURL u(TQString::fromLatin1(ENTREZ_BASE_URL));
  u.addPath(TQString::fromLatin1(ENTREZ_FETCH_CGI));
  u.addQueryItem(TQString::fromLatin1("tool"),       TQString::fromLatin1("Tellico"));
  u.addQueryItem(TQString::fromLatin1("retmode"),    TQString::fromLatin1("xml"));
  u.addQueryItem(TQString::fromLatin1("rettype"),    TQString::fromLatin1("abstract"));
  u.addQueryItem(TQString::fromLatin1("db"),         m_dbname);
  u.addQueryItem(TQString::fromLatin1("id"),         TQString::number(id));
#endif
  // now it's sychronous, and we know that it's utf8
  TQString xmlOutput = FileHandler::readTextFile(u, false /*quiet*/, true /*utf8*/);
  if(xmlOutput.isEmpty()) {
    kdWarning() << "EntrezFetcher::fetchEntry() - unable to download " << u << endl;
    return 0;
  }
#if 0
  kdWarning() << "EntrezFetcher::fetchEntry() - turn me off!" << endl;
  TQFile f1(TQString::fromLatin1("/tmp/test-entry.xml"));
  if(f1.open(IO_WriteOnly)) {
    TQTextStream t(&f1);
    t.setEncoding(TQTextStream::UnicodeUTF8);
    t << xmlOutput;
  }
  f1.close();
#endif
  TQString str = m_xsltHandler->applyStylesheet(xmlOutput);
  Import::TellicoImporter imp(str);
  Data::CollPtr coll = imp.collection();
  if(!coll) {
    kdWarning() << "EntrezFetcher::fetchEntry() - invalid collection" << endl;
    return 0;
  }
  if(coll->entryCount() == 0) {
    myDebug() << "EntrezFetcher::fetchEntry() - no entries in collection" << endl;
    return 0;
  } else if(coll->entryCount() > 1) {
    myDebug() << "EntrezFetcher::fetchEntry() - collection has multiple entries, taking first one" << endl;
  }

  Data::EntryPtr e = coll->entries().front();

  // try to get a link, but only if necessary
  if(m_fields.contains(TQString::fromLatin1("url"))) {
    KURL link(TQString::fromLatin1(ENTREZ_BASE_URL));
    link.addPath(TQString::fromLatin1(ENTREZ_LINK_CGI));
    link.addQueryItem(TQString::fromLatin1("tool"),   TQString::fromLatin1("Tellico"));
    link.addQueryItem(TQString::fromLatin1("cmd"),    TQString::fromLatin1("llinks"));
    link.addQueryItem(TQString::fromLatin1("db"),     m_dbname);
    link.addQueryItem(TQString::fromLatin1("dbfrom"), m_dbname);
    link.addQueryItem(TQString::fromLatin1("id"),     TQString::number(id));

    TQDomDocument linkDom = FileHandler::readXMLFile(link, false /* namespace */, true /* quiet */);
    // need eLinkResult/LinkSet/IdUrlList/IdUrlSet/ObjUrl/Url
    TQDomNode linkNode = linkDom.namedItem(TQString::fromLatin1("eLinkResult"))
                               .namedItem(TQString::fromLatin1("LinkSet"))
                               .namedItem(TQString::fromLatin1("IdUrlList"))
                               .namedItem(TQString::fromLatin1("IdUrlSet"))
                               .namedItem(TQString::fromLatin1("ObjUrl"))
                               .namedItem(TQString::fromLatin1("Url"));
    if(!linkNode.isNull()) {
      TQString u = linkNode.toElement().text();
//      myDebug() << u << endl;
      if(!u.isEmpty()) {
        if(!coll->hasField(TQString::fromLatin1("url"))) {
          Data::FieldPtr field = new Data::Field(TQString::fromLatin1("url"), i18n("URL"), Data::Field::URL);
          field->setCategory(i18n("Miscellaneous"));
          coll->addField(field);
        }
        e->setField(TQString::fromLatin1("url"), u);
      }
    }
  }

  const StringMap customFields = EntrezFetcher::customFields();
  for(StringMap::ConstIterator it = customFields.begin(); it != customFields.end(); ++it) {
    if(!m_fields.contains(it.key())) {
      coll->removeField(it.key());
    }
  }

  m_entries.insert(uid_, e);
  return e;
}

void EntrezFetcher::initXSLTHandler() {
  TQString xsltfile = locate("appdata", TQString::fromLatin1("pubmed2tellico.xsl"));
  if(xsltfile.isEmpty()) {
    kdWarning() << "EntrezFetcher::initXSLTHandler() - can not locate pubmed2tellico.xsl." << endl;
    return;
  }

  KURL u;
  u.setPath(xsltfile);

  if(!m_xsltHandler) {
    m_xsltHandler = new XSLTHandler(u);
  }
  if(!m_xsltHandler->isValid()) {
    kdWarning() << "EntrezFetcher::initXSLTHandler() - error in pubmed2tellico.xsl." << endl;
    delete m_xsltHandler;
    m_xsltHandler = 0;
    return;
  }
}

void EntrezFetcher::updateEntry(Data::EntryPtr entry_) {
//  myDebug() << "EntrezFetcher::updateEntry()" << endl;
  TQString s = entry_->field(TQString::fromLatin1("pmid"));
  if(!s.isEmpty()) {
    search(PubmedID, s);
    return;
  }

  s = entry_->field(TQString::fromLatin1("doi"));
  if(!s.isEmpty()) {
    search(DOI, s);
    return;
  }

  s = entry_->field(TQString::fromLatin1("title"));
  if(!s.isEmpty()) {
    search(Title, s);
    return;
  }

  myDebug() << "EntrezFetcher::updateEntry() - insufficient info to search" << endl;
  emit signalDone(this); // always need to emit this if not continuing with the search
}

Tellico::Fetch::ConfigWidget* EntrezFetcher::configWidget(TQWidget* parent_) const {
  return new EntrezFetcher::ConfigWidget(parent_, this);
}

EntrezFetcher::ConfigWidget::ConfigWidget(TQWidget* parent_, const EntrezFetcher* fetcher_/*=0*/)
    : Fetch::ConfigWidget(parent_) {
  TQVBoxLayout* l = new TQVBoxLayout(optionsWidget());
  l->addWidget(new TQLabel(i18n("This source has no options."), optionsWidget()));
  l->addStretch();

  // now add additional fields widget
  addFieldsWidget(EntrezFetcher::customFields(), fetcher_ ? fetcher_->m_fields : TQStringList());
}

void EntrezFetcher::ConfigWidget::saveConfig(KConfigGroup& config_) {
  saveFieldsConfig(config_);
  slotSetModified(false);
}

TQString EntrezFetcher::ConfigWidget::preferredName() const {
  return EntrezFetcher::defaultName();
}

//static
Tellico::StringMap EntrezFetcher::customFields() {
  StringMap map;
  map[TQString::fromLatin1("institution")] = i18n("Institution");
  map[TQString::fromLatin1("abstract")]    = i18n("Abstract");
  map[TQString::fromLatin1("url")]         = i18n("URL");
  return map;
}

#include "entrezfetcher.moc"