/***************************************************************************
    copyright            : (C) 2004-2006 by Robby Stephenson
    email                : robby@periapsis.org
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of version 2 of the GNU General Public License as  *
 *   published by the Free Software Foundation;                            *
 *                                                                         *
 ***************************************************************************/

#include "imdbfetcher.h"
#include "../tellico_kernel.h"
#include "../collections/videocollection.h"
#include "../entry.h"
#include "../field.h"
#include "../filehandler.h"
#include "../latin1literal.h"
#include "../imagefactory.h"
#include "../tellico_utils.h"
#include "../gui/listboxtext.h"
#include "../tellico_debug.h"

#include <tdelocale.h>
#include <kdialogbase.h>
#include <tdeconfig.h>
#include <klineedit.h>
#include <knuminput.h>

#include <tqregexp.h>
#include <tqfile.h>
#include <tqmap.h>
#include <tqvbox.h>
#include <tqlabel.h>
#include <tqlistbox.h>
#include <tqwhatsthis.h>
#include <tqlayout.h>
#include <tqcheckbox.h>
#include <tqvgroupbox.h>

//#define IMDB_TEST

// Constants private to this translation unit.
namespace {
  // host that is queried unless the configuration supplies a "Host" entry
  const char* IMDB_SERVER = "akas.imdb.com";
  // initial result limit, and the amount the limit grows by in continueSearch()
  const uint IMDB_MAX_RESULTS(20);
  // separator placed between multiple values joined into a single field
  const TQString sep(TQString::fromLatin1("; "));
}

using Tellico::Fetch::IMDBFetcher;

// Regular expressions shared by all fetcher instances. They start out null
// and are allocated by initRegExps(), which the constructor calls while
// s_tagRx is still null.
TQRegExp* IMDBFetcher::s_tagRx = 0;
TQRegExp* IMDBFetcher::s_anchorRx = 0;
TQRegExp* IMDBFetcher::s_anchorTitleRx = 0;
TQRegExp* IMDBFetcher::s_anchorNameRx = 0;
TQRegExp* IMDBFetcher::s_titleRx = 0;

// static
void IMDBFetcher::initRegExps() {
  s_tagRx = new TQRegExp(TQString::fromLatin1("<.*>"));
  s_tagRx->setMinimal(true);

  s_anchorRx = new TQRegExp(TQString::fromLatin1("<a\\s+[^>]*href\\s*=\\s*\"([^\"]*)\"[^<]*>([^<]*)</a>"), false);
  s_anchorRx->setMinimal(true);

  s_anchorTitleRx = new TQRegExp(TQString::fromLatin1("<a\\s+[^>]*href\\s*=\\s*\"([^\"]*/title/[^\"]*)\"[^<]*>([^<]*)</a>"), false);
  s_anchorTitleRx->setMinimal(true);

  s_anchorNameRx = new TQRegExp(TQString::fromLatin1("<a\\s+[^>]*href\\s*=\\s*\"([^\"]*/name/[^\"]*)\"[^<]*>([^<]*)</a>"), false);
  s_anchorNameRx->setMinimal(true);

  s_titleRx = new TQRegExp(TQString::fromLatin1("<title>(.*)</title>"), false);
  s_titleRx->setMinimal(true);
}

// Creates a fetcher that is idle, fetches images, targets the default IMDb
// host and uses the default result limit. The shared regular expressions are
// allocated the first time any fetcher is constructed.
IMDBFetcher::IMDBFetcher(TQObject* parent_, const char* name_) : Fetcher(parent_, name_),
    m_job(0), m_started(false), m_fetchImages(true), m_host(TQString::fromLatin1(IMDB_SERVER)),
    m_limit(IMDB_MAX_RESULTS), m_countOffset(0) {
  // These members are otherwise only written by search() and readConfigHook(),
  // yet continueSearch() and doCast() read them; give them defined values so a
  // call made before those functions cannot read indeterminate data.
  // Assigned here rather than in the initializer list so the result does not
  // depend on the declaration order in the header.
  m_redirected = false;
  m_currentTitleBlock = Unknown;
  m_numCast = 10; // same default readConfigHook() uses for "Max Cast"

  if(!s_tagRx) {
    initRegExps();
  }
}

// Nothing is released explicitly here; the body is intentionally empty.
IMDBFetcher::~IMDBFetcher() {
}

// Returns the translated name used for this source when none is configured.
TQString IMDBFetcher::defaultName() {
  const TQString translated = i18n("Internet Movie Database");
  return translated;
}

// Returns the name of this source, falling back to defaultName() while
// m_name is empty.
TQString IMDBFetcher::source() const {
  if(m_name.isEmpty()) {
    return defaultName();
  }
  return m_name;
}

// Only video collections can be filled from this source.
bool IMDBFetcher::canFetch(int type) const {
  if(type != Data::Collection::Video) {
    return false;
  }
  return true;
}

void IMDBFetcher::readConfigHook(const TDEConfigGroup& config_) {
  TQString h = config_.readEntry("Host");
  if(!h.isEmpty()) {
    m_host = h;
  }
  m_numCast = config_.readNumEntry("Max Cast", 10);
  m_fetchImages = config_.readBoolEntry("Fetch Images", true);
  m_fields = config_.readListEntry("Custom Fields");
}

// Starts a new asynchronous search for value_ by title or by person.
// All state left over from a previous search is reset first. The search is
// abandoned through stop() when the current collection is not a video
// collection or when key_ is neither Title nor Person.
// multiple values not supported
void IMDBFetcher::search(FetchKey key_, const TQString& value_) {
  m_key = key_;
  m_value = value_;
  m_started = true;
  m_redirected = false;
  // forget the previous download, the previous matches and the cached title blocks
  m_data.truncate(0);
  m_matches.clear();
  m_popularTitles.truncate(0);
  m_exactTitles.truncate(0);
  m_partialTitles.truncate(0);
  m_currentTitleBlock = Unknown;
  m_countOffset = 0;

// only search if current collection is a video collection
  if(Kernel::self()->collectionType() != Data::Collection::Video) {
    myDebug() << "IMDBFetcher::search() - collection type mismatch, stopping" << endl;
    stop();
    return;
  }

#ifdef IMDB_TEST
  // test builds read canned pages from local files instead of querying the server
  if(m_key == Title) {
    m_url = KURL::fromPathOrURL(TQString::fromLatin1("/home/robby/imdb-title.html"));
    m_redirected = false;
  } else {
    m_url = KURL::fromPathOrURL(TQString::fromLatin1("/home/robby/imdb-name.html"));
    m_redirected = true;
  }
#else
  // build http://<host>/find?s=<tt|nm>&q=<value>
  m_url = KURL();
  m_url.setProtocol(TQString::fromLatin1("http"));
  m_url.setHost(m_host.isEmpty() ? TQString::fromLatin1(IMDB_SERVER) : m_host);
  m_url.setPath(TQString::fromLatin1("/find"));

  switch(key_) {
    case Title:
      m_url.addQueryItem(TQString::fromLatin1("s"), TQString::fromLatin1("tt"));
      break;

    case Person:
      m_url.addQueryItem(TQString::fromLatin1("s"), TQString::fromLatin1("nm"));
      break;

    default:
      kdWarning() << "IMDBFetcher::search() - FetchKey not supported" << endl;
      stop();
      return;
  }

  // as far as I can tell, the url encoding should always be iso-8859-1
  // not utf-8
  m_url.addQueryItem(TQString::fromLatin1("q"), value_, 4 /* iso-8859-1 */);

//  myDebug() << "IMDBFetcher::search() url = " << m_url << endl;
#endif

  // the download is asynchronous: data arrives in slotData(), redirections are
  // recorded by slotRedirection(), and slotComplete() parses the result
  m_job = TDEIO::get(m_url, false, false);
  connect(m_job, TQT_SIGNAL(data(TDEIO::Job*, const TQByteArray&)),
          TQT_SLOT(slotData(TDEIO::Job*, const TQByteArray&)));
  connect(m_job, TQT_SIGNAL(result(TDEIO::Job*)),
          TQT_SLOT(slotComplete(TDEIO::Job*)));
  connect(m_job, TQT_SIGNAL(redirection(TDEIO::Job *, const KURL&)),
          TQT_SLOT(slotRedirection(TDEIO::Job*, const KURL&)));
}

// Delivers the next batch of results for the current search: the limit is
// raised by IMDB_MAX_RESULTS and parsing resumes in the block where the
// previous round stopped. Completion is always reported through stop().
void IMDBFetcher::continueSearch() {
  m_limit += IMDB_MAX_RESULTS;
  m_started = true;

  // A block that leaves the offset at 0 has been used up, so the next block
  // becomes current and is parsed right away by the following test.
  if(m_currentTitleBlock == Popular) {
    parseTitleBlock(m_popularTitles);
    if(m_countOffset == 0) {
      m_currentTitleBlock = Exact;
    }
  }

  if(m_currentTitleBlock == Exact) {
    parseTitleBlock(m_exactTitles);
    if(m_countOffset == 0) {
      m_currentTitleBlock = Partial;
    }
  }

  if(m_currentTitleBlock == Partial) {
    parseTitleBlock(m_partialTitles);
    if(m_countOffset == 0) {
      m_currentTitleBlock = Unknown;
    }
  }

  // results of a person search come from the person's own page
  if(m_currentTitleBlock == SinglePerson) {
    parseSingleNameResult();
  }

  stop();
}

// Ends the running search, if any: kills a pending download job, clears the
// started and redirected flags and emits signalDone(). Calling it while no
// search is running does nothing.
void IMDBFetcher::stop() {
  if(m_started) {
    if(m_job != 0) {
      m_job->kill();
      m_job = 0;
    }

    m_redirected = false;
    m_started = false;

    emit signalDone(this);
  }
}

// Appends a chunk of downloaded bytes to m_data.
void IMDBFetcher::slotData(TDEIO::Job*, const TQByteArray& data_) {
  TQDataStream appender(m_data, IO_WriteOnly | IO_Append);
  const uint numBytes = data_.size();
  appender.writeRawBytes(data_.data(), numBytes);
}

// Records that the download was redirected and remembers the new location;
// slotComplete() treats a redirected search as having a single result.
void IMDBFetcher::slotRedirection(TDEIO::Job*, const KURL& toURL_) {
  m_redirected = true;
  m_url = toURL_;
}

// Called when the download job has finished. Shows the job's error dialog on
// failure, stops when nothing was received, and otherwise hands the page to
// the parser matching the search key and the redirection state.
void IMDBFetcher::slotComplete(TDEIO::Job* job_) {
  // the job is finished, so there is no reason to keep pointing at it
  m_job = 0;

  if(job_->error()) {
    job_->showErrorDialog(Kernel::self()->widget());
    stop();
    return;
  }

  if(m_data.isEmpty()) {
    stop();
    return;
  }

  // being redirected means the server went straight to a single result
  const bool titleSearch = (m_key == Title);
  if(titleSearch && m_redirected) {
    parseSingleTitleResult();
  } else if(titleSearch) {
    parseMultipleTitleResults();
  } else if(m_redirected) {
    parseSingleNameResult();
  } else {
    parseMultipleNameResults();
  }
}

void IMDBFetcher::parseSingleTitleResult() {
//  myDebug() << "IMDBFetcher::parseSingleTitleResult()" << endl;
  s_titleRx->search(Tellico::decodeHTML(TQString(m_data)));
  // split title at parenthesis
  const TQString cap1 = s_titleRx->cap(1);
  int pPos = cap1.find('(');
  // FIXME: maybe remove parentheses here?
  SearchResult* r = new SearchResult(this,
                                     pPos == -1 ? cap1 : cap1.left(pPos),
                                     pPos == -1 ? TQString() : cap1.mid(pPos),
                                     TQString());
  m_matches.insert(r->uid, m_url);
  emit signalResultFound(r);

  m_hasMoreResults = false;
  stop();
}

// Handles a title search that returned a list page. The page is cut into up
// to three blocks (popular, exact and partial matches), which are stored in
// members so continueSearch() can resume later, and the blocks are parsed in
// that order until the result limit is reached.
void IMDBFetcher::parseMultipleTitleResults() {
//  myDebug() << "IMDBFetcher::parseMultipleTitleResults()" << endl;
  TQString output = Tellico::decodeHTML(TQString(m_data));

  // IMDb can return three title lists, popular, exact, and partial
  // the popular titles are in the first table, after the "Popular Results" text
  // each heading is searched for from the previous heading's position (or from 0 if that one is missing)
  int pos_popular = output.find(TQString::fromLatin1("Popular Titles"),  0,                    false);
  int pos_exact   = output.find(TQString::fromLatin1("Exact Matches"),   TQMAX(pos_popular, 0), false);
  int pos_partial = output.find(TQString::fromLatin1("Partial Matches"), TQMAX(pos_exact, 0),   false);
  // a block ends where the next heading that was found starts, or at the end of the page
  int end_popular = pos_exact; // keep track of where to end
  if(end_popular == -1) {
    end_popular = pos_partial == -1 ? output.length() : pos_partial;
  }
  int end_exact = pos_partial; // keep track of where to end
  if(end_exact == -1) {
    end_exact = output.length();
  }

  // if found popular matches
  if(pos_popular > -1) {
    m_popularTitles = output.mid(pos_popular, end_popular-pos_popular);
  }
  // if found exact matches
  if(pos_exact > -1) {
    m_exactTitles = output.mid(pos_exact, end_exact-pos_exact);
  }
  // if found partial matches, they run to the end of the page
  if(pos_partial > -1) {
    m_partialTitles = output.mid(pos_partial);
  }

  parseTitleBlock(m_popularTitles);
  // if the offset is 0, then we need to be looking at the next block
  m_currentTitleBlock = m_countOffset == 0 ? Exact : Popular;

  // only go on to the next block while there is room below the limit
  if(m_matches.size() < m_limit) {
    parseTitleBlock(m_exactTitles);
    m_currentTitleBlock = m_countOffset == 0 ? Partial : Exact;
  }

  if(m_matches.size() < m_limit) {
    parseTitleBlock(m_partialTitles);
    m_currentTitleBlock = m_countOffset == 0 ? Unknown : Partial;
  }

#ifndef NDEBUG
  if(m_matches.size() == 0) {
    myDebug() << "IMDBFetcher::parseMultipleTitleResults() - no matches found." << endl;
  }
#endif

  stop();
}

// Reports the title links found in one block of a result list.
//
// str_ is the HTML of one block (popular, exact or partial matches). Every
// anchor pointing to a /title/ URL is a candidate result. The first
// m_countOffset candidates were reported in an earlier round and are skipped;
// the remaining ones are emitted through signalResultFound() until
// m_matches reaches m_limit.
//
// On return m_countOffset is 0 when the limit was not reached (including the
// empty-block case), otherwise the number of candidates consumed so far in
// this block. m_hasMoreResults is set when the limit cut the block short or
// when the current block is not the partial one.
void IMDBFetcher::parseTitleBlock(const TQString& str_) {
  if(str_.isEmpty()) {
    m_countOffset = 0;
    return;
  }
//  myDebug() << "IMDBFetcher::parseTitleBlock() - " << m_currentTitleBlock << endl;

  // text following "aka", up to the end of the list item or the next line break
  TQRegExp akaRx(TQString::fromLatin1("aka (.*)(</li>|<br)"), false);
  akaRx.setMinimal(true);

  m_hasMoreResults = false;

  int count = 0;
  int start = s_anchorTitleRx->search(str_);
  while(m_started && start > -1) {
    // the captures must be copied now, the shared regexp is searched again below
    // split title at parenthesis
    const TQString cap1 = s_anchorTitleRx->cap(1); // the anchor url
    const TQString cap2 = s_anchorTitleRx->cap(2).stripWhiteSpace(); // the anchor text
    start += s_anchorTitleRx->matchedLength();
    int pPos = cap2.find('('); // if it has parentheses, use that for description
    TQString desc;
    if(pPos > -1) {
      int pPos2 = cap2.find(')', pPos+1);
      if(pPos2 > -1) {
        desc = cap2.mid(pPos+1, pPos2-pPos-1);
      }
    } else {
      // parenthesis might be outside anchor tag
      int end = s_anchorTitleRx->search(str_, start);
      if(end == -1) {
        end = str_.length();
      }
      TQString text = str_.mid(start, end-start);
      pPos = text.find('(');
      if(pPos > -1) {
        // only accept a parenthesis on the same line as the anchor
        int pNewLine = text.find(TQString::fromLatin1("<br"));
        if(pNewLine == -1 || pPos < pNewLine) {
          int pPos2 = text.find(')', pPos);
          // without a closing parenthesis the length below would be negative
          // and mid() would return the rest of the raw HTML, so skip it
          if(pPos2 > -1) {
            desc = text.mid(pPos+1, pPos2-pPos-1);
          }
        }
        // the parenthesis is not part of the anchor text, so the title is not split
        pPos = -1;
      }
    }
    // multiple matches might have 'aka' info
    int end = s_anchorTitleRx->search(str_, start+1);
    if(end == -1) {
      end = str_.length();
    }
    // the aka text only belongs to this result if it comes before the next title anchor
    int akaPos = akaRx.search(str_, start+1);
    if(akaPos > -1 && akaPos < end) {
      // limit to 50 chars
      desc += TQChar(' ') + akaRx.cap(1).stripWhiteSpace().remove(*s_tagRx);
      if(desc.length() > 50) {
        desc = desc.left(50) + TQString::fromLatin1("...");
      }
    }

    // move on to the next anchor before deciding what to do with this one
    start = s_anchorTitleRx->search(str_, start);

    // candidates reported in an earlier round are only counted
    if(count < m_countOffset) {
      ++count;
      continue;
    }

    // if we got this far, then there is a valid result
    if(m_matches.size() >= m_limit) {
      m_hasMoreResults = true;
      break;
    }

    SearchResult* r = new SearchResult(this, pPos == -1 ? cap2 : cap2.left(pPos), desc, TQString());
    // the anchor url is resolved relative to the search url, without any query
    KURL u(m_url, cap1);
    u.setQuery(TQString());
    m_matches.insert(r->uid, u);
    emit signalResultFound(r);
    ++count;
  }
  // unless this is the partial block, later blocks may still hold results
  if(!m_hasMoreResults && m_currentTitleBlock != Partial) {
    m_hasMoreResults = true;
  }
  m_countOffset = m_matches.size() < m_limit ? 0 : count;
}

// Reports the titles linked from a single person's page.
//
// Every anchor pointing to a /title/ URL is a candidate. The first
// m_countOffset candidates were handled in an earlier round and are skipped,
// candidates marked as "TV Episode" are ignored, and the rest are emitted
// through signalResultFound() until m_matches reaches m_limit. The function
// always finishes by calling stop().
void IMDBFetcher::parseSingleNameResult() {
//  myDebug() << "IMDBFetcher::parseSingleNameResult()" << endl;

  // lets continueSearch() know that it has to come back here
  m_currentTitleBlock = SinglePerson;

  TQString output = Tellico::decodeHTML(TQString(m_data));

  int pos = s_anchorTitleRx->search(output);
  if(pos == -1) {
    stop();
    return;
  }

  TQRegExp tvRegExp(TQString::fromLatin1("TV\\sEpisode"), false);

  int len = 0;
  int count = 0;
  TQString desc;
  for( ; m_started && pos > -1; pos = s_anchorTitleRx->search(output, pos+len)) {
    desc.truncate(0);
    bool isEpisode = false;
    len = s_anchorTitleRx->cap(0).length();
    // split title at parenthesis
    const TQString cap2 = s_anchorTitleRx->cap(2).stripWhiteSpace();
    int pPos = cap2.find('(');
    if(pPos > -1) {
      desc = cap2.mid(pPos);
    } else {
      // look until the next <a
      int aPos = output.find(TQString::fromLatin1("<a"), pos+len, false);
      if(aPos == -1) {
        aPos = output.length();
      }
      TQString tmp = output.mid(pos+len, aPos-pos-len);
      if(tmp.find(tvRegExp) > -1) {
        isEpisode = true;
      }
      pPos = tmp.find('(');
      if(pPos > -1) {
        // only accept a parenthesis on the same line as the anchor
        int pNewLine = tmp.find(TQString::fromLatin1("<br"));
        if(pNewLine == -1 || pPos < pNewLine) {
          int pEnd = tmp.find(')', pPos+1);
          // without a closing parenthesis the length below would be negative
          // and mid() would return everything up to the next anchor, so skip it
          if(pEnd > -1) {
            desc = tmp.mid(pPos+1, pEnd-pPos-1).remove(*s_tagRx);
          }
        }
        // but need to indicate it wasn't found initially
        pPos = -1;
      }
    }

    // candidates handled in an earlier round are only counted
    if(count < m_countOffset) {
      ++count;
      continue;
    }

    // episodes count as handled but are never reported
    ++count;
    if(isEpisode) {
      continue;
    }

    // if we got this far, then there is a valid result
    if(m_matches.size() >= m_limit) {
      m_hasMoreResults = true;
      break;
    }

    // FIXME: maybe remove parentheses here?
    SearchResult* r = new SearchResult(this, pPos == -1 ? cap2 : cap2.left(pPos), desc, TQString());
    KURL u(m_url, s_anchorTitleRx->cap(1)); // relative URL constructor
    u.setQuery(TQString());
    m_matches.insert(r->uid, u);
//    myDebug() << u.prettyURL() << endl;
//    myDebug() << cap2 << endl;
    emit signalResultFound(r);
  }
  // running out of anchors means the page has been read completely
  if(pos == -1) {
    m_hasMoreResults = false;
  }
  // the candidate that hit the limit was counted but not reported, so the
  // next round has to look at it again
  m_countOffset = count - 1;

  stop();
}

// Handles a person search that returned a list of people. The names are
// collected from the page, the user picks one in a modal dialog, and the
// chosen person's page is then downloaded asynchronously; slotComplete()
// continues from there as for a single-person result.
void IMDBFetcher::parseMultipleNameResults() {
//  myDebug() << "IMDBFetcher::parseMultipleNameResults()" << endl;

  // the exact results are in the first table after the "exact results" text
  TQString output = Tellico::decodeHTML(TQString(m_data));
  int pos = output.find(TQString::fromLatin1("Popular Results"), 0, false);
  if(pos == -1) {
    pos = output.find(TQString::fromLatin1("Exact Matches"), 0, false);
  }

  // find beginning of partial matches
  // (the first heading found among several alternatives, else the end of the page)
  int end = output.find(TQString::fromLatin1("Other Results"), TQMAX(pos, 0), false);
  if(end == -1) {
    end = output.find(TQString::fromLatin1("Partial Matches"), TQMAX(pos, 0), false);
    if(end == -1) {
      end = output.find(TQString::fromLatin1("Approx Matches"), TQMAX(pos, 0), false);
      if(end == -1) {
        end = output.length();
      }
    }
  }

  // map: label shown in the dialog -> url of the person's page
  // nameMap: plain name -> how many times it has been seen so far
  TQMap<TQString, KURL> map;
  TQMap<TQString, int> nameMap;

  TQString s;
  // if found exact matches
  if(pos > -1) {
    pos = s_anchorNameRx->search(output, pos+13);
    while(pos > -1 && pos < end && m_matches.size() < m_limit) {
      KURL u(m_url, s_anchorNameRx->cap(1));
      // labels of exact matches end with a space, which is how the dialog
      // code below recognizes them
      s = s_anchorNameRx->cap(2).stripWhiteSpace() + ' ';
      // if more than one exact, add parentheses
      if(nameMap.contains(s) && nameMap[s] > 0) {
        // fix the first one that didn't have a number
        if(nameMap[s] == 1) {
          KURL u2 = map[s];
          map.remove(s);
          map.insert(s + "(1) ", u2);
        }
        nameMap.insert(s, nameMap[s] + 1);
        // check for duplicate names
        s += TQString::fromLatin1("(%1) ").arg(nameMap[s]);
      } else {
        nameMap.insert(s, 1);
      }
      map.insert(s, u);
      pos = s_anchorNameRx->search(output, pos+s_anchorNameRx->cap(0).length());
    }
  }

  // go ahead and search for partial matches
  pos = s_anchorNameRx->search(output, end);
  while(pos > -1 && m_matches.size() < m_limit) {
    KURL u(m_url, s_anchorNameRx->cap(1)); // relative URL
    // no trailing space here, unlike the exact matches
    s = s_anchorNameRx->cap(2).stripWhiteSpace();
    if(nameMap.contains(s) && nameMap[s] > 0) {
    // fix the first one that didn't have a number
      if(nameMap[s] == 1) {
        KURL u2 = map[s];
        map.remove(s);
        map.insert(s + " (1)", u2);
      }
      nameMap.insert(s, nameMap[s] + 1);
      // check for duplicate names
      s += TQString::fromLatin1(" (%1)").arg(nameMap[s]);
    } else {
      nameMap.insert(s, 1);
    }
    map.insert(s, u);
    pos = s_anchorNameRx->search(output, pos+s_anchorNameRx->cap(0).length());
  }

  // nobody found at all
  if(map.count() == 0) {
    stop();
    return;
  }

  // let the user choose among the collected names in a modal dialog
  KDialogBase* dlg = new KDialogBase(Kernel::self()->widget(), "imdb dialog",
                                     true, i18n("Select IMDB Result"), KDialogBase::Ok|KDialogBase::Cancel);
  TQVBox* box = new TQVBox(dlg);
  box->setSpacing(10);
  (void) new TQLabel(i18n("<qt>Your search returned multiple matches. Please select one below.</qt>"), box);

  TQListBox* listBox = new TQListBox(box);
  listBox->setMinimumWidth(400);
  listBox->setColumnMode(TQListBox::FitToWidth);
  const TQStringList values = map.keys();
  for(TQStringList::ConstIterator it = values.begin(); it != values.end(); ++it) {
    // a trailing space marks an exact match; those are inserted at the top and colored
    if((*it).endsWith(TQChar(' '))) {
      GUI::ListBoxText* box = new GUI::ListBoxText(listBox, *it, 0);
      box->setColored(true);
    } else {
      (void) new GUI::ListBoxText(listBox, *it);
    }
  }
  listBox->setSelected(0, true);
  TQWhatsThis::add(listBox, i18n("<qt>Select a search result.</qt>"));

  dlg->setMainWidget(box);
  // cancelling the dialog, or accepting it without a selection, ends the search
  if(dlg->exec() != TQDialog::Accepted || listBox->currentText().isEmpty()) {
    dlg->delayedDestruct();
    stop();
    return;
  }

  m_url = map[listBox->currentText()];
  dlg->delayedDestruct();

  // redirected is true since that's how I tell if an exact match has been found
  m_redirected = true;
  // start over with an empty buffer for the selected person's page
  m_data.truncate(0);
  m_job = TDEIO::get(m_url, false, false);
  connect(m_job, TQT_SIGNAL(data(TDEIO::Job*, const TQByteArray&)),
          TQT_SLOT(slotData(TDEIO::Job*, const TQByteArray&)));
  connect(m_job, TQT_SIGNAL(result(TDEIO::Job*)),
          TQT_SLOT(slotComplete(TDEIO::Job*)));
  connect(m_job, TQT_SIGNAL(redirection(TDEIO::Job *, const KURL&)),
          TQT_SLOT(slotRedirection(TDEIO::Job*, const KURL&)));

  // do not stop() here
}

// Returns the entry that belongs to a previously reported search result.
// The title page is read and parsed on the first request and the entry is
// kept in m_entries for later requests. Returns 0 when no URL is known for
// uid_, when the page has no text, or when parsing fails. m_url is switched
// to the entry's URL while parsing and is restored before returning.
Tellico::Data::EntryPtr IMDBFetcher::fetchEntry(uint uid_) {
  // an entry that was built before is simply handed out again
  Data::EntryPtr cached = m_entries[uid_];
  if(cached) {
    return cached;
  }

  KURL entryURL = m_matches[uid_];
  if(entryURL.isEmpty()) {
    myDebug() << "IMDBFetcher::fetchEntry() - no url found" << endl;
    return 0;
  }

  const KURL savedURL = m_url; // restored once parsing is done
  TQString page;
  if(entryURL == m_url) {
    // the wanted page is the one already held in m_data, no download needed
//    myDebug() << "IMDBFetcher::fetchEntry() - matches previous URL, no downloading needed." << endl;
    page = Tellico::decodeHTML(TQString(m_data));
  } else {
    // this read is synchronous
#ifdef IMDB_TEST
    KURL u = KURL::fromPathOrURL(TQString::fromLatin1("/home/robby/imdb-title-result.html"));
    page = Tellico::decodeHTML(FileHandler::readTextFile(u));
#else
    // be quiet about failure
    page = Tellico::decodeHTML(FileHandler::readTextFile(entryURL, true));
    m_url = entryURL; // needed for processing
#endif
  }

  if(page.isEmpty()) {
    myDebug() << "IMDBFetcher::fetchEntry() - no text results" << endl;
    m_url = savedURL;
    return 0;
  }

  Data::EntryPtr parsed = parseEntry(page);
  m_url = savedURL;
  if(parsed) {
    m_entries.insert(uid_, parsed); // keep for later
    return parsed;
  }

  myDebug() << "IMDBFetcher::fetchEntry() - error in processing entry" << endl;
  return 0;
}

// Builds a new entry in a fresh video collection from the HTML of a title
// page. The individual do*() helpers each fill in their part; m_url is used
// as the base URL for the plot, cast and cover lookups. When requested
// through the custom fields, an "imdb" URL field is added and set to the
// page's address without its query part.
Tellico::Data::EntryPtr IMDBFetcher::parseEntry(const TQString& str_) {
  Data::CollPtr collection = new Data::VideoCollection(true);
  Data::EntryPtr newEntry = new Data::Entry(collection);

  doTitle(str_, newEntry);
  doRunningTime(str_, newEntry);
  doAspectRatio(str_, newEntry);
  doAlsoKnownAs(str_, newEntry);
  doPlot(str_, newEntry, m_url);
  doLists(str_, newEntry);
  doPerson(str_, newEntry, TQString::fromLatin1("Director"), TQString::fromLatin1("director"));
  doPerson(str_, newEntry, TQString::fromLatin1("Writer"), TQString::fromLatin1("writer"));
  doRating(str_, newEntry);
  doCast(str_, newEntry, m_url);
  // the cover lookup needs the base URL
  if(m_fetchImages) {
    doCover(str_, newEntry, m_url);
  }

  const TQString linkField = TQString::fromLatin1("imdb");
  const bool linkRequested = m_fields.findIndex(linkField) > -1;
  if(linkRequested && !collection->hasField(linkField)) {
    Data::FieldPtr urlField = new Data::Field(linkField, i18n("IMDB Link"), Data::Field::URL);
    urlField->setCategory(i18n("General"));
    collection->addField(urlField);
  }
  // only write the link when the field exists and really is a URL field
  if(collection->hasField(linkField) && collection->fieldByName(linkField)->type() == Data::Field::URL) {
    m_url.setQuery(TQString());
    newEntry->setField(linkField, m_url.url());
  }

  return newEntry;
}

// Sets the "title" and "year" fields from the page's <title> element.
// The text before the first opening parenthesis is the title (a pair of
// surrounding double quotes is removed); the digits directly after that
// parenthesis are the year. Nothing is set when the page has no <title>.
void IMDBFetcher::doTitle(const TQString& str_, Data::EntryPtr entry_) {
  if(s_titleRx->search(str_) == -1) {
    return;
  }

  const TQString cap1 = s_titleRx->cap(1);
  // titles normally have parentheses, but do not rely on it
  const int pPos = cap1.find('(');
  TQString title = (pPos == -1 ? cap1 : cap1.left(pPos)).stripWhiteSpace();
  // remove first and last quotes if there
  if(title.startsWith(TQChar('"')) && title.endsWith(TQChar('"'))) {
    title = title.mid(1, title.length()-2);
  }
  entry_->setField(TQString::fromLatin1("title"), title);

  // without a parenthesis there is no year; scanning from index 0 would
  // otherwise turn leading digits of the title itself into a year
  if(pPos == -1) {
    return;
  }

  // the year is the run of digits right after the parenthesis
  uint pPos2 = pPos+1;
  while(pPos2 < cap1.length() && cap1[pPos2].isDigit()) {
    ++pPos2;
  }
  const TQString year = cap1.mid(pPos+1, pPos2-pPos-1);
  if(!year.isEmpty()) {
    entry_->setField(TQString::fromLatin1("year"), year);
  }
}

void IMDBFetcher::doRunningTime(const TQString& str_, Data::EntryPtr entry_) {
  // running time
  TQRegExp runtimeRx(TQString::fromLatin1("runtime:.*(\\d+)\\s+min"), false);
  runtimeRx.setMinimal(true);

  if(runtimeRx.search(str_) > -1) {
//    myDebug() << "running-time = " << runtimeRx.cap(1) << endl;
    entry_->setField(TQString::fromLatin1("running-time"), runtimeRx.cap(1));
  }
}

void IMDBFetcher::doAspectRatio(const TQString& str_, Data::EntryPtr entry_) {
  TQRegExp rx(TQString::fromLatin1("aspect ratio:.*([\\d\\.]+\\s*:\\s*[\\d\\.]+)"), false);
  rx.setMinimal(true);

  if(rx.search(str_) > -1) {
//    myDebug() << "aspect ratio = " << rx.cap(1) << endl;
    entry_->setField(TQString::fromLatin1("aspect-ratio"), rx.cap(1).stripWhiteSpace());
  }
}

void IMDBFetcher::doAlsoKnownAs(const TQString& str_, Data::EntryPtr entry_) {
  if(m_fields.findIndex(TQString::fromLatin1("alttitle")) == -1) {
    return;
  }

  // match until next b tag
//  TQRegExp akaRx(TQString::fromLatin1("also known as(.*)<b(?:\\s.*)?>"));
  TQRegExp akaRx(TQString::fromLatin1("also known as(.*)<(b[>\\s/]|div)"), false);
  akaRx.setMinimal(true);

  if(akaRx.search(str_) > -1 && !akaRx.cap(1).isEmpty()) {
    Data::FieldPtr f = entry_->collection()->fieldByName(TQString::fromLatin1("alttitle"));
    if(!f) {
      f = new Data::Field(TQString::fromLatin1("alttitle"), i18n("Alternative Titles"), Data::Field::Table);
      f->setFormatFlag(Data::Field::FormatTitle);
      entry_->collection()->addField(f);
    }

    // split by <br>, remembering it could become valid xhtml!
    TQRegExp brRx(TQString::fromLatin1("<br[\\s/]*>"), false);
    brRx.setMinimal(true);
    TQStringList list = TQStringList::split(brRx, akaRx.cap(1));
    // lang could be included with [fr]
//    const TQRegExp parRx(TQString::fromLatin1("\\(.+\\)"));
    const TQRegExp brackRx(TQString::fromLatin1("\\[\\w+\\]"));
    TQStringList values;
    for(TQStringList::Iterator it = list.begin(); it != list.end(); ++it) {
      TQString s = *it;
      // sometimes, the word "more" gets linked to the releaseinfo page, check that
      if(s.find(TQString::fromLatin1("releaseinfo")) > -1) {
        continue;
      }
      s.remove(*s_tagRx);
      s.remove(brackRx);
      s = s.stripWhiteSpace();
      // the first value ends up being or starting with the colon after "Also know as"
      // I'm too lazy to figure out a better regexp
      if(s.startsWith(TQChar(':'))) {
        s = s.mid(1);
      }
      if(!s.isEmpty()) {
        values += s;
      }
    }
    if(!values.isEmpty()) {
      entry_->setField(TQString::fromLatin1("alttitle"), values.join(sep));
    }
  }
}

void IMDBFetcher::doPlot(const TQString& str_, Data::EntryPtr entry_, const KURL& baseURL_) {
  // plot summaries provided by users are on a separate page
  // should those be preferred?

  bool useUserSummary = false;

  TQString thisPlot;
  // match until next opening tag
  TQRegExp plotRx(TQString::fromLatin1("plot\\s*(?:outline|summary)?:(.*)<[^/].*</"), false);
  plotRx.setMinimal(true);
  TQRegExp plotURLRx(TQString::fromLatin1("<a\\s+.*href\\s*=\\s*\".*/title/.*/plotsummary\""), false);
  plotURLRx.setMinimal(true);
  if(plotRx.search(str_) > -1) {
    thisPlot = plotRx.cap(1);
    thisPlot.remove(*s_tagRx); // remove HTML tags
    entry_->setField(TQString::fromLatin1("plot"), thisPlot);
    // if thisPlot ends with (more) or contains
    // a url that ends with plotsummary, then we'll grab it, otherwise not
    if(plotRx.cap(0).endsWith(TQString::fromLatin1("(more)</")) || plotURLRx.search(plotRx.cap(0)) > -1) {
      useUserSummary = true;
    }
  }

  if(useUserSummary) {
    TQRegExp idRx(TQString::fromLatin1("title/(tt\\d+)"));
    idRx.search(baseURL_.path());
    KURL plotURL = baseURL_;
    plotURL.setPath(TQString::fromLatin1("/title/") + idRx.cap(1) + TQString::fromLatin1("/plotsummary"));
    // be quiet about failure
    TQString plotPage = FileHandler::readTextFile(plotURL, true);

    if(!plotPage.isEmpty()) {
      TQRegExp plotRx(TQString::fromLatin1("<p\\s+class\\s*=\\s*\"plotpar\">(.*)</p"));
      plotRx.setMinimal(true);
      if(plotRx.search(plotPage) > -1) {
        TQString userPlot = plotRx.cap(1);
        userPlot.remove(*s_tagRx); // remove HTML tags
        // remove last little "written by", if there
        userPlot.remove(TQRegExp(TQString::fromLatin1("\\s*written by.*$"), false));
        entry_->setField(TQString::fromLatin1("plot"), Tellico::decodeHTML(userPlot));
      }
    }
  }
}

// Collects the people listed under a heading of the title page and stores
// them, joined with the separator, in the given field.
//   str_        - HTML of the title page
//   entry_      - entry that receives the field
//   imdbHeader_ - heading text to look for, e.g. "Director"
//   fieldName_  - name of the field to set
// The field is left untouched when nobody is found.
void IMDBFetcher::doPerson(const TQString& str_, Data::EntryPtr entry_,
                           const TQString& imdbHeader_, const TQString& fieldName_) {
  TQRegExp br2Rx(TQString::fromLatin1("<br[\\s/]*>\\s*<br[\\s/]*>"), false);
  br2Rx.setMinimal(true);
  TQRegExp divRx(TQString::fromLatin1("<[/]*div"), false);
  divRx.setMinimal(true);
  TQString name = TQString::fromLatin1("/name/");

  StringSet people;
  // visit every occurrence of the heading; note that an occurrence at index 0 is not visited
  for(int pos = str_.find(imdbHeader_); pos > 0; pos = str_.find(imdbHeader_, pos)) {
    // loop until repeated <br> tags or </div> tag
    const int endPos1 = str_.find(br2Rx, pos);
    const int endPos2 = str_.find(divRx, pos);
    const int endPos = TQMIN(endPos1, endPos2); // ok to be -1
    pos = s_anchorRx->search(str_, pos+1);
    while(pos > -1 && pos < endPos) {
      // only anchors that link to a /name/ page are people
      if(s_anchorRx->cap(1).find(name) > -1) {
        people.add(s_anchorRx->cap(2).stripWhiteSpace());
      }
      pos = s_anchorRx->search(str_, pos+1);
    }
  }
  if(!people.isEmpty()) {
    entry_->setField(fieldName_, people.toList().join(sep));
  }
}

// Sets the "cast" field with up to m_numCast entries. The full credits page
// of the title is preferred; when it cannot be read, the short cast list on
// the title page (str_) is used instead. Each entry is the actor's name,
// followed by "::" and the text of a later table cell when one is found.
void IMDBFetcher::doCast(const TQString& str_, Data::EntryPtr entry_, const KURL& baseURL_) {
  // the extended cast list is on a separate page
  // that's usually a lot of people
  // but since it can be in billing order, the main actors might not
  // be in the short list
  TQRegExp idRx(TQString::fromLatin1("title/(tt\\d+)"));
  idRx.search(baseURL_.path());
#ifdef IMDB_TEST
  KURL castURL = KURL::fromPathOrURL(TQString::fromLatin1("/home/robby/imdb-title-fullcredits.html"));
#else
  KURL castURL = baseURL_;
  castURL.setPath(TQString::fromLatin1("/title/") + idRx.cap(1) + TQString::fromLatin1("/fullcredits"));
#endif
  // be quiet about failure and be sure to translate entities
  TQString castPage = Tellico::decodeHTML(FileHandler::readTextFile(castURL, true));

  // pos ends up at the start of the cast list, or stays -1 when none is found
  int pos = -1;
  // the text to search, depends on which page is being read
  TQString castText = castPage;
  if(castText.isEmpty()) {
    // fall back to short list
    castText = str_;
    pos = castText.find(TQString::fromLatin1("cast overview"), 0, false);
    if(pos == -1) {
      pos = castText.find(TQString::fromLatin1("credited cast"), 0, false);
    }
  } else {
    // first look for anchor
    TQRegExp castAnchorRx(TQString::fromLatin1("<a\\s+name\\s*=\\s*\"cast\""), false);
    pos = castText.find(castAnchorRx);
    if(pos < 0) {
      // then for a table with the "cast" class
      TQRegExp tableClassRx(TQString::fromLatin1("<table\\s+class\\s*=\\s*\"cast\""), false);
      pos = castText.find(tableClassRx);
      if(pos < 0) {
        // fragile, the word "cast" appears in the title, but need to find
        // the one right above the actual cast table
        // for TV shows, there's a link on the sidebar for "episodes cast"
        // so need to not match that one
        pos = castText.find(TQString::fromLatin1("cast</"), 0, false);
        if(pos > 9) {
          // back up 9 places
          if(castText.mid(pos-9, 9).startsWith(TQString::fromLatin1("episodes"))) {
            // find next cast list
            pos = castText.find(TQString::fromLatin1("cast</"), pos+6, false);
          }
        }
      }
    }
  }
  if(pos == -1) { // no cast list found
    myDebug() << "IMDBFetcher::doCast() - no cast list found" << endl;
    return;
  }

  const TQString name = TQString::fromLatin1("/name/");
  // contents of a single table cell
  TQRegExp tdRx(TQString::fromLatin1("<td[^>]*>(.*)</td>"), false);
  tdRx.setMinimal(true);

  TQStringList cast;
  // loop until closing table tag
  const int endPos = castText.find(TQString::fromLatin1("</table"), pos, false);
  pos = s_anchorRx->search(castText, pos+1);
  while(pos > -1 && pos < endPos && static_cast<int>(cast.count()) < m_numCast) {
    // only anchors that link to a /name/ page are actors
    if(s_anchorRx->cap(1).find(name) > -1) {
      // now search for <td> item with character name
      // there's a column with ellipses then the character
      // so the second cell found from here on is taken as the character
      const int pos2 = tdRx.search(castText, pos);
      if(pos2 > -1 && tdRx.search(castText, pos2+1) > -1) {
        cast += s_anchorRx->cap(2).stripWhiteSpace()
              + TQString::fromLatin1("::") + tdRx.cap(1).simplifyWhiteSpace().remove(*s_tagRx);
      } else {
        cast += s_anchorRx->cap(2).stripWhiteSpace();
      }
    }
    pos = s_anchorRx->search(castText, pos+1);
  }

  if(!cast.isEmpty()) {
    entry_->setField(TQString::fromLatin1("cast"), cast.join(sep));
  }
}

void IMDBFetcher::doRating(const TQString& str_, Data::EntryPtr entry_) {
  if(m_fields.findIndex(TQString::fromLatin1("imdb-rating")) == -1) {
    return;
  }

  // don't add a colon, since there's a <br> at the end
  // some of the imdb images use /10.gif in their path, so check for space or bracket
  TQRegExp rx(TQString::fromLatin1("[>\\s](\\d+.?\\d*)/10[<//s]"), false);
  rx.setMinimal(true);

  if(rx.search(str_) > -1 && !rx.cap(1).isEmpty()) {
    Data::FieldPtr f = entry_->collection()->fieldByName(TQString::fromLatin1("imdb-rating"));
    if(!f) {
      f = new Data::Field(TQString::fromLatin1("imdb-rating"), i18n("IMDB Rating"), Data::Field::Rating);
      f->setCategory(i18n("General"));
      f->setProperty(TQString::fromLatin1("maximum"), TQString::fromLatin1("10"));
      entry_->collection()->addField(f);
    }

    bool ok;
    float value = rx.cap(1).toFloat(&ok);
    if(ok) {
      entry_->setField(TQString::fromLatin1("imdb-rating"), TQString::number(value));
    }
  }
}

void IMDBFetcher::doCover(const TQString& str_, Data::EntryPtr entry_, const KURL& baseURL_) {
  // cover is the img with the "cover" alt text
  TQRegExp imgRx(TQString::fromLatin1("<img\\s+[^>]*src\\s*=\\s*\"([^\"]*)\"[^>]*>"), false);
  imgRx.setMinimal(true);

  TQRegExp posterRx(TQString::fromLatin1("<a\\s+[^>]*name\\s*=\\s*\"poster\"[^>]*>(.*)</a>"), false);
  posterRx.setMinimal(true);

  const TQString cover = TQString::fromLatin1("cover");

  int pos = posterRx.search(str_);
  while(pos > -1) {
    if(imgRx.search(posterRx.cap(1)) > -1) {
      KURL u(baseURL_, imgRx.cap(1));
      TQString id = ImageFactory::addImage(u, true);
      if(!id.isEmpty()) {
        entry_->setField(cover, id);
      }
      return;
    }
    pos = posterRx.search(str_, pos+1);
  }

  // didn't find the cover, IMDb also used to put "cover" inside the url
  pos = imgRx.search(str_);
  while(pos > -1) {
    if(imgRx.cap(0).find(cover, 0, false) > -1) {
      KURL u(baseURL_, imgRx.cap(1));
      TQString id = ImageFactory::addImage(u, true);
      if(!id.isEmpty()) {
        entry_->setField(cover, id);
      }
      return;
    }
    pos = imgRx.search(str_, pos+1);
  }
}

// this ends up reparsing the whole string, but it's not really that slow
// look at every anchor tag in the string
void IMDBFetcher::doLists(const TQString& str_, Data::EntryPtr entry_) {
  const TQString genre = TQString::fromLatin1("/Genres/");
  const TQString country = TQString::fromLatin1("/Countries/");
  const TQString lang = TQString::fromLatin1("/Languages/");
  const TQString colorInfo = TQString::fromLatin1("color-info");
  const TQString cert = TQString::fromLatin1("certificates=");
  const TQString soundMix = TQString::fromLatin1("sound-mix=");
  const TQString year = TQString::fromLatin1("/Years/");
  const TQString company = TQString::fromLatin1("/company/");

  // IIMdb also has links with the word "sections" in them, remove that
  // for genres and nationalities

  TQStringList genres, countries, langs, certs, tracks, studios;
  for(int pos = s_anchorRx->search(str_); pos > -1; pos = s_anchorRx->search(str_, pos+1)) {
    const TQString cap1 = s_anchorRx->cap(1);
    if(cap1.find(genre) > -1) {
      if(s_anchorRx->cap(2).find(TQString::fromLatin1(" section"), 0, false) == -1) {
        genres += s_anchorRx->cap(2).stripWhiteSpace();
      }
    } else if(cap1.find(country) > -1) {
      if(s_anchorRx->cap(2).find(TQString::fromLatin1(" section"), 0, false) == -1) {
        countries += s_anchorRx->cap(2).stripWhiteSpace();
      }
    } else if(cap1.find(lang) > -1) {
      langs += s_anchorRx->cap(2).stripWhiteSpace();
    } else if(cap1.find(colorInfo) > -1) {
      // change "black and white" to "black & white"
      entry_->setField(TQString::fromLatin1("color"),
                       s_anchorRx->cap(2).replace(TQString::fromLatin1("and"), TQChar('&')).stripWhiteSpace());
    } else if(cap1.find(cert) > -1) {
      certs += s_anchorRx->cap(2).stripWhiteSpace();
    } else if(cap1.find(soundMix) > -1) {
      tracks += s_anchorRx->cap(2).stripWhiteSpace();
    } else if(cap1.find(company) > -1) {
      studios += s_anchorRx->cap(2).stripWhiteSpace();
      // if year field wasn't set before, do it now
    } else if(entry_->field(TQString::fromLatin1("year")).isEmpty() && cap1.find(year) > -1) {
      entry_->setField(TQString::fromLatin1("year"), s_anchorRx->cap(2).stripWhiteSpace());
    }
  }

  entry_->setField(TQString::fromLatin1("genre"), genres.join(sep));
  entry_->setField(TQString::fromLatin1("nationality"), countries.join(sep));
  entry_->setField(TQString::fromLatin1("language"), langs.join(sep));
  entry_->setField(TQString::fromLatin1("audio-track"), tracks.join(sep));
  entry_->setField(TQString::fromLatin1("studio"), studios.join(sep));
  if(!certs.isEmpty()) {
    // first try to set default certification
    const TQStringList& certsAllowed = entry_->collection()->fieldByName(TQString::fromLatin1("certification"))->allowed();
    for(TQStringList::ConstIterator it = certs.begin(); it != certs.end(); ++it) {
      TQString country = (*it).section(':', 0, 0);
      TQString cert = (*it).section(':', 1, 1);
      if(cert == Latin1Literal("Unrated")) {
        cert = TQChar('U');
      }
      cert += TQString::fromLatin1(" (") + country + ')';
      if(certsAllowed.findIndex(cert) > -1) {
        entry_->setField(TQString::fromLatin1("certification"), cert);
        break;
      }
    }

    // now add new field for all certifications
    const TQString allc = TQString::fromLatin1("allcertification");
    if(m_fields.findIndex(allc) > -1) {
      Data::FieldPtr f = entry_->collection()->fieldByName(allc);
      if(!f) {
        f = new Data::Field(allc, i18n("Certifications"), Data::Field::Table);
        f->setFlags(Data::Field::AllowGrouped);
        entry_->collection()->addField(f);
      }
      entry_->setField(TQString::fromLatin1("allcertification"), certs.join(sep));
    }
  }
}

// Start a fetch that refreshes an existing entry.
// If the entry's "imdb" field holds a valid URL, that page is requested
// directly (with the host swapped for m_host when they differ). Otherwise a
// title search is started, and if the entry has no title either,
// signalDone() is emitted right away.
void IMDBFetcher::updateEntry(Data::EntryPtr entry_) {
//  myLog() << "IMDBFetcher::updateEntry() - " << entry_->title() << endl;
  // only take first 5
  m_limit = 5;
  const TQString title = entry_->field(TQString::fromLatin1("title"));
  KURL imdbLink = entry_->field(TQString::fromLatin1("imdb"));

  const bool usableLink = !imdbLink.isEmpty() && imdbLink.isValid();
  if(!usableLink) {
    if(title.isEmpty()) {
      emit signalDone(this); // always need to emit this if not continuing with the search
    } else {
      // optimistically try searching for title and rely on Collection::sameEntry() to figure things out
      search(Fetch::Title, title);
    }
    return;
  }

  // check if we want a different host
  if(imdbLink.host() != m_host) {
//    myLog() << "IMDBFetcher::updateEntry() - switching hosts to " << m_host << endl;
    imdbLink.setHost(m_host);
  }

  m_key = Fetch::Title;
  m_value = title;
  m_started = true;
  m_data.truncate(0);
  m_matches.clear();
  m_url = imdbLink;
  m_redirected = true; // m_redirected is used as a flag later to tell if we get a single result

  m_job = TDEIO::get(m_url, false, false);
  connect(m_job, TQT_SIGNAL(data(TDEIO::Job*, const TQByteArray&)),
          TQT_SLOT(slotData(TDEIO::Job*, const TQByteArray&)));
  connect(m_job, TQT_SIGNAL(result(TDEIO::Job*)),
          TQT_SLOT(slotComplete(TDEIO::Job*)));
  connect(m_job, TQT_SIGNAL(redirection(TDEIO::Job *, const KURL&)),
          TQT_SLOT(slotRedirection(TDEIO::Job*, const KURL&)));
}

// Create a new IMDb configuration widget with parent_ as its parent widget,
// initialised from this fetcher.
Tellico::Fetch::ConfigWidget* IMDBFetcher::configWidget(TQWidget* parent_) const {
  IMDBFetcher::ConfigWidget* widget = new IMDBFetcher::ConfigWidget(parent_, this);
  return widget;
}

// Build the IMDb options page: a host line edit, a spin box for the maximum
// number of cast members, a check box for downloading the cover image, and
// the widget for the optional custom fields.
// When fetcher_ is non-null the controls show its current settings, otherwise
// the defaults (IMDB_SERVER, 10 cast members, images enabled).
IMDBFetcher::ConfigWidget::ConfigWidget(TQWidget* parent_, const IMDBFetcher* fetcher_/*=0*/)
    : Fetch::ConfigWidget(parent_) {
  // grid rows, top to bottom
  const int hostRow = 0;
  const int castRow = 1;
  const int imageRow = 2;
  const int stretchRow = 3;

  TQGridLayout* grid = new TQGridLayout(optionsWidget(), 4, 2);
  grid->setSpacing(4);
  grid->setColStretch(1, 10);

  // host name
  TQLabel* hostLabel = new TQLabel(i18n("Hos&t: "), optionsWidget());
  grid->addWidget(hostLabel, hostRow, 0);
  m_hostEdit = new KLineEdit(optionsWidget());
  connect(m_hostEdit, TQT_SIGNAL(textChanged(const TQString&)), TQT_SLOT(slotSetModified()));
  grid->addWidget(m_hostEdit, hostRow, 1);
  const TQString hostHelp = i18n("The Internet Movie Database uses several different servers. Choose the one "
                                "you wish to use.");
  TQWhatsThis::add(hostLabel, hostHelp);
  TQWhatsThis::add(m_hostEdit, hostHelp);
  hostLabel->setBuddy(m_hostEdit);

  // maximum number of cast members
  TQLabel* castLabel = new TQLabel(i18n("&Maximum cast: "), optionsWidget());
  grid->addWidget(castLabel, castRow, 0);
  m_numCast = new KIntSpinBox(0, 99, 1, 10, 10, optionsWidget());
  connect(m_numCast, TQT_SIGNAL(valueChanged(const TQString&)), TQT_SLOT(slotSetModified()));
  grid->addWidget(m_numCast, castRow, 1);
  const TQString castHelp = i18n("The list of cast members may include many people. Set the maximum number returned from the search.");
  TQWhatsThis::add(castLabel, castHelp);
  TQWhatsThis::add(m_numCast, castHelp);
  castLabel->setBuddy(m_numCast);

  // cover image download, spanning both columns
  m_fetchImageCheck = new TQCheckBox(i18n("Download cover &image"), optionsWidget());
  connect(m_fetchImageCheck, TQT_SIGNAL(clicked()), TQT_SLOT(slotSetModified()));
  grid->addMultiCellWidget(m_fetchImageCheck, imageRow, imageRow, 0, 1);
  const TQString imageHelp = i18n("The cover image may be downloaded as well. However, too many large images in the "
                                 "collection may degrade performance.");
  TQWhatsThis::add(m_fetchImageCheck, imageHelp);

  // the row below the controls takes up the remaining vertical space
  grid->setRowStretch(stretchRow, 10);

  // now add additional fields widget
  addFieldsWidget(IMDBFetcher::customFields(), fetcher_ ? fetcher_->m_fields : TQStringList());

  if(!fetcher_) {
    // defaults
    m_hostEdit->setText(TQString::fromLatin1(IMDB_SERVER));
    m_numCast->setValue(10);
    m_fetchImageCheck->setChecked(true);
  } else {
    m_hostEdit->setText(fetcher_->m_host);
    m_numCast->setValue(fetcher_->m_numCast);
    m_fetchImageCheck->setChecked(fetcher_->m_fetchImages);
  }
}

// Write the widget's current settings to config_ and clear the modified flag.
// The "Host" key is only written when the trimmed host text is not empty.
void IMDBFetcher::ConfigWidget::saveConfig(TDEConfigGroup& config_) {
  const TQString trimmedHost = m_hostEdit->text().stripWhiteSpace();
  if(trimmedHost.isEmpty() == false) {
    config_.writeEntry("Host", trimmedHost);
  }

  const int maxCast = m_numCast->value();
  config_.writeEntry("Max Cast", maxCast);

  const bool fetchImages = m_fetchImageCheck->isChecked();
  config_.writeEntry("Fetch Images", fetchImages);

  saveFieldsConfig(config_);
  slotSetModified(false);
}

// The preferred name for this source is the fetcher's default name.
TQString IMDBFetcher::ConfigWidget::preferredName() const {
  const TQString name = IMDBFetcher::defaultName();
  return name;
}

//static
// Map of the optional field names this fetcher can fill in to their
// translated, user-visible titles.
Tellico::StringMap IMDBFetcher::customFields() {
  StringMap fields;
  fields.insert(TQString::fromLatin1("imdb"),             i18n("IMDB Link"));
  fields.insert(TQString::fromLatin1("imdb-rating"),      i18n("IMDB Rating"));
  fields.insert(TQString::fromLatin1("alttitle"),         i18n("Alternative Titles"));
  fields.insert(TQString::fromLatin1("allcertification"), i18n("Certifications"));
  return fields;
}

#include "imdbfetcher.moc"