diff options
Diffstat (limited to 'src/webquerygooglescholar.cpp')
-rw-r--r-- | src/webquerygooglescholar.cpp | 469 |
1 files changed, 469 insertions, 0 deletions
diff --git a/src/webquerygooglescholar.cpp b/src/webquerygooglescholar.cpp new file mode 100644 index 0000000..a1c2d52 --- /dev/null +++ b/src/webquerygooglescholar.cpp @@ -0,0 +1,469 @@ +/*************************************************************************** + * Copyright (C) 2004-2009 by Thomas Fischer * + * [email protected] * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ***************************************************************************/ +#include <qfile.h> +#include <qapplication.h> +#include <qregexp.h> +#include <qtimer.h> +#include <qmap.h> +#include <qspinbox.h> +#include <kconfig.h> + +#include <klineedit.h> +#include <klocale.h> +#include <kdebug.h> +#include <kmessagebox.h> +#include <kio/job.h> + +#include <dcopref.h> + +#include <settings.h> +#include "webquerygooglescholar.h" + +namespace KBibTeX +{ + WebQueryGoogleScholarWidget::WebQueryGoogleScholarWidget( QWidget *parent, const char *name ) + : WebQueryWidget( parent, name ) + { + init(); + + Settings *settings = Settings::self(); + QString value = settings->getWebQueryDefault( "GoogleScholar" ); + value = value == QString::null ? "" : value; + lineEditQuery->setText( value ); + slotTextChanged( value, true ); + } + + WebQueryGoogleScholar::WebQueryGoogleScholar( QWidget* parent ) + : WebQuery( parent ), m_transferJob( NULL ), m_transferJobBuffer( NULL ) + { + m_importer = new BibTeX::FileImporterBibTeX( FALSE ); + m_importer->setIgnoreComments( TRUE ); + m_widget = new WebQueryGoogleScholarWidget( parent ); + } + + WebQueryGoogleScholar::~WebQueryGoogleScholar() + { + delete m_widget; + delete m_importer; + } + + QString WebQueryGoogleScholar::title() + { + return i18n( "Google Scholar" ); + } + + QString WebQueryGoogleScholar::disclaimer() + { + return i18n( "About Google Scholar" ); + } + + QString WebQueryGoogleScholar::disclaimerURL() + { + return "http://scholar.google.com/intl/en/scholar/about.html"; + } + + WebQueryWidget *WebQueryGoogleScholar::widget() + { + return m_widget; + } + + void WebQueryGoogleScholar::query() + { + WebQuery::query(); + + /** save search term in settings */ + Settings *settings = Settings::self(); + settings->setWebQueryDefault( "GoogleScholar", m_widget->lineEditQuery->text() ); + + /** generate web-save search term */ + m_searchTerm = m_widget->lineEditQuery->text().stripWhiteSpace().replace( '$', "" ); + m_searchTerm = m_searchTerm.replace( "%", "%25" ).replace( "+", "%2B" ).replace( " ", "%20" ).replace( "#", "%23" ).replace( "&", "%26" ).replace( "?", "%3F" ); + if ( m_searchTerm.isEmpty() ) + { + setEndSearch( WebQuery::statusInvalidQuery ); + return; + } + + /** initialize variables */ + m_abort = false; + m_numberOfResults = m_widget->spinBoxMaxHits->value(); + setNumStages( m_numberOfResults + 5 ); + + /** reset KDE configuration for cookie handling */ + readAndChangeConfig(); + + /** prepare HTTP request (buffer, signals, job) */ + m_transferJobBuffer = new QBuffer(); + m_transferJobBuffer->open( IO_WriteOnly ); + KIO::TransferJob* m_transferJob = KIO::get( KURL( "http://scholar.google.com/scholar_ncr" ), false, false ); + connect( m_transferJob, SIGNAL( data( KIO::Job *, const QByteArray & ) ), this, SLOT( slotData( KIO::Job *, const QByteArray & ) ) ); + connect( m_transferJob, SIGNAL( result( KIO::Job * ) ), this, SLOT( slotFinishedStartpage( KIO::Job * ) ) ); + } + + void WebQueryGoogleScholar::cancelQuery() + { + /** user aborted search */ + m_abort = true; + if ( m_transferJob != NULL ) m_transferJob->kill( false ); + setEndSearch( WebQuery::statusError ); + } + + void WebQueryGoogleScholar::slotFinishedStartpage( KIO::Job *job ) + { + /** close and delete buffer (content does not matter) */ + m_transferJobBuffer->close(); + delete m_transferJobBuffer; + + /** if aborted in the mean time, clean up everything */ + if ( m_abort ) + { + restoreConfig(); + return; + } + + /** error occurred */ + if ( job->error() != 0 ) + { + restoreConfig(); + kdDebug() << "Error in slotFinishedStartpage: " << job->error() << endl; + setEndSearch( statusError ); + return; + } + + /** update progress bar */ + enterNextStage(); + + /** prepare next HTTP request for preferences page (buffer, signals, job) */ + m_transferJobBuffer = new QBuffer(); + m_transferJobBuffer->open( IO_WriteOnly ); + KIO::TransferJob* m_transferJob = KIO::get( KURL( "http://scholar.google.com/scholar_preferences?hl=en" ), false, false ); + connect( m_transferJob, SIGNAL( data( KIO::Job *, const QByteArray & ) ), this, SLOT( slotData( KIO::Job *, const QByteArray & ) ) ); + connect( m_transferJob, SIGNAL( result( KIO::Job * ) ), this, SLOT( slotFinishedLoadingSettings( KIO::Job * ) ) ); + + } + + void WebQueryGoogleScholar::slotFinishedLoadingSettings( KIO::Job *job ) + { + /** close and delete buffer (content does not matter) */ + m_transferJobBuffer->close(); + QString htmlCode = textFromBuffer( m_transferJobBuffer ); + delete m_transferJobBuffer; + + /** if aborted in the mean time, clean up everything */ + if ( m_abort ) + { + restoreConfig(); + return; + } + + /** error occurred */ + if ( job->error() != 0 ) + { + restoreConfig(); + kdDebug() << "Error in slotFinishedLoadingSettings: " << job->error() << endl; + setEndSearch( statusError ); + return; + } + + /** update progress bar */ + enterNextStage(); + + /** parse html code to get form values */ + QMap<QString, QString> keyValues = evalFormFields( htmlCode ); + /** set form values for BibTeX search */ + keyValues["scis"] = "yes"; + keyValues["scisf"] = "4"; + keyValues["submit"] = "Save+Preferences"; + keyValues["num"] = QString::number( m_numberOfResults ); + + /** prepare next HTTP request to submit preferences (buffer, signals, job) */ + KURL nextUrl( formFieldsToUrl( "http://scholar.google.com/scholar_setprefs", keyValues ) ); + m_transferJobBuffer = new QBuffer(); + m_transferJobBuffer->open( IO_WriteOnly ); + KIO::TransferJob* m_transferJob = KIO::get( nextUrl, false, false ); + connect( m_transferJob, SIGNAL( data( KIO::Job *, const QByteArray & ) ), this, SLOT( slotData( KIO::Job *, const QByteArray & ) ) ); + connect( m_transferJob, SIGNAL( result( KIO::Job * ) ), this, SLOT( slotFinishedSavingSettings( KIO::Job * ) ) ); + } + + void WebQueryGoogleScholar::slotFinishedSavingSettings( KIO::Job *job ) + { + /** close and delete buffer (content does not matter) */ + m_transferJobBuffer->close(); + QString htmlCode = textFromBuffer( m_transferJobBuffer ); + delete m_transferJobBuffer; + + /** if aborted in the mean time, clean up everything */ + if ( m_abort ) + { + restoreConfig(); + return; + } + + /** error occurred */ + if ( job->error() != 0 ) + { + restoreConfig(); + kdDebug() << "Error in slotFinishedSavingSettings: " << job->error() << endl; + setEndSearch( statusError ); + return; + } + + /** update progress bar */ + enterNextStage(); + + /** parse html code to get form values */ + QMap<QString, QString> keyValues = evalFormFields( htmlCode ); + /** set form values for search */ + keyValues["q"] = m_searchTerm; + keyValues["num"] = QString::number( m_numberOfResults ); + + /** prepare next HTTP request for actual search (buffer, signals, job) */ + KURL nextUrl( formFieldsToUrl( "http://scholar.google.com/scholar", keyValues ) ); + m_transferJobBuffer = new QBuffer(); + m_transferJobBuffer->open( IO_WriteOnly ); + KIO::TransferJob* m_transferJob = KIO::get( nextUrl, false, false ); + connect( m_transferJob, SIGNAL( data( KIO::Job *, const QByteArray & ) ), this, SLOT( slotData( KIO::Job *, const QByteArray & ) ) ); + connect( m_transferJob, SIGNAL( result( KIO::Job * ) ), this, SLOT( slotFinishedReceivingResultOverview( KIO::Job * ) ) ); + } + + void WebQueryGoogleScholar::slotFinishedReceivingResultOverview( KIO::Job *job ) + { + /** close and delete buffer (content does not matter) */ + m_transferJobBuffer->close(); + QString htmlCode = textFromBuffer( m_transferJobBuffer ); + delete m_transferJobBuffer; + + /** if aborted in the mean time, clean up everything */ + if ( m_abort ) + { + restoreConfig(); + return; + } + + /** error occurred */ + if ( job->error() != 0 ) + { + restoreConfig(); + kdDebug() << "Error in slotFinishedReceivingResultOverview: " << job->error() << endl; + setEndSearch( statusError ); + return; + } + + /** update progress bar */ + enterNextStage(); + + /** find all links to BibTeX files in result page */ + QRegExp reBibUrl( "/scholar.bib[^ \">]+" ); + int pos = 0; + while ( !m_aborted && ( pos = htmlCode.find( reBibUrl, pos + 1 ) ) > 0 ) + { + /** download individual BibTeX file for each search hit */ + KURL bibUrl( "http://scholar.google.com" + reBibUrl.cap( 0 ).replace( "&", "&" ) ); + BibTeX::File *tmpBibFile = downloadBibTeXFile( bibUrl ); + + /** update progress bar */ + enterNextStage(); + + /** parse, evaluate and store first BibTeX entry */ + if ( tmpBibFile != NULL ) + { + BibTeX::File::ElementList::iterator it = tmpBibFile->begin(); + if ( it != tmpBibFile->end() ) + { + BibTeX::Entry *entry = dynamic_cast<BibTeX::Entry*>( *it ); + if ( entry != NULL ) + emit foundEntry( new BibTeX::Entry( entry ), false ); + } + delete tmpBibFile; + } + } + + /** restore old cookie configuration */ + restoreConfig(); + + /** set result status */ + if ( m_aborted ) + setEndSearch( statusAborted ); + else + setEndSearch( statusSuccess ); + } + + void WebQueryGoogleScholar::readAndChangeConfig() + { + KConfig cfg( "kcookiejarrc" ); + cfg.setGroup( "Cookie Policy" ); + m_originalEnableCookies = cfg.readBoolEntry( "Cookies", true ); + m_originalSessionCookies = cfg.readBoolEntry( "AcceptSessionCookies", true ); + QStringList cookieSettingsList = QStringList::split( ',', cfg.readEntry( "CookieDomainAdvice", "" ) ); + m_originalCookieGlobalAdvice = cfg.readEntry( "CookieGlobalAdvice", "Accept" ); + + for ( QStringList::Iterator it = cookieSettingsList.begin(); it != cookieSettingsList.end(); ++it ) + { + QStringList keyValue = QStringList::split( ':', *it ); + if ( keyValue.size() == 2 ) + { + m_originalCookieMap[keyValue[0]] = keyValue[1]; + } + } + + cfg.writeEntry( "Cookies", true ); + cfg.writeEntry( "CookieGlobalAdvice", "Accept" ); + cfg.writeEntry( "AcceptSessionCookies", true ); + cookieSettingsList.clear(); + for ( QMap<QString, QString>::Iterator it = m_originalCookieMap.begin(); it != m_originalCookieMap.end(); ++it ) + { + QString value = it.key().contains( ".google." ) ? "Accept" : it.data(); + cookieSettingsList << it.key() + ":" + value; + } + cfg.writeEntry( "CookieDomainAdvice", cookieSettingsList.join( "," ) ); + cfg.sync(); + + ( void )DCOPRef( "kded", "kcookiejar" ).send( "reloadPolicy" ); + } + + void WebQueryGoogleScholar::restoreConfig() + { + KConfig cfg( "kcookiejarrc" ); + cfg.setGroup( "Cookie Policy" ); + cfg.writeEntry( "CookieGlobalAdvice", m_originalCookieGlobalAdvice ); + cfg.writeEntry( "Cookies", m_originalEnableCookies ); + cfg.writeEntry( "AcceptSessionCookies", m_originalSessionCookies ); + QStringList cookieSettingsList; + for ( QMap<QString, QString>::Iterator it = m_originalCookieMap.begin(); it != m_originalCookieMap.end(); ++it ) + cookieSettingsList << it.key() + ":" + it.data(); + cfg.writeEntry( "CookieDomainAdvice", cookieSettingsList.join( "," ) ); + cfg.sync(); + + if ( !m_originalEnableCookies ) + ( void )DCOPRef( "kded", "kcookiejar" ).send( "shutdown" ); + else + ( void )DCOPRef( "kded", "kcookiejar" ).send( "reloadPolicy" ); + } + + QString WebQueryGoogleScholar::textFromBuffer( QBuffer *buffer ) + { + QString htmlCode = ""; + buffer->open( IO_ReadOnly ); + QTextStream ts( buffer ); + while ( !ts.atEnd() ) + htmlCode.append( ts.readLine() ); + buffer->close(); + return htmlCode; + } + + QMap <QString, QString> WebQueryGoogleScholar::evalFormFields( const QString &htmlCode ) + { + QMap<QString, QString> keyValues; + + QRegExp reInput( "<input[^>]+>" ); + QRegExp reSplit( "[<>=\" ]+" ); + int pos = 0; + while (( pos = htmlCode.find( reInput, pos + 1 ) ) > 5 ) + { + QStringList elements = QStringList::split( reSplit, reInput.cap( 0 ) ); + bool checked = false; + bool isCheckable = false; + bool isSubmit = false; + QString key = QString::null; + QString value = QString::null; + for ( QStringList::Iterator it = elements.begin(); it != elements.end(); ++it ) + { + if ( *it == "name" ) + { + ++it; if ( it != elements.end() ) key = *it; else break; + } + if ( *it == "value" ) + { + ++it; if ( it != elements.end() ) value = *it; else + { + value = ""; break; + } + } + if ( *it == "checked" ) + checked = true; + if ( *it == "type" ) + { + ++it; + if ( it == elements.end() ) break; + isCheckable = *it == "radio" || *it == "checkbox"; + isSubmit = *it == "submit"; + } + } + if (( !isCheckable || checked ) && ( !isSubmit || value == "submit" ) && value != QString::null && key != QString::null ) + { + keyValues[key] = value; + } + } + + QRegExp reSelect( "<select name=([^ >\"]+).*</select>" ); + reSelect.setMinimal( true ); + QRegExp reOption( "<option[^>]+>" ); + int pos3 = 0; + while (( pos3 = htmlCode.find( reSelect, pos3 + 1 ) ) > 5 ) + { + QString key = reSelect.cap( 1 ); + QString sub = reSelect.cap( 0 ); + int pos2 = 0; + while (( pos2 = sub.find( reOption, pos2 + 1 ) ) > 5 ) + { + QStringList elements = QStringList::split( reSplit, reOption.cap( 0 ) ); + bool selected = false; + QString value = QString::null; + for ( QStringList::Iterator it = elements.begin(); it != elements.end(); ++it ) + { + if ( *it == "value" ) + { + ++it; if ( it != elements.end() ) value = *it; else + { + value = ""; break; + } + } + if ( *it == "selected" ) + selected = true; + } + if ( selected && value != QString::null && key != QString::null ) + { + keyValues[key] = value; + } + } + } + + return keyValues; + } + + QString WebQueryGoogleScholar::formFieldsToUrl( const QString &prefix, const QMap<QString, QString> &keyValues ) + { + bool first = true; + QString nextUrl = prefix; + for ( QMap<QString, QString>::ConstIterator it = keyValues.begin(); it != keyValues.end(); ++it ) + { + if ( first ) + nextUrl.append( "?" ); + else + nextUrl.append( "&" ); + first = false; + nextUrl.append( it.key() + "=" + it.data() ); + } + + return nextUrl; + } + +} +#include "webquerygooglescholar.moc" |