summaryrefslogtreecommitdiffstats
path: root/src/webqueryciteseerx.h
blob: fad8d60780bfabd4197b99aaa5567d4ff98c02da (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
/***************************************************************************
 *   Copyright (C) 2008 by Jacob Kanev <[email protected]>,                 *
 *   Thomas Fischer <[email protected]>                            *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
 ***************************************************************************/
#ifndef KBIBTEXWEBQUERYCITESEERX_H
#define KBIBTEXWEBQUERYCITESEERX_H

#include <tqstring.h>
#include <tqstringlist.h>

#include <tdeio/job.h>

#include "fileimporterbibtex.h"
#include "webquery.h"

#include <deque>

class TQBuffer;

namespace KBibTeX
{
    /**
    GUI widget used by the CiteSeerX web query.

    The class derives from WebQueryWidget and declares nothing but a
    constructor here; everything else comes from the base class or from the
    implementation file.

    @author Thomas Fischer <[email protected]>
    */

    class WebQueryCiteSeerXWidget : public WebQueryWidget
    {
        Q_OBJECT

    public:
        /// Construct the widget.
        /** @param parent parent widget
        @param name object name, defaults to 0; presumably forwarded to WebQueryWidget (confirm in the implementation file) */
        WebQueryCiteSeerXWidget( TQWidget *parent, const char *name = 0 );
    };


    // Forward declaration: the member-function-pointer typedef below has to
    // name the class before the class itself is defined.

    class WebQueryCiteSeerX;
    /// Convenience typedef for a member pointer to a parsing function.
    /** A DataParser points to a WebQueryCiteSeerX member function which takes the data to be parsed as a const TQString reference and returns nothing. */
    typedef void ( WebQueryCiteSeerX::* DataParser )( const TQString & );

    /// Query the CiteSeerX database.
    /** This class is used for querying the CiteSeerX database. When this class was written CiteSeerX was still beta, so the class has to be adapted as soon as the CiteSeer people change their web interface.

    After entering the search term, CiteSeerX returns a summary page with 10 links (one for each paper) and one link for the next 10 hits. This class uses a queue to schedule each reading job, and two parsing functions: one for the summary page and one for each paper result.

    The BibTeX fields abstract, title, author, year, journal, and pages are found.
    @author Jacob Kanev <[email protected]> */

    class WebQueryCiteSeerX : public WebQuery
    {
        Q_OBJECT

    public:

        /// A single scheduled reading job.
        /** Pairs the address to read from with the parsing function which is to handle the data read from that address. */
        struct DataRequest
        {
            /// Address to read from.
            KURL url;
            /// Parsing function to apply to the data read from url.
            DataParser parser;
        };

        /// Construct.
        /** @param parent parent widget
        NOTE(review): single-argument constructor is not marked explicit, so a TQWidget* converts implicitly to a WebQueryCiteSeerX; confirm whether any caller relies on that before changing it. */
        WebQueryCiteSeerX( TQWidget* parent );

        /// Destroy.
        virtual ~WebQueryCiteSeerX();

        /// Main function: start query.
        void query();

        /// Return the title of this web query.
        TQString title();

        /// Return the disclaimer text of this web query.
        TQString disclaimer();

        /// Return the URL belonging to the disclaimer.
        TQString disclaimerURL();

        /// Return the GUI element of this web query.
        /** Presumably this is the widget stored in m_widget; confirm in the implementation file. */
        WebQueryWidget *widget();

    protected:
        /// Callback for cancelling the query.
        void cancelQuery();

    private slots:

        /// Callback when the job is finished.
        /** Reads the data from the job, and hands it over to the currently set parser.
        @param job the finished job */
        void getData( TDEIO::Job *job );

    private:

        /// Parses the main page and schedules single-paper reading jobs.
        /** Function parses the summary page, and schedules one job for each paper link, and one job for the "Next 10" summary page.
        @param data text of the summary page */
        void parseSummaryPage( const TQString &data );

        /// Parses single-paper pages.
        /** Function reads the "Abstract:" and the "@entrytype{" strings found in the html page.
        @param data text of the paper page */
        void parsePaperPage( const TQString &data );

        /// Execute next waiting job.
        /** Takes the next query out of the queue, sets the appropriate parser, and schedules getData with the URL. */
        void nextJob();

        /// Find single BibTeX field in html page and add to entry.
        /** Function uses the first collected text from the description (a regular expression), and adds it as type "type" to the "entry".
        @param description regular expression describing the text to look for
        @param data text of the html page
        @param entry entry the found text is added to
        @param type field type under which the found text is added
        NOTE(review): description is passed by value while data is passed by const reference; changing that requires touching the implementation file as well. */
        void parseForSingleExpression( TQString description, const TQString &data, BibTeX::Entry *entry, BibTeX::EntryField::FieldType type );

        /// The currently active parser.
        /** Member function pointer to which getData hands over the data it has read. */
        DataParser m_currentParser;

        /// The internet address of CiteSeerX.
        TQString m_citeSeerXServer;

        /// Number of hits desired by user.
        int m_desiredHits;

        /// Number of hits read from summary pages.
        int m_receivedHits;

        /// Queue with the waiting queries, processed by nextJob.
        std::deque<DataRequest> m_queryQueue;

        /// Pointer to the GUI widget of this query.
        WebQueryCiteSeerXWidget *m_widget;
    };

}

#endif