summaryrefslogtreecommitdiffstats
path: root/klinkstatus/src/engine/linkchecker.h
blob: c1ecb213f252f00a298e2a2d647f2b7804ca573b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
/***************************************************************************
 *   Copyright (C) 2004 by Paulo Moura Guedes                                     *
 *   [email protected]                                            *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.             *
 ***************************************************************************/
#ifndef LINKCHECKER_H
#define LINKCHECKER_H

#include <tqobject.h>
#include <tqthread.h>
#include <tqstring.h>

#include <kio/jobclasses.h>
class KHTMLPart;

#include "../parser/http.h"
#include "linkstatus.h"
class SearchManager;

#include <iostream>
using namespace std;

/**
 * Checker object for a single link, which is described by the LinkStatus
 * handed to the constructor.
 *
 * The class is a TQObject: it reports progress through the signals declared
 * below and receives events through protected slots. The slots take
 * KIO::Job pointers and the class keeps a KIO::TransferJob*, so the check is
 * presumably driven by a KIO transfer job -- the implementation is not part of
 * this header, confirm in the corresponding source file.
 *
 * @author Paulo Moura Guedes
 */
class LinkChecker : public TQObject
{
    Q_OBJECT
  TQ_OBJECT
public:
    /**
     * @param linkstatus the link description this checker works on
     * @param time_out   time-out value; defaults to 50
     *                   (NOTE(review): unit is not stated here, presumably seconds -- confirm)
     * @param parent     TQObject parent, may be 0
     * @param name       TQObject name, may be 0
     */
    LinkChecker(LinkStatus* linkstatus, int time_out = 50,
                TQObject *parent = 0, const char *name = 0);
    ~LinkChecker();

    //virtual void run();
    /** Entry point of the check (presumably starts the transfer -- see the implementation). */
    void check();
    /** Supplies the SearchManager; processRedirection() is documented as consulting it. */
    void setSearchManager(SearchManager* search_manager);

    /** Read-only access to the LinkStatus of this checker (defined inline below the class). */
    LinkStatus const* linkStatus() const;

    /**
     * Static helper that takes a KHTMLPart and an anchor name.
     * NOTE(review): presumably tells whether the document held by html_part
     * contains the given anchor -- confirm against the implementation.
     */
    static bool hasAnchor(KHTMLPart* html_part, TQString const& anchor);

signals:

    /** Carries the link status and the emitting checker. */
    void transactionFinished(const LinkStatus * linkstatus,
                             LinkChecker * checker);
    /** Carries the emitting checker. (The misspelled name is part of the interface.) */
    void jobFinnished(LinkChecker * checker);

protected slots:

    // The signatures below mirror KIO job notifications (data, redirection,
    // mimetype, result). Presumably they are connected to the job held in
    // t_job_; the connections are not visible in this header.
    void slotData(KIO::Job *, const TQByteArray &data);
    void slotRedirection (KIO::Job *, const KURL &url);
    void slotMimetype(KIO::Job *, const TQString &type);
    void slotResult(KIO::Job* job);
    /** Time-out handler (presumably related to time_out_ -- confirm). */
    void slotTimeOut();

protected:

    void finnish();
    /**
     * Builds an HttpResponseHeader for the given job.
     * @param job            the KIO job to read the header from
     * @param remember_check defaults to true
     *                       (NOTE(review): presumably controls whether header_checked_ is updated -- confirm)
     */
    HttpResponseHeader getHttpHeader(KIO::Job* job, bool remember_check = true);
    void checkRef(); // #...

private:
    
    LinkStatus::Status getHttpStatus() const;
    // Private overloads of checkRef(): one takes the parent link status,
    // the other a URL.
    void checkRef(LinkStatus const* linkstatus_parent);
    void checkRef(KURL const& url);
    void killJob();    
    /**
     * @param url the URL of the redirection
     * @return false if the redirection was already checked by the search manager
     */
    bool processRedirection(KURL const& url);
    
    /**
     * Takes document text as input.
     * NOTE(review): presumably looks for the charset declaration in it and
     * records the result in document_charset_ -- confirm.
     */
    void findDocumentCharset(TQString const& data);

private:

    SearchManager* search_manager_;
    /** The pointer itself is const; the pointed-to LinkStatus is not. */
    LinkStatus* const linkstatus_;
    KIO::TransferJob* t_job_;
    int time_out_;
    LinkChecker* checker_;
    TQString document_charset_;
    /**
     * A redirection has happened, with the current URL. Several redirections
     * can happen until the final URL is reached.
     */
    bool redirection_;
    KURL redirection_url_;
    TQString doc_html_;
    bool header_checked_;
    bool finnished_;
    bool parsing_;
    
    /**
     * Whether the charset of the document is already checked.
     * (e.g. <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>)
     */
    bool is_charset_checked_;
    /**
     * Whether the page defines the encoding (latin1, utf8, etc).
     * According to the spec (http://www.w3.org/TR/html4/charset.html),
     * the server response is checked first and then the info in the html meta element.
     */
    bool has_defined_charset_;
    
    static int count_; // debug attribute that counts how many links were checked
};

/**
 * Accessor for the link status held by this checker.
 *
 * @return the linkstatus_ member, exposed as a pointer to const so that
 *         callers cannot modify the LinkStatus through it
 */
inline LinkStatus const* LinkChecker::linkStatus() const
{
    LinkStatus const* const held_status = linkstatus_;
    return held_status;
}


#endif