1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
|
/***************************************************************************
* Copyright (C) 2004 by Paulo Moura Guedes *
* [email protected] *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the *
* Free Software Foundation, Inc., *
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
***************************************************************************/
#ifndef LINKCHECKER_H
#define LINKCHECKER_H
#include <tqobject.h>
#include <tqthread.h>
#include <tqstring.h>
#include <tdeio/jobclasses.h>
class TDEHTMLPart;
#include "../parser/http.h"
#include "linkstatus.h"
class SearchManager;
#include <iostream>
using namespace std;
/**
 * TQObject-derived class declaring the interface used to check one link,
 * represented by a LinkStatus. It exposes check(), two signals that carry
 * the checker (and the LinkStatus), and slots that receive TDEIO::Job
 * callbacks.
 *
 * NOTE(review): only the declaration is visible in this header; remarks
 * below marked "presumably" are inferred from names and must be confirmed
 * against the implementation file.
 *
 * @author Paulo Moura Guedes
 */
class LinkChecker : public TQObject
{
Q_OBJECT
public:
/**
 * @param linkstatus the link to work on (presumably kept in linkstatus_ -- confirm).
 * @param time_out   defaults to 50; the unit is not visible in this header -- TODO confirm.
 * @param parent     TQObject parent, defaults to 0.
 * @param name       TQObject name, defaults to 0.
 */
LinkChecker(LinkStatus* linkstatus, int time_out = 50,
TQObject *parent = 0, const char *name = 0);
~LinkChecker();
//virtual void run();
/** Entry point of the check (presumably starts the transfer job -- confirm). */
void check();
/** Sets the SearchManager associated with this checker (see search_manager_). */
void setSearchManager(SearchManager* search_manager);
/** @return the LinkStatus pointer held in linkstatus_ (defined inline below the class). */
LinkStatus const* linkStatus() const;
/**
 * Static helper taking an HTML part and an anchor name.
 * @return presumably true when @p anchor exists in @p html_part -- confirm.
 */
static bool hasAnchor(TDEHTMLPart* html_part, TQString const& anchor);
signals:
/** Emitted with the checked LinkStatus and the emitting checker. */
void transactionFinished(const LinkStatus * linkstatus,
LinkChecker * checker);
/**
 * Emitted with the checker whose job ended.
 * NOTE(review): the spelling "Finnished" is part of the signal name; any
 * connection made by name must use the same spelling.
 */
void jobFinnished(LinkChecker * checker);
protected slots:
// Slots taking a TDEIO::Job* as first argument; presumably connected to the
// signals of t_job_ -- confirm in the implementation.
void slotData(TDEIO::Job *, const TQByteArray &data);
void slotRedirection (TDEIO::Job *, const KURL &url);
void slotMimetype(TDEIO::Job *, const TQString &type);
void slotResult(TDEIO::Job* job);
/** Time-out handler (presumably driven by time_out_ -- confirm). */
void slotTimeOut();
protected:
/** Finalization step of a check; spelling "finnish" is kept because it is the declared name. */
void finnish();
/**
 * Builds an HttpResponseHeader for @p job.
 * @param remember_check defaults to true (presumably records the header as
 *                       checked, see header_checked_ -- confirm).
 */
HttpResponseHeader getHttpHeader(TDEIO::Job* job, bool remember_check = true);
void checkRef(); // #...
private:
LinkStatus::Status getHttpStatus() const;
// Overloads of checkRef() taking the parent LinkStatus or a URL.
void checkRef(LinkStatus const* linkstatus_parent);
void checkRef(KURL const& url);
void killJob();
/**
 * @param url
 * @return false if the redirection was already checked by the search manager
 */
bool processRedirection(KURL const& url);
/** Looks for the document charset in @p data (presumably updates document_charset_ -- confirm). */
void findDocumentCharset(TQString const& data);
private:
// NOTE: member declaration order determines initialization order; do not reorder.
SearchManager* search_manager_;
LinkStatus* const linkstatus_; // const pointer: cannot be reseated after construction
TDEIO::TransferJob* t_job_;
int time_out_;
LinkChecker* checker_;
TQString document_charset_;
/* A redirection has happened, with the current URL. Several redirections
can happen until the final URL is reached. */
bool redirection_;
KURL redirection_url_;
TQString doc_html_;
bool header_checked_;
bool finnished_;
bool parsing_;
/**
 * Whether the charset of the document is already checked.
 * (e.g. <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>)
 */
bool is_charset_checked_;
/**
 * Whether the page defines the encoding (latin1, utf8, etc).
 * According to the spec (http://www.w3.org/TR/html4/charset.html),
 * it first checks the server response and then the info in the HTML meta element.
 */
bool has_defined_charset_;
static int count_; // debug attribute that counts how many links were checked
};
/**
 * Read-only accessor for the LinkStatus pointer stored in linkstatus_.
 * @return the stored pointer, exposed as pointer-to-const.
 */
inline const LinkStatus* LinkChecker::linkStatus() const
{
    const LinkStatus* const status = this->linkstatus_;
    return status;
}
#endif
|