summaryrefslogtreecommitdiffstats
path: root/reader/src/formats/html/HtmlReader.h
blob: 876fad82b2310a10795b87776e64144d42ffe9f2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
/*
 * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */

#ifndef __HTMLREADER_H__
#define __HTMLREADER_H__

#include <string>
#include <vector>

#include <ZLEncodingConverter.h>
#include "../EncodedTextReader.h"

class ZLInputStream;

// Abstract base class for HTML readers built on top of EncodedTextReader.
//
// A concrete reader derives from this class and implements the four pure
// virtual handlers declared below; readDocument() is the public entry point
// that takes the input stream. The constructor and destructor are protected,
// so the class can only be created and destroyed through a subclass.
// NOTE(review): the implementation of readDocument() is not part of this
// header; presumably it parses the stream and invokes the handlers -- confirm
// in HtmlReader.cpp.
class HtmlReader : public EncodedTextReader {

public:
	// One attribute of an HTML tag.
	struct HtmlAttribute {
		// Attribute name, as passed to the constructor.
		std::string Name;
		// Attribute value; only meaningful when HasValue is true.
		std::string Value;
		// false after construction, switched to true by setValue().
		bool HasValue;

		// Creates an attribute with the given name and no value.
		HtmlAttribute(const std::string &name);
		~HtmlAttribute();
		// Stores `value` and marks the attribute as having a value.
		void setValue(const std::string &value);
	};

	// Description of a single tag, handed to tagHandler().
	struct HtmlTag {
		// Tag name.
		std::string Name;
		// NOTE(review): presumably the position of the tag in the input
		// stream -- confirm against the code that fills it in.
		std::size_t Offset;
		// Set to true by the constructor.
		// NOTE(review): presumably true for an opening tag and false for a
		// closing one -- confirm against setTag().
		bool Start;
		// Attributes in the order in which addAttribute() was called.
		std::vector<HtmlAttribute> Attributes;

		HtmlTag();
		~HtmlTag();
		// Appends a new, value-less attribute called `name`.
		void addAttribute(const std::string &name);
		// Sets the value of the most recently added attribute;
		// does nothing when there are no attributes yet.
		void setLastAttributeValue(const std::string &value);

	private:
		// Copy constructor and copy assignment are declared private and no
		// definition appears in this header, so HtmlTag objects are
		// presumably meant to be non-copyable.
		HtmlTag(const HtmlTag&);
		const HtmlTag &operator = (const HtmlTag&);
	};

private:
	// NOTE(review): presumably fills `tag` from the raw tag text `fullName`;
	// the definition is not visible in this header.
	static void setTag(HtmlTag &tag, const std::string &fullName);

public:
	// Entry point: processes the document provided by `stream`.
	virtual void readDocument(ZLInputStream &stream);

protected:
	// `encoding` names the text encoding of the document.
	// NOTE(review): presumably forwarded to EncodedTextReader -- confirm.
	HtmlReader(const std::string &encoding);
	virtual ~HtmlReader();

protected:
	// Hooks to be implemented by subclasses.
	// NOTE(review): presumably called once before / once after the document
	// content is processed -- confirm in readDocument().
	virtual void startDocumentHandler() = 0;
	virtual void endDocumentHandler() = 0;

	// returns false iff processing must be stopped
	virtual bool tagHandler(const HtmlTag &tag) = 0;
	// returns false iff processing must be stopped
	// `text` points to `len` characters of data.
	// NOTE(review): `convert` presumably tells whether the data still has to
	// go through the encoding converter -- confirm in the implementation.
	virtual bool characterDataHandler(const char *text, std::size_t len, bool convert) = 0;

private:
	// NOTE(review): presumably appends `from` to `to`; `from` is taken by
	// non-const reference, so it may be modified -- confirm in the .cpp file.
	void appendString(std::string &to, std::string &from);
};

// Builds an attribute called `name` that has no value yet:
// Value starts out empty and HasValue is false.
inline HtmlReader::HtmlAttribute::HtmlAttribute(const std::string &name)
	: Name(name),
	  Value(),
	  HasValue(false) {
}
// Nothing to release explicitly; the string members clean up after themselves.
inline HtmlReader::HtmlAttribute::~HtmlAttribute() {
}
// Stores a copy of `value` in the attribute and records that a value is present.
inline void HtmlReader::HtmlAttribute::setValue(const std::string &value) {
	// The flag is raised only after the copy has succeeded.
	Value = value;
	HasValue = true;
}

// Creates an empty tag: Name and Attributes are empty, Start is true.
// Offset is explicitly zeroed so that a freshly constructed tag never carries
// an indeterminate value in that member.
inline HtmlReader::HtmlTag::HtmlTag() : Offset(0), Start(true) {}
// Nothing to release explicitly; the string and vector members clean up
// after themselves.
inline HtmlReader::HtmlTag::~HtmlTag() {
}
inline void HtmlReader::HtmlTag::addAttribute(const std::string &name) { Attributes.push_back(HtmlAttribute(name)); }
// Assigns `value` to the attribute that was added last.
// When no attribute has been added yet the call is silently ignored.
inline void HtmlReader::HtmlTag::setLastAttributeValue(const std::string &value) {
	if (Attributes.empty()) {
		return;
	}
	Attributes.back().setValue(value);
}

#endif /* __HTMLREADER_H__ */