Rename to tde-ebook-reader

Signed-off-by: Michele Calgaro <[email protected]>
author: Michele Calgaro <[email protected]> 2024-06-07 23:30:05 +0900
committer: Michele Calgaro <[email protected]> 2024-06-07 23:30:05 +0900
commit: 17b259df9cb6b28779d4881b2b6c805ee2e48eea (patch)
tree: 5ed61937459cb7081089111b0242c01ec178f1f3 /fbreader/src/formats/html/HtmlReader.cpp
parent: 1cba8bce178eb2d6719c6f7f21e2c9352c5513a6 (diff)
download: tde-ebook-reader-17b259df9cb6b28779d4881b2b6c805ee2e48eea.tar.gz
tde-ebook-reader-17b259df9cb6b28779d4881b2b6c805ee2e48eea.zip
1 files changed, 0 insertions, 373 deletions
diff --git a/fbreader/src/formats/html/HtmlReader.cpp b/fbreader/src/formats/html/HtmlReader.cpp
deleted file mode 100644
index a5ce7fa..0000000
--- a/fbreader/src/formats/html/HtmlReader.cpp
+++ /dev/null
@@ -1,373 +0,0 @@
-/*
- * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- */
-
-#include <algorithm>
-#include <cctype>
-#include <cstdlib>
-#include <string>
-#include <vector>
-
-#include <ZLInputStream.h>
-#include <ZLXMLReader.h>
-#include <ZLFile.h>
-#include <ZLStringUtil.h>
-#include <ZLUnicodeUtil.h>
-
-#include "HtmlReader.h"
-#include "HtmlEntityCollection.h"
-
-// Creates a reader for the given encoding; the encoding name is handed
-// straight to the EncodedTextReader base and nothing else is initialised here.
-HtmlReader::HtmlReader(const std::string &encoding)
-	: EncodedTextReader(encoding) {}
-
-// The destructor body is intentionally empty.
-HtmlReader::~HtmlReader() {}
-
-// Re-initialises `tag` from the raw text read between '<' and the first
-// delimiter.
-//
-// The attribute list is always cleared.  A leading '/' marks a closing tag:
-// it sets tag.Start to false and is stripped from the stored name.  The name
-// is stored in upper case.
-//
-// For an empty `name`, tag.Name becomes empty and tag.Start is left untouched.
-void HtmlReader::setTag(HtmlTag &tag, const std::string &name) {
-	tag.Attributes.clear();
-
-	if (name.empty()) {
-		tag.Name = name;
-		return;
-	}
-
-	tag.Start = name[0] != '/';
-	if (tag.Start) {
-		tag.Name = name;
-	} else {
-		tag.Name = name.substr(1);
-	}
-
-	const std::size_t len = tag.Name.length();
-	for (std::size_t i = 0; i < len; ++i) {
-		// std::toupper() is undefined for negative arguments other than EOF,
-		// so bytes >= 0x80 must be widened through unsigned char first.
-		const unsigned char ch = static_cast<unsigned char>(tag.Name[i]);
-		tag.Name[i] = static_cast<char>(std::toupper(ch));
-	}
-}
-
-// States of the character-level parser driven by HtmlReader::readDocument().
-enum ParseState {
-	PS_TEXT, // character data outside of any tag
-	PS_TAGSTART, // the character right after '<'
-	PS_TAGNAME, // reading a tag name
-	PS_WAIT_END_OF_TAG, // tag already reported after a '/'; skipping up to '>'
-	PS_ATTRIBUTENAME, // reading an attribute name
-	PS_ATTRIBUTEVALUE, // reading an attribute value (after '=')
-	PS_SKIPTAG, // tag with an empty name; skipping up to '>'
-	PS_COMMENT, // after "<!"; left on "-->" or when it is not a "<!--" comment
-	PS_SPECIAL, // after '&' inside character data
-	PS_SPECIAL_IN_ATTRIBUTEVALUE, // after '&' inside an attribute value
-};
-
-// Kind of '&' character reference currently being scanned.
-enum SpecialType {
-	ST_UNKNOWN, // nothing has been read after the '&' yet
-	ST_NUM, // '#' seen; decimal or hexadecimal not decided yet
-	ST_NAME, // named reference (first character was a letter)
-	ST_DEC, // '#' followed by a decimal digit
-	ST_HEX // '#' followed by 'x'
-};
-
-// Tells whether `ch` may continue a character reference of the given kind:
-// letters for ST_NAME, decimal digits for ST_DEC, hexadecimal digits for
-// ST_HEX.  Every other kind accepts nothing.
-static bool allowSymbol(SpecialType type, char ch) {
-	// The <cctype> classifiers are undefined for negative arguments other than
-	// EOF, so the byte is widened through unsigned char before being tested.
-	const unsigned char uch = static_cast<unsigned char>(ch);
-	return
-		(type == ST_NAME && std::isalpha(uch)) ||
-		(type == ST_DEC && std::isdigit(uch)) ||
-		(type == ST_HEX && std::isxdigit(uch));
-}
-
-// Converts the text of a character reference (everything between '&' and ';')
-// into a character number.
-//
-//   ST_NAME  `txt` is an entity name, looked up in HtmlEntityCollection.
-//   ST_DEC   `txt` is '#' followed by decimal digits.
-//   ST_HEX   `txt` is "#x" followed by hexadecimal digits.
-//
-// Returns 0 for any other type, when `txt` is too short to hold any digits,
-// and when the numeric value lies outside 1..0x10FFFF.
-static int specialSymbolNumber(SpecialType type, const std::string &txt) {
-	std::size_t prefixLength = 0; // characters preceding the digits
-	int base = 0;
-	switch (type) {
-		case ST_NAME:
-			return HtmlEntityCollection::symbolNumber(txt);
-		case ST_DEC:
-			prefixLength = 1;
-			base = 10;
-			break;
-		case ST_HEX:
-			prefixLength = 2;
-			base = 16;
-			break;
-		default:
-			return 0;
-	}
-
-	// Never step past the end of the string when skipping the prefix.
-	if (txt.length() <= prefixLength) {
-		return 0;
-	}
-
-	// strtol() returns a long and saturates on overflow; converting that
-	// blindly to int would produce an arbitrary number, so anything that is
-	// not a Unicode code point is reported as 0 instead.
-	const long value = std::strtol(txt.c_str() + prefixLength, 0, base);
-	if (value <= 0 || value > 0x10FFFFL) {
-		return 0;
-	}
-	return static_cast<int>(value);
-}
-
-// Appends `from` to `to` and then empties `from`.
-// When a converter is set, the text goes through myConverter->convert() and
-// the converter is reset afterwards; otherwise the text is appended verbatim.
-void HtmlReader::appendString(std::string &to, std::string &from) {
-	if (!myConverter.isNull()) {
-		myConverter->convert(to, from);
-		myConverter->reset();
-	} else {
-		to.append(from);
-	}
-	from.clear();
-}
-
-// Parses the whole of `stream` as HTML and reports what it finds through the
-// handler methods: startDocumentHandler() once the stream is open, then
-// characterDataHandler() for text and tagHandler() for tags, and finally
-// endDocumentHandler().  If the stream cannot be opened, no handler is called.
-//
-// Input is consumed in BUFSIZE-byte chunks and fed through a character-level
-// state machine (see ParseState).  Inside a chunk, `start` marks the first
-// byte of the token being scanned.  A token cut by a chunk boundary is parked
-// in currentString (tag/attribute text) or specialString (character
-// reference) and completed from the next chunk.
-//
-// Parsing stops early when tagHandler(), or characterDataHandler() for plain
-// text, returns false; endDocumentHandler() and stream.close() still run.
-//
-// NOTE(review): a '&' that turns out not to start a character reference is
-// dropped together with the characters scanned after it, and the character
-// that ended the scan is not re-examined in the restored state.  This is
-// inherited behaviour, kept unchanged here.
-void HtmlReader::readDocument(ZLInputStream &stream) {
-	if (!stream.open()) {
-		return;
-	}
-
-	startDocumentHandler();
-
-	ParseState state = PS_TEXT;
-	SpecialType state_special = ST_UNKNOWN;
-	std::string currentString;
-	std::string attributeValueString;
-	std::string specialString;
-	// Number of '"' seen in the current attribute value; exactly 1 means the
-	// parser is inside an open quoted value.
-	int quotationCounter = 0;
-	HtmlTag currentTag;
-	// Last two characters seen while in PS_COMMENT, used to detect "-->".
-	char endOfComment[2] = "\0";
-
-	const std::size_t BUFSIZE = 2048;
-	// Owned by a vector so the chunk buffer is released on every exit path.
-	std::vector<char> storage(BUFSIZE);
-	char *const buffer = &storage[0];
-	std::size_t length;
-	std::size_t offset = 0;
-	do {
-		length = stream.read(buffer, BUFSIZE);
-		char *start = buffer;
-		char *endOfBuffer = buffer + length;
-		for (char *ptr = buffer; ptr < endOfBuffer; ++ptr) {
-			switch (state) {
-				case PS_TEXT:
-					if (*ptr == '<') {
-						if (!characterDataHandler(start, ptr - start, true)) {
-							goto endOfProcessing;
-						}
-						start = ptr + 1;
-						state = PS_TAGSTART;
-						currentTag.Offset = offset + (ptr - buffer);
-					} else if (*ptr == '&') {
-						if (!characterDataHandler(start, ptr - start, true)) {
-							goto endOfProcessing;
-						}
-						start = ptr + 1;
-						state = PS_SPECIAL;
-						state_special = ST_UNKNOWN;
-					}
-					break;
-				case PS_SPECIAL:
-				case PS_SPECIAL_IN_ATTRIBUTEVALUE:
-					if (state_special == ST_UNKNOWN) {
-						if (*ptr == '#') {
-							state_special = ST_NUM;
-						} else if (std::isalpha(static_cast<unsigned char>(*ptr))) {
-							state_special = ST_NAME;
-						} else {
-							// Not a reference after all.  Forget anything parked
-							// at a chunk boundary so it cannot leak into the
-							// next reference.
-							specialString.erase();
-							start = ptr;
-							state = (state == PS_SPECIAL) ? PS_TEXT : PS_ATTRIBUTEVALUE;
-						}
-					} else if (state_special == ST_NUM) {
-						if (*ptr == 'x') {
-							state_special = ST_HEX;
-						} else if (std::isdigit(static_cast<unsigned char>(*ptr))) {
-							state_special = ST_DEC;
-						} else {
-							specialString.erase();
-							start = ptr;
-							state = (state == PS_SPECIAL) ? PS_TEXT : PS_ATTRIBUTEVALUE;
-						}
-					} else {
-						if (*ptr == ';') {
-							specialString.append(start, ptr - start);
-							int number = specialSymbolNumber(state_special, specialString);
-							if ((128 <= number) && (number <= 159)) {
-								// Numbers 128..159 are passed on as a single raw
-								// byte to be converted from the document encoding.
-								char ch = number;
-								if (state == PS_SPECIAL) {
-									characterDataHandler(&ch, 1, true);
-								} else if (myConverter.isNull()) {
-									// Same fallback as appendString(): without a
-									// converter the byte is stored unchanged.
-									attributeValueString += ch;
-								} else {
-									myConverter->convert(attributeValueString, &ch, &ch + 1);
-								}
-							} else if (number != 0) {
-								char utf8[4];
-								int len = ZLUnicodeUtil::ucs4ToUtf8(utf8, number);
-								if (state == PS_SPECIAL) {
-									characterDataHandler(utf8, len, false);
-								} else {
-									attributeValueString.append(utf8, len);
-								}
-							} else {
-								// Unresolved reference: pass it through literally.
-								specialString = "&" + specialString + ";";
-								if (state == PS_SPECIAL) {
-									characterDataHandler(specialString.c_str(), specialString.length(), false);
-								} else {
-									attributeValueString += specialString;
-								}
-							}
-							specialString.erase();
-							start = ptr + 1;
-							state = (state == PS_SPECIAL) ? PS_TEXT : PS_ATTRIBUTEVALUE;
-						} else if (!allowSymbol(state_special, *ptr)) {
-							specialString.erase();
-							start = ptr;
-							state = (state == PS_SPECIAL) ? PS_TEXT : PS_ATTRIBUTEVALUE;
-						}
-					}
-					break;
-				case PS_TAGSTART:
-					state = (*ptr == '!') ? PS_COMMENT : PS_TAGNAME;
-					break;
-				case PS_COMMENT:
-					if ((endOfComment[0] == '\0') && (*ptr != '-')) {
-						// "<!" not followed by "--": handled as an ordinary tag.
-						state = PS_TAGNAME;
-					} else if ((endOfComment[0] == '-') && (endOfComment[1] == '-') && (*ptr == '>')) {
-						start = ptr + 1;
-						state = PS_TEXT;
-						endOfComment[0] = '\0';
-						endOfComment[1] = '\0';
-					} else {
-						endOfComment[0] = endOfComment[1];
-						endOfComment[1] = *ptr;
-					}
-					break;
-				case PS_WAIT_END_OF_TAG:
-					if (*ptr == '>') {
-						start = ptr + 1;
-						state = PS_TEXT;
-					}
-					break;
-				case PS_TAGNAME:
-					if (*ptr == '>' || *ptr == '/' || std::isspace(static_cast<unsigned char>(*ptr))) {
-						currentString.append(start, ptr - start);
-						start = ptr + 1;
-						setTag(currentTag, currentString);
-						currentString.erase();
-						if (currentTag.Name == "") {
-							state = *ptr == '>' ? PS_TEXT : PS_SKIPTAG;
-						} else {
-							if (*ptr == '>') {
-								if (!tagHandler(currentTag)) {
-									goto endOfProcessing;
-								}
-								state = PS_TEXT;
-							} else if (*ptr == '/') {
-								// Self-closing tag: report it as opened, then
-								// again with Start cleared.
-								if (!tagHandler(currentTag)) {
-									goto endOfProcessing;
-								}
-								currentTag.Start = false;
-								if (!tagHandler(currentTag)) {
-									goto endOfProcessing;
-								}
-								state = PS_WAIT_END_OF_TAG;
-							} else {
-								state = PS_ATTRIBUTENAME;
-							}
-						}
-					}
-					break;
-				case PS_ATTRIBUTENAME:
-					if (*ptr == '>' || *ptr == '/' || *ptr == '=' || std::isspace(static_cast<unsigned char>(*ptr))) {
-						if (ptr != start || !currentString.empty()) {
-							currentString.append(start, ptr - start);
-							// Attribute names are stored in upper case.
-							for (std::size_t i = 0; i < currentString.length(); ++i) {
-								currentString[i] = static_cast<char>(
-									std::toupper(static_cast<unsigned char>(currentString[i])));
-							}
-							currentTag.addAttribute(currentString);
-							currentString.erase();
-						}
-						start = ptr + 1;
-						if (*ptr == '>') {
-							if (!tagHandler(currentTag)) {
-								goto endOfProcessing;
-							}
-							state = PS_TEXT;
-						} else if (*ptr == '/') {
-							if (!tagHandler(currentTag)) {
-								goto endOfProcessing;
-							}
-							currentTag.Start = false;
-							if (!tagHandler(currentTag)) {
-								goto endOfProcessing;
-							}
-							state = PS_WAIT_END_OF_TAG;
-						} else {
-							state = (*ptr == '=') ? PS_ATTRIBUTEVALUE : PS_ATTRIBUTENAME;
-						}
-					}
-					break;
-				case PS_ATTRIBUTEVALUE:
-					if (*ptr == '"') {
-						// Count a quote only if it opens the value or if
-						// counting has already begun.
-						if (((ptr == start) && currentString.empty()) || (quotationCounter > 0)) {
-							++quotationCounter;
-						}
-					} else if (*ptr == '&') {
-						currentString.append(start, ptr - start);
-						start = ptr + 1;
-						appendString(attributeValueString, currentString);
-						state = PS_SPECIAL_IN_ATTRIBUTEVALUE;
-						state_special = ST_UNKNOWN;
-					} else if (quotationCounter != 1 && (*ptr == '>' || *ptr == '/' || std::isspace(static_cast<unsigned char>(*ptr)))) {
-						// Delimiters end the value unless a quote is still open.
-						if (ptr != start || !currentString.empty()) {
-							currentString.append(start, ptr - start);
-							appendString(attributeValueString, currentString);
-							// Strip the surrounding quotes from a quoted value.
-							if (attributeValueString[0] == '"') {
-								attributeValueString = attributeValueString.substr(1, attributeValueString.length() - 2);
-							}
-							currentTag.setLastAttributeValue(attributeValueString);
-							attributeValueString.erase();
-							quotationCounter = 0;
-						}
-						start = ptr + 1;
-						if (*ptr == '>') {
-							if (!tagHandler(currentTag)) {
-								goto endOfProcessing;
-							}
-							state = PS_TEXT;
-						} else if (*ptr == '/') {
-							if (!tagHandler(currentTag)) {
-								goto endOfProcessing;
-							}
-							currentTag.Start = false;
-							if (!tagHandler(currentTag)) {
-								goto endOfProcessing;
-							}
-							state = PS_WAIT_END_OF_TAG;
-						} else {
-							state = PS_ATTRIBUTENAME;
-						}
-					}
-					break;
-				case PS_SKIPTAG:
-					if (*ptr == '>') {
-						start = ptr + 1;
-						state = PS_TEXT;
-					}
-					break;
-			}
-		}
-		// Deal with the unconsumed tail of the chunk: text is reported right
-		// away, partial tokens are parked until the next chunk arrives.
-		if (start != endOfBuffer) {
-			switch (state) {
-				case PS_TEXT:
-					if (!characterDataHandler(start, endOfBuffer - start, true)) {
-						goto endOfProcessing;
-					}
-					break;
-				case PS_TAGNAME:
-				case PS_ATTRIBUTENAME:
-				case PS_ATTRIBUTEVALUE:
-					currentString.append(start, endOfBuffer - start);
-					break;
-				case PS_SPECIAL:
-				case PS_SPECIAL_IN_ATTRIBUTEVALUE:
-					specialString.append(start, endOfBuffer - start);
-					break;
-				case PS_TAGSTART:
-				case PS_SKIPTAG:
-				case PS_COMMENT:
-				case PS_WAIT_END_OF_TAG:
-					break;
-			}
-		}
-		offset += length;
-	} while (length == BUFSIZE); // a short read ends the loop
-endOfProcessing:
-	endDocumentHandler();
-
-	stream.close();
-}
author	Michele Calgaro <[email protected]>	2024-06-07 23:30:05 +0900
committer	Michele Calgaro <[email protected]>	2024-06-07 23:30:05 +0900
commit	17b259df9cb6b28779d4881b2b6c805ee2e48eea (patch)
tree	5ed61937459cb7081089111b0242c01ec178f1f3 /fbreader/src/formats/html/HtmlReader.cpp
parent	1cba8bce178eb2d6719c6f7f21e2c9352c5513a6 (diff)
download	tde-ebook-reader-17b259df9cb6b28779d4881b2b6c805ee2e48eea.tar.gz tde-ebook-reader-17b259df9cb6b28779d4881b2b6c805ee2e48eea.zip