diff options
Diffstat (limited to 'fbreader/src/formats/html')
-rw-r--r-- | fbreader/src/formats/html/HtmlBookReader.cpp | 583 | ||||
-rw-r--r-- | fbreader/src/formats/html/HtmlBookReader.h | 101 | ||||
-rw-r--r-- | fbreader/src/formats/html/HtmlDescriptionReader.cpp | 82 | ||||
-rw-r--r-- | fbreader/src/formats/html/HtmlDescriptionReader.h | 48 | ||||
-rw-r--r-- | fbreader/src/formats/html/HtmlEntityCollection.cpp | 71 | ||||
-rw-r--r-- | fbreader/src/formats/html/HtmlEntityCollection.h | 38 | ||||
-rw-r--r-- | fbreader/src/formats/html/HtmlPlugin.cpp | 83 | ||||
-rw-r--r-- | fbreader/src/formats/html/HtmlPlugin.h | 42 | ||||
-rw-r--r-- | fbreader/src/formats/html/HtmlReader.cpp | 373 | ||||
-rw-r--r-- | fbreader/src/formats/html/HtmlReader.h | 92 | ||||
-rw-r--r-- | fbreader/src/formats/html/HtmlReaderStream.cpp | 128 | ||||
-rw-r--r-- | fbreader/src/formats/html/HtmlReaderStream.h | 48 | ||||
-rw-r--r-- | fbreader/src/formats/html/HtmlTagActions.h | 158 |
13 files changed, 0 insertions, 1847 deletions
diff --git a/fbreader/src/formats/html/HtmlBookReader.cpp b/fbreader/src/formats/html/HtmlBookReader.cpp deleted file mode 100644 index 321913d..0000000 --- a/fbreader/src/formats/html/HtmlBookReader.cpp +++ /dev/null @@ -1,583 +0,0 @@ -/* - * Copyright (C) 2004-2012 Geometer Plus <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#include <cctype> - -#include <ZLFile.h> -#include <ZLFileImage.h> -#include <ZLStringUtil.h> - -#include "HtmlBookReader.h" -#include "HtmlTagActions.h" -#include "../txt/PlainTextFormat.h" -#include "../util/MiscUtil.h" -#include "../../bookmodel/BookModel.h" -#include "../css/StyleSheetParser.h" - -HtmlTagAction::HtmlTagAction(HtmlBookReader &reader) : myReader(reader) { -} - -HtmlTagAction::~HtmlTagAction() { -} - -void HtmlTagAction::reset() { -} - -DummyHtmlTagAction::DummyHtmlTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) { -} - -void DummyHtmlTagAction::run(const HtmlReader::HtmlTag&) { -} - -HtmlControlTagAction::HtmlControlTagAction(HtmlBookReader &reader, FBTextKind kind) : HtmlTagAction(reader), myKind(kind) { -} - -void HtmlControlTagAction::run(const HtmlReader::HtmlTag &tag) { - std::vector<FBTextKind> &list = myReader.myKindList; - int index; - for (index = list.size() - 1; index >= 0; --index) { - if (list[index] == myKind) { - break; - } - } - if (tag.Start) { - if (index == -1) { - bookReader().pushKind(myKind); - myReader.myKindList.push_back(myKind); - bookReader().addControl(myKind, true); - } - } else { - if (index >= 0) { - for (int i = list.size() - 1; i >= index; --i) { - bookReader().addControl(list[i], false); - bookReader().popKind(); - } - for (unsigned int j = index + 1; j < list.size(); ++j) { - bookReader().addControl(list[j], true); - bookReader().pushKind(list[j]); - } - list.erase(list.begin() + index); - } - } -} - -HtmlHeaderTagAction::HtmlHeaderTagAction(HtmlBookReader &reader, FBTextKind kind) : HtmlTagAction(reader), myKind(kind) { -} - -void HtmlHeaderTagAction::run(const HtmlReader::HtmlTag &tag) { - myReader.myIsStarted = false; - if (tag.Start) { - if (myReader.myBuildTableOfContent && !myReader.myIgnoreTitles) { - if (!bookReader().contentsParagraphIsOpen()) { - bookReader().insertEndOfSectionParagraph(); - bookReader().enterTitle(); - bookReader().beginContentsParagraph(); - } - } - bookReader().pushKind(myKind); - } else { - bookReader().popKind(); - if (myReader.myBuildTableOfContent && !myReader.myIgnoreTitles) { - bookReader().endContentsParagraph(); - bookReader().exitTitle(); - } - } - bookReader().beginParagraph(); -} - -HtmlIgnoreTagAction::HtmlIgnoreTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) { -} - -void HtmlIgnoreTagAction::run(const HtmlReader::HtmlTag &tag) { - if (tag.Start) { - if (myTagNames.find(tag.Name) == myTagNames.end()) { - ++myReader.myIgnoreDataCounter; - myTagNames.insert(tag.Name); - } - } else { - if (myTagNames.find(tag.Name) != myTagNames.end()) { - --myReader.myIgnoreDataCounter; - myTagNames.erase(tag.Name); - } - } -} - -HtmlHrefTagAction::HtmlHrefTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) { -} - -void HtmlHrefTagAction::run(const HtmlReader::HtmlTag &tag) { - if (tag.Start) { - for (unsigned int i = 0; i < tag.Attributes.size(); ++i) { - if (tag.Attributes[i].Name == "NAME") { - bookReader().addHyperlinkLabel(tag.Attributes[i].Value); - } else if ((hyperlinkType() == REGULAR) && (tag.Attributes[i].Name == "HREF")) { - std::string value = tag.Attributes[i].Value; - if (!myReader.myFileName.empty() && - (value.length() > myReader.myFileName.length()) && - (value.substr(0, myReader.myFileName.length()) == myReader.myFileName)) { - value = value.substr(myReader.myFileName.length()); - } - if (!value.empty()) { - if (value[0] == '#') { - setHyperlinkType(INTERNAL_HYPERLINK); - bookReader().addHyperlinkControl(INTERNAL_HYPERLINK, value.substr(1)); - } else { - FBTextKind hyperlinkType = MiscUtil::referenceType(value); - if (hyperlinkType != INTERNAL_HYPERLINK) { - setHyperlinkType(hyperlinkType); - bookReader().addHyperlinkControl(hyperlinkType, value); - } - } - } - } - } - } else if (hyperlinkType() != REGULAR) { - bookReader().addControl(hyperlinkType(), false); - setHyperlinkType(REGULAR); - } -} - -void HtmlHrefTagAction::reset() { - setHyperlinkType(REGULAR); -} - -FBTextKind HtmlHrefTagAction::hyperlinkType() const { - return myHyperlinkType; -} - -void HtmlHrefTagAction::setHyperlinkType(FBTextKind hyperlinkType) { - myHyperlinkType = hyperlinkType; -} - -HtmlImageTagAction::HtmlImageTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) { -} - -void HtmlImageTagAction::run(const HtmlReader::HtmlTag &tag) { - if (tag.Start) { - bookReader().endParagraph(); - for (unsigned int i = 0; i < tag.Attributes.size(); ++i) { - if (tag.Attributes[i].Name == "SRC") { - const std::string fileName = MiscUtil::decodeHtmlURL(tag.Attributes[i].Value); - const ZLFile file(myReader.myBaseDirPath + fileName); - if (file.exists()) { - bookReader().addImageReference(fileName); - bookReader().addImage(fileName, new ZLFileImage(file, 0)); - } - break; - } - } - bookReader().beginParagraph(); - } -} - -HtmlBreakTagAction::HtmlBreakTagAction(HtmlBookReader &reader, BreakType breakType) : HtmlTagAction(reader), myBreakType(breakType) { -} - -void HtmlBreakTagAction::run(const HtmlReader::HtmlTag &tag) { - if (myReader.myDontBreakParagraph) { - myReader.myDontBreakParagraph = false; - return; - } - - if ((tag.Start && (myBreakType & BREAK_AT_START)) || - (!tag.Start && (myBreakType & BREAK_AT_END))) { - bookReader().endParagraph(); - if (bookReader().isKindStackEmpty()) { - bookReader().pushKind(REGULAR); - } - bookReader().beginParagraph(); - } -} - -HtmlPreTagAction::HtmlPreTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) { -} - -void HtmlPreTagAction::run(const HtmlReader::HtmlTag &tag) { - bookReader().endParagraph(); - myReader.myIsPreformatted = tag.Start; - myReader.mySpaceCounter = -1; - myReader.myBreakCounter = 0; - if (myReader.myFormat.breakType() == PlainTextFormat::BREAK_PARAGRAPH_AT_NEW_LINE) { - if (tag.Start) { - bookReader().pushKind(PREFORMATTED); - } else { - bookReader().popKind(); - } - } - bookReader().beginParagraph(); -} - -HtmlListTagAction::HtmlListTagAction(HtmlBookReader &reader, int startIndex) : HtmlTagAction(reader), myStartIndex(startIndex) { -} - -void HtmlListTagAction::run(const HtmlReader::HtmlTag &tag) { - if (tag.Start) { - myReader.myListNumStack.push(myStartIndex); - } else if (!myReader.myListNumStack.empty()) { - myReader.myListNumStack.pop(); - } -} - -HtmlListItemTagAction::HtmlListItemTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) { -} - -void HtmlListItemTagAction::run(const HtmlReader::HtmlTag &tag) { - if (tag.Start) { - bookReader().endParagraph(); - bookReader().beginParagraph(); - if (!myReader.myListNumStack.empty()) { - bookReader().addFixedHSpace(3 * myReader.myListNumStack.size()); - int &index = myReader.myListNumStack.top(); - if (index == 0) { - myReader.addConvertedDataToBuffer("\342\200\242 ", 4, false); - } else { - std::string number; - ZLStringUtil::appendNumber(number, index++); - number += ". "; - myReader.addConvertedDataToBuffer(number.data(), number.length(), false); - } - myReader.myDontBreakParagraph = true; - } - } else { - myReader.myDontBreakParagraph = false; - } -} - -HtmlTableTagAction::HtmlTableTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) { -} - -void HtmlTableTagAction::run(const HtmlReader::HtmlTag &tag) { - if (tag.Start) { - myReader.myIgnoreTitles = true; - } else { - myReader.myIgnoreTitles = false; - } -} - -HtmlStyleTagAction::HtmlStyleTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) { -} - -void HtmlStyleTagAction::run(const HtmlReader::HtmlTag &tag) { - myReader.myStyleSheetParser = tag.Start ? new StyleSheetTableParser(myReader.myStyleSheetTable) : 0; - /* - if (!tag.Start) { - myReader.myStyleSheetTable.dump(); - } - */ -} - -shared_ptr<HtmlTagAction> HtmlBookReader::createAction(const std::string &tag) { - if (tag == "EM") { - return new HtmlControlTagAction(*this, EMPHASIS); - } else if (tag == "STRONG") { - return new HtmlControlTagAction(*this, STRONG); - } else if (tag == "B") { - return new HtmlControlTagAction(*this, BOLD); - } else if (tag == "I") { - return new HtmlControlTagAction(*this, ITALIC); - } else if (tag == "TT") { - return new HtmlControlTagAction(*this, CODE); - } else if (tag == "CODE") { - return new HtmlControlTagAction(*this, CODE); - } else if (tag == "CITE") { - return new HtmlControlTagAction(*this, CITE); - } else if (tag == "SUB") { - return new HtmlControlTagAction(*this, SUB); - } else if (tag == "SUP") { - return new HtmlControlTagAction(*this, SUP); - } else if (tag == "H1") { - return new HtmlHeaderTagAction(*this, H1); - } else if (tag == "H2") { - return new HtmlHeaderTagAction(*this, H2); - } else if (tag == "H3") { - return new HtmlHeaderTagAction(*this, H3); - } else if (tag == "H4") { - return new HtmlHeaderTagAction(*this, H4); - } else if (tag == "H5") { - return new HtmlHeaderTagAction(*this, H5); - } else if (tag == "H6") { - return new HtmlHeaderTagAction(*this, H6); - } else if (tag == "HEAD") { - return new HtmlIgnoreTagAction(*this); - } else if (tag == "TITLE") { - return new HtmlIgnoreTagAction(*this); - } else if (tag == "STYLE") { - return new HtmlStyleTagAction(*this); - } else if (tag == "SELECT") { - return new HtmlIgnoreTagAction(*this); - } else if (tag == "SCRIPT") { - return new HtmlIgnoreTagAction(*this); - } else if (tag == "A") { - return new HtmlHrefTagAction(*this); - } else if (tag == "TD") { - //return new HtmlBreakTagAction(*this, HtmlBreakTagAction::BREAK_AT_END); - } else if (tag == "TR") { - return new HtmlBreakTagAction(*this, HtmlBreakTagAction::BREAK_AT_END); - } else if (tag == "DIV") { - return new HtmlBreakTagAction(*this, HtmlBreakTagAction::BREAK_AT_END); - } else if (tag == "DT") { - return new HtmlBreakTagAction(*this, HtmlBreakTagAction::BREAK_AT_START); - } else if (tag == "P") { - return new HtmlBreakTagAction(*this, HtmlBreakTagAction::BREAK_AT_START_AND_AT_END); - } else if (tag == "BR") { - return new HtmlBreakTagAction(*this, HtmlBreakTagAction::BREAK_AT_START_AND_AT_END); - } else if (tag == "IMG") { - return new HtmlImageTagAction(*this); - } else if (tag == "UL") { - return new HtmlListTagAction(*this, 0); - } else if (tag == "MENU") { - return new HtmlListTagAction(*this, 0); - } else if (tag == "DIR") { - return new HtmlListTagAction(*this, 0); - } else if (tag == "OL") { - return new HtmlListTagAction(*this, 1); - } else if (tag == "LI") { - return new HtmlListItemTagAction(*this); - } else if (tag == "PRE") { - if (myProcessPreTag) { - return new HtmlPreTagAction(*this); - } - } else if (tag == "TABLE") { - return new HtmlTableTagAction(*this); - } - /* - } else if (tag == "DD") { - return 0; - } else if (tag == "DL") { - return 0; - } else if (tag == "DFN") { - return 0; - } else if (tag == "SAMP") { - return 0; - } else if (tag == "KBD") { - return 0; - } else if (tag == "VAR") { - return 0; - } else if (tag == "ABBR") { - return 0; - } else if (tag == "ACRONYM") { - return 0; - } else if (tag == "BLOCKQUOTE") { - return 0; - } else if (tag == "Q") { - return 0; - } else if (tag == "INS") { - return 0; - } else if (tag == "DEL") { - return 0; - } else if (tag == "BODY") { - return 0; - */ - return new DummyHtmlTagAction(*this); -} - -void HtmlBookReader::setBuildTableOfContent(bool build) { - myBuildTableOfContent = build; -} - -void HtmlBookReader::setProcessPreTag(bool process) { - myProcessPreTag = process; -} - -HtmlBookReader::HtmlBookReader(const std::string &baseDirectoryPath, BookModel &model, const PlainTextFormat &format, const std::string &encoding) : HtmlReader(encoding), myBookReader(model), myBaseDirPath(baseDirectoryPath), myFormat(format), myBuildTableOfContent(true), myProcessPreTag(true) { -} - -HtmlBookReader::~HtmlBookReader() { -} - -void HtmlBookReader::addConvertedDataToBuffer(const char *text, std::size_t len, bool convert) { - if (len > 0) { - if (myDontBreakParagraph) { - while (len > 0 && std::isspace(*text)) { - --len; - ++text; - } - if (len == 0) { - return; - } - } - if (convert) { - myConverter->convert(myConverterBuffer, text, text + len); - myBookReader.addData(myConverterBuffer); - myBookReader.addContentsData(myConverterBuffer); - myConverterBuffer.erase(); - } else { - std::string strText(text, len); - myBookReader.addData(strText); - myBookReader.addContentsData(strText); - } - myDontBreakParagraph = false; - } -} - -bool HtmlBookReader::tagHandler(const HtmlTag &tag) { - myConverter->reset(); - - for (unsigned int i = 0; i < tag.Attributes.size(); ++i) { - if (tag.Attributes[i].Name == "ID") { - myBookReader.addHyperlinkLabel(tag.Attributes[i].Value); - break; - } - } - shared_ptr<HtmlTagAction> action = myActionMap[tag.Name]; - if (action.isNull()) { - action = createAction(tag.Name); - myActionMap[tag.Name] = action; - } - action->run(tag); - - return true; -} - -void HtmlBookReader::preformattedCharacterDataHandler(const char *text, std::size_t len, bool convert) { - const char *start = text; - const char *end = text + len; - - int breakType = myFormat.breakType(); - if (breakType & PlainTextFormat::BREAK_PARAGRAPH_AT_NEW_LINE) { - for (const char *ptr = text; ptr != end; ++ptr) { - if (*ptr == '\n') { - mySpaceCounter = 0; - if (start < ptr) { - addConvertedDataToBuffer(start, ptr - start, convert); - } else { - static const std::string SPACE = " "; - myBookReader.addData(SPACE); - } - myBookReader.endParagraph(); - myBookReader.beginParagraph(); - start = ptr + 1; - } else if (mySpaceCounter >= 0) { - if (std::isspace((unsigned char)*ptr)) { - ++mySpaceCounter; - } else { - myBookReader.addFixedHSpace(mySpaceCounter); - mySpaceCounter = -1; - } - } - } - addConvertedDataToBuffer(start, end - start, convert); - } else if (breakType & PlainTextFormat::BREAK_PARAGRAPH_AT_LINE_WITH_INDENT) { - for (const char *ptr = text; ptr != end; ++ptr) { - if (std::isspace((unsigned char)*ptr)) { - if (*ptr == '\n') { - mySpaceCounter = 0; - } else if (mySpaceCounter >= 0) { - ++mySpaceCounter; - } - } else { - if (mySpaceCounter > myFormat.ignoredIndent()) { - if (ptr - start > mySpaceCounter) { - addConvertedDataToBuffer(start, ptr - start - mySpaceCounter, convert); - myBookReader.endParagraph(); - myBookReader.beginParagraph(); - } - start = ptr; - } - mySpaceCounter = -1; - } - } - mySpaceCounter = std::max(mySpaceCounter, 0); - if (end - start > mySpaceCounter) { - addConvertedDataToBuffer(start, end - start - mySpaceCounter, convert); - } - } else if (breakType & PlainTextFormat::BREAK_PARAGRAPH_AT_EMPTY_LINE) { - for (const char *ptr = start; ptr != end; ++ptr) { - if (std::isspace((unsigned char)*ptr)) { - if (*ptr == '\n') { - ++myBreakCounter; - } - } else { - if (myBreakCounter > 1) { - addConvertedDataToBuffer(start, ptr - start, convert); - myBookReader.endParagraph(); - myBookReader.beginParagraph(); - start = ptr; - } - myBreakCounter = 0; - } - } - addConvertedDataToBuffer(start, end - start, convert); - } -} - -bool HtmlBookReader::characterDataHandler(const char *text, std::size_t len, bool convert) { - if (!myStyleSheetParser.isNull()) { - myStyleSheetParser->parse(text, len); - return true; - } - - if (myIgnoreDataCounter != 0) { - return true; - } - - if (myIsPreformatted) { - preformattedCharacterDataHandler(text, len, convert); - return true; - } - - const char *ptr = text; - const char *end = text + len; - if (!myIsStarted) { - for (; ptr != end; ++ptr) { - if (!std::isspace((unsigned char)*ptr)) { - myIsStarted = true; - break; - } - } - } - if (myIsStarted) { - addConvertedDataToBuffer(ptr, end - ptr, convert); - } - return true; -} - -void HtmlBookReader::startDocumentHandler() { - while (!myListNumStack.empty()) { - myListNumStack.pop(); - } - myConverterBuffer.erase(); - myKindList.clear(); - - myBookReader.reset(); - myBookReader.setMainTextModel(); - myBookReader.pushKind(REGULAR); - myBookReader.beginParagraph(); - myIgnoreDataCounter = 0; - myIsPreformatted = false; - myDontBreakParagraph = false; - for (std::map<std::string,shared_ptr<HtmlTagAction> >::const_iterator it = myActionMap.begin(); it != myActionMap.end(); ++it) { - it->second->reset(); - } - myIsStarted = false; - myIgnoreTitles = false; - - myStyleSheetParser = 0; - - mySpaceCounter = -1; - myBreakCounter = 0; -} - -void HtmlBookReader::endDocumentHandler() { - myBookReader.endParagraph(); -} - -void HtmlBookReader::setFileName(const std::string fileName) { - myFileName = fileName; -} diff --git a/fbreader/src/formats/html/HtmlBookReader.h b/fbreader/src/formats/html/HtmlBookReader.h deleted file mode 100644 index c8d4e32..0000000 --- a/fbreader/src/formats/html/HtmlBookReader.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (C) 2004-2012 Geometer Plus <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#ifndef __HTMLBOOKREADER_H__ -#define __HTMLBOOKREADER_H__ - -#include <stack> - -#include <shared_ptr.h> - -#include "HtmlReader.h" -#include "../../bookmodel/BookReader.h" -#include "../css/StyleSheetTable.h" - -class BookModel; -class PlainTextFormat; -class StyleSheetParser; - -class HtmlTagAction; - -class HtmlBookReader : public HtmlReader { - -public: - HtmlBookReader(const std::string &baseDirectoryPath, BookModel &model, const PlainTextFormat &format, const std::string &encoding); - ~HtmlBookReader(); - void setFileName(const std::string fileName); - -protected: - virtual shared_ptr<HtmlTagAction> createAction(const std::string &tag); - void setBuildTableOfContent(bool build); - void setProcessPreTag(bool process); - -protected: - void startDocumentHandler(); - void endDocumentHandler(); - bool tagHandler(const HtmlTag &tag); - bool characterDataHandler(const char *text, std::size_t len, bool convert); - -private: - void preformattedCharacterDataHandler(const char *text, std::size_t len, bool convert); - void addConvertedDataToBuffer(const char *text, std::size_t len, bool convert); - -protected: - BookReader myBookReader; - std::string myBaseDirPath; - -private: - const PlainTextFormat &myFormat; - int myIgnoreDataCounter; - bool myIsPreformatted; - bool myDontBreakParagraph; - - bool myIsStarted; - bool myBuildTableOfContent; - bool myProcessPreTag; - bool myIgnoreTitles; - std::stack<int> myListNumStack; - - StyleSheetTable myStyleSheetTable; - shared_ptr<StyleSheetParser> myStyleSheetParser; - - int mySpaceCounter; - int myBreakCounter; - std::string myConverterBuffer; - - std::map<std::string,shared_ptr<HtmlTagAction> > myActionMap; - std::vector<FBTextKind> myKindList; - - std::string myFileName; - - friend class HtmlTagAction; - friend class HtmlControlTagAction; - friend class HtmlHeaderTagAction; - friend class HtmlIgnoreTagAction; - friend class HtmlHrefTagAction; - friend class HtmlImageTagAction; - friend class HtmlBreakTagAction; - friend class HtmlPreTagAction; - friend class HtmlListTagAction; - friend class HtmlListItemTagAction; - friend class HtmlTableTagAction; - friend class HtmlStyleTagAction; -}; - -#endif /* __HTMLBOOKREADER_H__ */ diff --git a/fbreader/src/formats/html/HtmlDescriptionReader.cpp b/fbreader/src/formats/html/HtmlDescriptionReader.cpp deleted file mode 100644 index 6ebcb8b..0000000 --- a/fbreader/src/formats/html/HtmlDescriptionReader.cpp +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (C) 2004-2012 Geometer Plus <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#include "HtmlDescriptionReader.h" - -#include "../../library/Book.h" - -HtmlDescriptionReader::HtmlDescriptionReader(Book &book) : HtmlReader(book.encoding()), myBook(book) { - myBook.setTitle(""); -} - -void HtmlDescriptionReader::startDocumentHandler() { - myReadTitle = false; -} - -void HtmlDescriptionReader::endDocumentHandler() { - if (!myBook.title().empty()) { - const char *titleStart = myBook.title().data(); - const char *titleEnd = titleStart + myBook.title().length(); - std::string newTitle; - myConverter->convert(newTitle, titleStart, titleEnd); - myBook.setTitle(newTitle); - } -} - -bool HtmlDescriptionReader::tagHandler(const HtmlTag &tag) { - if (tag.Name == "TITLE") { - if (myReadTitle && !tag.Start) { - myBook.setTitle(myBuffer); - myBuffer.erase(); - } - myReadTitle = tag.Start && myBook.title().empty(); - return true; - } else if (tag.Start && tag.Name == "META") { - std::vector<HtmlAttribute>::const_iterator it = tag.Attributes.begin(); - for (; it != tag.Attributes.end(); ++it) { - if (it->Name == "CONTENT") { - break; - } - } - if (it != tag.Attributes.end()) { - const std::string prefix = "charset="; - std::size_t index = it->Value.find(prefix); - if (index != std::string::npos) { - std::string charset = it->Value.substr(index + prefix.length()); - index = charset.find(';'); - if (index != std::string::npos) { - charset = charset.substr(0, index); - } - index = charset.find(' '); - if (index != std::string::npos) { - charset = charset.substr(0, index); - } - myBook.setEncoding(charset); - } - } - } - return tag.Name != "BODY"; -} - -bool HtmlDescriptionReader::characterDataHandler(const char *text, std::size_t len, bool) { - if (myReadTitle) { - myBuffer.append(text, len); - } - return true; -} diff --git a/fbreader/src/formats/html/HtmlDescriptionReader.h b/fbreader/src/formats/html/HtmlDescriptionReader.h deleted file mode 100644 index 159d4b0..0000000 --- a/fbreader/src/formats/html/HtmlDescriptionReader.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (C) 2004-2012 Geometer Plus <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#ifndef __HTMLDESCRIPTIONREADER_H__ -#define __HTMLDESCRIPTIONREADER_H__ - -#include "HtmlReader.h" - -class Book; - -class HtmlDescriptionReader : public HtmlReader { - -public: - HtmlDescriptionReader(Book &book); - ~HtmlDescriptionReader(); - -protected: - void startDocumentHandler(); - void endDocumentHandler(); - - bool tagHandler(const HtmlTag &tag); - bool characterDataHandler(const char *text, std::size_t len, bool convert); - -private: - bool myReadTitle; - std::string myBuffer; - Book &myBook; -}; - -inline HtmlDescriptionReader::~HtmlDescriptionReader() {} - -#endif /* __HTMLDESCRIPTIONREADER_H__ */ diff --git a/fbreader/src/formats/html/HtmlEntityCollection.cpp b/fbreader/src/formats/html/HtmlEntityCollection.cpp deleted file mode 100644 index bd1bb4e..0000000 --- a/fbreader/src/formats/html/HtmlEntityCollection.cpp +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (C) 2004-2012 Geometer Plus <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#include <cstdlib> -#include <cctype> - -#include <ZLibrary.h> -#include <ZLFile.h> -#include <ZLXMLReader.h> - -#include "HtmlEntityCollection.h" - -class CollectionReader : public ZLXMLReader { - -public: - CollectionReader(std::map<std::string,int> &collection); - void startElementHandler(const char *tag, const char **attributes); - -private: - std::map<std::string,int> &myCollection; -}; - -std::map<std::string,int> HtmlEntityCollection::ourCollection; - -int HtmlEntityCollection::symbolNumber(const std::string &name) { - if (ourCollection.empty()) { - CollectionReader(ourCollection).readDocument(ZLFile( - ZLibrary::ApplicationDirectory() + ZLibrary::FileNameDelimiter + - "formats" + ZLibrary::FileNameDelimiter + - "html" + ZLibrary::FileNameDelimiter + "html.ent" - )); - } - std::map<std::string,int>::const_iterator it = ourCollection.find(name); - return it == ourCollection.end() ? 0 : it->second; -} - -CollectionReader::CollectionReader(std::map<std::string,int> &collection) : myCollection(collection) { -} - -void CollectionReader::startElementHandler(const char *tag, const char **attributes) { - static const std::string ENTITY = "entity"; - - if (ENTITY == tag) { - for (int i = 0; i < 4; ++i) { - if (attributes[i] == 0) { - return; - } - } - static const std::string _name = "name"; - static const std::string _number = "number"; - if (_name == attributes[0] && _number == attributes[2]) { - myCollection[attributes[1]] = std::atoi(attributes[3]); - } - } -} diff --git a/fbreader/src/formats/html/HtmlEntityCollection.h b/fbreader/src/formats/html/HtmlEntityCollection.h deleted file mode 100644 index 6f70491..0000000 --- a/fbreader/src/formats/html/HtmlEntityCollection.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright (C) 2004-2012 Geometer Plus <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#ifndef __HTMLENTITYCOLLECTION_H__ -#define __HTMLENTITYCOLLECTION_H__ - -#include <string> -#include <map> - -class HtmlEntityCollection { - -public: - static int symbolNumber(const std::string &name); - -private: - static std::map<std::string,int> ourCollection; - -private: - HtmlEntityCollection(); -}; - -#endif /* __HTMLENTITYCOLLECTION_H__ */ diff --git a/fbreader/src/formats/html/HtmlPlugin.cpp b/fbreader/src/formats/html/HtmlPlugin.cpp deleted file mode 100644 index 279e096..0000000 --- a/fbreader/src/formats/html/HtmlPlugin.cpp +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (C) 2004-2012 Geometer Plus <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#include <ZLStringUtil.h> -#include <ZLFile.h> -#include <ZLInputStream.h> - -#include "HtmlPlugin.h" -#include "HtmlDescriptionReader.h" -#include "HtmlBookReader.h" -#include "HtmlReaderStream.h" -#include "../txt/PlainTextFormat.h" -#include "../util/MiscUtil.h" -#include "../../library/Book.h" -#include "../../bookmodel/BookModel.h" - -bool HtmlPlugin::acceptsFile(const ZLFile &file) const { - const std::string &extension = file.extension(); - return ZLStringUtil::stringEndsWith(extension, "html") || (extension == "htm"); -} - -bool HtmlPlugin::readMetaInfo(Book &book) const { - shared_ptr<ZLInputStream> stream = book.file().inputStream(); - if (stream.isNull()) { - return false; - } - - shared_ptr<ZLInputStream> htmlStream = new HtmlReaderStream(stream, 50000); - detectEncodingAndLanguage(book, *htmlStream); - if (book.encoding().empty()) { - return false; - } - HtmlDescriptionReader(book).readDocument(*stream); - - return true; -} - -bool HtmlPlugin::readModel(BookModel &model) const { - const Book& book = *model.book(); - const ZLFile &file = book.file(); - shared_ptr<ZLInputStream> stream = file.inputStream(); - if (stream.isNull()) { - return false; - } - - PlainTextFormat format(file); - if (!format.initialized()) { - PlainTextFormatDetector detector; - detector.detect(*stream, format); - } - - std::string directoryPrefix = MiscUtil::htmlDirectoryPrefix(file.path()); - HtmlBookReader reader(directoryPrefix, model, format, book.encoding()); - reader.setFileName(MiscUtil::htmlFileName(file.path())); - reader.readDocument(*stream); - - return true; -} - -FormatInfoPage *HtmlPlugin::createInfoPage(ZLOptionsDialog &dialog, const ZLFile &file) { - return new PlainTextInfoPage(dialog, file, ZLResourceKey("<PRE>"), false); -} - -bool HtmlPlugin::readLanguageAndEncoding(Book &book) const { - (void)book; - return true; -} diff --git a/fbreader/src/formats/html/HtmlPlugin.h b/fbreader/src/formats/html/HtmlPlugin.h deleted file mode 100644 index c66a108..0000000 --- a/fbreader/src/formats/html/HtmlPlugin.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (C) 2004-2012 Geometer Plus <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#ifndef __HTMLPLUGIN_H__ -#define __HTMLPLUGIN_H__ - -#include "../FormatPlugin.h" - -class HtmlPlugin : public FormatPlugin { - -public: - HtmlPlugin(); - ~HtmlPlugin(); - bool providesMetaInfo() const; - bool acceptsFile(const ZLFile &file) const; - bool readMetaInfo(Book &book) const; - bool readLanguageAndEncoding(Book &book) const; - bool readModel(BookModel &model) const; - FormatInfoPage *createInfoPage(ZLOptionsDialog &dialog, const ZLFile &file); -}; - -inline HtmlPlugin::HtmlPlugin() {} -inline HtmlPlugin::~HtmlPlugin() {} -inline bool HtmlPlugin::providesMetaInfo() const { return false; } - -#endif /* __HTMLPLUGIN_H__ */ diff --git a/fbreader/src/formats/html/HtmlReader.cpp b/fbreader/src/formats/html/HtmlReader.cpp deleted file mode 100644 index a5ce7fa..0000000 --- a/fbreader/src/formats/html/HtmlReader.cpp +++ /dev/null @@ -1,373 +0,0 @@ -/* - * Copyright (C) 2004-2012 Geometer Plus <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#include <algorithm> -#include <cctype> - -#include <ZLInputStream.h> -#include <ZLXMLReader.h> -#include <ZLFile.h> -#include <ZLStringUtil.h> -#include <ZLUnicodeUtil.h> - -#include "HtmlReader.h" -#include "HtmlEntityCollection.h" - -HtmlReader::HtmlReader(const std::string &encoding) : EncodedTextReader(encoding) { -} - -HtmlReader::~HtmlReader() { -} - -void HtmlReader::setTag(HtmlTag &tag, const std::string &name) { - tag.Attributes.clear(); - - if (name.length() == 0) { - tag.Name = name; - return; - } - - tag.Start = name[0] != '/'; - if (tag.Start) { - tag.Name = name; - } else { - tag.Name = name.substr(1); - } - - const std::size_t len = tag.Name.length(); - for (std::size_t i = 0; i < len; ++i) { - tag.Name[i] = std::toupper(tag.Name[i]); - } -} - -enum ParseState { - PS_TEXT, - PS_TAGSTART, - PS_TAGNAME, - PS_WAIT_END_OF_TAG, - PS_ATTRIBUTENAME, - PS_ATTRIBUTEVALUE, - PS_SKIPTAG, - PS_COMMENT, - PS_SPECIAL, - PS_SPECIAL_IN_ATTRIBUTEVALUE, -}; - -enum SpecialType { - ST_UNKNOWN, - ST_NUM, - ST_NAME, - ST_DEC, - ST_HEX -}; - -static bool allowSymbol(SpecialType type, char ch) { - return - (type == ST_NAME && std::isalpha(ch)) || - (type == ST_DEC && std::isdigit(ch)) || - (type == ST_HEX && std::isxdigit(ch)); -} - -static int specialSymbolNumber(SpecialType type, const std::string &txt) { - char *end = 0; - switch (type) { - case ST_NAME: - return HtmlEntityCollection::symbolNumber(txt); - case ST_DEC: - return std::strtol(txt.c_str() + 1, &end, 10); - case ST_HEX: - return std::strtol(txt.c_str() + 2, &end, 16); - default: - return 0; - } -} - -void HtmlReader::appendString(std::string &to, std::string &from) { - if (myConverter.isNull()) { - to += from; - } else { - myConverter->convert(to, from); - myConverter->reset(); - } - from.erase(); -} - -void HtmlReader::readDocument(ZLInputStream &stream) { - if (!stream.open()) { - return; - } - - startDocumentHandler(); - - ParseState state = PS_TEXT; - SpecialType state_special = ST_UNKNOWN; - std::string currentString; - std::string attributeValueString; - std::string specialString; - int quotationCounter = 0; - HtmlTag currentTag; - char endOfComment[2] = "\0"; - - const std::size_t BUFSIZE = 2048; - char *buffer = new char[BUFSIZE]; - std::size_t length; - std::size_t offset = 0; - do { - length = stream.read(buffer, BUFSIZE); - char *start = buffer; - char *endOfBuffer = buffer + length; - for (char *ptr = buffer; ptr < endOfBuffer; ++ptr) { - switch (state) { - case PS_TEXT: - if (*ptr == '<') { - if (!characterDataHandler(start, ptr - start, true)) { - goto endOfProcessing; - } - start = ptr + 1; - state = PS_TAGSTART; - currentTag.Offset = offset + (ptr - buffer); - } - if (*ptr == '&') { - if (!characterDataHandler(start, ptr - start, true)) { - goto endOfProcessing; - } - start = ptr + 1; - state = PS_SPECIAL; - state_special = ST_UNKNOWN; - } - break; - case PS_SPECIAL: - case PS_SPECIAL_IN_ATTRIBUTEVALUE: - if (state_special == ST_UNKNOWN) { - if (*ptr == '#') { - state_special = ST_NUM; - } else if (std::isalpha(*ptr)) { - state_special = ST_NAME; - } else { - start = ptr; - state = (state == PS_SPECIAL) ? PS_TEXT : PS_ATTRIBUTEVALUE; - } - } else if (state_special == ST_NUM) { - if (*ptr == 'x') { - state_special = ST_HEX; - } else if (std::isdigit(*ptr)) { - state_special = ST_DEC; - } else { - start = ptr; - state = (state == PS_SPECIAL) ? PS_TEXT : PS_ATTRIBUTEVALUE; - } - } else { - if (*ptr == ';') { - specialString.append(start, ptr - start); - int number = specialSymbolNumber(state_special, specialString); - if ((128 <= number) && (number <= 159)) { - char ch = number; - if (state == PS_SPECIAL) { - characterDataHandler(&ch, 1, true); - } else { - myConverter->convert(attributeValueString, &ch, &ch + 1); - } - } else if (number != 0) { - char buffer[4]; - int len = ZLUnicodeUtil::ucs4ToUtf8(buffer, number); - if (state == PS_SPECIAL) { - characterDataHandler(buffer, len, false); - } else { - attributeValueString.append(buffer, len); - } - } else { - specialString = "&" + specialString + ";"; - if (state == PS_SPECIAL) { - characterDataHandler(specialString.c_str(), specialString.length(), false); - } else { - attributeValueString += specialString; - } - } - specialString.erase(); - start = ptr + 1; - state = (state == PS_SPECIAL) ? PS_TEXT : PS_ATTRIBUTEVALUE; - } else if (!allowSymbol(state_special, *ptr)) { - start = ptr; - state = (state == PS_SPECIAL) ? PS_TEXT : PS_ATTRIBUTEVALUE; - } - } - break; - case PS_TAGSTART: - state = (*ptr == '!') ? PS_COMMENT : PS_TAGNAME; - break; - case PS_COMMENT: - if ((endOfComment[0] == '\0') && (*ptr != '-')) { - state = PS_TAGNAME; - } else if ((endOfComment[0] == '-') && (endOfComment[1] == '-') && (*ptr == '>')) { - start = ptr + 1; - state = PS_TEXT; - endOfComment[0] = '\0'; - endOfComment[1] = '\0'; - } else { - endOfComment[0] = endOfComment[1]; - endOfComment[1] = *ptr; - } - break; - case PS_WAIT_END_OF_TAG: - if (*ptr == '>') { - start = ptr + 1; - state = PS_TEXT; - } - break; - case PS_TAGNAME: - if (*ptr == '>' || *ptr == '/' || std::isspace((unsigned char)*ptr)) { - currentString.append(start, ptr - start); - start = ptr + 1; - setTag(currentTag, currentString); - currentString.erase(); - if (currentTag.Name == "") { - state = *ptr == '>' ? PS_TEXT : PS_SKIPTAG; - } else { - if (*ptr == '>') { - if (!tagHandler(currentTag)) { - goto endOfProcessing; - } - state = PS_TEXT; - } else if (*ptr == '/') { - if (!tagHandler(currentTag)) { - goto endOfProcessing; - } - currentTag.Start = false; - if (!tagHandler(currentTag)) { - goto endOfProcessing; - } - state = PS_WAIT_END_OF_TAG; - } else { - state = PS_ATTRIBUTENAME; - } - } - } - break; - case PS_ATTRIBUTENAME: - if (*ptr == '>' || *ptr == '/' || *ptr == '=' || std::isspace((unsigned char)*ptr)) { - if (ptr != start || !currentString.empty()) { - currentString.append(start, ptr - start); - for (unsigned int i = 0; i < currentString.length(); ++i) { - currentString[i] = std::toupper(currentString[i]); - } - currentTag.addAttribute(currentString); - currentString.erase(); - } - start = ptr + 1; - if (*ptr == '>') { - if (!tagHandler(currentTag)) { - goto endOfProcessing; - } - state = PS_TEXT; - } else if (*ptr == '/') { - if (!tagHandler(currentTag)) { - goto endOfProcessing; - } - currentTag.Start = false; - if (!tagHandler(currentTag)) { - goto endOfProcessing; - } - state = PS_WAIT_END_OF_TAG; - } else { - state = (*ptr == '=') ? PS_ATTRIBUTEVALUE : PS_ATTRIBUTENAME; - } - } - break; - case PS_ATTRIBUTEVALUE: - if (*ptr == '"') { - if (((ptr == start) && currentString.empty()) || (quotationCounter > 0)) { - ++quotationCounter; - } - } else if (*ptr == '&') { - currentString.append(start, ptr - start); - start = ptr + 1; - appendString(attributeValueString, currentString); - state = PS_SPECIAL_IN_ATTRIBUTEVALUE; - state_special = ST_UNKNOWN; - } else if (quotationCounter != 1 && (*ptr == '>' || *ptr == '/' || std::isspace((unsigned char)*ptr))) { - if (ptr != start || !currentString.empty()) { - currentString.append(start, ptr - start); - appendString(attributeValueString, currentString); - if (attributeValueString[0] == '"') { - attributeValueString = attributeValueString.substr(1, attributeValueString.length() - 2); - } - currentTag.setLastAttributeValue(attributeValueString); - attributeValueString.erase(); - quotationCounter = 0; - } - start = ptr + 1; - if (*ptr == '>') { - if (!tagHandler(currentTag)) { - goto endOfProcessing; - } - state = PS_TEXT; - } else if (*ptr == '/') { - if (!tagHandler(currentTag)) { - goto endOfProcessing; - } - currentTag.Start = false; - if (!tagHandler(currentTag)) { - goto endOfProcessing; - } - state = PS_WAIT_END_OF_TAG; - } else { - state = PS_ATTRIBUTENAME; - } - } - break; - case PS_SKIPTAG: - if (*ptr == '>') { - start = ptr + 1; - state = PS_TEXT; - } - break; - } - } - if (start != endOfBuffer) { - switch (state) { - case PS_TEXT: - if (!characterDataHandler(start, endOfBuffer - start, true)) { - goto endOfProcessing; - } - break; - case PS_TAGNAME: - case PS_ATTRIBUTENAME: - case PS_ATTRIBUTEVALUE: - currentString.append(start, endOfBuffer - start); - break; - case PS_SPECIAL: - case PS_SPECIAL_IN_ATTRIBUTEVALUE: - specialString.append(start, endOfBuffer - start); - break; - case PS_TAGSTART: - case PS_SKIPTAG: - case PS_COMMENT: - case PS_WAIT_END_OF_TAG: - break; - } - } - offset += length; - } while (length == BUFSIZE); -endOfProcessing: - delete[] buffer; - - endDocumentHandler(); - - stream.close(); -} diff --git a/fbreader/src/formats/html/HtmlReader.h b/fbreader/src/formats/html/HtmlReader.h deleted file mode 100644 index 876fad8..0000000 --- a/fbreader/src/formats/html/HtmlReader.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (C) 2004-2012 Geometer Plus <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#ifndef __HTMLREADER_H__ -#define __HTMLREADER_H__ - -#include <string> -#include <vector> - -#include <ZLEncodingConverter.h> -#include "../EncodedTextReader.h" - -class ZLInputStream; - -class HtmlReader : public EncodedTextReader { - -public: - struct HtmlAttribute { - std::string Name; - std::string Value; - bool HasValue; - - HtmlAttribute(const std::string &name); - ~HtmlAttribute(); - void setValue(const std::string &value); - }; - - struct HtmlTag { - std::string Name; - std::size_t Offset; - bool Start; - std::vector<HtmlAttribute> Attributes; - - HtmlTag(); - ~HtmlTag(); - void addAttribute(const std::string &name); - void setLastAttributeValue(const std::string &value); - - private: - HtmlTag(const HtmlTag&); - const HtmlTag &operator = (const HtmlTag&); - }; - -private: - static void setTag(HtmlTag &tag, const std::string &fullName); - -public: - virtual void readDocument(ZLInputStream &stream); - -protected: - HtmlReader(const std::string &encoding); - virtual ~HtmlReader(); - -protected: - virtual void startDocumentHandler() = 0; - virtual void endDocumentHandler() = 0; - - // returns false iff processing must be stopped - virtual bool tagHandler(const HtmlTag &tag) = 0; - // returns false iff processing must be stopped - virtual bool characterDataHandler(const char *text, std::size_t len, bool convert) = 0; - -private: - void appendString(std::string &to, std::string &from); -}; - -inline HtmlReader::HtmlAttribute::HtmlAttribute(const std::string &name) : Name(name), HasValue(false) {} -inline HtmlReader::HtmlAttribute::~HtmlAttribute() {} -inline void HtmlReader::HtmlAttribute::setValue(const std::string &value) { Value = value; HasValue = true; } - -inline HtmlReader::HtmlTag::HtmlTag() : Start(true) {} -inline HtmlReader::HtmlTag::~HtmlTag() {} -inline void HtmlReader::HtmlTag::addAttribute(const std::string &name) { Attributes.push_back(HtmlAttribute(name)); } -inline void HtmlReader::HtmlTag::setLastAttributeValue(const std::string &value) { if (!Attributes.empty()) Attributes.back().setValue(value); } - -#endif /* __HTMLREADER_H__ */ diff --git a/fbreader/src/formats/html/HtmlReaderStream.cpp b/fbreader/src/formats/html/HtmlReaderStream.cpp deleted file mode 100644 index 08c43ae..0000000 --- a/fbreader/src/formats/html/HtmlReaderStream.cpp +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (C) 2008-2012 Geometer Plus <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#include <cstdlib> -#include <cstring> -#include <algorithm> - -#include "HtmlReaderStream.h" -#include "HtmlReader.h" - -class HtmlTextOnlyReader : public HtmlReader { - -public: - HtmlTextOnlyReader(char *buffer, std::size_t maxSize); - std::size_t size() const; - -private: - void startDocumentHandler(); - void endDocumentHandler(); - - bool tagHandler(const HtmlTag &tag); - bool characterDataHandler(const char *text, std::size_t len, bool convert); - -private: - char *myBuffer; - std::size_t myMaxSize; - std::size_t myFilledSize; - bool myIgnoreText; -}; - -HtmlTextOnlyReader::HtmlTextOnlyReader(char *buffer, std::size_t maxSize) : HtmlReader(std::string()), myBuffer(buffer), myMaxSize(maxSize), myFilledSize(0), myIgnoreText(false) { -} - -std::size_t HtmlTextOnlyReader::size() const { - return myFilledSize; -} - -void HtmlTextOnlyReader::startDocumentHandler() { -} - -void HtmlTextOnlyReader::endDocumentHandler() { -} - -bool HtmlTextOnlyReader::tagHandler(const HtmlTag &tag) { - if (tag.Name == "SCRIPT") { - myIgnoreText = tag.Start; - } - if ((myFilledSize < myMaxSize) && (myFilledSize > 0) && (myBuffer[myFilledSize - 1] != '\n')) { - myBuffer[myFilledSize++] = '\n'; - } - return myFilledSize < myMaxSize; -} - -bool HtmlTextOnlyReader::characterDataHandler(const char *text, std::size_t len, bool) { - if (!myIgnoreText) { - len = std::min((std::size_t)len, myMaxSize - myFilledSize); - std::memcpy(myBuffer + myFilledSize, text, len); - myFilledSize += len; - } - return myFilledSize < myMaxSize; -} - -HtmlReaderStream::HtmlReaderStream(shared_ptr<ZLInputStream> base, std::size_t maxSize) : myBase(base), myBuffer(0), mySize(maxSize) { -} - -HtmlReaderStream::~HtmlReaderStream() { - close(); -} - -bool HtmlReaderStream::open() { - if (myBase.isNull() || !myBase->open()) { - return false; - } - myBuffer = new char[mySize]; - HtmlTextOnlyReader reader(myBuffer, mySize); - reader.readDocument(*myBase); - mySize = reader.size(); - myOffset = 0; - myBase->close(); - return true; -} - -std::size_t HtmlReaderStream::read(char *buffer, std::size_t maxSize) { - maxSize = std::min(maxSize, mySize - myOffset); - if (buffer != 0) { - std::memcpy(buffer, myBuffer, maxSize); - } - myOffset += maxSize; - return maxSize; -} - -void HtmlReaderStream::close() { - if (myBuffer != 0) { - delete[] myBuffer; - myBuffer = 0; - } -} - -void HtmlReaderStream::seek(int offset, bool absoluteOffset) { - if (!absoluteOffset) { - offset += myOffset; - } - myOffset = std::min(mySize, (std::size_t)std::max(0, offset)); -} - -std::size_t HtmlReaderStream::offset() const { - return myOffset; -} - -std::size_t HtmlReaderStream::sizeOfOpened() { - return mySize; -} diff --git a/fbreader/src/formats/html/HtmlReaderStream.h b/fbreader/src/formats/html/HtmlReaderStream.h deleted file mode 100644 index c5c15b8..0000000 --- a/fbreader/src/formats/html/HtmlReaderStream.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (C) 2008-2012 Geometer Plus <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#ifndef __HTMLREADERSTREAM_H__ -#define __HTMLREADERSTREAM_H__ - -#include <shared_ptr.h> -#include <ZLInputStream.h> - -class HtmlReaderStream : public ZLInputStream { - -public: - HtmlReaderStream(shared_ptr<ZLInputStream> base, std::size_t maxSize); - ~HtmlReaderStream(); - -private: - bool open(); - std::size_t read(char *buffer, std::size_t maxSize); - void close(); - - void seek(int offset, bool absoluteOffset); - std::size_t offset() const; - std::size_t sizeOfOpened(); - -private: - shared_ptr<ZLInputStream> myBase; - char *myBuffer; - std::size_t mySize; - std::size_t myOffset; -}; - -#endif /* __HTMLREADERSTREAM_H__ */ diff --git a/fbreader/src/formats/html/HtmlTagActions.h b/fbreader/src/formats/html/HtmlTagActions.h deleted file mode 100644 index 7da3f20..0000000 --- a/fbreader/src/formats/html/HtmlTagActions.h +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Copyright (C) 2004-2012 Geometer Plus <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#ifndef __HTMLTAGACTIONS_H__ -#define __HTMLTAGACTIONS_H__ - -#include <set> - -#include "HtmlBookReader.h" - -class HtmlTagAction { - -protected: - HtmlTagAction(HtmlBookReader &reader); - -public: - virtual ~HtmlTagAction(); - virtual void run(const HtmlReader::HtmlTag &tag) = 0; - virtual void reset(); - -protected: - BookReader &bookReader(); - -protected: - HtmlBookReader &myReader; -}; - -class DummyHtmlTagAction : public HtmlTagAction { - -public: - DummyHtmlTagAction(HtmlBookReader &reader); - void run(const HtmlReader::HtmlTag &tag); -}; - -class HtmlControlTagAction : public HtmlTagAction { - -public: - HtmlControlTagAction(HtmlBookReader &reader, FBTextKind kind); - void run(const HtmlReader::HtmlTag &tag); - -private: - FBTextKind myKind; -}; - -class HtmlHeaderTagAction : public HtmlTagAction { - -public: - HtmlHeaderTagAction(HtmlBookReader &reader, FBTextKind kind); - void run(const HtmlReader::HtmlTag &tag); - -private: - FBTextKind myKind; -}; - -class HtmlIgnoreTagAction : public HtmlTagAction { - -public: - HtmlIgnoreTagAction(HtmlBookReader &reader); - void run(const HtmlReader::HtmlTag &tag); - -private: - std::set<std::string> myTagNames; -}; - -class HtmlHrefTagAction : public HtmlTagAction { - -public: - HtmlHrefTagAction(HtmlBookReader &reader); - void run(const HtmlReader::HtmlTag &tag); - void reset(); - -protected: - FBTextKind hyperlinkType() const; - void setHyperlinkType(FBTextKind hyperlinkType); - -private: - FBTextKind myHyperlinkType; -}; - -class HtmlImageTagAction : public HtmlTagAction { - -public: - HtmlImageTagAction(HtmlBookReader &reader); - void run(const HtmlReader::HtmlTag &tag); -}; - -class HtmlBreakTagAction : public HtmlTagAction { - -public: - enum BreakType { - BREAK_AT_START = 1, - BREAK_AT_END = 2, - BREAK_AT_START_AND_AT_END = BREAK_AT_START | BREAK_AT_END - }; - HtmlBreakTagAction(HtmlBookReader &reader, BreakType breakType); - void run(const HtmlReader::HtmlTag &tag); - -private: - BreakType myBreakType; -}; - -class HtmlPreTagAction : public HtmlTagAction { - -public: - HtmlPreTagAction(HtmlBookReader &reader); - void run(const HtmlReader::HtmlTag &tag); -}; - -class HtmlListTagAction : public HtmlTagAction { - -public: - HtmlListTagAction(HtmlBookReader &reader, int startIndex); - void run(const HtmlReader::HtmlTag &tag); - -private: - int myStartIndex; -}; - -class HtmlListItemTagAction : public HtmlTagAction { - -public: - HtmlListItemTagAction(HtmlBookReader &reader); - void run(const HtmlReader::HtmlTag &tag); -}; - -class HtmlTableTagAction : public HtmlTagAction { - -public: - HtmlTableTagAction(HtmlBookReader &reader); - void run(const HtmlReader::HtmlTag &tag); -}; - -class HtmlStyleTagAction : public HtmlTagAction { - -public: - HtmlStyleTagAction(HtmlBookReader &reader); - void run(const HtmlReader::HtmlTag &tag); -}; - -inline BookReader &HtmlTagAction::bookReader() { return myReader.myBookReader; } - -#endif /* __HTMLTAGACTIONS_H__ */ |