summaryrefslogtreecommitdiffstats
path: root/fbreader/src/formats/pdb/MobipocketHtmlBookReader.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'fbreader/src/formats/pdb/MobipocketHtmlBookReader.cpp')
-rw-r--r--fbreader/src/formats/pdb/MobipocketHtmlBookReader.cpp356
1 files changed, 356 insertions, 0 deletions
diff --git a/fbreader/src/formats/pdb/MobipocketHtmlBookReader.cpp b/fbreader/src/formats/pdb/MobipocketHtmlBookReader.cpp
new file mode 100644
index 0000000..cecbfbc
--- /dev/null
+++ b/fbreader/src/formats/pdb/MobipocketHtmlBookReader.cpp
@@ -0,0 +1,356 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cstdlib>
+#include <algorithm>
+
+#include <ZLFile.h>
+#include <ZLFileImage.h>
+#include <ZLStringUtil.h>
+#include <ZLUnicodeUtil.h>
+
+#include "MobipocketHtmlBookReader.h"
+#include "PalmDocStream.h"
+#include "../html/HtmlTagActions.h"
+#include "../../bookmodel/BookModel.h"
+
+class MobipocketHtmlImageTagAction : public HtmlTagAction {
+
+public:
+ MobipocketHtmlImageTagAction(HtmlBookReader &reader);
+ void run(const HtmlReader::HtmlTag &tag);
+};
+
+class MobipocketHtmlHrTagAction : public HtmlTagAction {
+
+public:
+ MobipocketHtmlHrTagAction(HtmlBookReader &reader);
+ void run(const HtmlReader::HtmlTag &tag);
+};
+
+class MobipocketHtmlHrefTagAction : public HtmlHrefTagAction {
+
+public:
+ MobipocketHtmlHrefTagAction(HtmlBookReader &reader);
+ void run(const HtmlReader::HtmlTag &tag);
+};
+
+class MobipocketHtmlGuideTagAction : public HtmlTagAction {
+
+public:
+ MobipocketHtmlGuideTagAction(HtmlBookReader &reader);
+ void run(const HtmlReader::HtmlTag &tag);
+};
+
+class MobipocketHtmlReferenceTagAction : public HtmlTagAction {
+
+public:
+ MobipocketHtmlReferenceTagAction(HtmlBookReader &reader);
+ void run(const HtmlReader::HtmlTag &tag);
+};
+
+class MobipocketHtmlPagebreakTagAction : public HtmlTagAction {
+
+public:
+ MobipocketHtmlPagebreakTagAction(HtmlBookReader &reader);
+ void run(const HtmlReader::HtmlTag &tag);
+};
+
+MobipocketHtmlImageTagAction::MobipocketHtmlImageTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) {
+}
+
+void MobipocketHtmlImageTagAction::run(const HtmlReader::HtmlTag &tag) {
+ if (tag.Start) {
+ for (unsigned int i = 0; i < tag.Attributes.size(); ++i) {
+ if (tag.Attributes[i].Name == "RECINDEX") {
+ int index = std::atoi(tag.Attributes[i].Value.c_str());
+ if (index > 0) {
+ int &imageCounter = ((MobipocketHtmlBookReader&)myReader).myImageCounter;
+ imageCounter = std::max(imageCounter, index);
+ bool stopParagraph = bookReader().paragraphIsOpen();
+ if (stopParagraph) {
+ bookReader().endParagraph();
+ }
+ std::string id;
+ ZLStringUtil::appendNumber(id, index);
+ bookReader().addImageReference(id);
+ if (stopParagraph) {
+ bookReader().beginParagraph();
+ }
+ }
+ break;
+ }
+ }
+ }
+}
+
+MobipocketHtmlHrTagAction::MobipocketHtmlHrTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) {
+}
+
+void MobipocketHtmlHrTagAction::run(const HtmlReader::HtmlTag &tag) {
+ if (tag.Start) {
+ if (bookReader().contentsParagraphIsOpen()) {
+ bookReader().endContentsParagraph();
+ bookReader().exitTitle();
+ }
+ bookReader().insertEndOfSectionParagraph();
+ }
+}
+
+MobipocketHtmlHrefTagAction::MobipocketHtmlHrefTagAction(HtmlBookReader &reader) : HtmlHrefTagAction(reader) {
+}
+
+MobipocketHtmlPagebreakTagAction::MobipocketHtmlPagebreakTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) {
+}
+
+void MobipocketHtmlPagebreakTagAction::run(const HtmlReader::HtmlTag &tag) {
+ if (tag.Start) {
+ if (bookReader().contentsParagraphIsOpen()) {
+ bookReader().endContentsParagraph();
+ bookReader().exitTitle();
+ }
+ bookReader().insertEndOfSectionParagraph();
+ }
+}
+
+MobipocketHtmlBookReader::TOCReader::TOCReader(MobipocketHtmlBookReader &reader) : myReader(reader) {
+ reset();
+}
+
+void MobipocketHtmlBookReader::TOCReader::reset() {
+ myEntries.clear();
+
+ myIsActive = false;
+ myStartOffset = (std::size_t)-1;
+ myEndOffset = (std::size_t)-1;
+ myCurrentEntryText.erase();
+}
+
+bool MobipocketHtmlBookReader::TOCReader::rangeContainsPosition(std::size_t position) {
+ return (myStartOffset <= position) && (myEndOffset > position);
+}
+
+void MobipocketHtmlBookReader::TOCReader::startReadEntry(std::size_t position) {
+ myCurrentReference = position;
+ myIsActive = true;
+}
+
+void MobipocketHtmlBookReader::TOCReader::endReadEntry() {
+ if (myIsActive && !myCurrentEntryText.empty()) {
+ std::string converted;
+ myReader.myConverter->convert(converted, myCurrentEntryText);
+ myReader.myConverter->reset();
+ myEntries[myCurrentReference] = converted;
+ myCurrentEntryText.erase();
+ }
+ myIsActive = false;
+}
+
+void MobipocketHtmlBookReader::TOCReader::appendText(const char *text, std::size_t len) {
+ if (myIsActive) {
+ myCurrentEntryText.append(text, len);
+ }
+}
+
+void MobipocketHtmlBookReader::TOCReader::addReference(std::size_t position, const std::string &text) {
+ myEntries[position] = text;
+ if (rangeContainsPosition(position)) {
+ setEndOffset(position);
+ }
+}
+
+void MobipocketHtmlBookReader::TOCReader::setStartOffset(std::size_t position) {
+ myStartOffset = position;
+ std::map<std::size_t,std::string>::const_iterator it = myEntries.lower_bound(position);
+ if (it != myEntries.end()) {
+ ++it;
+ if (it != myEntries.end()) {
+ myEndOffset = it->first;
+ }
+ }
+}
+
+void MobipocketHtmlBookReader::TOCReader::setEndOffset(std::size_t position) {
+ myEndOffset = position;
+}
+
+const std::map<std::size_t,std::string> &MobipocketHtmlBookReader::TOCReader::entries() const {
+ return myEntries;
+}
+
+void MobipocketHtmlHrefTagAction::run(const HtmlReader::HtmlTag &tag) {
+ MobipocketHtmlBookReader &reader = (MobipocketHtmlBookReader&)myReader;
+ if (tag.Start) {
+ for (unsigned int i = 0; i < tag.Attributes.size(); ++i) {
+ if (tag.Attributes[i].Name == "FILEPOS") {
+ const std::string &value = tag.Attributes[i].Value;
+ if (!value.empty()) {
+ std::string label = "&";
+ int intValue = std::atoi(value.c_str());
+ if (intValue > 0) {
+ if (reader.myTocReader.rangeContainsPosition(tag.Offset)) {
+ reader.myTocReader.startReadEntry(intValue);
+ if (reader.myTocReader.rangeContainsPosition(intValue)) {
+ reader.myTocReader.setEndOffset(intValue);
+ }
+ }
+ reader.myFileposReferences.insert(intValue);
+ ZLStringUtil::appendNumber(label, intValue);
+ setHyperlinkType(INTERNAL_HYPERLINK);
+ bookReader().addHyperlinkControl(INTERNAL_HYPERLINK, label);
+ return;
+ }
+ }
+ }
+ }
+ } else {
+ reader.myTocReader.endReadEntry();
+ }
+ HtmlHrefTagAction::run(tag);
+}
+
+MobipocketHtmlGuideTagAction::MobipocketHtmlGuideTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) {
+}
+
+void MobipocketHtmlGuideTagAction::run(const HtmlReader::HtmlTag &tag) {
+ MobipocketHtmlBookReader &reader = (MobipocketHtmlBookReader&)myReader;
+ reader.myInsideGuide = tag.Start;
+}
+
+MobipocketHtmlReferenceTagAction::MobipocketHtmlReferenceTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) {
+}
+
+void MobipocketHtmlReferenceTagAction::run(const HtmlReader::HtmlTag &tag) {
+ MobipocketHtmlBookReader &reader = (MobipocketHtmlBookReader&)myReader;
+ if (reader.myInsideGuide) {
+ std::string title;
+ std::string filepos;
+ bool isTocReference = false;
+ for (std::size_t i = 0; i < tag.Attributes.size(); ++i) {
+ const std::string &name = tag.Attributes[i].Name;
+ const std::string &value = tag.Attributes[i].Value;
+ if (name == "TITLE") {
+ title = value;
+ } else if (name == "FILEPOS") {
+ filepos = value;
+ } else if ((name == "TYPE") && (ZLUnicodeUtil::toUpper(value) == "TOC")) {
+ isTocReference = true;
+ }
+ }
+ if (!title.empty() && !filepos.empty()) {
+ int position = std::atoi(filepos.c_str());
+ if (position > 0) {
+ reader.myTocReader.addReference(position, title);
+ if (isTocReference) {
+ reader.myTocReader.setStartOffset(position);
+ }
+ }
+ }
+ }
+}
+
+shared_ptr<HtmlTagAction> MobipocketHtmlBookReader::createAction(const std::string &tag) {
+ if (tag == "IMG") {
+ return new MobipocketHtmlImageTagAction(*this);
+ } else if (tag == "HR") {
+ return new MobipocketHtmlHrTagAction(*this);
+ } else if (tag == "A") {
+ return new MobipocketHtmlHrefTagAction(*this);
+ } else if (tag == "GUIDE") {
+ return new MobipocketHtmlGuideTagAction(*this);
+ } else if (tag == "REFERENCE") {
+ return new MobipocketHtmlReferenceTagAction(*this);
+ } else if (tag == "MBP:PAGEBREAK") {
+ return new MobipocketHtmlPagebreakTagAction(*this);
+ }
+ return HtmlBookReader::createAction(tag);
+}
+
+void MobipocketHtmlBookReader::startDocumentHandler() {
+ HtmlBookReader::startDocumentHandler();
+ myImageCounter = 0;
+ myInsideGuide = false;
+ myFileposReferences.clear();
+ myPositionToParagraphMap.clear();
+ myTocReader.reset();
+}
+
+bool MobipocketHtmlBookReader::tagHandler(const HtmlTag &tag) {
+ std::size_t paragraphNumber = myBookReader.model().bookTextModel()->paragraphsNumber();
+ if (myBookReader.paragraphIsOpen()) {
+ --paragraphNumber;
+ }
+ myPositionToParagraphMap.push_back(std::make_pair(tag.Offset, paragraphNumber));
+ return HtmlBookReader::tagHandler(tag);
+}
+
+MobipocketHtmlBookReader::MobipocketHtmlBookReader(const ZLFile &file, BookModel &model, const PlainTextFormat &format, const std::string &encoding) : HtmlBookReader("", model, format, encoding), myFileName(file.path()), myTocReader(*this) {
+ setBuildTableOfContent(false);
+ setProcessPreTag(false);
+}
+
+bool MobipocketHtmlBookReader::characterDataHandler(const char *text, std::size_t len, bool convert) {
+ myTocReader.appendText(text, len);
+ return HtmlBookReader::characterDataHandler(text, len, convert);
+}
+
+void MobipocketHtmlBookReader::readDocument(ZLInputStream &stream) {
+ HtmlBookReader::readDocument(stream);
+
+ PalmDocStream &pdStream = (PalmDocStream&)stream;
+ int index = pdStream.firstImageLocationIndex(myFileName);
+
+ if (index >= 0) {
+ for (int i = 0; i < myImageCounter; i++) {
+ std::pair<int,int> imageLocation = pdStream.imageLocation(pdStream.header(), i + index);
+ if ((imageLocation.first > 0) && (imageLocation.second > 0)) {
+ std::string id;
+ ZLStringUtil::appendNumber(id, i + 1);
+ myBookReader.addImage(id, new ZLFileImage(ZLFile(myFileName), imageLocation.first, imageLocation.second));
+ }
+ }
+ }
+
+ std::vector<std::pair<std::size_t,std::size_t> >::const_iterator jt = myPositionToParagraphMap.begin();
+ for (std::set<std::size_t>::const_iterator it = myFileposReferences.begin(); it != myFileposReferences.end(); ++it) {
+ while (jt != myPositionToParagraphMap.end() && jt->first < *it) {
+ ++jt;
+ }
+ if (jt == myPositionToParagraphMap.end()) {
+ break;
+ }
+ std::string label = "&";
+ ZLStringUtil::appendNumber(label, *it);
+ myBookReader.addHyperlinkLabel(label, jt->second);
+ }
+
+ jt = myPositionToParagraphMap.begin();
+ const std::map<std::size_t,std::string> &entries = myTocReader.entries();
+ for (std::map<std::size_t,std::string>::const_iterator it = entries.begin(); it != entries.end(); ++it) {
+ while (jt != myPositionToParagraphMap.end() && jt->first < it->first) {
+ ++jt;
+ }
+ if (jt == myPositionToParagraphMap.end()) {
+ break;
+ }
+ myBookReader.beginContentsParagraph(jt->second);
+ myBookReader.addContentsData(it->second);
+ myBookReader.endContentsParagraph();
+ }
+}