summary | refs | log | tree | commit | diff | stats
path: root/fbreader/src/formats/doc/DocBookReader.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'fbreader/src/formats/doc/DocBookReader.cpp')
-rw-r--r-- fbreader/src/formats/doc/DocBookReader.cpp 377
1 files changed, 0 insertions, 377 deletions
diff --git a/fbreader/src/formats/doc/DocBookReader.cpp b/fbreader/src/formats/doc/DocBookReader.cpp
deleted file mode 100644
index 99f471a..0000000
--- a/fbreader/src/formats/doc/DocBookReader.cpp
+++ /dev/null
@@ -1,377 +0,0 @@
-/*
- * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- */
-
-#include <vector>
-#include <string>
-
-#include <ZLInputStream.h>
-#include <ZLLogger.h>
-#include <ZLFile.h>
-#include <ZLStringUtil.h>
-#include <ZLFileImage.h>
-
-#include "DocBookReader.h"
-#include "../../bookmodel/BookModel.h"
-#include "../../library/Book.h"
-
-#include "OleStorage.h"
-#include "OleMainStream.h"
-
-// Builds a reader that writes the parsed document into `model`.
-// `encoding` is stored and used later by ansiDataHandler() to pick the
-// converter for non-Unicode text runs.
-DocBookReader::DocBookReader(BookModel &model, const std::string &encoding) :
-    myModelReader(model),
-    myPictureCounter(0),
-    myEncoding(encoding) {
-    myReadState = READ_TEXT;
-    // The two field-related state variables are only consulted while
-    // myReadState == READ_FIELD, and handleStartField() sets them before
-    // that happens. They are still given defined values here so that the
-    // object never carries indeterminate members.
-    myReadFieldState = READ_FIELD_TEXT;
-    myHyperlinkTypeState = NO_HYPERLINK;
-}
-
-// Opens the book's file and feeds its content into the main text model.
-// Returns false when the input stream is missing, cannot be opened, or
-// readDocument() reports failure; returns true otherwise.
-bool DocBookReader::readBook() {
-    shared_ptr<ZLInputStream> input = myModelReader.model().book()->file().inputStream();
-    if (input.isNull()) {
-        return false;
-    }
-    if (!input->open()) {
-        return false;
-    }
-
-    // Prepare the model: main text, regular kind, and an open paragraph
-    // for the handlers to write into.
-    myModelReader.setMainTextModel();
-    myModelReader.pushKind(REGULAR);
-    myModelReader.beginParagraph();
-
-    const bool parsed = readDocument(input, true);
-    if (parsed) {
-        myModelReader.insertEndOfTextParagraph();
-    }
-    return parsed;
-}
-
-// Processes one text character.
-// While a field is being read the character may be collected as field
-// instruction text or dropped; otherwise it is converted to UTF-8 and
-// appended to the current paragraph (opening one if necessary).
-void DocBookReader::handleChar(ZLUnicodeUtil::Ucs2Char ucs2char) {
-    if (myReadState == READ_FIELD) {
-        if (myReadFieldState == READ_FIELD_INFO) {
-            // still inside the field instruction: collect it for handleSeparatorField()
-            myFieldInfoBuffer.push_back(ucs2char);
-            return;
-        }
-        if (myReadFieldState == DONT_READ_FIELD_TEXT) {
-            return;
-        }
-        if (myReadFieldState == READ_FIELD_TEXT && ucs2char == WORD_HORIZONTAL_TAB) {
-            // removes pagination from TOC (for docs saved in OpenOffice):
-            // everything after the tab is ignored until the field ends
-            myReadFieldState = DONT_READ_FIELD_TEXT;
-            return;
-        }
-    }
-
-    ZLUnicodeUtil::Ucs2String oneChar;
-    oneChar.push_back(ucs2char);
-    std::string encoded;
-    ZLUnicodeUtil::ucs2ToUtf8(encoded, oneChar);
-
-    if (!myModelReader.paragraphIsOpen()) {
-        myModelReader.beginParagraph();
-    }
-    myModelReader.addData(encoded);
-}
-
-// Starts a new model paragraph for a hard line break while keeping the
-// current formatting: the active paragraph style entry (if any) and all
-// kinds currently on myKindStack are re-applied to the new paragraph.
-void DocBookReader::handleHardLinebreak() {
-    if (myModelReader.paragraphIsOpen()) {
-        myModelReader.endParagraph();
-    }
-    myModelReader.beginParagraph();
-
-    if (!myCurrentStyleEntry.isNull()) {
-        myModelReader.addStyleEntry(*myCurrentStyleEntry);
-    }
-
-    const std::size_t depth = myKindStack.size();
-    for (std::size_t pos = 0; pos != depth; ++pos) {
-        myModelReader.addControl(myKindStack.at(pos), true);
-    }
-}
-
-// Closes the current paragraph (if one is open), opens a fresh one and
-// forgets the paragraph style entry, so the next paragraph starts unstyled.
-void DocBookReader::handleParagraphEnd() {
-    myCurrentStyleEntry = 0;
-    if (myModelReader.paragraphIsOpen()) {
-        myModelReader.endParagraph();
-    }
-    myModelReader.beginParagraph();
-}
-
-// Handles a page break: finishes the open paragraph, drops the current
-// paragraph style entry, inserts an end-of-section paragraph into the
-// model and then opens a new paragraph for the following text.
-void DocBookReader::handlePageBreak() {
-    myCurrentStyleEntry = 0;
-    if (myModelReader.paragraphIsOpen()) {
-        myModelReader.endParagraph();
-    }
-    myModelReader.insertEndOfSectionParagraph();
-    myModelReader.beginParagraph();
-}
-
-// Renders a table cell boundary as text: SPACE, VERTICAL_LINE, SPACE.
-// Each character goes through handleChar(), so field-state filtering
-// applies to it as to ordinary text.
-void DocBookReader::handleTableSeparator() {
-    const ZLUnicodeUtil::Ucs2Char separator[] = { SPACE, VERTICAL_LINE, SPACE };
-    for (std::size_t pos = 0; pos < sizeof(separator) / sizeof(separator[0]); ++pos) {
-        handleChar(separator[pos]);
-    }
-}
-
-// A table row end is treated exactly like a paragraph end: each table
-// row becomes its own paragraph in the model.
-void DocBookReader::handleTableEndRow() {
-    handleParagraphEnd();
-}
-
-// Footnote marks are currently ignored.
-void DocBookReader::handleFootNoteMark() {
-    // TODO: implement footnote mark handling
-}
-
-// Handles the start of a field. Nested fields are not tracked as a
-// stack: if a field is already being read, it is closed first and the
-// new field replaces it. Afterwards the reader collects field
-// instruction text with no hyperlink open.
-void DocBookReader::handleStartField() {
-    const bool alreadyInsideField = (myReadState == READ_FIELD);
-    if (alreadyInsideField) {
-        // nested field: finish the enclosing one before starting anew
-        handleEndField();
-    }
-    myHyperlinkTypeState = NO_HYPERLINK;
-    myReadFieldState = READ_FIELD_INFO;
-    myReadState = READ_FIELD;
-}
-
-// Handles the field separator, i.e. the boundary between the field
-// instruction collected in myFieldInfoBuffer and the field's visible text.
-//
-// The instruction decides what happens to the text that follows:
-//  * empty instruction           -> text is shown, no hyperlink;
-//  * "SEQ ..."                   -> text is shown, no hyperlink;
-//  * "HYPERLINK \l ..."          -> internal hyperlink opened (if a link
-//                                   target can be parsed), text is shown;
-//  * "HYPERLINK ..."             -> external hyperlink opened (if a link
-//                                   target can be parsed), text is shown;
-//  * anything else (for example PAGE, PAGEREF, SHAPE, or HYPERLINK with
-//    no argument)                -> text is hidden.
-// The matching "close hyperlink" control is emitted by handleEndField().
-void DocBookReader::handleSeparatorField() {
-    static const std::string HYPERLINK = "HYPERLINK";
-    static const std::string SEQUENCE = "SEQ";
-    static const std::string SPACE_DELIMETER = " ";
-    static const std::string LOCAL_LINK = "\\l";
-
-    // Default outcome; the branches below only deviate from it when needed.
-    myReadFieldState = READ_FIELD_TEXT;
-    myHyperlinkTypeState = NO_HYPERLINK;
-
-    // Take the collected instruction and leave the member buffer empty.
-    ZLUnicodeUtil::Ucs2String buffer = myFieldInfoBuffer;
-    myFieldInfoBuffer.clear();
-
-    std::string utf8String;
-    ZLUnicodeUtil::ucs2ToUtf8(utf8String, buffer);
-    ZLUnicodeUtil::utf8Trim(utf8String);
-    if (utf8String.empty()) {
-        return;
-    }
-
-    // TODO: split() can return empty strings (e.g. for repeated spaces);
-    // they are filtered out here, maybe split() itself should be fixed.
-    const std::vector<std::string> result = ZLStringUtil::split(utf8String, SPACE_DELIMETER);
-    std::vector<std::string> splitted;
-    for (std::size_t i = 0; i < result.size(); ++i) {
-        if (!result.at(i).empty()) {
-            splitted.push_back(result.at(i));
-        }
-    }
-
-    if (!splitted.empty() && splitted.at(0) == SEQUENCE) {
-        // sequence numbers are shown as plain text; defaults already apply
-        return;
-    }
-
-    if (splitted.size() < 2 || splitted.at(0) != HYPERLINK) {
-        // removes pagination from TOC and hides all non-hyperlink fields
-        myReadFieldState = DONT_READ_FIELD_TEXT;
-        return;
-    }
-
-    // Only external links are URL-encoded by parseLink().
-    const bool isLocal = (splitted.at(1) == LOCAL_LINK);
-    const std::string link = isLocal ? parseLink(buffer) : parseLink(buffer, true);
-    if (link.empty()) {
-        return;
-    }
-    if (isLocal) {
-        myModelReader.addHyperlinkControl(INTERNAL_HYPERLINK, link);
-        myHyperlinkTypeState = INT_HYPERLINK_INSERTED;
-    } else {
-        myModelReader.addHyperlinkControl(EXTERNAL_HYPERLINK, link);
-        myHyperlinkTypeState = EXT_HYPERLINK_INSERTED;
-    }
-}
-
-// Handles the end of a field: discards any collected instruction text,
-// closes the hyperlink opened by handleSeparatorField() (if any) and
-// switches back to plain-text reading. Does nothing beyond clearing the
-// buffer when no field is being read.
-void DocBookReader::handleEndField() {
-    myFieldInfoBuffer.clear();
-    if (myReadState != READ_TEXT) {
-        switch (myHyperlinkTypeState) {
-            case EXT_HYPERLINK_INSERTED:
-                myModelReader.addControl(EXTERNAL_HYPERLINK, false);
-                break;
-            case INT_HYPERLINK_INSERTED:
-                myModelReader.addControl(INTERNAL_HYPERLINK, false);
-                break;
-            default:
-                break;
-        }
-        myHyperlinkTypeState = NO_HYPERLINK;
-        myReadState = READ_TEXT;
-    }
-}
-
-// Registers an embedded picture. Pictures are identified by a running
-// number (myPictureCounter); a reference is added at the current text
-// position and the image itself is described by `blocks` within the
-// book's own file.
-void DocBookReader::handleImage(const ZLFileImage::Blocks &blocks) {
-    std::string imageId;
-    ZLStringUtil::appendNumber(imageId, myPictureCounter);
-    ++myPictureCounter;
-
-    myModelReader.addImageReference(imageId);
-
-    ZLFile container(myModelReader.model().book()->file().path(), ZLMimeType::IMAGE_AUTO);
-    myModelReader.addImage(imageId, new ZLFileImage(container, blocks, ZLFileImage::ENCODING_NONE));
-}
-
-// Handles control characters that have no dedicated handler:
-//  * WORD_MINUS is emitted as MINUS;
-//  * WORD_HORIZONTAL_TAB is passed on unchanged to handleChar();
-//  * WORD_SOFT_HYPHEN and every other character are skipped.
-void DocBookReader::handleOtherControlChar(ZLUnicodeUtil::Ucs2Char ucs2char) {
-    if (ucs2char == WORD_MINUS) {
-        handleChar(MINUS);
-        return;
-    }
-    if (ucs2char == WORD_SOFT_HYPHEN) {
-        return;
-    }
-    if (ucs2char == WORD_HORIZONTAL_TAB) {
-        handleChar(ucs2char);
-    }
-}
-
-// Applies a new character font style given as a bit mask of
-// OleMainStream::CharInfo flags. All currently open kinds are closed
-// (innermost first), then BOLD and/or ITALIC are opened as requested
-// and remembered on myKindStack.
-void DocBookReader::handleFontStyle(unsigned int fontStyle) {
-    const bool insideHyperlinkText =
-        myReadState == READ_FIELD &&
-        myReadFieldState == READ_FIELD_TEXT &&
-        myHyperlinkTypeState != NO_HYPERLINK;
-    if (insideHyperlinkText) {
-        // fixes a bug where a hyperlink is shown only as bold text and
-        // doesn't look like a hyperlink
-        return;
-    }
-
-    for (; !myKindStack.empty(); myKindStack.pop_back()) {
-        myModelReader.addControl(myKindStack.back(), false);
-    }
-
-    // The stack is empty at this point, so opening each kind right after
-    // pushing it yields the same control sequence as a separate pass.
-    if (fontStyle & OleMainStream::CharInfo::FONT_BOLD) {
-        myKindStack.push_back(BOLD);
-        myModelReader.addControl(myKindStack.back(), true);
-    }
-    if (fontStyle & OleMainStream::CharInfo::FONT_ITALIC) {
-        myKindStack.push_back(ITALIC);
-        myModelReader.addControl(myKindStack.back(), true);
-    }
-}
-
-// Applies paragraph-level style information:
-//  1. performs a page break if the style asks for one before the paragraph;
-//  2. builds a style entry carrying alignment and, for heading styles
-//     H1/H2/H3, a relative font size, and adds it to the model;
-//  3. restores or recomputes the character font style;
-//  4. remembers `styleInfo` as the current style.
-void DocBookReader::handleParagraphStyle(const OleMainStream::Style &styleInfo) {
-    if (styleInfo.HasPageBreakBefore) {
-        handlePageBreak();
-    }
-
-    shared_ptr<ZLTextStyleEntry> entry = new ZLTextStyleEntry(ZLTextStyleEntry::STYLE_OTHER_ENTRY);
-
-    switch (styleInfo.Alignment) {
-        case OleMainStream::Style::ALIGNMENT_LEFT:
-            entry->setAlignmentType(ALIGN_LEFT);
-            break;
-        case OleMainStream::Style::ALIGNMENT_RIGHT:
-            entry->setAlignmentType(ALIGN_RIGHT);
-            break;
-        case OleMainStream::Style::ALIGNMENT_CENTER:
-            entry->setAlignmentType(ALIGN_CENTER);
-            break;
-        case OleMainStream::Style::ALIGNMENT_JUSTIFY:
-            entry->setAlignmentType(ALIGN_JUSTIFY);
-            break;
-        default:
-            // any other value: keep the default alignment type
-            break;
-    }
-
-    // TODO: works wrong when the style is a heading but its size is small
-    const ZLTextStyleEntry::SizeUnit percent = ZLTextStyleEntry::SIZE_UNIT_PERCENT;
-    switch (styleInfo.StyleIdCurrent) {
-        case OleMainStream::Style::STYLE_H1:
-            entry->setLength(ZLTextStyleEntry::LENGTH_FONT_SIZE, 140, percent);
-            break;
-        case OleMainStream::Style::STYLE_H2:
-            entry->setLength(ZLTextStyleEntry::LENGTH_FONT_SIZE, 120, percent);
-            break;
-        case OleMainStream::Style::STYLE_H3:
-            entry->setLength(ZLTextStyleEntry::LENGTH_FONT_SIZE, 110, percent);
-            break;
-        default:
-            break;
-    }
-
-    myCurrentStyleEntry = entry;
-    myModelReader.addStyleEntry(*myCurrentStyleEntry);
-
-    // A paragraph with the same (valid) style id as the previous one
-    // keeps the previous paragraph's font style; otherwise the font
-    // style is taken from the stylesheet.
-    const bool sameStyleAsBefore =
-        myCurrentStyleInfo.StyleIdCurrent != OleMainStream::Style::STYLE_INVALID &&
-        myCurrentStyleInfo.StyleIdCurrent == styleInfo.StyleIdCurrent;
-    if (!sameStyleAsBefore) {
-        myKindStack.clear();
-        handleFontStyle(styleInfo.CurrentCharInfo.FontStyle);
-    } else {
-        for (std::size_t pos = 0; pos < myKindStack.size(); ++pos) {
-            myModelReader.addControl(myKindStack.at(pos), true);
-        }
-    }
-
-    myCurrentStyleInfo = styleInfo;
-}
-
-// Adds a hyperlink label named `name` to the model at the current position.
-void DocBookReader::handleBookmark(const std::string &name) {
-    myModelReader.addHyperlinkLabel(name);
-}
-
-// Extracts the link target from a field instruction.
-//
-// The target is everything between the FIRST and the LAST double quote
-// in `s`. Returns an empty string when `s` contains fewer than two
-// quotes. When `urlencode` is true, whitespace inside the target becomes
-// "%20" and embedded quotes become "%22". The result is UTF-8.
-//
-// Searching for the last quote is needed to handle links such as:
-//   [0x13] HYPERLINK "http://yandex.ru/yandsearch?text='some text' и "some text2"" [0x14] link text [0x15]
-// TODO: add support for HYPERLINK fields such as:
-//   [0x13] HYPERLINK "http://site.ru/some text" \t "_blank" [0x14] text [0x15]
-// With the current approach \t and _blank are read as part of the link.
-std::string DocBookReader::parseLink(ZLUnicodeUtil::Ucs2String s, bool urlencode) {
-    static const ZLUnicodeUtil::Ucs2Char QUOTE = 0x22;
-    const std::size_t length = s.size();
-
-    // TODO: maybe findFirstOf and findLastOf should live in ZLUnicodeUtil
-    std::size_t opening = 0;
-    while (opening < length && s.at(opening) != QUOTE) {
-        ++opening;
-    }
-    if (opening == length) {
-        return std::string();
-    }
-
-    // A quote exists at `opening`, so this backward scan stops there at the latest.
-    std::size_t closing = length - 1;
-    while (s.at(closing) != QUOTE) {
-        --closing;
-    }
-    if (closing == opening) {
-        return std::string();
-    }
-
-    ZLUnicodeUtil::Ucs2String target;
-    for (std::size_t pos = opening + 1; pos < closing; ++pos) {
-        const ZLUnicodeUtil::Ucs2Char current = s.at(pos);
-        if (!urlencode) {
-            target.push_back(current);
-            continue;
-        }
-        // TODO: maybe encode all special signs in the URL, not only spaces and quotes
-        // TODO: maybe add backslash support
-        if (ZLUnicodeUtil::isSpace(current)) {
-            target.push_back('%');
-            target.push_back('2');
-            target.push_back('0');
-        } else if (current == QUOTE) {
-            target.push_back('%');
-            target.push_back('2');
-            target.push_back('2');
-        } else {
-            target.push_back(current);
-        }
-    }
-
-    std::string encoded;
-    ZLUnicodeUtil::ucs2ToUtf8(encoded, target);
-    return encoded;
-}
-
-// The start of the footnotes part is handled as a page break, so the
-// footnotes begin a new section in the model.
-void DocBookReader::footnotesStartHandler() {
-    handlePageBreak();
-}
-
-// Receives a run of `len` bytes of non-Unicode text starting at `buffer`,
-// converts it to UTF-8 using the converter for myEncoding and stores the
-// UCS-2 form in myBuffer.
-// NOTE(review): whether utf8ToUcs2() appends to or replaces myBuffer is
-// not visible here — confirm against ZLUnicodeUtil.
-void DocBookReader::ansiDataHandler(const char *buffer, std::size_t len) {
-    if (myConverter.isNull()) {
-        // the converter is created lazily, on the first ANSI run
-        ZLEncodingCollection &encodings = ZLEncodingCollection::Instance();
-        ZLEncodingConverterInfoPtr info = encodings.info(myEncoding);
-        if (info.isNull()) {
-            // unknown encoding name: fall back to the collection's default
-            myConverter = encodings.defaultConverter();
-        } else {
-            myConverter = info->createConverter();
-        }
-    }
-
-    std::string decoded;
-    myConverter->convert(decoded, buffer, buffer + len);
-    ZLUnicodeUtil::utf8ToUcs2(myBuffer, decoded);
-}
-
-// Receives a single UCS-2 character and appends it to myBuffer.
-void DocBookReader::ucs2SymbolHandler(ZLUnicodeUtil::Ucs2Char symbol) {
-    myBuffer.push_back(symbol);
-}