summaryrefslogtreecommitdiffstats
path: root/fbreader/src/formats/doc
diff options
context:
space:
mode:
Diffstat (limited to 'fbreader/src/formats/doc')
-rw-r--r--fbreader/src/formats/doc/DocBookReader.cpp377
-rw-r--r--fbreader/src/formats/doc/DocBookReader.h103
-rw-r--r--fbreader/src/formats/doc/DocFloatImageReader.cpp384
-rw-r--r--fbreader/src/formats/doc/DocFloatImageReader.h107
-rw-r--r--fbreader/src/formats/doc/DocInlineImageReader.cpp148
-rw-r--r--fbreader/src/formats/doc/DocInlineImageReader.h37
-rw-r--r--fbreader/src/formats/doc/DocMetaInfoReader.cpp38
-rw-r--r--fbreader/src/formats/doc/DocMetaInfoReader.h46
-rw-r--r--fbreader/src/formats/doc/DocPlugin.cpp71
-rw-r--r--fbreader/src/formats/doc/DocPlugin.h39
-rw-r--r--fbreader/src/formats/doc/DocStreams.cpp202
-rw-r--r--fbreader/src/formats/doc/DocStreams.h73
-rw-r--r--fbreader/src/formats/doc/OleMainStream.cpp1085
-rw-r--r--fbreader/src/formats/doc/OleMainStream.h223
-rw-r--r--fbreader/src/formats/doc/OleStorage.cpp304
-rw-r--r--fbreader/src/formats/doc/OleStorage.h92
-rw-r--r--fbreader/src/formats/doc/OleStream.cpp221
-rw-r--r--fbreader/src/formats/doc/OleStream.h58
-rw-r--r--fbreader/src/formats/doc/OleStreamParser.cpp210
-rw-r--r--fbreader/src/formats/doc/OleStreamParser.h101
-rw-r--r--fbreader/src/formats/doc/OleStreamReader.cpp86
-rw-r--r--fbreader/src/formats/doc/OleStreamReader.h46
-rw-r--r--fbreader/src/formats/doc/OleUtil.cpp58
-rw-r--r--fbreader/src/formats/doc/OleUtil.h32
24 files changed, 0 insertions, 4141 deletions
diff --git a/fbreader/src/formats/doc/DocBookReader.cpp b/fbreader/src/formats/doc/DocBookReader.cpp
deleted file mode 100644
index 99f471a..0000000
--- a/fbreader/src/formats/doc/DocBookReader.cpp
+++ /dev/null
@@ -1,377 +0,0 @@
-/*
- * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- */
-
-#include <vector>
-#include <string>
-
-#include <ZLInputStream.h>
-#include <ZLLogger.h>
-#include <ZLFile.h>
-#include <ZLStringUtil.h>
-#include <ZLFileImage.h>
-
-#include "DocBookReader.h"
-#include "../../bookmodel/BookModel.h"
-#include "../../library/Book.h"
-
-#include "OleStorage.h"
-#include "OleMainStream.h"
-
-// Creates a reader that writes parsed DOC content into `model`.
-// `encoding` is the charset name used later to decode ANSI text pieces.
-// Every state-machine member is given a defined start value, so no handler
-// can observe an indeterminate enum (initializers follow declaration order).
-DocBookReader::DocBookReader(BookModel &model, const std::string &encoding) :
-	myModelReader(model),
-	myReadState(READ_TEXT),
-	myReadFieldState(READ_FIELD_TEXT),
-	myHyperlinkTypeState(NO_HYPERLINK),
-	myPictureCounter(0),
-	myEncoding(encoding) {
-}
-
-// Opens the book file, parses it as a DOC document into the main text model
-// and terminates the text. Returns false when the stream cannot be opened
-// or readDocument() fails.
-bool DocBookReader::readBook() {
-	shared_ptr<ZLInputStream> stream = myModelReader.model().book()->file().inputStream();
-	const bool opened = !stream.isNull() && stream->open();
-	if (!opened) {
-		return false;
-	}
-
-	// a paragraph is opened up front so that handlers can add data right away
-	myModelReader.setMainTextModel();
-	myModelReader.pushKind(REGULAR);
-	myModelReader.beginParagraph();
-
-	const bool parsed = readDocument(stream, true);
-	if (parsed) {
-		myModelReader.insertEndOfTextParagraph();
-	}
-	return parsed;
-}
-
-// Handles one text character. Inside a field the character is either
-// collected as field instruction text, dropped, or (a horizontal tab while
-// reading field text) used as the signal to drop the rest of the field text.
-// Otherwise it is converted to UTF-8 and appended to the current paragraph.
-void DocBookReader::handleChar(ZLUnicodeUtil::Ucs2Char ucs2char) {
-	if (myReadState == READ_FIELD) {
-		switch (myReadFieldState) {
-			case READ_FIELD_INFO:
-				myFieldInfoBuffer.push_back(ucs2char);
-				return;
-			case DONT_READ_FIELD_TEXT:
-				return;
-			case READ_FIELD_TEXT:
-				if (ucs2char == WORD_HORIZONTAL_TAB) {
-					//to remove pagination from TOC (from doc saved in OpenOffice)
-					myReadFieldState = DONT_READ_FIELD_TEXT;
-					return;
-				}
-				break;
-		}
-	}
-
-	ZLUnicodeUtil::Ucs2String single;
-	single.push_back(ucs2char);
-	std::string encoded;
-	ZLUnicodeUtil::ucs2ToUtf8(encoded, single);
-
-	if (!myModelReader.paragraphIsOpen()) {
-		myModelReader.beginParagraph();
-	}
-	myModelReader.addData(encoded);
-}
-
-// Starts a new paragraph for a hard line break and re-applies the current
-// paragraph style entry and all currently active text kinds to it.
-void DocBookReader::handleHardLinebreak() {
-	const bool paragraphWasOpen = myModelReader.paragraphIsOpen();
-	if (paragraphWasOpen) {
-		myModelReader.endParagraph();
-	}
-	myModelReader.beginParagraph();
-
-	const bool hasStyle = !myCurrentStyleEntry.isNull();
-	if (hasStyle) {
-		myModelReader.addStyleEntry(*myCurrentStyleEntry);
-	}
-
-	typedef std::vector<FBTextKind>::const_iterator KindIterator;
-	for (KindIterator it = myKindStack.begin(); it != myKindStack.end(); ++it) {
-		myModelReader.addControl(*it, true);
-	}
-}
-
-// Closes the currently open paragraph (if any), opens a new one and resets
-// the remembered paragraph style entry.
-void DocBookReader::handleParagraphEnd() {
- if (myModelReader.paragraphIsOpen()) {
- myModelReader.endParagraph();
- }
- myModelReader.beginParagraph();
- myCurrentStyleEntry = 0;
-}
-
-// Closes the currently open paragraph (if any), resets the remembered
-// paragraph style entry, inserts an end-of-section paragraph and opens a
-// new paragraph for the text that follows.
-void DocBookReader::handlePageBreak() {
- if (myModelReader.paragraphIsOpen()) {
- myModelReader.endParagraph();
- }
- myCurrentStyleEntry = 0;
- myModelReader.insertEndOfSectionParagraph();
- myModelReader.beginParagraph();
-}
-
-// Represents a table cell separator in the text flow by emitting the
-// characters SPACE, VERTICAL_LINE, SPACE through handleChar().
-void DocBookReader::handleTableSeparator() {
- handleChar(SPACE);
- handleChar(VERTICAL_LINE);
- handleChar(SPACE);
-}
-
-// The end of a table row is handled exactly like the end of a paragraph.
-void DocBookReader::handleTableEndRow() {
- handleParagraphEnd();
-}
-
-// Footnote marks are not supported yet; the mark is ignored.
-void DocBookReader::handleFootNoteMark() {
- //TODO implement
-}
-
-// Switches the reader into field mode: following characters are collected
-// as field instruction text until the field separator is seen.
-// A field that starts while another one is still open first closes the
-// open one through handleEndField().
-void DocBookReader::handleStartField() {
- if (myReadState == READ_FIELD) { //for nested fields
- handleEndField();
- }
- myReadState = READ_FIELD;
- myReadFieldState = READ_FIELD_INFO;
- myHyperlinkTypeState = NO_HYPERLINK;
-}
-
-// Called at the field separator: interprets the collected field instruction
-// text and decides how the field result text that follows is treated.
-//  - "SEQ ..." fields: the result text is read as ordinary text;
-//  - "HYPERLINK ..." fields: a hyperlink control is opened (internal when the
-//    second token is "\l", external otherwise) and the result text is read;
-//  - any other non-empty instruction: the result text is skipped.
-// An empty instruction leaves the result text readable.
-void DocBookReader::handleSeparatorField() {
-	static const std::string HYPERLINK = "HYPERLINK";
-	static const std::string SEQUENCE = "SEQ";
-	static const std::string SPACE_DELIMETER = " ";
-	static const std::string LOCAL_LINK = "\\l";
-
-	myReadFieldState = READ_FIELD_TEXT;
-	myHyperlinkTypeState = NO_HYPERLINK;
-
-	// take over the collected instruction text and leave the member empty
-	ZLUnicodeUtil::Ucs2String buffer;
-	buffer.swap(myFieldInfoBuffer);
-
-	std::string utf8String;
-	ZLUnicodeUtil::ucs2ToUtf8(utf8String, buffer);
-	ZLUnicodeUtil::utf8Trim(utf8String);
-	if (utf8String.empty()) {
-		return;
-	}
-
-	//TODO split function can returns empty string, maybe fix it
-	const std::vector<std::string> result = ZLStringUtil::split(utf8String, SPACE_DELIMETER);
-	std::vector<std::string> splitted;
-	for (std::size_t i = 0; i < result.size(); ++i) {
-		if (!result.at(i).empty()) {
-			splitted.push_back(result.at(i));
-		}
-	}
-
-	if (!splitted.empty() && splitted.at(0) == SEQUENCE) {
-		// states were already set to READ_FIELD_TEXT / NO_HYPERLINK above
-		return;
-	}
-
-	if (splitted.size() < 2 || splitted.at(0) != HYPERLINK) {
-		//to remove pagination from TOC and not hyperlink fields
-		myReadFieldState = DONT_READ_FIELD_TEXT;
-		return;
-	}
-
-	if (splitted.at(1) == LOCAL_LINK) {
-		const std::string link = parseLink(buffer);
-		if (!link.empty()) {
-			myModelReader.addHyperlinkControl(INTERNAL_HYPERLINK, link);
-			myHyperlinkTypeState = INT_HYPERLINK_INSERTED;
-		}
-	} else {
-		const std::string link = parseLink(buffer, true);
-		if (!link.empty()) {
-			myModelReader.addHyperlinkControl(EXTERNAL_HYPERLINK, link);
-			myHyperlinkTypeState = EXT_HYPERLINK_INSERTED;
-		}
-	}
-}
-
-// Leaves field mode. Any collected instruction text is discarded, and a
-// hyperlink control opened by handleSeparatorField() is closed again.
-// Does nothing beyond clearing the buffer when no field is open.
-void DocBookReader::handleEndField() {
-	myFieldInfoBuffer.clear();
-	if (myReadState == READ_TEXT) {
-		return;
-	}
-
-	switch (myHyperlinkTypeState) {
-		case EXT_HYPERLINK_INSERTED:
-			myModelReader.addControl(EXTERNAL_HYPERLINK, false);
-			break;
-		case INT_HYPERLINK_INSERTED:
-			myModelReader.addControl(INTERNAL_HYPERLINK, false);
-			break;
-		case NO_HYPERLINK:
-			break;
-	}
-
-	myHyperlinkTypeState = NO_HYPERLINK;
-	myReadState = READ_TEXT;
-}
-
-// Registers an embedded image described by `blocks` (pieces of the book
-// file) and inserts a reference to it at the current text position.
-// The image id is the running picture counter rendered as a string.
-void DocBookReader::handleImage(const ZLFileImage::Blocks &blocks) {
- std::string number;
- ZLStringUtil::appendNumber(number, myPictureCounter++);
- myModelReader.addImageReference(number);
- // the image data is read from the book file itself
- ZLFile file(myModelReader.model().book()->file().path(), ZLMimeType::IMAGE_AUTO);
- myModelReader.addImage(number, new ZLFileImage(file, blocks, ZLFileImage::ENCODING_NONE));
-}
-
-// Handles control characters without a dedicated handler: WORD_MINUS is
-// emitted as MINUS, a horizontal tab is passed on unchanged, and a soft
-// hyphen or any other control character is dropped.
-void DocBookReader::handleOtherControlChar(ZLUnicodeUtil::Ucs2Char ucs2char) {
- if (ucs2char == WORD_MINUS) {
- handleChar(MINUS);
- } else if (ucs2char == WORD_SOFT_HYPHEN) {
- //skip
- } else if (ucs2char == WORD_HORIZONTAL_TAB) {
- handleChar(ucs2char);
- } else {
-// myTextBuffer.clear();
- }
-}
-
-// Applies a new character font style: closes every text kind that is
-// currently open and opens BOLD and/or ITALIC according to the
-// OleMainStream::CharInfo flags in `fontStyle`.
-// Ignored while the text of an inserted hyperlink is being read.
-void DocBookReader::handleFontStyle(unsigned int fontStyle) {
-	const bool insideHyperlinkText =
-		myReadState == READ_FIELD &&
-		myReadFieldState == READ_FIELD_TEXT &&
-		myHyperlinkTypeState != NO_HYPERLINK;
-	if (insideHyperlinkText) {
-		//to fix bug with hyperlink, that's only bold and doesn't looks like hyperlink
-		return;
-	}
-
-	// close the open kinds, most recently opened first
-	for (; !myKindStack.empty(); myKindStack.pop_back()) {
-		myModelReader.addControl(myKindStack.back(), false);
-	}
-
-	if ((fontStyle & OleMainStream::CharInfo::FONT_BOLD) != 0) {
-		myKindStack.push_back(BOLD);
-		myModelReader.addControl(BOLD, true);
-	}
-	if ((fontStyle & OleMainStream::CharInfo::FONT_ITALIC) != 0) {
-		myKindStack.push_back(ITALIC);
-		myModelReader.addControl(ITALIC, true);
-	}
-}
-
-// Applies the paragraph style `styleInfo` to the current paragraph:
-//  1. performs a page break first when the style requests one;
-//  2. builds a style entry carrying the alignment and, for heading styles
-//     H1..H3, an enlarged font size (140/120/110 percent), remembers it as
-//     the current entry and adds it to the model;
-//  3. restores the font style: the open kinds are re-applied when the style
-//     id equals the previous paragraph's (valid) style id, otherwise they are
-//     replaced by the font style taken from the style's character info.
-void DocBookReader::handleParagraphStyle(const OleMainStream::Style &styleInfo) {
- if (styleInfo.HasPageBreakBefore) {
- handlePageBreak();
- }
- shared_ptr<ZLTextStyleEntry> entry = new ZLTextStyleEntry(ZLTextStyleEntry::STYLE_OTHER_ENTRY);
-
- switch (styleInfo.Alignment) {
- default: // in that case, use default alignment type
- break;
- case OleMainStream::Style::ALIGNMENT_LEFT:
- entry->setAlignmentType(ALIGN_LEFT);
- break;
- case OleMainStream::Style::ALIGNMENT_RIGHT:
- entry->setAlignmentType(ALIGN_RIGHT);
- break;
- case OleMainStream::Style::ALIGNMENT_CENTER:
- entry->setAlignmentType(ALIGN_CENTER);
- break;
- case OleMainStream::Style::ALIGNMENT_JUSTIFY:
- entry->setAlignmentType(ALIGN_JUSTIFY);
- break;
- }
-
- //TODO in case, where style is heading, but size is small it works wrong
- const ZLTextStyleEntry::SizeUnit unit = ZLTextStyleEntry::SIZE_UNIT_PERCENT;
- switch (styleInfo.StyleIdCurrent) {
- default:
- break;
- case OleMainStream::Style::STYLE_H1:
- entry->setLength(ZLTextStyleEntry::LENGTH_FONT_SIZE, 140, unit);
- break;
- case OleMainStream::Style::STYLE_H2:
- entry->setLength(ZLTextStyleEntry::LENGTH_FONT_SIZE, 120, unit);
- break;
- case OleMainStream::Style::STYLE_H3:
- entry->setLength(ZLTextStyleEntry::LENGTH_FONT_SIZE, 110, unit);
- break;
- }
- myCurrentStyleEntry = entry;
- myModelReader.addStyleEntry(*myCurrentStyleEntry);
-
- // we should have the same font style, as for the previous paragraph,
- // if it has the same StyleIdCurrent
- if (myCurrentStyleInfo.StyleIdCurrent != OleMainStream::Style::STYLE_INVALID &&
- myCurrentStyleInfo.StyleIdCurrent == styleInfo.StyleIdCurrent) {
- for (std::size_t i = 0; i < myKindStack.size(); ++i) {
- myModelReader.addControl(myKindStack.at(i), true);
- }
- } else {
- // the stack is cleared without emitting closing controls
- myKindStack.clear();
- // fill by the fontstyle, that was got from Stylesheet
- handleFontStyle(styleInfo.CurrentCharInfo.FontStyle);
- }
- myCurrentStyleInfo = styleInfo;
-}
-
-// Registers the bookmark `name` as a hyperlink label at the current
-// position in the model.
-void DocBookReader::handleBookmark(const std::string &name) {
- myModelReader.addHyperlinkLabel(name);
-}
-
-// Extracts the link target from a field instruction: the text between the
-// first and the last double quote of `s`, returned as UTF-8.
-// With `urlencode` set, whitespace becomes "%20" and embedded quotes "%22".
-// Returns an empty string when `s` holds fewer than two quotes.
-std::string DocBookReader::parseLink(ZLUnicodeUtil::Ucs2String s, bool urlencode) {
-	//TODO add support for HYPERLINK like that:
-	// [0x13] HYPERLINK "http://site.ru/some text" \t "_blank" [0x14] text [0x15]
-	//Current implementation search for last QUOTE, so, it reads \t and _blank as part of link
-	//Last quote searching is need to handle link like that:
-	// [0x13] HYPERLINK "http://yandex.ru/yandsearch?text='some text' и "some text2"" [0x14] link text [0x15]
-
-	static const ZLUnicodeUtil::Ucs2Char QUOTE = 0x22;
-	const std::size_t length = s.size();
-
-	//TODO maybe functions findFirstOf and findLastOf should be in ZLUnicodeUtil class
-	std::size_t opening = 0;
-	while (opening < length && s.at(opening) != QUOTE) {
-		++opening;
-	}
-	if (opening == length) {
-		return std::string();
-	}
-
-	// walk back from the end; stopping on `opening` means there is one quote only
-	std::size_t closing = length - 1;
-	while (closing > opening && s.at(closing) != QUOTE) {
-		--closing;
-	}
-	if (closing == opening) {
-		return std::string();
-	}
-
-	ZLUnicodeUtil::Ucs2String link;
-	for (std::size_t pos = opening + 1; pos < closing; ++pos) {
-		const ZLUnicodeUtil::Ucs2Char ch = s.at(pos);
-		//TODO maybe implement function for encoding all signs in url, not only spaces and quotes
-		//TODO maybe add backslash support
-		const bool encodeSpace = urlencode && ZLUnicodeUtil::isSpace(ch);
-		const bool encodeQuote = !encodeSpace && urlencode && ch == QUOTE;
-		if (!encodeSpace && !encodeQuote) {
-			link.push_back(ch);
-			continue;
-		}
-		link.push_back('%');
-		link.push_back('2');
-		link.push_back(encodeSpace ? '0' : '2');
-	}
-
-	std::string encoded;
-	ZLUnicodeUtil::ucs2ToUtf8(encoded, link);
-	return encoded;
-}
-
-// The start of the footnotes part is handled as a page break.
-void DocBookReader::footnotesStartHandler() {
- handlePageBreak();
-}
-
-// Decodes `len` bytes of ANSI text at `buffer` to UTF-8 using the converter
-// for myEncoding (the collection's default converter when that encoding is
-// unknown) and passes the result to utf8ToUcs2() with myBuffer as target.
-// NOTE(review): myBuffer is not declared in this class; presumably it is
-// inherited from OleStreamParser - confirm.
-void DocBookReader::ansiDataHandler(const char *buffer, std::size_t len) {
- if (myConverter.isNull()) {
- // lazy converter initialization
- ZLEncodingCollection &collection = ZLEncodingCollection::Instance();
- ZLEncodingConverterInfoPtr info = collection.info(myEncoding);
- myConverter = info.isNull() ? collection.defaultConverter() : info->createConverter();
- }
- std::string utf8String;
- myConverter->convert(utf8String, buffer, buffer + len);
- ZLUnicodeUtil::utf8ToUcs2(myBuffer, utf8String);
-}
-
-// Appends a single UCS-2 symbol to myBuffer.
-void DocBookReader::ucs2SymbolHandler(ZLUnicodeUtil::Ucs2Char symbol) {
- myBuffer.push_back(symbol);
-}
diff --git a/fbreader/src/formats/doc/DocBookReader.h b/fbreader/src/formats/doc/DocBookReader.h
deleted file mode 100644
index d80fb8e..0000000
--- a/fbreader/src/formats/doc/DocBookReader.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- */
-
-#ifndef __DOCBOOKREADER_H__
-#define __DOCBOOKREADER_H__
-
-#include <vector>
-
-#include <shared_ptr.h>
-#include <ZLFile.h>
-#include <ZLTextStyleEntry.h>
-#include <ZLEncodingConverter.h>
-
-#include "../../bookmodel/BookReader.h"
-
-#include "OleMainStream.h"
-#include "OleStreamParser.h"
-
-// Builds a BookModel from a Microsoft Word (.doc) file. The class derives
-// from OleStreamParser and implements its text, field, image and formatting
-// handlers by forwarding the content to a BookReader.
-class DocBookReader : public OleStreamParser {
-
-public:
- // `encoding` names the charset used to decode ANSI text pieces
- DocBookReader(BookModel &model, const std::string &encoding);
- ~DocBookReader();
- // reads the book file of the model; returns false on failure
- bool readBook();
-
-private:
- void ansiDataHandler(const char *buffer, std::size_t len);
- void ucs2SymbolHandler(ZLUnicodeUtil::Ucs2Char symbol);
- void footnotesStartHandler();
-
- void handleChar(ZLUnicodeUtil::Ucs2Char ucs2char);
- void handleHardLinebreak();
- void handleParagraphEnd();
- void handlePageBreak();
- void handleTableSeparator();
- void handleTableEndRow();
- void handleFootNoteMark();
- void handleStartField();
- void handleSeparatorField();
- void handleEndField();
- void handleImage(const ZLFileImage::Blocks &blocks);
- void handleOtherControlChar(ZLUnicodeUtil::Ucs2Char ucs2char);
-
- //formatting:
- void handleFontStyle(unsigned int fontStyle);
- void handleParagraphStyle(const OleMainStream::Style &styleInfo);
- void handleBookmark(const std::string &name);
-
-private:
- // returns the text between the first and the last double quote of `s`
- static std::string parseLink(ZLUnicodeUtil::Ucs2String s, bool urlencode = false);
-
-private:
- BookReader myModelReader;
-
- // field instruction text collected between field start and separator
- ZLUnicodeUtil::Ucs2String myFieldInfoBuffer;
-
- // whether the reader is inside a field or in ordinary text
- enum {
- READ_FIELD,
- READ_TEXT
- } myReadState;
-
- // what to do with characters while inside a field
- enum {
- READ_FIELD_TEXT,
- DONT_READ_FIELD_TEXT,
- READ_FIELD_INFO
- } myReadFieldState;
-
- //maybe it should be flag?
- // kind of hyperlink control opened for the current field, if any
- enum {
- NO_HYPERLINK,
- EXT_HYPERLINK_INSERTED,
- INT_HYPERLINK_INSERTED
- } myHyperlinkTypeState;
-
- //formatting
- // text kinds (BOLD/ITALIC) that are currently open, in opening order
- std::vector<FBTextKind> myKindStack;
- shared_ptr<ZLTextStyleEntry> myCurrentStyleEntry;
- OleMainStream::Style myCurrentStyleInfo;
- // number of images added so far; also used as the next image id
- unsigned int myPictureCounter;
-
- const std::string myEncoding;
- // created lazily on the first ansiDataHandler() call
- shared_ptr<ZLEncodingConverter> myConverter;
-};
-
-// Nothing to release explicitly; members clean up after themselves.
-inline DocBookReader::~DocBookReader() {}
-
-#endif /* __DOCBOOKREADER_H__ */
diff --git a/fbreader/src/formats/doc/DocFloatImageReader.cpp b/fbreader/src/formats/doc/DocFloatImageReader.cpp
deleted file mode 100644
index 8c308e4..0000000
--- a/fbreader/src/formats/doc/DocFloatImageReader.cpp
+++ /dev/null
@@ -1,384 +0,0 @@
-/*
- * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- */
-
-#include <ZLLogger.h>
-
-#include "OleUtil.h"
-#include "OleStream.h"
-#include "OleMainStream.h"
-
-#include "DocFloatImageReader.h"
-
-// Stores the location (`off`, `len`) of the drawing data inside
-// `tableStream` together with both streams; nothing is read until
-// readAll() is called.
-DocFloatImageReader::DocFloatImageReader(unsigned int off, unsigned int len, shared_ptr<OleStream> tableStream, shared_ptr<OleStream> mainStream) :
- myTableStream(tableStream),
- myMainStream(mainStream),
- myOffset(off),
- myLength(len) {
-}
-
-// Parses the drawing data located at myOffset in the table stream and fills
-// myItem with the blips and shape containers found there. Parsing stops at
-// the first top-level record that is neither a Dgg nor a Dg container.
-void DocFloatImageReader::readAll() {
-	//OfficeArtContent structure is described at p.405-406 [MS-DOC]
-	const bool positioned = myTableStream->seek(myOffset, true);
-	if (!positioned) {
-		ZLLogger::Instance().println("DocPlugin", "problems with reading float images");
-		return;
-	}
-
-	for (unsigned int consumed = 0; consumed < myLength; ) {
-		RecordHeader header;
-		consumed += readRecordHeader(header, myTableStream);
-		if (header.type == 0xF000) {
-			consumed += readDggContainer(myItem, header.length, myTableStream, myMainStream);
-		} else if (header.type == 0xF002) {
-			consumed += readDgContainer(myItem, header.length, myTableStream);
-		} else {
-			return;
-		}
-	}
-}
-
-// Returns the image data blocks of the picture shown by the shape with id
-// `shapeId`, or an empty list when the shape is unknown or does not refer
-// to a stored blip.
-ZLFileImage::Blocks DocFloatImageReader::getBlocksForShapeId(unsigned int shapeId) const {
-	// the first shape container with a matching id wins
-	const FSPContainer *shape = 0;
-	for (std::size_t i = 0; i < myItem.FSPs.size(); ++i) {
-		if (myItem.FSPs.at(i).fsp.shapeId == shapeId) {
-			shape = &myItem.FSPs.at(i);
-			break;
-		}
-	}
-	if (shape == 0) {
-		return ZLFileImage::Blocks();
-	}
-
-	for (std::size_t j = 0; j < shape->fopte.size(); ++j) {
-		const FOPTE &property = shape->fopte.at(j);
-		//0x0104 specifies the BLIP, see p.420 [MS-ODRAW]
-		if (property.pId != 0x0104 || property.isComplex) {
-			continue;
-		}
-		// the property value is a 1-based index into the blip list
-		if (property.value == 0 || property.value > myItem.blips.size()) {
-			continue;
-		}
-		return myItem.blips.at(property.value - 1).blocks;
-	}
-	return ZLFileImage::Blocks();
-}
-
-// Reads an 8-byte OfficeArtRecordHeader from `stream` into `header`.
-// Always returns 8 (the header size), so callers' byte counters keep
-// advancing and their loops terminate even on a truncated stream.
-// When fewer than 8 bytes can be read, `header` is zeroed instead of being
-// decoded from an uninitialized buffer.
-unsigned int DocFloatImageReader::readRecordHeader(RecordHeader &header, shared_ptr<OleStream> stream) {
-	//OfficeArtRecordHeader structure is described at p.26 [MS-ODRAW]
-	char buffer[8];
-	if (stream->read(buffer, 8) != 8) {
-		header.version = 0;
-		header.instance = 0;
-		header.type = 0;
-		header.length = 0;
-		return 8;
-	}
-	// the first 16-bit word packs recVer (low 4 bits) and recInstance (high 12 bits)
-	const unsigned int temp = OleUtil::getU2Bytes(buffer, 0);
-	header.version = temp & 0x000F;
-	header.instance = temp >> 4;
-	header.type = OleUtil::getU2Bytes(buffer, 2);
-	header.length = OleUtil::getU4Bytes(buffer, 4);
-	return 8;
-}
-
-// Reads a Dgg container of `length` bytes from `stream`: BStore containers
-// (type 0xF001) are parsed into `item`, all other records are skipped.
-// One extra byte following the container is skipped as well.
-// Returns the number of bytes accounted for, including that extra byte.
-unsigned int DocFloatImageReader::readDggContainer(OfficeArtContent &item, unsigned int length, shared_ptr<OleStream> stream, shared_ptr<OleStream> mainStream) {
- //OfficeArtDggContainer structure is described at p.50 [MS-ODRAW]
- RecordHeader header;
- unsigned int count = 0;
-
- while (count < length) {
- count += readRecordHeader(header, stream);
- switch (header.type) {
- case 0xF001:
- count += readBStoreContainer(item, header.length, stream, mainStream);
- break;
- default:
- count += skipRecord(header, stream);
- break;
- }
- }
-
- stream->seek(1, false); //skipping dgglbl (see p.406 [MS-DOC])
- ++count;
-
- return count;
-}
-
-// Reads a BStore container of `length` bytes from `stream`. Every record of
-// type 0xF007 is parsed as a file block and appended to `item.blips`; other
-// records are skipped. Returns the number of bytes accounted for.
-unsigned int DocFloatImageReader::readBStoreContainer(OfficeArtContent &item, unsigned int length, shared_ptr<OleStream> stream, shared_ptr<OleStream> mainStream) {
- //OfficeArtBStoreContainer structure is described at p.58 [MS-ODRAW]
- RecordHeader header;
- unsigned int count = 0;
- while (count < length) {
- count += readRecordHeader(header, stream);
- switch (header.type) {
- case 0xF007:
- {
- Blip blip;
- count += readBStoreContainerFileBlock(blip, stream, mainStream);
- // the blip is stored even when its image data could not be located
- item.blips.push_back(blip);
- }
- break;
- default:
- count += skipRecord(header, stream);
- break;
- }
- }
- return count;
-}
-
-// Skips the body of the record described by `header` with a relative seek
-// and returns the body length. The result of the seek is not checked.
-unsigned int DocFloatImageReader::skipRecord(const RecordHeader &header, shared_ptr<OleStream> stream) {
- stream->seek(header.length, false);
- return header.length;
-}
-
-// Reads one file block of a BStore container: the FBSE entry comes from
-// `stream`, the blip record it points to comes from `mainStream`.
-// For JPEG/PNG/DIB/TIFF blips the image data location is stored in
-// `blip.blocks`; WMF/EMF/PICT records are skipped.
-// Returns the number of bytes consumed from `stream` only; bytes read from
-// `mainStream` are not part of the result.
-unsigned int DocFloatImageReader::readBStoreContainerFileBlock(Blip &blip, shared_ptr<OleStream> stream, shared_ptr<OleStream> mainStream) {
-	//OfficeArtBStoreContainerFileBlock structure is described at p.59 [MS-ODRAW]
-	unsigned int count = readFBSE(blip.storeEntry, stream);
-	if (blip.storeEntry.offsetInDelay != (unsigned int)-1) {
-		// seek() returns true on success, so only a FAILED seek aborts here
-		if (!mainStream->seek(blip.storeEntry.offsetInDelay, true)) { //see p.70 [MS-ODRAW]
-			//TODO maybe we should stop reading float images here
-			ZLLogger::Instance().println("DocPlugin", "DocFloatImageReader: problems with seeking for offset");
-			// keep the blip in a defined state: the caller stores it anyway
-			blip.type = 0;
-			return count;
-		}
-	}
-	RecordHeader header;
-	readRecordHeader(header, mainStream);
-	switch (header.type) {
-		case OleMainStream::IMAGE_WMF:
-		case OleMainStream::IMAGE_EMF:
-		case OleMainStream::IMAGE_PICT:
-			skipRecord(header, mainStream);
-			break;
-		case OleMainStream::IMAGE_JPEG:
-		case OleMainStream::IMAGE_JPEG2:
-		case OleMainStream::IMAGE_PNG:
-		case OleMainStream::IMAGE_DIB:
-		case OleMainStream::IMAGE_TIFF:
-			readBlip(blip, header, mainStream);
-			break;
-	}
-	blip.type = header.type;
-	return count;
-}
-
-// Locates the image data of a bitmap blip record whose header has already
-// been read from `stream`. Skips the blip's own header fields (one or two
-// 16-byte UIDs plus a 1-byte tag) and stores the location of the remaining
-// bytes of the record in `blip.blocks`.
-// The record instance tells whether a second UID is present.
-// Returns the skipped byte count plus header.length, as before.
-// NOTE(review): that sum looks like double counting; the only visible caller
-// ignores the value - confirm before relying on it.
-unsigned int DocFloatImageReader::readBlip(Blip &blip, const RecordHeader &header, shared_ptr<OleStream> stream) {
-	//OfficeArtBlip structure is described at p.60-66 [MS-ODRAW]
-	stream->seek(16, false); //skipping rgbUid1
-	unsigned int count = 16;
-
-	bool addField = false;
-	switch (header.type) {
-		case OleMainStream::IMAGE_PNG:
-			addField = header.instance == 0x6E1;
-			break;
-		case OleMainStream::IMAGE_JPEG:
-		case OleMainStream::IMAGE_JPEG2:
-			addField = header.instance == 0x46B || header.instance == 0x6E3;
-			break;
-		case OleMainStream::IMAGE_DIB:
-			addField = header.instance == 0x7A9;
-			break; // was missing: DIB fell through into the TIFF check
-		case OleMainStream::IMAGE_TIFF:
-			addField = header.instance == 0x6E5;
-			break;
-	}
-
-	if (addField) {
-		stream->seek(16, false); //skipping rgbUid2
-		count += 16;
-	}
-	stream->seek(1, false); //skipping tag
-	count += 1;
-
-	// a record shorter than its own header fields is malformed; without this
-	// check the unsigned subtraction would wrap to a huge image size
-	if (header.length >= count) {
-		blip.blocks = stream->getBlockPieceInfoList(stream->offset(), header.length - count);
-	}
-	count += header.length;
-	return count;
-}
-
-// Reads an FBSE record body from `stream`, keeping only the size, reference
-// count and delay-stream offset in `fbse`; all other fields and the
-// optional name are skipped.
-// Returns the number of bytes consumed: 36 fixed bytes plus the name length.
-unsigned int DocFloatImageReader::readFBSE(BlipStoreEntry &fbse, shared_ptr<OleStream> stream) {
- //OfficeArtFBSE structure is described at p.68 [MS-ODRAW]
- stream->seek(2, false); //skipping btWin32 and btMacOS
- stream->seek(16, false); //skipping rgbUid
- stream->seek(2, false); //skipping tag
- fbse.size = read4Bytes(stream);
- fbse.referenceCount = read4Bytes(stream);
- fbse.offsetInDelay = read4Bytes(stream);
- stream->seek(1, false); //skipping unused value
- unsigned int lengthName = read1Byte(stream); //if it should be multiplied on 2?
- stream->seek(2, false); // skipping unused values
- if (lengthName > 0) {
- stream->seek(lengthName, false); //skipping nameData
- }
- return 36 + lengthName;
-}
-
-// Reads a Dg container of `length` bytes from `stream`. Shape containers
-// (0xF004) are appended to `item.FSPs`, shape group containers (0xF003) are
-// descended into, the 8-byte FDG record (0xF008) and unknown records are
-// skipped. Returns the number of bytes accounted for.
-unsigned int DocFloatImageReader::readDgContainer(OfficeArtContent &item, unsigned int length, shared_ptr<OleStream> stream) {
- //OfficeArtDgContainer structure is described at p.52 [MS-ODRAW]
- unsigned int count = 0;
-
- RecordHeader header;
- while (count < length) {
- count += readRecordHeader(header, stream);
- switch (header.type) {
- case 0xF008: //skip OfficeArtFDG record, p. 82 [MS-ODRAW]
- stream->seek(8, false);
- count += 8;
- break;
- case 0xF003:
- count += readSpgrContainer(item, header.length, stream);
- break;
- case 0xF004:
- {
- FSPContainer fspContainer;
- count += readSpContainter(fspContainer, header.length, stream);
- item.FSPs.push_back(fspContainer);
- }
- break;
- default:
- count += skipRecord(header, stream);
- break;
- }
- }
- return count;
-}
-
-// Reads a shape group container of `length` bytes from `stream`. Nested
-// group containers (0xF003) are read recursively, shape containers (0xF004)
-// are appended to `item.FSPs`, other records are skipped.
-// Returns the number of bytes accounted for.
-unsigned int DocFloatImageReader::readSpgrContainer(OfficeArtContent &item, unsigned int length, shared_ptr<OleStream> stream) {
- //OfficeArtSpgrContainer structure is described at p.56 [MS-ODRAW]
- unsigned count = 0;
- RecordHeader header;
- while (count < length) {
- count += readRecordHeader(header, stream);
- switch (header.type) {
- case 0xF003:
- count += readSpgrContainer(item, header.length, stream);
- break;
- case 0xF004:
- {
- FSPContainer fspContainer;
- count += readSpContainter(fspContainer, header.length, stream);
- item.FSPs.push_back(fspContainer);
- }
- break;
- default:
- count += skipRecord(header, stream);
- break;
- }
- }
- return count;
-}
-
-// Reads a shape container of `length` bytes from `stream` into `item`:
-// the FSP record (0xF00A) gives the shape id and the property table
-// (0xF00B) fills `item.fopte`; the remaining records are skipped.
-// Returns the number of bytes accounted for.
-// NOTE(review): several branches skip a hard-coded size (16 or 4 bytes)
-// instead of header.length, and records 0xF00C/0xF11F/0xF11D advance neither
-// the stream nor the counter by their body - confirm this against
-// [MS-ODRAW] for records whose body size differs.
-unsigned int DocFloatImageReader::readSpContainter(FSPContainer &item, unsigned int length, shared_ptr<OleStream> stream) {
- //OfficeArtSpContainter structure is described at p.53-55 [MS-ODRAW]
- RecordHeader header;
- unsigned int count = 0;
- while (count < length) {
- count += readRecordHeader(header, stream);
- switch (header.type) {
- case 0xF009: //skip OfficeArtFSPGR record, p.74 [MS-ODRAW]
- stream->seek(16, false);
- count += 16;
- break;
- case 0xF00A:
- count += readFSP(item.fsp, stream);
- break;
- case 0xF00B:
- count += readArrayFOPTE(item.fopte, header.length, stream);
- break;
- case 0xF00E: //OfficeArtAnchor
- case 0xF00F: //OfficeArtChildAnchor, p.75 [MS-ODRAW]
- case 0xF010: //OfficeArtClientAnchor
- stream->seek(4, false);
- count += 4;
- break;
- case 0xF00C:
- case 0xF11F:
- case 0xF11D:
- break;
- default:
- count += skipRecord(header, stream);
- break;
- }
- }
- return count;
-}
-
-// Reads an FSP record body: the 4-byte shape id is stored in `fsp`, the
-// following 4 bytes are skipped. Returns 8, the number of bytes consumed.
-unsigned int DocFloatImageReader::readFSP(FSP &fsp, shared_ptr<OleStream> stream) {
- //OfficeArtFSP structure is described at p.76 [MS-ODRAW]
- fsp.shapeId = read4Bytes(stream);
- stream->seek(4, false);
- return 8;
-}
-
-// Reads 6-byte FOPTE entries from `stream` until `length` bytes are
-// consumed, appending them to `fopteArray`, then skips the data of every
-// complex entry in the array (its size is the entry's value).
-// Returns the number of bytes accounted for, which can exceed `length`.
-// NOTE(review): the skip loop walks the whole array, including entries that
-// were already present on entry - confirm the array is always empty here.
-unsigned int DocFloatImageReader::readArrayFOPTE(std::vector<FOPTE> &fopteArray,unsigned int length, shared_ptr<OleStream> stream) {
- //OfficeArtRGFOPTE structure is described at p.98 [MS-ODRAW]
- unsigned int count = 0;
- while (count < length) {
- FOPTE fopte;
- count += readFOPTE(fopte, stream);
- fopteArray.push_back(fopte);
- }
- for (std::size_t i = 0; i < fopteArray.size(); ++i) {
- if (fopteArray.at(i).isComplex) {
- stream->seek(fopteArray.at(i).value, false);
- count += fopteArray.at(i).value;
- }
- }
- return count;
-}
-
-// Reads one 6-byte FOPTE entry from `stream`: a 16-bit word holding the
-// property id (low 14 bits), the fBid flag (bit 14) and the fComplex flag
-// (bit 15), followed by a 32-bit value. Returns 6.
-unsigned int DocFloatImageReader::readFOPTE(FOPTE &fopte, shared_ptr<OleStream> stream) {
-	//OfficeArtFOPTE structure is described at p.32 [MS-ODRAW]
-	const unsigned int idAndFlags = read2Bytes(stream);
-	fopte.pId = idAndFlags & 0x3fff;
-	fopte.isBlipId = (idAndFlags & 0x4000) != 0;
-	fopte.isComplex = (idAndFlags & 0x8000) != 0;
-	fopte.value = read4Bytes(stream);
-	return 6;
-}
-
-// Reads one byte from `stream` and returns it as an unsigned value;
-// returns 0 when the byte cannot be read.
-unsigned int DocFloatImageReader::read1Byte(shared_ptr<OleStream> stream) {
-	char raw[1];
-	const bool complete = stream->read(raw, 1) == 1;
-	if (complete) {
-		return OleUtil::getU1Byte(raw, 0);
-	}
-	return 0;
-}
-
-// Reads two bytes from `stream` and decodes them with OleUtil::getU2Bytes;
-// returns 0 when fewer than two bytes can be read.
-unsigned int DocFloatImageReader::read2Bytes(shared_ptr<OleStream> stream) {
-	char raw[2];
-	const bool complete = stream->read(raw, 2) == 2;
-	if (complete) {
-		return OleUtil::getU2Bytes(raw, 0);
-	}
-	return 0;
-}
-
-// Reads four bytes from `stream` and decodes them with OleUtil::getU4Bytes;
-// returns 0 when fewer than four bytes can be read.
-unsigned int DocFloatImageReader::read4Bytes(shared_ptr<OleStream> stream) {
-	char raw[4];
-	const bool complete = stream->read(raw, 4) == 4;
-	if (complete) {
-		return OleUtil::getU4Bytes(raw, 0);
-	}
-	return 0;
-}
diff --git a/fbreader/src/formats/doc/DocFloatImageReader.h b/fbreader/src/formats/doc/DocFloatImageReader.h
deleted file mode 100644
index d2d6c2e..0000000
--- a/fbreader/src/formats/doc/DocFloatImageReader.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- */
-
-#ifndef __DOCFLOATIMAGEREADER_H__
-#define __DOCFLOATIMAGEREADER_H__
-
-#include <ZLFileImage.h>
-
-// Reads the drawing data of a Word document (blip store and shape
-// containers) and maps shape ids to the file blocks holding the picture.
-// NOTE(review): this header uses OleStream, shared_ptr and std::vector but
-// includes only ZLFileImage.h; it presumably relies on that header and on
-// the including file for those declarations - confirm.
-class DocFloatImageReader {
-
-public:
- struct BlipStoreEntry { // see p.68 [MS-ODRAW]
- unsigned int size; // size of blip in stream
- unsigned int referenceCount; // (cRef) reference count for the the blip
- unsigned int offsetInDelay; // foDelay, file offset in the delay stream
- };
-
- struct Blip { //see p.59, p63-66 [MS-ODRAW]
- BlipStoreEntry storeEntry;
- unsigned int type; // record type of the blip
- ZLFileImage::Blocks blocks; // location of the image data
- };
-
- struct FSP { //see p.76-77 [MS-ODRAW]
- unsigned int shapeId; //spid
- };
-
- struct FOPTE { //see p.98 and p.32 [MS-ODRAW]
- unsigned int pId; //pid
- bool isBlipId; //fBid
- bool isComplex; //fComplex
- unsigned int value; //op
- };
-
- struct FSPContainer { //see p.53-55 [MS-ODRAW]
- FSP fsp;
- std::vector<FOPTE> fopte;
- };
-
- struct OfficeArtContent { //see p.405-406 [MS-DOC]
- std::vector<Blip> blips; //retrieved from OfficeArtDggContainer
- std::vector<FSPContainer> FSPs; //retrieved from OfficeArtDgContainer
- };
-
- struct RecordHeader { //see p.26 [MS-ODRAW]
- unsigned int version;
- unsigned int instance;
- unsigned int type;
- unsigned int length;
- };
-
-public:
- // `off` and `len` locate the drawing data inside `tableStream`
- DocFloatImageReader(unsigned int off, unsigned int len, shared_ptr<OleStream> tableStream, shared_ptr<OleStream> mainStream);
-
-public:
- // parses the drawing data; call before getBlocksForShapeId()
- void readAll();
-
- // returns an empty list when no picture is known for `shapeId`
- ZLFileImage::Blocks getBlocksForShapeId(unsigned int shapeId) const;
-
-private:
- // the read* helpers return the number of bytes they account for
- static unsigned int readRecordHeader(RecordHeader &header, shared_ptr<OleStream> stream);
- static unsigned int readDggContainer(OfficeArtContent &item, unsigned int length, shared_ptr<OleStream> stream, shared_ptr<OleStream> mainStream);
-
- static unsigned int readBStoreContainer(OfficeArtContent &item, unsigned int length, shared_ptr<OleStream> stream, shared_ptr<OleStream> mainStream);
- static unsigned int readBStoreContainerFileBlock(Blip &blip, shared_ptr<OleStream> stream, shared_ptr<OleStream> mainStream);
- static unsigned int readBlip(Blip &blip, const RecordHeader &header, shared_ptr<OleStream> stream);
- static unsigned int readFBSE(BlipStoreEntry &fbse, shared_ptr<OleStream> stream);
-
- static unsigned int readFOPTE(FOPTE &fopte, shared_ptr<OleStream> stream);
- static unsigned int readArrayFOPTE(std::vector<FOPTE> &fopte, unsigned int length, shared_ptr<OleStream> stream);
- static unsigned int readFSP(FSP &fsp, shared_ptr<OleStream> stream);
- static unsigned int readSpContainter(FSPContainer &item, unsigned int length, shared_ptr<OleStream> stream);
- static unsigned int readSpgrContainer(OfficeArtContent &item, unsigned int length, shared_ptr<OleStream> stream);
- static unsigned int readDgContainer(OfficeArtContent &item, unsigned int length, shared_ptr<OleStream> stream);
-
- static unsigned int skipRecord(const RecordHeader &header, shared_ptr<OleStream> stream);
-
- // return the decoded value, or 0 when the bytes cannot be read
- static unsigned int read1Byte(shared_ptr<OleStream> stream);
- static unsigned int read2Bytes(shared_ptr<OleStream> stream);
- static unsigned int read4Bytes(shared_ptr<OleStream> stream);
-
-private:
- shared_ptr<OleStream> myTableStream;
- shared_ptr<OleStream> myMainStream;
- unsigned int myOffset;
- unsigned int myLength;
-
- // result of readAll()
- OfficeArtContent myItem;
-};
-
-#endif /* __DOCFLOATIMAGEREADER_H__ */
diff --git a/fbreader/src/formats/doc/DocInlineImageReader.cpp b/fbreader/src/formats/doc/DocInlineImageReader.cpp
deleted file mode 100644
index 69ce74f..0000000
--- a/fbreader/src/formats/doc/DocInlineImageReader.cpp
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- */
-
-#include "OleUtil.h"
-#include "OleMainStream.h"
-
-#include "DocInlineImageReader.h"
-
-DocInlineImageReader::DocInlineImageReader(shared_ptr<OleStream> dataStream) :
- myDataStream(dataStream) {
-}
-
-ZLFileImage::Blocks DocInlineImageReader::getImagePieceInfo(unsigned int dataPos) {
- if (myDataStream.isNull()) {
- return ZLFileImage::Blocks();
- }
- if (!myDataStream->seek(dataPos, true)) {
- return ZLFileImage::Blocks();
- }
-
- //reading PICF structure (see p. 421 [MS-DOC])
- unsigned int picfHeaderSize = 4 + 2 + 8; //record length, headerLength and storage format
- char headerBuffer[picfHeaderSize];
- if (myDataStream->read(headerBuffer, picfHeaderSize) != picfHeaderSize) {
- return ZLFileImage::Blocks();
- }
- unsigned int length = OleUtil::getU4Bytes(headerBuffer, 0);
- unsigned int headerLength = OleUtil::getU2Bytes(headerBuffer, 4);
- unsigned int formatType = OleUtil::getU2Bytes(headerBuffer, 6);
-
- if (formatType != 0x0064) { //external link to some file; see p.394 [MS-DOC]
- //TODO implement
- return ZLFileImage::Blocks();
- }
- if (headerLength >= length) {
- return ZLFileImage::Blocks();
- }
-
- //reading OfficeArtInlineSpContainer structure; see p.421 [MS-DOC] and p.56 [MS-ODRAW]
- if (!myDataStream->seek(headerLength - picfHeaderSize, false)) { //skip header
- return ZLFileImage::Blocks();
- }
-
- char buffer[8]; //for OfficeArtRecordHeader structure; see p.69 [MS-ODRAW]
- bool found = false;
- unsigned int curOffset = 0;
- for (curOffset = headerLength; !found && curOffset + 8 <= length; curOffset += 8) {
- if (myDataStream->read(buffer, 8) != 8) {
- return ZLFileImage::Blocks();
- }
- unsigned int recordInstance = OleUtil::getU2Bytes(buffer, 0) >> 4;
- unsigned int recordType = OleUtil::getU2Bytes(buffer, 2);
- unsigned int recordLen = OleUtil::getU4Bytes(buffer, 4);
-
- switch (recordType) {
- case 0xF000: case 0xF001: case 0xF002: case 0xF003: case 0xF004: case 0xF005:
- break;
- case 0xF007:
- {
- myDataStream->seek(33, false);
- char tmpBuf[1];
- myDataStream->read(tmpBuf, 1);
- unsigned int nameLength = OleUtil::getU1Byte(tmpBuf, 0);
- myDataStream->seek(nameLength * 2 + 2, false);
- curOffset += 33 + 1 + nameLength * 2 + 2;
- }
- break;
- case 0xF008:
- myDataStream->seek(8, false);
- curOffset += 8;
- break;
- case 0xF009:
- myDataStream->seek(16, false);
- curOffset += 16;
- break;
- case 0xF006: case 0xF00A: case 0xF00B: case 0xF00D: case 0xF00E: case 0xF00F: case 0xF010: case 0xF011: case 0xF122:
- myDataStream->seek(recordLen, false);
- curOffset += recordLen;
- break;
- case OleMainStream::IMAGE_EMF:
- case OleMainStream::IMAGE_WMF:
- case OleMainStream::IMAGE_PICT:
- //TODO implement
- return ZLFileImage::Blocks();
- case OleMainStream::IMAGE_JPEG:
- case OleMainStream::IMAGE_JPEG2:
- myDataStream->seek(17, false);
- curOffset += 17;
- if (recordInstance == 0x46B || recordInstance == 0x6E3) {
- myDataStream->seek(16, false);
- curOffset += 16;
- }
- found = true;
- break;
- case OleMainStream::IMAGE_PNG:
- myDataStream->seek(17, false);
- curOffset += 17;
- if (recordInstance == 0x6E1) {
- myDataStream->seek(16, false);
- curOffset += 16;
- }
- found = true;
- break;
- case OleMainStream::IMAGE_DIB: // DIB = BMP without 14-bytes header
- myDataStream->seek(17, false);
- curOffset += 17;
- if (recordInstance == 0x7A9) {
- myDataStream->seek(16, false);
- curOffset += 16;
- }
- found = true;
- break;
- case OleMainStream::IMAGE_TIFF:
- myDataStream->seek(17, false);
- curOffset += 17;
- if (recordInstance == 0x6E5) {
- myDataStream->seek(16, false);
- curOffset += 16;
- }
- found = true;
- break;
- case 0xF00C:
- default:
- return ZLFileImage::Blocks();
- }
- }
-
- if (!found) {
- return ZLFileImage::Blocks();
- }
- return myDataStream->getBlockPieceInfoList(dataPos + curOffset, length - curOffset);
-}
diff --git a/fbreader/src/formats/doc/DocInlineImageReader.h b/fbreader/src/formats/doc/DocInlineImageReader.h
deleted file mode 100644
index 9dab9ae..0000000
--- a/fbreader/src/formats/doc/DocInlineImageReader.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- */
-
-#ifndef __DOCINLINEIMAGEREADER_H__
-#define __DOCINLINEIMAGEREADER_H__
-
-#include <vector>
-
-#include "OleStream.h"
-
-class DocInlineImageReader {
-
-public:
- DocInlineImageReader(shared_ptr<OleStream> dataStream);
- ZLFileImage::Blocks getImagePieceInfo(unsigned int dataPos);
-
-private:
- shared_ptr<OleStream> myDataStream;
-};
-
-#endif /* __DOCINLINEIMAGEREADER_H__ */
diff --git a/fbreader/src/formats/doc/DocMetaInfoReader.cpp b/fbreader/src/formats/doc/DocMetaInfoReader.cpp
deleted file mode 100644
index 37b39c2..0000000
--- a/fbreader/src/formats/doc/DocMetaInfoReader.cpp
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- */
-
-#include <ZLInputStream.h>
-
-#include "../../library/Book.h"
-
-#include "DocMetaInfoReader.h"
-
-DocMetaInfoReader::DocMetaInfoReader(Book &book) : myBook(book) {
- myBook.removeAllAuthors();
- myBook.setTitle(std::string());
- myBook.setLanguage(std::string());
- myBook.removeAllTags();
-}
-
-bool DocMetaInfoReader::readMetaInfo() {
- myBook.removeAllAuthors();
- myBook.setTitle(myBook.file().name(true));
- myBook.removeAllTags();
- return true;
-}
diff --git a/fbreader/src/formats/doc/DocMetaInfoReader.h b/fbreader/src/formats/doc/DocMetaInfoReader.h
deleted file mode 100644
index db26d29..0000000
--- a/fbreader/src/formats/doc/DocMetaInfoReader.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- */
-
-#ifndef __DOCMETAINFOREADER_H__
-#define __DOCMETAINFOREADER_H__
-
-#include <string>
-
-class Book;
-
-class DocMetaInfoReader {
-
-public:
- DocMetaInfoReader(Book &book);
- ~DocMetaInfoReader();
- bool readMetaInfo();
-
- /*
- void startElementHandler(int tag, const char **attributes);
- void endElementHandler(int tag);
- void characterDataHandler(const char *text, std::size_t len);
- */
-
-private:
- Book &myBook;
-};
-
-inline DocMetaInfoReader::~DocMetaInfoReader() {}
-
-#endif /* __DOCMETAINFOREADER_H__ */
diff --git a/fbreader/src/formats/doc/DocPlugin.cpp b/fbreader/src/formats/doc/DocPlugin.cpp
deleted file mode 100644
index ef6f511..0000000
--- a/fbreader/src/formats/doc/DocPlugin.cpp
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- */
-
-#include <ZLFile.h>
-#include <ZLInputStream.h>
-#include <ZLLogger.h>
-#include <ZLImage.h>
-#include <ZLEncodingConverter.h>
-
-#include "DocPlugin.h"
-#include "DocMetaInfoReader.h"
-#include "DocBookReader.h"
-#include "DocStreams.h"
-#include "../../bookmodel/BookModel.h"
-#include "../../library/Book.h"
-
-DocPlugin::DocPlugin() {
-}
-
-DocPlugin::~DocPlugin() {
-}
-
-bool DocPlugin::providesMetaInfo() const {
- return true;
-}
-
-const std::string DocPlugin::supportedFileType() const {
- return "doc";
-}
-
-bool DocPlugin::acceptsFile(const ZLFile &file) const {
- return file.extension() == "doc";
-}
-
-bool DocPlugin::readMetaInfo(Book &book) const {
- if (!DocMetaInfoReader(book).readMetaInfo()) {
- return false;
- }
-
- shared_ptr<ZLInputStream> stream = new DocAnsiStream(book.file(), 50000);
- if (!detectEncodingAndLanguage(book, *stream)) {
- stream = new DocUcs2Stream(book.file(), 50000);
- detectLanguage(book, *stream, ZLEncodingConverter::UTF8, true);
- }
-
- return true;
-}
-
-bool DocPlugin::readLanguageAndEncoding(Book &/*book*/) const {
- return true;
-}
-
-bool DocPlugin::readModel(BookModel &model) const {
- return DocBookReader(model, model.book()->encoding()).readBook();
-}
diff --git a/fbreader/src/formats/doc/DocPlugin.h b/fbreader/src/formats/doc/DocPlugin.h
deleted file mode 100644
index 93b1803..0000000
--- a/fbreader/src/formats/doc/DocPlugin.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- */
-
-#ifndef __DOCPLUGIN_H__
-#define __DOCPLUGIN_H__
-
-#include "../FormatPlugin.h"
-
-class DocPlugin : public FormatPlugin {
-
-public:
- DocPlugin();
- ~DocPlugin();
- bool providesMetaInfo() const;
-
- const std::string supportedFileType() const;
- bool acceptsFile(const ZLFile &file) const;
- bool readMetaInfo(Book &book) const;
- bool readLanguageAndEncoding(Book &book) const;
- bool readModel(BookModel &model) const;
-};
-
-#endif /* __DOCPLUGIN_H__ */
diff --git a/fbreader/src/formats/doc/DocStreams.cpp b/fbreader/src/formats/doc/DocStreams.cpp
deleted file mode 100644
index b21e15a..0000000
--- a/fbreader/src/formats/doc/DocStreams.cpp
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- */
-
-#include <cstring>
-#include <cstdlib>
-#include <string>
-
-#include "DocStreams.h"
-#include "OleStreamReader.h"
-
-class DocReader : public OleStreamReader {
-
-public:
- DocReader(char *buffer, std::size_t maxSize);
- ~DocReader();
- std::size_t readSize() const;
-
-private:
- bool readStream(OleMainStream &stream);
- void ansiDataHandler(const char *buffer, std::size_t len);
- void ucs2SymbolHandler(ZLUnicodeUtil::Ucs2Char symbol);
- void footnotesStartHandler();
-
-protected:
- char *myBuffer;
- const std::size_t myMaxSize;
- std::size_t myActualSize;
-};
-
-class DocAnsiReader : public DocReader {
-
-public:
- DocAnsiReader(char *buffer, std::size_t maxSize);
- ~DocAnsiReader();
-
-private:
- void ansiDataHandler(const char *buffer, std::size_t len);
-};
-
-class DocUcs2Reader : public DocReader {
-
-public:
- DocUcs2Reader(char *buffer, std::size_t maxSize);
- ~DocUcs2Reader();
-
-private:
- void ucs2SymbolHandler(ZLUnicodeUtil::Ucs2Char symbol);
-};
-
-DocReader::DocReader(char *buffer, std::size_t maxSize) : myBuffer(buffer), myMaxSize(maxSize), myActualSize(0) {
-}
-
-DocReader::~DocReader() {
-}
-
-bool DocReader::readStream(OleMainStream &stream) {
- // TODO make 2 optmizations:
- // 1) If another piece is too big, reading of next piece can be stopped if some size parameter will be specified
- // (it can be transfered as a parameter (with default 0 value, that means no need to use it) to readNextPiece method)
- // 2) We can specify as a parameter for readNextPiece, what kind of piece should be read next (ANSI or not ANSI).
- // As type of piece is known already, there's no necessary to read other pieces.
- while (myActualSize < myMaxSize) {
- if (!readNextPiece(stream)) {
- break;
- }
- }
- return true;
-}
-
-void DocReader::ansiDataHandler(const char*, std::size_t) {
-}
-
-void DocReader::ucs2SymbolHandler(ZLUnicodeUtil::Ucs2Char) {
-}
-
-void DocReader::footnotesStartHandler() {
-}
-
-std::size_t DocReader::readSize() const {
- return myActualSize;
-}
-
-DocAnsiReader::DocAnsiReader(char *buffer, std::size_t maxSize) : DocReader(buffer, maxSize) {
-}
-
-DocAnsiReader::~DocAnsiReader() {
-}
-
-void DocAnsiReader::ansiDataHandler(const char *buffer, std::size_t dataLength) {
- if (myActualSize < myMaxSize) {
- const std::size_t len = std::min(dataLength, myMaxSize - myActualSize);
- std::strncpy(myBuffer + myActualSize, buffer, len);
- myActualSize += len;
- }
-}
-
-DocUcs2Reader::DocUcs2Reader(char *buffer, std::size_t maxSize) : DocReader(buffer, maxSize) {
-}
-
-DocUcs2Reader::~DocUcs2Reader() {
-}
-
-void DocUcs2Reader::ucs2SymbolHandler(ZLUnicodeUtil::Ucs2Char symbol) {
- if (myActualSize < myMaxSize) {
- char buffer[4];
- const std::size_t dataLength = ZLUnicodeUtil::ucs2ToUtf8(buffer, symbol);
- const std::size_t len = std::min(dataLength, myMaxSize - myActualSize);
- std::strncpy(myBuffer + myActualSize, buffer, len);
- myActualSize += len;
- }
-}
-
-DocStream::DocStream(const ZLFile& file, std::size_t maxSize) : myFile(file), myBuffer(0), mySize(maxSize) {
-}
-
-DocStream::~DocStream() {
- close();
-}
-
-bool DocStream::open() {
- if (mySize != 0) {
- myBuffer = new char[mySize];
- }
- shared_ptr<DocReader> reader = createReader(myBuffer, mySize);
- shared_ptr<ZLInputStream> stream = myFile.inputStream();
- if (stream.isNull() || !stream->open()) {
- return false;
- }
- if (!reader->readDocument(stream, false)) {
- return false;
- }
- mySize = reader->readSize();
- myOffset = 0;
- return true;
-}
-
-std::size_t DocStream::read(char *buffer, std::size_t maxSize) {
- maxSize = std::min(maxSize, mySize - myOffset);
- if (buffer != 0 && myBuffer != 0) {
- std::memcpy(buffer, myBuffer + myOffset, maxSize);
- }
- myOffset += maxSize;
- return maxSize;
-}
-
-void DocStream::close() {
- if (myBuffer != 0) {
- delete[] myBuffer;
- myBuffer = 0;
- }
-}
-
-void DocStream::seek(int offset, bool absoluteOffset) {
- if (!absoluteOffset) {
- offset += myOffset;
- }
- myOffset = std::min(mySize, (std::size_t)std::max(0, offset));
-}
-
-std::size_t DocStream::offset() const {
- return myOffset;
-}
-
-std::size_t DocStream::sizeOfOpened() {
- return mySize;
-}
-
-DocAnsiStream::DocAnsiStream(const ZLFile& file, std::size_t maxSize) : DocStream(file, maxSize) {
-}
-
-DocAnsiStream::~DocAnsiStream() {
-}
-
-shared_ptr<DocReader> DocAnsiStream::createReader(char *buffer, std::size_t maxSize) {
- return new DocAnsiReader(buffer, maxSize);
-}
-
-DocUcs2Stream::DocUcs2Stream(const ZLFile& file, std::size_t maxSize) : DocStream(file, maxSize) {
-}
-
-DocUcs2Stream::~DocUcs2Stream() {
-}
-
-shared_ptr<DocReader> DocUcs2Stream::createReader(char *buffer, std::size_t maxSize) {
- return new DocUcs2Reader(buffer, maxSize);
-}
diff --git a/fbreader/src/formats/doc/DocStreams.h b/fbreader/src/formats/doc/DocStreams.h
deleted file mode 100644
index 4b1538a..0000000
--- a/fbreader/src/formats/doc/DocStreams.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (C) 2008-2012 Geometer Plus <[email protected]>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- */
-
-#ifndef __DOCSTREAMS_H__
-#define __DOCSTREAMS_H__
-
-#include <ZLFile.h>
-#include <ZLInputStream.h>
-
-class DocReader;
-
-class DocStream : public ZLInputStream {
-
-public:
- DocStream(const ZLFile& file, std::size_t maxSize);
- ~DocStream();
-
-private:
- bool open();
- std::size_t read(char *buffer, std::size_t maxSize);
- void close();
-
- void seek(int offset, bool absoluteOffset);
- std::size_t offset() const;
- std::size_t sizeOfOpened();
-
-protected:
- virtual shared_ptr<DocReader> createReader(char *buffer, std::size_t maxSize) = 0;
-
-private:
- const ZLFile myFile;
- char *myBuffer;
- std::size_t mySize;
- std::size_t myOffset;
-};
-
-class DocAnsiStream : public DocStream {
-
-public:
- DocAnsiStream(const ZLFile& file, std::size_t maxSize);
- ~DocAnsiStream();
-
-private:
- shared_ptr<DocReader> createReader(char *buffer, std::size_t maxSize);
-};
-
-class DocUcs2Stream : public DocStream {
-
-public:
- DocUcs2Stream(const ZLFile& file, std::size_t maxSize);
- ~DocUcs2Stream();
-
-private:
- shared_ptr<DocReader> createReader(char *buffer, std::size_t maxSize);
-};
-
-#endif /* __DOCSTREAMS_H__ */
diff --git a/fbreader/src/formats/doc/OleMainStream.cpp b/fbreader/src/formats/doc/OleMainStream.cpp
deleted file mode 100644
index fe829e6..0000000
--- a/fbreader/src/formats/doc/OleMainStream.cpp
+++ /dev/null
@@ -1,1085 +0,0 @@
-/*
- * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- */
-
-#include <string>
-
-#include <ZLLogger.h>
-#include <ZLUnicodeUtil.h>
-
-#include "OleUtil.h"
-#include "OleStorage.h"
-
-#include "DocInlineImageReader.h"
-
-#include "OleMainStream.h"
-
-OleMainStream::Style::Style() :
- StyleIdCurrent(STYLE_INVALID),
- StyleIdNext(STYLE_INVALID),
- HasPageBreakBefore(false),
- BeforeParagraphIndent(0),
- AfterParagraphIndent(0),
- LeftIndent(0),
- FirstLineIndent(0),
- RightIndent(0),
- Alignment(ALIGNMENT_DEFAULT) {
-}
-
-OleMainStream::CharInfo::CharInfo() : FontStyle(FONT_REGULAR), FontSize(20) {
-}
-
-OleMainStream::SectionInfo::SectionInfo() : CharPosition(0), IsNewPage(true) {
-}
-
-OleMainStream::InlineImageInfo::InlineImageInfo() : DataPosition(0) {
-}
-
-OleMainStream::FloatImageInfo::FloatImageInfo() : ShapeId(0) {
-}
-
-OleMainStream::OleMainStream(shared_ptr<OleStorage> storage, OleEntry oleEntry, shared_ptr<ZLInputStream> stream) : OleStream(storage, oleEntry, stream) {
-}
-
-bool OleMainStream::open(bool doReadFormattingData) {
- if (OleStream::open() == false) {
- return false;
- }
-
- static const std::size_t HEADER_SIZE = 768; //size of data in header of main stream
- char headerBuffer[HEADER_SIZE];
- seek(0, true);
-
- if (read(headerBuffer, HEADER_SIZE) != HEADER_SIZE) {
- return false;
- }
-
- bool result = readFIB(headerBuffer);
- if (!result) {
- return false;
- }
-
- // determining table stream number
- unsigned int tableNumber = (OleUtil::getU2Bytes(headerBuffer, 0xA) & 0x0200) ? 1 : 0;
- std::string tableName = tableNumber == 0 ? "0" : "1";
- tableName += "Table";
- OleEntry tableEntry;
- result = myStorage->getEntryByName(tableName, tableEntry);
-
- if (!result) {
- // cant't find table stream (that can be only in case if file format is below Word 7/8), so building simple table stream
- // TODO: CHECK may be not all old documents have ANSI
- ZLLogger::Instance().println("DocPlugin", "cant't find table stream, building own simple piece table, that includes all charachters");
- Piece piece = {myStartOfText, myEndOfText - myStartOfText, true, Piece::PIECE_TEXT, 0};
- myPieces.push_back(piece);
- return true;
- }
-
- result = readPieceTable(headerBuffer, tableEntry);
-
- if (!result) {
- ZLLogger::Instance().println("DocPlugin", "error during reading piece table");
- return false;
- }
-
- if (!doReadFormattingData) {
- return true;
- }
-
- OleEntry dataEntry;
- if (myStorage->getEntryByName("Data", dataEntry)) {
- myDataStream = new OleStream(myStorage, dataEntry, myBaseStream);
- }
-
- //result of reading following structures doesn't check, because all these
- //problems can be ignored, and document can be showed anyway, maybe with wrong formatting
- readBookmarks(headerBuffer, tableEntry);
- readStylesheet(headerBuffer, tableEntry);
- //readSectionsInfoTable(headerBuffer, tableEntry); //it isn't used now
- readParagraphStyleTable(headerBuffer, tableEntry);
- readCharInfoTable(headerBuffer, tableEntry);
- readFloatingImages(headerBuffer, tableEntry);
- return true;
-}
-
-const OleMainStream::Pieces &OleMainStream::getPieces() const {
- return myPieces;
-}
-
-const OleMainStream::CharInfoList &OleMainStream::getCharInfoList() const {
- return myCharInfoList;
-}
-
-const OleMainStream::StyleInfoList &OleMainStream::getStyleInfoList() const {
- return myStyleInfoList;
-}
-
-const OleMainStream::BookmarksList &OleMainStream::getBookmarks() const {
- return myBookmarks;
-}
-
-const OleMainStream::InlineImageInfoList &OleMainStream::getInlineImageInfoList() const {
- return myInlineImageInfoList;
-}
-
-const OleMainStream::FloatImageInfoList &OleMainStream::getFloatImageInfoList() const {
- return myFloatImageInfoList;
-}
-
-ZLFileImage::Blocks OleMainStream::getFloatImage(unsigned int shapeId) const {
- if (myFLoatImageReader.isNull()) {
- return ZLFileImage::Blocks();
- }
- return myFLoatImageReader->getBlocksForShapeId(shapeId);
-}
-
-ZLFileImage::Blocks OleMainStream::getInlineImage(unsigned int dataPosition) const {
- if (myDataStream.isNull()) {
- return ZLFileImage::Blocks();
- }
- DocInlineImageReader imageReader(myDataStream);
- return imageReader.getImagePieceInfo(dataPosition);
-}
-
-bool OleMainStream::readFIB(const char *headerBuffer) {
- int flags = OleUtil::getU2Bytes(headerBuffer, 0xA); //offset for flags
-
- if (flags & 0x0004) { //flag for complex format
- ZLLogger::Instance().println("DocPlugin", "This was fast-saved. Some information is lost");
- //lostInfo = (flags & 0xF0) >> 4);
- }
-
- if (flags & 0x1000) { //flag for using extending charset
- ZLLogger::Instance().println("DocPlugin", "File uses extended character set (get_word8_char)");
- } else {
- ZLLogger::Instance().println("DocPlugin", "File uses get_8bit_char character set");
- }
-
- if (flags & 0x100) { //flag for encrypted files
- ZLLogger::Instance().println("DocPlugin", "File is encrypted");
- // Encryption key = %08lx ; NumUtil::get4Bytes(header, 14)
- return false;
- }
-
- unsigned int charset = OleUtil::getU2Bytes(headerBuffer, 0x14); //offset for charset number
- if (charset && charset != 0x100) { //0x100 = default charset
- ZLLogger::Instance().println("DocPlugin", "Using not default character set %d");
- } else {
- ZLLogger::Instance().println("DocPlugin", "Using default character set");
- }
-
- myStartOfText = OleUtil::get4Bytes(headerBuffer, 0x18); //offset for start of text value
- myEndOfText = OleUtil::get4Bytes(headerBuffer, 0x1c); //offset for end of text value
- return true;
-}
-
-void OleMainStream::splitPieces(const Pieces &s, Pieces &dest1, Pieces &dest2, Piece::PieceType type1, Piece::PieceType type2, int boundary) {
- Pieces source = s;
- dest1.clear();
- dest2.clear();
-
- int sumLength = 0;
- std::size_t i = 0;
- for (i = 0; i < source.size(); ++i) {
- Piece piece = source.at(i);
- if (piece.Length + sumLength >= boundary) {
- Piece piece2 = piece;
-
- piece.Length = boundary - sumLength;
- piece.Type = type1;
-
- piece2.Type = type2;
- piece2.Offset += piece.Length * 2;
- piece2.Length -= piece.Length;
-
- if (piece.Length > 0) {
- dest1.push_back(piece);
- }
- if (piece2.Length > 0) {
- dest2.push_back(piece2);
- }
- ++i;
- break;
- }
- sumLength += piece.Length;
- piece.Type = type1;
- dest1.push_back(piece);
- }
- for (; i < source.size(); ++i) {
- Piece piece = source.at(i);
- piece.Type = type2;
- dest2.push_back(piece);
- }
-
-}
-
-std::string OleMainStream::getPiecesTableBuffer(const char *headerBuffer, OleStream &tableStream) {
- unsigned int clxOffset = OleUtil::getU4Bytes(headerBuffer, 0x01A2); //offset for CLX structure
- unsigned int clxLength = OleUtil::getU4Bytes(headerBuffer, 0x01A6); //offset for value of CLX structure length
-
- //1 step : loading CLX table from table stream
- char *clxBuffer = new char[clxLength];
- if (!tableStream.seek(clxOffset, true)) {
- ZLLogger::Instance().println("DocPlugin", "getPiecesTableBuffer -- error for seeking to CLX structure");
- return std::string();
- }
- if (tableStream.read(clxBuffer, clxLength) != clxLength) {
- ZLLogger::Instance().println("DocPlugin", "getPiecesTableBuffer -- CLX structure length is invalid");
- return std::string();
- }
- std::string clx(clxBuffer, clxLength);
- delete[] clxBuffer;
-
- //2 step: searching for pieces table buffer at CLX
- //(determines it by 0x02 as start symbol)
- std::size_t from = 0;
- std::size_t i;
- std::string pieceTableBuffer;
- while ((i = clx.find_first_of(0x02, from)) != std::string::npos) {
- if (clx.size() < i + 1 + 4) {
- ZLLogger::Instance().println("DocPlugin", "getPiecesTableBuffer -- CLX structure has invalid format");
- return std::string();
- }
- unsigned int pieceTableLength = OleUtil::getU4Bytes(clx.c_str(), i + 1);
- pieceTableBuffer = std::string(clx, i + 1 + 4);
- if (pieceTableBuffer.length() != pieceTableLength) {
- from = i + 1;
- continue;
- }
- break;
- }
- return pieceTableBuffer;
-}
-
-
-bool OleMainStream::readPieceTable(const char *headerBuffer, const OleEntry &tableEntry) {
- OleStream tableStream(myStorage, tableEntry, myBaseStream);
- std::string piecesTableBuffer = getPiecesTableBuffer(headerBuffer, tableStream);
-
- if (piecesTableBuffer.empty()) {
- return false;
- }
-
- //getting count of Character Positions for different types of subdocuments in Main Stream
- int ccpText = OleUtil::get4Bytes(headerBuffer, 0x004C); //text
- int ccpFtn = OleUtil::get4Bytes(headerBuffer, 0x0050); //footnote subdocument
- int ccpHdd = OleUtil::get4Bytes(headerBuffer, 0x0054); //header subdocument
- int ccpMcr = OleUtil::get4Bytes(headerBuffer, 0x0058); //macro subdocument
- int ccpAtn = OleUtil::get4Bytes(headerBuffer, 0x005C); //comment subdocument
- int ccpEdn = OleUtil::get4Bytes(headerBuffer, 0x0060); //endnote subdocument
- int ccpTxbx = OleUtil::get4Bytes(headerBuffer, 0x0064); //textbox subdocument
- int ccpHdrTxbx = OleUtil::get4Bytes(headerBuffer, 0x0068); //textbox subdocument of the header
- int lastCP = ccpFtn + ccpHdd + ccpMcr + ccpAtn + ccpEdn + ccpTxbx + ccpHdrTxbx;
- if (lastCP != 0) {
- ++lastCP;
- }
- lastCP += ccpText;
-
- //getting the CP (character positions) and CP descriptors
- std::vector<int> cp; //array of character positions for pieces
- unsigned int j = 0;
- for (j = 0; ; j += 4) {
- if (piecesTableBuffer.size() < j + 4) {
- ZLLogger::Instance().println("DocPlugin", "invalid piece table, cp ends not with a lastcp");
- break;
- }
- int curCP = OleUtil::get4Bytes(piecesTableBuffer.c_str(), j);
- cp.push_back(curCP);
- if (curCP == lastCP) {
- break;
- }
- }
-
- if (cp.size() < 2) {
- ZLLogger::Instance().println("DocPlugin", "invalid piece table, < 2 pieces");
- return false;
- }
-
- std::vector<std::string> descriptors;
- for (std::size_t k = 0; k < cp.size() - 1; ++k) {
- //j + 4, because it should be taken after CP in PiecesTable Buffer
- //k * 8, because it should be taken 8 byte for each descriptor
- std::size_t substrFrom = j + 4 + k * 8;
- if (piecesTableBuffer.size() < substrFrom + 8) {
- ZLLogger::Instance().println("DocPlugin", "invalid piece table, problems with descriptors reading");
- break;
- }
- descriptors.push_back(piecesTableBuffer.substr(substrFrom, 8));
- }
-
- //filling the Pieces vector
- std::size_t minValidSize = std::min(cp.size() - 1, descriptors.size());
- if (minValidSize == 0) {
- ZLLogger::Instance().println("DocPlugin", "invalid piece table, there are no pieces");
- return false;
- }
-
- for (std::size_t i = 0; i < minValidSize; ++i) {
- //4byte integer with offset and ANSI flag
- int fcValue = OleUtil::get4Bytes(descriptors.at(i).c_str(), 0x2); //offset for piece structure
- Piece piece;
- piece.IsANSI = (fcValue & 0x40000000) == 0x40000000; //ansi flag
- piece.Offset = fcValue & 0x3FFFFFFF; //gettting offset for current piece
- piece.Length = cp.at(i + 1) - cp.at(i);
- myPieces.push_back(piece);
- }
-
- //split pieces into different types
- Pieces piecesText, piecesFootnote, piecesOther;
- splitPieces(myPieces, piecesText, piecesFootnote, Piece::PIECE_TEXT, Piece::PIECE_FOOTNOTE, ccpText);
- splitPieces(piecesFootnote, piecesFootnote, piecesOther, Piece::PIECE_FOOTNOTE, Piece::PIECE_OTHER, ccpFtn);
-
- myPieces.clear();
- for (std::size_t i = 0; i < piecesText.size(); ++i) {
- myPieces.push_back(piecesText.at(i));
- }
- for (std::size_t i = 0; i < piecesFootnote.size(); ++i) {
- myPieces.push_back(piecesFootnote.at(i));
- }
- for (std::size_t i = 0; i < piecesOther.size(); ++i) {
- myPieces.push_back(piecesOther.at(i));
- }
-
- //converting length and offset depending on isANSI
- for (std::size_t i = 0; i < myPieces.size(); ++i) {
- Piece &piece = myPieces.at(i);
- if (!piece.IsANSI) {
- piece.Length *= 2;
- } else {
- piece.Offset /= 2;
- }
- }
-
- //filling startCP field
- unsigned int curStartCP = 0;
- for (std::size_t i = 0; i < myPieces.size(); ++i) {
- Piece &piece = myPieces.at(i);
- piece.startCP = curStartCP;
- if (piece.IsANSI) {
- curStartCP += piece.Length;
- } else {
- curStartCP += piece.Length / 2;
- }
- }
- return true;
-}
-
-bool OleMainStream::readBookmarks(const char *headerBuffer, const OleEntry &tableEntry) {
- //SttbfBkmk structure is a table of bookmark name strings
- unsigned int beginNamesInfo = OleUtil::getU4Bytes(headerBuffer, 0x142); // address of SttbfBkmk structure
- std::size_t namesInfoLength = (std::size_t)OleUtil::getU4Bytes(headerBuffer, 0x146); // length of SttbfBkmk structure
-
- if (namesInfoLength == 0) {
- return true; //there's no bookmarks
- }
-
- OleStream tableStream(myStorage, tableEntry, myBaseStream);
- std::string buffer;
- if (!readToBuffer(buffer, beginNamesInfo, namesInfoLength, tableStream)) {
- return false;
- }
-
- unsigned int recordsNumber = OleUtil::getU2Bytes(buffer.c_str(), 0x2); //count of records
-
- std::vector<std::string> names;
- unsigned int offset = 0x6; //initial offset
- for (unsigned int i = 0; i < recordsNumber; ++i) {
- if (buffer.size() < offset + 2) {
- ZLLogger::Instance().println("DocPlugin", "problmes with reading bookmarks names");
- break;
- }
- unsigned int length = OleUtil::getU2Bytes(buffer.c_str(), offset) * 2; //length of string in bytes
- ZLUnicodeUtil::Ucs2String name;
- for (unsigned int j = 0; j < length; j+=2) {
- char ch1 = buffer.at(offset + 2 + j);
- char ch2 = buffer.at(offset + 2 + j + 1);
- ZLUnicodeUtil::Ucs2Char ucs2Char = (unsigned int)ch1 | ((unsigned int)ch2 << 8);
- name.push_back(ucs2Char);
- }
- std::string utf8Name;
- ZLUnicodeUtil::ucs2ToUtf8(utf8Name, name);
- names.push_back(utf8Name);
- offset += length + 2;
- }
-
- //plcfBkmkf structure is table recording beginning CPs of bookmarks
- unsigned int beginCharPosInfo = OleUtil::getU4Bytes(headerBuffer, 0x14A); // address of plcfBkmkf structure
- std::size_t charPosInfoLen = (std::size_t)OleUtil::getU4Bytes(headerBuffer, 0x14E); // length of plcfBkmkf structure
-
- if (charPosInfoLen == 0) {
- return true; //there's no bookmarks
- }
-
- if (!readToBuffer(buffer, beginCharPosInfo, charPosInfoLen, tableStream)) {
- return false;
- }
-
- static const unsigned int BKF_SIZE = 4;
- std::size_t size = calcCountOfPLC(charPosInfoLen, BKF_SIZE);
- std::vector<unsigned int> charPage;
- for (std::size_t index = 0, offset = 0; index < size; ++index, offset += 4) {
- charPage.push_back(OleUtil::getU4Bytes(buffer.c_str(), offset));
- }
-
- for (std::size_t i = 0; i < names.size(); ++i) {
- if (i >= charPage.size()) {
- break; //for the case if something in these structures goes wrong, to not to lose all bookmarks
- }
- Bookmark bookmark;
- bookmark.CharPosition = charPage.at(i);
- bookmark.Name = names.at(i);
- myBookmarks.push_back(bookmark);
- }
-
- return true;
-}
-
-bool OleMainStream::readStylesheet(const char *headerBuffer, const OleEntry &tableEntry) {
- //STSH structure is a stylesheet
- unsigned int beginStshInfo = OleUtil::getU4Bytes(headerBuffer, 0xa2); // address of STSH structure
- std::size_t stshInfoLength = (std::size_t)OleUtil::getU4Bytes(headerBuffer, 0xa6); // length of STSH structure
-
- OleStream tableStream(myStorage, tableEntry, myBaseStream);
- char *buffer = new char[stshInfoLength];
- if (!tableStream.seek(beginStshInfo, true)) {
- ZLLogger::Instance().println("DocPlugin", "problems with reading STSH structure");
- return false;
- }
- if (tableStream.read(buffer, stshInfoLength) != stshInfoLength) {
- ZLLogger::Instance().println("DocPlugin", "problems with reading STSH structure, invalid length");
- return false;
- }
-
- std::size_t stdCount = (std::size_t)OleUtil::getU2Bytes(buffer, 2);
- std::size_t stdBaseInFile = (std::size_t)OleUtil::getU2Bytes(buffer, 4);
- myStyleSheet.resize(stdCount);
-
- std::vector<bool> isFilled;
- isFilled.resize(stdCount, false);
-
- std::size_t stdLen = 0;
- bool styleSheetWasChanged = false;
- do { //make it in while loop, because some base style can be after their successors
- styleSheetWasChanged = false;
- for (std::size_t index = 0, offset = 2 + (std::size_t)OleUtil::getU2Bytes(buffer, 0); index < stdCount; index++, offset += 2 + stdLen) {
- stdLen = (std::size_t)OleUtil::getU2Bytes(buffer, offset);
- if (isFilled.at(index)) {
- continue;
- }
-
- if (stdLen == 0) {
- //if record is empty, left it default
- isFilled[index] = true;
- continue;
- }
-
- Style styleInfo = myStyleSheet.at(index);
-
- const unsigned int styleAndBaseType = OleUtil::getU2Bytes(buffer, offset + 4);
- const unsigned int styleType = styleAndBaseType % 16;
- const unsigned int baseStyleId = styleAndBaseType / 16;
- if (baseStyleId == Style::STYLE_NIL || baseStyleId == Style::STYLE_USER) {
- //if based on nil or user style, left default
- } else {
- int baseStyleIndex = getStyleIndex(baseStyleId, isFilled, myStyleSheet);
- if (baseStyleIndex < 0) {
- //this base style is not filled yet, so pass it at some time
- continue;
- }
- styleInfo = myStyleSheet.at(baseStyleIndex);
- styleInfo.StyleIdCurrent = Style::STYLE_INVALID;
- }
-
- // parse STD structure
- unsigned int tmp = OleUtil::getU2Bytes(buffer, offset + 6);
- unsigned int upxCount = tmp % 16;
- styleInfo.StyleIdNext = tmp / 16;
-
- //adding current style
- myStyleSheet[index] = styleInfo;
- isFilled[index] = true;
- styleSheetWasChanged = true;
-
- std::size_t pos = 2 + stdBaseInFile;
- std::size_t nameLen = (std::size_t)OleUtil::getU2Bytes(buffer, offset + pos);
- nameLen = nameLen * 2 + 2; //from Unicode characters to bytes + Unicode null charachter length
- pos += 2 + nameLen;
- if (pos % 2 != 0) {
- ++pos;
- }
- if (pos >= stdLen) {
- continue;
- }
- std::size_t upxLen = (std::size_t)OleUtil::getU2Bytes(buffer, offset + pos);
- if (pos + upxLen > stdLen) {
- //UPX length too large
- continue;
- }
- //for style info styleType must be equal 1
- if (styleType == 1 && upxCount >= 1) {
- if (upxLen >= 2) {
- styleInfo.StyleIdCurrent = OleUtil::getU2Bytes(buffer, offset + pos + 2);
- getStyleInfo(0, buffer + offset + pos + 4, upxLen - 2, styleInfo);
- myStyleSheet[index] = styleInfo;
- }
- pos += 2 + upxLen;
- if (pos % 2 != 0) {
- ++pos;
- }
- upxLen = (std::size_t)OleUtil::getU2Bytes(buffer, offset + pos);
- }
- if (upxLen == 0 || pos + upxLen > stdLen) {
- //too small/too large
- continue;
- }
- //for char info styleType can be equal 1 or 2
- if ((styleType == 1 && upxCount >= 2) || (styleType == 2 && upxCount >= 1)) {
- CharInfo charInfo;
- getCharInfo(0, Style::STYLE_INVALID, buffer + offset + pos + 2, upxLen, charInfo);
- styleInfo.CurrentCharInfo = charInfo;
- myStyleSheet[index] = styleInfo;
- }
- }
- } while (styleSheetWasChanged);
- delete[] buffer;
- return true;
-}
-
-bool OleMainStream::readCharInfoTable(const char *headerBuffer, const OleEntry &tableEntry) {
- //PlcfbteChpx structure is table with formatting for particular run of text
- unsigned int beginCharInfo = OleUtil::getU4Bytes(headerBuffer, 0xfa); // address of PlcfbteChpx structure
- std::size_t charInfoLength = (std::size_t)OleUtil::getU4Bytes(headerBuffer, 0xfe); // length of PlcfbteChpx structure
- if (charInfoLength < 4) {
- return false;
- }
-
- OleStream tableStream(myStorage, tableEntry, myBaseStream);
- std::string buffer;
- if (!readToBuffer(buffer, beginCharInfo, charInfoLength, tableStream)) {
- return false;
- }
-
- static const unsigned int CHPX_SIZE = 4;
- std::size_t size = calcCountOfPLC(charInfoLength, CHPX_SIZE);
- std::vector<unsigned int> charBlocks;
- for (std::size_t index = 0, offset = (size + 1) * 4; index < size; ++index, offset += CHPX_SIZE) {
- charBlocks.push_back(OleUtil::getU4Bytes(buffer.c_str(), offset));
- }
-
- char *formatPageBuffer = new char[OleStorage::BBD_BLOCK_SIZE];
- for (std::size_t index = 0; index < charBlocks.size(); ++index) {
- seek(charBlocks.at(index) * OleStorage::BBD_BLOCK_SIZE, true);
- if (read(formatPageBuffer, OleStorage::BBD_BLOCK_SIZE) != OleStorage::BBD_BLOCK_SIZE) {
- return false;
- }
- unsigned int crun = OleUtil::getU1Byte(formatPageBuffer, 0x1ff); //offset with crun (count of 'run of text')
- for (unsigned int index2 = 0; index2 < crun; ++index2) {
- unsigned int offset = OleUtil::getU4Bytes(formatPageBuffer, index2 * 4);
- unsigned int chpxOffset = 2 * OleUtil::getU1Byte(formatPageBuffer, (crun + 1) * 4 + index2);
- unsigned int len = OleUtil::getU1Byte(formatPageBuffer, chpxOffset);
- unsigned int charPos = 0;
- if (!offsetToCharPos(offset, charPos, myPieces)) {
- continue;
- }
- unsigned int styleId = getStyleIdByCharPos(charPos, myStyleInfoList);
-
- CharInfo charInfo = getStyleFromStylesheet(styleId, myStyleSheet).CurrentCharInfo;
- if (chpxOffset != 0) {
- getCharInfo(chpxOffset, styleId, formatPageBuffer + 1, len - 1, charInfo);
- }
- myCharInfoList.push_back(CharPosToCharInfo(charPos, charInfo));
-
- if (chpxOffset != 0) {
- InlineImageInfo pictureInfo;
- if (getInlineImageInfo(chpxOffset, formatPageBuffer + 1, len - 1, pictureInfo)) {
- myInlineImageInfoList.push_back(CharPosToInlineImageInfo(charPos, pictureInfo));
- }
- }
-
- }
- }
- delete[] formatPageBuffer;
- return true;
-}
-
-bool OleMainStream::readFloatingImages(const char *headerBuffer, const OleEntry &tableEntry) {
- //Plcspa structure is a table with information for FSPA (File Shape Address)
- unsigned int beginPicturesInfo = OleUtil::getU4Bytes(headerBuffer, 0x01DA); // address of Plcspa structure
- if (beginPicturesInfo == 0) {
- return true; //there's no office art objects
- }
- unsigned int picturesInfoLength = OleUtil::getU4Bytes(headerBuffer, 0x01DE); // length of Plcspa structure
- if (picturesInfoLength < 4) {
- return false;
- }
-
- OleStream tableStream(myStorage, tableEntry, myBaseStream);
- std::string buffer;
- if (!readToBuffer(buffer, beginPicturesInfo, picturesInfoLength, tableStream)) {
- return false;
- }
-
- static const unsigned int SPA_SIZE = 26;
- std::size_t size = calcCountOfPLC(picturesInfoLength, SPA_SIZE);
-
- std::vector<unsigned int> picturesBlocks;
- for (std::size_t index = 0, tOffset = 0; index < size; ++index, tOffset += 4) {
- picturesBlocks.push_back(OleUtil::getU4Bytes(buffer.c_str(), tOffset));
- }
-
- for (std::size_t index = 0, tOffset = (size + 1) * 4; index < size; ++index, tOffset += SPA_SIZE) {
- unsigned int spid = OleUtil::getU4Bytes(buffer.c_str(), tOffset);
- FloatImageInfo info;
- unsigned int charPos = picturesBlocks.at(index);
- info.ShapeId = spid;
- myFloatImageInfoList.push_back(CharPosToFloatImageInfo(charPos, info));
- }
-
- //DggInfo structure is office art object table data
- unsigned int beginOfficeArtContent = OleUtil::getU4Bytes(headerBuffer, 0x22A); // address of DggInfo structure
- if (beginOfficeArtContent == 0) {
- return true; //there's no office art objects
- }
- unsigned int officeArtContentLength = OleUtil::getU4Bytes(headerBuffer, 0x022E); // length of DggInfo structure
- if (officeArtContentLength < 4) {
- return false;
- }
-
- shared_ptr<OleStream> newTableStream = new OleStream(myStorage, tableEntry, myBaseStream);
- shared_ptr<OleStream> newMainStream = new OleStream(myStorage, myOleEntry, myBaseStream);
- if (newTableStream->open() && newMainStream->open()) {
- myFLoatImageReader = new DocFloatImageReader(beginOfficeArtContent, officeArtContentLength, newTableStream, newMainStream);
- myFLoatImageReader->readAll();
- }
- return true;
-}
-
-bool OleMainStream::readParagraphStyleTable(const char *headerBuffer, const OleEntry &tableEntry) {
- //PlcBtePapx structure is table with formatting for all paragraphs
- unsigned int beginParagraphInfo = OleUtil::getU4Bytes(headerBuffer, 0x102); // address of PlcBtePapx structure
- std::size_t paragraphInfoLength = (std::size_t)OleUtil::getU4Bytes(headerBuffer, 0x106); // length of PlcBtePapx structure
- if (paragraphInfoLength < 4) {
- return false;
- }
-
- OleStream tableStream(myStorage, tableEntry, myBaseStream);
- std::string buffer;
- if (!readToBuffer(buffer, beginParagraphInfo, paragraphInfoLength, tableStream)) {
- return false;
- }
-
- static const unsigned int PAPX_SIZE = 4;
- std::size_t size = calcCountOfPLC(paragraphInfoLength, PAPX_SIZE);
-
- std::vector<unsigned int> paragraphBlocks;
- for (std::size_t index = 0, tOffset = (size + 1) * 4; index < size; ++index, tOffset += PAPX_SIZE) {
- paragraphBlocks.push_back(OleUtil::getU4Bytes(buffer.c_str(), tOffset));
- }
-
- char *formatPageBuffer = new char[OleStorage::BBD_BLOCK_SIZE];
- for (std::size_t index = 0; index < paragraphBlocks.size(); ++index) {
- seek(paragraphBlocks.at(index) * OleStorage::BBD_BLOCK_SIZE, true);
- if (read(formatPageBuffer, OleStorage::BBD_BLOCK_SIZE) != OleStorage::BBD_BLOCK_SIZE) {
- return false;
- }
- const unsigned int paragraphsCount = OleUtil::getU1Byte(formatPageBuffer, 0x1ff); //offset with 'cpara' value (count of paragraphs)
- for (unsigned int index2 = 0; index2 < paragraphsCount; ++index2) {
- const unsigned int offset = OleUtil::getU4Bytes(formatPageBuffer, index2 * 4);
- unsigned int papxOffset = OleUtil::getU1Byte(formatPageBuffer, (paragraphsCount + 1) * 4 + index2 * 13) * 2;
- if (papxOffset <= 0) {
- continue;
- }
- unsigned int len = OleUtil::getU1Byte(formatPageBuffer, papxOffset) * 2;
- if (len == 0) {
- ++papxOffset;
- len = OleUtil::getU1Byte(formatPageBuffer, papxOffset) * 2;
- }
-
- const unsigned int styleId = OleUtil::getU2Bytes(formatPageBuffer, papxOffset + 1);
- Style styleInfo = getStyleFromStylesheet(styleId, myStyleSheet);
-
- if (len >= 3) {
- getStyleInfo(papxOffset, formatPageBuffer + 3, len - 3, styleInfo);
- }
-
- unsigned int charPos = 0;
- if (!offsetToCharPos(offset, charPos, myPieces)) {
- continue;
- }
- myStyleInfoList.push_back(CharPosToStyle(charPos, styleInfo));
- }
- }
- delete[] formatPageBuffer;
- return true;
-}
-
-bool OleMainStream::readSectionsInfoTable(const char *headerBuffer, const OleEntry &tableEntry) {
- //PlcfSed structure is a section table
- unsigned int beginOfText = OleUtil::getU4Bytes(headerBuffer, 0x18); //address of text's begin in main stream
- unsigned int beginSectInfo = OleUtil::getU4Bytes(headerBuffer, 0xca); //address if PlcfSed structure
-
- std::size_t sectInfoLen = (std::size_t)OleUtil::getU4Bytes(headerBuffer, 0xce); //length of PlcfSed structure
- if (sectInfoLen < 4) {
- return false;
- }
-
- OleStream tableStream(myStorage, tableEntry, myBaseStream);
- std::string buffer;
- if (!readToBuffer(buffer, beginSectInfo, sectInfoLen, tableStream)) {
- return false;
- }
-
- static const unsigned int SED_SIZE = 12;
- std::size_t decriptorsCount = calcCountOfPLC(sectInfoLen, SED_SIZE);
-
- //saving the section offsets (in character positions)
- std::vector<unsigned int> charPos;
- for (std::size_t index = 0, tOffset = 0; index < decriptorsCount; ++index, tOffset += 4) {
- unsigned int ulTextOffset = OleUtil::getU4Bytes(buffer.c_str(), tOffset);
- charPos.push_back(beginOfText + ulTextOffset);
- }
-
- //saving sepx offsets
- std::vector<unsigned int> sectPage;
- for (std::size_t index = 0, tOffset = (decriptorsCount + 1) * 4; index < decriptorsCount; ++index, tOffset += SED_SIZE) {
- sectPage.push_back(OleUtil::getU4Bytes(buffer.c_str(), tOffset + 2));
- }
-
- //reading the section properties
- char tmpBuffer[2];
- for (std::size_t index = 0; index < sectPage.size(); ++index) {
- if (sectPage.at(index) == 0xffffffffUL) { //check for invalid record, to make default section info
- SectionInfo sectionInfo;
- sectionInfo.CharPosition = charPos.at(index);
- mySectionInfoList.push_back(sectionInfo);
- continue;
- }
- //getting number of bytes to read
- if (!seek(sectPage.at(index), true)) {
- continue;
- }
- if (read(tmpBuffer, 2) != 2) {
- continue;
- }
- std::size_t bytes = 2 + (std::size_t)OleUtil::getU2Bytes(tmpBuffer, 0);
-
- if (!seek(sectPage.at(index), true)) {
- continue;
- }
- char *formatPageBuffer = new char[bytes];
- if (read(formatPageBuffer, bytes) != bytes) {
- delete[] formatPageBuffer;
- continue;
- }
- SectionInfo sectionInfo;
- sectionInfo.CharPosition = charPos.at(index);
- getSectionInfo(formatPageBuffer + 2, bytes - 2, sectionInfo);
- mySectionInfoList.push_back(sectionInfo);
- delete[] formatPageBuffer;
- }
- return true;
-}
-
-void OleMainStream::getStyleInfo(unsigned int papxOffset, const char *grpprlBuffer, unsigned int bytes, Style &styleInfo) {
- int tmp, toDelete, toAdd;
- unsigned int offset = 0;
- while (bytes >= offset + 2) {
- unsigned int curPrlLength = 0;
- switch (OleUtil::getU2Bytes(grpprlBuffer, papxOffset + offset)) {
- case 0x2403:
- styleInfo.Alignment = (Style::AlignmentType)OleUtil::getU1Byte(grpprlBuffer, papxOffset + offset + 2);
- break;
- case 0x4610:
- styleInfo.LeftIndent += OleUtil::getU2Bytes(grpprlBuffer, papxOffset + offset + 2);
- if (styleInfo.LeftIndent < 0) {
- styleInfo.LeftIndent = 0;
- }
- break;
- case 0xc60d: // ChgTabsPapx
- case 0xc615: // ChgTabs
- tmp = OleUtil::get1Byte(grpprlBuffer, papxOffset + offset + 2);
- if (tmp < 2) {
- curPrlLength = 1;
- break;
- }
- toDelete = OleUtil::getU1Byte(grpprlBuffer, papxOffset + offset + 3);
- if (tmp < 2 + 2 * toDelete) {
- curPrlLength = 1;
- break;
- }
- toAdd = OleUtil::getU1Byte(grpprlBuffer, papxOffset + offset + 4 + 2 * toDelete);
- if (tmp < 2 + 2 * toDelete + 2 * toAdd) {
- curPrlLength = 1;
- break;
- }
- break;
- case 0x840e:
- styleInfo.RightIndent = (int)OleUtil::getU2Bytes(grpprlBuffer, papxOffset + offset + 2);
- break;
- case 0x840f:
- styleInfo.LeftIndent = (int)OleUtil::getU2Bytes(grpprlBuffer, papxOffset + offset + 2);
- break;
- case 0x8411:
- styleInfo.FirstLineIndent = (int)OleUtil::getU2Bytes(grpprlBuffer, papxOffset + offset + 2);
- break;
- case 0xa413:
- styleInfo.BeforeParagraphIndent = OleUtil::getU2Bytes(grpprlBuffer, papxOffset + offset + 2);
- break;
- case 0xa414:
- styleInfo.AfterParagraphIndent = OleUtil::getU2Bytes(grpprlBuffer, papxOffset + offset + 2);
- break;
- case 0x2407:
- styleInfo.HasPageBreakBefore = OleUtil::getU1Byte(grpprlBuffer, papxOffset + offset + 2) == 0x01;
- break;
- default:
- break;
- }
- if (curPrlLength == 0) {
- curPrlLength = getPrlLength(grpprlBuffer, papxOffset + offset);
- }
- offset += curPrlLength;
- }
-
-}
-
-void OleMainStream::getCharInfo(unsigned int chpxOffset, unsigned int /*styleId*/, const char *grpprlBuffer, unsigned int bytes, CharInfo &charInfo) {
- unsigned int sprm = 0; //single propery modifier
- unsigned int offset = 0;
- while (bytes >= offset + 2) {
- switch (OleUtil::getU2Bytes(grpprlBuffer, chpxOffset + offset)) {
- case 0x0835: //bold
- sprm = OleUtil::getU1Byte(grpprlBuffer, chpxOffset + offset + 2);
- switch (sprm) {
- case UNSET:
- charInfo.FontStyle &= ~CharInfo::FONT_BOLD;
- break;
- case SET:
- charInfo.FontStyle |= CharInfo::FONT_BOLD;
- break;
- case UNCHANGED:
- break;
- case NEGATION:
- charInfo.FontStyle ^= CharInfo::FONT_BOLD;
- break;
- default:
- break;
- }
- break;
- case 0x0836: //italic
- sprm = OleUtil::getU1Byte(grpprlBuffer, chpxOffset + offset + 2);
- switch (sprm) {
- case UNSET:
- charInfo.FontStyle &= ~CharInfo::FONT_ITALIC;
- break;
- case SET:
- charInfo.FontStyle |= CharInfo::FONT_ITALIC;
- break;
- case UNCHANGED:
- break;
- case NEGATION:
- charInfo.FontStyle ^= CharInfo::FONT_ITALIC;
- break;
- default:
- break;
- }
- break;
- case 0x4a43: //size of font
- charInfo.FontSize = OleUtil::getU2Bytes(grpprlBuffer, chpxOffset + offset + 2);
- break;
- default:
- break;
- }
- offset += getPrlLength(grpprlBuffer, chpxOffset + offset);
- }
-
-}
-
-void OleMainStream::getSectionInfo(const char *grpprlBuffer, std::size_t bytes, SectionInfo &sectionInfo) {
- unsigned int tmp;
- std::size_t offset = 0;
- while (bytes >= offset + 2) {
- switch (OleUtil::getU2Bytes(grpprlBuffer, offset)) {
- case 0x3009: //new page
- tmp = OleUtil::getU1Byte(grpprlBuffer, offset + 2);
- sectionInfo.IsNewPage = (tmp != 0 && tmp != 1);
- break;
- default:
- break;
- }
- offset += getPrlLength(grpprlBuffer, offset);
- }
-}
-
-bool OleMainStream::getInlineImageInfo(unsigned int chpxOffset, const char *grpprlBuffer, unsigned int bytes, InlineImageInfo &pictureInfo) {
- //p. 105 of [MS-DOC] documentation
- unsigned int offset = 0;
- bool isFound = false;
- while (bytes >= offset + 2) {
- switch (OleUtil::getU2Bytes(grpprlBuffer, chpxOffset + offset)) {
- case 0x080a: // ole object, p.107 [MS-DOC]
- if (OleUtil::getU1Byte(grpprlBuffer, chpxOffset + offset + 2) == 0x01) {
- return false;
- }
- break;
- case 0x0806: // is not a picture, but a binary data? (sprmCFData, p.106 [MS-DOC])
- if (OleUtil::getU4Bytes(grpprlBuffer, chpxOffset + offset + 2) == 0x01) {
- return false;
- }
- break;
-// case 0x0855: // sprmCFSpec, p.117 [MS-DOC], MUST BE applied with a value of 1 (see p.105 [MS-DOC])
-// if (OleUtil::getU1Byte(grpprlBuffer, chpxOffset + offset + 2) != 0x01) {
-// return false;
-// }
-// break;
- case 0x6a03: // location p.105 [MS-DOC]
- pictureInfo.DataPosition = OleUtil::getU4Bytes(grpprlBuffer, chpxOffset + offset + 2);
- isFound = true;
- break;
- default:
- break;
- }
- offset += getPrlLength(grpprlBuffer, chpxOffset + offset);
- }
- return isFound;
-}
-
-OleMainStream::Style OleMainStream::getStyleFromStylesheet(unsigned int styleId, const StyleSheet &stylesheet) {
- //TODO optimize it: StyleSheet can be map structure with styleId key
- Style style;
- if (styleId != Style::STYLE_INVALID && styleId != Style::STYLE_NIL && styleId != Style::STYLE_USER) {
- for (std::size_t index = 0; index < stylesheet.size(); ++index) {
- if (stylesheet.at(index).StyleIdCurrent == styleId) {
- return stylesheet.at(index);
- }
- }
- }
- style.StyleIdCurrent = styleId;
- return style;
-}
-
-int OleMainStream::getStyleIndex(unsigned int styleId, const std::vector<bool> &isFilled, const StyleSheet &stylesheet) {
- //TODO optimize it: StyleSheet can be map structure with styleId key
- //in that case, this method will be excess
- if (styleId == Style::STYLE_INVALID) {
- return -1;
- }
- for (int index = 0; index < (int)stylesheet.size(); ++index) {
- if (isFilled.at(index) && stylesheet.at(index).StyleIdCurrent == styleId) {
- return index;
- }
- }
- return -1;
-}
-
-unsigned int OleMainStream::getStyleIdByCharPos(unsigned int charPos, const StyleInfoList &styleInfoList) {
- unsigned int styleId = Style::STYLE_INVALID;
- for (std::size_t i = 0; i < styleInfoList.size(); ++i) {
- const Style &info = styleInfoList.at(i).second;
- if (i == styleInfoList.size() - 1) { //if last
- styleId = info.StyleIdCurrent;
- break;
- }
- unsigned int curOffset = styleInfoList.at(i).first;
- unsigned int nextOffset = styleInfoList.at(i + 1).first;
- if (charPos >= curOffset && charPos < nextOffset) {
- styleId = info.StyleIdCurrent;
- break;
- }
- }
- return styleId;
-}
-
-bool OleMainStream::offsetToCharPos(unsigned int offset, unsigned int &charPos, const Pieces &pieces) {
- if (pieces.empty()) {
- return false;
- }
- if ((unsigned int)pieces.front().Offset > offset) {
- charPos = 0;
- return true;
- }
- if ((unsigned int)(pieces.back().Offset + pieces.back().Length) <= offset) {
- return false;
- }
-
- std::size_t pieceNumber = 0;
- for (std::size_t i = 0; i < pieces.size(); ++i) {
- if (i == pieces.size() - 1) { //if last
- pieceNumber = i;
- break;
- }
- unsigned int curOffset = pieces.at(i).Offset;
- unsigned int nextOffset = pieces.at(i + 1).Offset;
- if (offset >= curOffset && offset < nextOffset) {
- pieceNumber = i;
- break;
- }
- }
-
- const Piece &piece = pieces.at(pieceNumber);
- unsigned int diffOffset = offset - piece.Offset;
- if (!piece.IsANSI) {
- diffOffset /= 2;
- }
- charPos = piece.startCP + diffOffset;
- return true;
-}
-
-bool OleMainStream::readToBuffer(std::string &result, unsigned int offset, std::size_t length, OleStream &stream) {
- char *buffer = new char[length];
- stream.seek(offset, true);
- if (stream.read(buffer, length) != length) {
- return false;
- }
- result = std::string(buffer, length);
- delete[] buffer;
- return true;
-}
-
-unsigned int OleMainStream::calcCountOfPLC(unsigned int totalSize, unsigned int elementSize) {
- //calculates count of elements in PLC structure, formula from p.30 [MS-DOC]
- return (totalSize - 4) / (4 + elementSize);
-}
-
-unsigned int OleMainStream::getPrlLength(const char *grpprlBuffer, unsigned int byteNumber) {
- unsigned int tmp;
- unsigned int opCode = OleUtil::getU2Bytes(grpprlBuffer, byteNumber);
- switch (opCode & 0xe000) {
- case 0x0000:
- case 0x2000:
- return 3;
- case 0x4000:
- case 0x8000:
- case 0xA000:
- return 4;
- case 0xE000:
- return 5;
- case 0x6000:
- return 6;
- case 0xC000:
- //counting of info length
- tmp = OleUtil::getU1Byte(grpprlBuffer, byteNumber + 2);
- if (opCode == 0xc615 && tmp == 255) {
- unsigned int del = OleUtil::getU1Byte(grpprlBuffer, byteNumber + 3);
- unsigned int add = OleUtil::getU1Byte(grpprlBuffer, byteNumber + 4 + del * 4);
- tmp = 2 + del * 4 + add * 3;
- }
- return 3 + tmp;
- default:
- return 1;
- }
-}
diff --git a/fbreader/src/formats/doc/OleMainStream.h b/fbreader/src/formats/doc/OleMainStream.h
deleted file mode 100644
index 378f037..0000000
--- a/fbreader/src/formats/doc/OleMainStream.h
+++ /dev/null
@@ -1,223 +0,0 @@
-/*
- * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- */
-
-#ifndef __OLEMAINSTREAM_H__
-#define __OLEMAINSTREAM_H__
-
-#include <vector>
-#include <string>
-
-#include "OleStream.h"
-#include "DocFloatImageReader.h"
-
-class OleMainStream : public OleStream {
-
-public:
- struct Piece {
- enum PieceType {
- PIECE_TEXT,
- PIECE_FOOTNOTE,
- PIECE_OTHER
- };
-
- int Offset; // TODO: maybe make it unsigned int
- int Length; // TODO: maybe make it unsigned int
- bool IsANSI;
- PieceType Type;
- unsigned int startCP;
- };
- typedef std::vector<Piece> Pieces;
-
- struct CharInfo {
- enum Font {
- FONT_REGULAR = 0,
- FONT_BOLD = 1 << 0,
- FONT_ITALIC = 1 << 1,
- FONT_UNDERLINE = 1 << 2,
- FONT_CAPITALS = 1 << 3,
- FONT_SMALL_CAPS = 1 << 4,
- FONT_STRIKE = 1 << 5,
- FONT_HIDDEN = 1 << 6,
- FONT_MARKDEL = 1 << 7,
- FONT_SUPERSCRIPT = 1 << 8,
- FONT_SUBSCRIPT = 1 << 9
- };
-
- unsigned int FontStyle;
- unsigned int FontSize;
-
- CharInfo();
- };
- typedef std::pair<unsigned int, CharInfo> CharPosToCharInfo;
- typedef std::vector<CharPosToCharInfo > CharInfoList;
-
- struct Style {
- enum AlignmentType {
- ALIGNMENT_LEFT = 0x00,
- ALIGNMENT_CENTER = 0x01,
- ALIGNMENT_RIGHT = 0x02,
- ALIGNMENT_JUSTIFY = 0x03,
- ALIGNMENT_DEFAULT // for case if alignment is not setted by word
- };
-
- // style Ids:
- // (this is not full list of possible style ids, enum is used for using in switch-case)
- enum StyleID {
- STYLE_H1 = 0x1,
- STYLE_H2 = 0x2,
- STYLE_H3 = 0x3,
- STYLE_USER = 0xFFE,
- STYLE_NIL = 0xFFF,
- STYLE_INVALID = 0xFFFF
- };
-
- unsigned int StyleIdCurrent;
- unsigned int StyleIdNext; // Next style unless overruled
-
- bool HasPageBreakBefore;
- unsigned int BeforeParagraphIndent; // Vertical indent before paragraph, pixels
- unsigned int AfterParagraphIndent; // Vertical indent after paragraph, pixels
- int LeftIndent;
- int FirstLineIndent;
- int RightIndent;
- AlignmentType Alignment;
- CharInfo CurrentCharInfo;
-
- Style();
- };
-
- typedef std::pair<unsigned int, Style> CharPosToStyle;
- typedef std::vector<CharPosToStyle> StyleInfoList;
- typedef std::vector<Style> StyleSheet;
-
- struct SectionInfo {
- unsigned int CharPosition;
- bool IsNewPage;
-
- SectionInfo();
- };
- typedef std::vector<SectionInfo> SectionInfoList;
-
- struct Bookmark {
- unsigned int CharPosition;
- std::string Name;
- };
- typedef std::vector<Bookmark> BookmarksList;
-
- struct InlineImageInfo {
- unsigned int DataPosition;
-
- InlineImageInfo();
- };
- typedef std::pair<unsigned int, InlineImageInfo> CharPosToInlineImageInfo;
- typedef std::vector<CharPosToInlineImageInfo> InlineImageInfoList;
-
- struct FloatImageInfo {
- unsigned int ShapeId;
- FloatImageInfo();
- };
- typedef std::pair<unsigned int, FloatImageInfo> CharPosToFloatImageInfo;
- typedef std::vector<CharPosToFloatImageInfo> FloatImageInfoList;
-
- enum ImageType { //see p. 60 [MS-ODRAW]
- IMAGE_EMF = 0xF01A,
- IMAGE_WMF = 0xF01B,
- IMAGE_PICT = 0xF01C,
- IMAGE_JPEG = 0xF01D,
- IMAGE_PNG = 0xF01E,
- IMAGE_DIB = 0xF01F,
- IMAGE_TIFF = 0xF029,
- IMAGE_JPEG2 = 0xF02A
- };
-
-public:
- OleMainStream(shared_ptr<OleStorage> storage, OleEntry oleEntry, shared_ptr<ZLInputStream> stream);
-
-public:
- bool open(bool doReadFormattingData);
- const Pieces &getPieces() const;
- const CharInfoList &getCharInfoList() const;
- const StyleInfoList &getStyleInfoList() const;
- const BookmarksList &getBookmarks() const;
- const InlineImageInfoList &getInlineImageInfoList() const;
- const FloatImageInfoList &getFloatImageInfoList() const;
-
- ZLFileImage::Blocks getFloatImage(unsigned int shapeId) const;
- ZLFileImage::Blocks getInlineImage(unsigned int dataPos) const;
-
-private:
- bool readFIB(const char *headerBuffer);
- bool readPieceTable(const char *headerBuffer, const OleEntry &tableEntry);
- bool readBookmarks(const char *headerBuffer, const OleEntry &tableEntry);
- bool readStylesheet(const char *headerBuffer, const OleEntry &tableEntry);
- bool readSectionsInfoTable(const char *headerBuffer, const OleEntry &tableEntry);
- bool readParagraphStyleTable(const char *headerBuffer, const OleEntry &tableEntry);
- bool readCharInfoTable(const char *headerBuffer, const OleEntry &tableEntry);
- bool readFloatingImages(const char *headerBuffer, const OleEntry &tableEntry);
-
-private: //readPieceTable helpers methods
- static std::string getPiecesTableBuffer(const char *headerBuffer, OleStream &tableStream);
- static void splitPieces(const Pieces &source, Pieces &dest1, Pieces &dest2, Piece::PieceType type1, Piece::PieceType type2, int boundary);
-
-private: //formatting reader helpers methods
- static unsigned int getPrlLength(const char *grpprlBuffer, unsigned int byteNumber);
- static void getCharInfo(unsigned int chpxOffset, unsigned int styleId, const char *grpprlBuffer, unsigned int bytes, CharInfo &charInfo);
- static void getStyleInfo(unsigned int papxOffset, const char *grpprlBuffer, unsigned int bytes, Style &styleInfo);
- static void getSectionInfo(const char *grpprlBuffer, std::size_t bytes, SectionInfo &sectionInfo);
- static bool getInlineImageInfo(unsigned int chpxOffset, const char *grpprlBuffer, unsigned int bytes, InlineImageInfo &pictureInfo);
-
- static Style getStyleFromStylesheet(unsigned int styleId, const StyleSheet &stylesheet);
- static int getStyleIndex(unsigned int styleId, const std::vector<bool> &isFilled, const StyleSheet &stylesheet);
- static unsigned int getStyleIdByCharPos(unsigned int offset, const StyleInfoList &styleInfoList);
-
- static bool offsetToCharPos(unsigned int offset, unsigned int &charPos, const Pieces &pieces);
- static bool readToBuffer(std::string &result, unsigned int offset, std::size_t length, OleStream &stream);
-
- static unsigned int calcCountOfPLC(unsigned int totalSize, unsigned int elementSize);
-
-private:
- enum PrlFlag {
- UNSET = 0,
- SET = 1,
- UNCHANGED = 128,
- NEGATION = 129
- };
-
-private:
- int myStartOfText;
- int myEndOfText;
-
- Pieces myPieces;
-
- StyleSheet myStyleSheet;
-
- CharInfoList myCharInfoList;
- StyleInfoList myStyleInfoList;
- SectionInfoList mySectionInfoList;
- InlineImageInfoList myInlineImageInfoList;
- FloatImageInfoList myFloatImageInfoList;
-
- BookmarksList myBookmarks;
-
- shared_ptr<OleStream> myDataStream;
-
- shared_ptr<DocFloatImageReader> myFLoatImageReader;
-};
-
-#endif /* __OLEMAINSTREAM_H__ */
diff --git a/fbreader/src/formats/doc/OleStorage.cpp b/fbreader/src/formats/doc/OleStorage.cpp
deleted file mode 100644
index a7ab81a..0000000
--- a/fbreader/src/formats/doc/OleStorage.cpp
+++ /dev/null
@@ -1,304 +0,0 @@
-/*
- * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- */
-
-#include <ZLLogger.h>
-
-#include "OleStorage.h"
-#include "OleUtil.h"
-
-#include <cstring>
-
-// Size in bytes of the header block that init() reads from the start of the file.
-// Every sector offset in this file is computed as BBD_BLOCK_SIZE + sectorIndex * mySectorSize.
-const std::size_t OleStorage::BBD_BLOCK_SIZE = 512;
-
-// Creates an empty storage; all state is put into its "nothing loaded" form by clear().
-OleStorage::OleStorage() {
- clear();
-}
-
-// Drops everything learned from a previously parsed file and detaches the input stream.
-void OleStorage::clear() {
-	// parsed tables
-	myEntries.clear();
-	myProperties.clear();
-	mySBD.clear();
-	myBBD.clear();
-	myDIFAT.clear();
-
-	// scalar state; -1 marks "no root entry found yet"
-	myRootEntryIndex = -1;
-	myStreamSize = 0;
-	myShortSectorSize = 0;
-	mySectorSize = 0;
-	myInputStream = 0;
-}
-
-
-
-// Parses the compound-file header and all allocation tables of `stream`.
-//
-// stream     -- input positioned anywhere; it is rewound to offset 0 here
-// streamSize -- total size of the underlying file, used for sector range checks
-//
-// Returns true when the header signature matches and the DIFAT, BBD, SBD,
-// directory sectors and directory entries were all read. On any failure the
-// object is reset with clear() and false is returned.
-bool OleStorage::init(shared_ptr<ZLInputStream> stream, std::size_t streamSize) {
-	clear();
-
-	myInputStream = stream;
-	myStreamSize = streamSize;
-	myInputStream->seek(0, true);
-
-	char oleBuf[BBD_BLOCK_SIZE];
-	if (myInputStream->read(oleBuf, BBD_BLOCK_SIZE) != BBD_BLOCK_SIZE) {
-		clear();
-		return false;
-	}
-
-	// The signature is binary data, so compare raw bytes instead of C strings.
-	static const unsigned char OLE_SIGN[8] = {0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1};
-	if (std::memcmp(oleBuf, OLE_SIGN, sizeof(OLE_SIGN)) != 0) {
-		clear();
-		return false;
-	}
-
-	const unsigned int sectorShift = OleUtil::getU2Bytes(oleBuf, 0x1e); //offset for value of big sector size
-	const unsigned int shortSectorShift = OleUtil::getU2Bytes(oleBuf, 0x20); //offset for value of small sector size
-
-	// The shifts come straight from the file. Reject values that would make the
-	// shift itself undefined or produce sector sizes the readers cannot handle:
-	//  * a sector must hold at least one 128-byte directory record (shift >= 7);
-	//  * sector-sized scratch buffers must stay reasonably small (shift <= 16);
-	//  * a short sector may not be larger than a regular sector, otherwise the
-	//    "short sectors per sector" ratio becomes zero.
-	if (sectorShift < 7 || sectorShift > 16 || shortSectorShift > sectorShift) {
-		ZLLogger::Instance().println("DocPlugin", "Wrong sector size value");
-		clear();
-		return false;
-	}
-	mySectorSize = 1u << sectorShift;
-	myShortSectorSize = 1u << shortSectorShift;
-
-	if (readDIFAT(oleBuf) && readBBD(oleBuf) && readSBD(oleBuf) && readProperties(oleBuf) && readAllEntries()) {
-		return true;
-	}
-	clear();
-	return false;
-}
-
-// Fills myDIFAT from the header and from any additional DIFAT sectors.
-//
-// oleBuf -- the BBD_BLOCK_SIZE-byte file header read by init()
-//
-// Returns false when an additional DIFAT sector cannot be read completely.
-bool OleStorage::readDIFAT(char *oleBuf) {
-	int difatBlock = OleUtil::get4Bytes(oleBuf, 0x44); //address for first difat sector
-	const int difatSectorNumbers = OleUtil::get4Bytes(oleBuf, 0x48); //numbers of additional difat records
-
-	//the header stores 109 DIFAT records (436 bytes) starting at offset 0x4c
-	for (unsigned int i = 0; i < 436; i += 4) {
-		myDIFAT.push_back(OleUtil::get4Bytes(oleBuf + 0x4c, i));
-	}
-
-	//for files > 6.78 mb we need read additional DIFAT fields
-	if (difatBlock > 0 && difatSectorNumbers > 0) {
-		if (mySectorSize < 4) {
-			//"mySectorSize - 4" below would wrap around for such a sector size
-			ZLLogger::Instance().println("DocPlugin", "Error read DIFAT!");
-			return false;
-		}
-		//heap buffer: the size comes from the file, so it must not live on the stack
-		std::vector<char> buffer(mySectorSize);
-		for (int i = 0; difatBlock > 0 && i < difatSectorNumbers; ++i) {
-			ZLLogger::Instance().println("DocPlugin", "Read additional data for DIFAT");
-			myInputStream->seek(BBD_BLOCK_SIZE + difatBlock * mySectorSize, true);
-			if (myInputStream->read(&buffer[0], mySectorSize) != mySectorSize) {
-				ZLLogger::Instance().println("DocPlugin", "Error read DIFAT!");
-				return false;
-			}
-			for (unsigned int j = 0; j < (mySectorSize - 4); j += 4) {
-				myDIFAT.push_back(OleUtil::get4Bytes(&buffer[0], j));
-			}
-			difatBlock = OleUtil::get4Bytes(&buffer[0], mySectorSize - 4); //next DIFAT block is pointed at the end of the sector
-		}
-	}
-
-	//removing unusable DIFAT links
-	//0xFFFFFFFF means "free section"
-	while (!myDIFAT.empty() && myDIFAT.back() == (int)0xFFFFFFFF) {
-		myDIFAT.pop_back();
-	}
-	return true;
-}
-
-// Loads the Big Block Depot: every FAT sector listed in myDIFAT is read and
-// its 4-byte records are appended to myBBD.
-//
-// oleBuf -- the file header; the number of FAT sectors is stored at offset 0x2c
-//
-// Returns false when the header asks for more FAT sectors than myDIFAT holds,
-// when a FAT sector index lies outside the file, or when a sector cannot be read.
-bool OleStorage::readBBD(char *oleBuf) {
-	if (mySectorSize == 0) {
-		//nothing sensible can be read, and the range check below would divide by zero
-		ZLLogger::Instance().println("DocPlugin", "Error during reading BBD!");
-		return false;
-	}
-	const unsigned int bbdNumberBlocks = OleUtil::getU4Bytes(oleBuf, 0x2c); //number of big blocks
-
-	if (myDIFAT.size() < bbdNumberBlocks) {
-		//TODO maybe add check on myDIFAT == bbdNumberBlocks
-		ZLLogger::Instance().println("DocPlugin", "Wrong number of FAT blocks value");
-		return false;
-	}
-
-	//heap buffer: the size comes from the file, so it must not live on the stack
-	std::vector<char> buffer(mySectorSize);
-	for (unsigned int i = 0; i < bbdNumberBlocks; ++i) {
-		const int bbdSector = myDIFAT.at(i);
-		if (bbdSector >= (int)(myStreamSize / mySectorSize) || bbdSector < 0) {
-			ZLLogger::Instance().println("DocPlugin", "Bad BBD entry!");
-			return false;
-		}
-		myInputStream->seek(BBD_BLOCK_SIZE + bbdSector * mySectorSize, true);
-		if (myInputStream->read(&buffer[0], mySectorSize) != mySectorSize) {
-			ZLLogger::Instance().println("DocPlugin", "Error during reading BBD!");
-			return false;
-		}
-		for (unsigned int j = 0; j < mySectorSize; j += 4) {
-			myBBD.push_back(OleUtil::get4Bytes(&buffer[0], j));
-		}
-	}
-	return true;
-}
-
-// Loads the Small Block Depot by following its sector chain through myBBD and
-// appending the 4-byte records of every visited sector to mySBD.
-//
-// oleBuf -- the file header; first SBD sector at 0x3c, sector count at 0x40
-//
-// Returns true when there is no SBD at all or when it was read; false when the
-// chain leaves myBBD or a sector cannot be read completely.
-bool OleStorage::readSBD(char *oleBuf) {
-	int sbdCur = OleUtil::get4Bytes(oleBuf, 0x3c); //address of first small sector
-	const int sbdCount = OleUtil::get4Bytes(oleBuf, 0x40); //count of small sectors
-
-	if (sbdCur <= 0) {
-		ZLLogger::Instance().println("DocPlugin", "There's no SBD, don't read it");
-		return true;
-	}
-	if (mySectorSize == 0) {
-		ZLLogger::Instance().println("DocPlugin", "reading error during parsing SBD");
-		return false;
-	}
-
-	//heap buffer: the size comes from the file, so it must not live on the stack
-	std::vector<char> buffer(mySectorSize);
-	for (int i = 0; i < sbdCount; ++i) {
-		if (i != 0) {
-			//every sector after the first one is found through the BBD chain
-			if (sbdCur < 0 || (unsigned int)sbdCur >= myBBD.size()) {
-				ZLLogger::Instance().println("DocPlugin", "error during parsing SBD");
-				return false;
-			}
-			sbdCur = myBBD.at(sbdCur);
-		}
-		if (sbdCur <= 0) {
-			break;
-		}
-		myInputStream->seek(BBD_BLOCK_SIZE + sbdCur * mySectorSize, true);
-		if (myInputStream->read(&buffer[0], mySectorSize) != mySectorSize) {
-			ZLLogger::Instance().println("DocPlugin", "reading error during parsing SBD");
-			return false;
-		}
-		for (unsigned int j = 0; j < mySectorSize; j += 4) {
-			mySBD.push_back(OleUtil::get4Bytes(&buffer[0], j));
-		}
-	}
-	return true;
-}
-
-// Reads the directory sectors and stores each 128-byte directory record as a
-// raw string in myProperties. The sector chain is followed through myBBD.
-//
-// oleBuf -- the file header; first directory sector is stored at offset 0x30
-//
-// Returns false when the first sector index is negative, when a sector cannot
-// be read completely, or when the chain visits more sectors than the file can
-// contain (which means the chain is cyclic).
-bool OleStorage::readProperties(char *oleBuf) {
-	int propCur = OleUtil::get4Bytes(oleBuf, 0x30); //offset for address of sector with first property
-	if (propCur < 0) {
-		ZLLogger::Instance().println("DocPlugin", "Wrong first directory sector location");
-		return false;
-	}
-	if (mySectorSize < 128) {
-		//slicing into 128-byte records below would run past the sector buffer
-		ZLLogger::Instance().println("DocPlugin", "Error during reading properties");
-		return false;
-	}
-
-	//a chain without cycles cannot be longer than the number of sectors in the file
-	const std::size_t maxSectors = myStreamSize / mySectorSize + 1;
-	std::size_t sectorsRead = 0;
-
-	//heap buffer: the size comes from the file, so it must not live on the stack
-	std::vector<char> buffer(mySectorSize);
-	do {
-		if (++sectorsRead > maxSectors) {
-			ZLLogger::Instance().println("DocPlugin", "Error during reading properties");
-			return false;
-		}
-		myInputStream->seek(BBD_BLOCK_SIZE + propCur * mySectorSize, true);
-		if (myInputStream->read(&buffer[0], mySectorSize) != mySectorSize) {
-			ZLLogger::Instance().println("DocPlugin", "Error during reading properties");
-			return false;
-		}
-		for (unsigned int j = 0; j < mySectorSize; j += 128) {
-			myProperties.push_back(std::string(&buffer[0] + j, 128));
-		}
-		if (propCur < 0 || (std::size_t)propCur >= myBBD.size()) {
-			break;
-		}
-		propCur = myBBD.at(propCur);
-	} while (propCur >= 0 && propCur < (int)(myStreamSize / mySectorSize));
-	return true;
-}
-
-// Converts the raw directory records into OleEntry objects, stopping at the
-// first record that cannot be parsed. Succeeds only if a root entry was seen.
-bool OleStorage::readAllEntries() {
-	const int total = myProperties.size();
-	int index = 0;
-	while (index < total) {
-		OleEntry current;
-		if (!readOleEntry(index, current)) {
-			break;
-		}
-		if (current.type == OleEntry::ROOT_DIR) {
-			myRootEntryIndex = index;
-		}
-		myEntries.push_back(current);
-		++index;
-	}
-	return myRootEntryIndex >= 0;
-}
-
-// Parses directory record number `propNumber` into `e`.
-//
-// propNumber -- index into myProperties
-// e          -- receives type, name, length, block size class and sector chain
-//
-// Returns false when the record is too short, has an unknown type or an
-// implausible name length. On success e.length is clamped to the number of
-// bytes actually covered by the collected sector chain.
-bool OleStorage::readOleEntry(int propNumber, OleEntry &e) {
-	static const std::string ROOT_ENTRY = "Root Entry";
-
-	const std::string &property = myProperties.at(propNumber);
-
-	//all fields used below end at 0x78 + 4; check once, before touching any of them
-	if (property.size() < 0x78 + 4) {
-		ZLLogger::Instance().println("DocPlugin", "problems with reading ole entry");
-		return false;
-	}
-
-	const char oleType = property.at(0x42); //offset for Ole Type
-	if (oleType != 1 && oleType != 2 && oleType != 3 && oleType != 5) {
-		ZLLogger::Instance().println("DocPlugin", "entry -- not right ole type");
-		return false;
-	}
-
-	e.type = (OleEntry::Type)oleType;
-
-	const int nameLength = OleUtil::getU2Bytes(property.c_str(), 0x40); //offset for value entry's name length
-	e.name.clear();
-	e.name.reserve(33); //max size of entry name
-
-	if ((unsigned int)nameLength >= property.size()) {
-		return false;
-	}
-	//the name is stored as 2-byte characters; only the first byte of each pair is kept
-	for (int i = 0; i < nameLength; i += 2) {
-		const char c = property.at(i);
-		if (c != 0) {
-			e.name += c;
-		}
-	}
-
-	e.length = OleUtil::getU4Bytes(property.c_str(), 0x78); //offset for entry's length value
-	e.isBigBlock = e.length >= 0x1000 || e.name == ROOT_ENTRY;
-
-	const unsigned int blockSize = e.isBigBlock ? mySectorSize : myShortSectorSize;
-	if (blockSize == 0) {
-		ZLLogger::Instance().println("DocPlugin", "problems with reading ole entry");
-		return false;
-	}
-
-	// Read sector chain
-	//a big-block entry is chained through the BBD only, a small one through the SBD only
-	const std::vector<int> &table = e.isBigBlock ? myBBD : mySBD;
-	e.blocks.clear(); //do not keep blocks of a previously parsed entry
-	int chainCur = OleUtil::get4Bytes(property.c_str(), 0x74); //offset for start block of entry
-	if (chainCur >= 0 && chainCur <= (int)(myStreamSize / blockSize)) {
-		//filling blocks with chains
-		do {
-			e.blocks.push_back((unsigned int)chainCur);
-			if ((std::size_t)chainCur < table.size()) {
-				chainCur = table.at(chainCur);
-			} else {
-				chainCur = -1;
-			}
-		} while (chainCur > 0 &&
-				 chainCur < (int)table.size() &&
-				 e.blocks.size() <= e.length / blockSize);
-	}
-	e.length = std::min(e.length, (unsigned int)(blockSize * e.blocks.size()));
-	return true;
-}
-
-// Computes the absolute file offset of block number `blockNumber` of entry `e`.
-//
-// e           -- entry whose block list is consulted
-// blockNumber -- index into e.blocks
-// result      -- receives the file offset; left untouched on failure
-//
-// Big blocks map directly to file sectors. Small blocks live inside the sector
-// chain of the root entry, several of them per regular sector.
-// Returns false when blockNumber is out of range or the small-block layout is
-// unusable.
-bool OleStorage::countFileOffsetOfBlock(const OleEntry &e, unsigned int blockNumber, unsigned int &result) const {
-	//TODO maybe better syntax can be used?
-	if (e.blocks.size() <= (std::size_t)blockNumber) {
-		ZLLogger::Instance().println("DocPlugin", "countFileOffsetOfBlock can't be done, blockNumber is invalid");
-		return false;
-	}
-	if (e.isBigBlock) {
-		result = BBD_BLOCK_SIZE + e.blocks.at(blockNumber) * mySectorSize;
-		return true;
-	}
-
-	//guard the division and modulo below, and the root entry lookup
-	if (myShortSectorSize == 0 || mySectorSize < myShortSectorSize ||
-			myRootEntryIndex < 0 || (std::size_t)myRootEntryIndex >= myEntries.size()) {
-		ZLLogger::Instance().println("DocPlugin", "countFileOffsetOfBlock can't be done, invalid sbd data");
-		return false;
-	}
-	const OleEntry &root = myEntries.at(myRootEntryIndex);
-	const unsigned int sbdPerSector = mySectorSize / myShortSectorSize;
-	const unsigned int sbdSectorNumber = e.blocks.at(blockNumber) / sbdPerSector;
-	const unsigned int sbdSectorMod = e.blocks.at(blockNumber) % sbdPerSector;
-	if (root.blocks.size() <= (std::size_t)sbdSectorNumber) {
-		ZLLogger::Instance().println("DocPlugin", "countFileOffsetOfBlock can't be done, invalid sbd data");
-		return false;
-	}
-	result = BBD_BLOCK_SIZE + root.blocks.at(sbdSectorNumber) * mySectorSize + sbdSectorMod * myShortSectorSize;
-	return true;
-}
-
-// Looks up an entry by name. When several entries share the name, the last one
-// with the greatest length wins. Reports success only if that length is non-zero.
-bool OleStorage::getEntryByName(std::string name, OleEntry &returnEntry) const {
-	//TODO fix the workaround for duplicates streams: now it takes a stream with max length
-	unsigned int bestLength = 0;
-	for (std::vector<OleEntry>::const_iterator it = myEntries.begin(); it != myEntries.end(); ++it) {
-		if (it->name != name) {
-			continue;
-		}
-		if (it->length < bestLength) {
-			continue;
-		}
-		returnEntry = *it;
-		bestLength = it->length;
-	}
-	return bestLength != 0;
-}
-
-
diff --git a/fbreader/src/formats/doc/OleStorage.h b/fbreader/src/formats/doc/OleStorage.h
deleted file mode 100644
index 584ee94..0000000
--- a/fbreader/src/formats/doc/OleStorage.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- */
-
-#ifndef __OLESTORAGE_H__
-#define __OLESTORAGE_H__
-
-#include <algorithm>
-#include <vector>
-#include <string>
-
-#include <ZLInputStream.h>
-
-// One parsed directory record of the compound file (filled by OleStorage::readOleEntry).
-struct OleEntry {
- // Record type; the numeric values are the type byte stored in the directory record.
- enum Type {
- DIR = 1,
- STREAM = 2,
- ROOT_DIR = 5,
- LOCK_BYTES =3
- };
-
- // Sector (or short-sector) indexes that make up the entry, in chain order.
- typedef std::vector<unsigned int> Blocks;
-
- std::string name; // entry name with zero bytes dropped
- unsigned int length; // entry size in bytes, clamped to what the block chain covers
- Type type;
- Blocks blocks;
- bool isBigBlock; // true: blocks are regular sectors; false: blocks are short sectors
-};
-
-// Reader for the container structure of an OLE compound file: header,
-// allocation tables and directory entries.
-class OleStorage {
-
-public:
- // Size in bytes of the file header; also the base of every sector offset.
- static const std::size_t BBD_BLOCK_SIZE;
-
-public:
- OleStorage();
- // Parses the given stream; returns false (and resets the object) on any error.
- bool init(shared_ptr<ZLInputStream>, std::size_t streamSize);
- // Resets the object to the empty state.
- void clear();
- const std::vector<OleEntry> &getEntries() const;
- // Finds an entry by name; see the definition for how duplicate names are resolved.
- bool getEntryByName(std::string name, OleEntry &entry) const;
-
- unsigned int getSectorSize() const;
- unsigned int getShortSectorSize() const;
-
-public: //TODO make private
- // Translates a block number of an entry into an absolute file offset.
- bool countFileOffsetOfBlock(const OleEntry &e, unsigned int blockNumber, unsigned int &result) const;
-
-private:
- // Header/table readers; each takes the raw file header and returns false on error.
- bool readDIFAT(char *oleBuf);
- bool readBBD(char *oleBuf);
- bool readSBD(char *oleBuf);
- bool readProperties(char *oleBuf);
-
- bool readAllEntries();
- bool readOleEntry(int propNumber, OleEntry &entry);
-
-private:
-
- shared_ptr<ZLInputStream> myInputStream;
- unsigned int mySectorSize, myShortSectorSize;
-
- std::size_t myStreamSize;
- std::vector<int> myDIFAT; //double-indirect file allocation table
- std::vector<int> myBBD; //Big Block Depot
- std::vector<int> mySBD; //Small Block Depot
- std::vector<std::string> myProperties; // raw 128-byte directory records
- std::vector<OleEntry> myEntries;
- int myRootEntryIndex; // index into myEntries, -1 while no root entry is known
-
-};
-
-// Trivial read-only accessors for state filled in by init().
-inline const std::vector<OleEntry> &OleStorage::getEntries() const { return myEntries; }
-inline unsigned int OleStorage::getSectorSize() const { return mySectorSize; }
-inline unsigned int OleStorage::getShortSectorSize() const { return myShortSectorSize; }
-
-#endif /* __OLESTORAGE_H__ */
diff --git a/fbreader/src/formats/doc/OleStream.cpp b/fbreader/src/formats/doc/OleStream.cpp
deleted file mode 100644
index 8de1cc4..0000000
--- a/fbreader/src/formats/doc/OleStream.cpp
+++ /dev/null
@@ -1,221 +0,0 @@
-/*
- * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- */
-
-#include <ZLLogger.h>
-
-#include "OleStream.h"
-#include "OleUtil.h"
-
-// Binds a directory entry to its storage and to the underlying file stream;
-// the logical read position starts at the beginning of the entry.
-OleStream::OleStream(shared_ptr<OleStorage> storage, OleEntry oleEntry, shared_ptr<ZLInputStream> stream) :
-	myStorage(storage),
-	myOleEntry(oleEntry),
-	myBaseStream(stream),
-	myOleOffset(0) {
-}
-
-
-// A stream can be opened only over an entry of STREAM type.
-bool OleStream::open() {
-	return myOleEntry.type == OleEntry::STREAM;
-}
-
-// Reads up to maxSize bytes of the entry, starting at the current logical
-// offset, into buffer. The entry's blocks may be scattered across the file, so
-// the read is split into: the rest of the current block, then whole blocks,
-// then a final partial block.
-// Returns the number of bytes actually read and advances myOleOffset by it.
-std::size_t OleStream::read(char *buffer, std::size_t maxSize) {
- std::size_t length = maxSize;
- std::size_t readedBytes = 0;
- std::size_t bytesLeftInCurBlock;
- unsigned int newFileOffset;
-
- unsigned int curBlockNumber, modBlock;
- std::size_t toReadBlocks, toReadBytes;
-
- // never read past the end of the entry
- if (myOleOffset + length > myOleEntry.length) {
- length = myOleEntry.length - myOleOffset;
- }
-
- std::size_t sectorSize = (std::size_t)(myOleEntry.isBigBlock ? myStorage->getSectorSize() : myStorage->getShortSectorSize());
-
- curBlockNumber = myOleOffset / sectorSize;
- if (curBlockNumber >= myOleEntry.blocks.size()) {
- return 0;
- }
- modBlock = myOleOffset % sectorSize;
- bytesLeftInCurBlock = sectorSize - modBlock;
- // split what remains after the current block into whole blocks plus a tail
- if (bytesLeftInCurBlock < length) {
- toReadBlocks = (length - bytesLeftInCurBlock) / sectorSize;
- toReadBytes = (length - bytesLeftInCurBlock) % sectorSize;
- } else {
- toReadBlocks = toReadBytes = 0;
- }
-
- if (!myStorage->countFileOffsetOfBlock(myOleEntry, curBlockNumber, newFileOffset)) {
- return 0;
- }
- newFileOffset += modBlock;
-
- myBaseStream->seek(newFileOffset, true);
-
- // phase 1: remainder of the current block
- readedBytes = myBaseStream->read(buffer, std::min(length, bytesLeftInCurBlock));
- // phase 2: whole blocks, each located separately in the file
- for (std::size_t i = 0; i < toReadBlocks; ++i) {
- if (++curBlockNumber >= myOleEntry.blocks.size()) {
- break;
- }
- if (!myStorage->countFileOffsetOfBlock(myOleEntry, curBlockNumber, newFileOffset)) {
- return readedBytes;
- }
- myBaseStream->seek(newFileOffset, true);
- readedBytes += myBaseStream->read(buffer + readedBytes, std::min(length - readedBytes, sectorSize));
- }
- // phase 3: leading part of the last block
- if (toReadBytes > 0 && ++curBlockNumber < myOleEntry.blocks.size()) {
- if (!myStorage->countFileOffsetOfBlock(myOleEntry, curBlockNumber, newFileOffset)) {
- return readedBytes;
- }
- myBaseStream->seek(newFileOffset, true);
- readedBytes += myBaseStream->read(buffer + readedBytes, toReadBytes);
- }
- // note: the early returns above leave myOleOffset unchanged
- myOleOffset += readedBytes;
- return readedBytes;
-}
-
-// True once the logical offset has reached (or passed) the entry length.
-bool OleStream::eof() const {
-	return !(myOleOffset < myOleEntry.length);
-}
-
-
-// Intentionally does nothing: the body is empty and no state is changed.
-void OleStream::close() {
-}
-
-// Moves the logical position inside the entry.
-// offset is taken from the entry start when absoluteOffset is true, otherwise
-// it is added to the current position. The target is clamped to the entry
-// length. Returns false (position unchanged) when the target block cannot be
-// located; otherwise the base stream is repositioned and true is returned.
-bool OleStream::seek(unsigned int offset, bool absoluteOffset) {
-	unsigned int target = absoluteOffset ? offset : myOleOffset + offset;
-	if (myOleEntry.length < target) {
-		target = myOleEntry.length;
-	}
-
-	const unsigned int blockSize = myOleEntry.isBigBlock ? myStorage->getSectorSize() : myStorage->getShortSectorSize();
-	const unsigned int blockIndex = target / blockSize;
-	if (!(blockIndex < myOleEntry.blocks.size())) {
-		return false;
-	}
-
-	unsigned int filePosition;
-	if (!myStorage->countFileOffsetOfBlock(myOleEntry, blockIndex, filePosition)) {
-		return false;
-	}
-	filePosition += target % blockSize;
-
-	myBaseStream->seek(filePosition, true);
-	myOleOffset = target;
-	return true;
-}
-
-// Returns the current logical position inside the entry (not a file offset).
-std::size_t OleStream::offset() {
- return myOleOffset;
-}
-
-// Describes where `size` bytes of the entry, starting at logical `offset`, are
-// located in the underlying file, as a list of (file offset, size) blocks.
-// The walk mirrors read(): rest of the first block, whole blocks, final tail.
-// Returns an empty list when offset lies beyond the entry's blocks or when a
-// block cannot be located; adjacent pieces are merged by concatBlocks().
-// Does not read any data and does not move the stream position.
-ZLFileImage::Blocks OleStream::getBlockPieceInfoList(unsigned int offset, unsigned int size) const {
- ZLFileImage::Blocks list;
- unsigned int sectorSize = (myOleEntry.isBigBlock ? myStorage->getSectorSize() : myStorage->getShortSectorSize());
- unsigned int curBlockNumber = offset / sectorSize;
- if (curBlockNumber >= myOleEntry.blocks.size()) {
- return list;
- }
- unsigned int modBlock = offset % sectorSize;
- unsigned int startFileOffset = 0;
- if (!myStorage->countFileOffsetOfBlock(myOleEntry, curBlockNumber, startFileOffset)) {
- return ZLFileImage::Blocks();
- }
- startFileOffset += modBlock;
-
- // split what remains after the first block into whole blocks plus a tail
- unsigned int bytesLeftInCurBlock = sectorSize - modBlock;
- unsigned int toReadBlocks = 0, toReadBytes = 0;
- if (bytesLeftInCurBlock < size) {
- toReadBlocks = (size - bytesLeftInCurBlock) / sectorSize;
- toReadBytes = (size - bytesLeftInCurBlock) % sectorSize;
- }
-
- // piece 1: remainder of the first block
- unsigned int readedBytes = std::min(size, bytesLeftInCurBlock);
- list.push_back(ZLFileImage::Block(startFileOffset, readedBytes));
-
- // whole blocks in between
- for (unsigned int i = 0; i < toReadBlocks; ++i) {
- if (++curBlockNumber >= myOleEntry.blocks.size()) {
- break;
- }
- unsigned int newFileOffset = 0;
- if (!myStorage->countFileOffsetOfBlock(myOleEntry, curBlockNumber, newFileOffset)) {
- return ZLFileImage::Blocks();
- }
- unsigned int readbytes = std::min(size - readedBytes, sectorSize);
- list.push_back(ZLFileImage::Block(newFileOffset, readbytes));
- readedBytes += readbytes;
- }
- // leading part of the last block
- if (toReadBytes > 0 && ++curBlockNumber < myOleEntry.blocks.size()) {
- unsigned int newFileOffset = 0;
- if (!myStorage->countFileOffsetOfBlock(myOleEntry, curBlockNumber, newFileOffset)) {
- return ZLFileImage::Blocks();
- }
- unsigned int readbytes = toReadBytes;
- list.push_back(ZLFileImage::Block(newFileOffset, readbytes));
- readedBytes += readbytes;
- }
-
- return concatBlocks(list);
-}
-
-// Merges neighbouring blocks that are contiguous in the file (one ends exactly
-// where the next begins) into a single block. Order is preserved.
-ZLFileImage::Blocks OleStream::concatBlocks(const ZLFileImage::Blocks &blocks) {
-	if (blocks.size() <= 1) {
-		return blocks;
-	}
-
-	ZLFileImage::Blocks merged;
-	ZLFileImage::Block pending = blocks.at(0);
-	unsigned int expectedOffset = pending.offset + pending.size;
-	for (std::size_t index = 1; index != blocks.size(); ++index) {
-		ZLFileImage::Block next = blocks.at(index);
-		if (next.offset != expectedOffset) {
-			//gap in the file: flush what was accumulated and start a new run
-			merged.push_back(pending);
-			pending = next;
-			expectedOffset = pending.offset + pending.size;
-		} else {
-			//contiguous: extend the current run
-			pending.size += next.size;
-			expectedOffset += next.size;
-		}
-	}
-	merged.push_back(pending);
-	return merged;
-}
-
-// Translates the current logical position into an offset in the underlying
-// file. Returns 0 when the position cannot be mapped to a block.
-std::size_t OleStream::fileOffset() {
-	//TODO maybe remove this method, it doesn't use at this time
-	const std::size_t blockSize = (std::size_t)(myOleEntry.isBigBlock ? myStorage->getSectorSize() : myStorage->getShortSectorSize());
-	const unsigned int blockIndex = myOleOffset / blockSize;
-	unsigned int blockStart = 0;
-	if (blockIndex >= myOleEntry.blocks.size() ||
-			!myStorage->countFileOffsetOfBlock(myOleEntry, blockIndex, blockStart)) {
-		return 0; //TODO maybe remove -1?
-	}
-	const unsigned int withinBlock = myOleOffset % blockSize;
-	return blockStart + withinBlock;
-}
diff --git a/fbreader/src/formats/doc/OleStream.h b/fbreader/src/formats/doc/OleStream.h
deleted file mode 100644
index 861c7cb..0000000
--- a/fbreader/src/formats/doc/OleStream.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- */
-
-#ifndef __OLESTREAM_H__
-#define __OLESTREAM_H__
-
-#include <ZLFileImage.h>
-
-#include "OleStorage.h"
-
-// Sequential/seekable view over one entry of an OleStorage. Logical offsets
-// are relative to the entry; the class maps them onto the entry's blocks in
-// the underlying file stream.
-class OleStream {
-
-public:
- OleStream(shared_ptr<OleStorage> storage, OleEntry oleEntry, shared_ptr<ZLInputStream> stream);
-
-public:
- // Returns true only when the entry is of STREAM type.
- bool open();
- // Reads up to maxSize bytes; returns the number of bytes read.
- std::size_t read(char *buffer, std::size_t maxSize);
- void close();
-
-public:
- // Repositions inside the entry; returns false when the target cannot be mapped.
- bool seek(unsigned int offset, bool absoluteOffset);
- // Current logical position inside the entry.
- std::size_t offset();
-
-public:
- // File locations of a logical range, as (offset, size) pieces.
- ZLFileImage::Blocks getBlockPieceInfoList(unsigned int offset, unsigned int size) const;
- // Merges pieces that are contiguous in the file.
- static ZLFileImage::Blocks concatBlocks(const ZLFileImage::Blocks &blocks);
- // File offset that corresponds to the current logical position.
- std::size_t fileOffset();
-
-public:
- bool eof() const;
-
-protected:
- shared_ptr<OleStorage> myStorage;
-
- OleEntry myOleEntry;
- shared_ptr<ZLInputStream> myBaseStream;
-
- unsigned int myOleOffset; // logical position inside the entry
-};
-
-#endif /* __OLESTREAM_H__ */
diff --git a/fbreader/src/formats/doc/OleStreamParser.cpp b/fbreader/src/formats/doc/OleStreamParser.cpp
deleted file mode 100644
index 0a9c62d..0000000
--- a/fbreader/src/formats/doc/OleStreamParser.cpp
+++ /dev/null
@@ -1,210 +0,0 @@
-/*
- * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- */
-
-//#include <cctype>
-//#include <cstring>
-
-#include <ZLLogger.h>
-
-#include "OleMainStream.h"
-#include "OleUtil.h"
-#include "OleStreamParser.h"
-
-//word's control chars:
-// Character codes with a special meaning in the text stream. readStream()
-// dispatches on the ones below 32; WORD_ZERO_WIDTH_UNBREAKABLE_SPACE is skipped there.
-//word's control chars:
-const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_FOOTNOTE_MARK = 0x0002;
-const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_TABLE_SEPARATOR = 0x0007;
-const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_HORIZONTAL_TAB = 0x0009;
-const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_HARD_LINEBREAK = 0x000b;
-const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_PAGE_BREAK = 0x000c;
-const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_END_OF_PARAGRAPH = 0x000d;
-const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_MINUS = 0x001e;
-const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_SOFT_HYPHEN = 0x001f;
-const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_START_FIELD = 0x0013;
-const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_SEPARATOR_FIELD = 0x0014;
-const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_END_FIELD = 0x0015;
-const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_ZERO_WIDTH_UNBREAKABLE_SPACE = 0xfeff;
-// Placeholders in the text at which getUcs2Char() looks up image data.
-const ZLUnicodeUtil::Ucs2Char OleStreamParser::INLINE_IMAGE = 0x0001;
-const ZLUnicodeUtil::Ucs2Char OleStreamParser::FLOAT_IMAGE = 0x0008;
-
-//unicode values:
-const ZLUnicodeUtil::Ucs2Char OleStreamParser::NULL_SYMBOL = 0x0;
-const ZLUnicodeUtil::Ucs2Char OleStreamParser::FILE_SEPARATOR = 0x1c;
-const ZLUnicodeUtil::Ucs2Char OleStreamParser::LINE_FEED = 0x000a;
-const ZLUnicodeUtil::Ucs2Char OleStreamParser::SOFT_HYPHEN = 0xad;
-const ZLUnicodeUtil::Ucs2Char OleStreamParser::SPACE = 0x20;
-const ZLUnicodeUtil::Ucs2Char OleStreamParser::MINUS = 0x2D;
-const ZLUnicodeUtil::Ucs2Char OleStreamParser::VERTICAL_LINE = 0x7C;
-
-// Starts parsing from scratch: empty-buffer position, character position 0,
-// and every "next item" cursor pointing at the first element of its list.
-OleStreamParser::OleStreamParser() {
-	//cursors into the per-document info lists
-	myNextFloatImageInfoIndex = 0;
-	myNextInlineImageInfoIndex = 0;
-	myNextBookmarkIndex = 0;
-	myNextCharInfoIndex = 0;
-	myNextStyleInfoIndex = 0;
-
-	//position in the text and in the current piece buffer
-	myCurCharPos = 0;
-	myCurBufferPosition = 0;
-}
-
-// Pulls characters from the main stream one by one and forwards each to the
-// matching handler. A table separator is reported lazily: two in a row mean
-// "end of row", a single one means "cell separator". Always returns true.
-bool OleStreamParser::readStream(OleMainStream &oleMainStream) {
-	ZLUnicodeUtil::Ucs2Char symbol;
-	bool separatorPending = false;
-	while (getUcs2Char(oleMainStream, symbol)) {
-		if (separatorPending) {
-			separatorPending = false;
-			if (symbol == WORD_TABLE_SEPARATOR) {
-				handleTableEndRow();
-				continue;
-			}
-			handleTableSeparator();
-		}
-
-		//ordinary text
-		if (!(symbol < 32)) {
-			if (symbol != WORD_ZERO_WIDTH_UNBREAKABLE_SPACE) {
-				handleChar(symbol);
-			}
-			continue;
-		}
-
-		//control characters
-		switch (symbol) {
-			case NULL_SYMBOL:
-			case INLINE_IMAGE:
-			case FLOAT_IMAGE:
-				//nothing to report here
-				break;
-			case WORD_TABLE_SEPARATOR:
-				separatorPending = true;
-				break;
-			case WORD_HARD_LINEBREAK:
-				handleHardLinebreak();
-				break;
-			case WORD_PAGE_BREAK:
-			case WORD_END_OF_PARAGRAPH:
-				handleParagraphEnd();
-				break;
-			case WORD_FOOTNOTE_MARK:
-				handleFootNoteMark();
-				break;
-			case WORD_START_FIELD:
-				handleStartField();
-				break;
-			case WORD_SEPARATOR_FIELD:
-				handleSeparatorField();
-				break;
-			case WORD_END_FIELD:
-				handleEndField();
-				break;
-			default:
-				handleOtherControlChar(symbol);
-				break;
-		}
-	}
-
-	return true;
-}
-
-// Delivers the next character of the text in ucs2char, refilling the buffer
-// from the next piece when it is exhausted. Style, bookmark and image
-// callbacks for the current character position are fired before the position
-// is advanced. Returns false when no more pieces are available.
-bool OleStreamParser::getUcs2Char(OleMainStream &stream, ZLUnicodeUtil::Ucs2Char &ucs2char) {
-	for (; myCurBufferPosition >= myBuffer.size(); ) {
-		myBuffer.clear();
-		myCurBufferPosition = 0;
-		if (!readNextPiece(stream)) {
-			return false;
-		}
-	}
-
-	ucs2char = myBuffer.at(myCurBufferPosition);
-	++myCurBufferPosition;
-	processStyles(stream);
-
-	if (ucs2char == INLINE_IMAGE) {
-		processInlineImage(stream);
-	} else if (ucs2char == FLOAT_IMAGE) {
-		processFloatImage(stream);
-	}
-
-	++myCurCharPos;
-	return true;
-}
-
-// Reports every inline image registered for the current character position.
-void OleStreamParser::processInlineImage(OleMainStream &stream) {
-	const OleMainStream::InlineImageInfoList &images = stream.getInlineImageInfoList();
-	if (images.empty()) {
-		return;
-	}
-	//seek to curCharPos, because not all entries are real pictures
-	for (; myNextInlineImageInfoIndex < images.size() && images.at(myNextInlineImageInfoIndex).first < myCurCharPos; ++myNextInlineImageInfoIndex) {
-	}
-	//emit everything that sits exactly at the current position
-	for (; myNextInlineImageInfoIndex < images.size() && images.at(myNextInlineImageInfoIndex).first == myCurCharPos; ++myNextInlineImageInfoIndex) {
-		OleMainStream::InlineImageInfo imageInfo = images.at(myNextInlineImageInfoIndex).second;
-		ZLFileImage::Blocks pieces = stream.getInlineImage(imageInfo.DataPosition);
-		if (pieces.empty()) {
-			continue;
-		}
-		handleImage(pieces);
-	}
-}
-
-// Reports every floating image registered for the current character position.
-void OleStreamParser::processFloatImage(OleMainStream &stream) {
-	const OleMainStream::FloatImageInfoList &images = stream.getFloatImageInfoList();
-	if (images.empty()) {
-		return;
-	}
-	//seek to curCharPos, because not all entries are real pictures
-	for (; myNextFloatImageInfoIndex < images.size() && images.at(myNextFloatImageInfoIndex).first < myCurCharPos; ++myNextFloatImageInfoIndex) {
-	}
-	//emit everything that sits exactly at the current position
-	for (; myNextFloatImageInfoIndex < images.size() && images.at(myNextFloatImageInfoIndex).first == myCurCharPos; ++myNextFloatImageInfoIndex) {
-		OleMainStream::FloatImageInfo imageInfo = images.at(myNextFloatImageInfoIndex).second;
-		ZLFileImage::Blocks pieces = stream.getFloatImage(imageInfo.ShapeId);
-		if (pieces.empty()) {
-			continue;
-		}
-		handleImage(pieces);
-	}
-}
-
-// Fires the paragraph-style, font-style and bookmark callbacks for every list
-// item whose position equals the current character position, in that order.
-void OleStreamParser::processStyles(OleMainStream &stream) {
-	//paragraph styles
-	const OleMainStream::StyleInfoList &paragraphStyles = stream.getStyleInfoList();
-	for (; myNextStyleInfoIndex < paragraphStyles.size() && paragraphStyles.at(myNextStyleInfoIndex).first == myCurCharPos; ++myNextStyleInfoIndex) {
-		OleMainStream::Style style = paragraphStyles.at(myNextStyleInfoIndex).second;
-		handleParagraphStyle(style);
-	}
-
-	//character formatting
-	const OleMainStream::CharInfoList &charInfos = stream.getCharInfoList();
-	for (; myNextCharInfoIndex < charInfos.size() && charInfos.at(myNextCharInfoIndex).first == myCurCharPos; ++myNextCharInfoIndex) {
-		OleMainStream::CharInfo charInfo = charInfos.at(myNextCharInfoIndex).second;
-		handleFontStyle(charInfo.FontStyle);
-	}
-
-	//bookmarks
-	const OleMainStream::BookmarksList &bookmarks = stream.getBookmarks();
-	for (; myNextBookmarkIndex < bookmarks.size() && bookmarks.at(myNextBookmarkIndex).CharPosition == myCurCharPos; ++myNextBookmarkIndex) {
-		OleMainStream::Bookmark mark = bookmarks.at(myNextBookmarkIndex);
-		handleBookmark(mark.Name);
-	}
-}
diff --git a/fbreader/src/formats/doc/OleStreamParser.h b/fbreader/src/formats/doc/OleStreamParser.h
deleted file mode 100644
index 1adec2f..0000000
--- a/fbreader/src/formats/doc/OleStreamParser.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- */
-
-#ifndef __OLESTREAMPARSER_H__
-#define __OLESTREAMPARSER_H__
-
-#include <ZLUnicodeUtil.h>
-
-#include "OleMainStream.h"
-#include "OleStreamReader.h"
-
-class OleStreamParser : public OleStreamReader {
-
-public:
- //word's control chars:
- static const ZLUnicodeUtil::Ucs2Char WORD_FOOTNOTE_MARK;
- static const ZLUnicodeUtil::Ucs2Char WORD_TABLE_SEPARATOR;
- static const ZLUnicodeUtil::Ucs2Char WORD_HORIZONTAL_TAB;
- static const ZLUnicodeUtil::Ucs2Char WORD_HARD_LINEBREAK;
- static const ZLUnicodeUtil::Ucs2Char WORD_PAGE_BREAK;
- static const ZLUnicodeUtil::Ucs2Char WORD_END_OF_PARAGRAPH;
- static const ZLUnicodeUtil::Ucs2Char WORD_MINUS;
- static const ZLUnicodeUtil::Ucs2Char WORD_SOFT_HYPHEN;
- static const ZLUnicodeUtil::Ucs2Char WORD_START_FIELD;
- static const ZLUnicodeUtil::Ucs2Char WORD_SEPARATOR_FIELD;
- static const ZLUnicodeUtil::Ucs2Char WORD_END_FIELD;
- static const ZLUnicodeUtil::Ucs2Char WORD_ZERO_WIDTH_UNBREAKABLE_SPACE;
- static const ZLUnicodeUtil::Ucs2Char INLINE_IMAGE;
- static const ZLUnicodeUtil::Ucs2Char FLOAT_IMAGE;
-
- //unicode values:
- static const ZLUnicodeUtil::Ucs2Char NULL_SYMBOL;
- static const ZLUnicodeUtil::Ucs2Char FILE_SEPARATOR;
- static const ZLUnicodeUtil::Ucs2Char LINE_FEED;
- static const ZLUnicodeUtil::Ucs2Char SOFT_HYPHEN;
- static const ZLUnicodeUtil::Ucs2Char SPACE;
- static const ZLUnicodeUtil::Ucs2Char MINUS;
- static const ZLUnicodeUtil::Ucs2Char VERTICAL_LINE;
-
-public:
- OleStreamParser();
-
-private:
- bool readStream(OleMainStream &stream);
-
-protected:
- virtual void handleChar(ZLUnicodeUtil::Ucs2Char ucs2char) = 0;
- virtual void handleHardLinebreak() = 0;
- virtual void handleParagraphEnd() = 0;
- virtual void handlePageBreak() = 0;
- virtual void handleTableSeparator() = 0;
- virtual void handleTableEndRow() = 0;
- virtual void handleFootNoteMark() = 0;
- virtual void handleStartField() = 0;
- virtual void handleSeparatorField() = 0;
- virtual void handleEndField() = 0;
- virtual void handleImage(const ZLFileImage::Blocks &blocks) = 0;
- virtual void handleOtherControlChar(ZLUnicodeUtil::Ucs2Char ucs2char) = 0;
-
- virtual void handleFontStyle(unsigned int fontStyle) = 0;
- virtual void handleParagraphStyle(const OleMainStream::Style &styleInfo) = 0;
- virtual void handleBookmark(const std::string &name) = 0;
-
-private:
- bool getUcs2Char(OleMainStream &stream, ZLUnicodeUtil::Ucs2Char &ucs2char);
- void processInlineImage(OleMainStream &stream);
- void processFloatImage(OleMainStream &stream);
- void processStyles(OleMainStream &stream);
-
-private:
-protected:
- ZLUnicodeUtil::Ucs2String myBuffer;
-private:
- std::size_t myCurBufferPosition;
-
- unsigned int myCurCharPos;
-
- std::size_t myNextStyleInfoIndex;
- std::size_t myNextCharInfoIndex;
- std::size_t myNextBookmarkIndex;
- std::size_t myNextInlineImageInfoIndex;
- std::size_t myNextFloatImageInfoIndex;
-};
-
-#endif /* __OLESTREAMPARSER_H__ */
diff --git a/fbreader/src/formats/doc/OleStreamReader.cpp b/fbreader/src/formats/doc/OleStreamReader.cpp
deleted file mode 100644
index 224489a..0000000
--- a/fbreader/src/formats/doc/OleStreamReader.cpp
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- */
-
-#include <ZLLogger.h>
-
-#include "OleMainStream.h"
-#include "OleUtil.h"
-#include "OleStreamReader.h"
-
-OleStreamReader::OleStreamReader() : myNextPieceNumber(0) {
-}
-
-bool OleStreamReader::readDocument(shared_ptr<ZLInputStream> inputStream, bool doReadFormattingData) {
- static const std::string WORD_DOCUMENT = "WordDocument";
-
- shared_ptr<OleStorage> storage = new OleStorage;
-
- if (!storage->init(inputStream, inputStream->sizeOfOpened())) {
- ZLLogger::Instance().println("DocPlugin", "Broken OLE file");
- return false;
- }
-
- OleEntry wordDocumentEntry;
- if (!storage->getEntryByName(WORD_DOCUMENT, wordDocumentEntry)) {
- return false;
- }
-
- OleMainStream oleStream(storage, wordDocumentEntry, inputStream);
- if (!oleStream.open(doReadFormattingData)) {
- ZLLogger::Instance().println("DocPlugin", "Cannot open OleMainStream");
- return false;
- }
- return readStream(oleStream);
-}
-
-bool OleStreamReader::readNextPiece(OleMainStream &stream) {
- const OleMainStream::Pieces &pieces = stream.getPieces();
- if (myNextPieceNumber >= pieces.size()) {
- return false;
- }
- const OleMainStream::Piece &piece = pieces.at(myNextPieceNumber);
-
- if (piece.Type == OleMainStream::Piece::PIECE_FOOTNOTE) {
- footnotesStartHandler();
- } else if (piece.Type == OleMainStream::Piece::PIECE_OTHER) {
- return false;
- }
-
- if (!stream.seek(piece.Offset, true)) {
- //TODO maybe in that case we should take next piece?
- return false;
- }
- char *textBuffer = new char[piece.Length];
- std::size_t readBytes = stream.read(textBuffer, piece.Length);
- if (readBytes != (std::size_t)piece.Length) {
- ZLLogger::Instance().println("DocPlugin", "not all bytes have been read from piece");
- }
-
- if (!piece.IsANSI) {
- for (std::size_t i = 0; i < readBytes; i += 2) {
- ucs2SymbolHandler(OleUtil::getU2Bytes(textBuffer, i));
- }
- } else {
- ansiDataHandler(textBuffer, readBytes);
- }
- ++myNextPieceNumber;
- delete[] textBuffer;
-
- return true;
-}
diff --git a/fbreader/src/formats/doc/OleStreamReader.h b/fbreader/src/formats/doc/OleStreamReader.h
deleted file mode 100644
index 2d2a0ae..0000000
--- a/fbreader/src/formats/doc/OleStreamReader.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- */
-
-#ifndef __OLESTREAMREADER_H__
-#define __OLESTREAMREADER_H__
-
-#include <ZLUnicodeUtil.h>
-
-#include "OleMainStream.h"
-
-class OleStreamReader {
-
-public:
- OleStreamReader();
- bool readDocument(shared_ptr<ZLInputStream> stream, bool doReadFormattingData);
-
-protected:
- virtual bool readStream(OleMainStream &stream) = 0;
-
- bool readNextPiece(OleMainStream &stream);
-
- virtual void ansiDataHandler(const char *buffer, std::size_t len) = 0;
- virtual void ucs2SymbolHandler(ZLUnicodeUtil::Ucs2Char symbol) = 0;
- virtual void footnotesStartHandler() = 0;
-
-private:
- std::size_t myNextPieceNumber;
-};
-
-#endif /* __OLESTREAMREADER_H__ */
diff --git a/fbreader/src/formats/doc/OleUtil.cpp b/fbreader/src/formats/doc/OleUtil.cpp
deleted file mode 100644
index 2e8f685..0000000
--- a/fbreader/src/formats/doc/OleUtil.cpp
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- */
-
-#include "OleUtil.h"
-
-int OleUtil::get4Bytes(const char *buffer, unsigned int offset) {
- const unsigned char *buf = (const unsigned char*)buffer;
- return
- (int)buf[offset]
- | ((int)buf[offset+1] << 8)
- | ((int)buf[offset+2] << 16)
- | ((int)buf[offset+3] << 24);
-}
-
-unsigned int OleUtil::getU4Bytes(const char *buffer, unsigned int offset) {
- const unsigned char *buf = (const unsigned char*)buffer;
- return
- (unsigned int)buf[offset]
- | ((unsigned int)buf[offset+1] << 8)
- | ((unsigned int)buf[offset+2] << 16)
- | ((unsigned int)buf[offset+3] << 24);
-}
-
-unsigned int OleUtil::getU2Bytes(const char *buffer, unsigned int offset) {
- const unsigned char *buf = (const unsigned char*)buffer;
- return
- (unsigned int)buf[offset]
- | ((unsigned int)buf[offset+1] << 8);
-}
-
-unsigned int OleUtil::getU1Byte(const char *buffer, unsigned int offset) {
- const unsigned char *buf = (const unsigned char*)buffer;
- return (unsigned int)buf[offset];
-}
-
-int OleUtil::get1Byte(const char *buffer, unsigned int offset) {
- const unsigned char *buf = (const unsigned char*)buffer;
- return (int)buf[offset];
-}
-
-
-
diff --git a/fbreader/src/formats/doc/OleUtil.h b/fbreader/src/formats/doc/OleUtil.h
deleted file mode 100644
index 531c769..0000000
--- a/fbreader/src/formats/doc/OleUtil.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- */
-
-#ifndef __OLEUTIL_H__
-#define __OLEUTIL_H__
-
-class OleUtil {
-public:
- static int get4Bytes(const char *buffer, unsigned int offset);
- static unsigned int getU4Bytes(const char *buffer, unsigned int offset);
- static unsigned int getU2Bytes(const char *buffer, unsigned int offset);
- static unsigned int getU1Byte(const char *buffer, unsigned int offset);
- static int get1Byte(const char *buffer, unsigned int offset);
-};
-
-#endif /* __OLEUTIL_H__ */