diff options
Diffstat (limited to 'fbreader/src/formats/doc')
24 files changed, 0 insertions, 4141 deletions
diff --git a/fbreader/src/formats/doc/DocBookReader.cpp b/fbreader/src/formats/doc/DocBookReader.cpp deleted file mode 100644 index 99f471a..0000000 --- a/fbreader/src/formats/doc/DocBookReader.cpp +++ /dev/null @@ -1,377 +0,0 @@ -/* - * Copyright (C) 2004-2012 Geometer Plus <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#include <vector> -#include <string> - -#include <ZLInputStream.h> -#include <ZLLogger.h> -#include <ZLFile.h> -#include <ZLStringUtil.h> -#include <ZLFileImage.h> - -#include "DocBookReader.h" -#include "../../bookmodel/BookModel.h" -#include "../../library/Book.h" - -#include "OleStorage.h" -#include "OleMainStream.h" - -DocBookReader::DocBookReader(BookModel &model, const std::string &encoding) : - myModelReader(model), - myPictureCounter(0), - myEncoding(encoding) { - myReadState = READ_TEXT; -} - -bool DocBookReader::readBook() { - const ZLFile &file = myModelReader.model().book()->file(); - shared_ptr<ZLInputStream> stream = file.inputStream(); - if (stream.isNull() || !stream->open()) { - return false; - } - myModelReader.setMainTextModel(); - myModelReader.pushKind(REGULAR); - myModelReader.beginParagraph(); - - if (!readDocument(stream, true)) { - return false; - } - - myModelReader.insertEndOfTextParagraph(); - return true; -} - -void DocBookReader::handleChar(ZLUnicodeUtil::Ucs2Char ucs2char) { - if (myReadState == READ_FIELD && myReadFieldState == READ_FIELD_INFO) { - myFieldInfoBuffer.push_back(ucs2char); - return; - } - if (myReadState == READ_FIELD && myReadFieldState == DONT_READ_FIELD_TEXT) { - return; - } - if (myReadState == READ_FIELD && myReadFieldState == READ_FIELD_TEXT && ucs2char == WORD_HORIZONTAL_TAB) { - //to remove pagination from TOC (from doc saved in OpenOffice) - myReadFieldState = DONT_READ_FIELD_TEXT; - return; - } - std::string utf8String; - ZLUnicodeUtil::Ucs2String ucs2String; - ucs2String.push_back(ucs2char); - ZLUnicodeUtil::ucs2ToUtf8(utf8String, ucs2String); - if (!myModelReader.paragraphIsOpen()) { - myModelReader.beginParagraph(); - } - myModelReader.addData(utf8String); -} - -void DocBookReader::handleHardLinebreak() { - if (myModelReader.paragraphIsOpen()) { - myModelReader.endParagraph(); - } - myModelReader.beginParagraph(); - if (!myCurrentStyleEntry.isNull()) { - myModelReader.addStyleEntry(*myCurrentStyleEntry); - } - for (std::size_t i = 0; i < myKindStack.size(); ++i) { - myModelReader.addControl(myKindStack.at(i), true); - } -} - -void DocBookReader::handleParagraphEnd() { - if (myModelReader.paragraphIsOpen()) { - myModelReader.endParagraph(); - } - myModelReader.beginParagraph(); - myCurrentStyleEntry = 0; -} - -void DocBookReader::handlePageBreak() { - if (myModelReader.paragraphIsOpen()) { - myModelReader.endParagraph(); - } - myCurrentStyleEntry = 0; - myModelReader.insertEndOfSectionParagraph(); - myModelReader.beginParagraph(); -} - -void DocBookReader::handleTableSeparator() { - handleChar(SPACE); - handleChar(VERTICAL_LINE); - handleChar(SPACE); -} - -void DocBookReader::handleTableEndRow() { - handleParagraphEnd(); -} - -void DocBookReader::handleFootNoteMark() { - //TODO implement -} - -void DocBookReader::handleStartField() { - if (myReadState == READ_FIELD) { //for nested fields - handleEndField(); - } - myReadState = READ_FIELD; - myReadFieldState = READ_FIELD_INFO; - myHyperlinkTypeState = NO_HYPERLINK; -} - -void DocBookReader::handleSeparatorField() { - static const std::string HYPERLINK = "HYPERLINK"; - static const std::string SEQUENCE = "SEQ"; -// static const std::string PAGE = "PAGE"; -// static const std::string PAGEREF = "PAGEREF"; -// static const std::string SHAPE = "SHAPE"; - static const std::string SPACE_DELIMETER = " "; - static const std::string LOCAL_LINK = "\\l"; - static const std::string QUOTE = "\""; - myReadFieldState = READ_FIELD_TEXT; - myHyperlinkTypeState = NO_HYPERLINK; - ZLUnicodeUtil::Ucs2String buffer = myFieldInfoBuffer; - myFieldInfoBuffer.clear(); - std::string utf8String; - ZLUnicodeUtil::ucs2ToUtf8(utf8String, buffer); - ZLUnicodeUtil::utf8Trim(utf8String); - if (utf8String.empty()) { - return; - } - std::vector<std::string> result = ZLStringUtil::split(utf8String, SPACE_DELIMETER); - //TODO split function can returns empty string, maybe fix it - std::vector<std::string> splitted; - for (std::size_t i = 0; i < result.size(); ++i) { - if (!result.at(i).empty()) { - splitted.push_back(result.at(i)); - } - } - - if (!splitted.empty() && splitted.at(0) == SEQUENCE) { - myReadFieldState = READ_FIELD_TEXT; - myHyperlinkTypeState = NO_HYPERLINK; - return; - } - - if (splitted.size() < 2 || splitted.at(0) != HYPERLINK) { - myReadFieldState = DONT_READ_FIELD_TEXT; - //to remove pagination from TOC and not hyperlink fields - return; - } - - if (splitted.at(1) == LOCAL_LINK) { - std::string link = parseLink(buffer); - if (!link.empty()) { - myModelReader.addHyperlinkControl(INTERNAL_HYPERLINK, link); - myHyperlinkTypeState = INT_HYPERLINK_INSERTED; - } - } else { - std::string link = parseLink(buffer, true); - if (!link.empty()) { - myModelReader.addHyperlinkControl(EXTERNAL_HYPERLINK, link); - myHyperlinkTypeState = EXT_HYPERLINK_INSERTED; - } - } -} - -void DocBookReader::handleEndField() { - myFieldInfoBuffer.clear(); - if (myReadState == READ_TEXT) { - return; - } - if (myHyperlinkTypeState == EXT_HYPERLINK_INSERTED) { - myModelReader.addControl(EXTERNAL_HYPERLINK, false); - } else if (myHyperlinkTypeState == INT_HYPERLINK_INSERTED) { - myModelReader.addControl(INTERNAL_HYPERLINK, false); - } - myReadState = READ_TEXT; - myHyperlinkTypeState = NO_HYPERLINK; - -} - -void DocBookReader::handleImage(const ZLFileImage::Blocks &blocks) { - std::string number; - ZLStringUtil::appendNumber(number, myPictureCounter++); - myModelReader.addImageReference(number); - ZLFile file(myModelReader.model().book()->file().path(), ZLMimeType::IMAGE_AUTO); - myModelReader.addImage(number, new ZLFileImage(file, blocks, ZLFileImage::ENCODING_NONE)); -} - -void DocBookReader::handleOtherControlChar(ZLUnicodeUtil::Ucs2Char ucs2char) { - if (ucs2char == WORD_MINUS) { - handleChar(MINUS); - } else if (ucs2char == WORD_SOFT_HYPHEN) { - //skip - } else if (ucs2char == WORD_HORIZONTAL_TAB) { - handleChar(ucs2char); - } else { -// myTextBuffer.clear(); - } -} - -void DocBookReader::handleFontStyle(unsigned int fontStyle) { - if (myReadState == READ_FIELD && myReadFieldState == READ_FIELD_TEXT && myHyperlinkTypeState != NO_HYPERLINK) { - //to fix bug with hyperlink, that's only bold and doesn't looks like hyperlink - return; - } - while (!myKindStack.empty()) { - myModelReader.addControl(myKindStack.back(), false); - myKindStack.pop_back(); - } - if (fontStyle & OleMainStream::CharInfo::FONT_BOLD) { - myKindStack.push_back(BOLD); - } - if (fontStyle & OleMainStream::CharInfo::FONT_ITALIC) { - myKindStack.push_back(ITALIC); - } - for (std::size_t i = 0; i < myKindStack.size(); ++i) { - myModelReader.addControl(myKindStack.at(i), true); - } -} - -void DocBookReader::handleParagraphStyle(const OleMainStream::Style &styleInfo) { - if (styleInfo.HasPageBreakBefore) { - handlePageBreak(); - } - shared_ptr<ZLTextStyleEntry> entry = new ZLTextStyleEntry(ZLTextStyleEntry::STYLE_OTHER_ENTRY); - - switch (styleInfo.Alignment) { - default: // in that case, use default alignment type - break; - case OleMainStream::Style::ALIGNMENT_LEFT: - entry->setAlignmentType(ALIGN_LEFT); - break; - case OleMainStream::Style::ALIGNMENT_RIGHT: - entry->setAlignmentType(ALIGN_RIGHT); - break; - case OleMainStream::Style::ALIGNMENT_CENTER: - entry->setAlignmentType(ALIGN_CENTER); - break; - case OleMainStream::Style::ALIGNMENT_JUSTIFY: - entry->setAlignmentType(ALIGN_JUSTIFY); - break; - } - - //TODO in case, where style is heading, but size is small it works wrong - const ZLTextStyleEntry::SizeUnit unit = ZLTextStyleEntry::SIZE_UNIT_PERCENT; - switch (styleInfo.StyleIdCurrent) { - default: - break; - case OleMainStream::Style::STYLE_H1: - entry->setLength(ZLTextStyleEntry::LENGTH_FONT_SIZE, 140, unit); - break; - case OleMainStream::Style::STYLE_H2: - entry->setLength(ZLTextStyleEntry::LENGTH_FONT_SIZE, 120, unit); - break; - case OleMainStream::Style::STYLE_H3: - entry->setLength(ZLTextStyleEntry::LENGTH_FONT_SIZE, 110, unit); - break; - } - myCurrentStyleEntry = entry; - myModelReader.addStyleEntry(*myCurrentStyleEntry); - - // we should have the same font style, as for the previous paragraph, - // if it has the same StyleIdCurrent - if (myCurrentStyleInfo.StyleIdCurrent != OleMainStream::Style::STYLE_INVALID && - myCurrentStyleInfo.StyleIdCurrent == styleInfo.StyleIdCurrent) { - for (std::size_t i = 0; i < myKindStack.size(); ++i) { - myModelReader.addControl(myKindStack.at(i), true); - } - } else { - myKindStack.clear(); - // fill by the fontstyle, that was got from Stylesheet - handleFontStyle(styleInfo.CurrentCharInfo.FontStyle); - } - myCurrentStyleInfo = styleInfo; -} - -void DocBookReader::handleBookmark(const std::string &name) { - myModelReader.addHyperlinkLabel(name); -} - -std::string DocBookReader::parseLink(ZLUnicodeUtil::Ucs2String s, bool urlencode) { - //TODO add support for HYPERLINK like that: - // [0x13] HYPERLINK "http://site.ru/some text" \t "_blank" [0x14] text [0x15] - //Current implementation search for last QUOTE, so, it reads \t and _blank as part of link - //Last quote searching is need to handle link like that: - // [0x13] HYPERLINK "http://yandex.ru/yandsearch?text='some text' и "some text2"" [0x14] link text [0x15] - - static const ZLUnicodeUtil::Ucs2Char QUOTE = 0x22; - std::size_t i, first = 0; - //TODO maybe functions findFirstOf and findLastOf should be in ZLUnicodeUtil class - for (i = 0; i < s.size(); ++i) { - if (s.at(i) == QUOTE) { - first = i; - break; - } - } - if (i == s.size()) { - return std::string(); - } - std::size_t j, last = 0; - for (j = s.size(); j > 0 ; --j) { - if (s.at(j - 1) == QUOTE) { - last = j - 1; - break; - } - } - if (j == 0 || last == first) { - return std::string(); - } - - ZLUnicodeUtil::Ucs2String link; - for (std::size_t k = first + 1; k < last; ++k) { - ZLUnicodeUtil::Ucs2Char ch = s.at(k); - if (urlencode && ZLUnicodeUtil::isSpace(ch)) { - //TODO maybe implement function for encoding all signs in url, not only spaces and quotes - //TODO maybe add backslash support - link.push_back('%'); - link.push_back('2'); - link.push_back('0'); - } else if (urlencode && ch == QUOTE) { - link.push_back('%'); - link.push_back('2'); - link.push_back('2'); - } else { - link.push_back(ch); - } - } - std::string utf8String; - ZLUnicodeUtil::ucs2ToUtf8(utf8String, link); - return utf8String; -} - -void DocBookReader::footnotesStartHandler() { - handlePageBreak(); -} - -void DocBookReader::ansiDataHandler(const char *buffer, std::size_t len) { - if (myConverter.isNull()) { - // lazy converter initialization - ZLEncodingCollection &collection = ZLEncodingCollection::Instance(); - ZLEncodingConverterInfoPtr info = collection.info(myEncoding); - myConverter = info.isNull() ? collection.defaultConverter() : info->createConverter(); - } - std::string utf8String; - myConverter->convert(utf8String, buffer, buffer + len); - ZLUnicodeUtil::utf8ToUcs2(myBuffer, utf8String); -} - -void DocBookReader::ucs2SymbolHandler(ZLUnicodeUtil::Ucs2Char symbol) { - myBuffer.push_back(symbol); -} diff --git a/fbreader/src/formats/doc/DocBookReader.h b/fbreader/src/formats/doc/DocBookReader.h deleted file mode 100644 index d80fb8e..0000000 --- a/fbreader/src/formats/doc/DocBookReader.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (C) 2004-2012 Geometer Plus <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#ifndef __DOCBOOKREADER_H__ -#define __DOCBOOKREADER_H__ - -#include <vector> - -#include <shared_ptr.h> -#include <ZLFile.h> -#include <ZLTextStyleEntry.h> -#include <ZLEncodingConverter.h> - -#include "../../bookmodel/BookReader.h" - -#include "OleMainStream.h" -#include "OleStreamParser.h" - -class DocBookReader : public OleStreamParser { - -public: - DocBookReader(BookModel &model, const std::string &encoding); - ~DocBookReader(); - bool readBook(); - -private: - void ansiDataHandler(const char *buffer, std::size_t len); - void ucs2SymbolHandler(ZLUnicodeUtil::Ucs2Char symbol); - void footnotesStartHandler(); - - void handleChar(ZLUnicodeUtil::Ucs2Char ucs2char); - void handleHardLinebreak(); - void handleParagraphEnd(); - void handlePageBreak(); - void handleTableSeparator(); - void handleTableEndRow(); - void handleFootNoteMark(); - void handleStartField(); - void handleSeparatorField(); - void handleEndField(); - void handleImage(const ZLFileImage::Blocks &blocks); - void handleOtherControlChar(ZLUnicodeUtil::Ucs2Char ucs2char); - - //formatting: - void handleFontStyle(unsigned int fontStyle); - void handleParagraphStyle(const OleMainStream::Style &styleInfo); - void handleBookmark(const std::string &name); - -private: - static std::string parseLink(ZLUnicodeUtil::Ucs2String s, bool urlencode = false); - -private: - BookReader myModelReader; - - ZLUnicodeUtil::Ucs2String myFieldInfoBuffer; - - enum { - READ_FIELD, - READ_TEXT - } myReadState; - - enum { - READ_FIELD_TEXT, - DONT_READ_FIELD_TEXT, - READ_FIELD_INFO - } myReadFieldState; - - //maybe it should be flag? - enum { - NO_HYPERLINK, - EXT_HYPERLINK_INSERTED, - INT_HYPERLINK_INSERTED - } myHyperlinkTypeState; - - //formatting - std::vector<FBTextKind> myKindStack; - shared_ptr<ZLTextStyleEntry> myCurrentStyleEntry; - OleMainStream::Style myCurrentStyleInfo; - unsigned int myPictureCounter; - - const std::string myEncoding; - shared_ptr<ZLEncodingConverter> myConverter; -}; - -inline DocBookReader::~DocBookReader() {} - -#endif /* __DOCBOOKREADER_H__ */ diff --git a/fbreader/src/formats/doc/DocFloatImageReader.cpp b/fbreader/src/formats/doc/DocFloatImageReader.cpp deleted file mode 100644 index 8c308e4..0000000 --- a/fbreader/src/formats/doc/DocFloatImageReader.cpp +++ /dev/null @@ -1,384 +0,0 @@ -/* - * Copyright (C) 2004-2012 Geometer Plus <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#include <ZLLogger.h> - -#include "OleUtil.h" -#include "OleStream.h" -#include "OleMainStream.h" - -#include "DocFloatImageReader.h" - -DocFloatImageReader::DocFloatImageReader(unsigned int off, unsigned int len, shared_ptr<OleStream> tableStream, shared_ptr<OleStream> mainStream) : - myTableStream(tableStream), - myMainStream(mainStream), - myOffset(off), - myLength(len) { -} - -void DocFloatImageReader::readAll() { - //OfficeArtContent structure is described at p.405-406 [MS-DOC] - if (!myTableStream->seek(myOffset, true)) { - ZLLogger::Instance().println("DocPlugin", "problems with reading float images"); - return; - } - - unsigned int count = 0; - - RecordHeader header; - while (count < myLength) { - count += readRecordHeader(header, myTableStream); - switch (header.type) { - case 0xF000: - count += readDggContainer(myItem, header.length, myTableStream, myMainStream); - break; - case 0xF002: - count += readDgContainer(myItem, header.length, myTableStream); - break; - default: - return; - break; - } - } -} - -ZLFileImage::Blocks DocFloatImageReader::getBlocksForShapeId(unsigned int shapeId) const { - FSPContainer container; - bool found = false; - for (std::size_t i = 0; !found && i < myItem.FSPs.size(); ++i) { - if (myItem.FSPs.at(i).fsp.shapeId == shapeId) { - found = true; - container = myItem.FSPs.at(i); - } - } - - if (!found || container.fopte.empty()) { - return ZLFileImage::Blocks(); - } - - for (std::size_t i = 0; i < container.fopte.size(); ++i) { - const FOPTE &fopte = container.fopte.at(i); - if (fopte.pId == 0x0104 && !fopte.isComplex) { //0x0104 specifies the BLIP, see p.420 [MS-ODRAW] - if (fopte.value <= myItem.blips.size() && fopte.value > 0) { - Blip blip = myItem.blips.at(fopte.value - 1); - return blip.blocks; - } - } - } - return ZLFileImage::Blocks(); -} - -unsigned int DocFloatImageReader::readRecordHeader(RecordHeader &header, shared_ptr<OleStream> stream) { - //OfficeArtRecordHeader structure is described at p.26 [MS-ODRAW] - char buffer[8]; - stream->read(buffer, 8); - unsigned int temp = OleUtil::getU2Bytes(buffer, 0); - header.version = temp & 0x000F; - header.instance = temp >> 4; - header.type = OleUtil::getU2Bytes(buffer, 2); - header.length = OleUtil::getU4Bytes(buffer, 4); - return 8; -} - -unsigned int DocFloatImageReader::readDggContainer(OfficeArtContent &item, unsigned int length, shared_ptr<OleStream> stream, shared_ptr<OleStream> mainStream) { - //OfficeArtDggContainer structure is described at p.50 [MS-ODRAW] - RecordHeader header; - unsigned int count = 0; - - while (count < length) { - count += readRecordHeader(header, stream); - switch (header.type) { - case 0xF001: - count += readBStoreContainer(item, header.length, stream, mainStream); - break; - default: - count += skipRecord(header, stream); - break; - } - } - - stream->seek(1, false); //skipping dgglbl (see p.406 [MS-DOC]) - ++count; - - return count; -} - -unsigned int DocFloatImageReader::readBStoreContainer(OfficeArtContent &item, unsigned int length, shared_ptr<OleStream> stream, shared_ptr<OleStream> mainStream) { - //OfficeArtBStoreContainer structure is described at p.58 [MS-ODRAW] - RecordHeader header; - unsigned int count = 0; - while (count < length) { - count += readRecordHeader(header, stream); - switch (header.type) { - case 0xF007: - { - Blip blip; - count += readBStoreContainerFileBlock(blip, stream, mainStream); - item.blips.push_back(blip); - } - break; - default: - count += skipRecord(header, stream); - break; - } - } - return count; -} - -unsigned int DocFloatImageReader::skipRecord(const RecordHeader &header, shared_ptr<OleStream> stream) { - stream->seek(header.length, false); - return header.length; -} - -unsigned int DocFloatImageReader::readBStoreContainerFileBlock(Blip &blip, shared_ptr<OleStream> stream, shared_ptr<OleStream> mainStream) { - //OfficeArtBStoreContainerFileBlock structure is described at p.59 [MS-ODRAW] - unsigned int count = readFBSE(blip.storeEntry, stream); - if (blip.storeEntry.offsetInDelay != (unsigned int)-1) { - if (mainStream->seek(blip.storeEntry.offsetInDelay, true)) { //see p.70 [MS-ODRAW] - //TODO maybe we should stop reading float images here - ZLLogger::Instance().println("DocPlugin", "DocFloatImageReader: problems with seeking for offset"); - return count; - } - } - RecordHeader header; - unsigned int count2 = readRecordHeader(header, mainStream); - switch (header.type) { - case OleMainStream::IMAGE_WMF: - case OleMainStream::IMAGE_EMF: - case OleMainStream::IMAGE_PICT: - count2 += skipRecord(header, mainStream); - break; - case OleMainStream::IMAGE_JPEG: - case OleMainStream::IMAGE_JPEG2: - case OleMainStream::IMAGE_PNG: - case OleMainStream::IMAGE_DIB: - case OleMainStream::IMAGE_TIFF: - count2 += readBlip(blip, header, mainStream); - break; - } - blip.type = header.type; - return count; -} - -unsigned int DocFloatImageReader::readBlip(Blip &blip, const RecordHeader &header, shared_ptr<OleStream> stream) { - //OfficeArtBlip structure is described at p.60-66 [MS-ODRAW] - stream->seek(16, false); //skipping rgbUid1 - unsigned int count = 16; - - bool addField = false; - switch (header.type) { - case OleMainStream::IMAGE_PNG: - if (header.instance == 0x6E1) { - addField = true; - } - break; - case OleMainStream::IMAGE_JPEG: - case OleMainStream::IMAGE_JPEG2: - if (header.instance == 0x46B || header.instance == 0x6E3) { - addField = true; - } - break; - case OleMainStream::IMAGE_DIB: - if (header.instance == 0x7A9) { - addField = true; - } - case OleMainStream::IMAGE_TIFF: - if (header.instance == 0x6E5) { - addField = true; - } - break; - } - - if (addField) { - stream->seek(16, false); //skipping rgbUid2 - count += 16; - } - stream->seek(1, false); //skipping tag - count += 1; - - blip.blocks = stream->getBlockPieceInfoList(stream->offset(), header.length - count); - count += header.length; - return count; -} - -unsigned int DocFloatImageReader::readFBSE(BlipStoreEntry &fbse, shared_ptr<OleStream> stream) { - //OfficeArtFBSE structure is described at p.68 [MS-ODRAW] - stream->seek(2, false); //skipping btWin32 and btMacOS - stream->seek(16, false); //skipping rgbUid - stream->seek(2, false); //skipping tag - fbse.size = read4Bytes(stream); - fbse.referenceCount = read4Bytes(stream); - fbse.offsetInDelay = read4Bytes(stream); - stream->seek(1, false); //skipping unused value - unsigned int lengthName = read1Byte(stream); //if it should be multiplied on 2? - stream->seek(2, false); // skipping unused values - if (lengthName > 0) { - stream->seek(lengthName, false); //skipping nameData - } - return 36 + lengthName; -} - -unsigned int DocFloatImageReader::readDgContainer(OfficeArtContent &item, unsigned int length, shared_ptr<OleStream> stream) { - //OfficeArtDgContainer structure is described at p.52 [MS-ODRAW] - unsigned int count = 0; - - RecordHeader header; - while (count < length) { - count += readRecordHeader(header, stream); - switch (header.type) { - case 0xF008: //skip OfficeArtFDG record, p. 82 [MS-ODRAW] - stream->seek(8, false); - count += 8; - break; - case 0xF003: - count += readSpgrContainer(item, header.length, stream); - break; - case 0xF004: - { - FSPContainer fspContainer; - count += readSpContainter(fspContainer, header.length, stream); - item.FSPs.push_back(fspContainer); - } - break; - default: - count += skipRecord(header, stream); - break; - } - } - return count; -} - -unsigned int DocFloatImageReader::readSpgrContainer(OfficeArtContent &item, unsigned int length, shared_ptr<OleStream> stream) { - //OfficeArtSpgrContainer structure is described at p.56 [MS-ODRAW] - unsigned count = 0; - RecordHeader header; - while (count < length) { - count += readRecordHeader(header, stream); - switch (header.type) { - case 0xF003: - count += readSpgrContainer(item, header.length, stream); - break; - case 0xF004: - { - FSPContainer fspContainer; - count += readSpContainter(fspContainer, header.length, stream); - item.FSPs.push_back(fspContainer); - } - break; - default: - count += skipRecord(header, stream); - break; - } - } - return count; -} - -unsigned int DocFloatImageReader::readSpContainter(FSPContainer &item, unsigned int length, shared_ptr<OleStream> stream) { - //OfficeArtSpContainter structure is described at p.53-55 [MS-ODRAW] - RecordHeader header; - unsigned int count = 0; - while (count < length) { - count += readRecordHeader(header, stream); - switch (header.type) { - case 0xF009: //skip OfficeArtFSPGR record, p.74 [MS-ODRAW] - stream->seek(16, false); - count += 16; - break; - case 0xF00A: - count += readFSP(item.fsp, stream); - break; - case 0xF00B: - count += readArrayFOPTE(item.fopte, header.length, stream); - break; - case 0xF00E: //OfficeArtAnchor - case 0xF00F: //OfficeArtChildAnchor, p.75 [MS-ODRAW] - case 0xF010: //OfficeArtClientAnchor - stream->seek(4, false); - count += 4; - break; - case 0xF00C: - case 0xF11F: - case 0xF11D: - break; - default: - count += skipRecord(header, stream); - break; - } - } - return count; -} - -unsigned int DocFloatImageReader::readFSP(FSP &fsp, shared_ptr<OleStream> stream) { - //OfficeArtFSP structure is described at p.76 [MS-ODRAW] - fsp.shapeId = read4Bytes(stream); - stream->seek(4, false); - return 8; -} - -unsigned int DocFloatImageReader::readArrayFOPTE(std::vector<FOPTE> &fopteArray,unsigned int length, shared_ptr<OleStream> stream) { - //OfficeArtRGFOPTE structure is described at p.98 [MS-ODRAW] - unsigned int count = 0; - while (count < length) { - FOPTE fopte; - count += readFOPTE(fopte, stream); - fopteArray.push_back(fopte); - } - for (std::size_t i = 0; i < fopteArray.size(); ++i) { - if (fopteArray.at(i).isComplex) { - stream->seek(fopteArray.at(i).value, false); - count += fopteArray.at(i).value; - } - } - return count; -} - -unsigned int DocFloatImageReader::readFOPTE(FOPTE &fopte, shared_ptr<OleStream> stream) { - //OfficeArtFOPTE structure is described at p.32 [MS-ODRAW] - unsigned int dtemp; - dtemp = read2Bytes(stream); - fopte.pId = (dtemp & 0x3fff); - fopte.isBlipId = ((dtemp & 0x4000) >> 14) == 0x1; - fopte.isComplex = ((dtemp & 0x8000) >> 15) == 0x1; - fopte.value = read4Bytes(stream); - return 6; -} - -unsigned int DocFloatImageReader::read1Byte(shared_ptr<OleStream> stream) { - char b[1]; - if (stream->read(b, 1) != 1) { - return 0; - } - return OleUtil::getU1Byte(b, 0); -} - -unsigned int DocFloatImageReader::read2Bytes(shared_ptr<OleStream> stream) { - char b[2]; - if (stream->read(b, 2) != 2) { - return 0; - } - return OleUtil::getU2Bytes(b, 0); -} - -unsigned int DocFloatImageReader::read4Bytes(shared_ptr<OleStream> stream) { - char b[4]; - if (stream->read(b, 4) != 4) { - return 0; - } - return OleUtil::getU4Bytes(b, 0); -} diff --git a/fbreader/src/formats/doc/DocFloatImageReader.h b/fbreader/src/formats/doc/DocFloatImageReader.h deleted file mode 100644 index d2d6c2e..0000000 --- a/fbreader/src/formats/doc/DocFloatImageReader.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (C) 2004-2012 Geometer Plus <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#ifndef __DOCFLOATIMAGEREADER_H__ -#define __DOCFLOATIMAGEREADER_H__ - -#include <ZLFileImage.h> - -class DocFloatImageReader { - -public: - struct BlipStoreEntry { // see p.68 [MS-ODRAW] - unsigned int size; // size of blip in stream - unsigned int referenceCount; // (cRef) reference count for the the blip - unsigned int offsetInDelay; // foDelay, file offset in the delay stream - }; - - struct Blip { //see p.59, p63-66 [MS-ODRAW] - BlipStoreEntry storeEntry; - unsigned int type; - ZLFileImage::Blocks blocks; - }; - - struct FSP { //see p.76-77 [MS-ODRAW] - unsigned int shapeId; //spid - }; - - struct FOPTE { //see p.98 and p.32 [MS-ODRAW] - unsigned int pId; //pid - bool isBlipId; //fBid - bool isComplex; //fComplex - unsigned int value; //op - }; - - struct FSPContainer { //see p.53-55 [MS-ODRAW] - FSP fsp; - std::vector<FOPTE> fopte; - }; - - struct OfficeArtContent { //see p.405-406 [MS-DOC] - std::vector<Blip> blips; //retrieved from OfficeArtDggContainer - std::vector<FSPContainer> FSPs; //retrieved from OfficeArtDgContainer - }; - - struct RecordHeader { //see p.26 [MS-ODRAW] - unsigned int version; - unsigned int instance; - unsigned int type; - unsigned int length; - }; - -public: - DocFloatImageReader(unsigned int off, unsigned int len, shared_ptr<OleStream> tableStream, shared_ptr<OleStream> mainStream); - -public: - void readAll(); - - ZLFileImage::Blocks getBlocksForShapeId(unsigned int shapeId) const; - -private: - static unsigned int readRecordHeader(RecordHeader &header, shared_ptr<OleStream> stream); - static unsigned int readDggContainer(OfficeArtContent &item, unsigned int length, shared_ptr<OleStream> stream, shared_ptr<OleStream> mainStream); - - static unsigned int readBStoreContainer(OfficeArtContent &item, unsigned int length, shared_ptr<OleStream> stream, shared_ptr<OleStream> mainStream); - static unsigned int readBStoreContainerFileBlock(Blip &blip, shared_ptr<OleStream> stream, shared_ptr<OleStream> mainStream); - static unsigned int readBlip(Blip &blip, const RecordHeader &header, shared_ptr<OleStream> stream); - static unsigned int readFBSE(BlipStoreEntry &fbse, shared_ptr<OleStream> stream); - - static unsigned int readFOPTE(FOPTE &fopte, shared_ptr<OleStream> stream); - static unsigned int readArrayFOPTE(std::vector<FOPTE> &fopte, unsigned int length, shared_ptr<OleStream> stream); - static unsigned int readFSP(FSP &fsp, shared_ptr<OleStream> stream); - static unsigned int readSpContainter(FSPContainer &item, unsigned int length, shared_ptr<OleStream> stream); - static unsigned int readSpgrContainer(OfficeArtContent &item, unsigned int length, shared_ptr<OleStream> stream); - static unsigned int readDgContainer(OfficeArtContent &item, unsigned int length, shared_ptr<OleStream> stream); - - static unsigned int skipRecord(const RecordHeader &header, shared_ptr<OleStream> stream); - - static unsigned int read1Byte(shared_ptr<OleStream> stream); - static unsigned int read2Bytes(shared_ptr<OleStream> stream); - static unsigned int read4Bytes(shared_ptr<OleStream> stream); - -private: - shared_ptr<OleStream> myTableStream; - shared_ptr<OleStream> myMainStream; - unsigned int myOffset; - unsigned int myLength; - - OfficeArtContent myItem; -}; - -#endif /* __DOCFLOATIMAGEREADER_H__ */ diff --git a/fbreader/src/formats/doc/DocInlineImageReader.cpp b/fbreader/src/formats/doc/DocInlineImageReader.cpp deleted file mode 100644 index 69ce74f..0000000 --- a/fbreader/src/formats/doc/DocInlineImageReader.cpp +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright (C) 2004-2012 Geometer Plus <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#include "OleUtil.h" -#include "OleMainStream.h" - -#include "DocInlineImageReader.h" - -DocInlineImageReader::DocInlineImageReader(shared_ptr<OleStream> dataStream) : - myDataStream(dataStream) { -} - -ZLFileImage::Blocks DocInlineImageReader::getImagePieceInfo(unsigned int dataPos) { - if (myDataStream.isNull()) { - return ZLFileImage::Blocks(); - } - if (!myDataStream->seek(dataPos, true)) { - return ZLFileImage::Blocks(); - } - - //reading PICF structure (see p. 421 [MS-DOC]) - unsigned int picfHeaderSize = 4 + 2 + 8; //record length, headerLength and storage format - char headerBuffer[picfHeaderSize]; - if (myDataStream->read(headerBuffer, picfHeaderSize) != picfHeaderSize) { - return ZLFileImage::Blocks(); - } - unsigned int length = OleUtil::getU4Bytes(headerBuffer, 0); - unsigned int headerLength = OleUtil::getU2Bytes(headerBuffer, 4); - unsigned int formatType = OleUtil::getU2Bytes(headerBuffer, 6); - - if (formatType != 0x0064) { //external link to some file; see p.394 [MS-DOC] - //TODO implement - return ZLFileImage::Blocks(); - } - if (headerLength >= length) { - return ZLFileImage::Blocks(); - } - - //reading OfficeArtInlineSpContainer structure; see p.421 [MS-DOC] and p.56 [MS-ODRAW] - if (!myDataStream->seek(headerLength - picfHeaderSize, false)) { //skip header - return ZLFileImage::Blocks(); - } - - char buffer[8]; //for OfficeArtRecordHeader structure; see p.69 [MS-ODRAW] - bool found = false; - unsigned int curOffset = 0; - for (curOffset = headerLength; !found && curOffset + 8 <= length; curOffset += 8) { - if (myDataStream->read(buffer, 8) != 8) { - return ZLFileImage::Blocks(); - } - unsigned int recordInstance = OleUtil::getU2Bytes(buffer, 0) >> 4; - unsigned int recordType = OleUtil::getU2Bytes(buffer, 2); - unsigned int recordLen = OleUtil::getU4Bytes(buffer, 4); - - switch (recordType) { - case 0xF000: case 0xF001: case 0xF002: case 0xF003: case 0xF004: case 0xF005: - break; - case 0xF007: - { - myDataStream->seek(33, false); - char tmpBuf[1]; - myDataStream->read(tmpBuf, 1); - unsigned int nameLength = OleUtil::getU1Byte(tmpBuf, 0); - myDataStream->seek(nameLength * 2 + 2, false); - curOffset += 33 + 1 + nameLength * 2 + 2; - } - break; - case 0xF008: - myDataStream->seek(8, false); - curOffset += 8; - break; - case 0xF009: - myDataStream->seek(16, false); - curOffset += 16; - break; - case 0xF006: case 0xF00A: case 0xF00B: case 0xF00D: case 0xF00E: case 0xF00F: case 0xF010: case 0xF011: case 0xF122: - myDataStream->seek(recordLen, false); - curOffset += recordLen; - break; - case OleMainStream::IMAGE_EMF: - case OleMainStream::IMAGE_WMF: - case OleMainStream::IMAGE_PICT: - //TODO implement - return ZLFileImage::Blocks(); - case OleMainStream::IMAGE_JPEG: - case OleMainStream::IMAGE_JPEG2: - myDataStream->seek(17, false); - curOffset += 17; - if (recordInstance == 0x46B || recordInstance == 0x6E3) { - myDataStream->seek(16, false); - curOffset += 16; - } - found = true; - break; - case OleMainStream::IMAGE_PNG: - myDataStream->seek(17, false); - curOffset += 17; - if (recordInstance == 0x6E1) { - myDataStream->seek(16, false); - curOffset += 16; - } - found = true; - break; - case OleMainStream::IMAGE_DIB: // DIB = BMP without 14-bytes header - myDataStream->seek(17, false); - curOffset += 17; - if (recordInstance == 0x7A9) { - myDataStream->seek(16, false); - curOffset += 16; - } - found = true; - break; - case OleMainStream::IMAGE_TIFF: - myDataStream->seek(17, false); - curOffset += 17; - if (recordInstance == 0x6E5) { - myDataStream->seek(16, false); - curOffset += 16; - } - found = true; - break; - case 0xF00C: - default: - return ZLFileImage::Blocks(); - } - } - - if (!found) { - return ZLFileImage::Blocks(); - } - return myDataStream->getBlockPieceInfoList(dataPos + curOffset, length - curOffset); -} diff --git a/fbreader/src/formats/doc/DocInlineImageReader.h b/fbreader/src/formats/doc/DocInlineImageReader.h deleted file mode 100644 index 9dab9ae..0000000 --- a/fbreader/src/formats/doc/DocInlineImageReader.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (C) 2004-2012 Geometer Plus <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#ifndef __DOCINLINEIMAGEREADER_H__ -#define __DOCINLINEIMAGEREADER_H__ - -#include <vector> - -#include "OleStream.h" - -class DocInlineImageReader { - -public: - DocInlineImageReader(shared_ptr<OleStream> dataStream); - ZLFileImage::Blocks getImagePieceInfo(unsigned int dataPos); - -private: - shared_ptr<OleStream> myDataStream; -}; - -#endif /* __DOCINLINEIMAGEREADER_H__ */ diff --git a/fbreader/src/formats/doc/DocMetaInfoReader.cpp b/fbreader/src/formats/doc/DocMetaInfoReader.cpp deleted file mode 100644 index 37b39c2..0000000 --- a/fbreader/src/formats/doc/DocMetaInfoReader.cpp +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright (C) 2004-2012 Geometer Plus <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#include <ZLInputStream.h> - -#include "../../library/Book.h" - -#include "DocMetaInfoReader.h" - -DocMetaInfoReader::DocMetaInfoReader(Book &book) : myBook(book) { - myBook.removeAllAuthors(); - myBook.setTitle(std::string()); - myBook.setLanguage(std::string()); - myBook.removeAllTags(); -} - -bool DocMetaInfoReader::readMetaInfo() { - myBook.removeAllAuthors(); - myBook.setTitle(myBook.file().name(true)); - myBook.removeAllTags(); - return true; -} diff --git a/fbreader/src/formats/doc/DocMetaInfoReader.h b/fbreader/src/formats/doc/DocMetaInfoReader.h deleted file mode 100644 index db26d29..0000000 --- a/fbreader/src/formats/doc/DocMetaInfoReader.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (C) 2004-2012 Geometer Plus <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#ifndef __DOCMETAINFOREADER_H__ -#define __DOCMETAINFOREADER_H__ - -#include <string> - -class Book; - -class DocMetaInfoReader { - -public: - DocMetaInfoReader(Book &book); - ~DocMetaInfoReader(); - bool readMetaInfo(); - - /* - void startElementHandler(int tag, const char **attributes); - void endElementHandler(int tag); - void characterDataHandler(const char *text, std::size_t len); - */ - -private: - Book &myBook; -}; - -inline DocMetaInfoReader::~DocMetaInfoReader() {} - -#endif /* __DOCMETAINFOREADER_H__ */ diff --git a/fbreader/src/formats/doc/DocPlugin.cpp b/fbreader/src/formats/doc/DocPlugin.cpp deleted file mode 100644 index ef6f511..0000000 --- a/fbreader/src/formats/doc/DocPlugin.cpp +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (C) 2004-2012 Geometer Plus <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#include <ZLFile.h> -#include <ZLInputStream.h> -#include <ZLLogger.h> -#include <ZLImage.h> -#include <ZLEncodingConverter.h> - -#include "DocPlugin.h" -#include "DocMetaInfoReader.h" -#include "DocBookReader.h" -#include "DocStreams.h" -#include "../../bookmodel/BookModel.h" -#include "../../library/Book.h" - -DocPlugin::DocPlugin() { -} - -DocPlugin::~DocPlugin() { -} - -bool DocPlugin::providesMetaInfo() const { - return true; -} - -const std::string DocPlugin::supportedFileType() const { - return "doc"; -} - -bool DocPlugin::acceptsFile(const ZLFile &file) const { - return file.extension() == "doc"; -} - -bool DocPlugin::readMetaInfo(Book &book) const { - if (!DocMetaInfoReader(book).readMetaInfo()) { - return false; - } - - shared_ptr<ZLInputStream> stream = new DocAnsiStream(book.file(), 50000); - if (!detectEncodingAndLanguage(book, *stream)) { - stream = new DocUcs2Stream(book.file(), 50000); - detectLanguage(book, *stream, ZLEncodingConverter::UTF8, true); - } - - return true; -} - -bool DocPlugin::readLanguageAndEncoding(Book &/*book*/) const { - return true; -} - -bool DocPlugin::readModel(BookModel &model) const { - return DocBookReader(model, model.book()->encoding()).readBook(); -} diff --git a/fbreader/src/formats/doc/DocPlugin.h b/fbreader/src/formats/doc/DocPlugin.h deleted file mode 100644 index 93b1803..0000000 --- a/fbreader/src/formats/doc/DocPlugin.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (C) 2004-2012 Geometer Plus <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#ifndef __DOCPLUGIN_H__ -#define __DOCPLUGIN_H__ - -#include "../FormatPlugin.h" - -class DocPlugin : public FormatPlugin { - -public: - DocPlugin(); - ~DocPlugin(); - bool providesMetaInfo() const; - - const std::string supportedFileType() const; - bool acceptsFile(const ZLFile &file) const; - bool readMetaInfo(Book &book) const; - bool readLanguageAndEncoding(Book &book) const; - bool readModel(BookModel &model) const; -}; - -#endif /* __DOCPLUGIN_H__ */ diff --git a/fbreader/src/formats/doc/DocStreams.cpp b/fbreader/src/formats/doc/DocStreams.cpp deleted file mode 100644 index b21e15a..0000000 --- a/fbreader/src/formats/doc/DocStreams.cpp +++ /dev/null @@ -1,202 +0,0 @@ -/* - * Copyright (C) 2004-2012 Geometer Plus <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#include <cstring> -#include <cstdlib> -#include <string> - -#include "DocStreams.h" -#include "OleStreamReader.h" - -class DocReader : public OleStreamReader { - -public: - DocReader(char *buffer, std::size_t maxSize); - ~DocReader(); - std::size_t readSize() const; - -private: - bool readStream(OleMainStream &stream); - void ansiDataHandler(const char *buffer, std::size_t len); - void ucs2SymbolHandler(ZLUnicodeUtil::Ucs2Char symbol); - void footnotesStartHandler(); - -protected: - char *myBuffer; - const std::size_t myMaxSize; - std::size_t myActualSize; -}; - -class DocAnsiReader : public DocReader { - -public: - DocAnsiReader(char *buffer, std::size_t maxSize); - ~DocAnsiReader(); - -private: - void ansiDataHandler(const char *buffer, std::size_t len); -}; - -class DocUcs2Reader : public DocReader { - -public: - DocUcs2Reader(char *buffer, std::size_t maxSize); - ~DocUcs2Reader(); - -private: - void ucs2SymbolHandler(ZLUnicodeUtil::Ucs2Char symbol); -}; - -DocReader::DocReader(char *buffer, std::size_t maxSize) : myBuffer(buffer), myMaxSize(maxSize), myActualSize(0) { -} - -DocReader::~DocReader() { -} - -bool DocReader::readStream(OleMainStream &stream) { - // TODO make 2 optmizations: - // 1) If another piece is too big, reading of next piece can be stopped if some size parameter will be specified - // (it can be transfered as a parameter (with default 0 value, that means no need to use it) to readNextPiece method) - // 2) We can specify as a parameter for readNextPiece, what kind of piece should be read next (ANSI or not ANSI). - // As type of piece is known already, there's no necessary to read other pieces. - while (myActualSize < myMaxSize) { - if (!readNextPiece(stream)) { - break; - } - } - return true; -} - -void DocReader::ansiDataHandler(const char*, std::size_t) { -} - -void DocReader::ucs2SymbolHandler(ZLUnicodeUtil::Ucs2Char) { -} - -void DocReader::footnotesStartHandler() { -} - -std::size_t DocReader::readSize() const { - return myActualSize; -} - -DocAnsiReader::DocAnsiReader(char *buffer, std::size_t maxSize) : DocReader(buffer, maxSize) { -} - -DocAnsiReader::~DocAnsiReader() { -} - -void DocAnsiReader::ansiDataHandler(const char *buffer, std::size_t dataLength) { - if (myActualSize < myMaxSize) { - const std::size_t len = std::min(dataLength, myMaxSize - myActualSize); - std::strncpy(myBuffer + myActualSize, buffer, len); - myActualSize += len; - } -} - -DocUcs2Reader::DocUcs2Reader(char *buffer, std::size_t maxSize) : DocReader(buffer, maxSize) { -} - -DocUcs2Reader::~DocUcs2Reader() { -} - -void DocUcs2Reader::ucs2SymbolHandler(ZLUnicodeUtil::Ucs2Char symbol) { - if (myActualSize < myMaxSize) { - char buffer[4]; - const std::size_t dataLength = ZLUnicodeUtil::ucs2ToUtf8(buffer, symbol); - const std::size_t len = std::min(dataLength, myMaxSize - myActualSize); - std::strncpy(myBuffer + myActualSize, buffer, len); - myActualSize += len; - } -} - -DocStream::DocStream(const ZLFile& file, std::size_t maxSize) : myFile(file), myBuffer(0), mySize(maxSize) { -} - -DocStream::~DocStream() { - close(); -} - -bool DocStream::open() { - if (mySize != 0) { - myBuffer = new char[mySize]; - } - shared_ptr<DocReader> reader = createReader(myBuffer, mySize); - shared_ptr<ZLInputStream> stream = myFile.inputStream(); - if (stream.isNull() || !stream->open()) { - return false; - } - if (!reader->readDocument(stream, false)) { - return false; - } - mySize = reader->readSize(); - myOffset = 0; - return true; -} - -std::size_t DocStream::read(char *buffer, std::size_t maxSize) { - maxSize = std::min(maxSize, mySize - myOffset); - if (buffer != 0 && myBuffer != 0) { - std::memcpy(buffer, myBuffer + myOffset, maxSize); - } - myOffset += maxSize; - return maxSize; -} - -void DocStream::close() { - if (myBuffer != 0) { - delete[] myBuffer; - myBuffer = 0; - } -} - -void DocStream::seek(int offset, bool absoluteOffset) { - if (!absoluteOffset) { - offset += myOffset; - } - myOffset = std::min(mySize, (std::size_t)std::max(0, offset)); -} - -std::size_t DocStream::offset() const { - return myOffset; -} - -std::size_t DocStream::sizeOfOpened() { - return mySize; -} - -DocAnsiStream::DocAnsiStream(const ZLFile& file, std::size_t maxSize) : DocStream(file, maxSize) { -} - -DocAnsiStream::~DocAnsiStream() { -} - -shared_ptr<DocReader> DocAnsiStream::createReader(char *buffer, std::size_t maxSize) { - return new DocAnsiReader(buffer, maxSize); -} - -DocUcs2Stream::DocUcs2Stream(const ZLFile& file, std::size_t maxSize) : DocStream(file, maxSize) { -} - -DocUcs2Stream::~DocUcs2Stream() { -} - -shared_ptr<DocReader> DocUcs2Stream::createReader(char *buffer, std::size_t maxSize) { - return new DocUcs2Reader(buffer, maxSize); -} diff --git a/fbreader/src/formats/doc/DocStreams.h b/fbreader/src/formats/doc/DocStreams.h deleted file mode 100644 index 4b1538a..0000000 --- a/fbreader/src/formats/doc/DocStreams.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (C) 2008-2012 Geometer Plus <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#ifndef __DOCSTREAMS_H__ -#define __DOCSTREAMS_H__ - -#include <ZLFile.h> -#include <ZLInputStream.h> - -class DocReader; - -class DocStream : public ZLInputStream { - -public: - DocStream(const ZLFile& file, std::size_t maxSize); - ~DocStream(); - -private: - bool open(); - std::size_t read(char *buffer, std::size_t maxSize); - void close(); - - void seek(int offset, bool absoluteOffset); - std::size_t offset() const; - std::size_t sizeOfOpened(); - -protected: - virtual shared_ptr<DocReader> createReader(char *buffer, std::size_t maxSize) = 0; - -private: - const ZLFile myFile; - char *myBuffer; - std::size_t mySize; - std::size_t myOffset; -}; - -class DocAnsiStream : public DocStream { - -public: - DocAnsiStream(const ZLFile& file, std::size_t maxSize); - ~DocAnsiStream(); - -private: - shared_ptr<DocReader> createReader(char *buffer, std::size_t maxSize); -}; - -class DocUcs2Stream : public DocStream { - -public: - DocUcs2Stream(const ZLFile& file, std::size_t maxSize); - ~DocUcs2Stream(); - -private: - shared_ptr<DocReader> createReader(char *buffer, std::size_t maxSize); -}; - -#endif /* __DOCSTREAMS_H__ */ diff --git a/fbreader/src/formats/doc/OleMainStream.cpp b/fbreader/src/formats/doc/OleMainStream.cpp deleted file mode 100644 index fe829e6..0000000 --- a/fbreader/src/formats/doc/OleMainStream.cpp +++ /dev/null @@ -1,1085 +0,0 @@ -/* - * Copyright (C) 2004-2012 Geometer Plus <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#include <string> - -#include <ZLLogger.h> -#include <ZLUnicodeUtil.h> - -#include "OleUtil.h" -#include "OleStorage.h" - -#include "DocInlineImageReader.h" - -#include "OleMainStream.h" - -OleMainStream::Style::Style() : - StyleIdCurrent(STYLE_INVALID), - StyleIdNext(STYLE_INVALID), - HasPageBreakBefore(false), - BeforeParagraphIndent(0), - AfterParagraphIndent(0), - LeftIndent(0), - FirstLineIndent(0), - RightIndent(0), - Alignment(ALIGNMENT_DEFAULT) { -} - -OleMainStream::CharInfo::CharInfo() : FontStyle(FONT_REGULAR), FontSize(20) { -} - -OleMainStream::SectionInfo::SectionInfo() : CharPosition(0), IsNewPage(true) { -} - -OleMainStream::InlineImageInfo::InlineImageInfo() : DataPosition(0) { -} - -OleMainStream::FloatImageInfo::FloatImageInfo() : ShapeId(0) { -} - -OleMainStream::OleMainStream(shared_ptr<OleStorage> storage, OleEntry oleEntry, shared_ptr<ZLInputStream> stream) : OleStream(storage, oleEntry, stream) { -} - -bool OleMainStream::open(bool doReadFormattingData) { - if (OleStream::open() == false) { - return false; - } - - static const std::size_t HEADER_SIZE = 768; //size of data in header of main stream - char headerBuffer[HEADER_SIZE]; - seek(0, true); - - if (read(headerBuffer, HEADER_SIZE) != HEADER_SIZE) { - return false; - } - - bool result = readFIB(headerBuffer); - if (!result) { - return false; - } - - // determining table stream number - unsigned int tableNumber = (OleUtil::getU2Bytes(headerBuffer, 0xA) & 0x0200) ? 1 : 0; - std::string tableName = tableNumber == 0 ? "0" : "1"; - tableName += "Table"; - OleEntry tableEntry; - result = myStorage->getEntryByName(tableName, tableEntry); - - if (!result) { - // cant't find table stream (that can be only in case if file format is below Word 7/8), so building simple table stream - // TODO: CHECK may be not all old documents have ANSI - ZLLogger::Instance().println("DocPlugin", "cant't find table stream, building own simple piece table, that includes all charachters"); - Piece piece = {myStartOfText, myEndOfText - myStartOfText, true, Piece::PIECE_TEXT, 0}; - myPieces.push_back(piece); - return true; - } - - result = readPieceTable(headerBuffer, tableEntry); - - if (!result) { - ZLLogger::Instance().println("DocPlugin", "error during reading piece table"); - return false; - } - - if (!doReadFormattingData) { - return true; - } - - OleEntry dataEntry; - if (myStorage->getEntryByName("Data", dataEntry)) { - myDataStream = new OleStream(myStorage, dataEntry, myBaseStream); - } - - //result of reading following structures doesn't check, because all these - //problems can be ignored, and document can be showed anyway, maybe with wrong formatting - readBookmarks(headerBuffer, tableEntry); - readStylesheet(headerBuffer, tableEntry); - //readSectionsInfoTable(headerBuffer, tableEntry); //it isn't used now - readParagraphStyleTable(headerBuffer, tableEntry); - readCharInfoTable(headerBuffer, tableEntry); - readFloatingImages(headerBuffer, tableEntry); - return true; -} - -const OleMainStream::Pieces &OleMainStream::getPieces() const { - return myPieces; -} - -const OleMainStream::CharInfoList &OleMainStream::getCharInfoList() const { - return myCharInfoList; -} - -const OleMainStream::StyleInfoList &OleMainStream::getStyleInfoList() const { - return myStyleInfoList; -} - -const OleMainStream::BookmarksList &OleMainStream::getBookmarks() const { - return myBookmarks; -} - -const OleMainStream::InlineImageInfoList &OleMainStream::getInlineImageInfoList() const { - return myInlineImageInfoList; -} - -const OleMainStream::FloatImageInfoList &OleMainStream::getFloatImageInfoList() const { - return myFloatImageInfoList; -} - -ZLFileImage::Blocks OleMainStream::getFloatImage(unsigned int shapeId) const { - if (myFLoatImageReader.isNull()) { - return ZLFileImage::Blocks(); - } - return myFLoatImageReader->getBlocksForShapeId(shapeId); -} - -ZLFileImage::Blocks OleMainStream::getInlineImage(unsigned int dataPosition) const { - if (myDataStream.isNull()) { - return ZLFileImage::Blocks(); - } - DocInlineImageReader imageReader(myDataStream); - return imageReader.getImagePieceInfo(dataPosition); -} - -bool OleMainStream::readFIB(const char *headerBuffer) { - int flags = OleUtil::getU2Bytes(headerBuffer, 0xA); //offset for flags - - if (flags & 0x0004) { //flag for complex format - ZLLogger::Instance().println("DocPlugin", "This was fast-saved. Some information is lost"); - //lostInfo = (flags & 0xF0) >> 4); - } - - if (flags & 0x1000) { //flag for using extending charset - ZLLogger::Instance().println("DocPlugin", "File uses extended character set (get_word8_char)"); - } else { - ZLLogger::Instance().println("DocPlugin", "File uses get_8bit_char character set"); - } - - if (flags & 0x100) { //flag for encrypted files - ZLLogger::Instance().println("DocPlugin", "File is encrypted"); - // Encryption key = %08lx ; NumUtil::get4Bytes(header, 14) - return false; - } - - unsigned int charset = OleUtil::getU2Bytes(headerBuffer, 0x14); //offset for charset number - if (charset && charset != 0x100) { //0x100 = default charset - ZLLogger::Instance().println("DocPlugin", "Using not default character set %d"); - } else { - ZLLogger::Instance().println("DocPlugin", "Using default character set"); - } - - myStartOfText = OleUtil::get4Bytes(headerBuffer, 0x18); //offset for start of text value - myEndOfText = OleUtil::get4Bytes(headerBuffer, 0x1c); //offset for end of text value - return true; -} - -void OleMainStream::splitPieces(const Pieces &s, Pieces &dest1, Pieces &dest2, Piece::PieceType type1, Piece::PieceType type2, int boundary) { - Pieces source = s; - dest1.clear(); - dest2.clear(); - - int sumLength = 0; - std::size_t i = 0; - for (i = 0; i < source.size(); ++i) { - Piece piece = source.at(i); - if (piece.Length + sumLength >= boundary) { - Piece piece2 = piece; - - piece.Length = boundary - sumLength; - piece.Type = type1; - - piece2.Type = type2; - piece2.Offset += piece.Length * 2; - piece2.Length -= piece.Length; - - if (piece.Length > 0) { - dest1.push_back(piece); - } - if (piece2.Length > 0) { - dest2.push_back(piece2); - } - ++i; - break; - } - sumLength += piece.Length; - piece.Type = type1; - dest1.push_back(piece); - } - for (; i < source.size(); ++i) { - Piece piece = source.at(i); - piece.Type = type2; - dest2.push_back(piece); - } - -} - -std::string OleMainStream::getPiecesTableBuffer(const char *headerBuffer, OleStream &tableStream) { - unsigned int clxOffset = OleUtil::getU4Bytes(headerBuffer, 0x01A2); //offset for CLX structure - unsigned int clxLength = OleUtil::getU4Bytes(headerBuffer, 0x01A6); //offset for value of CLX structure length - - //1 step : loading CLX table from table stream - char *clxBuffer = new char[clxLength]; - if (!tableStream.seek(clxOffset, true)) { - ZLLogger::Instance().println("DocPlugin", "getPiecesTableBuffer -- error for seeking to CLX structure"); - return std::string(); - } - if (tableStream.read(clxBuffer, clxLength) != clxLength) { - ZLLogger::Instance().println("DocPlugin", "getPiecesTableBuffer -- CLX structure length is invalid"); - return std::string(); - } - std::string clx(clxBuffer, clxLength); - delete[] clxBuffer; - - //2 step: searching for pieces table buffer at CLX - //(determines it by 0x02 as start symbol) - std::size_t from = 0; - std::size_t i; - std::string pieceTableBuffer; - while ((i = clx.find_first_of(0x02, from)) != std::string::npos) { - if (clx.size() < i + 1 + 4) { - ZLLogger::Instance().println("DocPlugin", "getPiecesTableBuffer -- CLX structure has invalid format"); - return std::string(); - } - unsigned int pieceTableLength = OleUtil::getU4Bytes(clx.c_str(), i + 1); - pieceTableBuffer = std::string(clx, i + 1 + 4); - if (pieceTableBuffer.length() != pieceTableLength) { - from = i + 1; - continue; - } - break; - } - return pieceTableBuffer; -} - - -bool OleMainStream::readPieceTable(const char *headerBuffer, const OleEntry &tableEntry) { - OleStream tableStream(myStorage, tableEntry, myBaseStream); - std::string piecesTableBuffer = getPiecesTableBuffer(headerBuffer, tableStream); - - if (piecesTableBuffer.empty()) { - return false; - } - - //getting count of Character Positions for different types of subdocuments in Main Stream - int ccpText = OleUtil::get4Bytes(headerBuffer, 0x004C); //text - int ccpFtn = OleUtil::get4Bytes(headerBuffer, 0x0050); //footnote subdocument - int ccpHdd = OleUtil::get4Bytes(headerBuffer, 0x0054); //header subdocument - int ccpMcr = OleUtil::get4Bytes(headerBuffer, 0x0058); //macro subdocument - int ccpAtn = OleUtil::get4Bytes(headerBuffer, 0x005C); //comment subdocument - int ccpEdn = OleUtil::get4Bytes(headerBuffer, 0x0060); //endnote subdocument - int ccpTxbx = OleUtil::get4Bytes(headerBuffer, 0x0064); //textbox subdocument - int ccpHdrTxbx = OleUtil::get4Bytes(headerBuffer, 0x0068); //textbox subdocument of the header - int lastCP = ccpFtn + ccpHdd + ccpMcr + ccpAtn + ccpEdn + ccpTxbx + ccpHdrTxbx; - if (lastCP != 0) { - ++lastCP; - } - lastCP += ccpText; - - //getting the CP (character positions) and CP descriptors - std::vector<int> cp; //array of character positions for pieces - unsigned int j = 0; - for (j = 0; ; j += 4) { - if (piecesTableBuffer.size() < j + 4) { - ZLLogger::Instance().println("DocPlugin", "invalid piece table, cp ends not with a lastcp"); - break; - } - int curCP = OleUtil::get4Bytes(piecesTableBuffer.c_str(), j); - cp.push_back(curCP); - if (curCP == lastCP) { - break; - } - } - - if (cp.size() < 2) { - ZLLogger::Instance().println("DocPlugin", "invalid piece table, < 2 pieces"); - return false; - } - - std::vector<std::string> descriptors; - for (std::size_t k = 0; k < cp.size() - 1; ++k) { - //j + 4, because it should be taken after CP in PiecesTable Buffer - //k * 8, because it should be taken 8 byte for each descriptor - std::size_t substrFrom = j + 4 + k * 8; - if (piecesTableBuffer.size() < substrFrom + 8) { - ZLLogger::Instance().println("DocPlugin", "invalid piece table, problems with descriptors reading"); - break; - } - descriptors.push_back(piecesTableBuffer.substr(substrFrom, 8)); - } - - //filling the Pieces vector - std::size_t minValidSize = std::min(cp.size() - 1, descriptors.size()); - if (minValidSize == 0) { - ZLLogger::Instance().println("DocPlugin", "invalid piece table, there are no pieces"); - return false; - } - - for (std::size_t i = 0; i < minValidSize; ++i) { - //4byte integer with offset and ANSI flag - int fcValue = OleUtil::get4Bytes(descriptors.at(i).c_str(), 0x2); //offset for piece structure - Piece piece; - piece.IsANSI = (fcValue & 0x40000000) == 0x40000000; //ansi flag - piece.Offset = fcValue & 0x3FFFFFFF; //gettting offset for current piece - piece.Length = cp.at(i + 1) - cp.at(i); - myPieces.push_back(piece); - } - - //split pieces into different types - Pieces piecesText, piecesFootnote, piecesOther; - splitPieces(myPieces, piecesText, piecesFootnote, Piece::PIECE_TEXT, Piece::PIECE_FOOTNOTE, ccpText); - splitPieces(piecesFootnote, piecesFootnote, piecesOther, Piece::PIECE_FOOTNOTE, Piece::PIECE_OTHER, ccpFtn); - - myPieces.clear(); - for (std::size_t i = 0; i < piecesText.size(); ++i) { - myPieces.push_back(piecesText.at(i)); - } - for (std::size_t i = 0; i < piecesFootnote.size(); ++i) { - myPieces.push_back(piecesFootnote.at(i)); - } - for (std::size_t i = 0; i < piecesOther.size(); ++i) { - myPieces.push_back(piecesOther.at(i)); - } - - //converting length and offset depending on isANSI - for (std::size_t i = 0; i < myPieces.size(); ++i) { - Piece &piece = myPieces.at(i); - if (!piece.IsANSI) { - piece.Length *= 2; - } else { - piece.Offset /= 2; - } - } - - //filling startCP field - unsigned int curStartCP = 0; - for (std::size_t i = 0; i < myPieces.size(); ++i) { - Piece &piece = myPieces.at(i); - piece.startCP = curStartCP; - if (piece.IsANSI) { - curStartCP += piece.Length; - } else { - curStartCP += piece.Length / 2; - } - } - return true; -} - -bool OleMainStream::readBookmarks(const char *headerBuffer, const OleEntry &tableEntry) { - //SttbfBkmk structure is a table of bookmark name strings - unsigned int beginNamesInfo = OleUtil::getU4Bytes(headerBuffer, 0x142); // address of SttbfBkmk structure - std::size_t namesInfoLength = (std::size_t)OleUtil::getU4Bytes(headerBuffer, 0x146); // length of SttbfBkmk structure - - if (namesInfoLength == 0) { - return true; //there's no bookmarks - } - - OleStream tableStream(myStorage, tableEntry, myBaseStream); - std::string buffer; - if (!readToBuffer(buffer, beginNamesInfo, namesInfoLength, tableStream)) { - return false; - } - - unsigned int recordsNumber = OleUtil::getU2Bytes(buffer.c_str(), 0x2); //count of records - - std::vector<std::string> names; - unsigned int offset = 0x6; //initial offset - for (unsigned int i = 0; i < recordsNumber; ++i) { - if (buffer.size() < offset + 2) { - ZLLogger::Instance().println("DocPlugin", "problmes with reading bookmarks names"); - break; - } - unsigned int length = OleUtil::getU2Bytes(buffer.c_str(), offset) * 2; //length of string in bytes - ZLUnicodeUtil::Ucs2String name; - for (unsigned int j = 0; j < length; j+=2) { - char ch1 = buffer.at(offset + 2 + j); - char ch2 = buffer.at(offset + 2 + j + 1); - ZLUnicodeUtil::Ucs2Char ucs2Char = (unsigned int)ch1 | ((unsigned int)ch2 << 8); - name.push_back(ucs2Char); - } - std::string utf8Name; - ZLUnicodeUtil::ucs2ToUtf8(utf8Name, name); - names.push_back(utf8Name); - offset += length + 2; - } - - //plcfBkmkf structure is table recording beginning CPs of bookmarks - unsigned int beginCharPosInfo = OleUtil::getU4Bytes(headerBuffer, 0x14A); // address of plcfBkmkf structure - std::size_t charPosInfoLen = (std::size_t)OleUtil::getU4Bytes(headerBuffer, 0x14E); // length of plcfBkmkf structure - - if (charPosInfoLen == 0) { - return true; //there's no bookmarks - } - - if (!readToBuffer(buffer, beginCharPosInfo, charPosInfoLen, tableStream)) { - return false; - } - - static const unsigned int BKF_SIZE = 4; - std::size_t size = calcCountOfPLC(charPosInfoLen, BKF_SIZE); - std::vector<unsigned int> charPage; - for (std::size_t index = 0, offset = 0; index < size; ++index, offset += 4) { - charPage.push_back(OleUtil::getU4Bytes(buffer.c_str(), offset)); - } - - for (std::size_t i = 0; i < names.size(); ++i) { - if (i >= charPage.size()) { - break; //for the case if something in these structures goes wrong, to not to lose all bookmarks - } - Bookmark bookmark; - bookmark.CharPosition = charPage.at(i); - bookmark.Name = names.at(i); - myBookmarks.push_back(bookmark); - } - - return true; -} - -bool OleMainStream::readStylesheet(const char *headerBuffer, const OleEntry &tableEntry) { - //STSH structure is a stylesheet - unsigned int beginStshInfo = OleUtil::getU4Bytes(headerBuffer, 0xa2); // address of STSH structure - std::size_t stshInfoLength = (std::size_t)OleUtil::getU4Bytes(headerBuffer, 0xa6); // length of STSH structure - - OleStream tableStream(myStorage, tableEntry, myBaseStream); - char *buffer = new char[stshInfoLength]; - if (!tableStream.seek(beginStshInfo, true)) { - ZLLogger::Instance().println("DocPlugin", "problems with reading STSH structure"); - return false; - } - if (tableStream.read(buffer, stshInfoLength) != stshInfoLength) { - ZLLogger::Instance().println("DocPlugin", "problems with reading STSH structure, invalid length"); - return false; - } - - std::size_t stdCount = (std::size_t)OleUtil::getU2Bytes(buffer, 2); - std::size_t stdBaseInFile = (std::size_t)OleUtil::getU2Bytes(buffer, 4); - myStyleSheet.resize(stdCount); - - std::vector<bool> isFilled; - isFilled.resize(stdCount, false); - - std::size_t stdLen = 0; - bool styleSheetWasChanged = false; - do { //make it in while loop, because some base style can be after their successors - styleSheetWasChanged = false; - for (std::size_t index = 0, offset = 2 + (std::size_t)OleUtil::getU2Bytes(buffer, 0); index < stdCount; index++, offset += 2 + stdLen) { - stdLen = (std::size_t)OleUtil::getU2Bytes(buffer, offset); - if (isFilled.at(index)) { - continue; - } - - if (stdLen == 0) { - //if record is empty, left it default - isFilled[index] = true; - continue; - } - - Style styleInfo = myStyleSheet.at(index); - - const unsigned int styleAndBaseType = OleUtil::getU2Bytes(buffer, offset + 4); - const unsigned int styleType = styleAndBaseType % 16; - const unsigned int baseStyleId = styleAndBaseType / 16; - if (baseStyleId == Style::STYLE_NIL || baseStyleId == Style::STYLE_USER) { - //if based on nil or user style, left default - } else { - int baseStyleIndex = getStyleIndex(baseStyleId, isFilled, myStyleSheet); - if (baseStyleIndex < 0) { - //this base style is not filled yet, so pass it at some time - continue; - } - styleInfo = myStyleSheet.at(baseStyleIndex); - styleInfo.StyleIdCurrent = Style::STYLE_INVALID; - } - - // parse STD structure - unsigned int tmp = OleUtil::getU2Bytes(buffer, offset + 6); - unsigned int upxCount = tmp % 16; - styleInfo.StyleIdNext = tmp / 16; - - //adding current style - myStyleSheet[index] = styleInfo; - isFilled[index] = true; - styleSheetWasChanged = true; - - std::size_t pos = 2 + stdBaseInFile; - std::size_t nameLen = (std::size_t)OleUtil::getU2Bytes(buffer, offset + pos); - nameLen = nameLen * 2 + 2; //from Unicode characters to bytes + Unicode null charachter length - pos += 2 + nameLen; - if (pos % 2 != 0) { - ++pos; - } - if (pos >= stdLen) { - continue; - } - std::size_t upxLen = (std::size_t)OleUtil::getU2Bytes(buffer, offset + pos); - if (pos + upxLen > stdLen) { - //UPX length too large - continue; - } - //for style info styleType must be equal 1 - if (styleType == 1 && upxCount >= 1) { - if (upxLen >= 2) { - styleInfo.StyleIdCurrent = OleUtil::getU2Bytes(buffer, offset + pos + 2); - getStyleInfo(0, buffer + offset + pos + 4, upxLen - 2, styleInfo); - myStyleSheet[index] = styleInfo; - } - pos += 2 + upxLen; - if (pos % 2 != 0) { - ++pos; - } - upxLen = (std::size_t)OleUtil::getU2Bytes(buffer, offset + pos); - } - if (upxLen == 0 || pos + upxLen > stdLen) { - //too small/too large - continue; - } - //for char info styleType can be equal 1 or 2 - if ((styleType == 1 && upxCount >= 2) || (styleType == 2 && upxCount >= 1)) { - CharInfo charInfo; - getCharInfo(0, Style::STYLE_INVALID, buffer + offset + pos + 2, upxLen, charInfo); - styleInfo.CurrentCharInfo = charInfo; - myStyleSheet[index] = styleInfo; - } - } - } while (styleSheetWasChanged); - delete[] buffer; - return true; -} - -bool OleMainStream::readCharInfoTable(const char *headerBuffer, const OleEntry &tableEntry) { - //PlcfbteChpx structure is table with formatting for particular run of text - unsigned int beginCharInfo = OleUtil::getU4Bytes(headerBuffer, 0xfa); // address of PlcfbteChpx structure - std::size_t charInfoLength = (std::size_t)OleUtil::getU4Bytes(headerBuffer, 0xfe); // length of PlcfbteChpx structure - if (charInfoLength < 4) { - return false; - } - - OleStream tableStream(myStorage, tableEntry, myBaseStream); - std::string buffer; - if (!readToBuffer(buffer, beginCharInfo, charInfoLength, tableStream)) { - return false; - } - - static const unsigned int CHPX_SIZE = 4; - std::size_t size = calcCountOfPLC(charInfoLength, CHPX_SIZE); - std::vector<unsigned int> charBlocks; - for (std::size_t index = 0, offset = (size + 1) * 4; index < size; ++index, offset += CHPX_SIZE) { - charBlocks.push_back(OleUtil::getU4Bytes(buffer.c_str(), offset)); - } - - char *formatPageBuffer = new char[OleStorage::BBD_BLOCK_SIZE]; - for (std::size_t index = 0; index < charBlocks.size(); ++index) { - seek(charBlocks.at(index) * OleStorage::BBD_BLOCK_SIZE, true); - if (read(formatPageBuffer, OleStorage::BBD_BLOCK_SIZE) != OleStorage::BBD_BLOCK_SIZE) { - return false; - } - unsigned int crun = OleUtil::getU1Byte(formatPageBuffer, 0x1ff); //offset with crun (count of 'run of text') - for (unsigned int index2 = 0; index2 < crun; ++index2) { - unsigned int offset = OleUtil::getU4Bytes(formatPageBuffer, index2 * 4); - unsigned int chpxOffset = 2 * OleUtil::getU1Byte(formatPageBuffer, (crun + 1) * 4 + index2); - unsigned int len = OleUtil::getU1Byte(formatPageBuffer, chpxOffset); - unsigned int charPos = 0; - if (!offsetToCharPos(offset, charPos, myPieces)) { - continue; - } - unsigned int styleId = getStyleIdByCharPos(charPos, myStyleInfoList); - - CharInfo charInfo = getStyleFromStylesheet(styleId, myStyleSheet).CurrentCharInfo; - if (chpxOffset != 0) { - getCharInfo(chpxOffset, styleId, formatPageBuffer + 1, len - 1, charInfo); - } - myCharInfoList.push_back(CharPosToCharInfo(charPos, charInfo)); - - if (chpxOffset != 0) { - InlineImageInfo pictureInfo; - if (getInlineImageInfo(chpxOffset, formatPageBuffer + 1, len - 1, pictureInfo)) { - myInlineImageInfoList.push_back(CharPosToInlineImageInfo(charPos, pictureInfo)); - } - } - - } - } - delete[] formatPageBuffer; - return true; -} - -bool OleMainStream::readFloatingImages(const char *headerBuffer, const OleEntry &tableEntry) { - //Plcspa structure is a table with information for FSPA (File Shape Address) - unsigned int beginPicturesInfo = OleUtil::getU4Bytes(headerBuffer, 0x01DA); // address of Plcspa structure - if (beginPicturesInfo == 0) { - return true; //there's no office art objects - } - unsigned int picturesInfoLength = OleUtil::getU4Bytes(headerBuffer, 0x01DE); // length of Plcspa structure - if (picturesInfoLength < 4) { - return false; - } - - OleStream tableStream(myStorage, tableEntry, myBaseStream); - std::string buffer; - if (!readToBuffer(buffer, beginPicturesInfo, picturesInfoLength, tableStream)) { - return false; - } - - static const unsigned int SPA_SIZE = 26; - std::size_t size = calcCountOfPLC(picturesInfoLength, SPA_SIZE); - - std::vector<unsigned int> picturesBlocks; - for (std::size_t index = 0, tOffset = 0; index < size; ++index, tOffset += 4) { - picturesBlocks.push_back(OleUtil::getU4Bytes(buffer.c_str(), tOffset)); - } - - for (std::size_t index = 0, tOffset = (size + 1) * 4; index < size; ++index, tOffset += SPA_SIZE) { - unsigned int spid = OleUtil::getU4Bytes(buffer.c_str(), tOffset); - FloatImageInfo info; - unsigned int charPos = picturesBlocks.at(index); - info.ShapeId = spid; - myFloatImageInfoList.push_back(CharPosToFloatImageInfo(charPos, info)); - } - - //DggInfo structure is office art object table data - unsigned int beginOfficeArtContent = OleUtil::getU4Bytes(headerBuffer, 0x22A); // address of DggInfo structure - if (beginOfficeArtContent == 0) { - return true; //there's no office art objects - } - unsigned int officeArtContentLength = OleUtil::getU4Bytes(headerBuffer, 0x022E); // length of DggInfo structure - if (officeArtContentLength < 4) { - return false; - } - - shared_ptr<OleStream> newTableStream = new OleStream(myStorage, tableEntry, myBaseStream); - shared_ptr<OleStream> newMainStream = new OleStream(myStorage, myOleEntry, myBaseStream); - if (newTableStream->open() && newMainStream->open()) { - myFLoatImageReader = new DocFloatImageReader(beginOfficeArtContent, officeArtContentLength, newTableStream, newMainStream); - myFLoatImageReader->readAll(); - } - return true; -} - -bool OleMainStream::readParagraphStyleTable(const char *headerBuffer, const OleEntry &tableEntry) { - //PlcBtePapx structure is table with formatting for all paragraphs - unsigned int beginParagraphInfo = OleUtil::getU4Bytes(headerBuffer, 0x102); // address of PlcBtePapx structure - std::size_t paragraphInfoLength = (std::size_t)OleUtil::getU4Bytes(headerBuffer, 0x106); // length of PlcBtePapx structure - if (paragraphInfoLength < 4) { - return false; - } - - OleStream tableStream(myStorage, tableEntry, myBaseStream); - std::string buffer; - if (!readToBuffer(buffer, beginParagraphInfo, paragraphInfoLength, tableStream)) { - return false; - } - - static const unsigned int PAPX_SIZE = 4; - std::size_t size = calcCountOfPLC(paragraphInfoLength, PAPX_SIZE); - - std::vector<unsigned int> paragraphBlocks; - for (std::size_t index = 0, tOffset = (size + 1) * 4; index < size; ++index, tOffset += PAPX_SIZE) { - paragraphBlocks.push_back(OleUtil::getU4Bytes(buffer.c_str(), tOffset)); - } - - char *formatPageBuffer = new char[OleStorage::BBD_BLOCK_SIZE]; - for (std::size_t index = 0; index < paragraphBlocks.size(); ++index) { - seek(paragraphBlocks.at(index) * OleStorage::BBD_BLOCK_SIZE, true); - if (read(formatPageBuffer, OleStorage::BBD_BLOCK_SIZE) != OleStorage::BBD_BLOCK_SIZE) { - return false; - } - const unsigned int paragraphsCount = OleUtil::getU1Byte(formatPageBuffer, 0x1ff); //offset with 'cpara' value (count of paragraphs) - for (unsigned int index2 = 0; index2 < paragraphsCount; ++index2) { - const unsigned int offset = OleUtil::getU4Bytes(formatPageBuffer, index2 * 4); - unsigned int papxOffset = OleUtil::getU1Byte(formatPageBuffer, (paragraphsCount + 1) * 4 + index2 * 13) * 2; - if (papxOffset <= 0) { - continue; - } - unsigned int len = OleUtil::getU1Byte(formatPageBuffer, papxOffset) * 2; - if (len == 0) { - ++papxOffset; - len = OleUtil::getU1Byte(formatPageBuffer, papxOffset) * 2; - } - - const unsigned int styleId = OleUtil::getU2Bytes(formatPageBuffer, papxOffset + 1); - Style styleInfo = getStyleFromStylesheet(styleId, myStyleSheet); - - if (len >= 3) { - getStyleInfo(papxOffset, formatPageBuffer + 3, len - 3, styleInfo); - } - - unsigned int charPos = 0; - if (!offsetToCharPos(offset, charPos, myPieces)) { - continue; - } - myStyleInfoList.push_back(CharPosToStyle(charPos, styleInfo)); - } - } - delete[] formatPageBuffer; - return true; -} - -bool OleMainStream::readSectionsInfoTable(const char *headerBuffer, const OleEntry &tableEntry) { - //PlcfSed structure is a section table - unsigned int beginOfText = OleUtil::getU4Bytes(headerBuffer, 0x18); //address of text's begin in main stream - unsigned int beginSectInfo = OleUtil::getU4Bytes(headerBuffer, 0xca); //address if PlcfSed structure - - std::size_t sectInfoLen = (std::size_t)OleUtil::getU4Bytes(headerBuffer, 0xce); //length of PlcfSed structure - if (sectInfoLen < 4) { - return false; - } - - OleStream tableStream(myStorage, tableEntry, myBaseStream); - std::string buffer; - if (!readToBuffer(buffer, beginSectInfo, sectInfoLen, tableStream)) { - return false; - } - - static const unsigned int SED_SIZE = 12; - std::size_t decriptorsCount = calcCountOfPLC(sectInfoLen, SED_SIZE); - - //saving the section offsets (in character positions) - std::vector<unsigned int> charPos; - for (std::size_t index = 0, tOffset = 0; index < decriptorsCount; ++index, tOffset += 4) { - unsigned int ulTextOffset = OleUtil::getU4Bytes(buffer.c_str(), tOffset); - charPos.push_back(beginOfText + ulTextOffset); - } - - //saving sepx offsets - std::vector<unsigned int> sectPage; - for (std::size_t index = 0, tOffset = (decriptorsCount + 1) * 4; index < decriptorsCount; ++index, tOffset += SED_SIZE) { - sectPage.push_back(OleUtil::getU4Bytes(buffer.c_str(), tOffset + 2)); - } - - //reading the section properties - char tmpBuffer[2]; - for (std::size_t index = 0; index < sectPage.size(); ++index) { - if (sectPage.at(index) == 0xffffffffUL) { //check for invalid record, to make default section info - SectionInfo sectionInfo; - sectionInfo.CharPosition = charPos.at(index); - mySectionInfoList.push_back(sectionInfo); - continue; - } - //getting number of bytes to read - if (!seek(sectPage.at(index), true)) { - continue; - } - if (read(tmpBuffer, 2) != 2) { - continue; - } - std::size_t bytes = 2 + (std::size_t)OleUtil::getU2Bytes(tmpBuffer, 0); - - if (!seek(sectPage.at(index), true)) { - continue; - } - char *formatPageBuffer = new char[bytes]; - if (read(formatPageBuffer, bytes) != bytes) { - delete[] formatPageBuffer; - continue; - } - SectionInfo sectionInfo; - sectionInfo.CharPosition = charPos.at(index); - getSectionInfo(formatPageBuffer + 2, bytes - 2, sectionInfo); - mySectionInfoList.push_back(sectionInfo); - delete[] formatPageBuffer; - } - return true; -} - -void OleMainStream::getStyleInfo(unsigned int papxOffset, const char *grpprlBuffer, unsigned int bytes, Style &styleInfo) { - int tmp, toDelete, toAdd; - unsigned int offset = 0; - while (bytes >= offset + 2) { - unsigned int curPrlLength = 0; - switch (OleUtil::getU2Bytes(grpprlBuffer, papxOffset + offset)) { - case 0x2403: - styleInfo.Alignment = (Style::AlignmentType)OleUtil::getU1Byte(grpprlBuffer, papxOffset + offset + 2); - break; - case 0x4610: - styleInfo.LeftIndent += OleUtil::getU2Bytes(grpprlBuffer, papxOffset + offset + 2); - if (styleInfo.LeftIndent < 0) { - styleInfo.LeftIndent = 0; - } - break; - case 0xc60d: // ChgTabsPapx - case 0xc615: // ChgTabs - tmp = OleUtil::get1Byte(grpprlBuffer, papxOffset + offset + 2); - if (tmp < 2) { - curPrlLength = 1; - break; - } - toDelete = OleUtil::getU1Byte(grpprlBuffer, papxOffset + offset + 3); - if (tmp < 2 + 2 * toDelete) { - curPrlLength = 1; - break; - } - toAdd = OleUtil::getU1Byte(grpprlBuffer, papxOffset + offset + 4 + 2 * toDelete); - if (tmp < 2 + 2 * toDelete + 2 * toAdd) { - curPrlLength = 1; - break; - } - break; - case 0x840e: - styleInfo.RightIndent = (int)OleUtil::getU2Bytes(grpprlBuffer, papxOffset + offset + 2); - break; - case 0x840f: - styleInfo.LeftIndent = (int)OleUtil::getU2Bytes(grpprlBuffer, papxOffset + offset + 2); - break; - case 0x8411: - styleInfo.FirstLineIndent = (int)OleUtil::getU2Bytes(grpprlBuffer, papxOffset + offset + 2); - break; - case 0xa413: - styleInfo.BeforeParagraphIndent = OleUtil::getU2Bytes(grpprlBuffer, papxOffset + offset + 2); - break; - case 0xa414: - styleInfo.AfterParagraphIndent = OleUtil::getU2Bytes(grpprlBuffer, papxOffset + offset + 2); - break; - case 0x2407: - styleInfo.HasPageBreakBefore = OleUtil::getU1Byte(grpprlBuffer, papxOffset + offset + 2) == 0x01; - break; - default: - break; - } - if (curPrlLength == 0) { - curPrlLength = getPrlLength(grpprlBuffer, papxOffset + offset); - } - offset += curPrlLength; - } - -} - -void OleMainStream::getCharInfo(unsigned int chpxOffset, unsigned int /*styleId*/, const char *grpprlBuffer, unsigned int bytes, CharInfo &charInfo) { - unsigned int sprm = 0; //single propery modifier - unsigned int offset = 0; - while (bytes >= offset + 2) { - switch (OleUtil::getU2Bytes(grpprlBuffer, chpxOffset + offset)) { - case 0x0835: //bold - sprm = OleUtil::getU1Byte(grpprlBuffer, chpxOffset + offset + 2); - switch (sprm) { - case UNSET: - charInfo.FontStyle &= ~CharInfo::FONT_BOLD; - break; - case SET: - charInfo.FontStyle |= CharInfo::FONT_BOLD; - break; - case UNCHANGED: - break; - case NEGATION: - charInfo.FontStyle ^= CharInfo::FONT_BOLD; - break; - default: - break; - } - break; - case 0x0836: //italic - sprm = OleUtil::getU1Byte(grpprlBuffer, chpxOffset + offset + 2); - switch (sprm) { - case UNSET: - charInfo.FontStyle &= ~CharInfo::FONT_ITALIC; - break; - case SET: - charInfo.FontStyle |= CharInfo::FONT_ITALIC; - break; - case UNCHANGED: - break; - case NEGATION: - charInfo.FontStyle ^= CharInfo::FONT_ITALIC; - break; - default: - break; - } - break; - case 0x4a43: //size of font - charInfo.FontSize = OleUtil::getU2Bytes(grpprlBuffer, chpxOffset + offset + 2); - break; - default: - break; - } - offset += getPrlLength(grpprlBuffer, chpxOffset + offset); - } - -} - -void OleMainStream::getSectionInfo(const char *grpprlBuffer, std::size_t bytes, SectionInfo §ionInfo) { - unsigned int tmp; - std::size_t offset = 0; - while (bytes >= offset + 2) { - switch (OleUtil::getU2Bytes(grpprlBuffer, offset)) { - case 0x3009: //new page - tmp = OleUtil::getU1Byte(grpprlBuffer, offset + 2); - sectionInfo.IsNewPage = (tmp != 0 && tmp != 1); - break; - default: - break; - } - offset += getPrlLength(grpprlBuffer, offset); - } -} - -bool OleMainStream::getInlineImageInfo(unsigned int chpxOffset, const char *grpprlBuffer, unsigned int bytes, InlineImageInfo &pictureInfo) { - //p. 105 of [MS-DOC] documentation - unsigned int offset = 0; - bool isFound = false; - while (bytes >= offset + 2) { - switch (OleUtil::getU2Bytes(grpprlBuffer, chpxOffset + offset)) { - case 0x080a: // ole object, p.107 [MS-DOC] - if (OleUtil::getU1Byte(grpprlBuffer, chpxOffset + offset + 2) == 0x01) { - return false; - } - break; - case 0x0806: // is not a picture, but a binary data? (sprmCFData, p.106 [MS-DOC]) - if (OleUtil::getU4Bytes(grpprlBuffer, chpxOffset + offset + 2) == 0x01) { - return false; - } - break; -// case 0x0855: // sprmCFSpec, p.117 [MS-DOC], MUST BE applied with a value of 1 (see p.105 [MS-DOC]) -// if (OleUtil::getU1Byte(grpprlBuffer, chpxOffset + offset + 2) != 0x01) { -// return false; -// } -// break; - case 0x6a03: // location p.105 [MS-DOC] - pictureInfo.DataPosition = OleUtil::getU4Bytes(grpprlBuffer, chpxOffset + offset + 2); - isFound = true; - break; - default: - break; - } - offset += getPrlLength(grpprlBuffer, chpxOffset + offset); - } - return isFound; -} - -OleMainStream::Style OleMainStream::getStyleFromStylesheet(unsigned int styleId, const StyleSheet &stylesheet) { - //TODO optimize it: StyleSheet can be map structure with styleId key - Style style; - if (styleId != Style::STYLE_INVALID && styleId != Style::STYLE_NIL && styleId != Style::STYLE_USER) { - for (std::size_t index = 0; index < stylesheet.size(); ++index) { - if (stylesheet.at(index).StyleIdCurrent == styleId) { - return stylesheet.at(index); - } - } - } - style.StyleIdCurrent = styleId; - return style; -} - -int OleMainStream::getStyleIndex(unsigned int styleId, const std::vector<bool> &isFilled, const StyleSheet &stylesheet) { - //TODO optimize it: StyleSheet can be map structure with styleId key - //in that case, this method will be excess - if (styleId == Style::STYLE_INVALID) { - return -1; - } - for (int index = 0; index < (int)stylesheet.size(); ++index) { - if (isFilled.at(index) && stylesheet.at(index).StyleIdCurrent == styleId) { - return index; - } - } - return -1; -} - -unsigned int OleMainStream::getStyleIdByCharPos(unsigned int charPos, const StyleInfoList &styleInfoList) { - unsigned int styleId = Style::STYLE_INVALID; - for (std::size_t i = 0; i < styleInfoList.size(); ++i) { - const Style &info = styleInfoList.at(i).second; - if (i == styleInfoList.size() - 1) { //if last - styleId = info.StyleIdCurrent; - break; - } - unsigned int curOffset = styleInfoList.at(i).first; - unsigned int nextOffset = styleInfoList.at(i + 1).first; - if (charPos >= curOffset && charPos < nextOffset) { - styleId = info.StyleIdCurrent; - break; - } - } - return styleId; -} - -bool OleMainStream::offsetToCharPos(unsigned int offset, unsigned int &charPos, const Pieces &pieces) { - if (pieces.empty()) { - return false; - } - if ((unsigned int)pieces.front().Offset > offset) { - charPos = 0; - return true; - } - if ((unsigned int)(pieces.back().Offset + pieces.back().Length) <= offset) { - return false; - } - - std::size_t pieceNumber = 0; - for (std::size_t i = 0; i < pieces.size(); ++i) { - if (i == pieces.size() - 1) { //if last - pieceNumber = i; - break; - } - unsigned int curOffset = pieces.at(i).Offset; - unsigned int nextOffset = pieces.at(i + 1).Offset; - if (offset >= curOffset && offset < nextOffset) { - pieceNumber = i; - break; - } - } - - const Piece &piece = pieces.at(pieceNumber); - unsigned int diffOffset = offset - piece.Offset; - if (!piece.IsANSI) { - diffOffset /= 2; - } - charPos = piece.startCP + diffOffset; - return true; -} - -bool OleMainStream::readToBuffer(std::string &result, unsigned int offset, std::size_t length, OleStream &stream) { - char *buffer = new char[length]; - stream.seek(offset, true); - if (stream.read(buffer, length) != length) { - return false; - } - result = std::string(buffer, length); - delete[] buffer; - return true; -} - -unsigned int OleMainStream::calcCountOfPLC(unsigned int totalSize, unsigned int elementSize) { - //calculates count of elements in PLC structure, formula from p.30 [MS-DOC] - return (totalSize - 4) / (4 + elementSize); -} - -unsigned int OleMainStream::getPrlLength(const char *grpprlBuffer, unsigned int byteNumber) { - unsigned int tmp; - unsigned int opCode = OleUtil::getU2Bytes(grpprlBuffer, byteNumber); - switch (opCode & 0xe000) { - case 0x0000: - case 0x2000: - return 3; - case 0x4000: - case 0x8000: - case 0xA000: - return 4; - case 0xE000: - return 5; - case 0x6000: - return 6; - case 0xC000: - //counting of info length - tmp = OleUtil::getU1Byte(grpprlBuffer, byteNumber + 2); - if (opCode == 0xc615 && tmp == 255) { - unsigned int del = OleUtil::getU1Byte(grpprlBuffer, byteNumber + 3); - unsigned int add = OleUtil::getU1Byte(grpprlBuffer, byteNumber + 4 + del * 4); - tmp = 2 + del * 4 + add * 3; - } - return 3 + tmp; - default: - return 1; - } -} diff --git a/fbreader/src/formats/doc/OleMainStream.h b/fbreader/src/formats/doc/OleMainStream.h deleted file mode 100644 index 378f037..0000000 --- a/fbreader/src/formats/doc/OleMainStream.h +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Copyright (C) 2004-2012 Geometer Plus <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#ifndef __OLEMAINSTREAM_H__ -#define __OLEMAINSTREAM_H__ - -#include <vector> -#include <string> - -#include "OleStream.h" -#include "DocFloatImageReader.h" - -class OleMainStream : public OleStream { - -public: - struct Piece { - enum PieceType { - PIECE_TEXT, - PIECE_FOOTNOTE, - PIECE_OTHER - }; - - int Offset; // TODO: maybe make it unsigned int - int Length; // TODO: maybe make it unsigned int - bool IsANSI; - PieceType Type; - unsigned int startCP; - }; - typedef std::vector<Piece> Pieces; - - struct CharInfo { - enum Font { - FONT_REGULAR = 0, - FONT_BOLD = 1 << 0, - FONT_ITALIC = 1 << 1, - FONT_UNDERLINE = 1 << 2, - FONT_CAPITALS = 1 << 3, - FONT_SMALL_CAPS = 1 << 4, - FONT_STRIKE = 1 << 5, - FONT_HIDDEN = 1 << 6, - FONT_MARKDEL = 1 << 7, - FONT_SUPERSCRIPT = 1 << 8, - FONT_SUBSCRIPT = 1 << 9 - }; - - unsigned int FontStyle; - unsigned int FontSize; - - CharInfo(); - }; - typedef std::pair<unsigned int, CharInfo> CharPosToCharInfo; - typedef std::vector<CharPosToCharInfo > CharInfoList; - - struct Style { - enum AlignmentType { - ALIGNMENT_LEFT = 0x00, - ALIGNMENT_CENTER = 0x01, - ALIGNMENT_RIGHT = 0x02, - ALIGNMENT_JUSTIFY = 0x03, - ALIGNMENT_DEFAULT // for case if alignment is not setted by word - }; - - // style Ids: - // (this is not full list of possible style ids, enum is used for using in switch-case) - enum StyleID { - STYLE_H1 = 0x1, - STYLE_H2 = 0x2, - STYLE_H3 = 0x3, - STYLE_USER = 0xFFE, - STYLE_NIL = 0xFFF, - STYLE_INVALID = 0xFFFF - }; - - unsigned int StyleIdCurrent; - unsigned int StyleIdNext; // Next style unless overruled - - bool HasPageBreakBefore; - unsigned int BeforeParagraphIndent; // Vertical indent before paragraph, pixels - unsigned int AfterParagraphIndent; // Vertical indent after paragraph, pixels - int LeftIndent; - int FirstLineIndent; - int RightIndent; - AlignmentType Alignment; - CharInfo CurrentCharInfo; - - Style(); - }; - - typedef std::pair<unsigned int, Style> CharPosToStyle; - typedef std::vector<CharPosToStyle> StyleInfoList; - typedef std::vector<Style> StyleSheet; - - struct SectionInfo { - unsigned int CharPosition; - bool IsNewPage; - - SectionInfo(); - }; - typedef std::vector<SectionInfo> SectionInfoList; - - struct Bookmark { - unsigned int CharPosition; - std::string Name; - }; - typedef std::vector<Bookmark> BookmarksList; - - struct InlineImageInfo { - unsigned int DataPosition; - - InlineImageInfo(); - }; - typedef std::pair<unsigned int, InlineImageInfo> CharPosToInlineImageInfo; - typedef std::vector<CharPosToInlineImageInfo> InlineImageInfoList; - - struct FloatImageInfo { - unsigned int ShapeId; - FloatImageInfo(); - }; - typedef std::pair<unsigned int, FloatImageInfo> CharPosToFloatImageInfo; - typedef std::vector<CharPosToFloatImageInfo> FloatImageInfoList; - - enum ImageType { //see p. 60 [MS-ODRAW] - IMAGE_EMF = 0xF01A, - IMAGE_WMF = 0xF01B, - IMAGE_PICT = 0xF01C, - IMAGE_JPEG = 0xF01D, - IMAGE_PNG = 0xF01E, - IMAGE_DIB = 0xF01F, - IMAGE_TIFF = 0xF029, - IMAGE_JPEG2 = 0xF02A - }; - -public: - OleMainStream(shared_ptr<OleStorage> storage, OleEntry oleEntry, shared_ptr<ZLInputStream> stream); - -public: - bool open(bool doReadFormattingData); - const Pieces &getPieces() const; - const CharInfoList &getCharInfoList() const; - const StyleInfoList &getStyleInfoList() const; - const BookmarksList &getBookmarks() const; - const InlineImageInfoList &getInlineImageInfoList() const; - const FloatImageInfoList &getFloatImageInfoList() const; - - ZLFileImage::Blocks getFloatImage(unsigned int shapeId) const; - ZLFileImage::Blocks getInlineImage(unsigned int dataPos) const; - -private: - bool readFIB(const char *headerBuffer); - bool readPieceTable(const char *headerBuffer, const OleEntry &tableEntry); - bool readBookmarks(const char *headerBuffer, const OleEntry &tableEntry); - bool readStylesheet(const char *headerBuffer, const OleEntry &tableEntry); - bool readSectionsInfoTable(const char *headerBuffer, const OleEntry &tableEntry); - bool readParagraphStyleTable(const char *headerBuffer, const OleEntry &tableEntry); - bool readCharInfoTable(const char *headerBuffer, const OleEntry &tableEntry); - bool readFloatingImages(const char *headerBuffer, const OleEntry &tableEntry); - -private: //readPieceTable helpers methods - static std::string getPiecesTableBuffer(const char *headerBuffer, OleStream &tableStream); - static void splitPieces(const Pieces &source, Pieces &dest1, Pieces &dest2, Piece::PieceType type1, Piece::PieceType type2, int boundary); - -private: //formatting reader helpers methods - static unsigned int getPrlLength(const char *grpprlBuffer, unsigned int byteNumber); - static void getCharInfo(unsigned int chpxOffset, unsigned int styleId, const char *grpprlBuffer, unsigned int bytes, CharInfo &charInfo); - static void getStyleInfo(unsigned int papxOffset, const char *grpprlBuffer, unsigned int bytes, Style &styleInfo); - static void getSectionInfo(const char *grpprlBuffer, std::size_t bytes, SectionInfo §ionInfo); - static bool getInlineImageInfo(unsigned int chpxOffset, const char *grpprlBuffer, unsigned int bytes, InlineImageInfo &pictureInfo); - - static Style getStyleFromStylesheet(unsigned int styleId, const StyleSheet &stylesheet); - static int getStyleIndex(unsigned int styleId, const std::vector<bool> &isFilled, const StyleSheet &stylesheet); - static unsigned int getStyleIdByCharPos(unsigned int offset, const StyleInfoList &styleInfoList); - - static bool offsetToCharPos(unsigned int offset, unsigned int &charPos, const Pieces &pieces); - static bool readToBuffer(std::string &result, unsigned int offset, std::size_t length, OleStream &stream); - - static unsigned int calcCountOfPLC(unsigned int totalSize, unsigned int elementSize); - -private: - enum PrlFlag { - UNSET = 0, - SET = 1, - UNCHANGED = 128, - NEGATION = 129 - }; - -private: - int myStartOfText; - int myEndOfText; - - Pieces myPieces; - - StyleSheet myStyleSheet; - - CharInfoList myCharInfoList; - StyleInfoList myStyleInfoList; - SectionInfoList mySectionInfoList; - InlineImageInfoList myInlineImageInfoList; - FloatImageInfoList myFloatImageInfoList; - - BookmarksList myBookmarks; - - shared_ptr<OleStream> myDataStream; - - shared_ptr<DocFloatImageReader> myFLoatImageReader; -}; - -#endif /* __OLEMAINSTREAM_H__ */ diff --git a/fbreader/src/formats/doc/OleStorage.cpp b/fbreader/src/formats/doc/OleStorage.cpp deleted file mode 100644 index a7ab81a..0000000 --- a/fbreader/src/formats/doc/OleStorage.cpp +++ /dev/null @@ -1,304 +0,0 @@ -/* - * Copyright (C) 2004-2012 Geometer Plus <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#include <ZLLogger.h> - -#include "OleStorage.h" -#include "OleUtil.h" - -#include <cstring> - -const std::size_t OleStorage::BBD_BLOCK_SIZE = 512; - -OleStorage::OleStorage() { - clear(); -} - -void OleStorage::clear() { - myInputStream = 0; - mySectorSize = 0; - myShortSectorSize = 0; - myStreamSize = 0; - myRootEntryIndex = -1; - - myDIFAT.clear(); - myBBD.clear(); - mySBD.clear(); - myProperties.clear(); - myEntries.clear(); -} - - - -bool OleStorage::init(shared_ptr<ZLInputStream> stream, std::size_t streamSize) { - clear(); - - myInputStream = stream; - myStreamSize = streamSize; - myInputStream->seek(0, true); - - char oleBuf[BBD_BLOCK_SIZE]; - std::size_t ret = myInputStream->read(oleBuf, BBD_BLOCK_SIZE); - if (ret != BBD_BLOCK_SIZE) { - clear(); - return false; - } - static const char OLE_SIGN[] = {(char)0xD0, (char)0xCF, (char)0x11, (char)0xE0, (char)0xA1, (char)0xB1, (char)0x1A, (char)0xE1, 0}; - if (std::strncmp(oleBuf, OLE_SIGN, 8) != 0) { - clear(); - return false; - } - mySectorSize = 1 << OleUtil::getU2Bytes(oleBuf, 0x1e); //offset for value of big sector size - myShortSectorSize = 1 << OleUtil::getU2Bytes(oleBuf, 0x20); //offset for value of small sector size - - if (readDIFAT(oleBuf) && readBBD(oleBuf) && readSBD(oleBuf) && readProperties(oleBuf) && readAllEntries()) { - return true; - } - clear(); - return false; -} - -bool OleStorage::readDIFAT(char *oleBuf) { - int difatBlock = OleUtil::get4Bytes(oleBuf, 0x44); //address for first difat sector - int difatSectorNumbers = OleUtil::get4Bytes(oleBuf, 0x48); //numbers of additional difat records - - //436 of difat records are stored in header, by offset 0x4c - for (unsigned int i = 0; i < 436; i += 4) { - myDIFAT.push_back(OleUtil::get4Bytes(oleBuf + 0x4c, i)); - } - - //for files > 6.78 mb we need read additional DIFAT fields - for (int i = 0; difatBlock > 0 && i < difatSectorNumbers; ++i) { - ZLLogger::Instance().println("DocPlugin", "Read additional data for DIFAT"); - char buffer[mySectorSize]; - myInputStream->seek(BBD_BLOCK_SIZE + difatBlock * mySectorSize, true); - if (myInputStream->read(buffer, mySectorSize) != mySectorSize) { - ZLLogger::Instance().println("DocPlugin", "Error read DIFAT!"); - return false; - } - for (unsigned int j = 0; j < (mySectorSize - 4); j += 4) { - myDIFAT.push_back(OleUtil::get4Bytes(buffer, j)); - } - difatBlock = OleUtil::get4Bytes(buffer, mySectorSize - 4); //next DIFAT block is pointed at the end of the sector - } - - //removing unusable DIFAT links - //0xFFFFFFFF means "free section" - while (!myDIFAT.empty() && myDIFAT.back() == (int)0xFFFFFFFF) { - myDIFAT.pop_back(); - } - return true; -} - -bool OleStorage::readBBD(char *oleBuf) { - char buffer[mySectorSize]; - unsigned int bbdNumberBlocks = OleUtil::getU4Bytes(oleBuf, 0x2c); //number of big blocks - - if (myDIFAT.size() < bbdNumberBlocks) { - //TODO maybe add check on myDIFAT == bbdNumberBlocks - ZLLogger::Instance().println("DocPlugin", "Wrong number of FAT blocks value"); - return false; - } - - for (unsigned int i = 0; i < bbdNumberBlocks; ++i) { - int bbdSector = myDIFAT.at(i); - if (bbdSector >= (int)(myStreamSize / mySectorSize) || bbdSector < 0) { - ZLLogger::Instance().println("DocPlugin", "Bad BBD entry!"); - return false; - } - myInputStream->seek(BBD_BLOCK_SIZE + bbdSector * mySectorSize, true); - if (myInputStream->read(buffer, mySectorSize) != mySectorSize) { - ZLLogger::Instance().println("DocPlugin", "Error during reading BBD!"); - return false; - } - for (unsigned int j = 0; j < mySectorSize; j += 4) { - myBBD.push_back(OleUtil::get4Bytes(buffer, j)); - } - } - return true; -} - -bool OleStorage::readSBD(char *oleBuf) { - int sbdCur = OleUtil::get4Bytes(oleBuf, 0x3c); //address of first small sector - int sbdCount = OleUtil::get4Bytes(oleBuf, 0x40); //count of small sectors - - if (sbdCur <= 0) { - ZLLogger::Instance().println("DocPlugin", "There's no SBD, don't read it"); - return true; - } - - char buffer[mySectorSize]; - for (int i = 0; i < sbdCount; ++i) { - if (i != 0) { - if (sbdCur < 0 || (unsigned int)sbdCur >= myBBD.size()) { - ZLLogger::Instance().println("DocPlugin", "error during parsing SBD"); - return false; - } - sbdCur = myBBD.at(sbdCur); - } - if (sbdCur <= 0) { - break; - } - myInputStream->seek(BBD_BLOCK_SIZE + sbdCur * mySectorSize, true); - if (myInputStream->read(buffer, mySectorSize) != mySectorSize) { - ZLLogger::Instance().println("DocPlugin", "reading error during parsing SBD"); - return false; - } - for (unsigned int j = 0; j < mySectorSize; j += 4) { - mySBD.push_back(OleUtil::get4Bytes(buffer, j)); - } - - } - return true; -} - -bool OleStorage::readProperties(char *oleBuf) { - int propCur = OleUtil::get4Bytes(oleBuf, 0x30); //offset for address of sector with first property - if (propCur < 0) { - ZLLogger::Instance().println("DocPlugin", "Wrong first directory sector location"); - return false; - } - - char buffer[mySectorSize]; - do { - myInputStream->seek(BBD_BLOCK_SIZE + propCur * mySectorSize, true); - if (myInputStream->read(buffer, mySectorSize) != mySectorSize) { - ZLLogger::Instance().println("DocPlugin", "Error during reading properties"); - return false; - } - for (unsigned int j = 0; j < mySectorSize; j += 128) { - myProperties.push_back(std::string(buffer + j, 128)); - } - if (propCur < 0 || (std::size_t)propCur >= myBBD.size()) { - break; - } - propCur = myBBD.at(propCur); - } while (propCur >= 0 && propCur < (int)(myStreamSize / mySectorSize)); - return true; -} - -bool OleStorage::readAllEntries() { - int propCount = myProperties.size(); - for (int i = 0; i < propCount; ++i) { - OleEntry entry; - bool result = readOleEntry(i, entry); - if (!result) { - break; - } - if (entry.type == OleEntry::ROOT_DIR) { - myRootEntryIndex = i; - } - myEntries.push_back(entry); - } - if (myRootEntryIndex < 0) { - return false; - } - return true; -} - -bool OleStorage::readOleEntry(int propNumber, OleEntry &e) { - static const std::string ROOT_ENTRY = "Root Entry"; - - std::string property = myProperties.at(propNumber); - - char oleType = property.at(0x42); //offset for Ole Type - if (oleType != 1 && oleType != 2 && oleType != 3 && oleType != 5) { - ZLLogger::Instance().println("DocPlugin", "entry -- not right ole type"); - return false; - } - - e.type = (OleEntry::Type)oleType; - - int nameLength = OleUtil::getU2Bytes(property.c_str(), 0x40); //offset for value entry's name length - e.name.clear(); - e.name.reserve(33); //max size of entry name - - if ((unsigned int)nameLength >= property.size()) { - return false; - } - for (int i = 0; i < nameLength; i+=2) { - char c = property.at(i); - if (c != 0) { - e.name += c; - } - } - - e.length = OleUtil::getU4Bytes(property.c_str(), 0x78); //offset for entry's length value - e.isBigBlock = e.length >= 0x1000 || e.name == ROOT_ENTRY; - - // Read sector chain - if (property.size() < 0x74 + 4) { - ZLLogger::Instance().println("DocPlugin", "problems with reading ole entry"); - return false; - } - int chainCur = OleUtil::get4Bytes(property.c_str(), 0x74); //offset for start block of entry - if (chainCur >= 0 && (chainCur <= (int)(myStreamSize / (e.isBigBlock ? mySectorSize : myShortSectorSize)))) { - //filling blocks with chains - do { - e.blocks.push_back((unsigned int)chainCur); - if (e.isBigBlock && (std::size_t)chainCur < myBBD.size()) { - chainCur = myBBD.at(chainCur); - } else if (!mySBD.empty() && (std::size_t)chainCur < mySBD.size()) { - chainCur = mySBD.at(chainCur); - } else { - chainCur = -1; - } - } while (chainCur > 0 && - chainCur < (int)(e.isBigBlock ? myBBD.size() : mySBD.size()) && - e.blocks.size() <= e.length / (e.isBigBlock ? mySectorSize : myShortSectorSize)); - } - e.length = std::min(e.length, (unsigned int)((e.isBigBlock ? mySectorSize : myShortSectorSize) * e.blocks.size())); - return true; -} - -bool OleStorage::countFileOffsetOfBlock(const OleEntry &e, unsigned int blockNumber, unsigned int &result) const { - //TODO maybe better syntax can be used? - if (e.blocks.size() <= (std::size_t)blockNumber) { - ZLLogger::Instance().println("DocPlugin", "countFileOffsetOfBlock can't be done, blockNumber is invalid"); - return false; - } - if (e.isBigBlock) { - result = BBD_BLOCK_SIZE + e.blocks.at(blockNumber) * mySectorSize; - } else { - unsigned int sbdPerSector = mySectorSize / myShortSectorSize; - unsigned int sbdSectorNumber = e.blocks.at(blockNumber) / sbdPerSector; - unsigned int sbdSectorMod = e.blocks.at(blockNumber) % sbdPerSector; - if (myEntries.at(myRootEntryIndex).blocks.size() <= (std::size_t)sbdSectorNumber) { - ZLLogger::Instance().println("DocPlugin", "countFileOffsetOfBlock can't be done, invalid sbd data"); - return false; - } - result = BBD_BLOCK_SIZE + myEntries.at(myRootEntryIndex).blocks.at(sbdSectorNumber) * mySectorSize + sbdSectorMod * myShortSectorSize; - } - return true; -} - -bool OleStorage::getEntryByName(std::string name, OleEntry &returnEntry) const { - //TODO fix the workaround for duplicates streams: now it takes a stream with max length - unsigned int maxLength = 0; - for (std::size_t i = 0; i < myEntries.size(); ++i) { - const OleEntry &entry = myEntries.at(i); - if (entry.name == name && entry.length >= maxLength) { - returnEntry = entry; - maxLength = entry.length; - } - } - return maxLength > 0; -} - - diff --git a/fbreader/src/formats/doc/OleStorage.h b/fbreader/src/formats/doc/OleStorage.h deleted file mode 100644 index 584ee94..0000000 --- a/fbreader/src/formats/doc/OleStorage.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (C) 2004-2012 Geometer Plus <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#ifndef __OLESTORAGE_H__ -#define __OLESTORAGE_H__ - -#include <algorithm> -#include <vector> -#include <string> - -#include <ZLInputStream.h> - -struct OleEntry { - enum Type { - DIR = 1, - STREAM = 2, - ROOT_DIR = 5, - LOCK_BYTES =3 - }; - - typedef std::vector<unsigned int> Blocks; - - std::string name; - unsigned int length; - Type type; - Blocks blocks; - bool isBigBlock; -}; - -class OleStorage { - -public: - static const std::size_t BBD_BLOCK_SIZE; - -public: - OleStorage(); - bool init(shared_ptr<ZLInputStream>, std::size_t streamSize); - void clear(); - const std::vector<OleEntry> &getEntries() const; - bool getEntryByName(std::string name, OleEntry &entry) const; - - unsigned int getSectorSize() const; - unsigned int getShortSectorSize() const; - -public: //TODO make private - bool countFileOffsetOfBlock(const OleEntry &e, unsigned int blockNumber, unsigned int &result) const; - -private: - bool readDIFAT(char *oleBuf); - bool readBBD(char *oleBuf); - bool readSBD(char *oleBuf); - bool readProperties(char *oleBuf); - - bool readAllEntries(); - bool readOleEntry(int propNumber, OleEntry &entry); - -private: - - shared_ptr<ZLInputStream> myInputStream; - unsigned int mySectorSize, myShortSectorSize; - - std::size_t myStreamSize; - std::vector<int> myDIFAT; //double-indirect file allocation table - std::vector<int> myBBD; //Big Block Depot - std::vector<int> mySBD; //Small Block Depot - std::vector<std::string> myProperties; - std::vector<OleEntry> myEntries; - int myRootEntryIndex; - -}; - -inline const std::vector<OleEntry> &OleStorage::getEntries() const { return myEntries; } -inline unsigned int OleStorage::getSectorSize() const { return mySectorSize; } -inline unsigned int OleStorage::getShortSectorSize() const { return myShortSectorSize; } - -#endif /* __OLESTORAGE_H__ */ diff --git a/fbreader/src/formats/doc/OleStream.cpp b/fbreader/src/formats/doc/OleStream.cpp deleted file mode 100644 index 8de1cc4..0000000 --- a/fbreader/src/formats/doc/OleStream.cpp +++ /dev/null @@ -1,221 +0,0 @@ -/* - * Copyright (C) 2004-2012 Geometer Plus <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#include <ZLLogger.h> - -#include "OleStream.h" -#include "OleUtil.h" - -OleStream::OleStream(shared_ptr<OleStorage> storage, OleEntry oleEntry, shared_ptr<ZLInputStream> stream) : - myStorage(storage), - myOleEntry(oleEntry), - myBaseStream(stream) { - myOleOffset = 0; -} - - -bool OleStream::open() { - if (myOleEntry.type != OleEntry::STREAM) { - return false; - } - return true; -} - -std::size_t OleStream::read(char *buffer, std::size_t maxSize) { - std::size_t length = maxSize; - std::size_t readedBytes = 0; - std::size_t bytesLeftInCurBlock; - unsigned int newFileOffset; - - unsigned int curBlockNumber, modBlock; - std::size_t toReadBlocks, toReadBytes; - - if (myOleOffset + length > myOleEntry.length) { - length = myOleEntry.length - myOleOffset; - } - - std::size_t sectorSize = (std::size_t)(myOleEntry.isBigBlock ? myStorage->getSectorSize() : myStorage->getShortSectorSize()); - - curBlockNumber = myOleOffset / sectorSize; - if (curBlockNumber >= myOleEntry.blocks.size()) { - return 0; - } - modBlock = myOleOffset % sectorSize; - bytesLeftInCurBlock = sectorSize - modBlock; - if (bytesLeftInCurBlock < length) { - toReadBlocks = (length - bytesLeftInCurBlock) / sectorSize; - toReadBytes = (length - bytesLeftInCurBlock) % sectorSize; - } else { - toReadBlocks = toReadBytes = 0; - } - - if (!myStorage->countFileOffsetOfBlock(myOleEntry, curBlockNumber, newFileOffset)) { - return 0; - } - newFileOffset += modBlock; - - myBaseStream->seek(newFileOffset, true); - - readedBytes = myBaseStream->read(buffer, std::min(length, bytesLeftInCurBlock)); - for (std::size_t i = 0; i < toReadBlocks; ++i) { - if (++curBlockNumber >= myOleEntry.blocks.size()) { - break; - } - if (!myStorage->countFileOffsetOfBlock(myOleEntry, curBlockNumber, newFileOffset)) { - return readedBytes; - } - myBaseStream->seek(newFileOffset, true); - readedBytes += myBaseStream->read(buffer + readedBytes, std::min(length - readedBytes, sectorSize)); - } - if (toReadBytes > 0 && ++curBlockNumber < myOleEntry.blocks.size()) { - if (!myStorage->countFileOffsetOfBlock(myOleEntry, curBlockNumber, newFileOffset)) { - return readedBytes; - } - myBaseStream->seek(newFileOffset, true); - readedBytes += myBaseStream->read(buffer + readedBytes, toReadBytes); - } - myOleOffset += readedBytes; - return readedBytes; -} - -bool OleStream::eof() const { - return (myOleOffset >= myOleEntry.length); -} - - -void OleStream::close() { -} - -bool OleStream::seek(unsigned int offset, bool absoluteOffset) { - unsigned int newOleOffset = 0; - unsigned int newFileOffset; - - if (absoluteOffset) { - newOleOffset = offset; - } else { - newOleOffset = myOleOffset + offset; - } - - newOleOffset = std::min(newOleOffset, myOleEntry.length); - - unsigned int sectorSize = (myOleEntry.isBigBlock ? myStorage->getSectorSize() : myStorage->getShortSectorSize()); - unsigned int blockNumber = newOleOffset / sectorSize; - if (blockNumber >= myOleEntry.blocks.size()) { - return false; - } - - unsigned int modBlock = newOleOffset % sectorSize; - if (!myStorage->countFileOffsetOfBlock(myOleEntry, blockNumber, newFileOffset)) { - return false; - } - newFileOffset += modBlock; - myBaseStream->seek(newFileOffset, true); - myOleOffset = newOleOffset; - return true; -} - -std::size_t OleStream::offset() { - return myOleOffset; -} - -ZLFileImage::Blocks OleStream::getBlockPieceInfoList(unsigned int offset, unsigned int size) const { - ZLFileImage::Blocks list; - unsigned int sectorSize = (myOleEntry.isBigBlock ? myStorage->getSectorSize() : myStorage->getShortSectorSize()); - unsigned int curBlockNumber = offset / sectorSize; - if (curBlockNumber >= myOleEntry.blocks.size()) { - return list; - } - unsigned int modBlock = offset % sectorSize; - unsigned int startFileOffset = 0; - if (!myStorage->countFileOffsetOfBlock(myOleEntry, curBlockNumber, startFileOffset)) { - return ZLFileImage::Blocks(); - } - startFileOffset += modBlock; - - unsigned int bytesLeftInCurBlock = sectorSize - modBlock; - unsigned int toReadBlocks = 0, toReadBytes = 0; - if (bytesLeftInCurBlock < size) { - toReadBlocks = (size - bytesLeftInCurBlock) / sectorSize; - toReadBytes = (size - bytesLeftInCurBlock) % sectorSize; - } - - unsigned int readedBytes = std::min(size, bytesLeftInCurBlock); - list.push_back(ZLFileImage::Block(startFileOffset, readedBytes)); - - for (unsigned int i = 0; i < toReadBlocks; ++i) { - if (++curBlockNumber >= myOleEntry.blocks.size()) { - break; - } - unsigned int newFileOffset = 0; - if (!myStorage->countFileOffsetOfBlock(myOleEntry, curBlockNumber, newFileOffset)) { - return ZLFileImage::Blocks(); - } - unsigned int readbytes = std::min(size - readedBytes, sectorSize); - list.push_back(ZLFileImage::Block(newFileOffset, readbytes)); - readedBytes += readbytes; - } - if (toReadBytes > 0 && ++curBlockNumber < myOleEntry.blocks.size()) { - unsigned int newFileOffset = 0; - if (!myStorage->countFileOffsetOfBlock(myOleEntry, curBlockNumber, newFileOffset)) { - return ZLFileImage::Blocks(); - } - unsigned int readbytes = toReadBytes; - list.push_back(ZLFileImage::Block(newFileOffset, readbytes)); - readedBytes += readbytes; - } - - return concatBlocks(list); -} - -ZLFileImage::Blocks OleStream::concatBlocks(const ZLFileImage::Blocks &blocks) { - if (blocks.size() < 2) { - return blocks; - } - ZLFileImage::Blocks optList; - ZLFileImage::Block curBlock = blocks.at(0); - unsigned int nextOffset = curBlock.offset + curBlock.size; - for (std::size_t i = 1; i < blocks.size(); ++i) { - ZLFileImage::Block b = blocks.at(i); - if (b.offset == nextOffset) { - curBlock.size += b.size; - nextOffset += b.size; - } else { - optList.push_back(curBlock); - curBlock = b; - nextOffset = curBlock.offset + curBlock.size; - } - } - optList.push_back(curBlock); - return optList; -} - -std::size_t OleStream::fileOffset() { - //TODO maybe remove this method, it doesn't use at this time - std::size_t sectorSize = (std::size_t)(myOleEntry.isBigBlock ? myStorage->getSectorSize() : myStorage->getShortSectorSize()); - unsigned int curBlockNumber = myOleOffset / sectorSize; - if (curBlockNumber >= myOleEntry.blocks.size()) { - return 0; - } - unsigned int modBlock = myOleOffset % sectorSize; - unsigned int curOffset = 0; - if (!myStorage->countFileOffsetOfBlock(myOleEntry, curBlockNumber, curOffset)) { - return 0; //TODO maybe remove -1? - } - return curOffset + modBlock; -} diff --git a/fbreader/src/formats/doc/OleStream.h b/fbreader/src/formats/doc/OleStream.h deleted file mode 100644 index 861c7cb..0000000 --- a/fbreader/src/formats/doc/OleStream.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (C) 2004-2012 Geometer Plus <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#ifndef __OLESTREAM_H__ -#define __OLESTREAM_H__ - -#include <ZLFileImage.h> - -#include "OleStorage.h" - -class OleStream { - -public: - OleStream(shared_ptr<OleStorage> storage, OleEntry oleEntry, shared_ptr<ZLInputStream> stream); - -public: - bool open(); - std::size_t read(char *buffer, std::size_t maxSize); - void close(); - -public: - bool seek(unsigned int offset, bool absoluteOffset); - std::size_t offset(); - -public: - ZLFileImage::Blocks getBlockPieceInfoList(unsigned int offset, unsigned int size) const; - static ZLFileImage::Blocks concatBlocks(const ZLFileImage::Blocks &blocks); - std::size_t fileOffset(); - -public: - bool eof() const; - -protected: - shared_ptr<OleStorage> myStorage; - - OleEntry myOleEntry; - shared_ptr<ZLInputStream> myBaseStream; - - unsigned int myOleOffset; -}; - -#endif /* __OLESTREAM_H__ */ diff --git a/fbreader/src/formats/doc/OleStreamParser.cpp b/fbreader/src/formats/doc/OleStreamParser.cpp deleted file mode 100644 index 0a9c62d..0000000 --- a/fbreader/src/formats/doc/OleStreamParser.cpp +++ /dev/null @@ -1,210 +0,0 @@ -/* - * Copyright (C) 2004-2012 Geometer Plus <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -//#include <cctype> -//#include <cstring> - -#include <ZLLogger.h> - -#include "OleMainStream.h" -#include "OleUtil.h" -#include "OleStreamParser.h" - -//word's control chars: -const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_FOOTNOTE_MARK = 0x0002; -const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_TABLE_SEPARATOR = 0x0007; -const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_HORIZONTAL_TAB = 0x0009; -const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_HARD_LINEBREAK = 0x000b; -const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_PAGE_BREAK = 0x000c; -const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_END_OF_PARAGRAPH = 0x000d; -const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_MINUS = 0x001e; -const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_SOFT_HYPHEN = 0x001f; -const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_START_FIELD = 0x0013; -const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_SEPARATOR_FIELD = 0x0014; -const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_END_FIELD = 0x0015; -const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_ZERO_WIDTH_UNBREAKABLE_SPACE = 0xfeff; -const ZLUnicodeUtil::Ucs2Char OleStreamParser::INLINE_IMAGE = 0x0001; -const ZLUnicodeUtil::Ucs2Char OleStreamParser::FLOAT_IMAGE = 0x0008; - -//unicode values: -const ZLUnicodeUtil::Ucs2Char OleStreamParser::NULL_SYMBOL = 0x0; -const ZLUnicodeUtil::Ucs2Char OleStreamParser::FILE_SEPARATOR = 0x1c; -const ZLUnicodeUtil::Ucs2Char OleStreamParser::LINE_FEED = 0x000a; -const ZLUnicodeUtil::Ucs2Char OleStreamParser::SOFT_HYPHEN = 0xad; -const ZLUnicodeUtil::Ucs2Char OleStreamParser::SPACE = 0x20; -const ZLUnicodeUtil::Ucs2Char OleStreamParser::MINUS = 0x2D; -const ZLUnicodeUtil::Ucs2Char OleStreamParser::VERTICAL_LINE = 0x7C; - -OleStreamParser::OleStreamParser() { - myCurBufferPosition = 0; - - myCurCharPos = 0; - myNextStyleInfoIndex = 0; - myNextCharInfoIndex = 0; - myNextBookmarkIndex = 0; - myNextInlineImageInfoIndex = 0; - myNextFloatImageInfoIndex = 0; -} - -bool OleStreamParser::readStream(OleMainStream &oleMainStream) { - ZLUnicodeUtil::Ucs2Char ucs2char; - bool tabMode = false; - while (getUcs2Char(oleMainStream, ucs2char)) { - if (tabMode) { - tabMode = false; - if (ucs2char == WORD_TABLE_SEPARATOR) { - handleTableEndRow(); - continue; - } else { - handleTableSeparator(); - } - } - - if (ucs2char < 32) { - switch (ucs2char) { - case NULL_SYMBOL: - break; - case WORD_HARD_LINEBREAK: - handleHardLinebreak(); - break; - case WORD_END_OF_PARAGRAPH: - case WORD_PAGE_BREAK: - handleParagraphEnd(); - break; - case WORD_TABLE_SEPARATOR: - tabMode = true; - break; - case WORD_FOOTNOTE_MARK: - handleFootNoteMark(); - break; - case WORD_START_FIELD: - handleStartField(); - break; - case WORD_SEPARATOR_FIELD: - handleSeparatorField(); - break; - case WORD_END_FIELD: - handleEndField(); - break; - case INLINE_IMAGE: - case FLOAT_IMAGE: - break; - default: - handleOtherControlChar(ucs2char); - break; - } - } else if (ucs2char == WORD_ZERO_WIDTH_UNBREAKABLE_SPACE) { - continue; //skip - } else { - handleChar(ucs2char); - } - } - - return true; -} - -bool OleStreamParser::getUcs2Char(OleMainStream &stream, ZLUnicodeUtil::Ucs2Char &ucs2char) { - while (myCurBufferPosition >= myBuffer.size()) { - myBuffer.clear(); - myCurBufferPosition = 0; - if (!readNextPiece(stream)) { - return false; - } - } - ucs2char = myBuffer.at(myCurBufferPosition++); - processStyles(stream); - - switch (ucs2char) { - case INLINE_IMAGE: - processInlineImage(stream); - break; - case FLOAT_IMAGE: - processFloatImage(stream); - break; - } - ++myCurCharPos; - return true; -} - -void OleStreamParser::processInlineImage(OleMainStream &stream) { - const OleMainStream::InlineImageInfoList &imageInfoList = stream.getInlineImageInfoList(); - if (imageInfoList.empty()) { - return; - } - //seek to curCharPos, because not all entries are real pictures - while(myNextInlineImageInfoIndex < imageInfoList.size() && imageInfoList.at(myNextInlineImageInfoIndex).first < myCurCharPos) { - ++myNextInlineImageInfoIndex; - } - while (myNextInlineImageInfoIndex < imageInfoList.size() && imageInfoList.at(myNextInlineImageInfoIndex).first == myCurCharPos) { - OleMainStream::InlineImageInfo info = imageInfoList.at(myNextInlineImageInfoIndex).second; - ZLFileImage::Blocks list = stream.getInlineImage(info.DataPosition); - if (!list.empty()) { - handleImage(list); - } - ++myNextInlineImageInfoIndex; - } -} - -void OleStreamParser::processFloatImage(OleMainStream &stream) { - const OleMainStream::FloatImageInfoList &imageInfoList = stream.getFloatImageInfoList(); - if (imageInfoList.empty()) { - return; - } - //seek to curCharPos, because not all entries are real pictures - while(myNextFloatImageInfoIndex < imageInfoList.size() && imageInfoList.at(myNextFloatImageInfoIndex).first < myCurCharPos) { - ++myNextFloatImageInfoIndex; - } - while (myNextFloatImageInfoIndex < imageInfoList.size() && imageInfoList.at(myNextFloatImageInfoIndex).first == myCurCharPos) { - OleMainStream::FloatImageInfo info = imageInfoList.at(myNextFloatImageInfoIndex).second; - ZLFileImage::Blocks list = stream.getFloatImage(info.ShapeId); - if (!list.empty()) { - handleImage(list); - } - ++myNextFloatImageInfoIndex; - } -} - -void OleStreamParser::processStyles(OleMainStream &stream) { - const OleMainStream::StyleInfoList &styleInfoList = stream.getStyleInfoList(); - if (!styleInfoList.empty()) { - while (myNextStyleInfoIndex < styleInfoList.size() && styleInfoList.at(myNextStyleInfoIndex).first == myCurCharPos) { - OleMainStream::Style info = styleInfoList.at(myNextStyleInfoIndex).second; - handleParagraphStyle(info); - ++myNextStyleInfoIndex; - } - } - - const OleMainStream::CharInfoList &charInfoList = stream.getCharInfoList(); - if (!charInfoList.empty()) { - while (myNextCharInfoIndex < charInfoList.size() && charInfoList.at(myNextCharInfoIndex).first == myCurCharPos) { - OleMainStream::CharInfo info = charInfoList.at(myNextCharInfoIndex).second; - handleFontStyle(info.FontStyle); - ++myNextCharInfoIndex; - } - } - - const OleMainStream::BookmarksList &bookmarksList = stream.getBookmarks(); - if (!bookmarksList.empty()) { - while (myNextBookmarkIndex < bookmarksList.size() && bookmarksList.at(myNextBookmarkIndex).CharPosition == myCurCharPos) { - OleMainStream::Bookmark bookmark = bookmarksList.at(myNextBookmarkIndex); - handleBookmark(bookmark.Name); - ++myNextBookmarkIndex; - } - } -} diff --git a/fbreader/src/formats/doc/OleStreamParser.h b/fbreader/src/formats/doc/OleStreamParser.h deleted file mode 100644 index 1adec2f..0000000 --- a/fbreader/src/formats/doc/OleStreamParser.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (C) 2004-2012 Geometer Plus <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#ifndef __OLESTREAMPARSER_H__ -#define __OLESTREAMPARSER_H__ - -#include <ZLUnicodeUtil.h> - -#include "OleMainStream.h" -#include "OleStreamReader.h" - -class OleStreamParser : public OleStreamReader { - -public: - //word's control chars: - static const ZLUnicodeUtil::Ucs2Char WORD_FOOTNOTE_MARK; - static const ZLUnicodeUtil::Ucs2Char WORD_TABLE_SEPARATOR; - static const ZLUnicodeUtil::Ucs2Char WORD_HORIZONTAL_TAB; - static const ZLUnicodeUtil::Ucs2Char WORD_HARD_LINEBREAK; - static const ZLUnicodeUtil::Ucs2Char WORD_PAGE_BREAK; - static const ZLUnicodeUtil::Ucs2Char WORD_END_OF_PARAGRAPH; - static const ZLUnicodeUtil::Ucs2Char WORD_MINUS; - static const ZLUnicodeUtil::Ucs2Char WORD_SOFT_HYPHEN; - static const ZLUnicodeUtil::Ucs2Char WORD_START_FIELD; - static const ZLUnicodeUtil::Ucs2Char WORD_SEPARATOR_FIELD; - static const ZLUnicodeUtil::Ucs2Char WORD_END_FIELD; - static const ZLUnicodeUtil::Ucs2Char WORD_ZERO_WIDTH_UNBREAKABLE_SPACE; - static const ZLUnicodeUtil::Ucs2Char INLINE_IMAGE; - static const ZLUnicodeUtil::Ucs2Char FLOAT_IMAGE; - - //unicode values: - static const ZLUnicodeUtil::Ucs2Char NULL_SYMBOL; - static const ZLUnicodeUtil::Ucs2Char FILE_SEPARATOR; - static const ZLUnicodeUtil::Ucs2Char LINE_FEED; - static const ZLUnicodeUtil::Ucs2Char SOFT_HYPHEN; - static const ZLUnicodeUtil::Ucs2Char SPACE; - static const ZLUnicodeUtil::Ucs2Char MINUS; - static const ZLUnicodeUtil::Ucs2Char VERTICAL_LINE; - -public: - OleStreamParser(); - -private: - bool readStream(OleMainStream &stream); - -protected: - virtual void handleChar(ZLUnicodeUtil::Ucs2Char ucs2char) = 0; - virtual void handleHardLinebreak() = 0; - virtual void handleParagraphEnd() = 0; - virtual void handlePageBreak() = 0; - virtual void handleTableSeparator() = 0; - virtual void handleTableEndRow() = 0; - virtual void handleFootNoteMark() = 0; - virtual void handleStartField() = 0; - virtual void handleSeparatorField() = 0; - virtual void handleEndField() = 0; - virtual void handleImage(const ZLFileImage::Blocks &blocks) = 0; - virtual void handleOtherControlChar(ZLUnicodeUtil::Ucs2Char ucs2char) = 0; - - virtual void handleFontStyle(unsigned int fontStyle) = 0; - virtual void handleParagraphStyle(const OleMainStream::Style &styleInfo) = 0; - virtual void handleBookmark(const std::string &name) = 0; - -private: - bool getUcs2Char(OleMainStream &stream, ZLUnicodeUtil::Ucs2Char &ucs2char); - void processInlineImage(OleMainStream &stream); - void processFloatImage(OleMainStream &stream); - void processStyles(OleMainStream &stream); - -private: -protected: - ZLUnicodeUtil::Ucs2String myBuffer; -private: - std::size_t myCurBufferPosition; - - unsigned int myCurCharPos; - - std::size_t myNextStyleInfoIndex; - std::size_t myNextCharInfoIndex; - std::size_t myNextBookmarkIndex; - std::size_t myNextInlineImageInfoIndex; - std::size_t myNextFloatImageInfoIndex; -}; - -#endif /* __OLESTREAMPARSER_H__ */ diff --git a/fbreader/src/formats/doc/OleStreamReader.cpp b/fbreader/src/formats/doc/OleStreamReader.cpp deleted file mode 100644 index 224489a..0000000 --- a/fbreader/src/formats/doc/OleStreamReader.cpp +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (C) 2004-2012 Geometer Plus <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#include <ZLLogger.h> - -#include "OleMainStream.h" -#include "OleUtil.h" -#include "OleStreamReader.h" - -OleStreamReader::OleStreamReader() : myNextPieceNumber(0) { -} - -bool OleStreamReader::readDocument(shared_ptr<ZLInputStream> inputStream, bool doReadFormattingData) { - static const std::string WORD_DOCUMENT = "WordDocument"; - - shared_ptr<OleStorage> storage = new OleStorage; - - if (!storage->init(inputStream, inputStream->sizeOfOpened())) { - ZLLogger::Instance().println("DocPlugin", "Broken OLE file"); - return false; - } - - OleEntry wordDocumentEntry; - if (!storage->getEntryByName(WORD_DOCUMENT, wordDocumentEntry)) { - return false; - } - - OleMainStream oleStream(storage, wordDocumentEntry, inputStream); - if (!oleStream.open(doReadFormattingData)) { - ZLLogger::Instance().println("DocPlugin", "Cannot open OleMainStream"); - return false; - } - return readStream(oleStream); -} - -bool OleStreamReader::readNextPiece(OleMainStream &stream) { - const OleMainStream::Pieces &pieces = stream.getPieces(); - if (myNextPieceNumber >= pieces.size()) { - return false; - } - const OleMainStream::Piece &piece = pieces.at(myNextPieceNumber); - - if (piece.Type == OleMainStream::Piece::PIECE_FOOTNOTE) { - footnotesStartHandler(); - } else if (piece.Type == OleMainStream::Piece::PIECE_OTHER) { - return false; - } - - if (!stream.seek(piece.Offset, true)) { - //TODO maybe in that case we should take next piece? - return false; - } - char *textBuffer = new char[piece.Length]; - std::size_t readBytes = stream.read(textBuffer, piece.Length); - if (readBytes != (std::size_t)piece.Length) { - ZLLogger::Instance().println("DocPlugin", "not all bytes have been read from piece"); - } - - if (!piece.IsANSI) { - for (std::size_t i = 0; i < readBytes; i += 2) { - ucs2SymbolHandler(OleUtil::getU2Bytes(textBuffer, i)); - } - } else { - ansiDataHandler(textBuffer, readBytes); - } - ++myNextPieceNumber; - delete[] textBuffer; - - return true; -} diff --git a/fbreader/src/formats/doc/OleStreamReader.h b/fbreader/src/formats/doc/OleStreamReader.h deleted file mode 100644 index 2d2a0ae..0000000 --- a/fbreader/src/formats/doc/OleStreamReader.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (C) 2004-2012 Geometer Plus <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#ifndef __OLESTREAMREADER_H__ -#define __OLESTREAMREADER_H__ - -#include <ZLUnicodeUtil.h> - -#include "OleMainStream.h" - -class OleStreamReader { - -public: - OleStreamReader(); - bool readDocument(shared_ptr<ZLInputStream> stream, bool doReadFormattingData); - -protected: - virtual bool readStream(OleMainStream &stream) = 0; - - bool readNextPiece(OleMainStream &stream); - - virtual void ansiDataHandler(const char *buffer, std::size_t len) = 0; - virtual void ucs2SymbolHandler(ZLUnicodeUtil::Ucs2Char symbol) = 0; - virtual void footnotesStartHandler() = 0; - -private: - std::size_t myNextPieceNumber; -}; - -#endif /* __OLESTREAMREADER_H__ */ diff --git a/fbreader/src/formats/doc/OleUtil.cpp b/fbreader/src/formats/doc/OleUtil.cpp deleted file mode 100644 index 2e8f685..0000000 --- a/fbreader/src/formats/doc/OleUtil.cpp +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (C) 2004-2012 Geometer Plus <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#include "OleUtil.h" - -int OleUtil::get4Bytes(const char *buffer, unsigned int offset) { - const unsigned char *buf = (const unsigned char*)buffer; - return - (int)buf[offset] - | ((int)buf[offset+1] << 8) - | ((int)buf[offset+2] << 16) - | ((int)buf[offset+3] << 24); -} - -unsigned int OleUtil::getU4Bytes(const char *buffer, unsigned int offset) { - const unsigned char *buf = (const unsigned char*)buffer; - return - (unsigned int)buf[offset] - | ((unsigned int)buf[offset+1] << 8) - | ((unsigned int)buf[offset+2] << 16) - | ((unsigned int)buf[offset+3] << 24); -} - -unsigned int OleUtil::getU2Bytes(const char *buffer, unsigned int offset) { - const unsigned char *buf = (const unsigned char*)buffer; - return - (unsigned int)buf[offset] - | ((unsigned int)buf[offset+1] << 8); -} - -unsigned int OleUtil::getU1Byte(const char *buffer, unsigned int offset) { - const unsigned char *buf = (const unsigned char*)buffer; - return (unsigned int)buf[offset]; -} - -int OleUtil::get1Byte(const char *buffer, unsigned int offset) { - const unsigned char *buf = (const unsigned char*)buffer; - return (int)buf[offset]; -} - - - diff --git a/fbreader/src/formats/doc/OleUtil.h b/fbreader/src/formats/doc/OleUtil.h deleted file mode 100644 index 531c769..0000000 --- a/fbreader/src/formats/doc/OleUtil.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (C) 2004-2012 Geometer Plus <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#ifndef __OLEUTIL_H__ -#define __OLEUTIL_H__ - -class OleUtil { -public: - static int get4Bytes(const char *buffer, unsigned int offset); - static unsigned int getU4Bytes(const char *buffer, unsigned int offset); - static unsigned int getU2Bytes(const char *buffer, unsigned int offset); - static unsigned int getU1Byte(const char *buffer, unsigned int offset); - static int get1Byte(const char *buffer, unsigned int offset); -}; - -#endif /* __OLEUTIL_H__ */ |