summaryrefslogtreecommitdiffstats
path: root/reader/src/formats/doc/OleStreamParser.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'reader/src/formats/doc/OleStreamParser.cpp')
-rw-r--r--reader/src/formats/doc/OleStreamParser.cpp210
1 files changed, 210 insertions, 0 deletions
diff --git a/reader/src/formats/doc/OleStreamParser.cpp b/reader/src/formats/doc/OleStreamParser.cpp
new file mode 100644
index 0000000..0a9c62d
--- /dev/null
+++ b/reader/src/formats/doc/OleStreamParser.cpp
@@ -0,0 +1,210 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+//#include <cctype>
+//#include <cstring>
+
+#include <ZLLogger.h>
+
+#include "OleMainStream.h"
+#include "OleUtil.h"
+#include "OleStreamParser.h"
+
+//word's control chars:
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_FOOTNOTE_MARK = 0x0002;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_TABLE_SEPARATOR = 0x0007;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_HORIZONTAL_TAB = 0x0009;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_HARD_LINEBREAK = 0x000b;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_PAGE_BREAK = 0x000c;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_END_OF_PARAGRAPH = 0x000d;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_MINUS = 0x001e;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_SOFT_HYPHEN = 0x001f;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_START_FIELD = 0x0013;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_SEPARATOR_FIELD = 0x0014;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_END_FIELD = 0x0015;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_ZERO_WIDTH_UNBREAKABLE_SPACE = 0xfeff;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::INLINE_IMAGE = 0x0001;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::FLOAT_IMAGE = 0x0008;
+
+//unicode values:
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::NULL_SYMBOL = 0x0;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::FILE_SEPARATOR = 0x1c;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::LINE_FEED = 0x000a;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::SOFT_HYPHEN = 0xad;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::SPACE = 0x20;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::MINUS = 0x2D;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::VERTICAL_LINE = 0x7C;
+
+OleStreamParser::OleStreamParser() {
+ myCurBufferPosition = 0;
+
+ myCurCharPos = 0;
+ myNextStyleInfoIndex = 0;
+ myNextCharInfoIndex = 0;
+ myNextBookmarkIndex = 0;
+ myNextInlineImageInfoIndex = 0;
+ myNextFloatImageInfoIndex = 0;
+}
+
+bool OleStreamParser::readStream(OleMainStream &oleMainStream) {
+ ZLUnicodeUtil::Ucs2Char ucs2char;
+ bool tabMode = false;
+ while (getUcs2Char(oleMainStream, ucs2char)) {
+ if (tabMode) {
+ tabMode = false;
+ if (ucs2char == WORD_TABLE_SEPARATOR) {
+ handleTableEndRow();
+ continue;
+ } else {
+ handleTableSeparator();
+ }
+ }
+
+ if (ucs2char < 32) {
+ switch (ucs2char) {
+ case NULL_SYMBOL:
+ break;
+ case WORD_HARD_LINEBREAK:
+ handleHardLinebreak();
+ break;
+ case WORD_END_OF_PARAGRAPH:
+ case WORD_PAGE_BREAK:
+ handleParagraphEnd();
+ break;
+ case WORD_TABLE_SEPARATOR:
+ tabMode = true;
+ break;
+ case WORD_FOOTNOTE_MARK:
+ handleFootNoteMark();
+ break;
+ case WORD_START_FIELD:
+ handleStartField();
+ break;
+ case WORD_SEPARATOR_FIELD:
+ handleSeparatorField();
+ break;
+ case WORD_END_FIELD:
+ handleEndField();
+ break;
+ case INLINE_IMAGE:
+ case FLOAT_IMAGE:
+ break;
+ default:
+ handleOtherControlChar(ucs2char);
+ break;
+ }
+ } else if (ucs2char == WORD_ZERO_WIDTH_UNBREAKABLE_SPACE) {
+ continue; //skip
+ } else {
+ handleChar(ucs2char);
+ }
+ }
+
+ return true;
+}
+
+bool OleStreamParser::getUcs2Char(OleMainStream &stream, ZLUnicodeUtil::Ucs2Char &ucs2char) {
+ while (myCurBufferPosition >= myBuffer.size()) {
+ myBuffer.clear();
+ myCurBufferPosition = 0;
+ if (!readNextPiece(stream)) {
+ return false;
+ }
+ }
+ ucs2char = myBuffer.at(myCurBufferPosition++);
+ processStyles(stream);
+
+ switch (ucs2char) {
+ case INLINE_IMAGE:
+ processInlineImage(stream);
+ break;
+ case FLOAT_IMAGE:
+ processFloatImage(stream);
+ break;
+ }
+ ++myCurCharPos;
+ return true;
+}
+
+void OleStreamParser::processInlineImage(OleMainStream &stream) {
+ const OleMainStream::InlineImageInfoList &imageInfoList = stream.getInlineImageInfoList();
+ if (imageInfoList.empty()) {
+ return;
+ }
+ //seek to curCharPos, because not all entries are real pictures
+ while(myNextInlineImageInfoIndex < imageInfoList.size() && imageInfoList.at(myNextInlineImageInfoIndex).first < myCurCharPos) {
+ ++myNextInlineImageInfoIndex;
+ }
+ while (myNextInlineImageInfoIndex < imageInfoList.size() && imageInfoList.at(myNextInlineImageInfoIndex).first == myCurCharPos) {
+ OleMainStream::InlineImageInfo info = imageInfoList.at(myNextInlineImageInfoIndex).second;
+ ZLFileImage::Blocks list = stream.getInlineImage(info.DataPosition);
+ if (!list.empty()) {
+ handleImage(list);
+ }
+ ++myNextInlineImageInfoIndex;
+ }
+}
+
+void OleStreamParser::processFloatImage(OleMainStream &stream) {
+ const OleMainStream::FloatImageInfoList &imageInfoList = stream.getFloatImageInfoList();
+ if (imageInfoList.empty()) {
+ return;
+ }
+ //seek to curCharPos, because not all entries are real pictures
+ while(myNextFloatImageInfoIndex < imageInfoList.size() && imageInfoList.at(myNextFloatImageInfoIndex).first < myCurCharPos) {
+ ++myNextFloatImageInfoIndex;
+ }
+ while (myNextFloatImageInfoIndex < imageInfoList.size() && imageInfoList.at(myNextFloatImageInfoIndex).first == myCurCharPos) {
+ OleMainStream::FloatImageInfo info = imageInfoList.at(myNextFloatImageInfoIndex).second;
+ ZLFileImage::Blocks list = stream.getFloatImage(info.ShapeId);
+ if (!list.empty()) {
+ handleImage(list);
+ }
+ ++myNextFloatImageInfoIndex;
+ }
+}
+
+void OleStreamParser::processStyles(OleMainStream &stream) {
+ const OleMainStream::StyleInfoList &styleInfoList = stream.getStyleInfoList();
+ if (!styleInfoList.empty()) {
+ while (myNextStyleInfoIndex < styleInfoList.size() && styleInfoList.at(myNextStyleInfoIndex).first == myCurCharPos) {
+ OleMainStream::Style info = styleInfoList.at(myNextStyleInfoIndex).second;
+ handleParagraphStyle(info);
+ ++myNextStyleInfoIndex;
+ }
+ }
+
+ const OleMainStream::CharInfoList &charInfoList = stream.getCharInfoList();
+ if (!charInfoList.empty()) {
+ while (myNextCharInfoIndex < charInfoList.size() && charInfoList.at(myNextCharInfoIndex).first == myCurCharPos) {
+ OleMainStream::CharInfo info = charInfoList.at(myNextCharInfoIndex).second;
+ handleFontStyle(info.FontStyle);
+ ++myNextCharInfoIndex;
+ }
+ }
+
+ const OleMainStream::BookmarksList &bookmarksList = stream.getBookmarks();
+ if (!bookmarksList.empty()) {
+ while (myNextBookmarkIndex < bookmarksList.size() && bookmarksList.at(myNextBookmarkIndex).CharPosition == myCurCharPos) {
+ OleMainStream::Bookmark bookmark = bookmarksList.at(myNextBookmarkIndex);
+ handleBookmark(bookmark.Name);
+ ++myNextBookmarkIndex;
+ }
+ }
+}