summaryrefslogtreecommitdiffstats
path: root/fbreader/src/formats/rtf/RtfReader.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'fbreader/src/formats/rtf/RtfReader.cpp')
-rw-r--r--fbreader/src/formats/rtf/RtfReader.cpp470
1 files changed, 470 insertions, 0 deletions
diff --git a/fbreader/src/formats/rtf/RtfReader.cpp b/fbreader/src/formats/rtf/RtfReader.cpp
new file mode 100644
index 0000000..91fea0c
--- /dev/null
+++ b/fbreader/src/formats/rtf/RtfReader.cpp
@@ -0,0 +1,470 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cstdlib>
+#include <cctype>
+
+#include <ZLFile.h>
+#include <ZLInputStream.h>
+
+#include "RtfReader.h"
+
+std::map<std::string, RtfCommand*> RtfReader::ourKeywordMap; // Shared keyword -> command table: filled by fillKeywordMap(), queried by processKeyword().
+
+static const int rtfStreamBufferSize = 4096; // Length (in chars) of the buffer allocated in readDocument() and refilled by parseDocument().
+
+RtfReader::RtfReader(const std::string &encoding) : EncodedTextReader(encoding) { // Forwards the encoding name to the EncodedTextReader base.
+ myNextImageMimeType = ZLMimeType::EMPTY; // No picture type announced yet; parseDocument() inserts an image only when this is not EMPTY.
+}
+
+RtfReader::~RtfReader() { // Intentionally empty: no explicit cleanup is performed here.
+}
+
+RtfCommand::~RtfCommand() { // Out-of-line, empty destructor for the command base class.
+}
+
+void RtfDummyCommand::run(RtfReader&, int*) const { // No-op: both arguments are ignored (fillKeywordMap() registers this for "shppict").
+}
+
+void RtfNewParagraphCommand::run(RtfReader &reader, int*) const { // The keyword parameter, if any, is ignored.
+ reader.newParagraph(); // Delegates paragraph handling to the reader.
+}
+
+RtfFontPropertyCommand::RtfFontPropertyCommand(RtfReader::FontProperty property) : myProperty(property) { // Remembers which font attribute run() will toggle.
+}
+
+// Switches the font attribute selected at construction (bold, italic or underlined) on or off.
+//
+// reader    - reader whose current state is updated.
+// parameter - optional keyword parameter: a null pointer or any non-zero value turns the
+//             attribute on, zero turns it off.
+//
+// The reader is notified through setFontProperty() only when the stored flag actually changes.
+void RtfFontPropertyCommand::run(RtfReader &reader, int *parameter) const {
+	const bool enable = (parameter == 0) ? true : (*parameter != 0);
+	switch (myProperty) {
+		case RtfReader::FONT_BOLD:
+			if (reader.myState.Bold == enable) {
+				return;
+			}
+			reader.myState.Bold = enable;
+			break;
+		case RtfReader::FONT_ITALIC:
+			if (reader.myState.Italic == enable) {
+				return;
+			}
+			reader.myState.Italic = enable;
+			break;
+		case RtfReader::FONT_UNDERLINED:
+			if (reader.myState.Underlined == enable) {
+				return;
+			}
+			reader.myState.Underlined = enable;
+			break;
+		default:
+			// Any other property value is left untouched.
+			return;
+	}
+	// Reached only when one of the three flags has just changed.
+	reader.setFontProperty(myProperty);
+}
+
+RtfAlignmentCommand::RtfAlignmentCommand(ZLTextAlignmentType alignment) : myAlignment(alignment) { // Remembers the alignment that run() will apply.
+}
+
+// Applies the alignment chosen at construction to the reader's current state.
+// The keyword parameter is ignored. setAlignment() is called only when the alignment changes.
+void RtfAlignmentCommand::run(RtfReader &reader, int*) const {
+	if (reader.myState.Alignment == myAlignment) {
+		return;
+	}
+	reader.myState.Alignment = myAlignment;
+	reader.setAlignment();
+}
+
+RtfCharCommand::RtfCharCommand(const std::string &chr) : myChar(chr) { // Stores a copy of the text that run() will emit.
+}
+
+// Emits the text stored at construction through the reader's character-data path.
+// The keyword parameter is ignored; the text is passed with the convert flag set to false.
+void RtfCharCommand::run(RtfReader &reader, int*) const {
+	const std::string &text = myChar;
+	reader.processCharData(text.data(), text.size(), false);
+}
+
+RtfDestinationCommand::RtfDestinationCommand(RtfReader::DestinationType destination) : myDestination(destination) { // Remembers the destination that run() will switch to.
+}
+
+// Makes the destination chosen at construction the reader's current destination.
+// Nothing happens when the reader is already in that destination. The keyword parameter is ignored.
+void RtfDestinationCommand::run(RtfReader &reader, int*) const {
+	if (reader.myState.Destination != myDestination) {
+		reader.myState.Destination = myDestination;
+		const bool entersPicture = (myDestination == RtfReader::DESTINATION_PICTURE);
+		if (entersPicture) {
+			// Picture content is flagged as hex data, and any previously announced image type is forgotten.
+			reader.myState.ReadDataAsHex = true;
+			reader.myNextImageMimeType = ZLMimeType::EMPTY;
+		}
+		reader.switchDestination(myDestination, true);
+	}
+}
+
+// Handler registered for the "s" keyword. It is currently a placeholder: it inspects the
+// destination but changes no reader state and ignores the keyword parameter.
+void RtfStyleCommand::run(RtfReader &reader, int*) const {
+	const bool insideStylesheet = (reader.myState.Destination == RtfReader::DESTINATION_STYLESHEET);
+	if (!insideStylesheet) {
+		// Outside the stylesheet: this is where a style index would be applied (not implemented).
+		return;
+	}
+	// Inside the stylesheet: this is where a style index would be registered (not implemented).
+}
+
+// Passes the numeric keyword parameter to the reader's setEncoding().
+// Without a parameter the command does nothing.
+void RtfCodepageCommand::run(RtfReader &reader, int *parameter) const {
+	if (parameter == 0) {
+		return;
+	}
+	reader.setEncoding(*parameter);
+}
+
+void RtfSpecialCommand::run(RtfReader &reader, int*) const { // Registered for the "*" control symbol in fillKeywordMap().
+ reader.mySpecialMode = true; // Consumed and cleared by the next processKeyword() call, which skips the group if that keyword is unknown.
+}
+
+RtfPictureCommand::RtfPictureCommand(shared_ptr<ZLMimeType> mimeType) : myMimeType(mimeType) { // Remembers the image type that run() will announce.
+}
+
+void RtfPictureCommand::run(RtfReader &reader, int*) const { // The keyword parameter is ignored.
+ reader.myNextImageMimeType = myMimeType; // parseDocument() passes this type to insertImage() when the enclosing group closes.
+}
+
+void RtfFontResetCommand::run(RtfReader &reader, int*) const { // Registered for "plain": clears bold, italic and underlined, in that order.
+ if (reader.myState.Bold) { // Only flags that are currently set are cleared and reported.
+ reader.myState.Bold = false;
+ reader.setFontProperty(RtfReader::FONT_BOLD); // The flag is cleared before the reader is notified.
+ }
+ if (reader.myState.Italic) {
+ reader.myState.Italic = false;
+ reader.setFontProperty(RtfReader::FONT_ITALIC);
+ }
+ if (reader.myState.Underlined) {
+ reader.myState.Underlined = false;
+ reader.setFontProperty(RtfReader::FONT_UNDERLINED);
+ }
+}
+
+// Registers a command for a keyword in the shared keyword table.
+// std::map::insert keeps an existing entry, so a keyword that is already registered is not overwritten.
+void RtfReader::addAction(const std::string &tag, RtfCommand *command) {
+	typedef std::map<std::string, RtfCommand*>::value_type KeywordEntry;
+	ourKeywordMap.insert(KeywordEntry(tag, command));
+}
+
+// Populates the shared keyword table on first use; later calls return immediately because the
+// table is no longer empty. The command objects are allocated once and never released here.
+//
+// Changes relative to the previous version:
+//  - "~" (non-breaking space) now emits U+00A0 as the valid UTF-8 sequence C2 A0; the former
+//    C0 A0 is an overlong, invalid encoding.
+//  - Underline is registered as "ul". In RTF, "u" followed by a number is the Unicode
+//    character control word, so mapping it to underline switched underlining on at every
+//    Unicode escape, while the real underline keyword was not recognised.
+void RtfReader::fillKeywordMap() {
+	if (!ourKeywordMap.empty()) {
+		return;
+	}
+
+	addAction("*", new RtfSpecialCommand());
+	addAction("ansicpg", new RtfCodepageCommand());
+
+	// Groups whose content is dropped: while the destination is DESTINATION_SKIP, both
+	// processKeyword() and processCharData() ignore their input. One command object is shared.
+	static const char *keywordsToSkip[] = {"buptim", "colortbl", "comment", "creatim", "doccomm", "fonttbl", "footer", "footerf", "footerl", "footerr", "ftncn", "ftnsep", "ftnsepc", "header", "headerf", "headerl", "headerr", "keywords", "operator", "printim", "private1", "revtim", "rxe", "subject", "tc", "txe", "xe", 0};
+	RtfCommand *skipCommand = new RtfDestinationCommand(RtfReader::DESTINATION_SKIP);
+	for (const char **i = keywordsToSkip; *i != 0; ++i) {
+		addAction(*i, skipCommand);
+	}
+
+	// "shppict" is registered as a known no-op so that a preceding "*" does not make
+	// processKeyword() skip the group.
+	addAction("shppict", new RtfDummyCommand());
+	addAction("info", new RtfDestinationCommand(RtfReader::DESTINATION_INFO));
+	addAction("title", new RtfDestinationCommand(RtfReader::DESTINATION_TITLE));
+	addAction("author", new RtfDestinationCommand(RtfReader::DESTINATION_AUTHOR));
+	addAction("pict", new RtfDestinationCommand(RtfReader::DESTINATION_PICTURE));
+	addAction("stylesheet", new RtfDestinationCommand(RtfReader::DESTINATION_STYLESHEET));
+	addAction("footnote", new RtfDestinationCommand(RtfReader::DESTINATION_FOOTNOTE));
+
+	// An escaped line break behaves like "par"; the three keys share one command object.
+	RtfCommand *newParagraphCommand = new RtfNewParagraphCommand();
+	addAction("\n", newParagraphCommand);
+	addAction("\r", newParagraphCommand);
+	addAction("par", newParagraphCommand);
+
+	// Literal characters. The strings are emitted with the convert flag set to false
+	// (see RtfCharCommand::run), so they are written here as UTF-8 byte sequences.
+	addAction("\x09", new RtfCharCommand("\x09"));
+	addAction("_", new RtfCharCommand("-"));
+	addAction("\\", new RtfCharCommand("\\"));
+	addAction("{", new RtfCharCommand("{"));
+	addAction("}", new RtfCharCommand("}"));
+	addAction("bullet", new RtfCharCommand("\xE2\x80\xA2")); // U+2022 bullet
+	addAction("endash", new RtfCharCommand("\xE2\x80\x93")); // U+2013 en dash
+	addAction("emdash", new RtfCharCommand("\xE2\x80\x94")); // U+2014 em dash
+	addAction("~", new RtfCharCommand("\xC2\xA0")); // U+00A0 no-break space
+	addAction("enspace", new RtfCharCommand("\xE2\x80\x82")); // U+2002 en space
+	addAction("emspace", new RtfCharCommand("\xE2\x80\x83")); // U+2003 em space
+	addAction("lquote", new RtfCharCommand("\xE2\x80\x98")); // U+2018 left single quote
+	addAction("rquote", new RtfCharCommand("\xE2\x80\x99")); // U+2019 right single quote
+	addAction("ldblquote", new RtfCharCommand("\xE2\x80\x9C")); // U+201C left double quote
+	addAction("rdblquote", new RtfCharCommand("\xE2\x80\x9D")); // U+201D right double quote
+
+	// Picture type announcements.
+	addAction("jpegblip", new RtfPictureCommand(ZLMimeType::IMAGE_JPEG));
+	addAction("pngblip", new RtfPictureCommand(ZLMimeType::IMAGE_PNG));
+
+	addAction("s", new RtfStyleCommand());
+
+	// Paragraph alignment; "pard" resets it to ALIGN_UNDEFINED.
+	addAction("qc", new RtfAlignmentCommand(ALIGN_CENTER));
+	addAction("ql", new RtfAlignmentCommand(ALIGN_LEFT));
+	addAction("qr", new RtfAlignmentCommand(ALIGN_RIGHT));
+	addAction("qj", new RtfAlignmentCommand(ALIGN_JUSTIFY));
+	addAction("pard", new RtfAlignmentCommand(ALIGN_UNDEFINED));
+
+	// Character formatting. A parameter of zero (for example "b0") switches the attribute off.
+	addAction("b", new RtfFontPropertyCommand(RtfReader::FONT_BOLD));
+	addAction("i", new RtfFontPropertyCommand(RtfReader::FONT_ITALIC));
+	addAction("ul", new RtfFontPropertyCommand(RtfReader::FONT_UNDERLINED));
+	addAction("plain", new RtfFontResetCommand());
+}
+
+// Reads the stream buffer by buffer and runs a small state machine over every character.
+// Plain text runs are forwarded to processCharData(), control words and symbols to
+// processKeyword(), and '{' / '}' push and pop the formatting state.
+//
+// Returns false as soon as anything other than '}' or whitespace follows the closing brace
+// of the outermost group. Otherwise returns true when parsing was interrupted or when the
+// state stack is empty at the end of the stream.
+//
+// Changes relative to the previous version:
+//  - the end-of-buffer flush of plain text no longer falls through into the keyword case
+//    (the text used to be appended to `keyword` as well);
+//  - bytes consumed in the binary-data state are excluded from the pending text run, so they
+//    are no longer forwarded a second time by the next flush;
+//  - characters are converted to unsigned char before being passed to the <cctype>
+//    classifiers, whose behaviour is undefined for negative values.
+bool RtfReader::parseDocument() {
+	enum {
+		READ_NORMAL_DATA,
+		READ_BINARY_DATA,
+		READ_HEX_SYMBOL,
+		READ_KEYWORD,
+		READ_KEYWORD_PARAMETER,
+		READ_END_OF_FILE
+	} parserState = READ_NORMAL_DATA;
+
+	// These accumulators live outside the read loop so that a keyword, a parameter or a hex
+	// pair split across two buffers is reassembled correctly.
+	std::string keyword;
+	std::string parameterString;
+	std::string hexString;
+	// Stream offset of the first picture data character, or -1 when no picture is being read.
+	int imageStartOffset = -1;
+
+	while (!myIsInterrupted) {
+		const char *ptr = myStreamBuffer;
+		const char *end = myStreamBuffer + myStream->read(myStreamBuffer, rtfStreamBufferSize);
+		if (ptr == end) {
+			break;
+		}
+		// Start of the run (text, keyword or parameter) that has not been consumed yet.
+		const char *dataStart = ptr;
+		// Cleared when the current character must be examined again in the next state.
+		bool readNextChar = true;
+		while (ptr != end) {
+			switch (parserState) {
+				case READ_END_OF_FILE:
+					if (*ptr != '}' && !std::isspace(static_cast<unsigned char>(*ptr))) {
+						return false;
+					}
+					break;
+				case READ_BINARY_DATA:
+					// TODO: optimize
+					processCharData(ptr, 1);
+					// This byte has been delivered; keep it out of the next text run.
+					dataStart = ptr + 1;
+					--myBinaryDataSize;
+					if (myBinaryDataSize == 0) {
+						parserState = READ_NORMAL_DATA;
+					}
+					break;
+				case READ_NORMAL_DATA:
+					switch (*ptr) {
+						case '{':
+							if (ptr > dataStart) {
+								processCharData(dataStart, ptr - dataStart);
+							}
+							dataStart = ptr + 1;
+							myStateStack.push(myState);
+							myState.ReadDataAsHex = false;
+							break;
+						case '}':
+						{
+							if (ptr > dataStart) {
+								processCharData(dataStart, ptr - dataStart);
+							}
+							dataStart = ptr + 1;
+
+							if (imageStartOffset >= 0) {
+								if (ZLMimeType::EMPTY != myNextImageMimeType) {
+									// (ptr - end) is negative: it converts the stream offset at the
+									// end of the buffer into the offset of the current character.
+									const int imageSize = myStream->offset() + (ptr - end) - imageStartOffset;
+									insertImage(myNextImageMimeType, myFileName, imageStartOffset, imageSize);
+								}
+								imageStartOffset = -1;
+							}
+
+							if (myStateStack.empty()) {
+								// Closing brace of the outermost group.
+								parserState = READ_END_OF_FILE;
+								break;
+							}
+
+							if (myState.Destination != myStateStack.top().Destination) {
+								switchDestination(myState.Destination, false);
+								switchDestination(myStateStack.top().Destination, true);
+							}
+
+							bool oldItalic = myState.Italic;
+							bool oldBold = myState.Bold;
+							bool oldUnderlined = myState.Underlined;
+							ZLTextAlignmentType oldAlignment = myState.Alignment;
+							myState = myStateStack.top();
+							myStateStack.pop();
+
+							// Report every attribute whose value differs after restoring the outer state.
+							if (myState.Italic != oldItalic) {
+								setFontProperty(RtfReader::FONT_ITALIC);
+							}
+							if (myState.Bold != oldBold) {
+								setFontProperty(RtfReader::FONT_BOLD);
+							}
+							if (myState.Underlined != oldUnderlined) {
+								setFontProperty(RtfReader::FONT_UNDERLINED);
+							}
+							if (myState.Alignment != oldAlignment) {
+								setAlignment();
+							}
+
+							break;
+						}
+						case '\\':
+							if (ptr > dataStart) {
+								processCharData(dataStart, ptr - dataStart);
+							}
+							dataStart = ptr + 1;
+							keyword.erase();
+							parserState = READ_KEYWORD;
+							break;
+						case 0x0d:
+						case 0x0a: // cr and lf are noise characters...
+							if (ptr > dataStart) {
+								processCharData(dataStart, ptr - dataStart);
+							}
+							dataStart = ptr + 1;
+							break;
+						default:
+							if (myState.ReadDataAsHex) {
+								if (imageStartOffset == -1) {
+									// First data character of a picture: remember where it starts.
+									imageStartOffset = myStream->offset() + (ptr - end);
+								}
+							}
+							break;
+					}
+					break;
+				case READ_HEX_SYMBOL:
+					hexString += *ptr;
+					if (hexString.size() == 2) {
+						char ch = std::strtol(hexString.c_str(), 0, 16);
+						hexString.erase();
+						processCharData(&ch, 1);
+						parserState = READ_NORMAL_DATA;
+						dataStart = ptr + 1;
+					}
+					break;
+				case READ_KEYWORD:
+					if (!std::isalpha(static_cast<unsigned char>(*ptr))) {
+						if ((ptr == dataStart) && (keyword.empty())) {
+							// A non-letter right after the backslash: a control symbol.
+							if (*ptr == '\'') {
+								parserState = READ_HEX_SYMBOL;
+							} else {
+								keyword = *ptr;
+								processKeyword(keyword);
+								parserState = READ_NORMAL_DATA;
+							}
+							dataStart = ptr + 1;
+						} else {
+							keyword.append(dataStart, ptr - dataStart);
+							if (*ptr == '-' || std::isdigit(static_cast<unsigned char>(*ptr))) {
+								dataStart = ptr;
+								parserState = READ_KEYWORD_PARAMETER;
+							} else {
+								// A single space after the keyword is consumed as its delimiter;
+								// any other character is examined again as normal data.
+								readNextChar = *ptr == ' ';
+								processKeyword(keyword);
+								parserState = READ_NORMAL_DATA;
+								dataStart = readNextChar ? ptr + 1 : ptr;
+							}
+						}
+					}
+					break;
+				case READ_KEYWORD_PARAMETER:
+					if (!std::isdigit(static_cast<unsigned char>(*ptr))) {
+						parameterString.append(dataStart, ptr - dataStart);
+						int parameter = std::atoi(parameterString.c_str());
+						parameterString.erase();
+						readNextChar = *ptr == ' ';
+						if ((keyword == "bin") && (parameter > 0)) {
+							// "bin" announces that many characters to be forwarded one by one.
+							myBinaryDataSize = parameter;
+							parserState = READ_BINARY_DATA;
+						} else {
+							processKeyword(keyword, &parameter);
+							parserState = READ_NORMAL_DATA;
+						}
+						dataStart = readNextChar ? ptr + 1 : ptr;
+					}
+					break;
+			}
+			if (readNextChar) {
+				++ptr;
+			} else {
+				readNextChar = true;
+			}
+		}
+		// The buffer is exhausted: hand over or save whatever is still pending.
+		if (dataStart < end) {
+			switch (parserState) {
+				case READ_NORMAL_DATA:
+					processCharData(dataStart, end - dataStart);
+					break;
+				case READ_KEYWORD:
+					keyword.append(dataStart, end - dataStart);
+					break;
+				case READ_KEYWORD_PARAMETER:
+					parameterString.append(dataStart, end - dataStart);
+					break;
+				default:
+					break;
+			}
+		}
+	}
+
+	return myIsInterrupted || myStateStack.empty();
+}
+
+// Dispatches one control word or control symbol.
+//
+// keyword   - the keyword text without the leading backslash.
+// parameter - its numeric parameter, or a null pointer when none was given.
+//
+// The special-mode flag (set by the "*" command) is always consumed. Inside a skipped
+// destination nothing else happens. An unknown keyword is ignored, except that directly after
+// "*" it switches the current destination to DESTINATION_SKIP.
+void RtfReader::processKeyword(const std::string &keyword, int *parameter) {
+	const bool followsStar = mySpecialMode;
+	mySpecialMode = false;
+
+	if (myState.Destination != RtfReader::DESTINATION_SKIP) {
+		const std::map<std::string, RtfCommand*>::const_iterator entry = ourKeywordMap.find(keyword);
+		if (entry != ourKeywordMap.end()) {
+			entry->second->run(*this, parameter);
+		} else if (followsStar) {
+			myState.Destination = RtfReader::DESTINATION_SKIP;
+		}
+	}
+}
+
+// Forwards a run of characters to addCharData(), unless the current destination is being skipped.
+//
+// data    - first character of the run.
+// len     - number of characters in the run.
+// convert - passed through to addCharData() unchanged.
+void RtfReader::processCharData(const char *data, std::size_t len, bool convert) {
+	if (myState.Destination == RtfReader::DESTINATION_SKIP) {
+		return;
+	}
+	addCharData(data, len, convert);
+}
+
+void RtfReader::interrupt() { // Requests that parsing stop.
+ myIsInterrupted = true; // parseDocument() checks this flag before reading each new buffer.
+}
+
+// Opens the file, parses it as RTF and releases the resources used for parsing.
+//
+// Returns false when the input stream is missing or cannot be opened; otherwise returns the
+// result of parseDocument().
+bool RtfReader::readDocument(const ZLFile &file) {
+	myFileName = file.path();
+	myStream = file.inputStream();
+	if (myStream.isNull()) {
+		return false;
+	}
+	if (!myStream->open()) {
+		return false;
+	}
+
+	fillKeywordMap();
+	myStreamBuffer = new char[rtfStreamBufferSize];
+
+	// Reset the parser flags and the formatting state before every document.
+	myIsInterrupted = false;
+	mySpecialMode = false;
+
+	myState.Destination = RtfReader::DESTINATION_NONE;
+	myState.ReadDataAsHex = false;
+	myState.Alignment = ALIGN_UNDEFINED;
+	myState.Bold = false;
+	myState.Italic = false;
+	myState.Underlined = false;
+
+	const bool parsedOk = parseDocument();
+
+	// Unbalanced groups may leave saved states behind; drop them.
+	for (; !myStateStack.empty(); myStateStack.pop()) {
+	}
+
+	delete[] myStreamBuffer;
+	myStream->close();
+
+	return parsedOk;
+}