diff options
Diffstat (limited to 'fbreader/src/formats/rtf/RtfReader.cpp')
-rw-r--r-- | fbreader/src/formats/rtf/RtfReader.cpp | 470 |
1 files changed, 0 insertions, 470 deletions
diff --git a/fbreader/src/formats/rtf/RtfReader.cpp b/fbreader/src/formats/rtf/RtfReader.cpp deleted file mode 100644 index 91fea0c..0000000 --- a/fbreader/src/formats/rtf/RtfReader.cpp +++ /dev/null @@ -1,470 +0,0 @@ -/* - * Copyright (C) 2004-2012 Geometer Plus <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#include <cstdlib> -#include <cctype> - -#include <ZLFile.h> -#include <ZLInputStream.h> - -#include "RtfReader.h" - -std::map<std::string, RtfCommand*> RtfReader::ourKeywordMap; - -static const int rtfStreamBufferSize = 4096; - -RtfReader::RtfReader(const std::string &encoding) : EncodedTextReader(encoding) { - myNextImageMimeType = ZLMimeType::EMPTY; -} - -RtfReader::~RtfReader() { -} - -RtfCommand::~RtfCommand() { -} - -void RtfDummyCommand::run(RtfReader&, int*) const { -} - -void RtfNewParagraphCommand::run(RtfReader &reader, int*) const { - reader.newParagraph(); -} - -RtfFontPropertyCommand::RtfFontPropertyCommand(RtfReader::FontProperty property) : myProperty(property) { -} - -void RtfFontPropertyCommand::run(RtfReader &reader, int *parameter) const { - const bool start = (parameter == 0) || (*parameter != 0); - switch (myProperty) { - case RtfReader::FONT_BOLD: - if (reader.myState.Bold != start) { - reader.myState.Bold = start; - reader.setFontProperty(RtfReader::FONT_BOLD); - } - break; - case RtfReader::FONT_ITALIC: - if (reader.myState.Italic != start) { - reader.myState.Italic = start; - reader.setFontProperty(RtfReader::FONT_ITALIC); - } - break; - case RtfReader::FONT_UNDERLINED: - if (reader.myState.Underlined != start) { - reader.myState.Underlined = start; - reader.setFontProperty(RtfReader::FONT_UNDERLINED); - } - break; - } -} - -RtfAlignmentCommand::RtfAlignmentCommand(ZLTextAlignmentType alignment) : myAlignment(alignment) { -} - -void RtfAlignmentCommand::run(RtfReader &reader, int*) const { - if (reader.myState.Alignment != myAlignment) { - reader.myState.Alignment = myAlignment; - reader.setAlignment(); - } -} - -RtfCharCommand::RtfCharCommand(const std::string &chr) : myChar(chr) { -} - -void RtfCharCommand::run(RtfReader &reader, int*) const { - reader.processCharData(myChar.data(), myChar.length(), false); -} - -RtfDestinationCommand::RtfDestinationCommand(RtfReader::DestinationType destination) : myDestination(destination) { -} - -void RtfDestinationCommand::run(RtfReader &reader, int*) const { - if (reader.myState.Destination == myDestination) { - return; - } - reader.myState.Destination = myDestination; - if (myDestination == RtfReader::DESTINATION_PICTURE) { - reader.myState.ReadDataAsHex = true; - reader.myNextImageMimeType = ZLMimeType::EMPTY; - } - reader.switchDestination(myDestination, true); -} - -void RtfStyleCommand::run(RtfReader &reader, int*) const { - if (reader.myState.Destination == RtfReader::DESTINATION_STYLESHEET) { - //std::cerr << "Add style index: " << val << "\n"; - - //sprintf(style_attributes[0], "%i", val); - } else /*if (myState.Destination == rdsContent)*/ { - //std::cerr << "Set style index: " << val << "\n"; - - //sprintf(style_attributes[0], "%i", val); - } -} - -void RtfCodepageCommand::run(RtfReader &reader, int *parameter) const { - if (parameter != 0) { - reader.setEncoding(*parameter); - } -} - -void RtfSpecialCommand::run(RtfReader &reader, int*) const { - reader.mySpecialMode = true; -} - -RtfPictureCommand::RtfPictureCommand(shared_ptr<ZLMimeType> mimeType) : myMimeType(mimeType) { -} - -void RtfPictureCommand::run(RtfReader &reader, int*) const { - reader.myNextImageMimeType = myMimeType; -} - -void RtfFontResetCommand::run(RtfReader &reader, int*) const { - if (reader.myState.Bold) { - reader.myState.Bold = false; - reader.setFontProperty(RtfReader::FONT_BOLD); - } - if (reader.myState.Italic) { - reader.myState.Italic = false; - reader.setFontProperty(RtfReader::FONT_ITALIC); - } - if (reader.myState.Underlined) { - reader.myState.Underlined = false; - reader.setFontProperty(RtfReader::FONT_UNDERLINED); - } -} - -void RtfReader::addAction(const std::string &tag, RtfCommand *command) { - ourKeywordMap.insert(std::make_pair(tag, command)); -} - -void RtfReader::fillKeywordMap() { - if (ourKeywordMap.empty()) { - addAction("*", new RtfSpecialCommand()); - addAction("ansicpg", new RtfCodepageCommand()); - - static const char *keywordsToSkip[] = {"buptim", "colortbl", "comment", "creatim", "doccomm", "fonttbl", "footer", "footerf", "footerl", "footerr", "ftncn", "ftnsep", "ftnsepc", "header", "headerf", "headerl", "headerr", "keywords", "operator", "printim", "private1", "revtim", "rxe", "subject", "tc", "txe", "xe", 0}; - RtfCommand *skipCommand = new RtfDestinationCommand(RtfReader::DESTINATION_SKIP); - for (const char **i = keywordsToSkip; *i != 0; ++i) { - addAction(*i, skipCommand); - } - addAction("shppict", new RtfDummyCommand()); - addAction("info", new RtfDestinationCommand(RtfReader::DESTINATION_INFO)); - addAction("title", new RtfDestinationCommand(RtfReader::DESTINATION_TITLE)); - addAction("author", new RtfDestinationCommand(RtfReader::DESTINATION_AUTHOR)); - addAction("pict", new RtfDestinationCommand(RtfReader::DESTINATION_PICTURE)); - addAction("stylesheet", new RtfDestinationCommand(RtfReader::DESTINATION_STYLESHEET)); - addAction("footnote", new RtfDestinationCommand(RtfReader::DESTINATION_FOOTNOTE)); - - RtfCommand *newParagraphCommand = new RtfNewParagraphCommand(); - addAction("\n", newParagraphCommand); - addAction("\r", newParagraphCommand); - addAction("par", newParagraphCommand); - - addAction("\x09", new RtfCharCommand("\x09")); - addAction("_", new RtfCharCommand("-")); - addAction("\\", new RtfCharCommand("\\")); - addAction("{", new RtfCharCommand("{")); - addAction("}", new RtfCharCommand("}")); - addAction("bullet", new RtfCharCommand("\xE2\x80\xA2")); // • - addAction("endash", new RtfCharCommand("\xE2\x80\x93")); // – - addAction("emdash", new RtfCharCommand("\xE2\x80\x94")); // — - addAction("~", new RtfCharCommand("\xC0\xA0")); // - addAction("enspace", new RtfCharCommand("\xE2\x80\x82")); //   - addAction("emspace", new RtfCharCommand("\xE2\x80\x83")); //   - addAction("lquote", new RtfCharCommand("\xE2\x80\x98")); // ‘ - addAction("rquote", new RtfCharCommand("\xE2\x80\x99")); // ’ - addAction("ldblquote", new RtfCharCommand("\xE2\x80\x9C")); // “ - addAction("rdblquote", new RtfCharCommand("\xE2\x80\x9D")); // ” - - addAction("jpegblip", new RtfPictureCommand(ZLMimeType::IMAGE_JPEG)); - addAction("pngblip", new RtfPictureCommand(ZLMimeType::IMAGE_PNG)); - - addAction("s", new RtfStyleCommand()); - - addAction("qc", new RtfAlignmentCommand(ALIGN_CENTER)); - addAction("ql", new RtfAlignmentCommand(ALIGN_LEFT)); - addAction("qr", new RtfAlignmentCommand(ALIGN_RIGHT)); - addAction("qj", new RtfAlignmentCommand(ALIGN_JUSTIFY)); - addAction("pard", new RtfAlignmentCommand(ALIGN_UNDEFINED)); - - addAction("b", new RtfFontPropertyCommand(RtfReader::FONT_BOLD)); - addAction("i", new RtfFontPropertyCommand(RtfReader::FONT_ITALIC)); - addAction("u", new RtfFontPropertyCommand(RtfReader::FONT_UNDERLINED)); - addAction("plain", new RtfFontResetCommand()); - } -} - -bool RtfReader::parseDocument() { - enum { - READ_NORMAL_DATA, - READ_BINARY_DATA, - READ_HEX_SYMBOL, - READ_KEYWORD, - READ_KEYWORD_PARAMETER, - READ_END_OF_FILE - } parserState = READ_NORMAL_DATA; - - std::string keyword; - std::string parameterString; - std::string hexString; - int imageStartOffset = -1; - - while (!myIsInterrupted) { - const char *ptr = myStreamBuffer; - const char *end = myStreamBuffer + myStream->read(myStreamBuffer, rtfStreamBufferSize); - if (ptr == end) { - break; - } - const char *dataStart = ptr; - bool readNextChar = true; - while (ptr != end) { - switch (parserState) { - case READ_END_OF_FILE: - if (*ptr != '}' && !std::isspace(*ptr)) { - return false; - } - break; - case READ_BINARY_DATA: - // TODO: optimize - processCharData(ptr, 1); - --myBinaryDataSize; - if (myBinaryDataSize == 0) { - parserState = READ_NORMAL_DATA; - } - break; - case READ_NORMAL_DATA: - switch (*ptr) { - case '{': - if (ptr > dataStart) { - processCharData(dataStart, ptr - dataStart); - } - dataStart = ptr + 1; - myStateStack.push(myState); - myState.ReadDataAsHex = false; - break; - case '}': - { - if (ptr > dataStart) { - processCharData(dataStart, ptr - dataStart); - } - dataStart = ptr + 1; - - if (imageStartOffset >= 0) { - if (ZLMimeType::EMPTY != myNextImageMimeType) { - const int imageSize = myStream->offset() + (ptr - end) - imageStartOffset; - insertImage(myNextImageMimeType, myFileName, imageStartOffset, imageSize); - } - imageStartOffset = -1; - } - - if (myStateStack.empty()) { - parserState = READ_END_OF_FILE; - break; - } - - if (myState.Destination != myStateStack.top().Destination) { - switchDestination(myState.Destination, false); - switchDestination(myStateStack.top().Destination, true); - } - - bool oldItalic = myState.Italic; - bool oldBold = myState.Bold; - bool oldUnderlined = myState.Underlined; - ZLTextAlignmentType oldAlignment = myState.Alignment; - myState = myStateStack.top(); - myStateStack.pop(); - - if (myState.Italic != oldItalic) { - setFontProperty(RtfReader::FONT_ITALIC); - } - if (myState.Bold != oldBold) { - setFontProperty(RtfReader::FONT_BOLD); - } - if (myState.Underlined != oldUnderlined) { - setFontProperty(RtfReader::FONT_UNDERLINED); - } - if (myState.Alignment != oldAlignment) { - setAlignment(); - } - - break; - } - case '\\': - if (ptr > dataStart) { - processCharData(dataStart, ptr - dataStart); - } - dataStart = ptr + 1; - keyword.erase(); - parserState = READ_KEYWORD; - break; - case 0x0d: - case 0x0a: // cr and lf are noise characters... - if (ptr > dataStart) { - processCharData(dataStart, ptr - dataStart); - } - dataStart = ptr + 1; - break; - default: - if (myState.ReadDataAsHex) { - if (imageStartOffset == -1) { - imageStartOffset = myStream->offset() + (ptr - end); - } - } - break; - } - break; - case READ_HEX_SYMBOL: - hexString += *ptr; - if (hexString.size() == 2) { - char ch = std::strtol(hexString.c_str(), 0, 16); - hexString.erase(); - processCharData(&ch, 1); - parserState = READ_NORMAL_DATA; - dataStart = ptr + 1; - } - break; - case READ_KEYWORD: - if (!std::isalpha(*ptr)) { - if ((ptr == dataStart) && (keyword.empty())) { - if (*ptr == '\'') { - parserState = READ_HEX_SYMBOL; - } else { - keyword = *ptr; - processKeyword(keyword); - parserState = READ_NORMAL_DATA; - } - dataStart = ptr + 1; - } else { - keyword.append(dataStart, ptr - dataStart); - if (*ptr == '-' || std::isdigit(*ptr)) { - dataStart = ptr; - parserState = READ_KEYWORD_PARAMETER; - } else { - readNextChar = *ptr == ' '; - processKeyword(keyword); - parserState = READ_NORMAL_DATA; - dataStart = readNextChar ? ptr + 1 : ptr; - } - } - } - break; - case READ_KEYWORD_PARAMETER: - if (!std::isdigit(*ptr)) { - parameterString.append(dataStart, ptr - dataStart); - int parameter = std::atoi(parameterString.c_str()); - parameterString.erase(); - readNextChar = *ptr == ' '; - if ((keyword == "bin") && (parameter > 0)) { - myBinaryDataSize = parameter; - parserState = READ_BINARY_DATA; - } else { - processKeyword(keyword, ¶meter); - parserState = READ_NORMAL_DATA; - } - dataStart = readNextChar ? ptr + 1 : ptr; - } - break; - } - if (readNextChar) { - ++ptr; - } else { - readNextChar = true; - } - } - if (dataStart < end) { - switch (parserState) { - case READ_NORMAL_DATA: - processCharData(dataStart, end - dataStart); - case READ_KEYWORD: - keyword.append(dataStart, end - dataStart); - break; - case READ_KEYWORD_PARAMETER: - parameterString.append(dataStart, end - dataStart); - break; - default: - break; - } - } - } - - return myIsInterrupted || myStateStack.empty(); -} - -void RtfReader::processKeyword(const std::string &keyword, int *parameter) { - const bool wasSpecialMode = mySpecialMode; - mySpecialMode = false; - if (myState.Destination == RtfReader::DESTINATION_SKIP) { - return; - } - - std::map<std::string, RtfCommand*>::const_iterator it = ourKeywordMap.find(keyword); - - if (it == ourKeywordMap.end()) { - if (wasSpecialMode) { - myState.Destination = RtfReader::DESTINATION_SKIP; - } - return; - } - - it->second->run(*this, parameter); -} - -void RtfReader::processCharData(const char *data, std::size_t len, bool convert) { - if (myState.Destination != RtfReader::DESTINATION_SKIP) { - addCharData(data, len, convert); - } -} - -void RtfReader::interrupt() { - myIsInterrupted = true; -} - -bool RtfReader::readDocument(const ZLFile &file) { - myFileName = file.path(); - myStream = file.inputStream(); - if (myStream.isNull() || !myStream->open()) { - return false; - } - - fillKeywordMap(); - - myStreamBuffer = new char[rtfStreamBufferSize]; - - myIsInterrupted = false; - - mySpecialMode = false; - - myState.Alignment = ALIGN_UNDEFINED; - myState.Italic = false; - myState.Bold = false; - myState.Underlined = false; - myState.Destination = RtfReader::DESTINATION_NONE; - myState.ReadDataAsHex = false; - - bool code = parseDocument(); - - while (!myStateStack.empty()) { - myStateStack.pop(); - } - - delete[] myStreamBuffer; - myStream->close(); - - return code; -} |