diff options
Diffstat (limited to 'reader/src/formats/util')
-rw-r--r-- | reader/src/formats/util/EntityFilesCollector.cpp | 62 | ||||
-rw-r--r-- | reader/src/formats/util/EntityFilesCollector.h | 42 | ||||
-rw-r--r-- | reader/src/formats/util/MergedStream.cpp | 72 | ||||
-rw-r--r-- | reader/src/formats/util/MergedStream.h | 45 | ||||
-rw-r--r-- | reader/src/formats/util/MiscUtil.cpp | 91 | ||||
-rw-r--r-- | reader/src/formats/util/MiscUtil.h | 39 | ||||
-rw-r--r-- | reader/src/formats/util/TextFormatDetector.cpp | 77 | ||||
-rw-r--r-- | reader/src/formats/util/TextFormatDetector.h | 35 | ||||
-rw-r--r-- | reader/src/formats/util/XMLTextStream.cpp | 124 | ||||
-rw-r--r-- | reader/src/formats/util/XMLTextStream.h | 52 |
10 files changed, 639 insertions, 0 deletions
diff --git a/reader/src/formats/util/EntityFilesCollector.cpp b/reader/src/formats/util/EntityFilesCollector.cpp new file mode 100644 index 0000000..075bd29 --- /dev/null +++ b/reader/src/formats/util/EntityFilesCollector.cpp @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2004-2012 Geometer Plus <[email protected]> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. 
+ */ + +#include <ZLStringUtil.h> +#include <ZLibrary.h> +#include <ZLFile.h> +#include <ZLDir.h> + +#include "EntityFilesCollector.h" + +EntityFilesCollector *EntityFilesCollector::ourInstance = 0; + +EntityFilesCollector &EntityFilesCollector::Instance() { + if (ourInstance == 0) { + ourInstance = new EntityFilesCollector(); + } + return *ourInstance; +} + +const std::vector<std::string> &EntityFilesCollector::externalDTDs(const std::string &format) { + std::map<std::string,std::vector<std::string> >::const_iterator it = myCollections.find(format); + if (it != myCollections.end()) { + return it->second; + } + + std::vector<std::string> &collection = myCollections[format]; + + std::string directoryName = + ZLibrary::ApplicationDirectory() + ZLibrary::FileNameDelimiter + + "formats" + ZLibrary::FileNameDelimiter + format; + shared_ptr<ZLDir> dtdPath = ZLFile(directoryName).directory(); + if (!dtdPath.isNull()) { + std::vector<std::string> files; + dtdPath->collectFiles(files, false); + for (std::vector<std::string>::const_iterator it = files.begin(); it != files.end(); ++it) { + if (ZLStringUtil::stringEndsWith(*it, ".ent")) { + collection.push_back(dtdPath->itemPath(*it)); + } + } + } + + return collection; +} + +EntityFilesCollector::EntityFilesCollector() { +} diff --git a/reader/src/formats/util/EntityFilesCollector.h b/reader/src/formats/util/EntityFilesCollector.h new file mode 100644 index 0000000..9967b3d --- /dev/null +++ b/reader/src/formats/util/EntityFilesCollector.h @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2004-2012 Geometer Plus <[email protected]> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
#ifndef __ENTITYFILESCOLLECTOR_H__
#define __ENTITYFILESCOLLECTOR_H__

#include <map>
#include <vector>
#include <string>

/*
 * Singleton that maps a format name to a list of external entity file
 * paths, remembering each list in myCollections once it has been built.
 *
 * Obtain the object with Instance(); it can be neither constructed nor
 * copied from outside the class.
 */
class EntityFilesCollector {

public:
	// Returns the single shared instance.
	static EntityFilesCollector &Instance();

	// Returns the list of entity file paths for the given format name.
	// The returned reference refers to storage owned by this object.
	const std::vector<std::string> &externalDTDs(const std::string &format);

private:
	EntityFilesCollector();

	// Copying a singleton is always a mistake: these are declared private
	// and deliberately left undefined so that any attempt fails to build.
	EntityFilesCollector(const EntityFilesCollector&);
	EntityFilesCollector &operator = (const EntityFilesCollector&);

private:
	static EntityFilesCollector *ourInstance;
	// format name -> entity file paths collected for that format
	std::map<std::string,std::vector<std::string> > myCollections;
};

#endif /* __ENTITYFILESCOLLECTOR_H__ */
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#include "MergedStream.h" + +bool MergedStream::open() { + close(); + resetToStart(); + myOffset = 0; + myCurrentStream = nextStream(); + return !myCurrentStream.isNull() && myCurrentStream->open(); +} + +std::size_t MergedStream::read(char *buffer, std::size_t maxSize) { + std::size_t bytesToRead = maxSize; + while ((bytesToRead > 0) && !myCurrentStream.isNull()) { + std::size_t len = myCurrentStream->read(buffer, bytesToRead); + bytesToRead -= len; + if (buffer != 0) { + buffer += len; + } + if (bytesToRead != 0) { + if (buffer != 0) { + *buffer++ = '\n'; + } + bytesToRead--; + myCurrentStream = nextStream(); + if (myCurrentStream.isNull() || !myCurrentStream->open()) { + break; + } + } + } + myOffset += maxSize - bytesToRead; + return maxSize - bytesToRead; +} + +void MergedStream::close() { + myCurrentStream.reset(); +} + +void MergedStream::seek(int offset, bool absoluteOffset) { + // works for nonnegative offsets only + if (absoluteOffset) { + offset -= myOffset; + } + read(0, offset); +} + +std::size_t MergedStream::offset() const { + return myOffset; +} + +std::size_t MergedStream::sizeOfOpened() { + // coudn't be implemented + return 0; +} diff --git a/reader/src/formats/util/MergedStream.h b/reader/src/formats/util/MergedStream.h new file mode 100644 index 0000000..3f982ee --- /dev/null +++ b/reader/src/formats/util/MergedStream.h @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2008-2012 Geometer Plus <[email protected]> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#ifndef __MERGEDSTREAM_H__ +#define __MERGEDSTREAM_H__ + +#include <shared_ptr.h> +#include <ZLInputStream.h> + +class MergedStream : public ZLInputStream { + +protected: + virtual shared_ptr<ZLInputStream> nextStream() = 0; + virtual void resetToStart() = 0; + +private: + bool open(); + std::size_t read(char *buffer, std::size_t maxSize); + void close(); + void seek(int offset, bool absoluteOffset); + std::size_t offset() const; + std::size_t sizeOfOpened(); + +private: + shared_ptr<ZLInputStream> myCurrentStream; + std::size_t myOffset; +}; + +#endif /* __MERGEDSTREAM_H__ */ diff --git a/reader/src/formats/util/MiscUtil.cpp b/reader/src/formats/util/MiscUtil.cpp new file mode 100644 index 0000000..1a91406 --- /dev/null +++ b/reader/src/formats/util/MiscUtil.cpp @@ -0,0 +1,91 @@ +/* + * Copyright (C) 2004-2012 Geometer Plus <[email protected]> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#include <cstdlib> + +#include <ZLApplication.h> +#include <ZLFile.h> +#include <ZLStringUtil.h> + +#include "MiscUtil.h" + +FBTextKind MiscUtil::referenceType(const std::string &link) { + std::string lowerCasedLink = link; + bool isFileReference = + ZLStringUtil::stringStartsWith(lowerCasedLink, "http://") || + ZLStringUtil::stringStartsWith(lowerCasedLink, "https://") || + ZLStringUtil::stringStartsWith(lowerCasedLink, "ftp://"); + if (!isFileReference) { + return ZLStringUtil::stringStartsWith(lowerCasedLink, "mailto:") ? EXTERNAL_HYPERLINK : INTERNAL_HYPERLINK; + } + static const std::string FeedBooksPrefix0 = "http://feedbooks.com/book/stanza/"; + static const std::string FeedBooksPrefix1 = "http://www.feedbooks.com/book/stanza/"; + bool isBookHyperlink = + ZLStringUtil::stringStartsWith(lowerCasedLink, FeedBooksPrefix0) || + ZLStringUtil::stringStartsWith(lowerCasedLink, FeedBooksPrefix1) || + ZLStringUtil::stringEndsWith(lowerCasedLink, ".epub") || + ZLStringUtil::stringEndsWith(lowerCasedLink, ".mobi") || + ZLStringUtil::stringEndsWith(lowerCasedLink, ".chm") || + ZLStringUtil::stringEndsWith(lowerCasedLink, ".fb2"); + return isBookHyperlink ? 
BOOK_HYPERLINK : EXTERNAL_HYPERLINK; +} + +std::string MiscUtil::htmlDirectoryPrefix(const std::string &fileName) { + ZLFile file(fileName); + std::string shortName = file.name(false); + std::string path = file.path(); + int index = -1; + if ((path.length() > shortName.length()) && + (path[path.length() - shortName.length() - 1] == ':')) { + index = shortName.rfind('/'); + } + return path.substr(0, path.length() - shortName.length() + index + 1); +} + +std::string MiscUtil::htmlFileName(const std::string &fileName) { + ZLFile file(fileName); + std::string shortName = file.name(false); + std::string path = file.path(); + int index = -1; + if ((path.length() > shortName.length()) && + (path[path.length() - shortName.length() - 1] == ':')) { + index = shortName.rfind('/'); + } + return path.substr(path.length() - shortName.length() + index + 1); +} + +std::string MiscUtil::decodeHtmlURL(const std::string &encoded) { + char buffer[3]; + buffer[2] = '\0'; + + std::string decoded; + const int len = encoded.length(); + decoded.reserve(len); + for (int i = 0; i < len; i++) { + if ((encoded[i] == '%') && (i < len - 2)) { + buffer[0] = *(encoded.data() + i + 1); + buffer[1] = *(encoded.data() + i + 2); + decoded += (char)std::strtol(buffer, 0, 16); + i += 2; + } else { + decoded += encoded[i]; + } + } + return decoded; +} diff --git a/reader/src/formats/util/MiscUtil.h b/reader/src/formats/util/MiscUtil.h new file mode 100644 index 0000000..c47d84a --- /dev/null +++ b/reader/src/formats/util/MiscUtil.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2004-2012 Geometer Plus <[email protected]> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#ifndef __MISCUTIL_H__ +#define __MISCUTIL_H__ + +#include <string> + +#include "../../bookmodel/FBTextKind.h" + +class MiscUtil { + +private: + MiscUtil(); + +public: + static FBTextKind referenceType(const std::string &link); + static std::string htmlDirectoryPrefix(const std::string &fileName); + static std::string htmlFileName(const std::string &fileName); + static std::string decodeHtmlURL(const std::string &encodedURL); +}; + +#endif /* __MISCUTIL_H__ */ diff --git a/reader/src/formats/util/TextFormatDetector.cpp b/reader/src/formats/util/TextFormatDetector.cpp new file mode 100644 index 0000000..4a3ef67 --- /dev/null +++ b/reader/src/formats/util/TextFormatDetector.cpp @@ -0,0 +1,77 @@ +/* + * Copyright (C) 2004-2012 Geometer Plus <[email protected]> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. 
+ */ + +#include <cstring> +#include <cctype> +#include <algorithm> + +#include <ZLInputStream.h> +#include <ZLUnicodeUtil.h> + +#include "TextFormatDetector.h" + +TextFormatDetector::TextFormatDetector() { +} + +TextFormatDetector::~TextFormatDetector() { +} + +bool TextFormatDetector::isHtml(ZLInputStream &stream) const { + if (!stream.open()) { + return false; + } + + const std::size_t bufferSize = 1024; + char *buffer = new char[bufferSize]; + std::string sixBytes; + int valuableBytesCounter = 0; + bool skipFlag = true; + while (valuableBytesCounter < 6) { + std::size_t size = stream.read(buffer, bufferSize); + if (size == 0) { + break; + } + std::size_t index; + for (index = 0; skipFlag && (index < size); ++index) { + if (!std::isspace((unsigned char)buffer[index])) { + skipFlag = false; + break; + } + } + if (!skipFlag && index < size) { + int bytes = std::min(6 - valuableBytesCounter, (int)(size - index)); + sixBytes = std::string(buffer + index, bytes); + valuableBytesCounter += bytes; + } + } + stream.close(); + delete[] buffer; + return ZLUnicodeUtil::toLower(sixBytes) == "<html>"; +} + +bool TextFormatDetector::isPPL(ZLInputStream &stream) const { + if (!stream.open()) { + return false; + } + + char buffer[5]; + bool result = stream.read(buffer, 5) == 5 && std::strncmp(buffer, "PPL\r\n", 5) == 0; + stream.close(); + return result; +} diff --git a/reader/src/formats/util/TextFormatDetector.h b/reader/src/formats/util/TextFormatDetector.h new file mode 100644 index 0000000..c86b90b --- /dev/null +++ b/reader/src/formats/util/TextFormatDetector.h @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2004-2012 Geometer Plus <[email protected]> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
#ifndef __TEXTFORMATDETECTOR_H__
#define __TEXTFORMATDETECTOR_H__

class ZLInputStream;

/*
 * Stateless helper that inspects the beginning of a stream to recognise
 * a text format. Both checks take the stream by reference and are const.
 */
class TextFormatDetector {

public:
	TextFormatDetector();
	~TextFormatDetector();

	// Checks the start of the stream for an "<html>" signature.
	bool isHtml(ZLInputStream &stream) const;

	// Checks the start of the stream for the "PPL\r\n" signature.
	bool isPPL(ZLInputStream &stream) const;
};

#endif /* __TEXTFORMATDETECTOR_H__ */
+ */ + +#include <cstring> + +#include <ZLXMLReader.h> +#include <ZLUnicodeUtil.h> + +#include <ZLPlainAsynchronousInputStream.h> + +#include "XMLTextStream.h" + +class XMLTextReader : public ZLXMLReader { + +public: + XMLTextReader(std::string &buffer, const std::string &startTag); + +private: + void startElementHandler(const char *tag, const char **attributes); + void characterDataHandler(const char *text, std::size_t len); + +private: + const std::string myStartTag; + std::string &myBuffer; + bool myStarted; +}; + +XMLTextReader::XMLTextReader(std::string &buffer, const std::string &startTag) : myStartTag(ZLUnicodeUtil::toLower(startTag)), myBuffer(buffer), myStarted(myStartTag.empty()) { +} + +void XMLTextReader::startElementHandler(const char *tag, const char**) { + if (!myStarted && (myStartTag == ZLUnicodeUtil::toLower(tag))) { + myStarted = true; + } +} + +void XMLTextReader::characterDataHandler(const char *text, std::size_t len) { + if (myStarted) { + myBuffer.append(text, len); + } +} + +XMLTextStream::XMLTextStream(shared_ptr<ZLInputStream> base, const std::string &startTag) : myBase(base), myStreamBuffer(2048, '\0') { + myReader = new XMLTextReader(myDataBuffer, startTag); +} + +XMLTextStream::~XMLTextStream() { +} + +bool XMLTextStream::open() { + close(); + if (myBase.isNull() || !myBase->open()) { + return false; + } + myStream = new ZLPlainAsynchronousInputStream(); + myOffset = 0; + return true; +} + +std::size_t XMLTextStream::read(char *buffer, std::size_t maxSize) { + while (myDataBuffer.size() < maxSize) { + std::size_t len = myBase->read((char*)myStreamBuffer.data(), 2048); + /*if ((len == 0) || !myReader->readFromBuffer(myStreamBuffer.data(), len)) { + break; + }*/ + if (len == 0) { + break; + } + myStream->setBuffer(myStreamBuffer.data(), len); + if (!myReader->readDocument(myStream)) { + break; + } + } + std::size_t realSize = std::min(myDataBuffer.size(), maxSize); + if (buffer != 0) { + std::memcpy(buffer, myDataBuffer.data(), realSize); 
+ } + myDataBuffer.erase(0, realSize); + myOffset += realSize; + return realSize; +} + +void XMLTextStream::close() { + if (!myStream.isNull()) { + myStream->setEof(); + myReader->readDocument(myStream); + myStream.reset(); + } + myBase->close(); + myDataBuffer.erase(); +} + +void XMLTextStream::seek(int offset, bool absoluteOffset) { + // works for nonnegative offsets only + if (absoluteOffset) { + offset -= myOffset; + } + read(0, offset); +} + +std::size_t XMLTextStream::offset() const { + return myOffset; +} + +std::size_t XMLTextStream::sizeOfOpened() { + // couldn't be implemented + return 0; +} diff --git a/reader/src/formats/util/XMLTextStream.h b/reader/src/formats/util/XMLTextStream.h new file mode 100644 index 0000000..f3151c6 --- /dev/null +++ b/reader/src/formats/util/XMLTextStream.h @@ -0,0 +1,52 @@ +/* + * Copyright (C) 2008-2012 Geometer Plus <[email protected]> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. 
+ */ + +#ifndef __XMLTEXTSTREAM_H__ +#define __XMLTEXTSTREAM_H__ + +#include <shared_ptr.h> +#include <ZLInputStream.h> +#include <ZLAsynchronousInputStream.h> + +class XMLTextReader; + +class XMLTextStream : public ZLInputStream { + +public: + XMLTextStream(shared_ptr<ZLInputStream> base, const std::string &startTag); + ~XMLTextStream(); + +private: + bool open(); + std::size_t read(char *buffer, std::size_t maxSize); + void close(); + void seek(int offset, bool absoluteOffset); + std::size_t offset() const; + std::size_t sizeOfOpened(); + +private: + shared_ptr<ZLInputStream> myBase; + shared_ptr<XMLTextReader> myReader; + shared_ptr<ZLAsynchronousInputStream> myStream; + std::string myStreamBuffer; + std::string myDataBuffer; + std::size_t myOffset; +}; + +#endif /* __XMLTEXTSTREAM_H__ */ |