summaryrefslogtreecommitdiffstats
path: root/reader/src/formats
diff options
context:
space:
mode:
authorMichele Calgaro <[email protected]>2024-06-07 23:30:05 +0900
committerMichele Calgaro <[email protected]>2024-06-07 23:30:05 +0900
commit17b259df9cb6b28779d4881b2b6c805ee2e48eea (patch)
tree5ed61937459cb7081089111b0242c01ec178f1f3 /reader/src/formats
parent1cba8bce178eb2d6719c6f7f21e2c9352c5513a6 (diff)
downloadtde-ebook-reader-17b259df9cb6b28779d4881b2b6c805ee2e48eea.tar.gz
tde-ebook-reader-17b259df9cb6b28779d4881b2b6c805ee2e48eea.zip
Rename to tde-ebook-reader
Signed-off-by: Michele Calgaro <[email protected]>
Diffstat (limited to 'reader/src/formats')
-rw-r--r--reader/src/formats/EncodedTextReader.cpp29
-rw-r--r--reader/src/formats/EncodedTextReader.h37
-rw-r--r--reader/src/formats/FormatPlugin.cpp106
-rw-r--r--reader/src/formats/FormatPlugin.h99
-rw-r--r--reader/src/formats/PluginCollection.cpp89
-rw-r--r--reader/src/formats/chm/BitStream.cpp44
-rw-r--r--reader/src/formats/chm/BitStream.h111
-rw-r--r--reader/src/formats/chm/CHMFile.cpp490
-rw-r--r--reader/src/formats/chm/CHMFile.h128
-rw-r--r--reader/src/formats/chm/CHMFileImage.cpp33
-rw-r--r--reader/src/formats/chm/CHMFileImage.h40
-rw-r--r--reader/src/formats/chm/CHMPlugin.cpp252
-rw-r--r--reader/src/formats/chm/CHMPlugin.h41
-rw-r--r--reader/src/formats/chm/CHMReferenceCollection.cpp91
-rw-r--r--reader/src/formats/chm/CHMReferenceCollection.h50
-rw-r--r--reader/src/formats/chm/E8Decoder.cpp61
-rw-r--r--reader/src/formats/chm/HHCReader.cpp107
-rw-r--r--reader/src/formats/chm/HHCReader.h57
-rw-r--r--reader/src/formats/chm/HHCReferenceCollector.cpp62
-rw-r--r--reader/src/formats/chm/HHCReferenceCollector.h45
-rw-r--r--reader/src/formats/chm/HtmlSectionReader.cpp128
-rw-r--r--reader/src/formats/chm/HtmlSectionReader.h50
-rw-r--r--reader/src/formats/chm/HuffmanDecoder.cpp60
-rw-r--r--reader/src/formats/chm/HuffmanDecoder.h53
-rw-r--r--reader/src/formats/chm/LZXDecompressor.cpp287
-rw-r--r--reader/src/formats/chm/LZXDecompressor.h88
-rw-r--r--reader/src/formats/css/StyleSheetParser.cpp244
-rw-r--r--reader/src/formats/css/StyleSheetParser.h84
-rw-r--r--reader/src/formats/css/StyleSheetTable.cpp267
-rw-r--r--reader/src/formats/css/StyleSheetTable.h76
-rw-r--r--reader/src/formats/doc/DocBookReader.cpp377
-rw-r--r--reader/src/formats/doc/DocBookReader.h103
-rw-r--r--reader/src/formats/doc/DocFloatImageReader.cpp384
-rw-r--r--reader/src/formats/doc/DocFloatImageReader.h107
-rw-r--r--reader/src/formats/doc/DocInlineImageReader.cpp148
-rw-r--r--reader/src/formats/doc/DocInlineImageReader.h37
-rw-r--r--reader/src/formats/doc/DocMetaInfoReader.cpp38
-rw-r--r--reader/src/formats/doc/DocMetaInfoReader.h46
-rw-r--r--reader/src/formats/doc/DocPlugin.cpp71
-rw-r--r--reader/src/formats/doc/DocPlugin.h39
-rw-r--r--reader/src/formats/doc/DocStreams.cpp202
-rw-r--r--reader/src/formats/doc/DocStreams.h73
-rw-r--r--reader/src/formats/doc/OleMainStream.cpp1085
-rw-r--r--reader/src/formats/doc/OleMainStream.h223
-rw-r--r--reader/src/formats/doc/OleStorage.cpp304
-rw-r--r--reader/src/formats/doc/OleStorage.h92
-rw-r--r--reader/src/formats/doc/OleStream.cpp221
-rw-r--r--reader/src/formats/doc/OleStream.h58
-rw-r--r--reader/src/formats/doc/OleStreamParser.cpp210
-rw-r--r--reader/src/formats/doc/OleStreamParser.h101
-rw-r--r--reader/src/formats/doc/OleStreamReader.cpp86
-rw-r--r--reader/src/formats/doc/OleStreamReader.h46
-rw-r--r--reader/src/formats/doc/OleUtil.cpp58
-rw-r--r--reader/src/formats/doc/OleUtil.h32
-rw-r--r--reader/src/formats/docbook/DocBookBookReader.cpp111
-rw-r--r--reader/src/formats/docbook/DocBookBookReader.h45
-rw-r--r--reader/src/formats/docbook/DocBookDescriptionReader.cpp137
-rw-r--r--reader/src/formats/docbook/DocBookDescriptionReader.h56
-rw-r--r--reader/src/formats/docbook/DocBookPlugin.cpp43
-rw-r--r--reader/src/formats/docbook/DocBookPlugin.h41
-rw-r--r--reader/src/formats/docbook/DocBookReader.cpp71
-rw-r--r--reader/src/formats/docbook/DocBookReader.h95
-rw-r--r--reader/src/formats/dummy/DummyBookReader.cpp42
-rw-r--r--reader/src/formats/dummy/DummyBookReader.h44
-rw-r--r--reader/src/formats/dummy/DummyMetaInfoReader.cpp40
-rw-r--r--reader/src/formats/dummy/DummyMetaInfoReader.h46
-rw-r--r--reader/src/formats/dummy/DummyPlugin.cpp57
-rw-r--r--reader/src/formats/dummy/DummyPlugin.h38
-rwxr-xr-xreader/src/formats/dummy/createPlugin.sh12
-rw-r--r--reader/src/formats/fb2/FB2BookReader.cpp336
-rw-r--r--reader/src/formats/fb2/FB2BookReader.h61
-rw-r--r--reader/src/formats/fb2/FB2CoverReader.cpp92
-rw-r--r--reader/src/formats/fb2/FB2CoverReader.h49
-rw-r--r--reader/src/formats/fb2/FB2MetaInfoReader.cpp206
-rw-r--r--reader/src/formats/fb2/FB2MetaInfoReader.h60
-rw-r--r--reader/src/formats/fb2/FB2Plugin.cpp48
-rw-r--r--reader/src/formats/fb2/FB2Plugin.h42
-rw-r--r--reader/src/formats/fb2/FB2Reader.cpp89
-rw-r--r--reader/src/formats/fb2/FB2Reader.h94
-rw-r--r--reader/src/formats/fb2/FB2TagManager.cpp124
-rw-r--r--reader/src/formats/fb2/FB2TagManager.h45
-rw-r--r--reader/src/formats/html/HtmlBookReader.cpp583
-rw-r--r--reader/src/formats/html/HtmlBookReader.h101
-rw-r--r--reader/src/formats/html/HtmlDescriptionReader.cpp82
-rw-r--r--reader/src/formats/html/HtmlDescriptionReader.h48
-rw-r--r--reader/src/formats/html/HtmlEntityCollection.cpp71
-rw-r--r--reader/src/formats/html/HtmlEntityCollection.h38
-rw-r--r--reader/src/formats/html/HtmlPlugin.cpp83
-rw-r--r--reader/src/formats/html/HtmlPlugin.h42
-rw-r--r--reader/src/formats/html/HtmlReader.cpp373
-rw-r--r--reader/src/formats/html/HtmlReader.h92
-rw-r--r--reader/src/formats/html/HtmlReaderStream.cpp128
-rw-r--r--reader/src/formats/html/HtmlReaderStream.h48
-rw-r--r--reader/src/formats/html/HtmlTagActions.h158
-rw-r--r--reader/src/formats/oeb/NCXReader.cpp131
-rw-r--r--reader/src/formats/oeb/NCXReader.h69
-rw-r--r--reader/src/formats/oeb/OEBBookReader.cpp273
-rw-r--r--reader/src/formats/oeb/OEBBookReader.h70
-rw-r--r--reader/src/formats/oeb/OEBCoverReader.cpp136
-rw-r--r--reader/src/formats/oeb/OEBCoverReader.h56
-rw-r--r--reader/src/formats/oeb/OEBMetaInfoReader.cpp194
-rw-r--r--reader/src/formats/oeb/OEBMetaInfoReader.h63
-rw-r--r--reader/src/formats/oeb/OEBPlugin.cpp149
-rw-r--r--reader/src/formats/oeb/OEBPlugin.h40
-rw-r--r--reader/src/formats/oeb/OEBTextStream.cpp101
-rw-r--r--reader/src/formats/oeb/OEBTextStream.h43
-rw-r--r--reader/src/formats/oeb/XHTMLImageFinder.cpp54
-rw-r--r--reader/src/formats/oeb/XHTMLImageFinder.h43
-rw-r--r--reader/src/formats/openreader/ORBookReader.cpp185
-rw-r--r--reader/src/formats/openreader/ORBookReader.h77
-rw-r--r--reader/src/formats/openreader/ORDescriptionReader.cpp88
-rw-r--r--reader/src/formats/openreader/ORDescriptionReader.h53
-rw-r--r--reader/src/formats/openreader/OpenReaderPlugin.cpp52
-rw-r--r--reader/src/formats/openreader/OpenReaderPlugin.h36
-rw-r--r--reader/src/formats/pdb/BitReader.cpp57
-rw-r--r--reader/src/formats/pdb/BitReader.h39
-rw-r--r--reader/src/formats/pdb/DocDecompressor.cpp103
-rw-r--r--reader/src/formats/pdb/DocDecompressor.h36
-rw-r--r--reader/src/formats/pdb/EReaderPlugin.cpp125
-rw-r--r--reader/src/formats/pdb/EReaderStream.cpp289
-rw-r--r--reader/src/formats/pdb/EReaderStream.h88
-rw-r--r--reader/src/formats/pdb/HtmlMetainfoReader.cpp89
-rw-r--r--reader/src/formats/pdb/HtmlMetainfoReader.h60
-rw-r--r--reader/src/formats/pdb/HuffDecompressor.cpp192
-rw-r--r--reader/src/formats/pdb/HuffDecompressor.h63
-rw-r--r--reader/src/formats/pdb/MobipocketHtmlBookReader.cpp356
-rw-r--r--reader/src/formats/pdb/MobipocketHtmlBookReader.h89
-rw-r--r--reader/src/formats/pdb/MobipocketPlugin.cpp229
-rw-r--r--reader/src/formats/pdb/PalmDocLikePlugin.cpp40
-rw-r--r--reader/src/formats/pdb/PalmDocLikeStream.cpp78
-rw-r--r--reader/src/formats/pdb/PalmDocLikeStream.h58
-rw-r--r--reader/src/formats/pdb/PalmDocPlugin.cpp54
-rw-r--r--reader/src/formats/pdb/PalmDocStream.cpp209
-rw-r--r--reader/src/formats/pdb/PalmDocStream.h50
-rw-r--r--reader/src/formats/pdb/PdbPlugin.cpp69
-rw-r--r--reader/src/formats/pdb/PdbPlugin.h119
-rw-r--r--reader/src/formats/pdb/PdbReader.cpp108
-rw-r--r--reader/src/formats/pdb/PdbReader.h82
-rw-r--r--reader/src/formats/pdb/PdbStream.cpp109
-rw-r--r--reader/src/formats/pdb/PdbStream.h72
-rw-r--r--reader/src/formats/pdb/PluckerBookReader.cpp528
-rw-r--r--reader/src/formats/pdb/PluckerBookReader.h89
-rw-r--r--reader/src/formats/pdb/PluckerImages.cpp80
-rw-r--r--reader/src/formats/pdb/PluckerImages.h79
-rw-r--r--reader/src/formats/pdb/PluckerPlugin.cpp48
-rw-r--r--reader/src/formats/pdb/PluckerTextStream.cpp159
-rw-r--r--reader/src/formats/pdb/PluckerTextStream.h48
-rw-r--r--reader/src/formats/pdb/PmlBookReader.cpp227
-rw-r--r--reader/src/formats/pdb/PmlBookReader.h73
-rw-r--r--reader/src/formats/pdb/PmlReader.cpp407
-rw-r--r--reader/src/formats/pdb/PmlReader.h117
-rw-r--r--reader/src/formats/pdb/SimplePdbPlugin.cpp75
-rw-r--r--reader/src/formats/pdb/ZTXTPlugin.cpp43
-rw-r--r--reader/src/formats/pdb/ZTXTStream.cpp77
-rw-r--r--reader/src/formats/pdb/ZTXTStream.h45
-rw-r--r--reader/src/formats/pdf/PdfBookReader.cpp261
-rw-r--r--reader/src/formats/pdf/PdfBookReader.h52
-rw-r--r--reader/src/formats/pdf/PdfDescriptionReader.cpp29
-rw-r--r--reader/src/formats/pdf/PdfDescriptionReader.h40
-rw-r--r--reader/src/formats/pdf/PdfObject.cpp450
-rw-r--r--reader/src/formats/pdf/PdfObject.h201
-rw-r--r--reader/src/formats/pdf/PdfPlugin.cpp42
-rw-r--r--reader/src/formats/pdf/PdfPlugin.h41
-rw-r--r--reader/src/formats/pdf/StringStream.cpp55
-rw-r--r--reader/src/formats/pdf/StringStream.h44
-rw-r--r--reader/src/formats/rtf/RtfBookReader.cpp232
-rw-r--r--reader/src/formats/rtf/RtfBookReader.h71
-rw-r--r--reader/src/formats/rtf/RtfDescriptionReader.cpp100
-rw-r--r--reader/src/formats/rtf/RtfDescriptionReader.h55
-rw-r--r--reader/src/formats/rtf/RtfPlugin.cpp63
-rw-r--r--reader/src/formats/rtf/RtfPlugin.h35
-rw-r--r--reader/src/formats/rtf/RtfReader.cpp470
-rw-r--r--reader/src/formats/rtf/RtfReader.h209
-rw-r--r--reader/src/formats/rtf/RtfReaderStream.cpp175
-rw-r--r--reader/src/formats/rtf/RtfReaderStream.h50
-rw-r--r--reader/src/formats/tcr/PPLBookReader.cpp129
-rw-r--r--reader/src/formats/tcr/PPLBookReader.h51
-rw-r--r--reader/src/formats/tcr/TcrPlugin.cpp82
-rw-r--r--reader/src/formats/tcr/TcrPlugin.h43
-rw-r--r--reader/src/formats/tcr/TcrStream.cpp125
-rw-r--r--reader/src/formats/tcr/TcrStream.h47
-rw-r--r--reader/src/formats/txt/PlainTextFormat.cpp253
-rw-r--r--reader/src/formats/txt/PlainTextFormat.h112
-rw-r--r--reader/src/formats/txt/TxtBookReader.cpp124
-rw-r--r--reader/src/formats/txt/TxtBookReader.h59
-rw-r--r--reader/src/formats/txt/TxtPlugin.cpp79
-rw-r--r--reader/src/formats/txt/TxtPlugin.h37
-rw-r--r--reader/src/formats/txt/TxtReader.cpp200
-rw-r--r--reader/src/formats/txt/TxtReader.h56
-rw-r--r--reader/src/formats/util/EntityFilesCollector.cpp62
-rw-r--r--reader/src/formats/util/EntityFilesCollector.h42
-rw-r--r--reader/src/formats/util/MergedStream.cpp72
-rw-r--r--reader/src/formats/util/MergedStream.h45
-rw-r--r--reader/src/formats/util/MiscUtil.cpp91
-rw-r--r--reader/src/formats/util/MiscUtil.h39
-rw-r--r--reader/src/formats/util/TextFormatDetector.cpp77
-rw-r--r--reader/src/formats/util/TextFormatDetector.h35
-rw-r--r--reader/src/formats/util/XMLTextStream.cpp124
-rw-r--r--reader/src/formats/util/XMLTextStream.h52
-rw-r--r--reader/src/formats/xhtml/XHTMLReader.cpp715
-rw-r--r--reader/src/formats/xhtml/XHTMLReader.h113
201 files changed, 23831 insertions, 0 deletions
diff --git a/reader/src/formats/EncodedTextReader.cpp b/reader/src/formats/EncodedTextReader.cpp
new file mode 100644
index 0000000..12102c1
--- /dev/null
+++ b/reader/src/formats/EncodedTextReader.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include "EncodedTextReader.h"
+
+// Resolves the converter for the requested encoding name. When the encoding
+// collection has no entry for that name, the collection's default converter
+// is used instead.
+EncodedTextReader::EncodedTextReader(const std::string &encoding) {
+    ZLEncodingCollection &encodings = ZLEncodingCollection::Instance();
+    ZLEncodingConverterInfoPtr converterInfo = encodings.info(encoding);
+    if (converterInfo.isNull()) {
+        myConverter = encodings.defaultConverter();
+    } else {
+        myConverter = converterInfo->createConverter();
+    }
+}
+
+// Nothing to release by hand: the converter is held by a shared_ptr member.
+EncodedTextReader::~EncodedTextReader() {}
diff --git a/reader/src/formats/EncodedTextReader.h b/reader/src/formats/EncodedTextReader.h
new file mode 100644
index 0000000..8035508
--- /dev/null
+++ b/reader/src/formats/EncodedTextReader.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __ENCODEDTEXTREADER_H__
+#define __ENCODEDTEXTREADER_H__
+
+#include <string>
+
+#include <ZLEncodingConverter.h>
+
+// Base class for readers that need to convert text from a named encoding.
+// Construction and destruction are protected, so the class is only usable
+// through derived readers, which access the converter via myConverter.
+class EncodedTextReader {
+
+protected:
+    // Selects the converter for `encoding` (see EncodedTextReader.cpp).
+    EncodedTextReader(const std::string &encoding);
+    virtual ~EncodedTextReader();
+
+    // Converter chosen by the constructor.
+    shared_ptr<ZLEncodingConverter> myConverter;
+};
+
+#endif /* __ENCODEDTEXTREADER_H__ */
diff --git a/reader/src/formats/FormatPlugin.cpp b/reader/src/formats/FormatPlugin.cpp
new file mode 100644
index 0000000..059a53b
--- /dev/null
+++ b/reader/src/formats/FormatPlugin.cpp
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLInputStream.h>
+#include <ZLLanguageDetector.h>
+#include <ZLImage.h>
+
+#include "FormatPlugin.h"
+
+#include "../library/Book.h"
+
+/*
+ * Fills in the language and the encoding of `book`.
+ *
+ * If both are already set and `force` is false, the book is left untouched
+ * and true is returned. Otherwise missing values are seeded from the
+ * PluginCollection defaults and, when auto-detection is enabled and `stream`
+ * can be opened, up to 64 KiB read from the stream are handed to
+ * ZLLanguageDetector. A detected "US-ASCII" or "ISO-8859-1" encoding is
+ * replaced by "windows-1252".
+ *
+ * Returns true when the values were already known or the detector produced
+ * a result; false when only previous/default values were stored.
+ */
+bool FormatPlugin::detectEncodingAndLanguage(Book &book, ZLInputStream &stream, bool force) {
+    std::string language = book.language();
+    std::string encoding = book.encoding();
+    if (!force && !encoding.empty() && !language.empty()) {
+        return true;
+    }
+
+    bool detected = false;
+
+    PluginCollection &collection = PluginCollection::Instance();
+    if (language.empty()) {
+        language = collection.DefaultLanguageOption.value();
+    }
+    if (encoding.empty()) {
+        encoding = collection.DefaultEncodingOption.value();
+    }
+    if (collection.LanguageAutoDetectOption.value() && stream.open()) {
+        static const int BUFSIZE = 65536;
+        shared_ptr<ZLLanguageDetector::LanguageInfo> info;
+        {
+            // The sample buffer is owned by a vector so it is released even
+            // if reading or detection throws; the inner scope frees it as
+            // soon as the detector is done with it.
+            std::vector<char> buffer(BUFSIZE);
+            const std::size_t size = stream.read(&buffer[0], BUFSIZE);
+            stream.close();
+            info = ZLLanguageDetector().findInfo(&buffer[0], size);
+        }
+        if (!info.isNull()) {
+            detected = true;
+            if (!info->Language.empty()) {
+                language = info->Language;
+            }
+            encoding = info->Encoding;
+            if (encoding == "US-ASCII" || encoding == "ISO-8859-1") {
+                encoding = "windows-1252";
+            }
+        }
+    }
+    book.setEncoding(encoding);
+    book.setLanguage(language);
+    return detected;
+}
+
+/*
+ * Fills in the language of `book` for text whose encoding is already known.
+ *
+ * If the language is already set and `force` is false, the book is left
+ * untouched and true is returned. Otherwise a missing language is seeded
+ * from the PluginCollection default and, when auto-detection is enabled and
+ * `stream` can be opened, up to 64 KiB read from the stream are handed to
+ * ZLLanguageDetector::findInfoForEncoding together with `encoding`.
+ *
+ * Returns true when the language was already known or the detector reported
+ * a non-empty language; false otherwise.
+ */
+bool FormatPlugin::detectLanguage(Book &book, ZLInputStream &stream, const std::string &encoding, bool force) {
+    std::string language = book.language();
+    if (!force && !language.empty()) {
+        return true;
+    }
+
+    bool detected = false;
+
+    PluginCollection &collection = PluginCollection::Instance();
+    if (language.empty()) {
+        language = collection.DefaultLanguageOption.value();
+    }
+    if (collection.LanguageAutoDetectOption.value() && stream.open()) {
+        static const int BUFSIZE = 65536;
+        shared_ptr<ZLLanguageDetector::LanguageInfo> info;
+        {
+            // The sample buffer is owned by a vector so it is released even
+            // if reading or detection throws; the inner scope frees it as
+            // soon as the detector is done with it.
+            std::vector<char> buffer(BUFSIZE);
+            const std::size_t size = stream.read(&buffer[0], BUFSIZE);
+            stream.close();
+            info = ZLLanguageDetector().findInfoForEncoding(encoding, &buffer[0], size, -20000);
+        }
+        if (!info.isNull() && !info->Language.empty()) {
+            detected = true;
+            language = info->Language;
+        }
+    }
+    book.setLanguage(language);
+    return detected;
+}
+
+// Default implementation: ignores the file and returns a reference to a
+// function-local empty string.
+// NOTE(review): presumably an empty result means "no problem to report";
+// confirm against the plugins that override this.
+const std::string &FormatPlugin::tryOpen(const ZLFile&) const {
+    static const std::string NO_MESSAGE;
+    return NO_MESSAGE;
+}
+
+// Default implementation: ignores the file and returns a null image pointer.
+shared_ptr<const ZLImage> FormatPlugin::coverImage(const ZLFile&) const {
+    return 0;
+}
diff --git a/reader/src/formats/FormatPlugin.h b/reader/src/formats/FormatPlugin.h
new file mode 100644
index 0000000..5e1075e
--- /dev/null
+++ b/reader/src/formats/FormatPlugin.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __FORMATPLUGIN_H__
+#define __FORMATPLUGIN_H__
+
+#include <string>
+#include <vector>
+
+#include <shared_ptr.h>
+#include <ZLOptions.h>
+
+class Book;
+class BookModel;
+class ZLOptionsDialog;
+class ZLOptionsDialogTab;
+class ZLFile;
+class ZLInputStream;
+class ZLImage;
+
+// Base class for the object returned by FormatPlugin::createInfoPage().
+// It can be destroyed through a base pointer (virtual destructor) but only
+// constructed by derived classes.
+class FormatInfoPage {
+
+public:
+    virtual ~FormatInfoPage();
+
+protected:
+    FormatInfoPage();
+};
+
+// Abstract interface of a book-format plugin. PluginCollection keeps one
+// instance per format and picks the first one whose acceptsFile() returns
+// true for a given file.
+class FormatPlugin {
+
+protected:
+ FormatPlugin();
+
+public:
+ virtual ~FormatPlugin();
+
+ // Consulted by PluginCollection::plugin(file, strong): when `strong` is
+ // true, only plugins returning true here are considered.
+ virtual bool providesMetaInfo() const = 0;
+ // Tells whether this plugin handles `file`.
+ virtual bool acceptsFile(const ZLFile &file) const = 0;
+ // The default implementation returns 0 (no info page).
+ virtual FormatInfoPage *createInfoPage(ZLOptionsDialog &dialog, const ZLFile &file);
+
+ // The default implementation returns an empty string.
+ virtual const std::string &tryOpen(const ZLFile &file) const;
+ // NOTE(review): the contracts of the three read* methods are defined by
+ // the concrete plugins, which are not visible from this header.
+ virtual bool readMetaInfo(Book &book) const = 0;
+ virtual bool readLanguageAndEncoding(Book &book) const = 0;
+ virtual bool readModel(BookModel &model) const = 0;
+ // The default implementation returns a null pointer (no cover).
+ virtual shared_ptr<const ZLImage> coverImage(const ZLFile &file) const;
+
+protected:
+ // Helpers for implementations: store language (and encoding) into `book`,
+ // using PluginCollection defaults and, if enabled, ZLLanguageDetector on
+ // the beginning of `stream`. With force == false, values already present
+ // in the book are kept and true is returned immediately.
+ static bool detectEncodingAndLanguage(Book &book, ZLInputStream &stream, bool force = false);
+ static bool detectLanguage(Book &book, ZLInputStream &stream, const std::string &encoding, bool force = false);
+};
+
+// Registry of the available format plugins, accessed through Instance().
+// It also carries the options that control language/encoding detection.
+class PluginCollection {
+
+public:
+    // Returns the single collection, creating and populating it on first use.
+    static PluginCollection &Instance();
+    // Destroys the collection created by Instance(), if any.
+    static void deleteInstance();
+
+    // Returns the first registered plugin that accepts `file`, or a null
+    // pointer. With `strong` set, plugins that do not provide meta info are
+    // skipped.
+    shared_ptr<FormatPlugin> plugin(const ZLFile &file, bool strong);
+    // Same lookup for the file of `book`, with strong == false.
+    shared_ptr<FormatPlugin> plugin(const Book &book);
+
+    // Options read by FormatPlugin::detectEncodingAndLanguage/detectLanguage.
+    ZLBooleanOption LanguageAutoDetectOption;
+    ZLStringOption DefaultLanguageOption;
+    ZLStringOption DefaultEncodingOption;
+
+private:
+    PluginCollection();
+
+    static PluginCollection *ourInstance;
+
+    // Registered plugins, in lookup order.
+    std::vector<shared_ptr<FormatPlugin> > myPlugins;
+};
+
+// Trivial constructors and destructors of the two base classes.
+inline FormatInfoPage::FormatInfoPage() {
+}
+
+inline FormatInfoPage::~FormatInfoPage() {
+}
+
+inline FormatPlugin::FormatPlugin() {
+}
+
+inline FormatPlugin::~FormatPlugin() {
+}
+
+// By default a plugin offers no info page.
+inline FormatInfoPage *FormatPlugin::createInfoPage(ZLOptionsDialog&, const ZLFile&) {
+    return 0;
+}
+
+#endif /* __FORMATPLUGIN_H__ */
diff --git a/reader/src/formats/PluginCollection.cpp b/reader/src/formats/PluginCollection.cpp
new file mode 100644
index 0000000..d120de1
--- /dev/null
+++ b/reader/src/formats/PluginCollection.cpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLibrary.h>
+#include <ZLFile.h>
+
+#include "FormatPlugin.h"
+
+#include "../library/Book.h"
+
+#include "fb2/FB2Plugin.h"
+//#include "docbook/DocBookPlugin.h"
+#include "html/HtmlPlugin.h"
+#include "txt/TxtPlugin.h"
+#include "pdb/PdbPlugin.h"
+#include "tcr/TcrPlugin.h"
+#include "oeb/OEBPlugin.h"
+#include "chm/CHMPlugin.h"
+#include "rtf/RtfPlugin.h"
+#include "openreader/OpenReaderPlugin.h"
+#include "doc/DocPlugin.h"
+//#include "pdf/PdfPlugin.h"
+
+PluginCollection *PluginCollection::ourInstance = 0;
+
+// Returns the process-wide collection. On the first call the collection is
+// created and the built-in plugins are registered; the registration order
+// is the order in which plugin() tries them.
+PluginCollection &PluginCollection::Instance() {
+    if (ourInstance != 0) {
+        return *ourInstance;
+    }
+
+    // ourInstance is assigned before any plugin is constructed, exactly as
+    // the plugins are appended to it afterwards.
+    ourInstance = new PluginCollection();
+    std::vector<shared_ptr<FormatPlugin> > &plugins = ourInstance->myPlugins;
+    plugins.push_back(new FB2Plugin());
+    //plugins.push_back(new DocBookPlugin());
+    plugins.push_back(new HtmlPlugin());
+    plugins.push_back(new TxtPlugin());
+    plugins.push_back(new PluckerPlugin());
+    plugins.push_back(new PalmDocPlugin());
+    plugins.push_back(new MobipocketPlugin());
+    plugins.push_back(new EReaderPlugin());
+    plugins.push_back(new ZTXTPlugin());
+    plugins.push_back(new TcrPlugin());
+    plugins.push_back(new CHMPlugin());
+    plugins.push_back(new OEBPlugin());
+    plugins.push_back(new RtfPlugin());
+    plugins.push_back(new OpenReaderPlugin());
+    plugins.push_back(new DocPlugin());
+    //plugins.push_back(new PdfPlugin());
+    return *ourInstance;
+}
+
+// Destroys the collection (if one exists) and clears the pointer, so a
+// later Instance() call builds a fresh one.
+void PluginCollection::deleteInstance() {
+    // Deleting a null pointer is a no-op, so no explicit check is needed.
+    delete ourInstance;
+    ourInstance = 0;
+}
+
+// Sets up the three detection options under ZLCategoryKey::CONFIG / "Format".
+// The last constructor argument of each option is presumably its default
+// value (auto-detect on, the ZLibrary language, "UTF-8") - confirm against
+// ZLOptions.h.
+PluginCollection::PluginCollection()
+    : LanguageAutoDetectOption(ZLCategoryKey::CONFIG, "Format", "AutoDetect", true)
+    , DefaultLanguageOption(ZLCategoryKey::CONFIG, "Format", "DefaultLanguageS", ZLibrary::Language())
+    , DefaultEncodingOption(ZLCategoryKey::CONFIG, "Format", "DefaultEncoding", "UTF-8") {
+}
+
+// Looks up the plugin for the file of `book`; plugins without meta info
+// support are not excluded.
+shared_ptr<FormatPlugin> PluginCollection::plugin(const Book &book) {
+    const bool strong = false;
+    return plugin(book.file(), strong);
+}
+
+// Returns the first registered plugin that accepts `file`, or a null
+// pointer when none does. With `strong` set, plugins whose
+// providesMetaInfo() is false are skipped without being asked.
+shared_ptr<FormatPlugin> PluginCollection::plugin(const ZLFile &file, bool strong) {
+    typedef std::vector<shared_ptr<FormatPlugin> >::const_iterator PluginIterator;
+    for (PluginIterator candidate = myPlugins.begin(); candidate != myPlugins.end(); ++candidate) {
+        if (strong && !(*candidate)->providesMetaInfo()) {
+            continue;
+        }
+        if ((*candidate)->acceptsFile(file)) {
+            return *candidate;
+        }
+    }
+    return 0;
+}
diff --git a/reader/src/formats/chm/BitStream.cpp b/reader/src/formats/chm/BitStream.cpp
new file mode 100644
index 0000000..bf6c642
--- /dev/null
+++ b/reader/src/formats/chm/BitStream.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cstring>
+
+#include "BitStream.h"
+
+// Number of bits in the unsigned int bit buffer (computed assuming 8-bit bytes).
+const int BitStream::BufferSize = sizeof(unsigned int) * 8;
+
+/*
+ * Reads four bytes straight from the byte stream, bypassing the bit buffer,
+ * and combines them most-significant byte first.
+ * Returns 0 without consuming anything when fewer than four bytes remain.
+ */
+unsigned int BitStream::get4BytesDirect() {
+    // Compare sizes rather than forming `myByteStream + 4`, which could
+    // point beyond the end of the underlying data.
+    if (bytesLeft() < 4) {
+        return 0;
+    }
+    // Each byte is widened to unsigned before shifting, so a byte >= 0x80
+    // is never shifted into the sign bit of a promoted int.
+    const unsigned int bytes =
+        (static_cast<unsigned int>(myByteStream[0]) << 24) |
+        (static_cast<unsigned int>(myByteStream[1]) << 16) |
+        (static_cast<unsigned int>(myByteStream[2]) << 8) |
+        static_cast<unsigned int>(myByteStream[3]);
+    myByteStream += 4;
+    return bytes;
+}
+
+/*
+ * Copies `length` bytes straight from the byte stream into `buffer`,
+ * bypassing the bit buffer, and advances the stream by that amount.
+ * Returns false, copying nothing, when fewer than `length` bytes remain.
+ */
+bool BitStream::getBytesDirect(unsigned char *buffer, unsigned int length) {
+    // The request is compared with the remaining byte count. Computing
+    // `myByteStream + length` instead could wrap around for a huge length
+    // and let an out-of-bounds copy through.
+    if (length > bytesLeft()) {
+        return false;
+    }
+    std::memcpy(buffer, myByteStream, length);
+    myByteStream += length;
+    return true;
+}
diff --git a/reader/src/formats/chm/BitStream.h b/reader/src/formats/chm/BitStream.h
new file mode 100644
index 0000000..80c1e25
--- /dev/null
+++ b/reader/src/formats/chm/BitStream.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __BITSTREAM_H__
+#define __BITSTREAM_H__
+
+#include <string>
+
+// Bit-level reader over a byte string. Bytes are pulled into an unsigned int
+// buffer two at a time (low byte first) and bits are handed out from the
+// most significant end of that buffer. The class is not copyable.
+class BitStream {
+
+public:
+    // Size of the bit buffer, in bits.
+    static const int BufferSize;
+
+    BitStream();
+
+    // Starts reading from the bytes of `data`; only pointers are kept.
+    void setData(const std::string &data);
+    // Gives back the whole bytes still held in the bit buffer and empties it.
+    void reset();
+
+    // Returns the next `length` bits without consuming them (0 for length 0).
+    unsigned int peek(unsigned char length);
+    // Consumes `length` bits if that many could be buffered.
+    void remove(unsigned char length);
+    // peek() followed by remove(); lengths above 16 are read in two parts.
+    unsigned int get(unsigned char length);
+    // Number of bytes not yet moved into the bit buffer.
+    unsigned int bytesLeft() const;
+
+    // Byte-oriented reads that bypass the bit buffer.
+    unsigned int get4BytesDirect();
+    bool getBytesDirect(unsigned char *buffer, unsigned int length);
+
+private:
+    // Tries to have at least `length` bits buffered; reports success.
+    bool ensure(unsigned char length);
+
+    unsigned int myBuffer;
+    unsigned char myBitCounter;
+    const unsigned char *myByteStream;
+    const unsigned char *myByteStreamEnd;
+
+    // Copying is disabled: declared but never defined.
+    BitStream(const BitStream&);
+    const BitStream &operator = (const BitStream&);
+};
+
+// Creates an empty stream. The byte pointers start out null so that
+// bytesLeft() is 0, instead of reading indeterminate pointers, until
+// setData() is called.
+inline BitStream::BitStream() : myBuffer(0), myBitCounter(0), myByteStream(0), myByteStreamEnd(0) {
+}
+
+// Points the stream at the bytes of `data` and empties the bit buffer.
+// Only pointers into `data` are stored (no copy is made), so the string has
+// to outlive all reads from this stream.
+inline void BitStream::setData(const std::string &data) {
+    const unsigned char *begin = reinterpret_cast<const unsigned char*>(data.data());
+    myByteStream = begin;
+    myByteStreamEnd = begin + data.length();
+    myBitCounter = 0;
+    myBuffer = 0;
+}
+
+// Moves the byte position back by the number of whole bytes still held in
+// the bit buffer, then discards the buffered bits.
+inline void BitStream::reset() {
+    const int bufferedBytes = myBitCounter / 8;
+    myByteStream -= bufferedBytes;
+    myBitCounter = 0;
+    myBuffer = 0;
+}
+
+// Refills the bit buffer, 16 bits at a time (two bytes, low byte first),
+// until it holds at least `length` bits, the input runs short of two bytes,
+// or another 16 bits would no longer fit. Returns whether `length` bits are
+// now available.
+inline bool BitStream::ensure(unsigned char length) {
+    // The room check keeps the shift count below from becoming negative
+    // when more than 16 bits are already buffered.
+    while ((myBitCounter < length) &&
+           (myBitCounter <= BitStream::BufferSize - 16) &&
+           (bytesLeft() >= 2)) {
+        // Assemble and shift as unsigned so the word is never shifted into
+        // the sign bit of a promoted int.
+        const unsigned int word =
+            (static_cast<unsigned int>(myByteStream[1]) << 8) | myByteStream[0];
+        myBuffer |= word << (BitStream::BufferSize - 16 - myBitCounter);
+        myBitCounter += 16;
+        myByteStream += 2;
+    }
+    return myBitCounter >= length;
+}
+
+// Returns the top `length` bits of the bit buffer without consuming them.
+// The result of the refill attempt is not checked: whatever the buffer
+// holds is returned. A zero length yields 0.
+inline unsigned int BitStream::peek(unsigned char length) {
+    ensure(length);
+    if (length == 0) {
+        return 0;
+    }
+    return myBuffer >> (BufferSize - length);
+}
+
+// Drops `length` bits from the front of the bit buffer. Nothing happens when
+// that many bits cannot be made available.
+inline void BitStream::remove(unsigned char length) {
+    if (!ensure(length)) {
+        return;
+    }
+    myBuffer <<= length;
+    myBitCounter -= length;
+}
+
+// Reads and consumes `length` bits. Requests above 16 bits are served in two
+// steps: first the upper (length - 16) bits, then the lower 16 bits.
+inline unsigned int BitStream::get(unsigned char length) {
+    unsigned int bits = 0;
+    unsigned char remaining = length;
+    if (remaining > 16) {
+        const unsigned char upperLength = remaining - 16;
+        bits = peek(upperLength) << 16;
+        remove(upperLength);
+        remaining = 16;
+    }
+    bits += peek(remaining);
+    remove(remaining);
+    return bits;
+}
+
+// Number of bytes between the current byte position and the end of the data.
+inline unsigned int BitStream::bytesLeft() const {
+    return static_cast<unsigned int>(myByteStreamEnd - myByteStream);
+}
+
+#endif /* __BITSTREAM_H__ */
diff --git a/reader/src/formats/chm/CHMFile.cpp b/reader/src/formats/chm/CHMFile.cpp
new file mode 100644
index 0000000..8c62bca
--- /dev/null
+++ b/reader/src/formats/chm/CHMFile.cpp
@@ -0,0 +1,490 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cstring>
+
+#include <ZLFile.h>
+#include <ZLStringUtil.h>
+#include <ZLUnicodeUtil.h>
+#include <ZLInputStream.h>
+
+#include "CHMFile.h"
+#include "CHMReferenceCollection.h"
+
+#include "LZXDecompressor.h"
+
+ // Reads `length` bytes from the stream into a string. The string is pre-filled
+ // with spaces, so bytes the stream fails to deliver remain spaces.
+ static std::string readString(ZLInputStream &stream, std::size_t length) {
+ 	std::string result;
+ 	result.assign(length, ' ');
+ 	stream.read(const_cast<char*>(result.data()), length);
+ 	return result;
+ }
+
+ // Reads a 16-bit little-endian unsigned value.
+ // The buffer is zero-initialised so that a short read yields a deterministic
+ // value instead of whatever happened to be on the stack.
+ static unsigned short readUnsignedWord(ZLInputStream &stream) {
+ 	unsigned char buffer[2] = { 0, 0 };
+ 	stream.read((char*)buffer, 2);
+ 	return (unsigned short)((buffer[1] << 8) | buffer[0]);
+ }
+
+ // Reads a 32-bit little-endian unsigned value as two 16-bit words, low word first.
+ static unsigned long readUnsignedDWord(ZLInputStream &stream) {
+ 	const unsigned long low = readUnsignedWord(stream);
+ 	const unsigned long high = readUnsignedWord(stream);
+ 	return low | (high << 16);
+ }
+
+ // Reads a 64-bit little-endian unsigned value as two 32-bit halves, low half first.
+ static unsigned long long readUnsignedQWord(ZLInputStream &stream) {
+ 	const unsigned long long low = readUnsignedDWord(stream);
+ 	const unsigned long long high = readUnsignedDWord(stream);
+ 	return low | (high << 32);
+ }
+
+ // Reads a variable-length integer: 7 payload bits per byte, most significant
+ // group first; the top bit of a byte signals that another byte follows.
+ // Decoding stops at the first byte without the continuation bit, or when the
+ // stream cannot deliver another byte (the value accumulated so far is returned).
+ static unsigned long long readEncodedInteger(ZLInputStream &stream) {
+ 	unsigned long long result = 0;
+ 	char part = 0;
+ 	do {
+ 		if (stream.read(&part, 1) != 1) {
+ 			// Without this check a truncated stream whose last byte had the
+ 			// continuation bit set would keep re-testing that stale byte forever.
+ 			break;
+ 		}
+ 		result = (result << 7) + (part & 0x7F);
+ 	} while (part & 0x80);
+ 	return result;
+ }
+
+ // Prepares a stream over `size` bytes located at `offset` inside the
+ // uncompressed content of `sectionInfo`.
+ // Decompression has to begin at a block whose index is a multiple of the
+ // section's reset interval, so the start index is rounded down to such a block
+ // and the bytes between that block's start and `offset` are recorded in
+ // myBytesToSkip.
+ // All position/buffer bookkeeping is given a defined initial value here so that
+ // offset() and friends do not read indeterminate members before open() is called.
+ CHMInputStream::CHMInputStream(shared_ptr<ZLInputStream> base, const CHMFileInfo::SectionInfo &sectionInfo, std::size_t offset, std::size_t size) : myBase(base), mySectionInfo(sectionInfo), mySize(size) {
+ 	myBaseStartIndex = offset / 0x8000;
+ 	myBaseStartIndex -= myBaseStartIndex % sectionInfo.ResetInterval;
+ 	myBytesToSkip = offset - myBaseStartIndex * 0x8000;
+ 	myBaseIndex = myBaseStartIndex;
+ 	myOffset = 0;
+ 	myDoSkip = true;
+ 	myOutData = new unsigned char[0x8000];
+ 	myOutDataOffset = 0;
+ 	myOutDataLength = 0;
+ }
+
+ CHMInputStream::~CHMInputStream() { // releases the decompressor and the block buffer
+ close();
+ delete[] myOutData; // allocated with new[] in the constructor
+ }
+
+ // Rewinds the stream to its first byte: resets the position bookkeeping,
+ // creates the decompressor on first use (or resets the existing one) and
+ // marks the output buffer as empty. Always returns true.
+ bool CHMInputStream::open() {
+ 	myBaseIndex = myBaseStartIndex;
+ 	myDoSkip = true;
+ 	myOffset = 0;
+ 	if (!myDecompressor.isNull()) {
+ 		myDecompressor->reset();
+ 	} else {
+ 		myDecompressor = new LZXDecompressor(mySectionInfo.WindowSizeIndex);
+ 	}
+ 	myOutDataLength = 0;
+ 	myOutDataOffset = 0;
+ 	return true;
+ }
+
+ // Reads up to `maxSize` bytes of the entry, never past its end (mySize).
+ // On the first read after open() the bytes preceding the entry inside the
+ // starting block are decompressed and thrown away.
+ // Returns the number of bytes delivered.
+ std::size_t CHMInputStream::read(char *buffer, std::size_t maxSize) {
+ 	if (myDoSkip) {
+ 		do_read(0, myBytesToSkip);
+ 		myDoSkip = false;
+ 	}
+ 	const std::size_t remaining = mySize - myOffset;
+ 	const std::size_t wanted = (remaining < maxSize) ? remaining : maxSize;
+ 	const std::size_t delivered = do_read(buffer, wanted);
+ 	myOffset += delivered;
+ 	return delivered;
+ }
+
+ // Delivers up to `maxSize` decompressed bytes, decompressing further blocks
+ // as needed. When `buffer` is null the bytes are consumed but not copied.
+ // Stops early when the reset table is exhausted, when the table is
+ // inconsistent, or when a block fails to decompress.
+ // Returns the number of bytes consumed.
+ std::size_t CHMInputStream::do_read(char *buffer, std::size_t maxSize) {
+ 	const std::size_t blockSize = 0x8000;
+ 	std::size_t realSize = 0;
+ 	do {
+ 		if (myOutDataLength == 0) {
+ 			// Output buffer is drained: fetch and decompress the next block.
+ 			if (myBaseIndex >= mySectionInfo.ResetTable.size()) {
+ 				break;
+ 			}
+ 			const bool isTail = myBaseIndex + 1 == mySectionInfo.ResetTable.size();
+ 			const std::size_t start = mySectionInfo.ResetTable[myBaseIndex];
+ 			const std::size_t end = isTail ? mySectionInfo.CompressedSize : mySectionInfo.ResetTable[myBaseIndex + 1];
+ 			if (end < start) {
+ 				// Inconsistent table: (end - start) would wrap around and
+ 				// request an absurdly large input buffer.
+ 				break;
+ 			}
+ 			std::size_t blockLength = blockSize;
+ 			if (isTail) {
+ 				// The last block holds whatever is left over. A plain
+ 				// "size % blockSize" is 0 when the size is an exact multiple
+ 				// of the block size, which used to drop the whole last block.
+ 				const std::size_t total = mySectionInfo.UncompressedSize;
+ 				blockLength = (total == 0) ? 0 : ((total - 1) % blockSize + 1);
+ 			}
+ 			myOutDataLength = blockLength;
+ 			myOutDataOffset = 0;
+
+ 			myInData.erase();
+ 			myInData.append(end - start, '\0');
+ 			myBase->seek(mySectionInfo.Offset + start, true);
+ 			myBase->read((char*)myInData.data(), myInData.length());
+ 			if (myBaseIndex % mySectionInfo.ResetInterval == 0) {
+ 				myDecompressor->reset();
+ 			}
+ 			++myBaseIndex;
+
+ 			if (!myDecompressor->decompress(myInData, myOutData, myOutDataLength)) {
+ 				break;
+ 			}
+ 		}
+ 		const std::size_t partSize = std::min(myOutDataLength, maxSize);
+ 		if (buffer != 0) {
+ 			std::memcpy(buffer + realSize, myOutData + myOutDataOffset, partSize);
+ 		}
+ 		maxSize -= partSize;
+ 		realSize += partSize;
+ 		myOutDataLength -= partSize;
+ 		myOutDataOffset += partSize;
+ 	} while (maxSize != 0);
+ 	return realSize;
+ }
+
+ void CHMInputStream::close() { // drops the decompressor; open() creates a new one when it finds none
+ myDecompressor = 0;
+ }
+
+ // Moves the read position. `offset` is either absolute (from the start of the
+ // entry) or relative to the current position. Forward moves are done by
+ // reading and discarding; backward moves restart the stream from the
+ // beginning and read forward to the target, clamped at 0.
+ void CHMInputStream::seek(int offset, bool absoluteOffset) {
+ 	if (absoluteOffset) {
+ 		// From here on `offset` is relative to the current position.
+ 		offset -= myOffset;
+ 	}
+ 	if (offset > 0) {
+ 		read(0, offset);
+ 	} else if (offset < 0) {
+ 		// The target must be computed before open(), because open() resets
+ 		// myOffset to 0; computing it afterwards always gave a target of 0.
+ 		const int target = std::max(offset + (int)myOffset, 0);
+ 		open();
+ 		read(0, target);
+ 	}
+ }
+
+ std::size_t CHMInputStream::offset() const { // bytes delivered by read() since the last open()
+ return myOffset;
+ }
+
+ std::size_t CHMInputStream::sizeOfOpened() { // entry length passed to the constructor
+ return mySize;
+ }
+
+ // Returns a stream over the named entry (looked up by lower-cased name), or a
+ // null pointer when the entry is unknown, empty, stored in section 0, or does
+ // not fit inside its section.
+ shared_ptr<ZLInputStream> CHMFileInfo::entryStream(shared_ptr<ZLInputStream> base, const std::string &name) const {
+ 	const RecordMap::const_iterator found = myRecords.find(ZLUnicodeUtil::toLower(name));
+ 	if (found == myRecords.end()) {
+ 		return 0;
+ 	}
+ 	const RecordInfo &record = found->second;
+ 	// TODO: implement reading of section 0 entries (currently rejected)
+ 	if ((record.Length == 0) || (record.Section == 0) || (record.Section > mySectionInfos.size())) {
+ 		return 0;
+ 	}
+ 	// mySectionInfos has no element for section 0, hence the index shift.
+ 	const SectionInfo &section = mySectionInfos[record.Section - 1];
+ 	if (record.Offset + record.Length > section.UncompressedSize) {
+ 		return 0;
+ 	}
+
+ 	return new CHMInputStream(base, section, record.Offset, record.Length);
+ }
+
+ CHMFileInfo::CHMFileInfo(const ZLFile &file) : myFilePath(file.path()) { // only remembers the path; parsing happens in init()
+ }
+
+ // Positions `stream` at the start of the named entry. Only entries stored in
+ // section 0 are supported; returns false for unknown names and for entries
+ // in any other section.
+ bool CHMFileInfo::moveToEntry(ZLInputStream &stream, const std::string &entryName) {
+ 	const RecordMap::const_iterator found = myRecords.find(entryName);
+ 	if (found == myRecords.end()) {
+ 		return false;
+ 	}
+ 	const RecordInfo &record = found->second;
+ 	if ((record.Section > mySectionInfos.size()) || (record.Section != 0)) {
+ 		// TODO: ???
+ 		return false;
+ 	}
+
+ 	stream.seek(mySection0Offset + record.Offset, true);
+ 	return true;
+ }
+
+ // Parses the container structure of a CHM file from `stream`:
+ //   1. the ITSF file header and the ITSP directory header,
+ //   2. the directory chunks, filling myRecords (name -> section/offset/length),
+ //   3. the section name list ("::DataSpace/NameList"),
+ //   4. for every section but the first: content location, LZXC control data
+ //      and the reset table, filling mySectionInfos.
+ // Returns false as soon as something required is missing or inconsistent.
+ // The input is untrusted, so values that later code divides by or allocates
+ // from are validated here.
+ bool CHMFileInfo::init(ZLInputStream &stream) {
+ 	{
+ 		// header start
+ 		if (readString(stream, 4) != "ITSF") {
+ 			return false;
+ 		}
+
+ 		unsigned long version = readUnsignedDWord(stream);
+
+ 		// DWORD total length
+ 		// DWORD unknown
+ 		// DWORD timestamp
+ 		// DWORD language id
+ 		// 0x10 bytes 1st GUID
+ 		// 0x10 bytes 2nd GUID
+ 		// QWORD section 0 offset
+ 		// QWORD section 0 length
+ 		stream.seek(4 * 4 + 2 * 0x10 + 2 * 8, false);
+
+ 		unsigned long long sectionOffset1 = readUnsignedQWord(stream);
+ 		unsigned long long sectionLength1 = readUnsignedQWord(stream);
+ 		mySection0Offset = sectionOffset1 + sectionLength1;
+ 		// header end
+
+ 		// additional header data start
+ 		if (version > 2) {
+ 			mySection0Offset = readUnsignedQWord(stream);
+ 		}
+ 		// additional header data end
+
+ 		stream.seek(sectionOffset1, true);
+ 		// header section 1 start
+ 		// directory header start
+ 		if (readString(stream, 4) != "ITSP") {
+ 			return false;
+ 		}
+
+ 		// DWORD version
+ 		// DWORD length
+ 		// DWORD 0x000A
+ 		// DWORD chunk size
+ 		// DWORD density
+ 		// DWORD depth
+ 		// DWORD root chunk number
+ 		// DWORD first chunk number
+ 		// DWORD last chunk number
+ 		// DWORD -1
+ 		stream.seek(10 * 4, false);
+ 		unsigned long dirChunkNumber = readUnsignedDWord(stream);
+ 		// ...
+ 		stream.seek(36, false);
+ 		// header section 1 end
+
+ 		// Directory chunks are walked with a fixed size of 4096 bytes.
+ 		const std::size_t chunkSize = 4096;
+ 		std::size_t nextOffset = stream.offset();
+ 		for (unsigned long i = 0; i < dirChunkNumber; ++i) {
+ 			nextOffset += chunkSize;
+ 			std::string header = readString(stream, 4);
+ 			if (header == "PMGL") {
+ 				// listing chunk: entries describing the stored files
+ 				unsigned long quickRefAreaSize = readUnsignedDWord(stream) % chunkSize;
+ 				stream.seek(12, false);
+ 				std::size_t startOffset = stream.offset();
+ 				std::size_t oldOffset = startOffset;
+ 				while (startOffset < nextOffset - quickRefAreaSize) {
+ 					const unsigned long long nameLength = readEncodedInteger(stream);
+ 					if (nameLength > chunkSize) {
+ 						// A name cannot be longer than the chunk holding it;
+ 						// a corrupt length must not drive a huge allocation.
+ 						break;
+ 					}
+ 					std::string name = readString(stream, nameLength);
+ 					int contentSection = readEncodedInteger(stream);
+ 					int offset = readEncodedInteger(stream);
+ 					int length = readEncodedInteger(stream);
+ 					// Names starting with "::" are looked up verbatim;
+ 					// all others are stored lower-cased.
+ 					if (name.substr(0, 2) != "::") {
+ 						name = ZLUnicodeUtil::toLower(name);
+ 					}
+ 					myRecords.insert(
+ 						std::make_pair(
+ 							name,
+ 							CHMFileInfo::RecordInfo(contentSection, offset, length)
+ 						)
+ 					);
+ 					startOffset = stream.offset();
+ 					if (oldOffset == startOffset) {
+ 						// no progress: the stream is exhausted
+ 						break;
+ 					}
+ 					oldOffset = startOffset;
+ 				}
+ 			} else if (header == "PMGI") {
+ 				// index chunk: entries are parsed but not stored.
+ 				// Bounded like the PMGL case so that the loop limit
+ 				// (nextOffset - quickRefAreaSize) cannot wrap around.
+ 				unsigned long quickRefAreaSize = readUnsignedDWord(stream) % chunkSize;
+ 				std::size_t startOffset = stream.offset();
+ 				std::size_t oldOffset = startOffset;
+ 				while (startOffset < nextOffset - quickRefAreaSize) {
+ 					const unsigned long long nameLength = readEncodedInteger(stream);
+ 					if (nameLength > chunkSize) {
+ 						break;
+ 					}
+ 					std::string name = readString(stream, nameLength);
+ 					// chunk number
+ 					readEncodedInteger(stream);
+ 					startOffset = stream.offset();
+ 					if (oldOffset == startOffset) {
+ 						break;
+ 					}
+ 					oldOffset = startOffset;
+ 				}
+ 			}
+ 			stream.seek(nextOffset, true);
+ 			if (stream.offset() != nextOffset) {
+ 				break;
+ 			}
+ 		}
+ 	}
+
+ 	{
+ 		if (!moveToEntry(stream, "::DataSpace/NameList")) {
+ 			return false;
+ 		}
+ 		stream.seek(2, false);
+ 		const int sectionNumber = readUnsignedWord(stream);
+ 		for (int i = 0; i < sectionNumber; ++i) {
+ 			const int length = readUnsignedWord(stream);
+ 			std::string sectionName;
+ 			sectionName.reserve(length);
+ 			// names are stored as 16-bit units; only the low byte is kept
+ 			for (int j = 0; j < length; ++j) {
+ 				sectionName += (char)readUnsignedWord(stream);
+ 			}
+ 			stream.seek(2, false);
+ 			mySectionNames.push_back(sectionName);
+ 		}
+ 	}
+
+ 	{
+ 		// The first section name is skipped: mySectionInfos[k] describes section k + 1.
+ 		for (unsigned int i = 1; i < mySectionNames.size(); ++i) {
+ 			RecordMap::const_iterator it =
+ 				myRecords.find("::DataSpace/Storage/" + mySectionNames[i] + "/Content");
+ 			if (it == myRecords.end()) {
+ 				return false;
+ 			}
+ 			RecordInfo recordInfo = it->second;
+ 			if (recordInfo.Section != 0) {
+ 				return false;
+ 			}
+ 			mySectionInfos.push_back(SectionInfo());
+ 			SectionInfo &info = mySectionInfos.back();
+ 			info.Offset = mySection0Offset + recordInfo.Offset;
+ 			info.Length = recordInfo.Length;
+
+ 			if (!moveToEntry(stream, "::DataSpace/Storage/" + mySectionNames[i] + "/ControlData")) {
+ 				return false;
+ 			}
+ 			stream.seek(4, false);
+ 			std::string lzxc = readString(stream, 4);
+ 			if (lzxc != "LZXC") {
+ 				return false;
+ 			}
+ 			const int version = readUnsignedDWord(stream);
+ 			if ((version <= 0) || (version > 2)) {
+ 				return false;
+ 			}
+ 			info.ResetInterval = readUnsignedDWord(stream);
+ 			if (version == 1) {
+ 				info.ResetInterval /= 0x8000;
+ 			}
+ 			if (info.ResetInterval == 0) {
+ 				// CHMInputStream computes block indices modulo the reset
+ 				// interval; a zero interval would be a division by zero.
+ 				return false;
+ 			}
+ 			// Window size index = base (0 for version 1, 15 for version 2)
+ 			// plus the number of trailing zero bits of the stored window size.
+ 			info.WindowSizeIndex = (version == 1) ? 0 : 15;
+ 			{
+ 				int ws = readUnsignedDWord(stream);
+ 				if (ws > 0) {
+ 					while ((ws & 1) == 0) {
+ 						ws >>= 1;
+ 						info.WindowSizeIndex++;
+ 					}
+ 				}
+ 			}
+
+ 			if (!moveToEntry(stream, "::DataSpace/Storage/" + mySectionNames[i] + "/Transform/{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}/InstanceData/ResetTable")) {
+ 				return false;
+ 			}
+ 			stream.seek(4, false);
+ 			const std::size_t entriesNumber = readUnsignedDWord(stream);
+ 			if (entriesNumber == 0) {
+ 				return false;
+ 			}
+ 			if (entriesNumber > 2048) {
+ 				// file size is greater than 60 Mb
+ 				return false;
+ 			}
+ 			info.ResetTable.reserve(entriesNumber);
+ 			stream.seek(8, false);
+ 			info.UncompressedSize = readUnsignedQWord(stream);
+ 			// one table entry per 0x8000-byte block of uncompressed data
+ 			if ((info.UncompressedSize - 1) / 0x8000 != entriesNumber - 1) {
+ 				return false;
+ 			}
+ 			info.CompressedSize = readUnsignedQWord(stream);
+ 			stream.seek(8, false);
+ 			std::size_t previous = 0;
+ 			for (std::size_t j = 0; j < entriesNumber; ++j) {
+ 				std::size_t value = readUnsignedQWord(stream);
+ 				// the first entry must be 0, every later one strictly increasing
+ 				if ((j > 0) == (value <= previous)) {
+ 					return false;
+ 				}
+ 				info.ResetTable.push_back(value);
+ 				previous = value;
+ 			}
+ 		}
+ 	}
+
+ 	return true;
+ }
+
+ // Reads bytes up to (and consuming) the next NUL, or to the end of the stream,
+ // and returns them converted to a reference relative to the root "/".
+ static std::string readNTString(ZLInputStream &stream) {
+ 	std::string text;
+ 	for (char ch; (stream.read(&ch, 1) == 1) && (ch != '\0'); ) {
+ 		text += ch;
+ 	}
+ 	return CHMReferenceCollection::fullReference("/", text);
+ }
+
+ // True when none of the four file names has been set.
+ bool CHMFileInfo::FileNames::empty() const {
+ 	if (!Start.empty() || !TOC.empty()) {
+ 		return false;
+ 	}
+ 	return Home.empty() && Index.empty();
+ }
+
+ CHMFileInfo::FileNames CHMFileInfo::sectionNames(shared_ptr<ZLInputStream> base) const { // picks the TOC, index, start and home entries of the archive
+ FileNames names;
+ shared_ptr<ZLInputStream> stringsStream = entryStream(base, "/#STRINGS");
+ if (!stringsStream.isNull() && stringsStream->open()) {
+ std::vector<std::string> fileNames; // strings that name an existing entry, in reading order
+ int tocIndex = -1; // position in fileNames of the first ".hhc" name, -1 if none yet
+ int indexIndex = -1; // position in fileNames of the first ".hhk" name, -1 if none yet
+ for (int i = 0; i < 12; ++i) { // only the first 12 NUL-terminated strings are examined
+ std::string argument = readNTString(*stringsStream);
+ if (argument.empty() || (argument[argument.length() - 1] == '/')) { // skip empty strings and names ending in '/'
+ continue;
+ }
+ if (myRecords.find(argument) == myRecords.end()) { // skip strings that are not entry names
+ continue;
+ }
+ if ((tocIndex == -1) && ZLStringUtil::stringEndsWith(argument, ".hhc")) {
+ tocIndex = fileNames.size();
+ names.TOC = argument;
+ } else if ((indexIndex == -1) && ZLStringUtil::stringEndsWith(argument, ".hhk")) {
+ indexIndex = fileNames.size();
+ names.Index = argument;
+ }
+ fileNames.push_back(argument);
+ }
+ std::size_t startIndex = std::max(3, std::max(tocIndex, indexIndex) + 1); // Start is taken after the TOC/index names, but never before position 3
+ if (startIndex < 11) {
+ if (startIndex < fileNames.size()) {
+ names.Start = fileNames[startIndex];
+ }
+ if (startIndex + 1 < fileNames.size()) { // Home is the name right after Start
+ names.Home = fileNames[startIndex + 1];
+ }
+ }
+ stringsStream->close();
+ }
+ if (names.TOC.empty()) { // fallback: first record (in map order) whose name ends in ".hhc"
+ for (RecordMap::const_iterator it = myRecords.begin(); it != myRecords.end(); ++it) {
+ if (ZLStringUtil::stringEndsWith(it->first, ".hhc")) {
+ names.TOC = it->first;
+ break;
+ }
+ }
+ }
+ if (names.empty()) { // still nothing: start from the first ".htm"/".html" record
+ for (RecordMap::const_iterator it = myRecords.begin(); it != myRecords.end(); ++it) {
+ if ((ZLStringUtil::stringEndsWith(it->first, ".htm")) ||
+ (ZLStringUtil::stringEndsWith(it->first, ".html"))) {
+ names.Start = it->first;
+ break;
+ }
+ }
+ }
+
+ return names;
+ }
+
+ const std::string CHMFileInfo::filePath() const { // path of the file given to the constructor (returned by value)
+ return myFilePath;
+ }
diff --git a/reader/src/formats/chm/CHMFile.h b/reader/src/formats/chm/CHMFile.h
new file mode 100644
index 0000000..d98bd84
--- /dev/null
+++ b/reader/src/formats/chm/CHMFile.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __CHMFILE_H__
+#define __CHMFILE_H__
+
+#include <string>
+#include <map>
+#include <vector>
+
+#include <shared_ptr.h>
+#include <ZLInputStream.h>
+
+class ZLFile;
+
+class LZXDecompressor;
+
+ class CHMFileInfo { // directory and section metadata of one CHM file, filled in by init()
+
+ public:
+ struct FileNames { // entry names chosen by sectionNames(); any of them may be empty
+ std::string TOC;
+ std::string Index;
+ std::string Start;
+ std::string Home;
+
+ bool empty() const; // true when all four names are empty
+ };
+
+ public:
+ CHMFileInfo(const ZLFile &file);
+ bool init(ZLInputStream &stream); // parses headers, directory and section tables; false on failure
+ // We assume that base exists and is already open
+ shared_ptr<ZLInputStream> entryStream(shared_ptr<ZLInputStream> base, const std::string &name) const;
+ // We assume that base exists and is already open
+ FileNames sectionNames(shared_ptr<ZLInputStream> base) const;
+ const std::string filePath() const;
+
+ private:
+ bool moveToEntry(ZLInputStream &stream, const std::string &entryName); // seeks to a section-0 entry
+
+ private:
+ unsigned long long mySection0Offset; // file offset that section-0 record offsets are relative to
+
+ struct RecordInfo { // one directory entry
+ RecordInfo(int section, int offset, int length) : Section(section), Offset(offset), Length(length) {} // NOTE(review): negative ints convert to very large size_t values
+ std::size_t Section; // 0 = read directly from the file; k > 0 = mySectionInfos[k - 1]
+ std::size_t Offset;
+ std::size_t Length;
+ };
+
+ typedef std::map<std::string,RecordInfo> RecordMap;
+ RecordMap myRecords; // keyed by entry name; names not starting with "::" are stored lower-cased
+ std::vector<std::string> mySectionNames;
+
+ struct SectionInfo { // parameters of one compressed section
+ std::size_t WindowSizeIndex; // passed to the LZXDecompressor constructor
+ std::size_t ResetInterval; // in 0x8000-byte blocks; the decompressor is reset at multiples of it
+ std::size_t Offset; // file offset of the section's compressed content
+ std::size_t Length;
+ std::size_t CompressedSize;
+ std::size_t UncompressedSize;
+ std::vector<std::size_t> ResetTable; // compressed start offset of each 0x8000-byte block
+ };
+ std::vector<SectionInfo> mySectionInfos;
+
+ const std::string myFilePath;
+
+ private:
+ CHMFileInfo(const CHMFileInfo&); // declared private: copying is disabled
+ const CHMFileInfo &operator = (const CHMFileInfo&);
+
+ friend class CHMInputStream;
+ };
+
+ class CHMInputStream : public ZLInputStream { // stream over one entry stored in a compressed section
+
+ public:
+ CHMInputStream(shared_ptr<ZLInputStream> base, const CHMFileInfo::SectionInfo &sectionInfo, std::size_t offset, std::size_t size);
+ ~CHMInputStream();
+
+ bool open();
+ std::size_t read(char *buffer, std::size_t maxSize);
+ void close();
+
+ void seek(int offset, bool absoluteOffset);
+ std::size_t offset() const;
+ std::size_t sizeOfOpened();
+
+ private:
+ std::size_t do_read(char *buffer, std::size_t maxSize); // buffer may be null: data is consumed but not copied
+
+ private:
+ shared_ptr<ZLInputStream> myBase; // stream over the whole CHM file
+ const CHMFileInfo::SectionInfo mySectionInfo; // held by value (a copy of the constructor argument)
+ std::size_t myBaseStartIndex; // index of the block decompression starts from
+ std::size_t myBaseIndex; // index of the next block to decompress
+ std::size_t myBytesToSkip; // bytes between the start block and the entry's first byte
+ const std::size_t mySize; // entry length
+
+ std::size_t myOffset; // current position within the entry
+ bool myDoSkip; // true until the first read() after open() has skipped myBytesToSkip
+
+ shared_ptr<LZXDecompressor> myDecompressor;
+ std::string myInData; // compressed bytes of the current block
+
+ unsigned char *myOutData; // 0x8000-byte buffer owned by this object (new[] / delete[])
+ std::size_t myOutDataOffset; // read position inside myOutData
+ std::size_t myOutDataLength; // unread bytes left in myOutData
+ };
+
+#endif /* __CHMFILE_H__ */
diff --git a/reader/src/formats/chm/CHMFileImage.cpp b/reader/src/formats/chm/CHMFileImage.cpp
new file mode 100644
index 0000000..a2b58f0
--- /dev/null
+++ b/reader/src/formats/chm/CHMFileImage.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLFile.h>
+
+#include "CHMFileImage.h"
+
+ CHMFileImage::CHMFileImage(shared_ptr<CHMFileInfo> info, const std::string &entry) : ZLStreamImage(ZLMimeType::IMAGE_AUTO, 0, 0), myInfo(info), myEntry(entry) { // only stores the archive info and entry name; streams are created in inputStream()
+ }
+
+ // Opens the CHM file afresh and returns a stream over the image entry,
+ // or a null pointer when the file cannot be opened.
+ shared_ptr<ZLInputStream> CHMFileImage::inputStream() const {
+ 	shared_ptr<ZLInputStream> archive = ZLFile(myInfo->filePath()).inputStream();
+ 	if (!archive.isNull() && archive->open()) {
+ 		return myInfo->entryStream(archive, myEntry);
+ 	}
+ 	return 0;
+ }
diff --git a/reader/src/formats/chm/CHMFileImage.h b/reader/src/formats/chm/CHMFileImage.h
new file mode 100644
index 0000000..bacb6aa
--- /dev/null
+++ b/reader/src/formats/chm/CHMFileImage.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __CHMFILEIMAGE_H__
+#define __CHMFILEIMAGE_H__
+
+#include <ZLStreamImage.h>
+
+#include "CHMFile.h"
+
+ class CHMFileImage : public ZLStreamImage { // image whose data is an entry inside a CHM file
+
+ public:
+ CHMFileImage(shared_ptr<CHMFileInfo> info, const std::string &entry);
+
+ private:
+ shared_ptr<ZLInputStream> inputStream() const; // reopens the CHM file on every call
+
+ private:
+ shared_ptr<CHMFileInfo> myInfo; // parsed metadata of the containing CHM file
+ std::string myEntry; // name of the image entry
+ };
+
+#endif /* __CHMFILEIMAGE_H__ */
diff --git a/reader/src/formats/chm/CHMPlugin.cpp b/reader/src/formats/chm/CHMPlugin.cpp
new file mode 100644
index 0000000..9ea88e4
--- /dev/null
+++ b/reader/src/formats/chm/CHMPlugin.cpp
@@ -0,0 +1,252 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLUnicodeUtil.h>
+#include <ZLStringUtil.h>
+#include <ZLFile.h>
+#include <ZLInputStream.h>
+
+#include "CHMPlugin.h"
+#include "CHMFile.h"
+#include "CHMFileImage.h"
+#include "CHMReferenceCollection.h"
+#include "HHCReader.h"
+#include "HHCReferenceCollector.h"
+#include "../txt/PlainTextFormat.h"
+#include "HtmlSectionReader.h"
+#include "../util/MergedStream.h"
+#include "../html/HtmlReaderStream.h"
+
+#include "../../bookmodel/BookModel.h"
+#include "../../library/Book.h"
+
+ bool CHMPlugin::acceptsFile(const ZLFile &file) const { // decided by the file extension alone; contents are not inspected
+ return file.extension() == "chm";
+ }
+
+ class CHMTextStream : public MergedStream { // presents the archive's documents one after another as a single stream
+
+ public:
+ CHMTextStream(CHMFileInfo &chmFile, shared_ptr<ZLInputStream> base);
+
+ private:
+ void resetToStart(); // rewinds to the first entry; collects the entry list on first use
+ shared_ptr<ZLInputStream> nextStream(); // next readable entry, or null when the list is exhausted
+
+ private:
+ CHMFileInfo &myCHMFile; // held by reference: must outlive this stream
+ shared_ptr<ZLInputStream> myBase;
+ std::vector<std::string> myEntryNames; // entry names in reading order
+ std::size_t myIndex; // position in myEntryNames of the next entry to open
+ };
+
+ // Stores the archive metadata and base stream; the entry list itself is
+ // collected lazily by resetToStart().
+ // myIndex is initialised here so that nextStream() never reads an
+ // indeterminate index if it runs before resetToStart().
+ CHMTextStream::CHMTextStream(CHMFileInfo &chmFile, shared_ptr<ZLInputStream> base) : myCHMFile(chmFile), myBase(base), myIndex(0) {
+ }
+
+ // Rewinds to the first entry. On the first call the list of entries is built:
+ // the start and home pages, followed by everything referenced from the TOC.
+ void CHMTextStream::resetToStart() {
+ 	myIndex = 0;
+ 	if (!myEntryNames.empty()) {
+ 		// the entry list has already been collected
+ 		return;
+ 	}
+
+ 	const CHMFileInfo::FileNames fileNames = myCHMFile.sectionNames(myBase);
+ 	if (fileNames.empty()) {
+ 		return;
+ 	}
+
+ 	CHMReferenceCollection references;
+ 	references.addReference(fileNames.Start, false);
+ 	references.addReference(fileNames.Home, false);
+
+ 	shared_ptr<ZLInputStream> toc = myCHMFile.entryStream(myBase, fileNames.TOC);
+ 	if (!toc.isNull() && toc->open()) {
+ 		references.setPrefix(fileNames.TOC);
+ 		HHCReferenceCollector(references).readDocument(*toc);
+ 	}
+
+ 	while (references.containsNonProcessedReferences()) {
+ 		const std::string entryName = references.nextReference();
+ 		myEntryNames.push_back(entryName);
+ 	}
+ }
+
+ // Advances through the entry list and returns an HTML-reading stream for the
+ // first entry that can be opened; entries without a stream are skipped.
+ // Returns a null pointer once the list is exhausted.
+ shared_ptr<ZLInputStream> CHMTextStream::nextStream() {
+ 	for (; myIndex < myEntryNames.size(); ) {
+ 		const std::string &entryName = myEntryNames[myIndex];
+ 		++myIndex;
+ 		shared_ptr<ZLInputStream> entry = myCHMFile.entryStream(myBase, entryName);
+ 		if (entry.isNull()) {
+ 			continue;
+ 		}
+ 		return new HtmlReaderStream(entry, 50000);
+ 	}
+ 	return 0;
+ }
+
+ bool CHMPlugin::readMetaInfo(Book &book) const { // detects encoding and language from the archive's text; false if the file is unusable
+ const ZLFile &file = book.file();
+ shared_ptr<ZLInputStream> stream = file.inputStream();
+ if (stream.isNull() || !stream->open()) {
+ return false;
+ }
+
+ CHMFileInfo chmFile(file);
+ if (!chmFile.init(*stream)) { // not a parsable CHM container
+ return false;
+ }
+
+ CHMFileInfo::FileNames names = chmFile.sectionNames(stream);
+ if (names.empty()) { // no TOC, index, start or home entry could be determined
+ return false;
+ }
+
+ /*
+ shared_ptr<ZLInputStream> entryStream = chmFile.entryStream(stream, names.Start);
+ if (entryStream.isNull()) {
+ entryStream = chmFile.entryStream(stream, names.Home);
+ }
+ if (entryStream.isNull()) {
+ entryStream = chmFile.entryStream(stream, names.TOC);
+ }
+ / *
+ if (entryStream.isNull()) {
+ chmFile.entryStream(stream, names.Index);
+ }
+ * /
+ if (entryStream.isNull()) {
+ return false;
+ }
+ */
+
+ CHMTextStream textStream(chmFile, stream); // merged view over the archive's documents
+ detectEncodingAndLanguage(book, textStream);
+ if (book.encoding().empty()) { // an empty encoding after detection is treated as failure
+ return false;
+ }
+
+ return true;
+ }
+
+ bool CHMPlugin::readLanguageAndEncoding(Book &book) const { // intentionally does nothing and reports success
+ (void)book; // silences the unused-parameter warning
+ return true;
+ }
+
+ class CHMHyperlinkMatcher : public BookModel::HyperlinkMatcher { // resolves link ids, falling back to the part before '#'
+
+ public:
+ BookModel::Label match(const std::map<std::string,BookModel::Label> &lMap, const std::string &id) const;
+ };
+
+ // Looks up `id` in the label map. If the full id is unknown and it contains
+ // a '#', the part before the '#' is tried instead. Returns Label(0, -1) when
+ // neither lookup succeeds.
+ BookModel::Label CHMHyperlinkMatcher::match(const std::map<std::string,BookModel::Label> &lMap, const std::string &id) const {
+ 	typedef std::map<std::string,BookModel::Label>::const_iterator LabelIterator;
+
+ 	const LabelIterator exact = lMap.find(id);
+ 	if (exact != lMap.end()) {
+ 		return exact->second;
+ 	}
+
+ 	const std::size_t hashPosition = id.find('#');
+ 	if (hashPosition == std::string::npos) {
+ 		return BookModel::Label(0, -1);
+ 	}
+ 	const LabelIterator withoutAnchor = lMap.find(id.substr(0, hashPosition));
+ 	if (withoutAnchor == lMap.end()) {
+ 		return BookModel::Label(0, -1);
+ 	}
+ 	return withoutAnchor->second;
+ }
+
+ bool CHMPlugin::readModel(BookModel &model) const { // builds the book model from the archive; false if nothing could be read
+ model.setHyperlinkMatcher(new CHMHyperlinkMatcher());
+
+ const Book &book = *model.book();
+ const ZLFile &file = book.file();
+
+ shared_ptr<ZLInputStream> stream = file.inputStream();
+ if (stream.isNull() || !stream->open()) {
+ return false;
+ }
+
+ shared_ptr<CHMFileInfo> info = new CHMFileInfo(file); // shared because CHMFileImage objects created below keep a pointer to it
+ if (!info->init(*stream)) {
+ return false;
+ }
+
+ CHMFileInfo::FileNames names = info->sectionNames(stream);
+ if (names.empty()) {
+ return false;
+ }
+
+ CHMReferenceCollection referenceCollection; // queue of entries still to be processed
+
+ referenceCollection.addReference(names.Start, false);
+ referenceCollection.addReference(names.Home, false);
+
+ const std::string &encoding = book.encoding();
+
+ shared_ptr<ZLInputStream> tocStream = info->entryStream(stream, names.TOC);
+ HHCReader hhcReader(referenceCollection, model, encoding);
+ if (!tocStream.isNull() && tocStream->open()) {
+ referenceCollection.setPrefix(names.TOC); // references in the TOC are resolved relative to its directory
+ hhcReader.readDocument(*tocStream);
+ }
+
+ /*
+ if (!tocStream.isNull() && tocStream->open()) {
+ std::string buf;
+ buf.append(tocStream->sizeOfOpened(), '\0');
+ tocStream->read((char*)buf.data(), buf.length());
+ std::cerr << "[ " << names.TOC << " ]\n" << buf << "\n";
+ }
+ */
+
+ int contentCounter = 0; // number of documents successfully read
+ PlainTextFormat format(file);
+ HtmlSectionReader reader(model, format, encoding, info, referenceCollection);
+ while (referenceCollection.containsNonProcessedReferences()) { // the queue can grow while it is drained: `reader` was given the same collection
+ const std::string fileName = referenceCollection.nextReference();
+ if (ZLStringUtil::stringEndsWith(fileName, ".jpg") ||
+ ZLStringUtil::stringEndsWith(fileName, ".gif")) { // image entries become a paragraph holding just the image
+ std::string lowerCasedFileName = ZLUnicodeUtil::toLower(fileName);
+ BookReader bookReader(model);
+ bookReader.setMainTextModel();
+ bookReader.addHyperlinkLabel(lowerCasedFileName);
+ bookReader.pushKind(REGULAR);
+ bookReader.beginParagraph();
+ bookReader.addImageReference(lowerCasedFileName);
+ bookReader.addImage(fileName, new CHMFileImage(info, fileName));
+ bookReader.endParagraph();
+ bookReader.insertEndOfTextParagraph();
+ } else { // everything else is read as an HTML section
+ shared_ptr<ZLInputStream> entryStream = info->entryStream(stream, fileName);
+ if (!entryStream.isNull() && entryStream->open()) {
+ /*
+ std::string buf;
+ buf.append(entryStream->sizeOfOpened(), '\0');
+ entryStream->read((char*)buf.data(), buf.length());
+ std::cerr << "[ " << fileName << " ]\n" << buf << "\n";
+ entryStream->open();
+ */
+ reader.setSectionName(fileName);
+ reader.readDocument(*entryStream);
+ ++contentCounter;
+ }
+ }
+ }
+ if (contentCounter == 0) { // images alone do not count as content
+ return false;
+ }
+
+ hhcReader.setReferences();
+
+
+ return true;
+ }
diff --git a/reader/src/formats/chm/CHMPlugin.h b/reader/src/formats/chm/CHMPlugin.h
new file mode 100644
index 0000000..0d38e62
--- /dev/null
+++ b/reader/src/formats/chm/CHMPlugin.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __CHMPLUGIN_H__
+#define __CHMPLUGIN_H__
+
+#include "../FormatPlugin.h"
+
+ class CHMPlugin : public FormatPlugin { // format plugin for files with the "chm" extension
+
+ public:
+ CHMPlugin();
+ ~CHMPlugin();
+ bool providesMetaInfo() const; // always false
+ bool acceptsFile(const ZLFile &file) const; // true for the "chm" extension
+ bool readMetaInfo(Book &book) const; // detects encoding and language
+ bool readLanguageAndEncoding(Book &book) const; // no-op, always true
+ bool readModel(BookModel &model) const; // reads the archive's documents into the model
+ };
+
+ inline CHMPlugin::CHMPlugin() {} // nothing to set up
+ inline CHMPlugin::~CHMPlugin() {} // nothing to release
+ inline bool CHMPlugin::providesMetaInfo() const { return false; } // this plugin reports that it provides no meta info
+
+#endif /* __CHMPLUGIN_H__ */
diff --git a/reader/src/formats/chm/CHMReferenceCollection.cpp b/reader/src/formats/chm/CHMReferenceCollection.cpp
new file mode 100644
index 0000000..f29dd28
--- /dev/null
+++ b/reader/src/formats/chm/CHMReferenceCollection.cpp
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLUnicodeUtil.h>
+
+#include "CHMReferenceCollection.h"
+#include "../util/MiscUtil.h"
+
+ // Resolves `reference` (URL-decoded first) against the directory `prefix`:
+ //   - references starting with '/' are returned as they are;
+ //   - if the reference contains "::", the part after the last "::" is returned;
+ //   - otherwise each leading "../" removes one trailing directory from the
+ //     prefix, and the remainder of the reference is appended.
+ std::string CHMReferenceCollection::fullReference(const std::string &prefix, std::string reference) {
+ 	reference = MiscUtil::decodeHtmlURL(reference);
+ 	if (!reference.empty() && (reference[0] == '/')) {
+ 		return reference;
+ 	}
+ 	const std::size_t separator = reference.rfind("::");
+ 	if (separator != std::string::npos) {
+ 		return reference.substr(separator + 2);
+ 	}
+
+ 	// count the leading "../" components
+ 	std::size_t parentSteps = 0;
+ 	while (reference.compare(parentSteps * 3, 3, "../") == 0) {
+ 		++parentSteps;
+ 	}
+
+ 	// walk the prefix back one '/' per "../"; -1 means no slash is left
+ 	int slashIndex = prefix.length() - 1;
+ 	for (std::size_t step = 0; (step < parentSteps) && (slashIndex > 0); ++step) {
+ 		slashIndex = prefix.rfind('/', slashIndex - 1);
+ 	}
+ 	const std::string directory = prefix.substr(0, slashIndex + 1);
+ 	return directory + reference.substr(parentSteps * 3);
+ }
+
+ CHMReferenceCollection::CHMReferenceCollection() : myPrefix("/") { // references are relative to "/" until setPrefix() is called
+ }
+
+ // Registers a reference and queues it for processing unless it is already known.
+ // The reference is URL-decoded (and resolved against the current prefix when
+ // `doConvert` is set), stripped of its "#anchor" part and lower-cased.
+ // Returns the normalised reference; an empty input is returned unchanged.
+ // The returned reference points at the element stored in myReferences, which
+ // is never erased, so it stays valid after the queue entry has been popped by
+ // nextReference() (a reference to the queue's back() element would not).
+ const std::string &CHMReferenceCollection::addReference(const std::string &reference, bool doConvert) {
+ 	if (reference.empty()) {
+ 		return reference;
+ 	}
+ 	std::string fullRef = doConvert ? fullReference(myPrefix, reference) : MiscUtil::decodeHtmlURL(reference);
+
+ 	// drop the anchor, if any, before normalising the case
+ 	const std::size_t index = fullRef.find('#');
+ 	if (index != std::string::npos) {
+ 		fullRef.erase(index);
+ 	}
+ 	fullRef = ZLUnicodeUtil::toLower(fullRef);
+
+ 	std::set<std::string>::const_iterator it = myReferences.find(fullRef);
+ 	if (it == myReferences.end()) {
+ 		it = myReferences.insert(fullRef).first;
+ 		myReferenceQueue.push(fullRef);
+ 	}
+ 	return *it;
+ }
+
+ bool CHMReferenceCollection::containsNonProcessedReferences() const { // true while the processing queue is not empty
+ return !myReferenceQueue.empty();
+ }
+
+ // Removes and returns the oldest queued reference; returns an empty string
+ // when the queue is empty.
+ const std::string CHMReferenceCollection::nextReference() {
+ 	std::string next;
+ 	if (!myReferenceQueue.empty()) {
+ 		next = myReferenceQueue.front();
+ 		myReferenceQueue.pop();
+ 	}
+ 	return next;
+ }
+
+ // Sets the prefix to the (URL-decoded) directory part of `fileName`, i.e.
+ // everything up to and including the last '/'. A name without any '/'
+ // yields an empty prefix.
+ void CHMReferenceCollection::setPrefix(const std::string &fileName) {
+ 	const std::size_t lastSlash = fileName.rfind('/');
+ 	const std::size_t directoryLength = (lastSlash == std::string::npos) ? 0 : lastSlash + 1;
+ 	myPrefix = MiscUtil::decodeHtmlURL(fileName.substr(0, directoryLength));
+ }
+
+ const std::string &CHMReferenceCollection::prefix() const { // directory that converted references are resolved against
+ return myPrefix;
+ }
diff --git a/reader/src/formats/chm/CHMReferenceCollection.h b/reader/src/formats/chm/CHMReferenceCollection.h
new file mode 100644
index 0000000..6a53c45
--- /dev/null
+++ b/reader/src/formats/chm/CHMReferenceCollection.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __CHMREFERENCECOLLECTION_H__
+#define __CHMREFERENCECOLLECTION_H__
+
+#include <string>
+#include <set>
+#include <queue>
+
+class CHMReferenceCollection { // holds a prefix, a set of known references and a queue of references still to be handed out
+
+public:
+ static std::string fullReference(const std::string &prefix, std::string reference); // combines a prefix with a reference; usable without an instance
+
+public:
+ CHMReferenceCollection();
+ const std::string &addReference(const std::string &reference, bool doConvert); // doConvert selects fullReference(prefix, reference) instead of plain URL decoding
+ bool containsNonProcessedReferences() const; // true while the queue is non-empty
+ const std::string nextReference(); // pops the queue; empty string when the queue is empty
+ void setPrefix(const std::string &fileName); // prefix = decoded fileName up to and including its last '/'
+ const std::string &prefix() const;
+
+private:
+ std::string myPrefix; // initialised to "/" by the constructor
+ std::set<std::string> myReferences; // every reference added so far
+ std::queue<std::string> myReferenceQueue; // references added but not yet returned by nextReference()
+
+private:
+ CHMReferenceCollection(const CHMReferenceCollection&); // private copy constructor: code outside the class cannot copy
+ const CHMReferenceCollection &operator = (const CHMReferenceCollection&); // private assignment: code outside the class cannot assign
+};
+
+#endif /* __CHMREFERENCECOLLECTION_H__ */
diff --git a/reader/src/formats/chm/E8Decoder.cpp b/reader/src/formats/chm/E8Decoder.cpp
new file mode 100644
index 0000000..53b9335
--- /dev/null
+++ b/reader/src/formats/chm/E8Decoder.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include "LZXDecompressor.h"
+
+void LZXDecompressor::E8Decoder::reset(unsigned int fileSize) { // restarts the decoder; decode() does no rewriting while fileSize is 0
+ myFileSize = fileSize;
+ myFramesCounter = 0; // decode() counts its calls in this member
+ myPosition = 0;
+}
+
+void LZXDecompressor::E8Decoder::decode(unsigned char *buffer, const std::size_t size) { // rewrites, in place, the 4 bytes following each 0xE8 byte in buffer; presumably the LZX E8 (x86 CALL) translation - TODO confirm
+ if (myFramesCounter >= 32768) { // after 32768 counted calls nothing more is done
+ return;
+ }
+ ++myFramesCounter;
+ if (myFileSize == 0) { // a file size of 0 (the value reset(0) installs) disables the rewrite
+ return;
+ }
+
+ myPosition += size; // NOTE(review): myPosition is advanced here but never read in this function
+
+ if (size <= 10) { // buffers of 10 bytes or fewer are left untouched
+ return;
+ }
+
+ const unsigned char *end = buffer + size - 10; // the scan stops 10 bytes before the end of the buffer
+
+ for (unsigned char *ptr = buffer; ptr < end; ) {
+ if (*ptr == 0xE8) {
+ int absoluteOffset = // the 4 bytes after the marker, read as a little-endian value
+ ptr[1] + (ptr[2] << 8) + (ptr[3] << 16) + (ptr[4] << 24);
+ int relativeOffset =
+ (absoluteOffset >= 0) ? // non-negative: subtract the marker's index in this buffer; negative: add myFileSize
+ absoluteOffset - (ptr - buffer) : absoluteOffset + myFileSize;
+ ptr[1] = (unsigned char)relativeOffset; // store the result back, little-endian
+ ptr[2] = (unsigned char)(relativeOffset >> 8);
+ ptr[3] = (unsigned char)(relativeOffset >> 16);
+ ptr[4] = (unsigned char)(relativeOffset >> 24);
+ ptr += 5; // skip the marker and its 4 operand bytes
+ } else {
+ ++ptr;
+ }
+ }
+}
diff --git a/reader/src/formats/chm/HHCReader.cpp b/reader/src/formats/chm/HHCReader.cpp
new file mode 100644
index 0000000..4fd3105
--- /dev/null
+++ b/reader/src/formats/chm/HHCReader.cpp
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLUnicodeUtil.h>
+
+#include "HHCReader.h"
+#include "CHMReferenceCollection.h"
+
+HHCReader::HHCReader(CHMReferenceCollection &collection, BookModel &model, const std::string &encoding) : HtmlReader(encoding), myReferenceCollection(collection), myBookReader(model) { // keeps a reference to collection and builds a BookReader on model
+}
+
+HHCReader::~HHCReader() { // no explicit cleanup; members are destroyed automatically
+}
+
+void HHCReader::startDocumentHandler() { // selects the main text model on the book reader before parsing
+ myBookReader.setMainTextModel();
+}
+
+void HHCReader::endDocumentHandler() { // empties the two scratch strings used while parsing
+ std::string tmp0;
+ myText.swap(tmp0); // myText becomes empty; its old contents die with tmp0
+ std::string tmp1;
+ myReference.swap(tmp1); // same for myReference
+}
+
+static const std::string UL = "UL"; // tag names compared against HtmlTag::Name in tagHandler()
+static const std::string LI = "LI";
+static const std::string OBJECT = "OBJECT";
+static const std::string PARAM = "PARAM";
+static const std::string NAME = "NAME"; // attribute names looked up on PARAM tags
+static const std::string VALUE = "VALUE";
+static const std::string NAME_VALUE = "Name"; // NAME attribute value that marks the entry text
+static const std::string LOCAL_VALUE = "Local"; // NAME attribute value that marks the entry reference
+
+static bool isFirstChild = false; // set on a UL start tag, cleared by the first OBJECT end tag after it; file-scope, so shared by every HHCReader instance
+
+bool HHCReader::tagHandler(const HtmlTag &tag) { // builds contents paragraphs from UL/OBJECT/PARAM tags; always returns true
+ if (tag.Start) {
+ if (tag.Name == UL) {
+ isFirstChild = true; // the next completed OBJECT is the first one since this UL start
+ } else if (tag.Name == LI) { // LI start tags need no action
+ } else if (tag.Name == OBJECT) {
+ myText.erase(); // start a fresh entry
+ myReference.erase();
+ } else if (tag.Name == PARAM) {
+ std::string name;
+ std::string value;
+ for (std::vector<HtmlAttribute>::const_iterator it = tag.Attributes.begin(); it != tag.Attributes.end(); ++it) { // pick up the NAME and VALUE attributes
+ if (it->Name == NAME) {
+ name = it->Value;
+ } else if (it->Name == VALUE) {
+ value = it->Value;
+ }
+ }
+ if (name == NAME_VALUE) {
+ myText = value; // "Name" parameter: the entry text
+ } else if (name == LOCAL_VALUE) {
+ myReference = myReferenceCollection.addReference(value, true); // "Local" parameter: register the target and keep the string addReference() returns
+ }
+ }
+ } else {
+ if (tag.Name == UL) {
+ myBookReader.endContentsParagraph(); // a UL end tag closes the current contents paragraph
+ } else if (tag.Name == OBJECT) {
+ if (!myText.empty() || !myReference.empty()) { // objects that supplied neither text nor reference are skipped
+ if (!isFirstChild) {
+ myBookReader.endContentsParagraph(); // close the previous entry's paragraph first
+ } else {
+ isFirstChild = false;
+ }
+ myBookReader.beginContentsParagraph();
+ if (myText.empty()) {
+ myText = "..."; // placeholder text for entries without a name
+ }
+ myBookReader.addContentsData(myText.empty() ? "..." : myText); // myText is never empty here, so the "..." branch of the ternary is not taken
+ myReferenceVector.push_back(ZLUnicodeUtil::toLower(myReference)); // remembered for setReferences()
+ }
+ }
+ }
+ return true;
+}
+
+bool HHCReader::characterDataHandler(const char*, std::size_t, bool) { // character data is ignored; always returns true
+ return true;
+}
+
+void HHCReader::setReferences() { // for each stored reference i, passes the paragraph number of the label with that name to setReference(i, ...)
+ for (std::size_t i = 0; i < myReferenceVector.size(); ++i) {
+ myBookReader.setReference(i, myBookReader.model().label(myReferenceVector[i]).ParagraphNumber);
+ }
+}
diff --git a/reader/src/formats/chm/HHCReader.h b/reader/src/formats/chm/HHCReader.h
new file mode 100644
index 0000000..c0e4cef
--- /dev/null
+++ b/reader/src/formats/chm/HHCReader.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __HHCREADER_H__
+#define __HHCREADER_H__
+
+#include <vector>
+
+#include "../html/HtmlReader.h"
+#include "../../bookmodel/BookModel.h"
+#include "../../bookmodel/BookReader.h"
+
+class CHMReferenceCollection;
+
+class HHCReader : public HtmlReader { // HtmlReader subclass that turns OBJECT/PARAM entries into contents paragraphs of a BookModel
+
+public:
+ HHCReader(CHMReferenceCollection &collection, BookModel &model, const std::string &encoding);
+ ~HHCReader();
+
+ void setReferences(); // applies the references collected during parsing to the book reader
+
+private:
+ void startDocumentHandler();
+ void endDocumentHandler();
+
+ bool tagHandler(const HtmlTag &tag);
+ bool characterDataHandler(const char*, std::size_t, bool);
+
+private:
+ CHMReferenceCollection &myReferenceCollection; // not owned; receives every "Local" parameter value
+
+ std::string myText; // text of the entry currently being parsed
+ std::string myReference; // reference of the entry currently being parsed
+
+ BookReader myBookReader;
+
+ std::vector<std::string> myReferenceVector; // one lower-cased reference per entry added, in order
+};
+
+#endif /* __HHCREADER_H__ */
diff --git a/reader/src/formats/chm/HHCReferenceCollector.cpp b/reader/src/formats/chm/HHCReferenceCollector.cpp
new file mode 100644
index 0000000..6abcef2
--- /dev/null
+++ b/reader/src/formats/chm/HHCReferenceCollector.cpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2008-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLUnicodeUtil.h>
+
+#include "HHCReferenceCollector.h"
+#include "CHMReferenceCollection.h"
+
+HHCReferenceCollector::HHCReferenceCollector(CHMReferenceCollection &collection) : HtmlReader("US-ASCII"), myReferenceCollection(collection) { // always reads as US-ASCII; keeps a reference to collection
+}
+
+void HHCReferenceCollector::startDocumentHandler() { // no work is done at document start
+}
+
+void HHCReferenceCollector::endDocumentHandler() { // no work is done at document end
+}
+
+static const std::string PARAM = "PARAM"; // the only tag name tagHandler() reacts to
+static const std::string NAME = "NAME"; // attribute names looked up on PARAM tags
+static const std::string VALUE = "VALUE";
+static const std::string NAME_VALUE = "Name"; // not referenced by the tagHandler() in this file
+static const std::string LOCAL_VALUE = "Local"; // NAME attribute value that marks a reference
+
+bool HHCReferenceCollector::tagHandler(const HtmlTag &tag) { // adds the VALUE of every PARAM start tag named "Local" to the collection; always returns true
+ if (tag.Start) {
+ if (tag.Name == PARAM) {
+ std::string name;
+ std::string value;
+ for (std::vector<HtmlAttribute>::const_iterator it = tag.Attributes.begin(); it != tag.Attributes.end(); ++it) { // pick up the NAME and VALUE attributes
+ if (it->Name == NAME) {
+ name = it->Value;
+ } else if (it->Name == VALUE) {
+ value = it->Value;
+ }
+ }
+ if (name == LOCAL_VALUE) {
+ myReferenceCollection.addReference(value, true); // the returned string is not needed here
+ }
+ }
+ }
+ return true;
+}
+
+bool HHCReferenceCollector::characterDataHandler(const char*, std::size_t, bool) { // character data is ignored; always returns true
+ return true;
+}
diff --git a/reader/src/formats/chm/HHCReferenceCollector.h b/reader/src/formats/chm/HHCReferenceCollector.h
new file mode 100644
index 0000000..20e58d1
--- /dev/null
+++ b/reader/src/formats/chm/HHCReferenceCollector.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2008-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __HHCREFERENCECOLLECTOR_H__
+#define __HHCREFERENCECOLLECTOR_H__
+
+#include <vector>
+
+#include "../html/HtmlReader.h"
+
+class CHMReferenceCollection;
+
+class HHCReferenceCollector : public HtmlReader { // HtmlReader subclass that only gathers "Local" parameter values into a CHMReferenceCollection
+
+public:
+ HHCReferenceCollector(CHMReferenceCollection &collection);
+
+private:
+ void startDocumentHandler();
+ void endDocumentHandler();
+
+ bool tagHandler(const HtmlTag &tag);
+ bool characterDataHandler(const char*, std::size_t, bool);
+
+private:
+ CHMReferenceCollection &myReferenceCollection; // not owned; destination of the collected references
+};
+
+#endif /* __HHCREFERENCECOLLECTOR_H__ */
diff --git a/reader/src/formats/chm/HtmlSectionReader.cpp b/reader/src/formats/chm/HtmlSectionReader.cpp
new file mode 100644
index 0000000..9973e14
--- /dev/null
+++ b/reader/src/formats/chm/HtmlSectionReader.cpp
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLUnicodeUtil.h>
+
+#include "HtmlSectionReader.h"
+#include "CHMReferenceCollection.h"
+#include "CHMFileImage.h"
+#include "../util/MiscUtil.h"
+#include "../html/HtmlTagActions.h"
+
+class HtmlSectionHrefTagAction : public HtmlHrefTagAction { // the action createAction() returns for the "A" tag
+
+public:
+ HtmlSectionHrefTagAction(HtmlSectionReader &reader);
+ void run(const HtmlReader::HtmlTag &tag);
+};
+
+class HtmlSectionImageTagAction : public HtmlTagAction { // the action createAction() returns for the "IMG" tag
+
+public:
+ HtmlSectionImageTagAction(HtmlSectionReader &reader);
+ void run(const HtmlReader::HtmlTag &tag);
+};
+
+shared_ptr<HtmlTagAction> HtmlSectionReader::createAction(const std::string &tag) { // section-specific actions for "IMG" and "A"; every other tag is delegated to HtmlBookReader
+ if (tag == "IMG") {
+ return new HtmlSectionImageTagAction(*this);
+ } else if (tag == "A") {
+ return new HtmlSectionHrefTagAction(*this);
+ }
+ return HtmlBookReader::createAction(tag);
+}
+
+HtmlSectionReader::HtmlSectionReader(BookModel &model, const PlainTextFormat &format, const std::string &encoding, shared_ptr<CHMFileInfo> info, CHMReferenceCollection &collection) : HtmlBookReader("", model, format, encoding), myInfo(info), myReferenceCollection(collection) { // passes "" as the base reader's first argument and stores info and collection
+ setBuildTableOfContent(false); // this reader is set not to build a table of contents
+}
+
+void HtmlSectionReader::setSectionName(const std::string &sectionName) { // stores the lower-cased section name and derives the collection prefix from it
+ myCurrentSectionName = ZLUnicodeUtil::toLower(sectionName);
+ myReferenceCollection.setPrefix(myCurrentSectionName);
+}
+
+void HtmlSectionReader::startDocumentHandler() { // runs the base handler, then adds a hyperlink label named after the section
+ HtmlBookReader::startDocumentHandler();
+ myBookReader.addHyperlinkLabel(ZLUnicodeUtil::toLower(myCurrentSectionName)); // the name was already lower-cased by setSectionName()
+}
+
+void HtmlSectionReader::endDocumentHandler() { // runs the base handler, then inserts an end-of-text paragraph
+ HtmlBookReader::endDocumentHandler();
+ myBookReader.insertEndOfTextParagraph();
+}
+
+HtmlSectionHrefTagAction::HtmlSectionHrefTagAction(HtmlSectionReader &reader) : HtmlHrefTagAction(reader) { // only forwards the reader to the base action
+}
+
+void HtmlSectionHrefTagAction::run(const HtmlReader::HtmlTag &tag) { // handles A tags: NAME attributes become labels, HREF attributes become hyperlink controls
+ if (tag.Start) {
+ HtmlSectionReader &reader = (HtmlSectionReader&)myReader; // createAction() builds this action with an HtmlSectionReader
+ for (unsigned int i = 0; i < tag.Attributes.size(); ++i) {
+ if (tag.Attributes[i].Name == "NAME") {
+ bookReader().addHyperlinkLabel(ZLUnicodeUtil::toLower(reader.myCurrentSectionName + '#' + tag.Attributes[i].Value)); // label is "<section>#<name>", lower-cased
+ } else if ((hyperlinkType() == REGULAR) && (tag.Attributes[i].Name == "HREF")) { // HREF is handled only while the hyperlink type is still REGULAR
+ const std::string &value = tag.Attributes[i].Value;
+ if (!value.empty()) {
+ FBTextKind referenceType = MiscUtil::referenceType(value);
+ if (referenceType != INTERNAL_HYPERLINK) {
+ bookReader().addHyperlinkControl(referenceType, value); // non-internal links keep the HREF value unchanged
+ setHyperlinkType(referenceType);
+ } else {
+ const int index = value.find('#'); // -1 when the HREF has no '#'
+ std::string sectionName = (index == -1) ? value : value.substr(0, index); // the part before '#'
+ sectionName = ZLUnicodeUtil::toLower(MiscUtil::decodeHtmlURL(sectionName));
+ if (sectionName.empty()) {
+ sectionName = reader.myCurrentSectionName; // the HREF starts with '#': the target is in the current section
+ } else {
+ sectionName = reader.myReferenceCollection.addReference(sectionName, true); // register the target section and use the string addReference() returns
+ }
+ bookReader().addHyperlinkControl(
+ INTERNAL_HYPERLINK, ZLUnicodeUtil::toLower((index == -1) ? sectionName : (sectionName + value.substr(index))) // re-append the '#...' part when there was one
+ );
+ setHyperlinkType(INTERNAL_HYPERLINK);
+ }
+ }
+ }
+ }
+ } else if (hyperlinkType() != REGULAR) { // end tag while a hyperlink is open
+ bookReader().addControl(hyperlinkType(), false); // close the control and return to REGULAR
+ setHyperlinkType(REGULAR);
+ }
+}
+
+HtmlSectionImageTagAction::HtmlSectionImageTagAction(HtmlSectionReader &reader) : HtmlTagAction(reader) { // only forwards the reader to the base action
+}
+
+void HtmlSectionImageTagAction::run(const HtmlReader::HtmlTag &tag) { // on an IMG start tag, registers the image named by the first SRC attribute
+ if (tag.Start) {
+ //bookReader().endParagraph();
+ HtmlSectionReader &reader = (HtmlSectionReader&)myReader; // createAction() builds this action with an HtmlSectionReader
+ for (unsigned int i = 0; i < tag.Attributes.size(); ++i) {
+ if (tag.Attributes[i].Name == "SRC") {
+ std::string fileName = MiscUtil::decodeHtmlURL(tag.Attributes[i].Value);
+ fileName = CHMReferenceCollection::fullReference(reader.myReferenceCollection.prefix(), fileName); // combine with the collection's current prefix
+ fileName = ZLUnicodeUtil::toLower(fileName);
+ bookReader().addImageReference(fileName);
+ bookReader().addImage(fileName, new CHMFileImage(reader.myInfo, fileName));
+ break; // only the first SRC attribute is used
+ }
+ }
+ //bookReader().beginParagraph();
+ }
+}
diff --git a/reader/src/formats/chm/HtmlSectionReader.h b/reader/src/formats/chm/HtmlSectionReader.h
new file mode 100644
index 0000000..424c178
--- /dev/null
+++ b/reader/src/formats/chm/HtmlSectionReader.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __HTMLSECTIONREADER_H__
+#define __HTMLSECTIONREADER_H__
+
+#include "../html/HtmlBookReader.h"
+#include "CHMFile.h"
+
+class CHMReferenceCollection;
+
+class HtmlSectionReader : public HtmlBookReader { // HtmlBookReader variant with its own "A" and "IMG" tag actions and a current section name
+
+public:
+ HtmlSectionReader(BookModel &model, const PlainTextFormat &format, const std::string &encoding, shared_ptr<CHMFileInfo> info, CHMReferenceCollection &collection);
+ void setSectionName(const std::string &sectionName); // also updates the prefix of the reference collection
+
+private:
+ void startDocumentHandler();
+ void endDocumentHandler();
+
+private:
+ shared_ptr<HtmlTagAction> createAction(const std::string &tag);
+
+private:
+ shared_ptr<CHMFileInfo> myInfo; // handed to each CHMFileImage created for an IMG tag
+ CHMReferenceCollection &myReferenceCollection; // not owned
+ std::string myCurrentSectionName; // stored lower-cased by setSectionName()
+
+friend class HtmlSectionHrefTagAction; // the tag actions read the private members above
+friend class HtmlSectionImageTagAction;
+};
+
+#endif /* __HTMLSECTIONREADER_H__ */
diff --git a/reader/src/formats/chm/HuffmanDecoder.cpp b/reader/src/formats/chm/HuffmanDecoder.cpp
new file mode 100644
index 0000000..db8718f
--- /dev/null
+++ b/reader/src/formats/chm/HuffmanDecoder.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <algorithm>
+
+#include "HuffmanDecoder.h"
+
+HuffmanDecoder::HuffmanDecoder() : myMaxBitsNumber(0) { // starts with a maximum code length of 0 and empty tables
+}
+
+void HuffmanDecoder::reset() { // forgets all code lengths; mySymbols is left as is until the next buildTable()
+ CodeLengths.clear();
+}
+
+bool HuffmanDecoder::buildTable() { // rebuilds the lookup table mySymbols from CodeLengths; false if a length exceeds 16 or the lengths overfill the table
+ myMaxBitsNumber = 0;
+ for (unsigned short symbol = 0; symbol < CodeLengths.size(); symbol++) { // find the longest code length
+ myMaxBitsNumber = std::max(CodeLengths[symbol], myMaxBitsNumber);
+ }
+ if (myMaxBitsNumber > 16) {
+ return false;
+ }
+
+ unsigned int tableSize = 1 << myMaxBitsNumber; // one entry per possible myMaxBitsNumber-bit value
+ mySymbols.clear();
+ mySymbols.reserve(tableSize);
+
+ for (unsigned char i = 1; i <= myMaxBitsNumber; ++i) { // shortest codes first; symbols with length 0 get no entries
+ for (unsigned short symbol = 0; symbol < CodeLengths.size(); symbol++) {
+ if (CodeLengths[symbol] == i) {
+ mySymbols.insert(mySymbols.end(), 1 << (myMaxBitsNumber - i), symbol); // a code of length i fills 2^(max - i) consecutive entries
+ if (mySymbols.size() > tableSize) { // the lengths describe more codes than the table can hold
+ return false;
+ }
+ }
+ }
+ }
+
+ if (mySymbols.size() < tableSize) { // pad any unused tail of the table with symbol 0
+ mySymbols.insert(mySymbols.end(), tableSize - mySymbols.size(), 0);
+ }
+
+ return true;
+}
diff --git a/reader/src/formats/chm/HuffmanDecoder.h b/reader/src/formats/chm/HuffmanDecoder.h
new file mode 100644
index 0000000..bd9f700
--- /dev/null
+++ b/reader/src/formats/chm/HuffmanDecoder.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __HUFFMANDECODER_H__
+#define __HUFFMANDECODER_H__
+
+#include <vector>
+
+#include "BitStream.h"
+
+class HuffmanDecoder { // table-driven decoder: per-symbol code lengths are turned into a direct lookup table
+
+public:
+ HuffmanDecoder();
+
+ bool buildTable(); // rebuilds mySymbols from CodeLengths; false on invalid lengths
+ void reset(); // clears CodeLengths
+
+ unsigned int getSymbol(BitStream &stream) const; // decodes one symbol and consumes its bits from stream
+
+private:
+ unsigned char myMaxBitsNumber; // longest code length found by the last buildTable()
+ std::vector<unsigned short> mySymbols; // lookup table indexed by the next myMaxBitsNumber bits of the stream
+ std::vector<unsigned char> CodeLengths; // code length per symbol
+ HuffmanDecoder(const HuffmanDecoder&); // private copy constructor: code outside the class cannot copy
+ const HuffmanDecoder &operator = (const HuffmanDecoder&); // private assignment: code outside the class cannot assign
+
+friend class LZXDecompressor; // LZXDecompressor fills CodeLengths directly
+};
+
+inline unsigned int HuffmanDecoder::getSymbol(BitStream &stream) const { // decodes the next symbol from stream using the table built by buildTable()
+ unsigned int symbol = mySymbols[stream.peek(myMaxBitsNumber)]; // look ahead myMaxBitsNumber bits; no bounds check is made on the index
+ stream.remove(CodeLengths[symbol]); // consume only as many bits as this symbol's code length
+ return symbol;
+}
+
+#endif /* __HUFFMANDECODER_H__ */
diff --git a/reader/src/formats/chm/LZXDecompressor.cpp b/reader/src/formats/chm/LZXDecompressor.cpp
new file mode 100644
index 0000000..38b4311
--- /dev/null
+++ b/reader/src/formats/chm/LZXDecompressor.cpp
@@ -0,0 +1,287 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cstring>
+
+#include "LZXDecompressor.h"
+
+static unsigned int slotNumber(int windowSizeIndex) { // maps the window size index to the slot count that sizes the main tree
+ if (windowSizeIndex == 20) { // indices 20 and 21 have fixed slot counts
+ return 42;
+ } else if (windowSizeIndex == 21) {
+ return 50;
+ } else {
+ return 2 * windowSizeIndex; // every other index: twice the index
+ }
+}
+
+LZXDecompressor::LZXDecompressor(int windowSizeIndex) : myWindow(1 << windowSizeIndex, 0), mySlotNumber(slotNumber(windowSizeIndex)) { // window of 2^windowSizeIndex zero bytes
+ reset();
+}
+
+void LZXDecompressor::reset() { // returns the decompressor to its initial state; the window contents are not cleared
+ myCurrentBlockType = UNKNOWNN;
+ myReadHeader = true; // the next decompress() call reads the stream header again
+
+ myState.WindowIterator = myWindow.begin(); // output restarts at the beginning of the window
+ myState.R0 = 1; // the three remembered match offsets start at 1
+ myState.R1 = 1;
+ myState.R2 = 1;
+
+ myMainTree.reset();
+ myLengthTree.reset();
+
+ myBlockBytesLeft = 0; // forces a block header to be read first
+
+ myE8Decoder.reset(0); // file size 0: E8 rewriting stays off until a header supplies a size
+}
+
+static bool fill(std::vector<unsigned char> &data, std::vector<unsigned char>::iterator &it, int num, unsigned char value) { // writes num copies of value starting at it and advances it past them
+ if (data.end() - it < num) { // fewer than num elements remain: nothing is written
+ return false;
+ }
+ std::vector<unsigned char>::iterator end = it + num;
+ while (it != end) {
+ *it++ = value;
+ }
+ return true;
+}
+
+bool LZXDecompressor::readLengths(HuffmanDecoder &decoder, std::size_t from, std::size_t size) { // updates decoder.CodeLengths[from, from + size) from the bit stream; false on malformed data
+ HuffmanDecoder preTree; // temporary 20-symbol tree used only to decode the lengths below
+ preTree.CodeLengths.reserve(20);
+ for (int i = 0; i < 20; i++) {
+ preTree.CodeLengths.push_back(myBitStream.get(4)); // each pre-tree code length is 4 bits
+ }
+ if (!preTree.buildTable()) {
+ return false;
+ }
+
+ std::vector<unsigned char> &lengths = decoder.CodeLengths;
+ if (lengths.size() < from + size) { // grow with zeros so the whole range exists
+ lengths.insert(lengths.end(), from + size - lengths.size(), 0);
+ }
+ std::vector<unsigned char>::iterator start = lengths.begin() + from;
+ std::vector<unsigned char>::iterator end = start + size;
+ for (std::vector<unsigned char>::iterator it = start; it != end; ) {
+ int z = preTree.getSymbol(myBitStream);
+ if (z == 17) { // run of 4..19 zero lengths
+ if (!fill(lengths, it, myBitStream.get(4) + 4, 0)) {
+ return false;
+ }
+ } else if (z == 18) { // run of 20..51 zero lengths
+ if (!fill(lengths, it, myBitStream.get(5) + 20, 0)) {
+ return false;
+ }
+ } else if (z == 19) { // run of 4..5 equal lengths, the value coded relative to the current entry
+ unsigned int num = myBitStream.get(1) + 4;
+ z = *it - preTree.getSymbol(myBitStream);
+ if (!fill(lengths, it, num, (z < 0) ? z + 17 : z)) { // negative differences wrap around by 17
+ return false;
+ }
+ } else { // symbols 0..16: single length, coded as a difference from the previous value of this entry
+ z = *it - z;
+ *it++ = (z < 0) ? z + 17 : z;
+ }
+ }
+
+ return true;
+}
+
+static const unsigned int basePosition[51] = { // base value per offset slot (0..50), used by decodeBytes()
+ 0, 1, 2, 3, 4, 6, 8, 12,
+ 16, 24, 32, 48, 64, 96, 128, 192,
+ 256, 384, 512, 768, 1024, 1536, 2048, 3072,
+ 4096, 6144, 8192, 12288, 16384, 24576, 32768, 49152,
+ 65536, 98304, 131072, 196608, 262144, 393216, 524288, 655360,
+ 786432, 917504, 1048576, 1179648, 1310720, 1441792, 1572864, 1703936,
+ 1835008, 1966080, 2097152
+};
+
+bool LZXDecompressor::decodeBytes(DecodingState &state, std::size_t bytesToDecode) { // decodes bytesToDecode bytes of the current block into the window at state.WindowIterator; false on malformed data
+ if (myCurrentBlockType == UNCOMPRESSED) { // uncompressed block: copy the bytes straight from the stream
+ if (!myBitStream.getBytesDirect(&*state.WindowIterator, bytesToDecode)) {
+ return false;
+ }
+ state.WindowIterator += bytesToDecode;
+ return true;
+ }
+
+ while (bytesToDecode > 0) {
+ int symbol = myMainTree.getSymbol(myBitStream);
+ if (symbol < 256) { // symbols below 256 are literal bytes
+ *state.WindowIterator++ = symbol;
+ --bytesToDecode;
+ continue;
+ }
+
+ std::size_t length = symbol % 8; // the low 3 bits hold the match length
+ if (length == 7) { // 7 means the length tree supplies the rest
+ length += myLengthTree.getSymbol(myBitStream);
+ }
+ length += 2;
+ if (length > bytesToDecode) { // a match may not run past the bytes requested
+ return false;
+ }
+
+ std::size_t offset = (symbol - 256) / 8; // the remaining bits select the offset slot
+ switch (offset) {
+ case 0: // slot 0: reuse R0 unchanged
+ offset = state.R0;
+ break;
+ case 1: // slot 1: use R1 and swap it with R0
+ offset = state.R1;
+ state.R1 = state.R0;
+ state.R0 = offset;
+ break;
+ case 2: // slot 2: use R2 and swap it with R0
+ offset = state.R2;
+ state.R2 = state.R0;
+ state.R0 = offset;
+ break;
+ default: // slots 3 and up: compute a new offset and shift it into R0/R1/R2
+ if ((myCurrentBlockType == VERBATIM) && (offset == 3)) {
+ offset = 1;
+ } else {
+ if (offset > 50) { // basePosition has entries 0..50 only
+ return false;
+ }
+ const int positionFooterBits = std::max(0, std::min((int)offset / 2 - 1, 17)); // extra bits for this slot, clamped to 0..17
+ offset = basePosition[offset] - 2;
+ if ((myCurrentBlockType == VERBATIM) || (positionFooterBits == 1) || (positionFooterBits == 2)) { // all extra bits are read directly
+ offset += myBitStream.get(positionFooterBits);
+ } else if (positionFooterBits == 3) { // aligned block, 3 bits: they come from the aligned offset tree
+ offset += myAlignedOffsetTree.getSymbol(myBitStream);
+ } else if (positionFooterBits > 3) { // aligned block: high bits read directly, low 3 bits from the aligned offset tree
+ offset += 8 * myBitStream.get(positionFooterBits - 3);
+ offset += myAlignedOffsetTree.getSymbol(myBitStream);
+ } else { // aligned block with 0 extra bits
+ offset = 1;
+ }
+ }
+ state.R2 = state.R1;
+ state.R1 = state.R0;
+ state.R0 = offset;
+ break;
+ }
+
+ if ((state.WindowIterator - myWindow.begin()) + myWindow.size() < offset) { // the offset reaches back more than position + window size
+ return false;
+ }
+ if (myWindow.size() >= offset + (myWindow.end() - state.WindowIterator)) { // position + size - offset would land at or past end(): add size once, otherwise reject
+ offset += myWindow.size();
+ if (myWindow.size() >= offset + (myWindow.end() - state.WindowIterator)) {
+ return false;
+ }
+ }
+ std::vector<unsigned char>::iterator srcIt = state.WindowIterator + (myWindow.size() - offset); // source position of the match
+ for (std::size_t i = 0; i < length; ++i) { // byte-by-byte copy
+ if (srcIt == myWindow.end()) { // the source wraps around the end of the window
+ srcIt -= myWindow.size();
+ }
+ *state.WindowIterator++ = *srcIt++;
+ }
+ bytesToDecode -= length;
+ }
+ return true;
+}
+
+bool LZXDecompressor::decompress(const std::string &data, unsigned char *outBuffer, const std::size_t outSize) { // decodes outSize bytes from data into outBuffer; false on malformed input, in which case myState is left unchanged
+ myBitStream.setData(data);
+
+ if (myReadHeader) { // the stream header is read on the first call after reset() only
+ if (myBitStream.get(1) == 1) { // 1-bit flag: a 32-bit file size follows and is handed to the E8 decoder
+ myE8Decoder.reset(myBitStream.get(32));
+ }
+ myReadHeader = false;
+ }
+
+ DecodingState state = myState; // work on a copy; it is committed only on success
+
+ for (std::size_t bytesToWrite = outSize; bytesToWrite > 0; ) {
+ if (myBlockBytesLeft == 0) { // the current block is exhausted: read the next block header
+ if (myCurrentBlockType == UNCOMPRESSED) {
+ if (myBlockSize & 1) { // an odd-sized uncompressed block is followed by one extra byte
+ myBitStream.remove(8);
+ }
+ myBitStream.reset();
+ }
+
+ myCurrentBlockType = (BlockType)myBitStream.get(3); // block header: 3-bit type, 24-bit size
+ myBlockSize = myBitStream.get(24);
+ myBlockBytesLeft = myBlockSize;
+
+ switch (myCurrentBlockType) {
+ case UNCOMPRESSED: // the header carries new R0, R1, R2 values
+ myBitStream.reset();
+ state.R0 = myBitStream.get4BytesDirect();
+ state.R1 = myBitStream.get4BytesDirect();
+ state.R2 = myBitStream.get4BytesDirect();
+ break;
+
+ case ALIGNED: // aligned blocks first carry an 8-symbol tree with 3-bit lengths
+ myAlignedOffsetTree.CodeLengths.clear();
+ for (int i = 0; i < 8; i++) {
+ myAlignedOffsetTree.CodeLengths.push_back(myBitStream.get(3));
+ }
+ if (!myAlignedOffsetTree.buildTable()) {
+ return false;
+ }
+ // no break; it's not a mistake
+
+ case VERBATIM: // main tree (256 literals + 8 entries per slot) and 249-symbol length tree
+ if (!readLengths(myMainTree, 0, 256) ||
+ !readLengths(myMainTree, 256, 8 * mySlotNumber) ||
+ !readLengths(myLengthTree, 0, 249) ||
+ !myMainTree.buildTable() ||
+ !myLengthTree.buildTable()) {
+ return false;
+ }
+ break;
+
+ default: // any other block type is invalid
+ return false;
+ }
+ }
+
+ while ((myBlockBytesLeft > 0) && (bytesToWrite > 0)) {
+ std::size_t bytesToDecode = std::min(myBlockBytesLeft, bytesToWrite); // limited by both the block and the request
+ if (state.WindowIterator + bytesToDecode > myWindow.end()) { // the output must fit in what is left of the window
+ return false;
+ }
+
+ if (!decodeBytes(state, bytesToDecode)) {
+ return false;
+ }
+
+ bytesToWrite -= bytesToDecode;
+ myBlockBytesLeft -= bytesToDecode;
+ }
+ }
+
+ std::vector<unsigned char>::iterator jt =
+ (state.WindowIterator != myWindow.begin()) ? state.WindowIterator : myWindow.end(); // a position at begin() is treated as end()
+ std::memcpy(outBuffer, &*(jt - outSize), outSize); // copy out the outSize bytes that end at jt
+
+ myState = state; // commit the working state
+
+ myE8Decoder.decode(outBuffer, outSize); // post-process the output in place
+
+ return true;
+}
diff --git a/reader/src/formats/chm/LZXDecompressor.h b/reader/src/formats/chm/LZXDecompressor.h
new file mode 100644
index 0000000..dac9e1f
--- /dev/null
+++ b/reader/src/formats/chm/LZXDecompressor.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __LZXDECOMPRESSOR_H__
+#define __LZXDECOMPRESSOR_H__
+
+#include <string>
+#include <vector>
+
+#include "BitStream.h"
+#include "HuffmanDecoder.h"
+
+// Stateful LZX decompressor for the CHM reader.
+// The sliding window, the R0-R2 registers and the current block bookkeeping
+// are kept in members, so decoding state carries over between decompress() calls.
+class LZXDecompressor {
+
+public:
+ LZXDecompressor(int windowSizeIndex);
+ void reset();
+
+ // On success copies exactly outSize decoded bytes into outBuffer, runs the
+ // E8 decoder over them and returns true. Returns false on an unknown block
+ // type, a Huffman table that cannot be built, or output that would run past
+ // the end of the window; the committed decoding state (myState) is only
+ // updated on success.
+ // NOTE(review): how `data` is attached to the bit stream is implemented in
+ // the .cpp before the code visible here - confirm there.
+ bool decompress(const std::string &data, unsigned char *outBuffer, const std::size_t outSize);
+
+private:
+ // Snapshot of the mutable decoding position; decompress() works on a copy.
+ struct DecodingState {
+ // Current position inside myWindow.
+ std::vector<unsigned char>::iterator WindowIterator;
+ // R0..R2 are loaded directly from the header of an UNCOMPRESSED block.
+ unsigned int R0;
+ unsigned int R1;
+ unsigned int R2;
+ };
+
+ bool readLengths(HuffmanDecoder &decoder, std::size_t from, std::size_t size);
+ bool decodeBytes(DecodingState &state, std::size_t bytesToDecode);
+
+private:
+ // Block type as read from the 3-bit field of a block header; any value
+ // other than VERBATIM, ALIGNED or UNCOMPRESSED makes decompress() fail.
+ enum BlockType {
+ UNKNOWNN = 0,
+ VERBATIM = 1,
+ ALIGNED = 2,
+ UNCOMPRESSED = 3
+ };
+
+ BlockType myCurrentBlockType;
+ bool myReadHeader;
+
+ std::vector<unsigned char> myWindow;
+
+ DecodingState myState;
+
+ // Size of the current block (24-bit header field) and the bytes of it
+ // that are still to be decoded.
+ std::size_t myBlockSize;
+ std::size_t myBlockBytesLeft;
+
+ // The main tree holds 256 + 8 * mySlotNumber code lengths.
+ const unsigned int mySlotNumber;
+ HuffmanDecoder myMainTree;
+ // 249 code lengths.
+ HuffmanDecoder myLengthTree;
+ // 8 code lengths of 3 bits each, read for ALIGNED blocks only.
+ HuffmanDecoder myAlignedOffsetTree;
+
+ BitStream myBitStream;
+
+ // Post-processing step applied to every chunk returned by decompress().
+ // NOTE(review): presumably the LZX x86 "E8" call translation - confirm in E8Decoder.cpp.
+ class E8Decoder {
+
+ public:
+ void reset(unsigned int fileSize);
+ void decode(unsigned char *buffer, const std::size_t size);
+
+ private:
+ unsigned int myFramesCounter;
+ unsigned int myFileSize;
+ unsigned int myPosition;
+ };
+
+ E8Decoder myE8Decoder;
+};
+
+#endif /* __LZXDECOMPRESSOR_H__ */
diff --git a/reader/src/formats/css/StyleSheetParser.cpp b/reader/src/formats/css/StyleSheetParser.cpp
new file mode 100644
index 0000000..33dc900
--- /dev/null
+++ b/reader/src/formats/css/StyleSheetParser.cpp
@@ -0,0 +1,244 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cctype>
+#include <cstring>
+
+#include <ZLStringUtil.h>
+#include <ZLInputStream.h>
+#include <ZLLogger.h>
+
+#include "StyleSheetParser.h"
+
+// Creates a parser that stores every parsed rule into `table`.
+StyleSheetTableParser::StyleSheetTableParser(StyleSheetTable &table) : myTable(table) {
+ // logger registration is disabled
+ //ZLLogger::Instance().registerClass("CSS");
+}
+
+// Stores one parsed rule in the table.
+// `selector` is trimmed; a selector starting with '@' is handed to
+// processAtRule(). Otherwise it is split on ',' and every non-empty item is
+// registered as (tag, class), where the class is the text after the first '.'.
+void StyleSheetTableParser::storeData(const std::string &selector, const StyleSheetTable::AttributeMap &map) {
+	std::string trimmed = selector;
+	ZLStringUtil::stripWhiteSpaces(trimmed);
+	if (trimmed.empty()) {
+		return;
+	}
+	if (trimmed[0] == '@') {
+		processAtRule(trimmed, map);
+		return;
+	}
+
+	const std::vector<std::string> items = ZLStringUtil::split(trimmed, ",");
+	for (std::size_t i = 0; i < items.size(); ++i) {
+		std::string item = items[i];
+		ZLStringUtil::stripWhiteSpaces(item);
+		if (item.empty()) {
+			continue;
+		}
+		const std::size_t dot = item.find('.');
+		const bool hasClass = dot != std::string::npos;
+		const std::string tag = hasClass ? item.substr(0, dot) : item;
+		const std::string aClass = hasClass ? item.substr(dot + 1) : std::string();
+		myTable.addMap(tag, aClass, map);
+	}
+}
+
+// At-rules are not handled yet: "@font-face" is recognised but nothing is
+// done with it, and every other at-rule is ignored.
+void StyleSheetTableParser::processAtRule(const std::string &name, const StyleSheetTable::AttributeMap &map) {
+	(void)map;
+	const bool isFontFace = (name == "@font-face");
+	(void)isFontFace; // placeholder for future @font-face support
+}
+
+// Parses a bare declaration list (no selector, no braces) given as a
+// NUL-terminated string and returns the style entry built from it.
+// The parser is reset afterwards so it can be reused.
+shared_ptr<ZLTextStyleEntry> StyleSheetSingleStyleParser::parseString(const char *text) {
+	// start as if '{' had already been read
+	myReadState = WAITING_FOR_ATTRIBUTE;
+	const std::size_t length = std::strlen(text);
+	parse(text, length, true);
+	shared_ptr<ZLTextStyleEntry> result = StyleSheetTable::createControl(myMap);
+	reset();
+	return result;
+}
+
+// Starts in the initial state set up by reset().
+StyleSheetParser::StyleSheetParser() {
+ reset();
+}
+
+// Nothing to release; the destructor is declared virtual in the header.
+StyleSheetParser::~StyleSheetParser() {
+}
+
+// Drops all partially parsed data and returns to the "waiting for a
+// selector, outside of a comment" state.
+void StyleSheetParser::reset() {
+	myReadState = WAITING_FOR_SELECTOR;
+	myInsideComment = false;
+
+	myWord.clear();
+	myAttributeName.clear();
+	mySelectorString.clear();
+	myMap.clear();
+}
+
+// Reads the whole stream in 1024-byte chunks and feeds every chunk to the
+// incremental parser. Does nothing when the stream cannot be opened.
+void StyleSheetParser::parse(ZLInputStream &stream) {
+	if (!stream.open()) {
+		return;
+	}
+	// Automatic storage instead of new[]/delete[]: nothing is leaked if
+	// parsing throws.
+	char buffer[1024];
+	for (;;) {
+		const int len = stream.read(buffer, sizeof(buffer));
+		// stop on end of data; also never pass a non-positive length on
+		if (len <= 0) {
+			break;
+		}
+		parse(buffer, len);
+	}
+	stream.close();
+}
+
+// Incremental tokenizer: splits `text` into words at whitespace and at the
+// control symbols valid for the current state, and feeds words and control
+// symbols to the state machine.
+// A word cut off at the end of `text` is kept in myWord and continued by the
+// next call, unless `final` is set, in which case it is processed immediately.
+void StyleSheetParser::parse(const char *text, int len, bool final) {
+	if (text == 0 || len <= 0) {
+		// nothing to scan; a negative length would make the loop below run away
+		return;
+	}
+	const char *start = text;
+	const char *end = text + len;
+	for (const char *ptr = start; ptr != end; ++ptr) {
+		// <cctype> functions are only defined for values representable as
+		// unsigned char, so non-ASCII bytes must not be passed as plain char
+		const bool isSpace = std::isspace(static_cast<unsigned char>(*ptr)) != 0;
+		if (!isSpace && !isControlSymbol(*ptr)) {
+			continue;
+		}
+		if (start != ptr) {
+			myWord.append(start, ptr - start);
+		}
+		processWord(myWord);
+		myWord.erase();
+		// processWord() has updated myInsideComment; a control symbol that
+		// is part of a /* ... */ comment must not change the parser state
+		if (!isSpace && !myInsideComment) {
+			processControl(*ptr);
+		}
+		start = ptr + 1;
+	}
+	if (start < end) {
+		myWord.append(start, end - start);
+		if (final) {
+			processWord(myWord);
+			myWord.erase();
+		}
+	}
+}
+
+// Tells whether `symbol` is a delimiter in the current parser state.
+// While waiting for a selector no character is a delimiter.
+bool StyleSheetParser::isControlSymbol(const char symbol) {
+	if (myReadState == SELECTOR) {
+		return symbol == '{' || symbol == ';';
+	}
+	if (myReadState == WAITING_FOR_ATTRIBUTE) {
+		return symbol == '}' || symbol == ':';
+	}
+	if (myReadState == ATTRIBUTE_NAME) {
+		return symbol == ':';
+	}
+	if (myReadState == ATTRIBUTE_VALUE) {
+		return symbol == '}' || symbol == ';';
+	}
+	// WAITING_FOR_SELECTOR and any unexpected state
+	return false;
+}
+
+// Default implementation: a completed rule is discarded.
+// StyleSheetTableParser overrides this to store the rule.
+void StyleSheetParser::storeData(const std::string&, const StyleSheetTable::AttributeMap&) {
+}
+
+// Default implementation: at-rules are ignored.
+void StyleSheetParser::processAtRule(const std::string&, const StyleSheetTable::AttributeMap&) {
+}
+
+// Advances the state machine on a control symbol.
+// '}' seen while waiting for an attribute or while reading a value closes
+// the rule: the collected declarations are passed to storeData() and the
+// selector and attribute map are cleared.
+void StyleSheetParser::processControl(const char control) {
+	bool closeRule = false;
+
+	if (myReadState == SELECTOR) {
+		if (control == '{') {
+			myReadState = WAITING_FOR_ATTRIBUTE;
+		} else if (control == ';') {
+			// statement without a declaration block: drop it
+			myReadState = WAITING_FOR_SELECTOR;
+			mySelectorString.erase();
+		}
+	} else if (myReadState == WAITING_FOR_ATTRIBUTE) {
+		closeRule = (control == '}');
+	} else if (myReadState == ATTRIBUTE_NAME) {
+		if (control == ':') {
+			myReadState = ATTRIBUTE_VALUE;
+		}
+	} else if (myReadState == ATTRIBUTE_VALUE) {
+		if (control == ';') {
+			myReadState = WAITING_FOR_ATTRIBUTE;
+		} else {
+			closeRule = (control == '}');
+		}
+	}
+
+	if (closeRule) {
+		myReadState = WAITING_FOR_SELECTOR;
+		storeData(mySelectorString, myMap);
+		mySelectorString.erase();
+		myMap.clear();
+	}
+}
+
+// Strips comments from `word` and passes the remaining pieces to
+// processWordWithoutComments(). myInsideComment is toggled at every "/*"
+// and "*/" found, so a comment may span several words. `word` is consumed
+// (modified) in the process.
+void StyleSheetParser::processWord(std::string &word) {
+	while (!word.empty()) {
+		// keep the full std::size_t result: narrowing npos to int and
+		// comparing with -1 relies on implementation-defined conversion
+		const std::size_t index = word.find(myInsideComment ? "*/" : "/*");
+		if (!myInsideComment) {
+			if (index == std::string::npos) {
+				processWordWithoutComments(word);
+			} else if (index > 0) {
+				// text in front of the comment opener
+				processWordWithoutComments(word.substr(0, index));
+			}
+		}
+		if (index == std::string::npos) {
+			break;
+		}
+		myInsideComment = !myInsideComment;
+		word.erase(0, index + 2);
+	}
+}
+
+// Consumes one comment-free word according to the current state:
+// selector words are joined with single spaces, an attribute name starts a
+// fresh (empty) value list, and attribute values are appended to that list
+// with one pair of matching surrounding quotes removed.
+void StyleSheetParser::processWordWithoutComments(const std::string &word) {
+	if (myReadState == WAITING_FOR_SELECTOR) {
+		myReadState = SELECTOR;
+		mySelectorString = word;
+		return;
+	}
+	if (myReadState == SELECTOR) {
+		mySelectorString.append(1, ' ').append(word);
+		return;
+	}
+	if (myReadState == WAITING_FOR_ATTRIBUTE) {
+		// the first word inside a block is an attribute name
+		myReadState = ATTRIBUTE_NAME;
+	}
+	if (myReadState == ATTRIBUTE_NAME) {
+		myAttributeName = word;
+		myMap[myAttributeName].clear();
+		return;
+	}
+	if (myReadState == ATTRIBUTE_VALUE) {
+		const std::size_t length = word.length();
+		const bool quoted =
+			length >= 2 &&
+			(word[0] == '"' || word[0] == '\'') &&
+			word[0] == word[length - 1];
+		myMap[myAttributeName].push_back(quoted ? word.substr(1, length - 2) : word);
+	}
+}
diff --git a/reader/src/formats/css/StyleSheetParser.h b/reader/src/formats/css/StyleSheetParser.h
new file mode 100644
index 0000000..8949823
--- /dev/null
+++ b/reader/src/formats/css/StyleSheetParser.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __STYLESHEETPARSER_H__
+#define __STYLESHEETPARSER_H__
+
+#include "StyleSheetTable.h"
+
+class ZLInputStream;
+
+// Incremental CSS tokenizer / state machine.
+// Input may be supplied in arbitrary chunks; completed rules are reported to
+// subclasses through storeData().
+class StyleSheetParser {
+
+protected:
+ StyleSheetParser();
+
+public:
+ virtual ~StyleSheetParser();
+ // Discards all partial state and returns to WAITING_FOR_SELECTOR.
+ void reset();
+ // Parses the whole stream; does nothing if it cannot be opened.
+ void parse(ZLInputStream &stream);
+ // Parses one chunk; with final == true a trailing unterminated word is
+ // processed instead of being kept for the next chunk.
+ void parse(const char *text, int len, bool final = false);
+
+protected:
+ // Called with the selector string and its declarations when '}' closes a rule.
+ virtual void storeData(const std::string &selector, const StyleSheetTable::AttributeMap &map);
+ virtual void processAtRule(const std::string &name, const StyleSheetTable::AttributeMap &map);
+
+private:
+ bool isControlSymbol(const char symbol);
+ void processWord(std::string &word);
+ void processWordWithoutComments(const std::string &word);
+ void processControl(const char control);
+
+private:
+ // Word being accumulated; may span chunk boundaries.
+ std::string myWord;
+ // Name of the attribute whose values are currently being collected.
+ std::string myAttributeName;
+ enum {
+ WAITING_FOR_SELECTOR,
+ SELECTOR,
+ WAITING_FOR_ATTRIBUTE,
+ ATTRIBUTE_NAME,
+ ATTRIBUTE_VALUE,
+ } myReadState;
+ // True between "/*" and "*/".
+ bool myInsideComment;
+ std::string mySelectorString;
+ // Declarations collected for the rule being parsed.
+ StyleSheetTable::AttributeMap myMap;
+
+friend class StyleSheetSingleStyleParser;
+};
+
+// Parser that stores every parsed rule into a StyleSheetTable.
+class StyleSheetTableParser : public StyleSheetParser {
+
+public:
+ StyleSheetTableParser(StyleSheetTable &table);
+
+private:
+ void storeData(const std::string &selector, const StyleSheetTable::AttributeMap &map);
+ void processAtRule(const std::string &name, const StyleSheetTable::AttributeMap &map);
+
+private:
+ // Destination table; held by reference, not owned.
+ StyleSheetTable &myTable;
+};
+
+// Parser for a bare declaration list without selector or braces.
+class StyleSheetSingleStyleParser : public StyleSheetParser {
+
+public:
+ // `text` must be a NUL-terminated string (its length is taken with strlen).
+ shared_ptr<ZLTextStyleEntry> parseString(const char *text);
+};
+
+#endif /* __STYLESHEETPARSER_H__ */
diff --git a/reader/src/formats/css/StyleSheetTable.cpp b/reader/src/formats/css/StyleSheetTable.cpp
new file mode 100644
index 0000000..fe45a85
--- /dev/null
+++ b/reader/src/formats/css/StyleSheetTable.cpp
@@ -0,0 +1,267 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cstdlib>
+
+#include <ZLStringUtil.h>
+#include <ZLLogger.h>
+
+#include "StyleSheetTable.h"
+
+// True when no style entry and no page-break rule has been registered.
+bool StyleSheetTable::isEmpty() const {
+	if (!myControlMap.empty()) {
+		return false;
+	}
+	if (!myPageBreakBeforeMap.empty()) {
+		return false;
+	}
+	return myPageBreakAfterMap.empty();
+}
+
+// Registers the declarations `map` for the selector (tag, aClass).
+// Ignored when both tag and class are empty or when there are no
+// declarations. An existing entry for the same key is replaced.
+// page-break-before/-after: "always", "left" and "right" force a break,
+// "avoid" forbids it, any other value leaves the key unset.
+void StyleSheetTable::addMap(const std::string &tag, const std::string &aClass, const AttributeMap &map) {
+	if (map.empty() || (tag.empty() && aClass.empty())) {
+		return;
+	}
+
+	const Key key(tag, aClass);
+	myControlMap[key] = createControl(map);
+
+	const char *const properties[2] = { "page-break-before", "page-break-after" };
+	std::map<Key,bool> *const targets[2] = { &myPageBreakBeforeMap, &myPageBreakAfterMap };
+	for (int i = 0; i < 2; ++i) {
+		const std::vector<std::string> &found = values(map, properties[i]);
+		if (found.empty()) {
+			continue;
+		}
+		const std::string &mode = found[0];
+		if (mode == "always" || mode == "left" || mode == "right") {
+			(*targets[i])[key] = true;
+		} else if (mode == "avoid") {
+			(*targets[i])[key] = false;
+		}
+	}
+}
+
+// Saturating conversion to short. Converting an out-of-range floating-point
+// value to an integer type is undefined behaviour, and style sheets are
+// untrusted input, so the value is clamped first (NaN becomes 0).
+static short clampToShort(double value) {
+	if (value != value) {
+		return 0;
+	}
+	if (value >= 32767.0) {
+		return 32767;
+	}
+	if (value <= -32767.0) {
+		return -32767;
+	}
+	return (short)value;
+}
+
+// Parses a CSS length such as "120%", "1.5em", "2ex", "12px" or "10pt".
+// On success stores the numeric part in `size` (em/ex values are stored in
+// hundredths) and the unit in `unit`, and returns true. Returns false and
+// leaves both outputs untouched when the suffix is not a supported unit.
+static bool parseLength(const std::string &toParse, short &size, ZLTextStyleEntry::SizeUnit &unit) {
+	if (ZLStringUtil::stringEndsWith(toParse, "%")) {
+		unit = ZLTextStyleEntry::SIZE_UNIT_PERCENT;
+		size = clampToShort(std::strtol(toParse.c_str(), 0, 10));
+		return true;
+	} else if (ZLStringUtil::stringEndsWith(toParse, "em")) {
+		unit = ZLTextStyleEntry::SIZE_UNIT_EM_100;
+		size = clampToShort(100 * ZLStringUtil::stringToDouble(toParse, 0));
+		return true;
+	} else if (ZLStringUtil::stringEndsWith(toParse, "ex")) {
+		unit = ZLTextStyleEntry::SIZE_UNIT_EX_100;
+		size = clampToShort(100 * ZLStringUtil::stringToDouble(toParse, 0));
+		return true;
+	} else if (ZLStringUtil::stringEndsWith(toParse, "px")) {
+		unit = ZLTextStyleEntry::SIZE_UNIT_PIXEL;
+		size = clampToShort(std::strtol(toParse.c_str(), 0, 10));
+		return true;
+	} else if (ZLStringUtil::stringEndsWith(toParse, "pt")) {
+		unit = ZLTextStyleEntry::SIZE_UNIT_POINT;
+		size = clampToShort(std::strtol(toParse.c_str(), 0, 10));
+		return true;
+	}
+	return false;
+}
+
+// Applies the first value of `attributeName` to `entry` as the length
+// `featureId`. Nothing happens when the attribute is missing, has an empty
+// first value, or cannot be parsed as a length.
+void StyleSheetTable::setLength(ZLTextStyleEntry &entry, ZLTextStyleEntry::Feature featureId, const AttributeMap &map, const std::string &attributeName) {
+	const std::vector<std::string> &candidates = values(map, attributeName);
+	if (candidates.empty() || candidates[0].empty()) {
+		return;
+	}
+
+	short size;
+	ZLTextStyleEntry::SizeUnit unit;
+	if (!parseLength(candidates[0], size, unit)) {
+		return;
+	}
+	entry.setLength(featureId, size, unit);
+}
+
+// Tells whether a page break is required before an element.
+// Lookup order: exact (tag, class), then class only, then tag only; the
+// first registered rule wins. Without any rule the answer is false.
+bool StyleSheetTable::doBreakBefore(const std::string &tag, const std::string &aClass) const {
+	const Key lookupOrder[3] = { Key(tag, aClass), Key("", aClass), Key(tag, "") };
+	for (std::size_t i = 0; i < 3; ++i) {
+		const std::map<Key,bool>::const_iterator found = myPageBreakBeforeMap.find(lookupOrder[i]);
+		if (found != myPageBreakBeforeMap.end()) {
+			return found->second;
+		}
+	}
+	return false;
+}
+
+// Tells whether a page break is required after an element.
+// Lookup order: exact (tag, class), then class only, then tag only; the
+// first registered rule wins. Without any rule the answer is false.
+bool StyleSheetTable::doBreakAfter(const std::string &tag, const std::string &aClass) const {
+	const Key lookupOrder[3] = { Key(tag, aClass), Key("", aClass), Key(tag, "") };
+	for (std::size_t i = 0; i < 3; ++i) {
+		const std::map<Key,bool>::const_iterator found = myPageBreakAfterMap.find(lookupOrder[i]);
+		if (found != myPageBreakAfterMap.end()) {
+			return found->second;
+		}
+	}
+	return false;
+}
+
+// Returns the style entry registered for exactly (tag, aClass), or a null
+// pointer when there is none. No fallback to tag-only or class-only rules
+// is done here.
+shared_ptr<ZLTextStyleEntry> StyleSheetTable::control(const std::string &tag, const std::string &aClass) const {
+	const Key key(tag, aClass);
+	const std::map<Key,shared_ptr<ZLTextStyleEntry> >::const_iterator found = myControlMap.find(key);
+	if (found == myControlMap.end()) {
+		return 0;
+	}
+	return found->second;
+}
+
+// Returns the value list stored under `name`, or a reference to a shared
+// empty vector when the attribute is absent.
+const std::vector<std::string> &StyleSheetTable::values(const AttributeMap &map, const std::string &name) {
+	static const std::vector<std::string> NO_VALUES;
+	const AttributeMap::const_iterator found = map.find(name);
+	return (found == map.end()) ? NO_VALUES : found->second;
+}
+
+// Builds a CSS style entry from a declaration map.
+// Handled properties: text-align, text-decoration, font-weight, font-style,
+// font-variant, font-family, font-size and the margin/padding/text-indent
+// lengths. Unknown values are ignored. For multi-valued properties only the
+// first value is used, except text-decoration where every value is applied.
+shared_ptr<ZLTextStyleEntry> StyleSheetTable::createControl(const AttributeMap &styles) {
+	shared_ptr<ZLTextStyleEntry> entry = new ZLTextStyleEntry(ZLTextStyleEntry::STYLE_CSS_ENTRY);
+
+	const std::vector<std::string> &alignment = values(styles, "text-align");
+	if (!alignment.empty()) {
+		const std::string &mode = alignment[0];
+		if (mode == "justify") {
+			entry->setAlignmentType(ALIGN_JUSTIFY);
+		} else if (mode == "left") {
+			entry->setAlignmentType(ALIGN_LEFT);
+		} else if (mode == "right") {
+			entry->setAlignmentType(ALIGN_RIGHT);
+		} else if (mode == "center") {
+			entry->setAlignmentType(ALIGN_CENTER);
+		}
+	}
+
+	const std::vector<std::string> &decorations = values(styles, "text-decoration");
+	for (std::size_t i = 0; i < decorations.size(); ++i) {
+		const std::string &decoration = decorations[i];
+		if (decoration == "underline") {
+			entry->setFontModifier(ZLTextStyleEntry::FONT_MODIFIER_UNDERLINED, true);
+		} else if (decoration == "line-through") {
+			entry->setFontModifier(ZLTextStyleEntry::FONT_MODIFIER_STRIKEDTHROUGH, true);
+		} else if (decoration == "none") {
+			entry->setFontModifier(ZLTextStyleEntry::FONT_MODIFIER_UNDERLINED, false);
+			entry->setFontModifier(ZLTextStyleEntry::FONT_MODIFIER_STRIKEDTHROUGH, false);
+		}
+	}
+
+	const std::vector<std::string> &weights = values(styles, "font-weight");
+	if (!weights.empty()) {
+		const std::string &weight = weights[0];
+		int numeric = -1;
+		if (weight == "bold") {
+			numeric = 700;
+		} else if (weight == "normal") {
+			numeric = 400;
+		} else if (weight != "bolder" && weight != "lighter") {
+			// TODO: "bolder" and "lighter" are not implemented and are ignored
+			numeric = ZLStringUtil::stringToInteger(weight, -1);
+		}
+		if (numeric != -1) {
+			// numeric weights of 600 and above count as bold
+			entry->setFontModifier(ZLTextStyleEntry::FONT_MODIFIER_BOLD, numeric >= 600);
+		}
+	}
+
+	const std::vector<std::string> &fontStyle = values(styles, "font-style");
+	if (!fontStyle.empty()) {
+		entry->setFontModifier(ZLTextStyleEntry::FONT_MODIFIER_ITALIC, fontStyle[0] == "italic");
+	}
+
+	const std::vector<std::string> &fontVariant = values(styles, "font-variant");
+	if (!fontVariant.empty()) {
+		entry->setFontModifier(ZLTextStyleEntry::FONT_MODIFIER_SMALLCAPS, fontVariant[0] == "small-caps");
+	}
+
+	const std::vector<std::string> &fontFamily = values(styles, "font-family");
+	if (!fontFamily.empty() && !fontFamily[0].empty()) {
+		entry->setFontFamily(fontFamily[0]);
+	}
+
+	const std::vector<std::string> &fontSize = values(styles, "font-size");
+	if (!fontSize.empty()) {
+		// absolute-size keywords, expressed as a percentage
+		static const struct {
+			const char *Name;
+			short Percent;
+		} KEYWORD_SIZES[] = {
+			{ "xx-small", 58 },
+			{ "x-small", 69 },
+			{ "small", 83 },
+			{ "medium", 100 },
+			{ "large", 120 },
+			{ "x-large", 144 },
+			{ "xx-large", 173 },
+		};
+
+		const std::string &value = fontSize[0];
+		short size = 100;
+		ZLTextStyleEntry::SizeUnit unit = ZLTextStyleEntry::SIZE_UNIT_PERCENT;
+		bool haveSize = false;
+		for (std::size_t i = 0; i < sizeof(KEYWORD_SIZES) / sizeof(KEYWORD_SIZES[0]); ++i) {
+			if (value == KEYWORD_SIZES[i].Name) {
+				size = KEYWORD_SIZES[i].Percent;
+				haveSize = true;
+				break;
+			}
+		}
+		if (!haveSize) {
+			if (value == "inherit") {
+				entry->setFontModifier(ZLTextStyleEntry::FONT_MODIFIER_INHERIT, true);
+			} else if (value == "smaller") {
+				entry->setFontModifier(ZLTextStyleEntry::FONT_MODIFIER_SMALLER, true);
+			} else if (value == "larger") {
+				entry->setFontModifier(ZLTextStyleEntry::FONT_MODIFIER_LARGER, true);
+			} else {
+				haveSize = parseLength(value, size, unit);
+			}
+		}
+		if (haveSize) {
+			entry->setLength(ZLTextStyleEntry::LENGTH_FONT_SIZE, size, unit);
+		}
+	}
+
+	// Order matters: a later property overrides an earlier one that maps to
+	// the same feature (padding-* is applied after margin-*).
+	static const struct {
+		ZLTextStyleEntry::Feature Id;
+		const char *Property;
+	} LENGTH_PROPERTIES[] = {
+		{ ZLTextStyleEntry::LENGTH_LEFT_INDENT, "margin-left" },
+		{ ZLTextStyleEntry::LENGTH_RIGHT_INDENT, "margin-right" },
+		{ ZLTextStyleEntry::LENGTH_FIRST_LINE_INDENT_DELTA, "text-indent" },
+		{ ZLTextStyleEntry::LENGTH_SPACE_BEFORE, "margin-top" },
+		{ ZLTextStyleEntry::LENGTH_SPACE_BEFORE, "padding-top" },
+		{ ZLTextStyleEntry::LENGTH_SPACE_AFTER, "margin-bottom" },
+		{ ZLTextStyleEntry::LENGTH_SPACE_AFTER, "padding-bottom" },
+	};
+	for (std::size_t i = 0; i < sizeof(LENGTH_PROPERTIES) / sizeof(LENGTH_PROPERTIES[0]); ++i) {
+		setLength(*entry, LENGTH_PROPERTIES[i].Id, styles, LENGTH_PROPERTIES[i].Property);
+	}
+
+	return entry;
+}
+
+// Removes every registered style entry and page-break rule.
+void StyleSheetTable::clear() {
+	myPageBreakBeforeMap.clear();
+	myPageBreakAfterMap.clear();
+	myControlMap.clear();
+}
diff --git a/reader/src/formats/css/StyleSheetTable.h b/reader/src/formats/css/StyleSheetTable.h
new file mode 100644
index 0000000..54236fb
--- /dev/null
+++ b/reader/src/formats/css/StyleSheetTable.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __STYLESHEETTABLE_H__
+#define __STYLESHEETTABLE_H__
+
+#include <string>
+#include <map>
+#include <vector>
+
+#include <shared_ptr.h>
+
+#include <ZLTextParagraph.h>
+#include <ZLTextStyleEntry.h>
+
+// Lookup table from (tag, class) selectors to style entries and page-break
+// flags. It is filled by StyleSheetTableParser (a friend) through addMap().
+class StyleSheetTable {
+
+public:
+ // Attribute name -> list of values, in the order they were parsed.
+ typedef std::map<std::string,std::vector<std::string> > AttributeMap;
+ // Builds a style entry from a declaration map.
+ static shared_ptr<ZLTextStyleEntry> createControl(const AttributeMap &map);
+
+private:
+ void addMap(const std::string &tag, const std::string &aClass, const AttributeMap &map);
+
+ static void setLength(ZLTextStyleEntry &entry, ZLTextStyleEntry::Feature featureId, const AttributeMap &map, const std::string &attributeName);
+ // Returns the values stored under `name`, or an empty vector if absent.
+ static const std::vector<std::string> &values(const AttributeMap &map, const std::string &name);
+
+public:
+ bool isEmpty() const;
+ // Page-break queries try (tag, class), then class only, then tag only.
+ bool doBreakBefore(const std::string &tag, const std::string &aClass) const;
+ bool doBreakAfter(const std::string &tag, const std::string &aClass) const;
+ // Exact (tag, class) lookup; returns a null pointer when nothing is registered.
+ shared_ptr<ZLTextStyleEntry> control(const std::string &tag, const std::string &aClass) const;
+
+ void clear();
+
+private:
+ // Map key: a tag name and a class name, ordered by tag name first.
+ struct Key {
+ Key(const std::string &tag, const std::string &aClass);
+
+ const std::string TagName;
+ const std::string ClassName;
+
+ bool operator < (const Key &key) const;
+ };
+
+ std::map<Key,shared_ptr<ZLTextStyleEntry> > myControlMap;
+ std::map<Key,bool> myPageBreakBeforeMap;
+ std::map<Key,bool> myPageBreakAfterMap;
+
+friend class StyleSheetTableParser;
+};
+
+// Stores copies of the tag name and the class name.
+inline StyleSheetTable::Key::Key(const std::string &tag, const std::string &aClass) : TagName(tag), ClassName(aClass) {
+}
+
+// Lexicographic order: by tag name first, by class name for equal tags.
+inline bool StyleSheetTable::Key::operator < (const StyleSheetTable::Key &key) const {
+	if (TagName != key.TagName) {
+		return TagName < key.TagName;
+	}
+	return ClassName < key.ClassName;
+}
+
+#endif /* __STYLESHEETTABLE_H__ */
diff --git a/reader/src/formats/doc/DocBookReader.cpp b/reader/src/formats/doc/DocBookReader.cpp
new file mode 100644
index 0000000..99f471a
--- /dev/null
+++ b/reader/src/formats/doc/DocBookReader.cpp
@@ -0,0 +1,377 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <vector>
+#include <string>
+
+#include <ZLInputStream.h>
+#include <ZLLogger.h>
+#include <ZLFile.h>
+#include <ZLStringUtil.h>
+#include <ZLFileImage.h>
+
+#include "DocBookReader.h"
+#include "../../bookmodel/BookModel.h"
+#include "../../library/Book.h"
+
+#include "OleStorage.h"
+#include "OleMainStream.h"
+
+// Creates a reader that writes into `model`; `encoding` names the converter
+// used later for ANSI text (see ansiDataHandler).
+DocBookReader::DocBookReader(BookModel &model, const std::string &encoding) :
+ myModelReader(model),
+ myPictureCounter(0),
+ myEncoding(encoding) {
+ // start outside of any field
+ myReadState = READ_TEXT;
+}
+
+// Reads the book's file into the main text model.
+// Returns false when the input stream is missing, cannot be opened, or the
+// document cannot be read; the end-of-text paragraph is only inserted on
+// success.
+bool DocBookReader::readBook() {
+	shared_ptr<ZLInputStream> stream = myModelReader.model().book()->file().inputStream();
+	const bool opened = !stream.isNull() && stream->open();
+	if (!opened) {
+		return false;
+	}
+
+	myModelReader.setMainTextModel();
+	myModelReader.pushKind(REGULAR);
+	myModelReader.beginParagraph();
+
+	const bool ok = readDocument(stream, true);
+	if (ok) {
+		myModelReader.insertEndOfTextParagraph();
+	}
+	return ok;
+}
+
+// Handles one text character.
+// Inside a field, characters of the field-info part are buffered, characters
+// of suppressed field text are dropped, and a horizontal tab in the field
+// text suppresses the rest of that text. Everything else is converted to
+// UTF-8 and appended to the current paragraph (opened on demand).
+void DocBookReader::handleChar(ZLUnicodeUtil::Ucs2Char ucs2char) {
+	if (myReadState == READ_FIELD) {
+		if (myReadFieldState == READ_FIELD_INFO) {
+			myFieldInfoBuffer.push_back(ucs2char);
+			return;
+		}
+		if (myReadFieldState == DONT_READ_FIELD_TEXT) {
+			return;
+		}
+		if (myReadFieldState == READ_FIELD_TEXT && ucs2char == WORD_HORIZONTAL_TAB) {
+			// removes pagination from a TOC (documents saved by OpenOffice)
+			myReadFieldState = DONT_READ_FIELD_TEXT;
+			return;
+		}
+	}
+
+	ZLUnicodeUtil::Ucs2String single;
+	single.push_back(ucs2char);
+	std::string encoded;
+	ZLUnicodeUtil::ucs2ToUtf8(encoded, single);
+
+	if (!myModelReader.paragraphIsOpen()) {
+		myModelReader.beginParagraph();
+	}
+	myModelReader.addData(encoded);
+}
+
+// Starts a new paragraph that keeps the formatting of the current one: the
+// current style entry (if any) and all active font kinds are re-applied.
+void DocBookReader::handleHardLinebreak() {
+	if (myModelReader.paragraphIsOpen()) {
+		myModelReader.endParagraph();
+	}
+	myModelReader.beginParagraph();
+
+	if (!myCurrentStyleEntry.isNull()) {
+		myModelReader.addStyleEntry(*myCurrentStyleEntry);
+	}
+	const std::size_t depth = myKindStack.size();
+	for (std::size_t index = 0; index != depth; ++index) {
+		myModelReader.addControl(myKindStack.at(index), true);
+	}
+}
+
+// Closes the current paragraph (if open), forgets its style entry and opens
+// a fresh paragraph.
+void DocBookReader::handleParagraphEnd() {
+	myCurrentStyleEntry = 0;
+	if (myModelReader.paragraphIsOpen()) {
+		myModelReader.endParagraph();
+	}
+	myModelReader.beginParagraph();
+}
+
+// Closes the current paragraph (if open), forgets its style entry, inserts
+// an end-of-section paragraph and opens a fresh paragraph.
+void DocBookReader::handlePageBreak() {
+	myCurrentStyleEntry = 0;
+	if (myModelReader.paragraphIsOpen()) {
+		myModelReader.endParagraph();
+	}
+	myModelReader.insertEndOfSectionParagraph();
+	myModelReader.beginParagraph();
+}
+
+// Emits SPACE, VERTICAL_LINE, SPACE as ordinary characters in place of a
+// table separator.
+void DocBookReader::handleTableSeparator() {
+ handleChar(SPACE);
+ handleChar(VERTICAL_LINE);
+ handleChar(SPACE);
+}
+
+// A table row end is handled exactly like a paragraph end.
+void DocBookReader::handleTableEndRow() {
+ handleParagraphEnd();
+}
+
+// Footnote marks are currently ignored.
+void DocBookReader::handleFootNoteMark() {
+ //TODO implement
+}
+
+// Enters field mode and starts collecting the field-info text.
+// A field that is still open (nested field) is closed first.
+void DocBookReader::handleStartField() {
+	const bool nested = (myReadState == READ_FIELD);
+	if (nested) {
+		handleEndField();
+	}
+	myHyperlinkTypeState = NO_HYPERLINK;
+	myReadFieldState = READ_FIELD_INFO;
+	myReadState = READ_FIELD;
+}
+
+// Called at the separator between the info part and the text part of a
+// field. The buffered field info is consumed and interpreted:
+//  - empty info or a SEQ field: the field text is read as plain text;
+//  - HYPERLINK with at least one argument: a hyperlink control is opened
+//    (internal for "\l" links, external otherwise) if a link can be parsed;
+//  - anything else: the field text is skipped.
+void DocBookReader::handleSeparatorField() {
+	static const std::string HYPERLINK = "HYPERLINK";
+	static const std::string SEQUENCE = "SEQ";
+	static const std::string SPACE_DELIMETER = " ";
+	static const std::string LOCAL_LINK = "\\l";
+
+	myReadFieldState = READ_FIELD_TEXT;
+	myHyperlinkTypeState = NO_HYPERLINK;
+
+	ZLUnicodeUtil::Ucs2String fieldInfo = myFieldInfoBuffer;
+	myFieldInfoBuffer.clear();
+
+	std::string fieldText;
+	ZLUnicodeUtil::ucs2ToUtf8(fieldText, fieldInfo);
+	ZLUnicodeUtil::utf8Trim(fieldText);
+	if (fieldText.empty()) {
+		return;
+	}
+
+	// split() may return empty strings; keep the non-empty tokens only
+	const std::vector<std::string> rawTokens = ZLStringUtil::split(fieldText, SPACE_DELIMETER);
+	std::vector<std::string> tokens;
+	for (std::vector<std::string>::const_iterator it = rawTokens.begin(); it != rawTokens.end(); ++it) {
+		if (!it->empty()) {
+			tokens.push_back(*it);
+		}
+	}
+
+	if (!tokens.empty() && tokens[0] == SEQUENCE) {
+		// states are already READ_FIELD_TEXT / NO_HYPERLINK
+		return;
+	}
+
+	const bool isHyperlink = tokens.size() >= 2 && tokens[0] == HYPERLINK;
+	if (!isHyperlink) {
+		// removes pagination from a TOC and the text of non-hyperlink fields
+		myReadFieldState = DONT_READ_FIELD_TEXT;
+		return;
+	}
+
+	const bool isLocal = (tokens[1] == LOCAL_LINK);
+	const std::string link = isLocal ? parseLink(fieldInfo) : parseLink(fieldInfo, true);
+	if (link.empty()) {
+		return;
+	}
+	if (isLocal) {
+		myModelReader.addHyperlinkControl(INTERNAL_HYPERLINK, link);
+		myHyperlinkTypeState = INT_HYPERLINK_INSERTED;
+	} else {
+		myModelReader.addHyperlinkControl(EXTERNAL_HYPERLINK, link);
+		myHyperlinkTypeState = EXT_HYPERLINK_INSERTED;
+	}
+}
+
+// Leaves field mode. A hyperlink control opened for this field is closed.
+// The field-info buffer is always cleared, even when no field is open.
+void DocBookReader::handleEndField() {
+	myFieldInfoBuffer.clear();
+	if (myReadState == READ_TEXT) {
+		return;
+	}
+
+	const bool externalOpen = (myHyperlinkTypeState == EXT_HYPERLINK_INSERTED);
+	const bool internalOpen = !externalOpen && (myHyperlinkTypeState == INT_HYPERLINK_INSERTED);
+	if (externalOpen) {
+		myModelReader.addControl(EXTERNAL_HYPERLINK, false);
+	} else if (internalOpen) {
+		myModelReader.addControl(INTERNAL_HYPERLINK, false);
+	}
+
+	myHyperlinkTypeState = NO_HYPERLINK;
+	myReadState = READ_TEXT;
+}
+
+// Adds an image made of `blocks` of the book file. Images are identified
+// by a running counter that is converted to a string.
+void DocBookReader::handleImage(const ZLFileImage::Blocks &blocks) {
+	std::string imageId;
+	ZLStringUtil::appendNumber(imageId, myPictureCounter);
+	++myPictureCounter;
+
+	myModelReader.addImageReference(imageId);
+	ZLFile file(myModelReader.model().book()->file().path(), ZLMimeType::IMAGE_AUTO);
+	myModelReader.addImage(imageId, new ZLFileImage(file, blocks, ZLFileImage::ENCODING_NONE));
+}
+
+// Handles the remaining control characters: WORD_MINUS is emitted as MINUS,
+// a horizontal tab is passed through unchanged, and a soft hyphen or any
+// other control character is dropped.
+void DocBookReader::handleOtherControlChar(ZLUnicodeUtil::Ucs2Char ucs2char) {
+	if (ucs2char == WORD_MINUS) {
+		handleChar(MINUS);
+		return;
+	}
+	if (ucs2char == WORD_SOFT_HYPHEN) {
+		// skipped on purpose
+		return;
+	}
+	if (ucs2char == WORD_HORIZONTAL_TAB) {
+		handleChar(ucs2char);
+	}
+}
+
+// Switches the active font kinds to those in `fontStyle` (bold, italic):
+// all currently open kinds are closed, then the new ones are opened.
+// Style changes inside the text of an inserted hyperlink are ignored.
+void DocBookReader::handleFontStyle(unsigned int fontStyle) {
+	const bool insideHyperlinkText =
+		myReadState == READ_FIELD &&
+		myReadFieldState == READ_FIELD_TEXT &&
+		myHyperlinkTypeState != NO_HYPERLINK;
+	if (insideHyperlinkText) {
+		// otherwise a hyperlink would look bold only and not like a hyperlink
+		return;
+	}
+
+	for (; !myKindStack.empty(); myKindStack.pop_back()) {
+		myModelReader.addControl(myKindStack.back(), false);
+	}
+
+	if ((fontStyle & OleMainStream::CharInfo::FONT_BOLD) != 0) {
+		myKindStack.push_back(BOLD);
+	}
+	if ((fontStyle & OleMainStream::CharInfo::FONT_ITALIC) != 0) {
+		myKindStack.push_back(ITALIC);
+	}
+
+	const std::size_t depth = myKindStack.size();
+	for (std::size_t index = 0; index != depth; ++index) {
+		myModelReader.addControl(myKindStack.at(index), true);
+	}
+}
+
+// Applies a paragraph style: optional page break, alignment, a larger font
+// for the H1-H3 styles, and the font kinds belonging to the style.
+// If the style id equals that of the previous paragraph, the previously
+// active font kinds are re-applied instead of the style's defaults.
+void DocBookReader::handleParagraphStyle(const OleMainStream::Style &styleInfo) {
+	if (styleInfo.HasPageBreakBefore) {
+		handlePageBreak();
+	}
+
+	shared_ptr<ZLTextStyleEntry> entry = new ZLTextStyleEntry(ZLTextStyleEntry::STYLE_OTHER_ENTRY);
+
+	switch (styleInfo.Alignment) {
+		case OleMainStream::Style::ALIGNMENT_LEFT:
+			entry->setAlignmentType(ALIGN_LEFT);
+			break;
+		case OleMainStream::Style::ALIGNMENT_RIGHT:
+			entry->setAlignmentType(ALIGN_RIGHT);
+			break;
+		case OleMainStream::Style::ALIGNMENT_CENTER:
+			entry->setAlignmentType(ALIGN_CENTER);
+			break;
+		case OleMainStream::Style::ALIGNMENT_JUSTIFY:
+			entry->setAlignmentType(ALIGN_JUSTIFY);
+			break;
+		default:
+			// keep the default alignment
+			break;
+	}
+
+	//TODO wrong result when the style is a heading but its size is small
+	int headingPercent = 0;
+	switch (styleInfo.StyleIdCurrent) {
+		case OleMainStream::Style::STYLE_H1:
+			headingPercent = 140;
+			break;
+		case OleMainStream::Style::STYLE_H2:
+			headingPercent = 120;
+			break;
+		case OleMainStream::Style::STYLE_H3:
+			headingPercent = 110;
+			break;
+		default:
+			break;
+	}
+	if (headingPercent != 0) {
+		entry->setLength(ZLTextStyleEntry::LENGTH_FONT_SIZE, headingPercent, ZLTextStyleEntry::SIZE_UNIT_PERCENT);
+	}
+
+	myCurrentStyleEntry = entry;
+	myModelReader.addStyleEntry(*myCurrentStyleEntry);
+
+	const bool sameStyleAsBefore =
+		myCurrentStyleInfo.StyleIdCurrent != OleMainStream::Style::STYLE_INVALID &&
+		myCurrentStyleInfo.StyleIdCurrent == styleInfo.StyleIdCurrent;
+	if (sameStyleAsBefore) {
+		// keep the font style of the previous paragraph
+		for (std::size_t index = 0; index != myKindStack.size(); ++index) {
+			myModelReader.addControl(myKindStack.at(index), true);
+		}
+	} else {
+		// start from the font style defined by the style sheet
+		myKindStack.clear();
+		handleFontStyle(styleInfo.CurrentCharInfo.FontStyle);
+	}
+	myCurrentStyleInfo = styleInfo;
+}
+
+// Registers the bookmark `name` as a hyperlink label at the current position.
+void DocBookReader::handleBookmark(const std::string &name) {
+ myModelReader.addHyperlinkLabel(name);
+}
+
+// Extracts the link target from a field-info string: the text between the
+// first and the last double quote, converted to UTF-8. Returns an empty
+// string when there are fewer than two quotes. With `urlencode` set, spaces
+// are replaced by "%20" and inner quotes by "%22".
+//
+// TODO support fields like
+//   [0x13] HYPERLINK "http://site.ru/some text" \t "_blank" [0x14] text [0x15]
+// Because the last quote is searched for, \t and _blank are currently read
+// as part of the link. Searching for the last quote is needed for links
+// that contain quotes themselves, e.g.
+//   [0x13] HYPERLINK "http://yandex.ru/yandsearch?text='some text' and "some text2"" [0x14] link text [0x15]
+std::string DocBookReader::parseLink(ZLUnicodeUtil::Ucs2String s, bool urlencode) {
+	static const ZLUnicodeUtil::Ucs2Char QUOTE = 0x22;
+
+	//TODO maybe findFirstOf / findLastOf should live in ZLUnicodeUtil
+	const std::size_t length = s.size();
+	std::size_t first = 0;
+	while (first < length && s.at(first) != QUOTE) {
+		++first;
+	}
+	if (first == length) {
+		return std::string();
+	}
+
+	// length > 0 here; search backwards, stopping at the opening quote
+	std::size_t last = length - 1;
+	while (last > first && s.at(last) != QUOTE) {
+		--last;
+	}
+	if (last == first) {
+		return std::string();
+	}
+
+	ZLUnicodeUtil::Ucs2String link;
+	for (std::size_t pos = first + 1; pos < last; ++pos) {
+		const ZLUnicodeUtil::Ucs2Char ch = s.at(pos);
+		//TODO maybe encode all characters of the URL, not only spaces and quotes
+		//TODO maybe add backslash support
+		const bool isSpace = urlencode && ZLUnicodeUtil::isSpace(ch);
+		const bool isQuote = !isSpace && urlencode && ch == QUOTE;
+		if (isSpace || isQuote) {
+			link.push_back('%');
+			link.push_back('2');
+			link.push_back(isSpace ? '0' : '2');
+		} else {
+			link.push_back(ch);
+		}
+	}
+
+	std::string result;
+	ZLUnicodeUtil::ucs2ToUtf8(result, link);
+	return result;
+}
+
+// The footnotes part of the document starts after a page break.
+void DocBookReader::footnotesStartHandler() {
+ handlePageBreak();
+}
+
+// Converts `len` bytes of ANSI text to UCS-2 and stores the result in
+// myBuffer. The encoding converter is created on first use; the
+// collection's default converter is used when myEncoding is unknown.
+void DocBookReader::ansiDataHandler(const char *buffer, std::size_t len) {
+	if (myConverter.isNull()) {
+		ZLEncodingCollection &collection = ZLEncodingCollection::Instance();
+		ZLEncodingConverterInfoPtr info = collection.info(myEncoding);
+		if (info.isNull()) {
+			myConverter = collection.defaultConverter();
+		} else {
+			myConverter = info->createConverter();
+		}
+	}
+
+	std::string converted;
+	myConverter->convert(converted, buffer, buffer + len);
+	ZLUnicodeUtil::utf8ToUcs2(myBuffer, converted);
+}
+
+// Appends one UCS-2 symbol to myBuffer.
+void DocBookReader::ucs2SymbolHandler(ZLUnicodeUtil::Ucs2Char symbol) {
+ myBuffer.push_back(symbol);
+}
diff --git a/reader/src/formats/doc/DocBookReader.h b/reader/src/formats/doc/DocBookReader.h
new file mode 100644
index 0000000..d80fb8e
--- /dev/null
+++ b/reader/src/formats/doc/DocBookReader.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __DOCBOOKREADER_H__
+#define __DOCBOOKREADER_H__
+
+#include <vector>
+
+#include <shared_ptr.h>
+#include <ZLFile.h>
+#include <ZLTextStyleEntry.h>
+#include <ZLEncodingConverter.h>
+
+#include "../../bookmodel/BookReader.h"
+
+#include "OleMainStream.h"
+#include "OleStreamParser.h"
+
+// Stream parser for the .doc format plugin. It derives from OleStreamParser
+// and implements its text/control-character callbacks; a BookReader member
+// (myModelReader) is kept for the BookModel passed to the constructor.
+class DocBookReader : public OleStreamParser {
+
+public:
+ // model: the book model this reader works on; encoding: name of the
+ // encoding used to pick a converter for 8-bit text (see ansiDataHandler).
+ DocBookReader(BookModel &model, const std::string &encoding);
+ ~DocBookReader();
+ // Entry point; returns a success flag (implementation is in the .cpp file).
+ bool readBook();
+
+private:
+ // Raw text callbacks: 8-bit encoded data, single UCS-2 symbols and the
+ // notification that the footnotes part starts.
+ void ansiDataHandler(const char *buffer, std::size_t len);
+ void ucs2SymbolHandler(ZLUnicodeUtil::Ucs2Char symbol);
+ void footnotesStartHandler();
+
+ // Per-character / control-character handlers.
+ void handleChar(ZLUnicodeUtil::Ucs2Char ucs2char);
+ void handleHardLinebreak();
+ void handleParagraphEnd();
+ void handlePageBreak();
+ void handleTableSeparator();
+ void handleTableEndRow();
+ void handleFootNoteMark();
+ void handleStartField();
+ void handleSeparatorField();
+ void handleEndField();
+ void handleImage(const ZLFileImage::Blocks &blocks);
+ void handleOtherControlChar(ZLUnicodeUtil::Ucs2Char ucs2char);
+
+ //formatting:
+ void handleFontStyle(unsigned int fontStyle);
+ void handleParagraphStyle(const OleMainStream::Style &styleInfo);
+ void handleBookmark(const std::string &name);
+
+private:
+ // Extracts the text between the first and the last double quote of s and
+ // returns it as UTF-8; an empty string is returned when there are fewer
+ // than two quotes. With urlencode set, whitespace becomes "%20" and inner
+ // quotes become "%22". Note: s is taken by value.
+ static std::string parseLink(ZLUnicodeUtil::Ucs2String s, bool urlencode = false);
+
+private:
+ BookReader myModelReader;
+
+ // NOTE(review): presumably accumulates the field instruction text while
+ // myReadFieldState is READ_FIELD_INFO - confirm against the .cpp file.
+ ZLUnicodeUtil::Ucs2String myFieldInfoBuffer;
+
+ // Whether the parser is currently inside a field or in ordinary text.
+ enum {
+ READ_FIELD,
+ READ_TEXT
+ } myReadState;
+
+ // Sub-state used while inside a field.
+ enum {
+ READ_FIELD_TEXT,
+ DONT_READ_FIELD_TEXT,
+ READ_FIELD_INFO
+ } myReadFieldState;
+
+ //maybe it should be flag?
+ enum {
+ NO_HYPERLINK,
+ EXT_HYPERLINK_INSERTED,
+ INT_HYPERLINK_INSERTED
+ } myHyperlinkTypeState;
+
+ //formatting
+ std::vector<FBTextKind> myKindStack;
+ shared_ptr<ZLTextStyleEntry> myCurrentStyleEntry;
+ OleMainStream::Style myCurrentStyleInfo;
+ unsigned int myPictureCounter;
+
+ // Encoding name given to the constructor and the converter that
+ // ansiDataHandler() creates for it on first use.
+ const std::string myEncoding;
+ shared_ptr<ZLEncodingConverter> myConverter;
+};
+
+// Nothing to release explicitly; members clean up after themselves.
+inline DocBookReader::~DocBookReader() {
+}
+
+#endif /* __DOCBOOKREADER_H__ */
diff --git a/reader/src/formats/doc/DocFloatImageReader.cpp b/reader/src/formats/doc/DocFloatImageReader.cpp
new file mode 100644
index 0000000..8c308e4
--- /dev/null
+++ b/reader/src/formats/doc/DocFloatImageReader.cpp
@@ -0,0 +1,384 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLLogger.h>
+
+#include "OleUtil.h"
+#include "OleStream.h"
+#include "OleMainStream.h"
+
+#include "DocFloatImageReader.h"
+
+// Stores the two streams together with the offset and length (in the table
+// stream) of the drawing data that readAll() will parse. Nothing is read here.
+DocFloatImageReader::DocFloatImageReader(unsigned int off, unsigned int len, shared_ptr<OleStream> tableStream, shared_ptr<OleStream> mainStream)
+	: myTableStream(tableStream)
+	, myMainStream(mainStream)
+	, myOffset(off)
+	, myLength(len)
+{
+}
+
+// Parses the drawing data located at myOffset in the table stream and fills
+// myItem with the blips and shape containers found there. Parsing stops at
+// the first top-level record that is neither a Dgg (0xF000) nor a Dg (0xF002)
+// container, or once myLength bytes have been accounted for.
+void DocFloatImageReader::readAll() {
+	//OfficeArtContent structure is described at p.405-406 [MS-DOC]
+	const bool positioned = myTableStream->seek(myOffset, true);
+	if (!positioned) {
+		ZLLogger::Instance().println("DocPlugin", "problems with reading float images");
+		return;
+	}
+
+	RecordHeader record;
+	unsigned int consumed = 0;
+	while (consumed < myLength) {
+		consumed += readRecordHeader(record, myTableStream);
+		if (record.type == 0xF000) {
+			consumed += readDggContainer(myItem, record.length, myTableStream, myMainStream);
+		} else if (record.type == 0xF002) {
+			consumed += readDgContainer(myItem, record.length, myTableStream);
+		} else {
+			// unexpected record type: give up on the rest of the data
+			return;
+		}
+	}
+}
+
+// Returns the image blocks of the blip referenced by the shape with the given
+// id. The first shape container with a matching id is used; within it, the
+// first non-complex property 0x0104 whose value is a valid 1-based blip index
+// selects the blip. An empty list is returned when nothing matches.
+ZLFileImage::Blocks DocFloatImageReader::getBlocksForShapeId(unsigned int shapeId) const {
+	const FSPContainer *shape = 0;
+	for (std::vector<FSPContainer>::const_iterator it = myItem.FSPs.begin(); it != myItem.FSPs.end(); ++it) {
+		if (it->fsp.shapeId == shapeId) {
+			shape = &*it;
+			break;
+		}
+	}
+	if (shape == 0) {
+		return ZLFileImage::Blocks();
+	}
+
+	for (std::vector<FOPTE>::const_iterator prop = shape->fopte.begin(); prop != shape->fopte.end(); ++prop) {
+		//0x0104 specifies the BLIP, see p.420 [MS-ODRAW]
+		if (prop->pId != 0x0104 || prop->isComplex) {
+			continue;
+		}
+		// the property value is a 1-based index into the blip store
+		if (prop->value > 0 && prop->value <= myItem.blips.size()) {
+			return myItem.blips.at(prop->value - 1).blocks;
+		}
+	}
+	return ZLFileImage::Blocks();
+}
+
+// Reads one 8-byte record header from the stream and decodes it into header:
+// the low 4 bits of the first 16-bit word are the version, the remaining 12
+// bits the instance, followed by a 16-bit type and a 32-bit length.
+// Always returns 8 so that callers keep advancing their byte counters.
+// If fewer than 8 bytes could be read, header is set to all zeroes instead of
+// being decoded from uninitialised memory.
+unsigned int DocFloatImageReader::readRecordHeader(RecordHeader &header, shared_ptr<OleStream> stream) {
+	//OfficeArtRecordHeader structure is described at p.26 [MS-ODRAW]
+	char buffer[8] = { 0 };
+	if (stream->read(buffer, 8) != 8) {
+		// truncated stream: report an empty record of type 0
+		header.version = 0;
+		header.instance = 0;
+		header.type = 0;
+		header.length = 0;
+		return 8;
+	}
+	unsigned int temp = OleUtil::getU2Bytes(buffer, 0);
+	header.version = temp & 0x000F;
+	header.instance = temp >> 4;
+	header.type = OleUtil::getU2Bytes(buffer, 2);
+	header.length = OleUtil::getU4Bytes(buffer, 4);
+	return 8;
+}
+
+// Reads the child records of a Dgg container of the given length. Blip store
+// containers (0xF001) are parsed into item, every other record is skipped.
+// One extra byte following the container is skipped as well. Returns the
+// number of bytes accounted for, including that extra byte.
+unsigned int DocFloatImageReader::readDggContainer(OfficeArtContent &item, unsigned int length, shared_ptr<OleStream> stream, shared_ptr<OleStream> mainStream) {
+	//OfficeArtDggContainer structure is described at p.50 [MS-ODRAW]
+	unsigned int consumed = 0;
+	RecordHeader child;
+
+	while (consumed < length) {
+		consumed += readRecordHeader(child, stream);
+		if (child.type == 0xF001) {
+			consumed += readBStoreContainer(item, child.length, stream, mainStream);
+		} else {
+			consumed += skipRecord(child, stream);
+		}
+	}
+
+	//skipping dgglbl (see p.406 [MS-DOC])
+	stream->seek(1, false);
+	return consumed + 1;
+}
+
+// Reads the child records of a blip store container of the given length.
+// Each 0xF007 record is parsed as a file block and appended to item.blips;
+// other records are skipped. Returns the number of bytes accounted for.
+unsigned int DocFloatImageReader::readBStoreContainer(OfficeArtContent &item, unsigned int length, shared_ptr<OleStream> stream, shared_ptr<OleStream> mainStream) {
+	//OfficeArtBStoreContainer structure is described at p.58 [MS-ODRAW]
+	unsigned int consumed = 0;
+	RecordHeader child;
+	while (consumed < length) {
+		consumed += readRecordHeader(child, stream);
+		if (child.type != 0xF007) {
+			consumed += skipRecord(child, stream);
+			continue;
+		}
+		Blip entry;
+		consumed += readBStoreContainerFileBlock(entry, stream, mainStream);
+		item.blips.push_back(entry);
+	}
+	return consumed;
+}
+
+// Moves the stream forward by the record's payload length (relative seek) and
+// returns that length.
+unsigned int DocFloatImageReader::skipRecord(const RecordHeader &header, shared_ptr<OleStream> stream) {
+	const unsigned int payload = header.length;
+	stream->seek(payload, false);
+	return payload;
+}
+
+// Reads one blip store entry from the table stream (stream) and then the blip
+// record it refers to from mainStream.
+//
+// When the entry carries an offset other than 0xFFFFFFFF, mainStream is first
+// positioned at that absolute offset; if the seek fails the blip is left
+// without image blocks. Metafile/PICT records are skipped, the other known
+// image types are handed to readBlip(). blip.type receives the type of the
+// record found in mainStream (0 if the record could not be reached).
+//
+// Returns the number of bytes consumed from the table stream only.
+unsigned int DocFloatImageReader::readBStoreContainerFileBlock(Blip &blip, shared_ptr<OleStream> stream, shared_ptr<OleStream> mainStream) {
+	//OfficeArtBStoreContainerFileBlock structure is described at p.59 [MS-ODRAW]
+	blip.type = 0; // defined value even on the early-return path below
+	unsigned int count = readFBSE(blip.storeEntry, stream);
+	if (blip.storeEntry.offsetInDelay != (unsigned int)-1) {
+		// seek() reports success with true, so the error path is the negated case
+		if (!mainStream->seek(blip.storeEntry.offsetInDelay, true)) { //see p.70 [MS-ODRAW]
+			//TODO maybe we should stop reading float images here
+			ZLLogger::Instance().println("DocPlugin", "DocFloatImageReader: problems with seeking for offset");
+			return count;
+		}
+	}
+	RecordHeader header;
+	readRecordHeader(header, mainStream);
+	switch (header.type) {
+		case OleMainStream::IMAGE_WMF:
+		case OleMainStream::IMAGE_EMF:
+		case OleMainStream::IMAGE_PICT:
+			// not supported: just step over the record
+			skipRecord(header, mainStream);
+			break;
+		case OleMainStream::IMAGE_JPEG:
+		case OleMainStream::IMAGE_JPEG2:
+		case OleMainStream::IMAGE_PNG:
+		case OleMainStream::IMAGE_DIB:
+		case OleMainStream::IMAGE_TIFF:
+			readBlip(blip, header, mainStream);
+			break;
+	}
+	blip.type = header.type;
+	return count;
+}
+
+// Reads the fixed part of a blip record whose header has already been read
+// and stores the location of the image data in blip.blocks.
+//
+// The fixed part consists of a 16-byte UID, an optional second 16-byte UID
+// (present only for certain type/instance combinations) and a 1-byte tag.
+// The image data is the remainder of the record; it is described through
+// getBlockPieceInfoList() starting at the current stream offset. If the
+// record is shorter than its fixed part, blip.blocks is left untouched.
+//
+// Returns the size of the fixed part plus header.length.
+unsigned int DocFloatImageReader::readBlip(Blip &blip, const RecordHeader &header, shared_ptr<OleStream> stream) {
+	//OfficeArtBlip structure is described at p.60-66 [MS-ODRAW]
+	stream->seek(16, false); //skipping rgbUid1
+	unsigned int count = 16;
+
+	bool addField = false;
+	switch (header.type) {
+		case OleMainStream::IMAGE_PNG:
+			addField = (header.instance == 0x6E1);
+			break;
+		case OleMainStream::IMAGE_JPEG:
+		case OleMainStream::IMAGE_JPEG2:
+			addField = (header.instance == 0x46B || header.instance == 0x6E3);
+			break;
+		case OleMainStream::IMAGE_DIB:
+			addField = (header.instance == 0x7A9);
+			break; // each type is matched against its own instance value only
+		case OleMainStream::IMAGE_TIFF:
+			addField = (header.instance == 0x6E5);
+			break;
+	}
+
+	if (addField) {
+		stream->seek(16, false); //skipping rgbUid2
+		count += 16;
+	}
+	stream->seek(1, false); //skipping tag
+	count += 1;
+
+	// guard against unsigned wrap-around on records shorter than the fixed part
+	if (header.length >= count) {
+		blip.blocks = stream->getBlockPieceInfoList(stream->offset(), header.length - count);
+	}
+	count += header.length;
+	return count;
+}
+
+// Reads one blip store entry: size, reference count and delay-stream offset
+// are stored in fbse, all other fields (and the optional name) are skipped.
+// Returns the number of bytes consumed: 36 for the fixed part plus the name
+// length taken from the entry.
+unsigned int DocFloatImageReader::readFBSE(BlipStoreEntry &fbse, shared_ptr<OleStream> stream) {
+	//OfficeArtFBSE structure is described at p.68 [MS-ODRAW]
+	const unsigned int fixedPartSize = 36;
+
+	stream->seek(2, false); //skipping btWin32 and btMacOS
+	stream->seek(16, false); //skipping rgbUid
+	stream->seek(2, false); //skipping tag
+
+	fbse.size = read4Bytes(stream);
+	fbse.referenceCount = read4Bytes(stream);
+	fbse.offsetInDelay = read4Bytes(stream);
+
+	stream->seek(1, false); //skipping unused value
+	const unsigned int nameSize = read1Byte(stream); //if it should be multiplied on 2?
+	stream->seek(2, false); // skipping unused values
+
+	if (nameSize > 0) {
+		stream->seek(nameSize, false); //skipping nameData
+	}
+	return fixedPartSize + nameSize;
+}
+
+// Reads the child records of a Dg container of the given length. Shape group
+// containers (0xF003) are descended into, shape containers (0xF004) are
+// parsed and appended to item.FSPs, the 0xF008 record is stepped over as a
+// fixed 8 bytes and anything else is skipped by its declared length.
+// Returns the number of bytes accounted for.
+unsigned int DocFloatImageReader::readDgContainer(OfficeArtContent &item, unsigned int length, shared_ptr<OleStream> stream) {
+	//OfficeArtDgContainer structure is described at p.52 [MS-ODRAW]
+	RecordHeader child;
+	unsigned int consumed = 0;
+
+	while (consumed < length) {
+		consumed += readRecordHeader(child, stream);
+		if (child.type == 0xF008) {
+			//skip OfficeArtFDG record, p. 82 [MS-ODRAW]
+			stream->seek(8, false);
+			consumed += 8;
+		} else if (child.type == 0xF003) {
+			consumed += readSpgrContainer(item, child.length, stream);
+		} else if (child.type == 0xF004) {
+			FSPContainer shape;
+			consumed += readSpContainter(shape, child.length, stream);
+			item.FSPs.push_back(shape);
+		} else {
+			consumed += skipRecord(child, stream);
+		}
+	}
+	return consumed;
+}
+
+// Reads the child records of a shape group container of the given length.
+// Nested groups (0xF003) are handled recursively, shape containers (0xF004)
+// are parsed and appended to item.FSPs, everything else is skipped.
+// Returns the number of bytes accounted for.
+unsigned int DocFloatImageReader::readSpgrContainer(OfficeArtContent &item, unsigned int length, shared_ptr<OleStream> stream) {
+	//OfficeArtSpgrContainer structure is described at p.56 [MS-ODRAW]
+	RecordHeader child;
+	unsigned int consumed = 0;
+	while (consumed < length) {
+		consumed += readRecordHeader(child, stream);
+		if (child.type == 0xF003) {
+			consumed += readSpgrContainer(item, child.length, stream);
+		} else if (child.type == 0xF004) {
+			FSPContainer shape;
+			consumed += readSpContainter(shape, child.length, stream);
+			item.FSPs.push_back(shape);
+		} else {
+			consumed += skipRecord(child, stream);
+		}
+	}
+	return consumed;
+}
+
+// Reads the child records of a shape container of the given length into item:
+// the 0xF00A record provides the shape id and the 0xF00B record the property
+// array. Several record types are stepped over by a fixed byte count instead
+// of their declared length, and for 0xF00C / 0xF11F / 0xF11D only the header
+// is consumed. Returns the number of bytes accounted for.
+unsigned int DocFloatImageReader::readSpContainter(FSPContainer &item, unsigned int length, shared_ptr<OleStream> stream) {
+	//OfficeArtSpContainter structure is described at p.53-55 [MS-ODRAW]
+	unsigned int consumed = 0;
+	RecordHeader child;
+	while (consumed < length) {
+		consumed += readRecordHeader(child, stream);
+		unsigned int bodySize = 0;
+		switch (child.type) {
+			case 0xF009: //skip OfficeArtFSPGR record, p.74 [MS-ODRAW]
+				stream->seek(16, false);
+				bodySize = 16;
+				break;
+			case 0xF00A:
+				bodySize = readFSP(item.fsp, stream);
+				break;
+			case 0xF00B:
+				bodySize = readArrayFOPTE(item.fopte, child.length, stream);
+				break;
+			case 0xF00E: //OfficeArtAnchor
+			case 0xF00F: //OfficeArtChildAnchor, p.75 [MS-ODRAW]
+			case 0xF010: //OfficeArtClientAnchor
+				stream->seek(4, false);
+				bodySize = 4;
+				break;
+			case 0xF00C:
+			case 0xF11F:
+			case 0xF11D:
+				// header only: nothing is skipped and nothing is counted
+				break;
+			default:
+				bodySize = skipRecord(child, stream);
+				break;
+		}
+		consumed += bodySize;
+	}
+	return consumed;
+}
+
+// Reads the 4-byte shape id into fsp and skips the following 4 bytes.
+// Returns 8, the number of bytes consumed.
+unsigned int DocFloatImageReader::readFSP(FSP &fsp, shared_ptr<OleStream> stream) {
+	//OfficeArtFSP structure is described at p.76 [MS-ODRAW]
+	const unsigned int recordSize = 8;
+	fsp.shapeId = read4Bytes(stream);
+	stream->seek(4, false);
+	return recordSize;
+}
+
+// Reads 6-byte property entries and appends them to fopteArray until at least
+// length bytes have been read. Afterwards, for every entry in fopteArray that
+// is flagged complex, the stream is advanced by that entry's value.
+// Returns the total number of bytes accounted for.
+unsigned int DocFloatImageReader::readArrayFOPTE(std::vector<FOPTE> &fopteArray,unsigned int length, shared_ptr<OleStream> stream) {
+	//OfficeArtRGFOPTE structure is described at p.98 [MS-ODRAW]
+	unsigned int consumed = 0;
+	while (consumed < length) {
+		FOPTE entry;
+		consumed += readFOPTE(entry, stream);
+		fopteArray.push_back(entry);
+	}
+
+	// step over the data belonging to complex properties
+	for (std::vector<FOPTE>::const_iterator it = fopteArray.begin(); it != fopteArray.end(); ++it) {
+		if (!it->isComplex) {
+			continue;
+		}
+		stream->seek(it->value, false);
+		consumed += it->value;
+	}
+	return consumed;
+}
+
+// Reads one 6-byte property entry: a 16-bit word holding the property id
+// (low 14 bits), the blip-id flag (bit 14) and the complex flag (bit 15),
+// followed by a 32-bit value. Returns 6, the number of bytes consumed.
+unsigned int DocFloatImageReader::readFOPTE(FOPTE &fopte, shared_ptr<OleStream> stream) {
+	//OfficeArtFOPTE structure is described at p.32 [MS-ODRAW]
+	const unsigned int idAndFlags = read2Bytes(stream);
+	fopte.pId = idAndFlags & 0x3fff;
+	fopte.isBlipId = (idAndFlags & 0x4000) != 0;
+	fopte.isComplex = (idAndFlags & 0x8000) != 0;
+	fopte.value = read4Bytes(stream);
+	return 6;
+}
+
+// Reads one byte from the stream and returns it as an unsigned value;
+// returns 0 when the byte could not be read.
+unsigned int DocFloatImageReader::read1Byte(shared_ptr<OleStream> stream) {
+	char raw[1];
+	const bool complete = (stream->read(raw, 1) == 1);
+	if (complete) {
+		return OleUtil::getU1Byte(raw, 0);
+	}
+	return 0;
+}
+
+// Reads two bytes from the stream and returns them decoded by
+// OleUtil::getU2Bytes(); returns 0 when fewer than two bytes could be read.
+unsigned int DocFloatImageReader::read2Bytes(shared_ptr<OleStream> stream) {
+	char raw[2];
+	const bool complete = (stream->read(raw, 2) == 2);
+	if (complete) {
+		return OleUtil::getU2Bytes(raw, 0);
+	}
+	return 0;
+}
+
+// Reads four bytes from the stream and returns them decoded by
+// OleUtil::getU4Bytes(); returns 0 when fewer than four bytes could be read.
+unsigned int DocFloatImageReader::read4Bytes(shared_ptr<OleStream> stream) {
+	char raw[4];
+	const bool complete = (stream->read(raw, 4) == 4);
+	if (complete) {
+		return OleUtil::getU4Bytes(raw, 0);
+	}
+	return 0;
+}
diff --git a/reader/src/formats/doc/DocFloatImageReader.h b/reader/src/formats/doc/DocFloatImageReader.h
new file mode 100644
index 0000000..d2d6c2e
--- /dev/null
+++ b/reader/src/formats/doc/DocFloatImageReader.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __DOCFLOATIMAGEREADER_H__
+#define __DOCFLOATIMAGEREADER_H__
+
+#include <ZLFileImage.h>
+
+// Reader for the drawing ("float image") data of a .doc file. readAll()
+// parses the record tree found in the table stream and collects blips and
+// shape containers; getBlocksForShapeId() then maps a shape id to the image
+// blocks of the blip that shape refers to.
+class DocFloatImageReader {
+
+public:
+ struct BlipStoreEntry { // see p.68 [MS-ODRAW]
+ unsigned int size; // size of blip in stream
+ unsigned int referenceCount; // (cRef) reference count for the blip
+ unsigned int offsetInDelay; // foDelay, file offset in the delay stream
+ };
+
+ struct Blip { //see p.59, p63-66 [MS-ODRAW]
+ BlipStoreEntry storeEntry;
+ unsigned int type; // type field of the blip's record header
+ ZLFileImage::Blocks blocks; // location of the image data
+ };
+
+ struct FSP { //see p.76-77 [MS-ODRAW]
+ unsigned int shapeId; //spid
+ };
+
+ struct FOPTE { //see p.98 and p.32 [MS-ODRAW]
+ unsigned int pId; //pid
+ bool isBlipId; //fBid
+ bool isComplex; //fComplex
+ unsigned int value; //op
+ };
+
+ struct FSPContainer { //see p.53-55 [MS-ODRAW]
+ FSP fsp;
+ std::vector<FOPTE> fopte;
+ };
+
+ struct OfficeArtContent { //see p.405-406 [MS-DOC]
+ std::vector<Blip> blips; //retrieved from OfficeArtDggContainer
+ std::vector<FSPContainer> FSPs; //retrieved from OfficeArtDgContainer
+ };
+
+ struct RecordHeader { //see p.26 [MS-ODRAW]
+ unsigned int version;
+ unsigned int instance;
+ unsigned int type;
+ unsigned int length;
+ };
+
+public:
+ // off / len: position and size of the drawing data inside tableStream;
+ // mainStream: stream the blip records themselves are read from.
+ DocFloatImageReader(unsigned int off, unsigned int len, shared_ptr<OleStream> tableStream, shared_ptr<OleStream> mainStream);
+
+public:
+ // Parses the drawing data; must be called before getBlocksForShapeId()
+ // can return anything.
+ void readAll();
+
+ // Returns the image blocks for the given shape id, or an empty list.
+ ZLFileImage::Blocks getBlocksForShapeId(unsigned int shapeId) const;
+
+private:
+ // All readers below return the number of bytes they accounted for.
+ static unsigned int readRecordHeader(RecordHeader &header, shared_ptr<OleStream> stream);
+ static unsigned int readDggContainer(OfficeArtContent &item, unsigned int length, shared_ptr<OleStream> stream, shared_ptr<OleStream> mainStream);
+
+ static unsigned int readBStoreContainer(OfficeArtContent &item, unsigned int length, shared_ptr<OleStream> stream, shared_ptr<OleStream> mainStream);
+ static unsigned int readBStoreContainerFileBlock(Blip &blip, shared_ptr<OleStream> stream, shared_ptr<OleStream> mainStream);
+ static unsigned int readBlip(Blip &blip, const RecordHeader &header, shared_ptr<OleStream> stream);
+ static unsigned int readFBSE(BlipStoreEntry &fbse, shared_ptr<OleStream> stream);
+
+ static unsigned int readFOPTE(FOPTE &fopte, shared_ptr<OleStream> stream);
+ static unsigned int readArrayFOPTE(std::vector<FOPTE> &fopte, unsigned int length, shared_ptr<OleStream> stream);
+ static unsigned int readFSP(FSP &fsp, shared_ptr<OleStream> stream);
+ static unsigned int readSpContainter(FSPContainer &item, unsigned int length, shared_ptr<OleStream> stream);
+ static unsigned int readSpgrContainer(OfficeArtContent &item, unsigned int length, shared_ptr<OleStream> stream);
+ static unsigned int readDgContainer(OfficeArtContent &item, unsigned int length, shared_ptr<OleStream> stream);
+
+ // Advances the stream by header.length and returns that length.
+ static unsigned int skipRecord(const RecordHeader &header, shared_ptr<OleStream> stream);
+
+ // Fixed-width integer readers; each returns 0 on a short read.
+ static unsigned int read1Byte(shared_ptr<OleStream> stream);
+ static unsigned int read2Bytes(shared_ptr<OleStream> stream);
+ static unsigned int read4Bytes(shared_ptr<OleStream> stream);
+
+private:
+ shared_ptr<OleStream> myTableStream;
+ shared_ptr<OleStream> myMainStream;
+ unsigned int myOffset;
+ unsigned int myLength;
+
+ // Result of readAll(): collected blips and shape containers.
+ OfficeArtContent myItem;
+};
+
+#endif /* __DOCFLOATIMAGEREADER_H__ */
diff --git a/reader/src/formats/doc/DocInlineImageReader.cpp b/reader/src/formats/doc/DocInlineImageReader.cpp
new file mode 100644
index 0000000..69ce74f
--- /dev/null
+++ b/reader/src/formats/doc/DocInlineImageReader.cpp
@@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include "OleUtil.h"
+#include "OleMainStream.h"
+
+#include "DocInlineImageReader.h"
+
+// Keeps a reference to the stream inline pictures are read from; the stream
+// may be null, in which case getImagePieceInfo() returns no blocks.
+DocInlineImageReader::DocInlineImageReader(shared_ptr<OleStream> dataStream)
+	: myDataStream(dataStream)
+{
+}
+
+// Locates the image data of the inline picture stored at dataPos in the data
+// stream and returns the blocks describing it.
+//
+// The picture starts with a header holding the total length, the header
+// length and the storage format; only format 0x0064 is handled. After the
+// header, 8-byte record headers are walked until a JPEG, PNG, DIB or TIFF
+// blip record is found; the image data is whatever remains of the picture
+// after that record's fixed part.
+//
+// An empty list is returned when the stream is missing, a read or seek fails,
+// the format is unsupported (including EMF/WMF/PICT), an unknown record is
+// met, or the lengths found in the data are inconsistent.
+ZLFileImage::Blocks DocInlineImageReader::getImagePieceInfo(unsigned int dataPos) {
+	if (myDataStream.isNull()) {
+		return ZLFileImage::Blocks();
+	}
+	if (!myDataStream->seek(dataPos, true)) {
+		return ZLFileImage::Blocks();
+	}
+
+	//reading PICF structure (see p. 421 [MS-DOC])
+	// const, so that headerBuffer is an ordinary fixed-size array
+	const unsigned int picfHeaderSize = 4 + 2 + 8; //record length, headerLength and storage format
+	char headerBuffer[picfHeaderSize];
+	if (myDataStream->read(headerBuffer, picfHeaderSize) != picfHeaderSize) {
+		return ZLFileImage::Blocks();
+	}
+	unsigned int length = OleUtil::getU4Bytes(headerBuffer, 0);
+	unsigned int headerLength = OleUtil::getU2Bytes(headerBuffer, 4);
+	unsigned int formatType = OleUtil::getU2Bytes(headerBuffer, 6);
+
+	if (formatType != 0x0064) { //external link to some file; see p.394 [MS-DOC]
+		//TODO implement
+		return ZLFileImage::Blocks();
+	}
+	if (headerLength >= length) {
+		return ZLFileImage::Blocks();
+	}
+	if (headerLength < picfHeaderSize) {
+		// would make the relative seek below wrap around
+		return ZLFileImage::Blocks();
+	}
+
+	//reading OfficeArtInlineSpContainer structure; see p.421 [MS-DOC] and p.56 [MS-ODRAW]
+	if (!myDataStream->seek(headerLength - picfHeaderSize, false)) { //skip header
+		return ZLFileImage::Blocks();
+	}
+
+	char buffer[8]; //for OfficeArtRecordHeader structure; see p.69 [MS-ODRAW]
+	bool found = false;
+	unsigned int curOffset = 0;
+	// the loop increment accounts for the 8 header bytes of every record,
+	// including the one that ends the search
+	for (curOffset = headerLength; !found && curOffset + 8 <= length; curOffset += 8) {
+		if (myDataStream->read(buffer, 8) != 8) {
+			return ZLFileImage::Blocks();
+		}
+		unsigned int recordInstance = OleUtil::getU2Bytes(buffer, 0) >> 4;
+		unsigned int recordType = OleUtil::getU2Bytes(buffer, 2);
+		unsigned int recordLen = OleUtil::getU4Bytes(buffer, 4);
+
+		switch (recordType) {
+			case 0xF000: case 0xF001: case 0xF002: case 0xF003: case 0xF004: case 0xF005:
+				// containers: descend by simply reading the next header
+				break;
+			case 0xF007:
+				{
+					myDataStream->seek(33, false);
+					char tmpBuf[1];
+					if (myDataStream->read(tmpBuf, 1) != 1) {
+						return ZLFileImage::Blocks();
+					}
+					unsigned int nameLength = OleUtil::getU1Byte(tmpBuf, 0);
+					myDataStream->seek(nameLength * 2 + 2, false);
+					curOffset += 33 + 1 + nameLength * 2 + 2;
+				}
+				break;
+			case 0xF008:
+				myDataStream->seek(8, false);
+				curOffset += 8;
+				break;
+			case 0xF009:
+				myDataStream->seek(16, false);
+				curOffset += 16;
+				break;
+			case 0xF006: case 0xF00A: case 0xF00B: case 0xF00D: case 0xF00E: case 0xF00F: case 0xF010: case 0xF011: case 0xF122:
+				myDataStream->seek(recordLen, false);
+				curOffset += recordLen;
+				break;
+			case OleMainStream::IMAGE_EMF:
+			case OleMainStream::IMAGE_WMF:
+			case OleMainStream::IMAGE_PICT:
+				//TODO implement
+				return ZLFileImage::Blocks();
+			case OleMainStream::IMAGE_JPEG:
+			case OleMainStream::IMAGE_JPEG2:
+				myDataStream->seek(17, false);
+				curOffset += 17;
+				if (recordInstance == 0x46B || recordInstance == 0x6E3) {
+					myDataStream->seek(16, false);
+					curOffset += 16;
+				}
+				found = true;
+				break;
+			case OleMainStream::IMAGE_PNG:
+				myDataStream->seek(17, false);
+				curOffset += 17;
+				if (recordInstance == 0x6E1) {
+					myDataStream->seek(16, false);
+					curOffset += 16;
+				}
+				found = true;
+				break;
+			case OleMainStream::IMAGE_DIB: // DIB = BMP without 14-bytes header
+				myDataStream->seek(17, false);
+				curOffset += 17;
+				if (recordInstance == 0x7A9) {
+					myDataStream->seek(16, false);
+					curOffset += 16;
+				}
+				found = true;
+				break;
+			case OleMainStream::IMAGE_TIFF:
+				myDataStream->seek(17, false);
+				curOffset += 17;
+				if (recordInstance == 0x6E5) {
+					myDataStream->seek(16, false);
+					curOffset += 16;
+				}
+				found = true;
+				break;
+			case 0xF00C:
+			default:
+				return ZLFileImage::Blocks();
+		}
+	}
+
+	if (!found) {
+		return ZLFileImage::Blocks();
+	}
+	if (curOffset > length) {
+		// inconsistent lengths: the size below would wrap around
+		return ZLFileImage::Blocks();
+	}
+	return myDataStream->getBlockPieceInfoList(dataPos + curOffset, length - curOffset);
+}
diff --git a/reader/src/formats/doc/DocInlineImageReader.h b/reader/src/formats/doc/DocInlineImageReader.h
new file mode 100644
index 0000000..9dab9ae
--- /dev/null
+++ b/reader/src/formats/doc/DocInlineImageReader.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __DOCINLINEIMAGEREADER_H__
+#define __DOCINLINEIMAGEREADER_H__
+
+#include <vector>
+
+#include "OleStream.h"
+
+// Locates the image data of inline pictures stored in an OLE data stream.
+class DocInlineImageReader {
+
+public:
+ // dataStream: stream the pictures are read from (may be null).
+ DocInlineImageReader(shared_ptr<OleStream> dataStream);
+ // Returns the blocks describing the image data of the picture that starts
+ // at dataPos, or an empty list if no supported image is found there.
+ ZLFileImage::Blocks getImagePieceInfo(unsigned int dataPos);
+
+private:
+ shared_ptr<OleStream> myDataStream;
+};
+
+#endif /* __DOCINLINEIMAGEREADER_H__ */
diff --git a/reader/src/formats/doc/DocMetaInfoReader.cpp b/reader/src/formats/doc/DocMetaInfoReader.cpp
new file mode 100644
index 0000000..37b39c2
--- /dev/null
+++ b/reader/src/formats/doc/DocMetaInfoReader.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLInputStream.h>
+
+#include "../../library/Book.h"
+
+#include "DocMetaInfoReader.h"
+
+// Binds the reader to the book and clears the book's authors, title,
+// language and tags.
+DocMetaInfoReader::DocMetaInfoReader(Book &book)
+	: myBook(book)
+{
+	const std::string empty;
+	myBook.removeAllAuthors();
+	myBook.setTitle(empty);
+	myBook.setLanguage(empty);
+	myBook.removeAllTags();
+}
+
+// Sets the book's metadata without reading anything from the document: the
+// authors and tags are cleared and the title is taken from the file name
+// (myBook.file().name(true)). Always returns true.
+bool DocMetaInfoReader::readMetaInfo() {
+	myBook.removeAllAuthors();
+
+	// the file name serves as the title
+	myBook.setTitle(myBook.file().name(true));
+
+	myBook.removeAllTags();
+	return true;
+}
diff --git a/reader/src/formats/doc/DocMetaInfoReader.h b/reader/src/formats/doc/DocMetaInfoReader.h
new file mode 100644
index 0000000..db26d29
--- /dev/null
+++ b/reader/src/formats/doc/DocMetaInfoReader.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __DOCMETAINFOREADER_H__
+#define __DOCMETAINFOREADER_H__
+
+#include <string>
+
+class Book;
+
+// Fills in the metadata of a Book for the .doc format plugin.
+class DocMetaInfoReader {
+
+public:
+ // book: the book whose metadata is written; it is held by reference, so
+ // it has to outlive this reader.
+ DocMetaInfoReader(Book &book);
+ ~DocMetaInfoReader();
+ // Returns a success flag.
+ bool readMetaInfo();
+
+ // The handlers below are commented out and not part of the class.
+ /*
+ void startElementHandler(int tag, const char **attributes);
+ void endElementHandler(int tag);
+ void characterDataHandler(const char *text, std::size_t len);
+ */
+
+private:
+ Book &myBook;
+};
+
+// The reader only holds a reference to the book, so there is nothing to free.
+inline DocMetaInfoReader::~DocMetaInfoReader() {
+}
+
+#endif /* __DOCMETAINFOREADER_H__ */
diff --git a/reader/src/formats/doc/DocPlugin.cpp b/reader/src/formats/doc/DocPlugin.cpp
new file mode 100644
index 0000000..ef6f511
--- /dev/null
+++ b/reader/src/formats/doc/DocPlugin.cpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLFile.h>
+#include <ZLInputStream.h>
+#include <ZLLogger.h>
+#include <ZLImage.h>
+#include <ZLEncodingConverter.h>
+
+#include "DocPlugin.h"
+#include "DocMetaInfoReader.h"
+#include "DocBookReader.h"
+#include "DocStreams.h"
+#include "../../bookmodel/BookModel.h"
+#include "../../library/Book.h"
+
+// The plugin has no state of its own to initialise.
+DocPlugin::DocPlugin()
+{
+}
+
+// Nothing to release.
+DocPlugin::~DocPlugin()
+{
+}
+
+// This plugin reports that it provides meta information (always true).
+bool DocPlugin::providesMetaInfo() const {
+	const bool provides = true;
+	return provides;
+}
+
+// Returns the identifier of the file type handled by this plugin: "doc".
+const std::string DocPlugin::supportedFileType() const {
+	return std::string("doc");
+}
+
+// A file is accepted when its extension, as reported by ZLFile::extension(),
+// is exactly "doc".
+bool DocPlugin::acceptsFile(const ZLFile &file) const {
+	const bool isDoc = (file.extension() == "doc");
+	return isDoc;
+}
+
+// Fills in the book's metadata and then tries to detect its encoding and
+// language from up to 50000 bytes of document text: first from the 8-bit
+// text stream and, if that detection fails, from the UCS-2 text stream
+// (language only, treated as UTF-8). Returns false only when the metadata
+// reader itself fails.
+bool DocPlugin::readMetaInfo(Book &book) const {
+	DocMetaInfoReader metaReader(book);
+	if (!metaReader.readMetaInfo()) {
+		return false;
+	}
+
+	shared_ptr<ZLInputStream> sample = new DocAnsiStream(book.file(), 50000);
+	const bool detected = detectEncodingAndLanguage(book, *sample);
+	if (!detected) {
+		sample = new DocUcs2Stream(book.file(), 50000);
+		detectLanguage(book, *sample, ZLEncodingConverter::UTF8, true);
+	}
+
+	return true;
+}
+
+// Does nothing and reports success; the book argument is unused.
+bool DocPlugin::readLanguageAndEncoding(Book &/*book*/) const {
+	const bool done = true;
+	return done;
+}
+
+// Builds the book model by running a DocBookReader configured with the
+// encoding recorded for the book; returns the reader's result.
+bool DocPlugin::readModel(BookModel &model) const {
+	DocBookReader reader(model, model.book()->encoding());
+	return reader.readBook();
+}
diff --git a/reader/src/formats/doc/DocPlugin.h b/reader/src/formats/doc/DocPlugin.h
new file mode 100644
index 0000000..93b1803
--- /dev/null
+++ b/reader/src/formats/doc/DocPlugin.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __DOCPLUGIN_H__
+#define __DOCPLUGIN_H__
+
+#include "../FormatPlugin.h"
+
+// Format plugin for files with the "doc" extension.
+class DocPlugin : public FormatPlugin {
+
+public:
+ DocPlugin();
+ ~DocPlugin();
+ // Always true for this plugin.
+ bool providesMetaInfo() const;
+
+ // Returns "doc".
+ const std::string supportedFileType() const;
+ // True when the file's extension is "doc".
+ bool acceptsFile(const ZLFile &file) const;
+ // Fills in title/authors/tags and detects encoding and language.
+ bool readMetaInfo(Book &book) const;
+ // No-op that returns true.
+ bool readLanguageAndEncoding(Book &book) const;
+ // Reads the document into the given model via DocBookReader.
+ bool readModel(BookModel &model) const;
+};
+
+#endif /* __DOCPLUGIN_H__ */
diff --git a/reader/src/formats/doc/DocStreams.cpp b/reader/src/formats/doc/DocStreams.cpp
new file mode 100644
index 0000000..b21e15a
--- /dev/null
+++ b/reader/src/formats/doc/DocStreams.cpp
@@ -0,0 +1,202 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cstring>
+#include <cstdlib>
+#include <string>
+
+#include "DocStreams.h"
+#include "OleStreamReader.h"
+
+/*
+ * Base reader that collects document text into a caller-supplied buffer of at
+ * most myMaxSize bytes. The data handlers declared here are no-ops; subclasses
+ * override the one that matches the kind of text they collect.
+ */
+class DocReader : public OleStreamReader {
+
+public:
+	DocReader(char *buffer, std::size_t maxSize);
+	~DocReader();
+
+	// Number of bytes stored in the buffer so far.
+	std::size_t readSize() const;
+
+private:
+	bool readStream(OleMainStream &stream);
+	void ansiDataHandler(const char *buffer, std::size_t len);
+	void ucs2SymbolHandler(ZLUnicodeUtil::Ucs2Char symbol);
+	void footnotesStartHandler();
+
+protected:
+	char *myBuffer;              // destination buffer; not released by this class
+	const std::size_t myMaxSize; // capacity of myBuffer in bytes
+	std::size_t myActualSize;    // bytes written to myBuffer so far
+};
+
+/* DocReader that stores the raw bytes delivered to ansiDataHandler(). */
+class DocAnsiReader : public DocReader {
+
+public:
+	DocAnsiReader(char *buffer, std::size_t maxSize);
+	~DocAnsiReader();
+
+private:
+	// Appends the incoming bytes to the buffer, up to its capacity.
+	void ansiDataHandler(const char *buffer, std::size_t len);
+};
+
+/* DocReader that stores UCS-2 symbols converted to UTF-8. */
+class DocUcs2Reader : public DocReader {
+
+public:
+	DocUcs2Reader(char *buffer, std::size_t maxSize);
+	~DocUcs2Reader();
+
+private:
+	// Converts the symbol to UTF-8 and appends it to the buffer, up to its capacity.
+	void ucs2SymbolHandler(ZLUnicodeUtil::Ucs2Char symbol);
+};
+
+// Remembers the destination buffer and its capacity; nothing has been written yet.
+DocReader::DocReader(char *buffer, std::size_t maxSize)
+	: myBuffer(buffer),
+	  myMaxSize(maxSize),
+	  myActualSize(0) {
+}
+
+// Nothing to release: the buffer is not freed by the reader.
+DocReader::~DocReader() {}
+
+// Pulls pieces from the stream until the buffer is full or no piece is left.
+// Always reports success.
+bool DocReader::readStream(OleMainStream &stream) {
+	// TODO make 2 optimizations:
+	// 1) If the next piece is too big, reading it could be stopped early if a size limit were specified
+	//    (it could be passed to readNextPiece as a parameter whose default value 0 means "no limit").
+	// 2) readNextPiece could be told which kind of piece to read next (ANSI or not ANSI).
+	//    As the wanted kind is already known, there is no need to read the other pieces.
+	while (myActualSize < myMaxSize && readNextPiece(stream)) {
+		// all the work happens in the handlers invoked by readNextPiece()
+	}
+	return true;
+}
+
+// Default handler: ANSI data is ignored.
+void DocReader::ansiDataHandler(const char *, std::size_t) {}
+
+// Default handler: UCS-2 symbols are ignored.
+void DocReader::ucs2SymbolHandler(ZLUnicodeUtil::Ucs2Char) {}
+
+// Default handler: the start of footnotes is ignored.
+void DocReader::footnotesStartHandler() {}
+
+// Returns how many bytes have been stored in the buffer so far.
+std::size_t DocReader::readSize() const {
+	const std::size_t stored = myActualSize;
+	return stored;
+}
+
+// Forwards the buffer and its capacity to the base reader.
+DocAnsiReader::DocAnsiReader(char *buffer, std::size_t maxSize)
+	: DocReader(buffer, maxSize) {
+}
+
+// Nothing to release.
+DocAnsiReader::~DocAnsiReader() {}
+
+// Appends up to dataLength bytes from buffer to the output buffer, never
+// writing past myMaxSize. Bytes that do not fit are dropped.
+void DocAnsiReader::ansiDataHandler(const char *buffer, std::size_t dataLength) {
+	if (myActualSize < myMaxSize) {
+		const std::size_t len = std::min(dataLength, myMaxSize - myActualSize);
+		// The input is length-delimited, not NUL-terminated: memcpy copies every
+		// byte verbatim, whereas strncpy would stop at the first NUL byte and
+		// zero-fill the remainder while myActualSize still advances by len.
+		std::memcpy(myBuffer + myActualSize, buffer, len);
+		myActualSize += len;
+	}
+}
+
+// Forwards the buffer and its capacity to the base reader.
+DocUcs2Reader::DocUcs2Reader(char *buffer, std::size_t maxSize)
+	: DocReader(buffer, maxSize) {
+}
+
+// Nothing to release.
+DocUcs2Reader::~DocUcs2Reader() {}
+
+// Converts one UCS-2 symbol to UTF-8 and appends the resulting bytes to the
+// output buffer, never writing past myMaxSize.
+void DocUcs2Reader::ucs2SymbolHandler(ZLUnicodeUtil::Ucs2Char symbol) {
+	if (myActualSize < myMaxSize) {
+		char buffer[4];
+		const std::size_t dataLength = ZLUnicodeUtil::ucs2ToUtf8(buffer, symbol);
+		const std::size_t len = std::min(dataLength, myMaxSize - myActualSize);
+		// The scratch buffer is not NUL-terminated, so copy by explicit length
+		// with memcpy rather than the string-oriented strncpy.
+		std::memcpy(myBuffer + myActualSize, buffer, len);
+		myActualSize += len;
+	}
+}
+
+// Remembers the file and the size limit; the buffer itself is allocated in open().
+// myOffset is initialized here so that offset(), read() and seek() never see an
+// indeterminate value when they are called before a successful open().
+DocStream::DocStream(const ZLFile& file, std::size_t maxSize) : myFile(file), myBuffer(0), mySize(maxSize), myOffset(0) {
+}
+
+// Releases the text buffer, if any, through close().
+DocStream::~DocStream() {
+	this->close();
+}
+
+// Extracts up to mySize bytes of document text into a freshly allocated buffer.
+// On success mySize becomes the number of bytes actually extracted and the read
+// position is reset to 0. Returns false if the underlying file stream cannot be
+// opened or the document cannot be read.
+bool DocStream::open() {
+	// A buffer may be left over from an earlier open() that was not followed by
+	// close(); release it first so that it is not leaked.
+	delete[] myBuffer;
+	myBuffer = 0;
+
+	if (mySize != 0) {
+		myBuffer = new char[mySize];
+	}
+	shared_ptr<DocReader> reader = createReader(myBuffer, mySize);
+	shared_ptr<ZLInputStream> stream = myFile.inputStream();
+	if (stream.isNull() || !stream->open()) {
+		return false;
+	}
+	if (!reader->readDocument(stream, false)) {
+		return false;
+	}
+	mySize = reader->readSize();
+	myOffset = 0;
+	return true;
+}
+
+// Copies up to maxSize bytes from the current position into buffer and advances
+// the position. When buffer (or the internal buffer) is null the bytes are only
+// skipped. Returns the number of bytes consumed.
+std::size_t DocStream::read(char *buffer, std::size_t maxSize) {
+	const std::size_t remaining = mySize - myOffset;
+	const std::size_t count = std::min(maxSize, remaining);
+	const bool canCopy = (buffer != 0) && (myBuffer != 0);
+	if (canCopy) {
+		std::memcpy(buffer, myBuffer + myOffset, count);
+	}
+	myOffset += count;
+	return count;
+}
+
+// Frees the text buffer; safe to call repeatedly.
+void DocStream::close() {
+	// delete[] on a null pointer is a no-op, so no explicit check is needed.
+	delete[] myBuffer;
+	myBuffer = 0;
+}
+
+// Moves the read position to offset (absolute) or by offset (relative to the
+// current position); the result is clamped to the range [0, mySize].
+void DocStream::seek(int offset, bool absoluteOffset) {
+	int target = offset;
+	if (!absoluteOffset) {
+		target += myOffset;
+	}
+	const std::size_t nonNegative = (std::size_t)(target < 0 ? 0 : target);
+	myOffset = (nonNegative < mySize) ? nonNegative : mySize;
+}
+
+// Current read position, in bytes from the start of the extracted text.
+std::size_t DocStream::offset() const {
+	const std::size_t position = myOffset;
+	return position;
+}
+
+// Returns mySize: the size limit before open(), the extracted byte count after it.
+std::size_t DocStream::sizeOfOpened() {
+	const std::size_t size = mySize;
+	return size;
+}
+
+// Forwards the file and the size limit to DocStream.
+DocAnsiStream::DocAnsiStream(const ZLFile& file, std::size_t maxSize)
+	: DocStream(file, maxSize) {
+}
+
+// Nothing to release beyond what DocStream handles.
+DocAnsiStream::~DocAnsiStream() {}
+
+// Creates the reader that collects ANSI text into the given buffer.
+shared_ptr<DocReader> DocAnsiStream::createReader(char *buffer, std::size_t maxSize) {
+	shared_ptr<DocReader> reader = new DocAnsiReader(buffer, maxSize);
+	return reader;
+}
+
+// Forwards the file and the size limit to DocStream.
+DocUcs2Stream::DocUcs2Stream(const ZLFile& file, std::size_t maxSize)
+	: DocStream(file, maxSize) {
+}
+
+// Nothing to release beyond what DocStream handles.
+DocUcs2Stream::~DocUcs2Stream() {}
+
+// Creates the reader that collects UCS-2 text (as UTF-8) into the given buffer.
+shared_ptr<DocReader> DocUcs2Stream::createReader(char *buffer, std::size_t maxSize) {
+	shared_ptr<DocReader> reader = new DocUcs2Reader(buffer, maxSize);
+	return reader;
+}
diff --git a/reader/src/formats/doc/DocStreams.h b/reader/src/formats/doc/DocStreams.h
new file mode 100644
index 0000000..4b1538a
--- /dev/null
+++ b/reader/src/formats/doc/DocStreams.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2008-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __DOCSTREAMS_H__
+#define __DOCSTREAMS_H__
+
+#include <ZLFile.h>
+#include <ZLInputStream.h>
+
+class DocReader;
+
+/*
+ * Input stream over the text extracted from a document file. The text is
+ * collected into an in-memory buffer by a DocReader obtained from createReader().
+ */
+class DocStream : public ZLInputStream {
+
+public:
+	DocStream(const ZLFile& file, std::size_t maxSize);
+	~DocStream();
+
+private:
+	// Opening, reading and closing.
+	bool open();
+	std::size_t read(char *buffer, std::size_t maxSize);
+	void close();
+
+	// Positioning and size queries.
+	void seek(int offset, bool absoluteOffset);
+	std::size_t offset() const;
+	std::size_t sizeOfOpened();
+
+protected:
+	// Supplies the reader that fills the given buffer with at most maxSize bytes.
+	virtual shared_ptr<DocReader> createReader(char *buffer, std::size_t maxSize) = 0;
+
+private:
+	const ZLFile myFile;  // file the text is extracted from
+	char *myBuffer;       // extracted text; allocated in open(), freed in close()
+	std::size_t mySize;   // size limit before open(), extracted byte count after it
+	std::size_t myOffset; // current read position within myBuffer
+};
+
+/* DocStream whose reader is a DocAnsiReader. */
+class DocAnsiStream : public DocStream {
+
+public:
+	DocAnsiStream(const ZLFile& file, std::size_t maxSize);
+	~DocAnsiStream();
+
+private:
+	// Implements the reader factory declared by DocStream.
+	shared_ptr<DocReader> createReader(char *buffer, std::size_t maxSize);
+};
+
+/* DocStream whose reader is a DocUcs2Reader. */
+class DocUcs2Stream : public DocStream {
+
+public:
+	DocUcs2Stream(const ZLFile& file, std::size_t maxSize);
+	~DocUcs2Stream();
+
+private:
+	// Implements the reader factory declared by DocStream.
+	shared_ptr<DocReader> createReader(char *buffer, std::size_t maxSize);
+};
+
+#endif /* __DOCSTREAMS_H__ */
diff --git a/reader/src/formats/doc/OleMainStream.cpp b/reader/src/formats/doc/OleMainStream.cpp
new file mode 100644
index 0000000..fe829e6
--- /dev/null
+++ b/reader/src/formats/doc/OleMainStream.cpp
@@ -0,0 +1,1085 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <string>
+
+#include <ZLLogger.h>
+#include <ZLUnicodeUtil.h>
+
+#include "OleUtil.h"
+#include "OleStorage.h"
+
+#include "DocInlineImageReader.h"
+
+#include "OleMainStream.h"
+
+// Default style: invalid style ids, no page break, zero indents, default alignment.
+OleMainStream::Style::Style()
+	: StyleIdCurrent(STYLE_INVALID)
+	, StyleIdNext(STYLE_INVALID)
+	, HasPageBreakBefore(false)
+	, BeforeParagraphIndent(0)
+	, AfterParagraphIndent(0)
+	, LeftIndent(0)
+	, FirstLineIndent(0)
+	, RightIndent(0)
+	, Alignment(ALIGNMENT_DEFAULT) {
+}
+
+// Default character formatting: regular font style, font size value 20.
+OleMainStream::CharInfo::CharInfo()
+	: FontStyle(FONT_REGULAR),
+	  FontSize(20) {
+}
+
+// Default section: character position 0, flagged as starting a new page.
+OleMainStream::SectionInfo::SectionInfo()
+	: CharPosition(0),
+	  IsNewPage(true) {
+}
+
+// Default inline image info: data position 0.
+OleMainStream::InlineImageInfo::InlineImageInfo()
+	: DataPosition(0) {
+}
+
+// Default floating image info: shape id 0.
+OleMainStream::FloatImageInfo::FloatImageInfo()
+	: ShapeId(0) {
+}
+
+// Passes the storage, the entry and the base stream straight to OleStream.
+OleMainStream::OleMainStream(shared_ptr<OleStorage> storage, OleEntry oleEntry, shared_ptr<ZLInputStream> stream)
+	: OleStream(storage, oleEntry, stream) {
+}
+
+// Opens the main stream, parses its header and loads the piece table. When
+// doReadFormattingData is set, bookmarks, styles, character formatting and
+// floating images are loaded as well.
+// Returns false if the stream or its header cannot be read, if readFIB() rejects
+// the header, or if the piece table cannot be read.
+bool OleMainStream::open(bool doReadFormattingData) {
+	if (!OleStream::open()) {
+		return false;
+	}
+
+	static const std::size_t HEADER_SIZE = 768; //size of data in header of main stream
+	char headerBuffer[HEADER_SIZE];
+	seek(0, true);
+	if (read(headerBuffer, HEADER_SIZE) != HEADER_SIZE) {
+		return false;
+	}
+	if (!readFIB(headerBuffer)) {
+		return false;
+	}
+
+	// bit 0x0200 of the flags word selects between the "0Table" and "1Table" streams
+	const bool useSecondTable = (OleUtil::getU2Bytes(headerBuffer, 0xA) & 0x0200) != 0;
+	std::string tableName(useSecondTable ? "1" : "0");
+	tableName += "Table";
+
+	OleEntry tableEntry;
+	if (!myStorage->getEntryByName(tableName, tableEntry)) {
+		// can't find the table stream (possible only if the file format is older than Word 7/8),
+		// so a simple one-piece table covering the whole text is built instead
+		// TODO: CHECK maybe not all old documents are ANSI
+		ZLLogger::Instance().println("DocPlugin", "cant't find table stream, building own simple piece table, that includes all charachters");
+		Piece piece = {myStartOfText, myEndOfText - myStartOfText, true, Piece::PIECE_TEXT, 0};
+		myPieces.push_back(piece);
+		return true;
+	}
+
+	if (!readPieceTable(headerBuffer, tableEntry)) {
+		ZLLogger::Instance().println("DocPlugin", "error during reading piece table");
+		return false;
+	}
+
+	if (doReadFormattingData) {
+		OleEntry dataEntry;
+		if (myStorage->getEntryByName("Data", dataEntry)) {
+			myDataStream = new OleStream(myStorage, dataEntry, myBaseStream);
+		}
+
+		// the results of the following calls are deliberately not checked: such problems
+		// can be ignored, and the document can still be shown, maybe with wrong formatting
+		readBookmarks(headerBuffer, tableEntry);
+		readStylesheet(headerBuffer, tableEntry);
+		//readSectionsInfoTable(headerBuffer, tableEntry); //it isn't used now
+		readParagraphStyleTable(headerBuffer, tableEntry);
+		readCharInfoTable(headerBuffer, tableEntry);
+		readFloatingImages(headerBuffer, tableEntry);
+	}
+	return true;
+}
+
+// Read-only access to the list of text pieces.
+const OleMainStream::Pieces &OleMainStream::getPieces() const {
+	const Pieces &pieces = myPieces;
+	return pieces;
+}
+
+// Read-only access to the list of character formatting entries.
+const OleMainStream::CharInfoList &OleMainStream::getCharInfoList() const {
+	const CharInfoList &list = myCharInfoList;
+	return list;
+}
+
+// Read-only access to the list of paragraph style entries.
+const OleMainStream::StyleInfoList &OleMainStream::getStyleInfoList() const {
+	const StyleInfoList &list = myStyleInfoList;
+	return list;
+}
+
+// Read-only access to the list of bookmarks.
+const OleMainStream::BookmarksList &OleMainStream::getBookmarks() const {
+	const BookmarksList &list = myBookmarks;
+	return list;
+}
+
+// Read-only access to the list of inline image entries.
+const OleMainStream::InlineImageInfoList &OleMainStream::getInlineImageInfoList() const {
+	const InlineImageInfoList &list = myInlineImageInfoList;
+	return list;
+}
+
+// Read-only access to the list of floating image entries.
+const OleMainStream::FloatImageInfoList &OleMainStream::getFloatImageInfoList() const {
+	const FloatImageInfoList &list = myFloatImageInfoList;
+	return list;
+}
+
+// Returns the image blocks for the given shape id, or an empty block list when
+// no floating image reader has been created.
+ZLFileImage::Blocks OleMainStream::getFloatImage(unsigned int shapeId) const {
+	if (!myFLoatImageReader.isNull()) {
+		return myFLoatImageReader->getBlocksForShapeId(shapeId);
+	}
+	return ZLFileImage::Blocks();
+}
+
+// Returns the image blocks found at dataPosition of the data stream, or an
+// empty block list when there is no data stream.
+ZLFileImage::Blocks OleMainStream::getInlineImage(unsigned int dataPosition) const {
+	if (!myDataStream.isNull()) {
+		DocInlineImageReader imageReader(myDataStream);
+		return imageReader.getImagePieceInfo(dataPosition);
+	}
+	return ZLFileImage::Blocks();
+}
+
+// Inspects the header of the main stream: logs what its flags and charset say,
+// rejects encrypted files, and stores the start and end offsets of the text.
+// Returns false only for encrypted files.
+bool OleMainStream::readFIB(const char *headerBuffer) {
+	const int flags = OleUtil::getU2Bytes(headerBuffer, 0xA); //offset for flags
+
+	const bool isComplex = (flags & 0x0004) != 0;   //flag for complex format
+	const bool isExtended = (flags & 0x1000) != 0;  //flag for using extended charset
+	const bool isEncrypted = (flags & 0x100) != 0;  //flag for encrypted files
+
+	if (isComplex) {
+		ZLLogger::Instance().println("DocPlugin", "This was fast-saved. Some information is lost");
+		//lostInfo = (flags & 0xF0) >> 4);
+	}
+
+	ZLLogger::Instance().println(
+		"DocPlugin",
+		isExtended ? "File uses extended character set (get_word8_char)" : "File uses get_8bit_char character set"
+	);
+
+	if (isEncrypted) {
+		ZLLogger::Instance().println("DocPlugin", "File is encrypted");
+		// Encryption key = %08lx ; NumUtil::get4Bytes(header, 14)
+		return false;
+	}
+
+	const unsigned int charset = OleUtil::getU2Bytes(headerBuffer, 0x14); //offset for charset number
+	const bool isDefaultCharset = (charset == 0) || (charset == 0x100); //0x100 = default charset
+	ZLLogger::Instance().println(
+		"DocPlugin",
+		isDefaultCharset ? "Using default character set" : "Using not default character set %d"
+	);
+
+	myStartOfText = OleUtil::get4Bytes(headerBuffer, 0x18); //offset for start of text value
+	myEndOfText = OleUtil::get4Bytes(headerBuffer, 0x1c); //offset for end of text value
+	return true;
+}
+
+// Splits the pieces of s at the character count `boundary`: everything before
+// the boundary goes to dest1 (tagged type1), everything after it to dest2
+// (tagged type2). The piece that reaches the boundary is cut in two, and empty
+// halves are dropped.
+void OleMainStream::splitPieces(const Pieces &s, Pieces &dest1, Pieces &dest2, Piece::PieceType type1, Piece::PieceType type2, int boundary) {
+	// work on a copy: the source may be the same container as one of the destinations
+	const Pieces source = s;
+	dest1.clear();
+	dest2.clear();
+
+	int consumed = 0;
+	std::size_t index = 0;
+
+	// 1) pieces lying entirely before the boundary
+	while (index < source.size() && source.at(index).Length + consumed < boundary) {
+		Piece whole = source.at(index);
+		consumed += whole.Length;
+		whole.Type = type1;
+		dest1.push_back(whole);
+		++index;
+	}
+
+	// 2) the piece that reaches the boundary is divided between both destinations
+	if (index < source.size()) {
+		Piece head = source.at(index);
+		Piece tail = head;
+
+		head.Length = boundary - consumed;
+		head.Type = type1;
+
+		tail.Type = type2;
+		tail.Offset += head.Length * 2;
+		tail.Length -= head.Length;
+
+		if (head.Length > 0) {
+			dest1.push_back(head);
+		}
+		if (tail.Length > 0) {
+			dest2.push_back(tail);
+		}
+		++index;
+	}
+
+	// 3) everything after the boundary
+	while (index < source.size()) {
+		Piece rest = source.at(index);
+		rest.Type = type2;
+		dest2.push_back(rest);
+		++index;
+	}
+}
+
+// Loads the CLX structure from the table stream and extracts the piece table
+// from it. The CLX offset and length are taken from the header at 0x01A2 and
+// 0x01A6. Returns an empty string if the CLX cannot be read or is malformed.
+std::string OleMainStream::getPiecesTableBuffer(const char *headerBuffer, OleStream &tableStream) {
+	unsigned int clxOffset = OleUtil::getU4Bytes(headerBuffer, 0x01A2); //offset for CLX structure
+	unsigned int clxLength = OleUtil::getU4Bytes(headerBuffer, 0x01A6); //offset for value of CLX structure length
+
+	//1 step : loading CLX table from table stream
+	char *clxBuffer = new char[clxLength];
+	if (!tableStream.seek(clxOffset, true)) {
+		ZLLogger::Instance().println("DocPlugin", "getPiecesTableBuffer -- error for seeking to CLX structure");
+		delete[] clxBuffer; //the temporary buffer must not leak on failure
+		return std::string();
+	}
+	if (tableStream.read(clxBuffer, clxLength) != clxLength) {
+		ZLLogger::Instance().println("DocPlugin", "getPiecesTableBuffer -- CLX structure length is invalid");
+		delete[] clxBuffer; //the temporary buffer must not leak on failure
+		return std::string();
+	}
+	std::string clx(clxBuffer, clxLength);
+	delete[] clxBuffer;
+
+	//2 step: searching for pieces table buffer at CLX
+	//(determines it by 0x02 as start symbol)
+	//A 0x02 byte is accepted as the start of the piece table only if the 4-byte
+	//length that follows it equals the number of bytes left in the CLX.
+	//NOTE(review): if no candidate matches, the last rejected candidate is still
+	//returned -- confirm whether callers rely on that.
+	std::size_t from = 0;
+	std::size_t i;
+	std::string pieceTableBuffer;
+	while ((i = clx.find_first_of(0x02, from)) != std::string::npos) {
+		if (clx.size() < i + 1 + 4) {
+			ZLLogger::Instance().println("DocPlugin", "getPiecesTableBuffer -- CLX structure has invalid format");
+			return std::string();
+		}
+		unsigned int pieceTableLength = OleUtil::getU4Bytes(clx.c_str(), i + 1);
+		pieceTableBuffer = std::string(clx, i + 1 + 4);
+		if (pieceTableBuffer.length() != pieceTableLength) {
+			from = i + 1;
+			continue;
+		}
+		break;
+	}
+	return pieceTableBuffer;
+}
+
+
+// Reads the piece table from the table stream and fills myPieces with it.
+// Each resulting piece gets its offset, length, ANSI flag, type
+// (text / footnote / other) and starting character position.
+// Returns false if the piece table buffer is empty or holds no usable piece.
+bool OleMainStream::readPieceTable(const char *headerBuffer, const OleEntry &tableEntry) {
+ OleStream tableStream(myStorage, tableEntry, myBaseStream);
+ std::string piecesTableBuffer = getPiecesTableBuffer(headerBuffer, tableStream);
+
+ if (piecesTableBuffer.empty()) {
+ return false;
+ }
+
+ //getting count of Character Positions for different types of subdocuments in Main Stream
+ int ccpText = OleUtil::get4Bytes(headerBuffer, 0x004C); //text
+ int ccpFtn = OleUtil::get4Bytes(headerBuffer, 0x0050); //footnote subdocument
+ int ccpHdd = OleUtil::get4Bytes(headerBuffer, 0x0054); //header subdocument
+ int ccpMcr = OleUtil::get4Bytes(headerBuffer, 0x0058); //macro subdocument
+ int ccpAtn = OleUtil::get4Bytes(headerBuffer, 0x005C); //comment subdocument
+ int ccpEdn = OleUtil::get4Bytes(headerBuffer, 0x0060); //endnote subdocument
+ int ccpTxbx = OleUtil::get4Bytes(headerBuffer, 0x0064); //textbox subdocument
+ int ccpHdrTxbx = OleUtil::get4Bytes(headerBuffer, 0x0068); //textbox subdocument of the header
+ //lastCP is the sum of all counts, plus one extra position when any
+ //subdocument other than the main text is present
+ int lastCP = ccpFtn + ccpHdd + ccpMcr + ccpAtn + ccpEdn + ccpTxbx + ccpHdrTxbx;
+ if (lastCP != 0) {
+ ++lastCP;
+ }
+ lastCP += ccpText;
+
+ //getting the CP (character positions) and CP descriptors
+ std::vector<int> cp; //array of character positions for pieces
+ //j is deliberately declared outside the loop: its final value (offset of the
+ //last CP read) is used below to locate the descriptors
+ unsigned int j = 0;
+ for (j = 0; ; j += 4) {
+ if (piecesTableBuffer.size() < j + 4) {
+ ZLLogger::Instance().println("DocPlugin", "invalid piece table, cp ends not with a lastcp");
+ break;
+ }
+ int curCP = OleUtil::get4Bytes(piecesTableBuffer.c_str(), j);
+ cp.push_back(curCP);
+ if (curCP == lastCP) {
+ break;
+ }
+ }
+
+ if (cp.size() < 2) {
+ ZLLogger::Instance().println("DocPlugin", "invalid piece table, < 2 pieces");
+ return false;
+ }
+
+ std::vector<std::string> descriptors;
+ for (std::size_t k = 0; k < cp.size() - 1; ++k) {
+ //j + 4, because it should be taken after CP in PiecesTable Buffer
+ //k * 8, because it should be taken 8 byte for each descriptor
+ std::size_t substrFrom = j + 4 + k * 8;
+ if (piecesTableBuffer.size() < substrFrom + 8) {
+ ZLLogger::Instance().println("DocPlugin", "invalid piece table, problems with descriptors reading");
+ break;
+ }
+ descriptors.push_back(piecesTableBuffer.substr(substrFrom, 8));
+ }
+
+ //filling the Pieces vector
+ //only pieces that have both a CP range and a descriptor are used
+ std::size_t minValidSize = std::min(cp.size() - 1, descriptors.size());
+ if (minValidSize == 0) {
+ ZLLogger::Instance().println("DocPlugin", "invalid piece table, there are no pieces");
+ return false;
+ }
+
+ for (std::size_t i = 0; i < minValidSize; ++i) {
+ //4byte integer with offset and ANSI flag
+ int fcValue = OleUtil::get4Bytes(descriptors.at(i).c_str(), 0x2); //offset for piece structure
+ Piece piece;
+ piece.IsANSI = (fcValue & 0x40000000) == 0x40000000; //ansi flag
+ piece.Offset = fcValue & 0x3FFFFFFF; //gettting offset for current piece
+ piece.Length = cp.at(i + 1) - cp.at(i);
+ //Type and startCP are assigned further below
+ myPieces.push_back(piece);
+ }
+
+ //split pieces into different types
+ //first split off the main text, then split the remainder into footnotes and the rest
+ Pieces piecesText, piecesFootnote, piecesOther;
+ splitPieces(myPieces, piecesText, piecesFootnote, Piece::PIECE_TEXT, Piece::PIECE_FOOTNOTE, ccpText);
+ splitPieces(piecesFootnote, piecesFootnote, piecesOther, Piece::PIECE_FOOTNOTE, Piece::PIECE_OTHER, ccpFtn);
+
+ //rebuild myPieces in the order: text, footnotes, other
+ myPieces.clear();
+ for (std::size_t i = 0; i < piecesText.size(); ++i) {
+ myPieces.push_back(piecesText.at(i));
+ }
+ for (std::size_t i = 0; i < piecesFootnote.size(); ++i) {
+ myPieces.push_back(piecesFootnote.at(i));
+ }
+ for (std::size_t i = 0; i < piecesOther.size(); ++i) {
+ myPieces.push_back(piecesOther.at(i));
+ }
+
+ //converting length and offset depending on isANSI
+ //non-ANSI pieces get their length doubled, ANSI pieces get their offset halved
+ for (std::size_t i = 0; i < myPieces.size(); ++i) {
+ Piece &piece = myPieces.at(i);
+ if (!piece.IsANSI) {
+ piece.Length *= 2;
+ } else {
+ piece.Offset /= 2;
+ }
+ }
+
+ //filling startCP field
+ //startCP advances by Length for ANSI pieces and by Length / 2 for the others,
+ //undoing the doubling applied above
+ unsigned int curStartCP = 0;
+ for (std::size_t i = 0; i < myPieces.size(); ++i) {
+ Piece &piece = myPieces.at(i);
+ piece.startCP = curStartCP;
+ if (piece.IsANSI) {
+ curStartCP += piece.Length;
+ } else {
+ curStartCP += piece.Length / 2;
+ }
+ }
+ return true;
+}
+
+// Reads bookmark names (SttbfBkmk) and their starting character positions
+// (plcfBkmkf) from the table stream and appends the resulting bookmarks to
+// myBookmarks. Returns true when there are no bookmarks or when reading
+// succeeded; false when one of the structures cannot be read.
+bool OleMainStream::readBookmarks(const char *headerBuffer, const OleEntry &tableEntry) {
+	//SttbfBkmk structure is a table of bookmark name strings
+	unsigned int beginNamesInfo = OleUtil::getU4Bytes(headerBuffer, 0x142); // address of SttbfBkmk structure
+	std::size_t namesInfoLength = (std::size_t)OleUtil::getU4Bytes(headerBuffer, 0x146); // length of SttbfBkmk structure
+
+	if (namesInfoLength == 0) {
+		return true; //there's no bookmarks
+	}
+
+	OleStream tableStream(myStorage, tableEntry, myBaseStream);
+	std::string buffer;
+	if (!readToBuffer(buffer, beginNamesInfo, namesInfoLength, tableStream)) {
+		return false;
+	}
+
+	//the record count occupies bytes 2..3, so the buffer must hold at least 4 bytes
+	if (buffer.size() < 0x4) {
+		ZLLogger::Instance().println("DocPlugin", "problmes with reading bookmarks names");
+		return false;
+	}
+	unsigned int recordsNumber = OleUtil::getU2Bytes(buffer.c_str(), 0x2); //count of records
+
+	std::vector<std::string> names;
+	unsigned int offset = 0x6; //initial offset
+	for (unsigned int i = 0; i < recordsNumber; ++i) {
+		if (buffer.size() < offset + 2) {
+			ZLLogger::Instance().println("DocPlugin", "problmes with reading bookmarks names");
+			break;
+		}
+		unsigned int length = OleUtil::getU2Bytes(buffer.c_str(), offset) * 2; //length of string in bytes
+		if (buffer.size() < offset + 2 + length) {
+			//truncated name: stop here instead of letting std::string::at() throw
+			ZLLogger::Instance().println("DocPlugin", "problmes with reading bookmarks names");
+			break;
+		}
+		ZLUnicodeUtil::Ucs2String name;
+		for (unsigned int j = 0; j < length; j+=2) {
+			//go through unsigned char first: a plain (possibly signed) char would be
+			//sign-extended and corrupt every character containing a byte >= 0x80
+			const unsigned int low = (unsigned char)buffer.at(offset + 2 + j);
+			const unsigned int high = (unsigned char)buffer.at(offset + 2 + j + 1);
+			ZLUnicodeUtil::Ucs2Char ucs2Char = low | (high << 8);
+			name.push_back(ucs2Char);
+		}
+		std::string utf8Name;
+		ZLUnicodeUtil::ucs2ToUtf8(utf8Name, name);
+		names.push_back(utf8Name);
+		offset += length + 2;
+	}
+
+	//plcfBkmkf structure is table recording beginning CPs of bookmarks
+	unsigned int beginCharPosInfo = OleUtil::getU4Bytes(headerBuffer, 0x14A); // address of plcfBkmkf structure
+	std::size_t charPosInfoLen = (std::size_t)OleUtil::getU4Bytes(headerBuffer, 0x14E); // length of plcfBkmkf structure
+
+	if (charPosInfoLen == 0) {
+		return true; //there's no bookmarks
+	}
+
+	if (!readToBuffer(buffer, beginCharPosInfo, charPosInfoLen, tableStream)) {
+		return false;
+	}
+
+	static const unsigned int BKF_SIZE = 4;
+	std::size_t size = calcCountOfPLC(charPosInfoLen, BKF_SIZE);
+	std::vector<unsigned int> charPage;
+	for (std::size_t index = 0, cpOffset = 0; index < size; ++index, cpOffset += 4) {
+		if (buffer.size() < cpOffset + 4) {
+			break; //never read past the data actually loaded
+		}
+		charPage.push_back(OleUtil::getU4Bytes(buffer.c_str(), cpOffset));
+	}
+
+	for (std::size_t i = 0; i < names.size(); ++i) {
+		if (i >= charPage.size()) {
+			break; //for the case if something in these structures goes wrong, to not to lose all bookmarks
+		}
+		Bookmark bookmark;
+		bookmark.CharPosition = charPage.at(i);
+		bookmark.Name = names.at(i);
+		myBookmarks.push_back(bookmark);
+	}
+
+	return true;
+}
+
+// Reads the STSH (stylesheet) structure from the table stream into myStyleSheet.
+// Styles based on another style start as a copy of their base; as a base style
+// may be stored after the styles derived from it, the table is scanned
+// repeatedly until a pass fills no new style.
+// Returns false if the structure cannot be read from the table stream.
+// NOTE(review): offsets taken from the STSH data are not checked against
+// stshInfoLength before being dereferenced -- confirm whether the input can be
+// trusted here.
+bool OleMainStream::readStylesheet(const char *headerBuffer, const OleEntry &tableEntry) {
+	//STSH structure is a stylesheet
+	unsigned int beginStshInfo = OleUtil::getU4Bytes(headerBuffer, 0xa2); // address of STSH structure
+	std::size_t stshInfoLength = (std::size_t)OleUtil::getU4Bytes(headerBuffer, 0xa6); // length of STSH structure
+
+	OleStream tableStream(myStorage, tableEntry, myBaseStream);
+	char *buffer = new char[stshInfoLength];
+	if (!tableStream.seek(beginStshInfo, true)) {
+		ZLLogger::Instance().println("DocPlugin", "problems with reading STSH structure");
+		delete[] buffer; //the buffer must not leak on failure
+		return false;
+	}
+	if (tableStream.read(buffer, stshInfoLength) != stshInfoLength) {
+		ZLLogger::Instance().println("DocPlugin", "problems with reading STSH structure, invalid length");
+		delete[] buffer; //the buffer must not leak on failure
+		return false;
+	}
+
+	std::size_t stdCount = (std::size_t)OleUtil::getU2Bytes(buffer, 2);
+	std::size_t stdBaseInFile = (std::size_t)OleUtil::getU2Bytes(buffer, 4);
+	myStyleSheet.resize(stdCount);
+
+	std::vector<bool> isFilled;
+	isFilled.resize(stdCount, false);
+
+	std::size_t stdLen = 0;
+	bool styleSheetWasChanged = false;
+	do { //make it in while loop, because some base style can be after their successors
+		styleSheetWasChanged = false;
+		//stdLen is refreshed at the top of every iteration, so each `continue`
+		//below still advances offset by the size of the current record
+		for (std::size_t index = 0, offset = 2 + (std::size_t)OleUtil::getU2Bytes(buffer, 0); index < stdCount; index++, offset += 2 + stdLen) {
+			stdLen = (std::size_t)OleUtil::getU2Bytes(buffer, offset);
+			if (isFilled.at(index)) {
+				continue;
+			}
+
+			if (stdLen == 0) {
+				//if record is empty, left it default
+				isFilled[index] = true;
+				continue;
+			}
+
+			Style styleInfo = myStyleSheet.at(index);
+
+			const unsigned int styleAndBaseType = OleUtil::getU2Bytes(buffer, offset + 4);
+			const unsigned int styleType = styleAndBaseType % 16;
+			const unsigned int baseStyleId = styleAndBaseType / 16;
+			if (baseStyleId == Style::STYLE_NIL || baseStyleId == Style::STYLE_USER) {
+				//if based on nil or user style, left default
+			} else {
+				int baseStyleIndex = getStyleIndex(baseStyleId, isFilled, myStyleSheet);
+				if (baseStyleIndex < 0) {
+					//this base style is not filled yet, so pass it at some time
+					continue;
+				}
+				styleInfo = myStyleSheet.at(baseStyleIndex);
+				styleInfo.StyleIdCurrent = Style::STYLE_INVALID;
+			}
+
+			// parse STD structure
+			unsigned int tmp = OleUtil::getU2Bytes(buffer, offset + 6);
+			unsigned int upxCount = tmp % 16;
+			styleInfo.StyleIdNext = tmp / 16;
+
+			//adding current style
+			myStyleSheet[index] = styleInfo;
+			isFilled[index] = true;
+			styleSheetWasChanged = true;
+
+			std::size_t pos = 2 + stdBaseInFile;
+			std::size_t nameLen = (std::size_t)OleUtil::getU2Bytes(buffer, offset + pos);
+			nameLen = nameLen * 2 + 2; //from Unicode characters to bytes + Unicode null charachter length
+			pos += 2 + nameLen;
+			if (pos % 2 != 0) {
+				++pos;
+			}
+			if (pos >= stdLen) {
+				continue;
+			}
+			std::size_t upxLen = (std::size_t)OleUtil::getU2Bytes(buffer, offset + pos);
+			if (pos + upxLen > stdLen) {
+				//UPX length too large
+				continue;
+			}
+			//for style info styleType must be equal 1
+			if (styleType == 1 && upxCount >= 1) {
+				if (upxLen >= 2) {
+					styleInfo.StyleIdCurrent = OleUtil::getU2Bytes(buffer, offset + pos + 2);
+					getStyleInfo(0, buffer + offset + pos + 4, upxLen - 2, styleInfo);
+					myStyleSheet[index] = styleInfo;
+				}
+				pos += 2 + upxLen;
+				if (pos % 2 != 0) {
+					++pos;
+				}
+				upxLen = (std::size_t)OleUtil::getU2Bytes(buffer, offset + pos);
+			}
+			if (upxLen == 0 || pos + upxLen > stdLen) {
+				//too small/too large
+				continue;
+			}
+			//for char info styleType can be equal 1 or 2
+			if ((styleType == 1 && upxCount >= 2) || (styleType == 2 && upxCount >= 1)) {
+				CharInfo charInfo;
+				getCharInfo(0, Style::STYLE_INVALID, buffer + offset + pos + 2, upxLen, charInfo);
+				styleInfo.CurrentCharInfo = charInfo;
+				myStyleSheet[index] = styleInfo;
+			}
+		}
+	} while (styleSheetWasChanged);
+	delete[] buffer;
+	return true;
+}
+
+// Reads the PlcfbteChpx table, then every format page it refers to, and appends
+// the character formatting (and any inline image info) of each text run to
+// myCharInfoList / myInlineImageInfoList.
+// Returns false if the table is too short or any read fails.
+bool OleMainStream::readCharInfoTable(const char *headerBuffer, const OleEntry &tableEntry) {
+	//PlcfbteChpx structure is table with formatting for particular run of text
+	unsigned int beginCharInfo = OleUtil::getU4Bytes(headerBuffer, 0xfa); // address of PlcfbteChpx structure
+	std::size_t charInfoLength = (std::size_t)OleUtil::getU4Bytes(headerBuffer, 0xfe); // length of PlcfbteChpx structure
+	if (charInfoLength < 4) {
+		return false;
+	}
+
+	OleStream tableStream(myStorage, tableEntry, myBaseStream);
+	std::string buffer;
+	if (!readToBuffer(buffer, beginCharInfo, charInfoLength, tableStream)) {
+		return false;
+	}
+
+	static const unsigned int CHPX_SIZE = 4;
+	std::size_t size = calcCountOfPLC(charInfoLength, CHPX_SIZE);
+	std::vector<unsigned int> charBlocks;
+	for (std::size_t index = 0, offset = (size + 1) * 4; index < size; ++index, offset += CHPX_SIZE) {
+		charBlocks.push_back(OleUtil::getU4Bytes(buffer.c_str(), offset));
+	}
+
+	char *formatPageBuffer = new char[OleStorage::BBD_BLOCK_SIZE];
+	for (std::size_t index = 0; index < charBlocks.size(); ++index) {
+		seek(charBlocks.at(index) * OleStorage::BBD_BLOCK_SIZE, true);
+		if (read(formatPageBuffer, OleStorage::BBD_BLOCK_SIZE) != OleStorage::BBD_BLOCK_SIZE) {
+			delete[] formatPageBuffer; //the page buffer must not leak on a short read
+			return false;
+		}
+		unsigned int crun = OleUtil::getU1Byte(formatPageBuffer, 0x1ff); //offset with crun (count of 'run of text')
+		//The page stores (crun + 1) 4-byte offsets followed by crun 1-byte entries,
+		//all located before the crun byte at 0x1ff. A larger crun would make the
+		//reads below run past the page, so such a page is skipped.
+		if ((crun + 1) * 4 + crun > 0x1ff) {
+			continue;
+		}
+		for (unsigned int index2 = 0; index2 < crun; ++index2) {
+			unsigned int offset = OleUtil::getU4Bytes(formatPageBuffer, index2 * 4);
+			unsigned int chpxOffset = 2 * OleUtil::getU1Byte(formatPageBuffer, (crun + 1) * 4 + index2);
+			unsigned int len = OleUtil::getU1Byte(formatPageBuffer, chpxOffset);
+			unsigned int charPos = 0;
+			if (!offsetToCharPos(offset, charPos, myPieces)) {
+				continue;
+			}
+			unsigned int styleId = getStyleIdByCharPos(charPos, myStyleInfoList);
+
+			//start from the character formatting of the paragraph style, then apply
+			//the run's own modifications (if it has any)
+			CharInfo charInfo = getStyleFromStylesheet(styleId, myStyleSheet).CurrentCharInfo;
+			if (chpxOffset != 0) {
+				getCharInfo(chpxOffset, styleId, formatPageBuffer + 1, len - 1, charInfo);
+			}
+			myCharInfoList.push_back(CharPosToCharInfo(charPos, charInfo));
+
+			if (chpxOffset != 0) {
+				InlineImageInfo pictureInfo;
+				if (getInlineImageInfo(chpxOffset, formatPageBuffer + 1, len - 1, pictureInfo)) {
+					myInlineImageInfoList.push_back(CharPosToInlineImageInfo(charPos, pictureInfo));
+				}
+			}
+
+		}
+	}
+	delete[] formatPageBuffer;
+	return true;
+}
+
+// Reads the Plcspa table (character position -> shape id) into
+// myFloatImageInfoList and, when office art data (DggInfo) is present, creates
+// the floating image reader and lets it read that data.
+// Returns false if one of the structures is too short or cannot be read.
+bool OleMainStream::readFloatingImages(const char *headerBuffer, const OleEntry &tableEntry) {
+	//Plcspa structure is a table with information for FSPA (File Shape Address)
+	const unsigned int plcspaBegin = OleUtil::getU4Bytes(headerBuffer, 0x01DA); // address of Plcspa structure
+	if (plcspaBegin == 0) {
+		return true; //there's no office art objects
+	}
+	const unsigned int plcspaLength = OleUtil::getU4Bytes(headerBuffer, 0x01DE); // length of Plcspa structure
+	if (plcspaLength < 4) {
+		return false;
+	}
+
+	OleStream tableStream(myStorage, tableEntry, myBaseStream);
+	std::string buffer;
+	if (!readToBuffer(buffer, plcspaBegin, plcspaLength, tableStream)) {
+		return false;
+	}
+
+	static const unsigned int SPA_SIZE = 26;
+	const std::size_t count = calcCountOfPLC(plcspaLength, SPA_SIZE);
+
+	// the table starts with (count + 1) 4-byte character positions,
+	// followed by `count` records of SPA_SIZE bytes, each starting with a shape id
+	const std::size_t recordsBegin = (count + 1) * 4;
+	for (std::size_t i = 0; i < count; ++i) {
+		const unsigned int charPos = OleUtil::getU4Bytes(buffer.c_str(), i * 4);
+		FloatImageInfo info;
+		info.ShapeId = OleUtil::getU4Bytes(buffer.c_str(), recordsBegin + i * SPA_SIZE);
+		myFloatImageInfoList.push_back(CharPosToFloatImageInfo(charPos, info));
+	}
+
+	//DggInfo structure is office art object table data
+	const unsigned int dggBegin = OleUtil::getU4Bytes(headerBuffer, 0x22A); // address of DggInfo structure
+	if (dggBegin == 0) {
+		return true; //there's no office art objects
+	}
+	const unsigned int dggLength = OleUtil::getU4Bytes(headerBuffer, 0x022E); // length of DggInfo structure
+	if (dggLength < 4) {
+		return false;
+	}
+
+	shared_ptr<OleStream> newTableStream = new OleStream(myStorage, tableEntry, myBaseStream);
+	shared_ptr<OleStream> newMainStream = new OleStream(myStorage, myOleEntry, myBaseStream);
+	const bool streamsReady = newTableStream->open() && newMainStream->open();
+	if (streamsReady) {
+		myFLoatImageReader = new DocFloatImageReader(dggBegin, dggLength, newTableStream, newMainStream);
+		myFLoatImageReader->readAll();
+	}
+	return true;
+}
+
+//Reads the paragraph formatting table (PlcBtePapx) and appends one entry per paragraph to myStyleInfoList.
+//headerBuffer is the FIB, which stores the table location (0x102) and length (0x106);
+//tableEntry is the table stream holding the PLC.
+//Returns false if the table is shorter than 4 bytes, cannot be read,
+//or one of the formatting pages it references cannot be read completely.
+bool OleMainStream::readParagraphStyleTable(const char *headerBuffer, const OleEntry &tableEntry) {
+	//PlcBtePapx structure is table with formatting for all paragraphs
+	unsigned int beginParagraphInfo = OleUtil::getU4Bytes(headerBuffer, 0x102); // address of PlcBtePapx structure
+	std::size_t paragraphInfoLength = (std::size_t)OleUtil::getU4Bytes(headerBuffer, 0x106); // length of PlcBtePapx structure
+	if (paragraphInfoLength < 4) {
+		return false;
+	}
+
+	OleStream tableStream(myStorage, tableEntry, myBaseStream);
+	std::string buffer;
+	if (!readToBuffer(buffer, beginParagraphInfo, paragraphInfoLength, tableStream)) {
+		return false;
+	}
+
+	static const unsigned int PAPX_SIZE = 4;
+	std::size_t size = calcCountOfPLC(paragraphInfoLength, PAPX_SIZE);
+
+	//numbers of the pages (in the main stream) that hold the paragraph formatting
+	std::vector<unsigned int> paragraphBlocks;
+	for (std::size_t index = 0, tOffset = (size + 1) * 4; index < size; ++index, tOffset += PAPX_SIZE) {
+		paragraphBlocks.push_back(OleUtil::getU4Bytes(buffer.c_str(), tOffset));
+	}
+
+	//the page buffer is owned by a vector, so it is released on every return path
+	//(the early return below used to leak a buffer allocated with new[])
+	const std::size_t pageSize = OleStorage::BBD_BLOCK_SIZE;
+	std::vector<char> page(pageSize);
+	char *formatPageBuffer = &page[0];
+	for (std::size_t index = 0; index < paragraphBlocks.size(); ++index) {
+		seek(paragraphBlocks.at(index) * OleStorage::BBD_BLOCK_SIZE, true);
+		if (read(formatPageBuffer, pageSize) != pageSize) {
+			return false;
+		}
+		const unsigned int paragraphsCount = OleUtil::getU1Byte(formatPageBuffer, 0x1ff); //offset with 'cpara' value (count of paragraphs)
+		for (unsigned int index2 = 0; index2 < paragraphsCount; ++index2) {
+			//position of the byte that locates this paragraph's PAPX inside the page
+			const unsigned int bxOffset = (paragraphsCount + 1) * 4 + index2 * 13;
+			if (bxOffset >= pageSize) {
+				//corrupted 'cpara': this and all further descriptors lie outside of the page
+				break;
+			}
+			const unsigned int offset = OleUtil::getU4Bytes(formatPageBuffer, index2 * 4);
+			unsigned int papxOffset = OleUtil::getU1Byte(formatPageBuffer, bxOffset) * 2;
+			if (papxOffset <= 0) {
+				continue;
+			}
+			unsigned int len = OleUtil::getU1Byte(formatPageBuffer, papxOffset) * 2;
+			if (len == 0) {
+				//zero length byte: the real length is stored in the next byte
+				++papxOffset;
+				len = OleUtil::getU1Byte(formatPageBuffer, papxOffset) * 2;
+			}
+			if (papxOffset + 3 > pageSize) {
+				//the two bytes of the style id would be read past the end of the page
+				continue;
+			}
+
+			const unsigned int styleId = OleUtil::getU2Bytes(formatPageBuffer, papxOffset + 1);
+			Style styleInfo = getStyleFromStylesheet(styleId, myStyleSheet);
+
+			if (len >= 3) {
+				getStyleInfo(papxOffset, formatPageBuffer + 3, len - 3, styleInfo);
+			}
+
+			unsigned int charPos = 0;
+			if (!offsetToCharPos(offset, charPos, myPieces)) {
+				continue;
+			}
+			myStyleInfoList.push_back(CharPosToStyle(charPos, styleInfo));
+		}
+	}
+	return true;
+}
+
+//Reads the section table (PlcfSed) and appends one SectionInfo per section to mySectionInfoList.
+//headerBuffer is the FIB (text start at 0x18, table address at 0xca, table length at 0xce);
+//tableEntry is the table stream holding the PLC.
+//Sections whose properties cannot be read from the main stream are silently skipped.
+//Returns false only if the table is shorter than 4 bytes or cannot be read.
+bool OleMainStream::readSectionsInfoTable(const char *headerBuffer, const OleEntry &tableEntry) {
+	const unsigned int textStart = OleUtil::getU4Bytes(headerBuffer, 0x18); //address of text's begin in main stream
+	const unsigned int plcfSedStart = OleUtil::getU4Bytes(headerBuffer, 0xca); //address of PlcfSed structure
+	const std::size_t plcfSedLength = (std::size_t)OleUtil::getU4Bytes(headerBuffer, 0xce); //length of PlcfSed structure
+	if (plcfSedLength < 4) {
+		return false;
+	}
+
+	OleStream tableStream(myStorage, tableEntry, myBaseStream);
+	std::string plcfSed;
+	if (!readToBuffer(plcfSed, plcfSedStart, plcfSedLength, tableStream)) {
+		return false;
+	}
+
+	static const unsigned int SED_SIZE = 12;
+	const std::size_t sectionCount = calcCountOfPLC(plcfSedLength, SED_SIZE);
+
+	//the PLC starts with (sectionCount + 1) text offsets, followed by the section descriptors;
+	//collect the start of every section and the position of its properties (sepx) in one pass
+	const std::size_t descriptorsStart = (sectionCount + 1) * 4;
+	std::vector<unsigned int> sectionStarts;
+	std::vector<unsigned int> sepxPositions;
+	for (std::size_t i = 0; i < sectionCount; ++i) {
+		sectionStarts.push_back(textStart + OleUtil::getU4Bytes(plcfSed.c_str(), i * 4));
+		sepxPositions.push_back(OleUtil::getU4Bytes(plcfSed.c_str(), descriptorsStart + i * SED_SIZE + 2));
+	}
+
+	//reading the section properties
+	char lengthField[2];
+	for (std::size_t i = 0; i < sepxPositions.size(); ++i) {
+		const unsigned int sepxPosition = sepxPositions.at(i);
+		if (sepxPosition == 0xffffffffUL) {
+			//invalid record: the section keeps the default properties
+			SectionInfo defaultInfo;
+			defaultInfo.CharPosition = sectionStarts.at(i);
+			mySectionInfoList.push_back(defaultInfo);
+			continue;
+		}
+
+		//the properties start with a two-byte size field
+		if (!seek(sepxPosition, true) || read(lengthField, 2) != 2) {
+			continue;
+		}
+		const std::size_t bytes = 2 + (std::size_t)OleUtil::getU2Bytes(lengthField, 0);
+
+		//read the size field again, together with the properties that follow it
+		if (!seek(sepxPosition, true)) {
+			continue;
+		}
+		std::vector<char> sepx(bytes);
+		if (read(&sepx[0], bytes) != bytes) {
+			continue;
+		}
+
+		SectionInfo sectionInfo;
+		sectionInfo.CharPosition = sectionStarts.at(i);
+		getSectionInfo(&sepx[0] + 2, bytes - 2, sectionInfo);
+		mySectionInfoList.push_back(sectionInfo);
+	}
+	return true;
+}
+
+//Walks the property modifiers (PRLs) of a paragraph and applies the known ones to styleInfo.
+//The PRLs are read from grpprlBuffer starting at papxOffset; bytes limits how far the walk goes.
+//Unknown modifiers are skipped using the length reported by getPrlLength().
+void OleMainStream::getStyleInfo(unsigned int papxOffset, const char *grpprlBuffer, unsigned int bytes, Style &styleInfo) {
+	for (unsigned int offset = 0; bytes >= offset + 2; ) {
+		const unsigned int prlStart = papxOffset + offset;
+		const unsigned int operandStart = prlStart + 2;
+		//0 means "ask getPrlLength()"; only an inconsistent tabs record overrides it
+		unsigned int prlLength = 0;
+
+		switch (OleUtil::getU2Bytes(grpprlBuffer, prlStart)) {
+			case 0x2403:
+				styleInfo.Alignment = (Style::AlignmentType)OleUtil::getU1Byte(grpprlBuffer, operandStart);
+				break;
+			case 0x2407:
+				styleInfo.HasPageBreakBefore = OleUtil::getU1Byte(grpprlBuffer, operandStart) == 0x01;
+				break;
+			case 0x4610:
+				styleInfo.LeftIndent += OleUtil::getU2Bytes(grpprlBuffer, operandStart);
+				if (styleInfo.LeftIndent < 0) {
+					styleInfo.LeftIndent = 0;
+				}
+				break;
+			case 0x840e:
+				styleInfo.RightIndent = (int)OleUtil::getU2Bytes(grpprlBuffer, operandStart);
+				break;
+			case 0x840f:
+				styleInfo.LeftIndent = (int)OleUtil::getU2Bytes(grpprlBuffer, operandStart);
+				break;
+			case 0x8411:
+				styleInfo.FirstLineIndent = (int)OleUtil::getU2Bytes(grpprlBuffer, operandStart);
+				break;
+			case 0xa413:
+				styleInfo.BeforeParagraphIndent = OleUtil::getU2Bytes(grpprlBuffer, operandStart);
+				break;
+			case 0xa414:
+				styleInfo.AfterParagraphIndent = OleUtil::getU2Bytes(grpprlBuffer, operandStart);
+				break;
+			case 0xc60d: // ChgTabsPapx
+			case 0xc615: // ChgTabs
+			{
+				//the declared operand size must cover the lists of deleted and added tabs;
+				//every count is read only after the previous check has passed
+				const int operandSize = OleUtil::get1Byte(grpprlBuffer, operandStart);
+				bool isConsistent = false;
+				if (operandSize >= 2) {
+					const int toDelete = OleUtil::getU1Byte(grpprlBuffer, prlStart + 3);
+					if (operandSize >= 2 + 2 * toDelete) {
+						const int toAdd = OleUtil::getU1Byte(grpprlBuffer, prlStart + 4 + 2 * toDelete);
+						isConsistent = operandSize >= 2 + 2 * toDelete + 2 * toAdd;
+					}
+				}
+				if (!isConsistent) {
+					//do not trust the record: step over a single byte only
+					prlLength = 1;
+				}
+				break;
+			}
+			default:
+				break;
+		}
+
+		if (prlLength == 0) {
+			prlLength = getPrlLength(grpprlBuffer, prlStart);
+		}
+		offset += prlLength;
+	}
+}
+
+//Walks the property modifiers (PRLs) of a character run and applies the known ones to charInfo.
+//The PRLs are read from grpprlBuffer starting at chpxOffset; bytes limits how far the walk goes.
+//Handled modifiers: 0x0835 (bold), 0x0836 (italic), 0x4a43 (font size). The style id is not used.
+void OleMainStream::getCharInfo(unsigned int chpxOffset, unsigned int /*styleId*/, const char *grpprlBuffer, unsigned int bytes, CharInfo &charInfo) {
+	unsigned int offset = 0;
+	while (bytes >= offset + 2) {
+		const unsigned int prlStart = chpxOffset + offset;
+		const unsigned int opCode = OleUtil::getU2Bytes(grpprlBuffer, prlStart);
+
+		if (opCode == 0x0835 || opCode == 0x0836) {
+			//bold and italic are toggles that only differ in the font style bit they control
+			const unsigned int flag = (opCode == 0x0835) ? CharInfo::FONT_BOLD : CharInfo::FONT_ITALIC;
+			const unsigned int sprm = OleUtil::getU1Byte(grpprlBuffer, prlStart + 2); //single property modifier
+			switch (sprm) {
+				case UNSET:
+					charInfo.FontStyle &= ~flag;
+					break;
+				case SET:
+					charInfo.FontStyle |= flag;
+					break;
+				case NEGATION:
+					charInfo.FontStyle ^= flag;
+					break;
+				case UNCHANGED:
+				default:
+					//nothing to change
+					break;
+			}
+		} else if (opCode == 0x4a43) {
+			//size of font
+			charInfo.FontSize = OleUtil::getU2Bytes(grpprlBuffer, prlStart + 2);
+		}
+
+		offset += getPrlLength(grpprlBuffer, prlStart);
+	}
+}
+
+//Walks the property modifiers (PRLs) of a section, bytes long and starting at grpprlBuffer,
+//and updates sectionInfo. Only modifier 0x3009 is interpreted:
+//its one-byte operand marks a new page unless it is 0 or 1.
+void OleMainStream::getSectionInfo(const char *grpprlBuffer, std::size_t bytes, SectionInfo &sectionInfo) {
+	for (std::size_t offset = 0; bytes >= offset + 2; offset += getPrlLength(grpprlBuffer, offset)) {
+		if (OleUtil::getU2Bytes(grpprlBuffer, offset) != 0x3009) {
+			continue;
+		}
+		//new page
+		const unsigned int breakKind = OleUtil::getU1Byte(grpprlBuffer, offset + 2);
+		sectionInfo.IsNewPage = !(breakKind == 0 || breakKind == 1);
+	}
+}
+
+//Looks for the location of an inline picture among the property modifiers (PRLs) of a character run.
+//The PRLs are read from grpprlBuffer starting at chpxOffset; bytes limits how far the walk goes.
+//Returns true and fills pictureInfo.DataPosition when a location (0x6a03) is found;
+//returns false when there is no location, or when the run is marked as an ole object (0x080a)
+//or as binary data (0x0806).
+bool OleMainStream::getInlineImageInfo(unsigned int chpxOffset, const char *grpprlBuffer, unsigned int bytes, InlineImageInfo &pictureInfo) {
+	//p. 105 of [MS-DOC] documentation
+	unsigned int offset = 0;
+	bool isFound = false;
+	while (bytes >= offset + 2) {
+		switch (OleUtil::getU2Bytes(grpprlBuffer, chpxOffset + offset)) {
+			case 0x080a: // ole object, p.107 [MS-DOC]
+				if (OleUtil::getU1Byte(grpprlBuffer, chpxOffset + offset + 2) == 0x01) {
+					return false;
+				}
+				break;
+			case 0x0806: // is not a picture, but a binary data? (sprmCFData, p.106 [MS-DOC])
+				//the operand of this modifier is a single byte (getPrlLength() reports 3 bytes for the whole PRL);
+				//reading 4 bytes here compared the flag together with the first bytes of the next PRL
+				if (OleUtil::getU1Byte(grpprlBuffer, chpxOffset + offset + 2) == 0x01) {
+					return false;
+				}
+				break;
+//			case 0x0855: // sprmCFSpec, p.117 [MS-DOC], MUST BE applied with a value of 1 (see p.105 [MS-DOC])
+//				if (OleUtil::getU1Byte(grpprlBuffer, chpxOffset + offset + 2) != 0x01) {
+//					return false;
+//				}
+//				break;
+			case 0x6a03: // location p.105 [MS-DOC]
+				pictureInfo.DataPosition = OleUtil::getU4Bytes(grpprlBuffer, chpxOffset + offset + 2);
+				isFound = true;
+				break;
+			default:
+				break;
+		}
+		offset += getPrlLength(grpprlBuffer, chpxOffset + offset);
+	}
+	return isFound;
+}
+
+//Returns a copy of the first stylesheet entry whose id equals styleId.
+//For the special ids (invalid, nil, user) and for ids missing from the stylesheet
+//a default-constructed style carrying styleId is returned instead.
+OleMainStream::Style OleMainStream::getStyleFromStylesheet(unsigned int styleId, const StyleSheet &stylesheet) {
+	//TODO optimize it: StyleSheet can be map structure with styleId key
+	const bool isSpecialId =
+		styleId == Style::STYLE_INVALID ||
+		styleId == Style::STYLE_NIL ||
+		styleId == Style::STYLE_USER;
+	if (!isSpecialId) {
+		for (StyleSheet::const_iterator it = stylesheet.begin(); it != stylesheet.end(); ++it) {
+			if (it->StyleIdCurrent == styleId) {
+				return *it;
+			}
+		}
+	}
+
+	Style fallback;
+	fallback.StyleIdCurrent = styleId;
+	return fallback;
+}
+
+//Returns the index of the first filled stylesheet entry whose id equals styleId,
+//or -1 if styleId is STYLE_INVALID or no such entry exists.
+//isFilled.at(i) tells whether stylesheet entry i is filled.
+int OleMainStream::getStyleIndex(unsigned int styleId, const std::vector<bool> &isFilled, const StyleSheet &stylesheet) {
+	//TODO optimize it: StyleSheet can be map structure with styleId key
+	//in that case, this method will be excess
+	if (styleId == Style::STYLE_INVALID) {
+		return -1;
+	}
+	const int count = (int)stylesheet.size();
+	int index = 0;
+	while (index < count) {
+		if (isFilled.at(index) && stylesheet.at(index).StyleIdCurrent == styleId) {
+			return index;
+		}
+		++index;
+	}
+	return -1;
+}
+
+//Returns the id of the style that covers charPos.
+//Entry i covers the positions from its own start up to (not including) the start of entry i + 1.
+//When no earlier entry covers charPos, the id of the last entry is returned;
+//an empty list gives STYLE_INVALID.
+unsigned int OleMainStream::getStyleIdByCharPos(unsigned int charPos, const StyleInfoList &styleInfoList) {
+	if (styleInfoList.empty()) {
+		return Style::STYLE_INVALID;
+	}
+
+	const std::size_t lastIndex = styleInfoList.size() - 1;
+	for (std::size_t i = 0; i < lastIndex; ++i) {
+		const unsigned int rangeStart = styleInfoList.at(i).first;
+		const unsigned int rangeEnd = styleInfoList.at(i + 1).first;
+		if (rangeStart <= charPos && charPos < rangeEnd) {
+			return styleInfoList.at(i).second.StyleIdCurrent;
+		}
+	}
+	//the last entry is taken without checking its range
+	return styleInfoList.at(lastIndex).second.StyleIdCurrent;
+}
+
+//Converts an offset in the stream into a character position, using the piece table.
+//An offset before the first piece maps to position 0.
+//Returns false if there are no pieces or the offset lies at or after the end of the last piece.
+bool OleMainStream::offsetToCharPos(unsigned int offset, unsigned int &charPos, const Pieces &pieces) {
+	if (pieces.empty()) {
+		return false;
+	}
+	const Piece &firstPiece = pieces.front();
+	if ((unsigned int)firstPiece.Offset > offset) {
+		charPos = 0;
+		return true;
+	}
+	const Piece &lastPiece = pieces.back();
+	if ((unsigned int)(lastPiece.Offset + lastPiece.Length) <= offset) {
+		return false;
+	}
+
+	//the piece is the first one whose start is not after the offset while the next piece starts after it;
+	//if there is no such piece, the last piece is used
+	std::size_t pieceNumber = pieces.size() - 1;
+	for (std::size_t i = 0; i + 1 < pieces.size(); ++i) {
+		const unsigned int curOffset = pieces.at(i).Offset;
+		const unsigned int nextOffset = pieces.at(i + 1).Offset;
+		if (offset >= curOffset && offset < nextOffset) {
+			pieceNumber = i;
+			break;
+		}
+	}
+
+	const Piece &piece = pieces.at(pieceNumber);
+	unsigned int diffOffset = offset - piece.Offset;
+	if (!piece.IsANSI) {
+		//non-ANSI pieces take two bytes per character
+		diffOffset /= 2;
+	}
+	charPos = piece.startCP + diffOffset;
+	return true;
+}
+
+//Seeks stream to offset and reads length bytes from it into result.
+//Returns false, leaving result unchanged, when fewer than length bytes could be read.
+bool OleMainStream::readToBuffer(std::string &result, unsigned int offset, std::size_t length, OleStream &stream) {
+	char *buffer = new char[length];
+	stream.seek(offset, true);
+	const bool isComplete = stream.read(buffer, length) == length;
+	if (isComplete) {
+		result = std::string(buffer, length);
+	}
+	//the temporary buffer is released on both paths; the failure path used to leak it
+	delete[] buffer;
+	return isComplete;
+}
+
+//Returns the number of elements of a PLC structure that takes totalSize bytes
+//and whose data elements take elementSize bytes each; 0 if totalSize is less than 4.
+unsigned int OleMainStream::calcCountOfPLC(unsigned int totalSize, unsigned int elementSize) {
+	//calculates count of elements in PLC structure, formula from p.30 [MS-DOC]
+	if (totalSize < 4) {
+		//too short to hold any element; the unsigned subtraction below would wrap around
+		return 0;
+	}
+	return (totalSize - 4) / (4 + elementSize);
+}
+
+//Returns the full length in bytes (two-byte opcode included) of the property modifier (PRL)
+//that starts at byteNumber in grpprlBuffer.
+//The top three bits of the opcode select the operand size; the value 6 means
+//that the operand size is stored in the byte that follows the opcode.
+unsigned int OleMainStream::getPrlLength(const char *grpprlBuffer, unsigned int byteNumber) {
+	const unsigned int opCode = OleUtil::getU2Bytes(grpprlBuffer, byteNumber);
+	const unsigned int sizeCode = (opCode & 0xe000) >> 13;
+	switch (sizeCode) {
+		case 0:
+		case 1:
+			return 3;
+		case 2:
+		case 4:
+		case 5:
+			return 4;
+		case 3:
+			return 6;
+		case 7:
+			return 5;
+		case 6:
+		{
+			//counting of info length
+			unsigned int operandSize = OleUtil::getU1Byte(grpprlBuffer, byteNumber + 2);
+			if (opCode == 0xc615 && operandSize == 255) {
+				//for this opcode the size 255 is a marker: the real size follows from the two tab counts
+				const unsigned int del = OleUtil::getU1Byte(grpprlBuffer, byteNumber + 3);
+				const unsigned int add = OleUtil::getU1Byte(grpprlBuffer, byteNumber + 4 + del * 4);
+				operandSize = 2 + del * 4 + add * 3;
+			}
+			return 3 + operandSize;
+		}
+		default:
+			return 1;
+	}
+}
diff --git a/reader/src/formats/doc/OleMainStream.h b/reader/src/formats/doc/OleMainStream.h
new file mode 100644
index 0000000..378f037
--- /dev/null
+++ b/reader/src/formats/doc/OleMainStream.h
@@ -0,0 +1,223 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __OLEMAINSTREAM_H__
+#define __OLEMAINSTREAM_H__
+
+#include <vector>
+#include <string>
+
+#include "OleStream.h"
+#include "DocFloatImageReader.h"
+
+//Stream of an OLE storage that holds the main part of a Word document.
+//Besides the OleStream interface it provides the piece table, the formatting lists,
+//the bookmarks and the image descriptions that open() reads from the document.
+class OleMainStream : public OleStream {
+
+public:
+	//continuous run of text inside the stream
+	struct Piece {
+		enum PieceType {
+			PIECE_TEXT,
+			PIECE_FOOTNOTE,
+			PIECE_OTHER
+		};
+
+		int Offset; // TODO: maybe make it unsigned int
+		int Length; // TODO: maybe make it unsigned int
+		bool IsANSI; // if false, a character takes two bytes
+		PieceType Type;
+		unsigned int startCP; // character position of the first character of the piece
+	};
+	typedef std::vector<Piece> Pieces;
+
+	//character formatting
+	struct CharInfo {
+		//bits of FontStyle
+		enum Font {
+			FONT_REGULAR = 0,
+			FONT_BOLD = 1 << 0,
+			FONT_ITALIC = 1 << 1,
+			FONT_UNDERLINE = 1 << 2,
+			FONT_CAPITALS = 1 << 3,
+			FONT_SMALL_CAPS = 1 << 4,
+			FONT_STRIKE = 1 << 5,
+			FONT_HIDDEN = 1 << 6,
+			FONT_MARKDEL = 1 << 7,
+			FONT_SUPERSCRIPT = 1 << 8,
+			FONT_SUBSCRIPT = 1 << 9
+		};
+
+		unsigned int FontStyle; // combination of Font bits
+		unsigned int FontSize; // value as stored in the document
+
+		CharInfo();
+	};
+	typedef std::pair<unsigned int, CharInfo> CharPosToCharInfo;
+	typedef std::vector<CharPosToCharInfo> CharInfoList;
+
+	//paragraph formatting
+	struct Style {
+		enum AlignmentType {
+			ALIGNMENT_LEFT = 0x00,
+			ALIGNMENT_CENTER = 0x01,
+			ALIGNMENT_RIGHT = 0x02,
+			ALIGNMENT_JUSTIFY = 0x03,
+			ALIGNMENT_DEFAULT // for the case when the alignment is not set by Word
+		};
+
+		// style Ids:
+		// (this is not a full list of possible style ids, the enum exists to be used in switch-case)
+		enum StyleID {
+			STYLE_H1 = 0x1,
+			STYLE_H2 = 0x2,
+			STYLE_H3 = 0x3,
+			STYLE_USER = 0xFFE,
+			STYLE_NIL = 0xFFF,
+			STYLE_INVALID = 0xFFFF
+		};
+
+		unsigned int StyleIdCurrent;
+		unsigned int StyleIdNext; // Next style unless overruled
+
+		bool HasPageBreakBefore;
+		unsigned int BeforeParagraphIndent; // Vertical indent before paragraph, pixels
+		unsigned int AfterParagraphIndent; // Vertical indent after paragraph, pixels
+		int LeftIndent;
+		int FirstLineIndent;
+		int RightIndent;
+		AlignmentType Alignment;
+		CharInfo CurrentCharInfo;
+
+		Style();
+	};
+
+	typedef std::pair<unsigned int, Style> CharPosToStyle;
+	typedef std::vector<CharPosToStyle> StyleInfoList;
+	typedef std::vector<Style> StyleSheet;
+
+	//properties of a document section
+	struct SectionInfo {
+		unsigned int CharPosition;
+		bool IsNewPage;
+
+		SectionInfo();
+	};
+	typedef std::vector<SectionInfo> SectionInfoList;
+
+	struct Bookmark {
+		unsigned int CharPosition;
+		std::string Name;
+	};
+	typedef std::vector<Bookmark> BookmarksList;
+
+	//picture that is located by the position of its data
+	struct InlineImageInfo {
+		unsigned int DataPosition;
+
+		InlineImageInfo();
+	};
+	typedef std::pair<unsigned int, InlineImageInfo> CharPosToInlineImageInfo;
+	typedef std::vector<CharPosToInlineImageInfo> InlineImageInfoList;
+
+	//picture that is located by the id of its shape
+	struct FloatImageInfo {
+		unsigned int ShapeId;
+		FloatImageInfo();
+	};
+	typedef std::pair<unsigned int, FloatImageInfo> CharPosToFloatImageInfo;
+	typedef std::vector<CharPosToFloatImageInfo> FloatImageInfoList;
+
+	enum ImageType { //see p. 60 [MS-ODRAW]
+		IMAGE_EMF = 0xF01A,
+		IMAGE_WMF = 0xF01B,
+		IMAGE_PICT = 0xF01C,
+		IMAGE_JPEG = 0xF01D,
+		IMAGE_PNG = 0xF01E,
+		IMAGE_DIB = 0xF01F,
+		IMAGE_TIFF = 0xF029,
+		IMAGE_JPEG2 = 0xF02A
+	};
+
+public:
+	OleMainStream(shared_ptr<OleStorage> storage, OleEntry oleEntry, shared_ptr<ZLInputStream> stream);
+
+	bool open(bool doReadFormattingData);
+	const Pieces &getPieces() const;
+	const CharInfoList &getCharInfoList() const;
+	const StyleInfoList &getStyleInfoList() const;
+	const BookmarksList &getBookmarks() const;
+	const InlineImageInfoList &getInlineImageInfoList() const;
+	const FloatImageInfoList &getFloatImageInfoList() const;
+
+	ZLFileImage::Blocks getFloatImage(unsigned int shapeId) const;
+	ZLFileImage::Blocks getInlineImage(unsigned int dataPos) const;
+
+private: //readers of the document structures; headerBuffer holds the document header
+	bool readFIB(const char *headerBuffer);
+	bool readPieceTable(const char *headerBuffer, const OleEntry &tableEntry);
+	bool readBookmarks(const char *headerBuffer, const OleEntry &tableEntry);
+	bool readStylesheet(const char *headerBuffer, const OleEntry &tableEntry);
+	bool readSectionsInfoTable(const char *headerBuffer, const OleEntry &tableEntry);
+	bool readParagraphStyleTable(const char *headerBuffer, const OleEntry &tableEntry);
+	bool readCharInfoTable(const char *headerBuffer, const OleEntry &tableEntry);
+	bool readFloatingImages(const char *headerBuffer, const OleEntry &tableEntry);
+
+private: //readPieceTable helper methods
+	static std::string getPiecesTableBuffer(const char *headerBuffer, OleStream &tableStream);
+	static void splitPieces(const Pieces &source, Pieces &dest1, Pieces &dest2, Piece::PieceType type1, Piece::PieceType type2, int boundary);
+
+private: //formatting reader helper methods
+	static unsigned int getPrlLength(const char *grpprlBuffer, unsigned int byteNumber);
+	static void getCharInfo(unsigned int chpxOffset, unsigned int styleId, const char *grpprlBuffer, unsigned int bytes, CharInfo &charInfo);
+	static void getStyleInfo(unsigned int papxOffset, const char *grpprlBuffer, unsigned int bytes, Style &styleInfo);
+	static void getSectionInfo(const char *grpprlBuffer, std::size_t bytes, SectionInfo &sectionInfo);
+	static bool getInlineImageInfo(unsigned int chpxOffset, const char *grpprlBuffer, unsigned int bytes, InlineImageInfo &pictureInfo);
+
+	static Style getStyleFromStylesheet(unsigned int styleId, const StyleSheet &stylesheet);
+	static int getStyleIndex(unsigned int styleId, const std::vector<bool> &isFilled, const StyleSheet &stylesheet);
+	static unsigned int getStyleIdByCharPos(unsigned int offset, const StyleInfoList &styleInfoList);
+
+	static bool offsetToCharPos(unsigned int offset, unsigned int &charPos, const Pieces &pieces);
+	static bool readToBuffer(std::string &result, unsigned int offset, std::size_t length, OleStream &stream);
+
+	static unsigned int calcCountOfPLC(unsigned int totalSize, unsigned int elementSize);
+
+private:
+	//values of the one-byte operand of the bold and italic modifiers
+	enum PrlFlag {
+		UNSET = 0,
+		SET = 1,
+		UNCHANGED = 128,
+		NEGATION = 129
+	};
+
+private:
+	int myStartOfText;
+	int myEndOfText;
+
+	Pieces myPieces;
+
+	StyleSheet myStyleSheet;
+
+	CharInfoList myCharInfoList;
+	StyleInfoList myStyleInfoList;
+	SectionInfoList mySectionInfoList;
+	InlineImageInfoList myInlineImageInfoList;
+	FloatImageInfoList myFloatImageInfoList;
+
+	BookmarksList myBookmarks;
+
+	shared_ptr<OleStream> myDataStream;
+
+	shared_ptr<DocFloatImageReader> myFLoatImageReader;
+};
+
+#endif /* __OLEMAINSTREAM_H__ */
diff --git a/reader/src/formats/doc/OleStorage.cpp b/reader/src/formats/doc/OleStorage.cpp
new file mode 100644
index 0000000..a7ab81a
--- /dev/null
+++ b/reader/src/formats/doc/OleStorage.cpp
@@ -0,0 +1,304 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLLogger.h>
+
+#include "OleStorage.h"
+#include "OleUtil.h"
+
+#include <cstring>
+
+const std::size_t OleStorage::BBD_BLOCK_SIZE = 512; //size of the header block read by init(); sector positions are counted from its end
+
+OleStorage::OleStorage() { //a new storage starts in the empty state set by clear(); init() fills it
+ clear();
+}
+
+//Returns the storage to its empty state: drops all tables and entries read by init(),
+//zeroes the sizes, forgets the root entry and releases the input stream.
+void OleStorage::clear() {
+	//tables and entries
+	myDIFAT.clear();
+	myBBD.clear();
+	mySBD.clear();
+	myProperties.clear();
+	myEntries.clear();
+
+	//header values
+	mySectorSize = 0;
+	myShortSectorSize = 0;
+	myStreamSize = 0;
+	myRootEntryIndex = -1; //no root entry
+
+	myInputStream = 0;
+}
+
+
+
+//Reads the structure of an OLE storage from stream, which is streamSize bytes long:
+//checks the signature, then reads the DIFAT, both sector allocation tables and the directory entries.
+//Returns false, leaving the storage cleared, if the header is not a valid OLE header
+//or any of the tables cannot be read.
+bool OleStorage::init(shared_ptr<ZLInputStream> stream, std::size_t streamSize) {
+	clear();
+
+	myInputStream = stream;
+	myStreamSize = streamSize;
+	myInputStream->seek(0, true);
+
+	char oleBuf[BBD_BLOCK_SIZE];
+	std::size_t ret = myInputStream->read(oleBuf, BBD_BLOCK_SIZE);
+	if (ret != BBD_BLOCK_SIZE) {
+		clear();
+		return false;
+	}
+	static const char OLE_SIGN[] = {(char)0xD0, (char)0xCF, (char)0x11, (char)0xE0, (char)0xA1, (char)0xB1, (char)0x1A, (char)0xE1, 0};
+	//the signature is binary data, so it is compared as memory, not as a C string
+	if (std::memcmp(oleBuf, OLE_SIGN, 8) != 0) {
+		clear();
+		return false;
+	}
+
+	//Both sector sizes are stored as powers of two. The exponents come from the file and must be
+	//checked before they are used as shift counts:
+	// - a sector has to hold at least one 128-byte directory entry (shift >= 7);
+	// - the upper bound (64 KiB) keeps the per-sector buffers of the table readers small;
+	// - a short sector cannot be bigger than a sector, otherwise no short sector fits into a sector.
+	static const unsigned int MIN_SECTOR_SHIFT = 7;
+	static const unsigned int MAX_SECTOR_SHIFT = 16;
+	const unsigned int sectorShift = OleUtil::getU2Bytes(oleBuf, 0x1e); //offset for value of big sector size
+	const unsigned int shortSectorShift = OleUtil::getU2Bytes(oleBuf, 0x20); //offset for value of small sector size
+	if (sectorShift < MIN_SECTOR_SHIFT || sectorShift > MAX_SECTOR_SHIFT || shortSectorShift > sectorShift) {
+		ZLLogger::Instance().println("DocPlugin", "Wrong sector size value");
+		clear();
+		return false;
+	}
+	mySectorSize = 1 << sectorShift;
+	myShortSectorSize = 1 << shortSectorShift;
+
+	if (readDIFAT(oleBuf) && readBBD(oleBuf) && readSBD(oleBuf) && readProperties(oleBuf) && readAllEntries()) {
+		return true;
+	}
+	clear();
+	return false;
+}
+
+//Fills myDIFAT with the DIFAT records from the header (oleBuf) and, for big files,
+//from the additional DIFAT sectors that the header points to.
+//Trailing "free" records are removed. Returns false if an additional sector cannot be read.
+bool OleStorage::readDIFAT(char *oleBuf) {
+	int difatBlock = OleUtil::get4Bytes(oleBuf, 0x44); //address for first difat sector
+	int difatSectorNumbers = OleUtil::get4Bytes(oleBuf, 0x48); //numbers of additional difat records
+
+	//436 bytes of difat records (109 records, 4 bytes each) are stored in header, by offset 0x4c
+	for (unsigned int i = 0; i < 436; i += 4) {
+		myDIFAT.push_back(OleUtil::get4Bytes(oleBuf + 0x4c, i));
+	}
+
+	//for files > 6.78 mb we need read additional DIFAT fields
+	if (difatBlock > 0 && difatSectorNumbers > 0) {
+		const std::size_t sectorSize = mySectorSize;
+		if (sectorSize < 4) {
+			//a sector must hold at least the link to the next DIFAT sector
+			ZLLogger::Instance().println("DocPlugin", "Error read DIFAT!");
+			return false;
+		}
+		//heap buffer: the sector size comes from the file and must not size a stack array
+		std::vector<char> buffer(sectorSize);
+		char *sector = &buffer[0];
+		for (int i = 0; difatBlock > 0 && i < difatSectorNumbers; ++i) {
+			ZLLogger::Instance().println("DocPlugin", "Read additional data for DIFAT");
+			myInputStream->seek(BBD_BLOCK_SIZE + difatBlock * mySectorSize, true);
+			if (myInputStream->read(sector, sectorSize) != sectorSize) {
+				ZLLogger::Instance().println("DocPlugin", "Error read DIFAT!");
+				return false;
+			}
+			for (std::size_t j = 0; j < sectorSize - 4; j += 4) {
+				myDIFAT.push_back(OleUtil::get4Bytes(sector, j));
+			}
+			difatBlock = OleUtil::get4Bytes(sector, sectorSize - 4); //next DIFAT block is pointed at the end of the sector
+		}
+	}
+
+	//removing unusable DIFAT links
+	//0xFFFFFFFF means "free section"
+	while (!myDIFAT.empty() && myDIFAT.back() == (int)0xFFFFFFFF) {
+		myDIFAT.pop_back();
+	}
+	return true;
+}
+
+//Fills myBBD (the allocation table of big sectors) from the sectors listed in myDIFAT.
+//oleBuf is the header, which stores the number of sectors that the table takes.
+//Returns false if that number exceeds the DIFAT, a sector number is out of range,
+//or a sector cannot be read.
+bool OleStorage::readBBD(char *oleBuf) {
+	unsigned int bbdNumberBlocks = OleUtil::getU4Bytes(oleBuf, 0x2c); //number of big blocks
+
+	if (myDIFAT.size() < bbdNumberBlocks) {
+		//TODO maybe add check on myDIFAT == bbdNumberBlocks
+		ZLLogger::Instance().println("DocPlugin", "Wrong number of FAT blocks value");
+		return false;
+	}
+	if (bbdNumberBlocks == 0) {
+		//nothing to read
+		return true;
+	}
+
+	const std::size_t sectorSize = mySectorSize;
+	if (sectorSize == 0) {
+		//without a sector size the positions of the sectors are unknown (and the range check below would divide by zero)
+		ZLLogger::Instance().println("DocPlugin", "Error during reading BBD!");
+		return false;
+	}
+	//heap buffer: the sector size comes from the file and must not size a stack array
+	std::vector<char> buffer(sectorSize);
+	char *sector = &buffer[0];
+
+	for (unsigned int i = 0; i < bbdNumberBlocks; ++i) {
+		int bbdSector = myDIFAT.at(i);
+		if (bbdSector >= (int)(myStreamSize / mySectorSize) || bbdSector < 0) {
+			ZLLogger::Instance().println("DocPlugin", "Bad BBD entry!");
+			return false;
+		}
+		myInputStream->seek(BBD_BLOCK_SIZE + bbdSector * mySectorSize, true);
+		if (myInputStream->read(sector, sectorSize) != sectorSize) {
+			ZLLogger::Instance().println("DocPlugin", "Error during reading BBD!");
+			return false;
+		}
+		for (std::size_t j = 0; j < sectorSize; j += 4) {
+			myBBD.push_back(OleUtil::get4Bytes(sector, j));
+		}
+	}
+	return true;
+}
+
+//Fills mySBD (the allocation table of short sectors) by following, through myBBD,
+//the chain of sectors that starts at the sector given in the header (oleBuf).
+//A storage without such a table is not an error. Returns false if the chain leaves myBBD
+//or a sector cannot be read.
+bool OleStorage::readSBD(char *oleBuf) {
+	int sbdCur = OleUtil::get4Bytes(oleBuf, 0x3c); //address of first small sector
+	int sbdCount = OleUtil::get4Bytes(oleBuf, 0x40); //count of small sectors
+
+	if (sbdCur <= 0) {
+		ZLLogger::Instance().println("DocPlugin", "There's no SBD, don't read it");
+		return true;
+	}
+	if (sbdCount <= 0) {
+		//nothing to read
+		return true;
+	}
+
+	const std::size_t sectorSize = mySectorSize;
+	if (sectorSize == 0) {
+		ZLLogger::Instance().println("DocPlugin", "reading error during parsing SBD");
+		return false;
+	}
+	//heap buffer: the sector size comes from the file and must not size a stack array
+	std::vector<char> buffer(sectorSize);
+	char *sector = &buffer[0];
+
+	for (int i = 0; i < sbdCount; ++i) {
+		if (i != 0) {
+			//every sector after the first one is found through the big sectors table
+			if (sbdCur < 0 || (unsigned int)sbdCur >= myBBD.size()) {
+				ZLLogger::Instance().println("DocPlugin", "error during parsing SBD");
+				return false;
+			}
+			sbdCur = myBBD.at(sbdCur);
+		}
+		if (sbdCur <= 0) {
+			break;
+		}
+		myInputStream->seek(BBD_BLOCK_SIZE + sbdCur * mySectorSize, true);
+		if (myInputStream->read(sector, sectorSize) != sectorSize) {
+			ZLLogger::Instance().println("DocPlugin", "reading error during parsing SBD");
+			return false;
+		}
+		for (std::size_t j = 0; j < sectorSize; j += 4) {
+			mySBD.push_back(OleUtil::get4Bytes(sector, j));
+		}
+	}
+	return true;
+}
+
+//Fills myProperties with the raw 128-byte directory records, following through myBBD
+//the chain of sectors that starts at the sector given in the header (oleBuf).
+//Returns false if the first sector number is negative, a sector cannot be read,
+//or the chain of sectors is cyclic.
+bool OleStorage::readProperties(char *oleBuf) {
+	static const std::size_t PROPERTY_SIZE = 128;
+
+	int propCur = OleUtil::get4Bytes(oleBuf, 0x30); //offset for address of sector with first property
+	if (propCur < 0) {
+		ZLLogger::Instance().println("DocPlugin", "Wrong first directory sector location");
+		return false;
+	}
+
+	const std::size_t sectorSize = mySectorSize;
+	if (sectorSize == 0) {
+		//without a sector size nothing can be located (and the loop condition would divide by zero)
+		ZLLogger::Instance().println("DocPlugin", "Error during reading properties");
+		return false;
+	}
+	//heap buffer: the sector size comes from the file and must not size a stack array
+	std::vector<char> buffer(sectorSize);
+	char *sector = &buffer[0];
+
+	std::size_t followedLinks = 0;
+	do {
+		myInputStream->seek(BBD_BLOCK_SIZE + propCur * mySectorSize, true);
+		if (myInputStream->read(sector, sectorSize) != sectorSize) {
+			ZLLogger::Instance().println("DocPlugin", "Error during reading properties");
+			return false;
+		}
+		//only complete records are taken, so nothing is read past the end of the sector
+		for (std::size_t j = 0; j + PROPERTY_SIZE <= sectorSize; j += PROPERTY_SIZE) {
+			myProperties.push_back(std::string(sector + j, PROPERTY_SIZE));
+		}
+		if (propCur < 0 || (std::size_t)propCur >= myBBD.size()) {
+			break;
+		}
+		//a chain cannot have more links than the table has records;
+		//more links mean a cycle, which would otherwise be followed forever
+		if (++followedLinks > myBBD.size()) {
+			ZLLogger::Instance().println("DocPlugin", "Error during reading properties");
+			return false;
+		}
+		propCur = myBBD.at(propCur);
+	} while (propCur >= 0 && propCur < (int)(myStreamSize / mySectorSize));
+	return true;
+}
+
+//Converts the raw records of myProperties into myEntries, stopping at the first record
+//that is not a valid entry, and remembers the index of the root entry.
+//Returns false if no root entry was found.
+bool OleStorage::readAllEntries() {
+	const int propCount = myProperties.size();
+	int index = 0;
+	while (index < propCount) {
+		OleEntry entry;
+		if (!readOleEntry(index, entry)) {
+			break;
+		}
+		if (entry.type == OleEntry::ROOT_DIR) {
+			myRootEntryIndex = index;
+		}
+		myEntries.push_back(entry);
+		++index;
+	}
+	return myRootEntryIndex >= 0;
+}
+
+//Fills e from the raw directory record number propNumber: type, name, length and chain of blocks.
+//The length is cut down to the size of the blocks that were actually found.
+//Returns false if the record has an unknown type or a wrong name length.
+bool OleStorage::readOleEntry(int propNumber, OleEntry &e) {
+	static const std::string ROOT_ENTRY = "Root Entry";
+
+	const std::string &record = myProperties.at(propNumber);
+
+	const char oleType = record.at(0x42); //offset for Ole Type
+	switch (oleType) {
+		case 1:
+		case 2:
+		case 3:
+		case 5:
+			break;
+		default:
+			ZLLogger::Instance().println("DocPlugin", "entry -- not right ole type");
+			return false;
+	}
+	e.type = (OleEntry::Type)oleType;
+
+	const int nameLength = OleUtil::getU2Bytes(record.c_str(), 0x40); //offset for value entry's name length
+	e.name.clear();
+	e.name.reserve(33); //max size of entry name
+	if ((unsigned int)nameLength >= record.size()) {
+		return false;
+	}
+	//the name takes two bytes per character; only the non-zero first bytes are kept
+	for (int i = 0; i < nameLength; i += 2) {
+		const char c = record.at(i);
+		if (c == 0) {
+			continue;
+		}
+		e.name += c;
+	}
+
+	e.length = OleUtil::getU4Bytes(record.c_str(), 0x78); //offset for entry's length value
+	e.isBigBlock = e.length >= 0x1000 || e.name == ROOT_ENTRY;
+
+	// Read sector chain
+	if (record.size() < 0x74 + 4) {
+		ZLLogger::Instance().println("DocPlugin", "problems with reading ole entry");
+		return false;
+	}
+	int chainCur = OleUtil::get4Bytes(record.c_str(), 0x74); //offset for start block of entry
+	if (chainCur >= 0 && (chainCur <= (int)(myStreamSize / (e.isBigBlock ? mySectorSize : myShortSectorSize)))) {
+		//filling blocks with chains
+		for (;;) {
+			e.blocks.push_back((unsigned int)chainCur);
+
+			//next link: from the big blocks table if possible, otherwise from the short blocks table
+			if (e.isBigBlock && (std::size_t)chainCur < myBBD.size()) {
+				chainCur = myBBD.at(chainCur);
+			} else if (!mySBD.empty() && (std::size_t)chainCur < mySBD.size()) {
+				chainCur = mySBD.at(chainCur);
+			} else {
+				chainCur = -1;
+			}
+
+			const bool isInsideTable =
+				chainCur > 0 &&
+				chainCur < (int)(e.isBigBlock ? myBBD.size() : mySBD.size());
+			if (!isInsideTable ||
+					e.blocks.size() > e.length / (e.isBigBlock ? mySectorSize : myShortSectorSize)) {
+				break;
+			}
+		}
+	}
+	e.length = std::min(e.length, (unsigned int)((e.isBigBlock ? mySectorSize : myShortSectorSize) * e.blocks.size()));
+	return true;
+}
+
+//Calculates the position in the file of block number blockNumber of entry e and stores it in result.
+//Big blocks are located directly; short blocks are located inside the blocks of the root entry.
+//Returns false, leaving result unchanged, if blockNumber is out of range
+//or the data needed to locate a short block is missing or invalid.
+bool OleStorage::countFileOffsetOfBlock(const OleEntry &e, unsigned int blockNumber, unsigned int &result) const {
+	//TODO maybe better syntax can be used?
+	if (e.blocks.size() <= (std::size_t)blockNumber) {
+		ZLLogger::Instance().println("DocPlugin", "countFileOffsetOfBlock can't be done, blockNumber is invalid");
+		return false;
+	}
+	if (e.isBigBlock) {
+		result = BBD_BLOCK_SIZE + e.blocks.at(blockNumber) * mySectorSize;
+		return true;
+	}
+
+	//short blocks need a known root entry and at least one short sector per sector;
+	//these checks replace a division by zero and an out_of_range exception on broken data
+	const bool hasRootEntry = myRootEntryIndex >= 0 && (std::size_t)myRootEntryIndex < myEntries.size();
+	if (!hasRootEntry || myShortSectorSize == 0 || mySectorSize / myShortSectorSize == 0) {
+		ZLLogger::Instance().println("DocPlugin", "countFileOffsetOfBlock can't be done, invalid sbd data");
+		return false;
+	}
+
+	unsigned int sbdPerSector = mySectorSize / myShortSectorSize;
+	unsigned int sbdSectorNumber = e.blocks.at(blockNumber) / sbdPerSector;
+	unsigned int sbdSectorMod = e.blocks.at(blockNumber) % sbdPerSector;
+	const OleEntry::Blocks &rootBlocks = myEntries.at(myRootEntryIndex).blocks;
+	if (rootBlocks.size() <= (std::size_t)sbdSectorNumber) {
+		ZLLogger::Instance().println("DocPlugin", "countFileOffsetOfBlock can't be done, invalid sbd data");
+		return false;
+	}
+	result = BBD_BLOCK_SIZE + rootBlocks.at(sbdSectorNumber) * mySectorSize + sbdSectorMod * myShortSectorSize;
+	return true;
+}
+
+//Copies into returnEntry the entry called name. Among entries with the same name the longest
+//one wins (the later one, if the lengths are equal).
+//Returns false if the longest entry found has zero length or there is no such entry.
+bool OleStorage::getEntryByName(std::string name, OleEntry &returnEntry) const {
+	//TODO fix the workaround for duplicates streams: now it takes a stream with max length
+	unsigned int maxLength = 0;
+	for (std::vector<OleEntry>::const_iterator it = myEntries.begin(); it != myEntries.end(); ++it) {
+		if (it->name != name || it->length < maxLength) {
+			continue;
+		}
+		returnEntry = *it;
+		maxLength = it->length;
+	}
+	return maxLength > 0;
+}
+
+
diff --git a/reader/src/formats/doc/OleStorage.h b/reader/src/formats/doc/OleStorage.h
new file mode 100644
index 0000000..584ee94
--- /dev/null
+++ b/reader/src/formats/doc/OleStorage.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __OLESTORAGE_H__
+#define __OLESTORAGE_H__
+
+#include <algorithm>
+#include <vector>
+#include <string>
+
+#include <ZLInputStream.h>
+
+// Plain description of one entry of an OLE storage, as used by OleStorage
+// and OleStream.
+struct OleEntry {
+ // Kind of the entry; the numeric values are assigned explicitly.
+ enum Type {
+ DIR = 1,
+ STREAM = 2,
+ ROOT_DIR = 5,
+ LOCK_BYTES =3
+ };
+
+ // List of block numbers; translated to file offsets by
+ // OleStorage::countFileOffsetOfBlock().
+ typedef std::vector<unsigned int> Blocks;
+
+ std::string name;
+ // Size of the entry's data in bytes.
+ unsigned int length;
+ Type type;
+ Blocks blocks;
+ // true: blocks are addressed with the storage's sector size;
+ // false: with its short sector size.
+ bool isBigBlock;
+};
+
+// Holds the parsed entries and allocation tables of an OLE storage read
+// from a ZLInputStream.
+class OleStorage {
+
+public:
+ // Added to every file offset computed by countFileOffsetOfBlock().
+ static const std::size_t BBD_BLOCK_SIZE;
+
+public:
+ OleStorage();
+ // Returns false when the storage cannot be initialised from the stream.
+ bool init(shared_ptr<ZLInputStream>, std::size_t streamSize);
+ void clear();
+ const std::vector<OleEntry> &getEntries() const;
+ // Copies the matching entry into `entry`; returns false if no usable
+ // (non-empty) entry with that name exists.
+ bool getEntryByName(std::string name, OleEntry &entry) const;
+
+ unsigned int getSectorSize() const;
+ unsigned int getShortSectorSize() const;
+
+public: //TODO make private
+ // Translates block `blockNumber` of entry `e` into a file offset stored in
+ // `result`; returns false on invalid input.
+ bool countFileOffsetOfBlock(const OleEntry &e, unsigned int blockNumber, unsigned int &result) const;
+
+private:
+ bool readDIFAT(char *oleBuf);
+ bool readBBD(char *oleBuf);
+ bool readSBD(char *oleBuf);
+ bool readProperties(char *oleBuf);
+
+ bool readAllEntries();
+ bool readOleEntry(int propNumber, OleEntry &entry);
+
+private:
+
+ shared_ptr<ZLInputStream> myInputStream;
+ unsigned int mySectorSize, myShortSectorSize;
+
+ std::size_t myStreamSize;
+ std::vector<int> myDIFAT; //double-indirect file allocation table
+ std::vector<int> myBBD; //Big Block Depot
+ std::vector<int> mySBD; //Small Block Depot
+ std::vector<std::string> myProperties;
+ std::vector<OleEntry> myEntries;
+ // Index into myEntries of the root entry; its block list is used to
+ // locate short blocks.
+ int myRootEntryIndex;
+
+};
+
+// Read-only view of all entries held by the storage.
+inline const std::vector<OleEntry> &OleStorage::getEntries() const {
+	return myEntries;
+}
+
+// Size used to address blocks of entries with isBigBlock set.
+inline unsigned int OleStorage::getSectorSize() const {
+	return mySectorSize;
+}
+
+// Size used to address blocks of entries with isBigBlock cleared.
+inline unsigned int OleStorage::getShortSectorSize() const {
+	return myShortSectorSize;
+}
+
+#endif /* __OLESTORAGE_H__ */
diff --git a/reader/src/formats/doc/OleStream.cpp b/reader/src/formats/doc/OleStream.cpp
new file mode 100644
index 0000000..8de1cc4
--- /dev/null
+++ b/reader/src/formats/doc/OleStream.cpp
@@ -0,0 +1,221 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLLogger.h>
+
+#include "OleStream.h"
+#include "OleUtil.h"
+
+// Binds the stream to its storage, the entry it exposes and the underlying
+// file stream; the logical read position starts at 0.
+OleStream::OleStream(shared_ptr<OleStorage> storage, OleEntry oleEntry, shared_ptr<ZLInputStream> stream)
+	: myStorage(storage),
+	  myOleEntry(oleEntry),
+	  myBaseStream(stream),
+	  myOleOffset(0) {
+}
+
+
+// Succeeds only when the wrapped entry is a STREAM entry.
+bool OleStream::open() {
+	return myOleEntry.type == OleEntry::STREAM;
+}
+
+// Reads up to maxSize bytes of the entry's data, starting at the current
+// logical offset, into buffer. The data is gathered block by block because
+// consecutive blocks of an entry need not be adjacent in the file.
+//
+// Returns the number of bytes stored in buffer and advances the logical
+// offset by that amount. The request is clipped to the entry length.
+// Returns 0 when the current offset lies beyond the entry's block list or
+// when the first block cannot be located.
+std::size_t OleStream::read(char *buffer, std::size_t maxSize) {
+	std::size_t length = maxSize;
+	if (myOleOffset + length > myOleEntry.length) {
+		length = myOleEntry.length - myOleOffset;
+	}
+
+	const std::size_t sectorSize = (std::size_t)(myOleEntry.isBigBlock ? myStorage->getSectorSize() : myStorage->getShortSectorSize());
+
+	unsigned int curBlockNumber = myOleOffset / sectorSize;
+	if (curBlockNumber >= myOleEntry.blocks.size()) {
+		return 0;
+	}
+	const unsigned int modBlock = myOleOffset % sectorSize;
+	const std::size_t bytesLeftInCurBlock = sectorSize - modBlock;
+
+	// Split what does not fit into the current block into whole blocks plus
+	// a trailing partial block.
+	std::size_t toReadBlocks = 0;
+	std::size_t toReadBytes = 0;
+	if (bytesLeftInCurBlock < length) {
+		toReadBlocks = (length - bytesLeftInCurBlock) / sectorSize;
+		toReadBytes = (length - bytesLeftInCurBlock) % sectorSize;
+	}
+
+	unsigned int newFileOffset;
+	if (!myStorage->countFileOffsetOfBlock(myOleEntry, curBlockNumber, newFileOffset)) {
+		return 0;
+	}
+	newFileOffset += modBlock;
+	myBaseStream->seek(newFileOffset, true);
+	std::size_t bytesRead = myBaseStream->read(buffer, std::min(length, bytesLeftInCurBlock));
+
+	// Set when a later block cannot be located; reading stops there, but the
+	// bytes already delivered must still be accounted for in myOleOffset.
+	bool blockLookupFailed = false;
+
+	for (std::size_t i = 0; i < toReadBlocks; ++i) {
+		if (++curBlockNumber >= myOleEntry.blocks.size()) {
+			break;
+		}
+		if (!myStorage->countFileOffsetOfBlock(myOleEntry, curBlockNumber, newFileOffset)) {
+			blockLookupFailed = true;
+			break;
+		}
+		myBaseStream->seek(newFileOffset, true);
+		bytesRead += myBaseStream->read(buffer + bytesRead, std::min(length - bytesRead, sectorSize));
+	}
+
+	if (!blockLookupFailed && toReadBytes > 0 && ++curBlockNumber < myOleEntry.blocks.size()) {
+		if (myStorage->countFileOffsetOfBlock(myOleEntry, curBlockNumber, newFileOffset)) {
+			myBaseStream->seek(newFileOffset, true);
+			bytesRead += myBaseStream->read(buffer + bytesRead, toReadBytes);
+		}
+	}
+
+	// Always advance by what was actually handed to the caller, including on
+	// the partial-failure paths, so the next read does not return it again.
+	myOleOffset += bytesRead;
+	return bytesRead;
+}
+
+// True once the logical offset has reached (or passed) the entry length.
+bool OleStream::eof() const {
+	return !(myOleOffset < myOleEntry.length);
+}
+
+
+// Counterpart of open(); currently a no-op.
+void OleStream::close() {
+	// intentionally empty
+}
+
+// Moves the logical position inside the entry and positions the underlying
+// file stream on the matching byte.
+//
+//  offset         - target position, or forward distance from the current one
+//  absoluteOffset - true: offset is counted from the start of the entry
+//
+// The target is clipped to the entry length. Returns false, leaving the
+// logical position unchanged, when the target falls outside the entry's
+// block list or the block cannot be located.
+bool OleStream::seek(unsigned int offset, bool absoluteOffset) {
+	const unsigned int requested = absoluteOffset ? offset : myOleOffset + offset;
+	const unsigned int target = std::min(requested, myOleEntry.length);
+
+	const unsigned int sectorSize = myOleEntry.isBigBlock ? myStorage->getSectorSize() : myStorage->getShortSectorSize();
+	const unsigned int blockIndex = target / sectorSize;
+	if (blockIndex >= myOleEntry.blocks.size()) {
+		return false;
+	}
+
+	unsigned int blockStart;
+	if (!myStorage->countFileOffsetOfBlock(myOleEntry, blockIndex, blockStart)) {
+		return false;
+	}
+
+	const unsigned int positionInBlock = target % sectorSize;
+	myBaseStream->seek(blockStart + positionInBlock, true);
+	myOleOffset = target;
+	return true;
+}
+
+// Current logical position inside the entry (not a file position).
+std::size_t OleStream::offset() {
+	return static_cast<std::size_t>(myOleOffset);
+}
+
+// Describes where `size` bytes of the entry, starting at logical position
+// `offset`, are located in the underlying file, without reading them.
+//
+// Returns a list of (file offset, size) pieces in stream order, with pieces
+// that are adjacent in the file merged by concatBlocks(). An empty list is
+// returned when `offset` is outside the entry's block list or when any
+// needed block cannot be located. If the block list ends early, the list
+// covers only the part that could be mapped.
+ZLFileImage::Blocks OleStream::getBlockPieceInfoList(unsigned int offset, unsigned int size) const {
+	const unsigned int sectorSize = myOleEntry.isBigBlock ? myStorage->getSectorSize() : myStorage->getShortSectorSize();
+
+	unsigned int blockIndex = offset / sectorSize;
+	if (blockIndex >= myOleEntry.blocks.size()) {
+		return ZLFileImage::Blocks();
+	}
+
+	const unsigned int positionInBlock = offset % sectorSize;
+	unsigned int firstPieceOffset = 0;
+	if (!myStorage->countFileOffsetOfBlock(myOleEntry, blockIndex, firstPieceOffset)) {
+		return ZLFileImage::Blocks();
+	}
+	firstPieceOffset += positionInBlock;
+
+	// Whatever does not fit into the first block is split into whole blocks
+	// plus a trailing partial block.
+	const unsigned int roomInFirstBlock = sectorSize - positionInBlock;
+	unsigned int wholeBlocks = 0;
+	unsigned int tailBytes = 0;
+	if (roomInFirstBlock < size) {
+		const unsigned int remainder = size - roomInFirstBlock;
+		wholeBlocks = remainder / sectorSize;
+		tailBytes = remainder % sectorSize;
+	}
+
+	ZLFileImage::Blocks pieces;
+	unsigned int covered = std::min(size, roomInFirstBlock);
+	pieces.push_back(ZLFileImage::Block(firstPieceOffset, covered));
+
+	for (unsigned int done = 0; done < wholeBlocks; ++done) {
+		++blockIndex;
+		if (blockIndex >= myOleEntry.blocks.size()) {
+			break;
+		}
+		unsigned int pieceOffset = 0;
+		if (!myStorage->countFileOffsetOfBlock(myOleEntry, blockIndex, pieceOffset)) {
+			return ZLFileImage::Blocks();
+		}
+		const unsigned int pieceSize = std::min(size - covered, sectorSize);
+		pieces.push_back(ZLFileImage::Block(pieceOffset, pieceSize));
+		covered += pieceSize;
+	}
+
+	if (tailBytes > 0) {
+		++blockIndex;
+		if (blockIndex < myOleEntry.blocks.size()) {
+			unsigned int pieceOffset = 0;
+			if (!myStorage->countFileOffsetOfBlock(myOleEntry, blockIndex, pieceOffset)) {
+				return ZLFileImage::Blocks();
+			}
+			pieces.push_back(ZLFileImage::Block(pieceOffset, tailBytes));
+		}
+	}
+
+	return concatBlocks(pieces);
+}
+
+// Merges runs of pieces that are contiguous in the file (each piece starts
+// exactly where the previous one ends) into single pieces. Order is kept;
+// lists with fewer than two pieces are returned unchanged.
+ZLFileImage::Blocks OleStream::concatBlocks(const ZLFileImage::Blocks &blocks) {
+	if (blocks.size() < 2) {
+		return blocks;
+	}
+
+	ZLFileImage::Blocks merged;
+	ZLFileImage::Block pending = blocks.at(0);
+	unsigned int expectedOffset = pending.offset + pending.size;
+
+	for (std::size_t index = 1; index < blocks.size(); ++index) {
+		const ZLFileImage::Block candidate = blocks.at(index);
+		if (candidate.offset != expectedOffset) {
+			// Gap in the file: flush the run collected so far, start a new one.
+			merged.push_back(pending);
+			pending = candidate;
+			expectedOffset = pending.offset + pending.size;
+			continue;
+		}
+		pending.size += candidate.size;
+		expectedOffset += candidate.size;
+	}
+
+	merged.push_back(pending);
+	return merged;
+}
+
+// Position in the underlying file that corresponds to the current logical
+// offset. Returns 0 when that offset is outside the entry's block list or
+// the block cannot be located.
+//TODO this method is not used at this time; consider removing it
+std::size_t OleStream::fileOffset() {
+	const std::size_t sectorSize = (std::size_t)(myOleEntry.isBigBlock ? myStorage->getSectorSize() : myStorage->getShortSectorSize());
+	const unsigned int blockIndex = myOleOffset / sectorSize;
+
+	if (blockIndex < myOleEntry.blocks.size()) {
+		unsigned int blockStart = 0;
+		if (myStorage->countFileOffsetOfBlock(myOleEntry, blockIndex, blockStart)) {
+			const unsigned int positionInBlock = myOleOffset % sectorSize;
+			return blockStart + positionInBlock;
+		}
+	}
+	return 0;
+}
diff --git a/reader/src/formats/doc/OleStream.h b/reader/src/formats/doc/OleStream.h
new file mode 100644
index 0000000..861c7cb
--- /dev/null
+++ b/reader/src/formats/doc/OleStream.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __OLESTREAM_H__
+#define __OLESTREAM_H__
+
+#include <ZLFileImage.h>
+
+#include "OleStorage.h"
+
+// Sequential, seekable view of the data of one OleEntry, read through the
+// underlying file stream with block positions resolved by OleStorage.
+class OleStream {
+
+public:
+ OleStream(shared_ptr<OleStorage> storage, OleEntry oleEntry, shared_ptr<ZLInputStream> stream);
+
+public:
+ // Returns true only when the wrapped entry is of type STREAM.
+ bool open();
+ // Reads up to maxSize bytes at the current position; returns the byte count.
+ std::size_t read(char *buffer, std::size_t maxSize);
+ void close();
+
+public:
+ // Offsets are positions inside the entry's data, not file positions.
+ bool seek(unsigned int offset, bool absoluteOffset);
+ std::size_t offset();
+
+public:
+ // Maps a range of the entry's data to (file offset, size) pieces.
+ ZLFileImage::Blocks getBlockPieceInfoList(unsigned int offset, unsigned int size) const;
+ // Merges pieces that are adjacent in the file.
+ static ZLFileImage::Blocks concatBlocks(const ZLFileImage::Blocks &blocks);
+ // File position corresponding to the current offset; 0 if it cannot be resolved.
+ std::size_t fileOffset();
+
+public:
+ bool eof() const;
+
+protected:
+ shared_ptr<OleStorage> myStorage;
+
+ OleEntry myOleEntry;
+ shared_ptr<ZLInputStream> myBaseStream;
+
+ // Current position inside the entry's data.
+ unsigned int myOleOffset;
+};
+
+#endif /* __OLESTREAM_H__ */
diff --git a/reader/src/formats/doc/OleStreamParser.cpp b/reader/src/formats/doc/OleStreamParser.cpp
new file mode 100644
index 0000000..0a9c62d
--- /dev/null
+++ b/reader/src/formats/doc/OleStreamParser.cpp
@@ -0,0 +1,210 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+//#include <cctype>
+//#include <cstring>
+
+#include <ZLLogger.h>
+
+#include "OleMainStream.h"
+#include "OleUtil.h"
+#include "OleStreamParser.h"
+
+//word's control chars:
+// Word's control characters, ordered by code.
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::INLINE_IMAGE = 0x0001;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_FOOTNOTE_MARK = 0x0002;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_TABLE_SEPARATOR = 0x0007;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::FLOAT_IMAGE = 0x0008;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_HORIZONTAL_TAB = 0x0009;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_HARD_LINEBREAK = 0x000B;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_PAGE_BREAK = 0x000C;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_END_OF_PARAGRAPH = 0x000D;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_START_FIELD = 0x0013;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_SEPARATOR_FIELD = 0x0014;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_END_FIELD = 0x0015;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_MINUS = 0x001E;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_SOFT_HYPHEN = 0x001F;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_ZERO_WIDTH_UNBREAKABLE_SPACE = 0xFEFF;
+
+// Plain unicode values, ordered by code.
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::NULL_SYMBOL = 0x0000;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::LINE_FEED = 0x000A;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::FILE_SEPARATOR = 0x001C;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::SPACE = 0x0020;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::MINUS = 0x002D;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::VERTICAL_LINE = 0x007C;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::SOFT_HYPHEN = 0x00AD;
+
+// Starts with an empty character buffer and every cursor (buffer position,
+// character position, and the per-list "next item" indices) at zero.
+OleStreamParser::OleStreamParser()
+	: myCurBufferPosition(0),
+	  myCurCharPos(0),
+	  myNextStyleInfoIndex(0),
+	  myNextCharInfoIndex(0),
+	  myNextBookmarkIndex(0),
+	  myNextInlineImageInfoIndex(0),
+	  myNextFloatImageInfoIndex(0) {
+}
+
+// Pulls characters one by one from the stream and dispatches each to the
+// matching handle*() callback. Always returns true once the characters are
+// exhausted.
+//
+// A table separator is not reported immediately: a second separator right
+// after it means "end of row", anything else means "end of cell" followed by
+// normal processing of that character.
+bool OleStreamParser::readStream(OleMainStream &oleMainStream) {
+	bool separatorPending = false;
+	ZLUnicodeUtil::Ucs2Char symbol;
+
+	while (getUcs2Char(oleMainStream, symbol)) {
+		if (separatorPending) {
+			separatorPending = false;
+			if (symbol == WORD_TABLE_SEPARATOR) {
+				handleTableEndRow();
+				continue;
+			}
+			handleTableSeparator();
+		}
+
+		if (symbol >= 32) {
+			// Printable range: everything except the zero-width mark is text.
+			if (symbol != WORD_ZERO_WIDTH_UNBREAKABLE_SPACE) {
+				handleChar(symbol);
+			}
+			continue;
+		}
+
+		switch (symbol) {
+			case NULL_SYMBOL:
+			case INLINE_IMAGE:
+			case FLOAT_IMAGE:
+				// Nothing to emit here; images are handled in getUcs2Char().
+				break;
+			case WORD_FOOTNOTE_MARK:
+				handleFootNoteMark();
+				break;
+			case WORD_TABLE_SEPARATOR:
+				separatorPending = true;
+				break;
+			case WORD_HARD_LINEBREAK:
+				handleHardLinebreak();
+				break;
+			case WORD_PAGE_BREAK:
+			case WORD_END_OF_PARAGRAPH:
+				handleParagraphEnd();
+				break;
+			case WORD_START_FIELD:
+				handleStartField();
+				break;
+			case WORD_SEPARATOR_FIELD:
+				handleSeparatorField();
+				break;
+			case WORD_END_FIELD:
+				handleEndField();
+				break;
+			default:
+				handleOtherControlChar(symbol);
+				break;
+		}
+	}
+
+	return true;
+}
+
+// Fetches the next character into ucs2char, refilling myBuffer from the next
+// text piece(s) when it is exhausted. Returns false when no piece is left.
+//
+// Before returning, the style/bookmark callbacks for the current character
+// position are fired, then the image callback if the character is an image
+// placeholder, and finally the character position is advanced.
+bool OleStreamParser::getUcs2Char(OleMainStream &stream, ZLUnicodeUtil::Ucs2Char &ucs2char) {
+	while (myBuffer.size() <= myCurBufferPosition) {
+		myCurBufferPosition = 0;
+		myBuffer.clear();
+		if (!readNextPiece(stream)) {
+			return false;
+		}
+	}
+
+	ucs2char = myBuffer.at(myCurBufferPosition);
+	++myCurBufferPosition;
+
+	processStyles(stream);
+	if (ucs2char == INLINE_IMAGE) {
+		processInlineImage(stream);
+	} else if (ucs2char == FLOAT_IMAGE) {
+		processFloatImage(stream);
+	}
+
+	++myCurCharPos;
+	return true;
+}
+
+// Reports, via handleImage(), every inline image registered for the current
+// character position. Entries for earlier positions are skipped first,
+// because not every entry corresponds to a real picture.
+void OleStreamParser::processInlineImage(OleMainStream &stream) {
+	const OleMainStream::InlineImageInfoList &images = stream.getInlineImageInfoList();
+
+	// Skip entries that belong to positions already passed.
+	for (; myNextInlineImageInfoIndex < images.size()
+			&& images.at(myNextInlineImageInfoIndex).first < myCurCharPos;
+			++myNextInlineImageInfoIndex) {
+	}
+
+	for (; myNextInlineImageInfoIndex < images.size()
+			&& images.at(myNextInlineImageInfoIndex).first == myCurCharPos;
+			++myNextInlineImageInfoIndex) {
+		const OleMainStream::InlineImageInfo imageInfo = images.at(myNextInlineImageInfoIndex).second;
+		const ZLFileImage::Blocks pieces = stream.getInlineImage(imageInfo.DataPosition);
+		if (!pieces.empty()) {
+			handleImage(pieces);
+		}
+	}
+}
+
+// Reports, via handleImage(), every floating image registered for the
+// current character position. Entries for earlier positions are skipped
+// first, because not every entry corresponds to a real picture.
+void OleStreamParser::processFloatImage(OleMainStream &stream) {
+	const OleMainStream::FloatImageInfoList &images = stream.getFloatImageInfoList();
+
+	// Skip entries that belong to positions already passed.
+	for (; myNextFloatImageInfoIndex < images.size()
+			&& images.at(myNextFloatImageInfoIndex).first < myCurCharPos;
+			++myNextFloatImageInfoIndex) {
+	}
+
+	for (; myNextFloatImageInfoIndex < images.size()
+			&& images.at(myNextFloatImageInfoIndex).first == myCurCharPos;
+			++myNextFloatImageInfoIndex) {
+		const OleMainStream::FloatImageInfo imageInfo = images.at(myNextFloatImageInfoIndex).second;
+		const ZLFileImage::Blocks pieces = stream.getFloatImage(imageInfo.ShapeId);
+		if (!pieces.empty()) {
+			handleImage(pieces);
+		}
+	}
+}
+
+// Fires the callbacks attached to the current character position, in this
+// order: paragraph styles, font styles, bookmarks. Each list has its own
+// cursor, which advances only past entries whose position equals
+// myCurCharPos.
+void OleStreamParser::processStyles(OleMainStream &stream) {
+	const OleMainStream::StyleInfoList &paragraphStyles = stream.getStyleInfoList();
+	for (; myNextStyleInfoIndex < paragraphStyles.size()
+			&& paragraphStyles.at(myNextStyleInfoIndex).first == myCurCharPos;
+			++myNextStyleInfoIndex) {
+		const OleMainStream::Style style = paragraphStyles.at(myNextStyleInfoIndex).second;
+		handleParagraphStyle(style);
+	}
+
+	const OleMainStream::CharInfoList &fontStyles = stream.getCharInfoList();
+	for (; myNextCharInfoIndex < fontStyles.size()
+			&& fontStyles.at(myNextCharInfoIndex).first == myCurCharPos;
+			++myNextCharInfoIndex) {
+		const OleMainStream::CharInfo charInfo = fontStyles.at(myNextCharInfoIndex).second;
+		handleFontStyle(charInfo.FontStyle);
+	}
+
+	const OleMainStream::BookmarksList &bookmarks = stream.getBookmarks();
+	for (; myNextBookmarkIndex < bookmarks.size()
+			&& bookmarks.at(myNextBookmarkIndex).CharPosition == myCurCharPos;
+			++myNextBookmarkIndex) {
+		const OleMainStream::Bookmark bookmark = bookmarks.at(myNextBookmarkIndex);
+		handleBookmark(bookmark.Name);
+	}
+}
diff --git a/reader/src/formats/doc/OleStreamParser.h b/reader/src/formats/doc/OleStreamParser.h
new file mode 100644
index 0000000..1adec2f
--- /dev/null
+++ b/reader/src/formats/doc/OleStreamParser.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __OLESTREAMPARSER_H__
+#define __OLESTREAMPARSER_H__
+
+#include <ZLUnicodeUtil.h>
+
+#include "OleMainStream.h"
+#include "OleStreamReader.h"
+
+// Turns the character stream of a Word document into a sequence of
+// handle*() callbacks implemented by a subclass.
+class OleStreamParser : public OleStreamReader {
+
+public:
+ //word's control chars:
+ static const ZLUnicodeUtil::Ucs2Char WORD_FOOTNOTE_MARK;
+ static const ZLUnicodeUtil::Ucs2Char WORD_TABLE_SEPARATOR;
+ static const ZLUnicodeUtil::Ucs2Char WORD_HORIZONTAL_TAB;
+ static const ZLUnicodeUtil::Ucs2Char WORD_HARD_LINEBREAK;
+ static const ZLUnicodeUtil::Ucs2Char WORD_PAGE_BREAK;
+ static const ZLUnicodeUtil::Ucs2Char WORD_END_OF_PARAGRAPH;
+ static const ZLUnicodeUtil::Ucs2Char WORD_MINUS;
+ static const ZLUnicodeUtil::Ucs2Char WORD_SOFT_HYPHEN;
+ static const ZLUnicodeUtil::Ucs2Char WORD_START_FIELD;
+ static const ZLUnicodeUtil::Ucs2Char WORD_SEPARATOR_FIELD;
+ static const ZLUnicodeUtil::Ucs2Char WORD_END_FIELD;
+ static const ZLUnicodeUtil::Ucs2Char WORD_ZERO_WIDTH_UNBREAKABLE_SPACE;
+ static const ZLUnicodeUtil::Ucs2Char INLINE_IMAGE;
+ static const ZLUnicodeUtil::Ucs2Char FLOAT_IMAGE;
+
+ //unicode values:
+ static const ZLUnicodeUtil::Ucs2Char NULL_SYMBOL;
+ static const ZLUnicodeUtil::Ucs2Char FILE_SEPARATOR;
+ static const ZLUnicodeUtil::Ucs2Char LINE_FEED;
+ static const ZLUnicodeUtil::Ucs2Char SOFT_HYPHEN;
+ static const ZLUnicodeUtil::Ucs2Char SPACE;
+ static const ZLUnicodeUtil::Ucs2Char MINUS;
+ static const ZLUnicodeUtil::Ucs2Char VERTICAL_LINE;
+
+public:
+ OleStreamParser();
+
+private:
+ // Implements OleStreamReader::readStream(): the main dispatch loop.
+ bool readStream(OleMainStream &stream);
+
+protected:
+ // Callbacks invoked while the stream is parsed; subclasses implement them.
+ virtual void handleChar(ZLUnicodeUtil::Ucs2Char ucs2char) = 0;
+ virtual void handleHardLinebreak() = 0;
+ virtual void handleParagraphEnd() = 0;
+ // NOTE(review): readStream() in OleStreamParser.cpp routes WORD_PAGE_BREAK
+ // to handleParagraphEnd(); confirm whether this callback is meant to be called.
+ virtual void handlePageBreak() = 0;
+ virtual void handleTableSeparator() = 0;
+ virtual void handleTableEndRow() = 0;
+ virtual void handleFootNoteMark() = 0;
+ virtual void handleStartField() = 0;
+ virtual void handleSeparatorField() = 0;
+ virtual void handleEndField() = 0;
+ virtual void handleImage(const ZLFileImage::Blocks &blocks) = 0;
+ virtual void handleOtherControlChar(ZLUnicodeUtil::Ucs2Char ucs2char) = 0;
+
+ virtual void handleFontStyle(unsigned int fontStyle) = 0;
+ virtual void handleParagraphStyle(const OleMainStream::Style &styleInfo) = 0;
+ virtual void handleBookmark(const std::string &name) = 0;
+
+private:
+ bool getUcs2Char(OleMainStream &stream, ZLUnicodeUtil::Ucs2Char &ucs2char);
+ void processInlineImage(OleMainStream &stream);
+ void processFloatImage(OleMainStream &stream);
+ void processStyles(OleMainStream &stream);
+
+private:
+protected:
+ // Characters of the piece currently being consumed; protected, so
+ // accessible to subclasses.
+ ZLUnicodeUtil::Ucs2String myBuffer;
+private:
+ // Index of the next unread character in myBuffer.
+ std::size_t myCurBufferPosition;
+
+ // Position of the current character, counted over the whole stream.
+ unsigned int myCurCharPos;
+
+ // Cursors into the stream's style, char-info, bookmark and image lists.
+ std::size_t myNextStyleInfoIndex;
+ std::size_t myNextCharInfoIndex;
+ std::size_t myNextBookmarkIndex;
+ std::size_t myNextInlineImageInfoIndex;
+ std::size_t myNextFloatImageInfoIndex;
+};
+
+#endif /* __OLESTREAMPARSER_H__ */
diff --git a/reader/src/formats/doc/OleStreamReader.cpp b/reader/src/formats/doc/OleStreamReader.cpp
new file mode 100644
index 0000000..224489a
--- /dev/null
+++ b/reader/src/formats/doc/OleStreamReader.cpp
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLLogger.h>
+
+#include "OleMainStream.h"
+#include "OleUtil.h"
+#include "OleStreamReader.h"
+
+// Reading starts with the first text piece.
+OleStreamReader::OleStreamReader()
+	: myNextPieceNumber(0) {
+}
+
+// Opens the OLE storage contained in inputStream, locates its
+// "WordDocument" entry and hands the resulting main stream to readStream().
+//
+// Returns false when the storage cannot be initialised, when the entry is
+// missing, or when the main stream cannot be opened; otherwise returns the
+// result of readStream().
+bool OleStreamReader::readDocument(shared_ptr<ZLInputStream> inputStream, bool doReadFormattingData) {
+	static const std::string WORD_DOCUMENT = "WordDocument";
+
+	shared_ptr<OleStorage> storage = new OleStorage;
+	if (storage->init(inputStream, inputStream->sizeOfOpened())) {
+		OleEntry documentEntry;
+		if (!storage->getEntryByName(WORD_DOCUMENT, documentEntry)) {
+			return false;
+		}
+
+		OleMainStream mainStream(storage, documentEntry, inputStream);
+		if (mainStream.open(doReadFormattingData)) {
+			return readStream(mainStream);
+		}
+		ZLLogger::Instance().println("DocPlugin", "Cannot open OleMainStream");
+		return false;
+	}
+
+	ZLLogger::Instance().println("DocPlugin", "Broken OLE file");
+	return false;
+}
+
+// Reads the next text piece of the document and feeds its contents to the
+// subclass: ansiDataHandler() for ANSI pieces, ucs2SymbolHandler() once per
+// 16-bit unit otherwise. footnotesStartHandler() is called first when the
+// piece is a footnote piece.
+//
+// Returns false when there is no further piece, when the piece is of type
+// PIECE_OTHER, or when the stream cannot be positioned on the piece. A short
+// read is logged and the bytes that were obtained are still processed.
+bool OleStreamReader::readNextPiece(OleMainStream &stream) {
+	const OleMainStream::Pieces &pieces = stream.getPieces();
+	if (myNextPieceNumber >= pieces.size()) {
+		return false;
+	}
+	const OleMainStream::Piece &piece = pieces.at(myNextPieceNumber);
+
+	if (piece.Type == OleMainStream::Piece::PIECE_FOOTNOTE) {
+		footnotesStartHandler();
+	} else if (piece.Type == OleMainStream::Piece::PIECE_OTHER) {
+		return false;
+	}
+
+	if (!stream.seek(piece.Offset, true)) {
+		//TODO maybe in that case we should take next piece?
+		return false;
+	}
+	char *textBuffer = new char[piece.Length];
+	std::size_t readBytes = stream.read(textBuffer, piece.Length);
+	if (readBytes != (std::size_t)piece.Length) {
+		ZLLogger::Instance().println("DocPlugin", "not all bytes have been read from piece");
+	}
+
+	if (!piece.IsANSI) {
+		// Decode complete 2-byte units only. With an odd byte count the last
+		// unit would take its high byte from beyond the data that was read
+		// (and beyond the allocation when piece.Length itself is odd).
+		for (std::size_t i = 0; i + 1 < readBytes; i += 2) {
+			ucs2SymbolHandler(OleUtil::getU2Bytes(textBuffer, i));
+		}
+	} else {
+		ansiDataHandler(textBuffer, readBytes);
+	}
+	++myNextPieceNumber;
+	delete[] textBuffer;
+
+	return true;
+}
diff --git a/reader/src/formats/doc/OleStreamReader.h b/reader/src/formats/doc/OleStreamReader.h
new file mode 100644
index 0000000..2d2a0ae
--- /dev/null
+++ b/reader/src/formats/doc/OleStreamReader.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __OLESTREAMREADER_H__
+#define __OLESTREAMREADER_H__
+
+#include <ZLUnicodeUtil.h>
+
+#include "OleMainStream.h"
+
+// Base class for readers of Word documents stored in OLE containers. It
+// opens the container and walks over the text pieces; subclasses receive
+// the raw text through the *Handler() callbacks and drive the reading from
+// readStream().
+class OleStreamReader {
+
+public:
+	OleStreamReader();
+	// The class is a polymorphic base (it declares pure virtual functions),
+	// so destruction through a base pointer must reach the derived destructor.
+	virtual ~OleStreamReader() {}
+
+	// Opens the document contained in `stream` and processes it via
+	// readStream(); returns false if the document cannot be opened.
+	bool readDocument(shared_ptr<ZLInputStream> stream, bool doReadFormattingData);
+
+protected:
+	// Processes an opened main stream; called by readDocument().
+	virtual bool readStream(OleMainStream &stream) = 0;
+
+	// Feeds the next text piece to the handlers below; returns false when
+	// no further piece can be read.
+	bool readNextPiece(OleMainStream &stream);
+
+	virtual void ansiDataHandler(const char *buffer, std::size_t len) = 0;
+	virtual void ucs2SymbolHandler(ZLUnicodeUtil::Ucs2Char symbol) = 0;
+	virtual void footnotesStartHandler() = 0;
+
+private:
+	// Index of the piece that the next readNextPiece() call will process.
+	std::size_t myNextPieceNumber;
+};
+
+#endif /* __OLESTREAMREADER_H__ */
diff --git a/reader/src/formats/doc/OleUtil.cpp b/reader/src/formats/doc/OleUtil.cpp
new file mode 100644
index 0000000..2e8f685
--- /dev/null
+++ b/reader/src/formats/doc/OleUtil.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include "OleUtil.h"
+
+// Reads four bytes starting at buffer[offset] as a little-endian 32-bit
+// value and returns it as a signed int. The caller must guarantee that
+// buffer holds at least offset + 4 bytes.
+int OleUtil::get4Bytes(const char *buffer, unsigned int offset) {
+	const unsigned char *buf = (const unsigned char*)buffer;
+	// Assemble the value as unsigned: shifting a byte >= 0x80 left by 24 as a
+	// signed int would overflow into the sign bit.
+	const unsigned int value =
+		(unsigned int)buf[offset]
+		| ((unsigned int)buf[offset + 1] << 8)
+		| ((unsigned int)buf[offset + 2] << 16)
+		| ((unsigned int)buf[offset + 3] << 24);
+	return (int)value;
+}
+
+// Reads four bytes starting at buffer[offset] as a little-endian unsigned
+// 32-bit value. The caller must guarantee that buffer holds at least
+// offset + 4 bytes.
+unsigned int OleUtil::getU4Bytes(const char *buffer, unsigned int offset) {
+	const unsigned char *bytes = (const unsigned char*)buffer + offset;
+	unsigned int value = 0;
+	// Fold from the most significant byte (index 3) down to the least.
+	for (int index = 3; index >= 0; --index) {
+		value = (value << 8) | (unsigned int)bytes[index];
+	}
+	return value;
+}
+
+// Reads two bytes starting at buffer[offset] as a little-endian unsigned
+// 16-bit value. The caller must guarantee that buffer holds at least
+// offset + 2 bytes.
+unsigned int OleUtil::getU2Bytes(const char *buffer, unsigned int offset) {
+	const unsigned char *bytes = (const unsigned char*)buffer;
+	const unsigned int low = bytes[offset];
+	const unsigned int high = bytes[offset + 1];
+	return (high << 8) | low;
+}
+
+// Returns the byte at buffer[offset] as an unsigned value in 0..255.
+unsigned int OleUtil::getU1Byte(const char *buffer, unsigned int offset) {
+	const unsigned char *bytes = (const unsigned char*)buffer;
+	const unsigned char byte = bytes[offset];
+	return byte;
+}
+
+// Returns the byte at buffer[offset] as an int. The byte is read as
+// unsigned, so the result is always in 0..255 (never negative).
+int OleUtil::get1Byte(const char *buffer, unsigned int offset) {
+	const unsigned char *bytes = (const unsigned char*)buffer;
+	const unsigned char byte = bytes[offset];
+	return byte;
+}
+
+
+
diff --git a/reader/src/formats/doc/OleUtil.h b/reader/src/formats/doc/OleUtil.h
new file mode 100644
index 0000000..531c769
--- /dev/null
+++ b/reader/src/formats/doc/OleUtil.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __OLEUTIL_H__
+#define __OLEUTIL_H__
+
+// Helpers that decode little-endian integers from a raw byte buffer.
+// `offset` is the index of the first byte to read; no bounds checking is
+// performed by these functions.
+class OleUtil {
+public:
+ // 4 bytes, returned as signed int.
+ static int get4Bytes(const char *buffer, unsigned int offset);
+ // 4 bytes, returned as unsigned int.
+ static unsigned int getU4Bytes(const char *buffer, unsigned int offset);
+ // 2 bytes, returned as unsigned int.
+ static unsigned int getU2Bytes(const char *buffer, unsigned int offset);
+ // 1 byte, returned as unsigned int (0..255).
+ static unsigned int getU1Byte(const char *buffer, unsigned int offset);
+ // 1 byte, returned as int; the byte is read as unsigned, so 0..255.
+ static int get1Byte(const char *buffer, unsigned int offset);
+};
+
+#endif /* __OLEUTIL_H__ */
diff --git a/reader/src/formats/docbook/DocBookBookReader.cpp b/reader/src/formats/docbook/DocBookBookReader.cpp
new file mode 100644
index 0000000..eada90c
--- /dev/null
+++ b/reader/src/formats/docbook/DocBookBookReader.cpp
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLInputStream.h>
+
+#include "DocBookBookReader.h"
+
+#include "../../bookmodel/BookModel.h"
+#include "../../model/Paragraph.h"
+
+// Creates a reader that fills `model`. The main text model is selected
+// immediately; text collection starts switched off and is enabled by
+// startElementHandler() when a _SECT1 element opens.
+DocBookBookReader::DocBookBookReader(BookModel &model) : BookReader(model), myReadText(false) {
+    setMainTextModel();
+}
+
+// Character-data callback: forwards the `len` bytes at `text` unchanged
+// to addDataToBuffer().
+void DocBookBookReader::characterDataHandler(const char *text, std::size_t len) {
+ addDataToBuffer(text, len);
+}
+
+// Opening-element callback. `tag` is one of the DocBookReader tag codes;
+// the attribute array is ignored. Every action taken here has its
+// counterpart in endElementHandler(), so the two must be kept in sync.
+void DocBookBookReader::startElementHandler(int tag, const char **) {
+ switch (tag) {
+ case _SECT1:
+ // Entering a section turns text collection on.
+ myReadText = true;
+ pushKind(REGULAR);
+ beginContentsParagraph();
+ break;
+ case _PARA:
+ // Paragraphs are opened only while text collection is on.
+ if (myReadText) {
+ beginParagraph();
+ }
+ break;
+ case _TITLE:
+ enterTitle();
+ pushKind(SECTION_TITLE);
+ if (myReadText) {
+ beginParagraph();
+ }
+ break;
+ case _EMPHASIS:
+ addControl(EMPHASIS, true);
+ break;
+ case _CITETITLE:
+ addControl(CITE, true);
+ break;
+ case _ULINK:
+ case _EMAIL:
+ // Links and e-mail addresses share the CODE control.
+ addControl(CODE, true);
+ break;
+ case _BLOCKQUOTE:
+ pushKind(STRONG);
+ break;
+ default:
+ // All other tags are ignored.
+ break;
+ }
+}
+
+// Closing-element callback; undoes what startElementHandler() did for
+// the same tag code.
+void DocBookBookReader::endElementHandler(int tag) {
+ switch (tag) {
+ case _SECT1:
+ // Leaving a section turns text collection off again.
+ myReadText = false;
+ popKind();
+ endContentsParagraph();
+ insertEndOfSectionParagraph();
+ break;
+ case _PARA:
+ // Called unconditionally, although beginParagraph() is issued only
+ // while myReadText is set.
+ endParagraph();
+ break;
+ case _TITLE:
+ endParagraph();
+ popKind();
+ endContentsParagraph();
+ exitTitle();
+ break;
+ case _EMPHASIS:
+ addControl(EMPHASIS, false);
+ break;
+ case _CITETITLE:
+ addControl(CITE, false);
+ break;
+ case _ULINK:
+ case _EMAIL:
+ addControl(CODE, false);
+ break;
+ case _BLOCKQUOTE:
+ popKind();
+ break;
+ default:
+ // All other tags are ignored.
+ break;
+ }
+}
+
+// Parses `stream` by handing it to readDocument(); the result of
+// readDocument() is discarded because this method returns void.
+void DocBookBookReader::readBook(shared_ptr<ZLInputStream> stream) {
+ readDocument(stream);
+}
diff --git a/reader/src/formats/docbook/DocBookBookReader.h b/reader/src/formats/docbook/DocBookBookReader.h
new file mode 100644
index 0000000..c226184
--- /dev/null
+++ b/reader/src/formats/docbook/DocBookBookReader.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __DOCBOOKBOOKREADER_H__
+#define __DOCBOOKBOOKREADER_H__
+
+#include "DocBookReader.h"
+#include "../../bookmodel/BookReader.h"
+
+class BookModel;
+
+// Fills a BookModel from a DocBook document: the element and
+// character-data callbacks below translate DocBook tag codes into
+// BookReader calls.
+class DocBookBookReader : public BookReader, public DocBookReader {
+
+public:
+ DocBookBookReader(BookModel &model);
+ ~DocBookBookReader();
+ // Parses the document available on `stream`.
+ void readBook(shared_ptr<ZLInputStream> stream);
+
+ // Parser callbacks; `tag` is a DocBookReader::TagCode value.
+ void startElementHandler(int tag, const char **attributes);
+ void endElementHandler(int tag);
+ void characterDataHandler(const char *text, std::size_t len);
+
+private:
+ // Set while inside a _SECT1 element; paragraphs are opened only while
+ // this flag is true.
+ bool myReadText;
+};
+
+inline DocBookBookReader::~DocBookBookReader() {}
+
+#endif /* __DOCBOOKBOOKREADER_H__ */
diff --git a/reader/src/formats/docbook/DocBookDescriptionReader.cpp b/reader/src/formats/docbook/DocBookDescriptionReader.cpp
new file mode 100644
index 0000000..bcd4ae4
--- /dev/null
+++ b/reader/src/formats/docbook/DocBookDescriptionReader.cpp
@@ -0,0 +1,137 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLInputStream.h>
+#include <ZLUnicodeUtil.h>
+
+#include "DocBookDescriptionReader.h"
+
+#include "../../library/Book.h"
+#include "../../library/Author.h"
+
+// Prepares a metadata reader that writes title and authors into `book`.
+// All parsing flags start cleared and the book language defaults to "en".
+// myReturnCode is initialized as well: it was previously left
+// indeterminate until the first _SECT1 element set it to true.
+DocBookDescriptionReader::DocBookDescriptionReader(Book &book) :
+    myBook(book),
+    myReturnCode(false),
+    myReadTitle(false),
+    myReadAuthor(false),
+    myDepth(0) {
+    // One flag per author name part: first name, other name, surname.
+    for (int i = 0; i < 3; ++i) {
+        myReadAuthorName[i] = false;
+    }
+    myBook.setLanguage("en");
+}
+
+// Character-data callback. Inside a title the text is appended to the
+// book title; otherwise it is appended to the first author name part
+// that is currently being read, if any.
+void DocBookDescriptionReader::characterDataHandler(const char *text, std::size_t len) {
+    if (myReadTitle) {
+        myBook.setTitle(myBook.title() + std::string(text, len));
+        return;
+    }
+
+    // Locate the first active name part (0 = first, 1 = other, 2 = surname).
+    int part = 0;
+    while (part < 3 && !myReadAuthorName[part]) {
+        ++part;
+    }
+    if (part < 3) {
+        myAuthorNames[part].append(text, len);
+    }
+}
+
+// Opening-element callback. Tracks nesting depth and raises the flags
+// that tell characterDataHandler() where incoming text belongs. The
+// attribute array is ignored.
+void DocBookDescriptionReader::startElementHandler(int tag, const char **) {
+ // The depth is incremented before the switch, so the depth checks
+ // below already include the element being opened.
+ ++myDepth;
+ switch (tag) {
+ case _SECT1:
+ myReturnCode = true;
+ // NOTE(review): myDoBreak is not declared in
+ // DocBookDescriptionReader.h; presumably it comes from a base
+ // class (ZLXMLReader) -- confirm it exists in the current API.
+ myDoBreak = true;
+ break;
+ case _TITLE:
+ // Only a title at depth 2 is taken as the book title.
+ if (myDepth == 2) {
+ myReadTitle = true;
+ }
+ break;
+ case _AUTHOR:
+ // Only an author element at depth 3 is considered.
+ if (myDepth == 3) {
+ myReadAuthor = true;
+ }
+ break;
+ case _FIRSTNAME:
+ if (myReadAuthor) {
+ myReadAuthorName[0] = true;
+ }
+ break;
+ case _OTHERNAME:
+ if (myReadAuthor) {
+ myReadAuthorName[1] = true;
+ }
+ break;
+ case _SURNAME:
+ if (myReadAuthor) {
+ myReadAuthorName[2] = true;
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+// Closing-element callback. Lowers the flags raised by
+// startElementHandler() and, when an author element closes, assembles
+// the collected name parts into an Author and stores it in the book.
+void DocBookDescriptionReader::endElementHandler(int tag) {
+ --myDepth;
+ switch (tag) {
+ case _TITLE:
+ myReadTitle = false;
+ break;
+ case _AUTHOR: {
+ // Runs for every closing author element, whether or not
+ // myReadAuthor was set for it.
+ ZLUnicodeUtil::utf8Trim(myAuthorNames[0]);
+ ZLUnicodeUtil::utf8Trim(myAuthorNames[1]);
+ ZLUnicodeUtil::utf8Trim(myAuthorNames[2]);
+ // Join first name, other name and surname with single spaces,
+ // skipping the separator where a part is empty.
+ std::string fullName = myAuthorNames[0];
+ if (!fullName.empty() && !myAuthorNames[1].empty()) {
+ fullName += ' ';
+ }
+ fullName += myAuthorNames[1];
+ if (!fullName.empty() && !myAuthorNames[2].empty()) {
+ fullName += ' ';
+ }
+ fullName += myAuthorNames[2];
+ // The surname is passed as the second argument (presumably the
+ // sort key -- confirm against Author::create).
+ shared_ptr<Author> author = Author::create(fullName, myAuthorNames[2]);
+ if (!author.isNull()) {
+ myBook.authors().add( author );
+ }
+ }
+ // Still part of the _AUTHOR case: reset the buffers for the next author.
+ myAuthorNames[0].erase();
+ myAuthorNames[1].erase();
+ myAuthorNames[2].erase();
+ myReadAuthor = false;
+ break;
+ case _FIRSTNAME:
+ myReadAuthorName[0] = false;
+ break;
+ case _OTHERNAME:
+ myReadAuthorName[1] = false;
+ break;
+ case _SURNAME:
+ myReadAuthorName[2] = false;
+ break;
+ default:
+ break;
+ }
+}
+
+// Parses `stream` and returns the result of readDocument(). If parsing
+// produced no author, a default-constructed Author is added so the
+// author list is never empty.
+// NOTE(review): this uses authors().push_back() while endElementHandler()
+// uses authors().add(); confirm which one the authors container offers.
+bool DocBookDescriptionReader::readMetaInfo(shared_ptr<ZLInputStream> stream) {
+ bool code = readDocument(stream);
+ if (myBook.authors().empty()) {
+ myBook.authors().push_back( new Author() );
+ }
+ return code;
+}
diff --git a/reader/src/formats/docbook/DocBookDescriptionReader.h b/reader/src/formats/docbook/DocBookDescriptionReader.h
new file mode 100644
index 0000000..d9f4aa3
--- /dev/null
+++ b/reader/src/formats/docbook/DocBookDescriptionReader.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __DOCBOOKDESCRIPTIONREADER_H__
+#define __DOCBOOKDESCRIPTIONREADER_H__
+
+#include <string>
+
+#include "DocBookReader.h"
+
+class Book;
+
+// Extracts book metadata (title and authors) from a DocBook document
+// and stores it in the Book passed to the constructor.
+class DocBookDescriptionReader : public DocBookReader {
+
+public:
+ DocBookDescriptionReader(Book &book);
+ ~DocBookDescriptionReader();
+ // Parses `stream`; returns the result of the underlying document parse.
+ bool readMetaInfo(shared_ptr<ZLInputStream> stream);
+
+ // Parser callbacks; `tag` is a DocBookReader::TagCode value.
+ void startElementHandler(int tag, const char **attributes);
+ void endElementHandler(int tag);
+ void characterDataHandler(const char *text, std::size_t len);
+
+private:
+ // Book that receives the metadata.
+ Book &myBook;
+
+ // Set to true when a _SECT1 element is opened.
+ bool myReturnCode;
+
+ // State flags: inside the book title / inside an author element /
+ // inside one of the author name parts.
+ bool myReadTitle;
+ bool myReadAuthor;
+ bool myReadAuthorName[3];
+
+ // Text collected for the current author:
+ // [0] first name, [1] other name, [2] surname.
+ std::string myAuthorNames[3];
+
+ // Number of currently open elements as counted by the handlers.
+ int myDepth;
+};
+
+inline DocBookDescriptionReader::~DocBookDescriptionReader() {}
+
+#endif /* __DOCBOOKDESCRIPTIONREADER_H__ */
diff --git a/reader/src/formats/docbook/DocBookPlugin.cpp b/reader/src/formats/docbook/DocBookPlugin.cpp
new file mode 100644
index 0000000..1b890a6
--- /dev/null
+++ b/reader/src/formats/docbook/DocBookPlugin.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLFile.h>
+#include <ZLInputStream.h>
+
+#include "DocBookPlugin.h"
+#include "DocBookDescriptionReader.h"
+#include "DocBookBookReader.h"
+#include "../../library/Book.h"
+
+// Accepts a file only when its extension is exactly "xml" (the
+// comparison is case-sensitive).
+bool DocBookPlugin::acceptsFile(const std::string &extension) const {
+ return extension == "xml";
+}
+
+// Reads title and authors of `book` through a DocBookDescriptionReader.
+// NOTE(review): `path` is not a parameter of this function and is not
+// declared in DocBookPlugin.h; unless a base class provides it, this
+// does not compile. The input stream should presumably be derived from
+// `book` -- confirm against the current Book API.
+bool DocBookPlugin::readMetaInfo(Book &book) const {
+ return DocBookDescriptionReader(book).readMetaInfo(ZLFile(path).inputStream());
+}
+
+// No language or encoding detection is done for DocBook files: `book`
+// is left untouched and success is always reported.
+bool DocBookPlugin::readLanguageAndEncoding(Book &book) const {
+ (void)book;
+ return true;
+}
+
+// Fills `model` by parsing the book file with a DocBookBookReader.
+// NOTE(review): `book` is not a parameter of this function and is not
+// declared in DocBookPlugin.h; unless a base class provides it, this
+// does not compile. The Book should presumably be obtained from
+// `model` -- confirm against the current BookModel API.
+bool DocBookPlugin::readModel(BookModel &model) const {
+ return DocBookBookReader(model).readDocument(ZLFile(book.fileName()).inputStream());
+}
diff --git a/reader/src/formats/docbook/DocBookPlugin.h b/reader/src/formats/docbook/DocBookPlugin.h
new file mode 100644
index 0000000..324b2be
--- /dev/null
+++ b/reader/src/formats/docbook/DocBookPlugin.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __DOCBOOKPLUGIN_H__
+#define __DOCBOOKPLUGIN_H__
+
+#include "../FormatPlugin.h"
+
+// Format plugin for DocBook documents stored in ".xml" files.
+class DocBookPlugin : public FormatPlugin {
+
+public:
+ DocBookPlugin();
+ ~DocBookPlugin();
+ // Always true: this plugin reports that it can supply metadata.
+ bool providesMetaInfo() const;
+ // Takes a file extension string (not a file object).
+ bool acceptsFile(const std::string &extension) const;
+ bool readMetaInfo(Book &book) const;
+ bool readLanguageAndEncoding(Book &book) const;
+ bool readModel(BookModel &model) const;
+};
+
+inline DocBookPlugin::DocBookPlugin() {}
+inline DocBookPlugin::~DocBookPlugin() {}
+inline bool DocBookPlugin::providesMetaInfo() const { return true; }
+
+#endif /* __DOCBOOKPLUGIN_H__ */
diff --git a/reader/src/formats/docbook/DocBookReader.cpp b/reader/src/formats/docbook/DocBookReader.cpp
new file mode 100644
index 0000000..73c17d1
--- /dev/null
+++ b/reader/src/formats/docbook/DocBookReader.cpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLStringUtil.h>
+#include <ZLFile.h>
+#include <ZLDir.h>
+
+#include "DocBookReader.h"
+
+// Maps DocBook element names to DocBookReader tag codes. The entry with
+// a null name terminates the table and carries the _UNKNOWN code.
+// NOTE(review): the TagCode enum also defines _FIRSTTERM, _FILENAME,
+// _ITEMIZEDLIST and _LISTITEM, which have no entry here.
+static const DocBookReader::Tag TAGS[] = {
+ {"article", DocBookReader::_ARTICLE},
+ {"title", DocBookReader::_TITLE},
+ {"articleinfo", DocBookReader::_ARTICLEINFO},
+ {"author", DocBookReader::_AUTHOR},
+ {"firstname", DocBookReader::_FIRSTNAME},
+ {"othername", DocBookReader::_OTHERNAME},
+ {"surname", DocBookReader::_SURNAME},
+ {"affiliation", DocBookReader::_AFFILIATION},
+ {"orgname", DocBookReader::_ORGNAME},
+ {"ulink", DocBookReader::_ULINK},
+ {"address", DocBookReader::_ADDRESS},
+ {"email", DocBookReader::_EMAIL},
+ {"pubdate", DocBookReader::_PUBDATE},
+ {"releaseinfo", DocBookReader::_RELEASEINFO},
+ {"copyright", DocBookReader::_COPYRIGHT},
+ {"year", DocBookReader::_YEAR},
+ {"holder", DocBookReader::_HOLDER},
+ {"legalnotice", DocBookReader::_LEGALNOTICE},
+ {"para", DocBookReader::_PARA},
+ {"revhistory", DocBookReader::_REVHISTORY},
+ {"revision", DocBookReader::_REVISION},
+ {"revnumber", DocBookReader::_REVNUMBER},
+ {"date", DocBookReader::_DATE},
+ {"authorinitials", DocBookReader::_AUTHORINITIALS},
+ {"revremark", DocBookReader::_REVREMARK},
+ {"abstract", DocBookReader::_ABSTRACT},
+ {"sect1", DocBookReader::_SECT1},
+ {"emphasis", DocBookReader::_EMPHASIS},
+ {"blockquote", DocBookReader::_BLOCKQUOTE},
+ {"citetitle", DocBookReader::_CITETITLE},
+ {"link", DocBookReader::_LINK},
+ {"foreignphrase", DocBookReader::_FOREIGNPHRASE},
+ {"part", DocBookReader::_PART},
+ {"preface", DocBookReader::_PREFACE},
+ {"chapter", DocBookReader::_CHAPTER},
+ {0, DocBookReader::_UNKNOWN}
+};
+
+// Returns the file-local name-to-code table; it ends with an entry
+// whose tagName is null.
+const DocBookReader::Tag *DocBookReader::tags() const {
+ return TAGS;
+}
+
+// Returns the external DTD list registered under the "docbook" key in
+// EntityFilesCollector.
+// NOTE(review): none of the headers included by this file is visibly
+// the one declaring EntityFilesCollector -- confirm the include.
+const std::vector<std::string> &DocBookReader::externalDTDs() const {
+ return EntityFilesCollector::Instance().externalDTDs("docbook");
+}
diff --git a/reader/src/formats/docbook/DocBookReader.h b/reader/src/formats/docbook/DocBookReader.h
new file mode 100644
index 0000000..a18f358
--- /dev/null
+++ b/reader/src/formats/docbook/DocBookReader.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __DOCBOOKREADER_H__
+#define __DOCBOOKREADER_H__
+
+#include <ZLXMLReader.h>
+
+// Common base of the DocBook readers: defines the tag codes, exposes
+// the element-name table and supplies the external DTD list.
+// Construction is restricted to derived classes.
+class DocBookReader : public ZLXMLReader {
+
+public:
+ // NOTE(review): declared here, but DocBookReader.cpp contains no
+ // definition for it -- confirm it is defined (or unused) elsewhere.
+ static std::string DTDDirectory;
+
+public:
+ // One row of the element-name table returned by tags().
+ struct Tag {
+ const char *tagName;
+ int tagCode;
+ };
+
+public:
+//protected:
+ // Codes passed as `tag` to the start/end element handlers.
+ enum TagCode {
+ _ARTICLE,
+ _TITLE,
+ _ARTICLEINFO,
+ _AUTHOR,
+ _FIRSTNAME,
+ _OTHERNAME,
+ _SURNAME,
+ _AFFILIATION,
+ _ORGNAME,
+ _ULINK,
+ _ADDRESS,
+ _EMAIL,
+ _PUBDATE,
+ _RELEASEINFO,
+ _COPYRIGHT,
+ _YEAR,
+ _HOLDER,
+ _LEGALNOTICE,
+ _PARA,
+ _REVHISTORY,
+ _REVISION,
+ _REVNUMBER,
+ _DATE,
+ _AUTHORINITIALS,
+ _REVREMARK,
+ _ABSTRACT,
+ _SECT1,
+ _EMPHASIS,
+ _BLOCKQUOTE,
+ _CITETITLE,
+ _LINK,
+ _FOREIGNPHRASE,
+ // The next four codes have no entry in the table in DocBookReader.cpp.
+ _FIRSTTERM,
+ _FILENAME,
+ _ITEMIZEDLIST,
+ _LISTITEM,
+ _PART,
+ _PREFACE,
+ _CHAPTER,
+ // Code carried by the terminating entry of the table.
+ _UNKNOWN
+ };
+
+protected:
+ DocBookReader();
+
+public:
+ ~DocBookReader();
+ // Element-name table, terminated by an entry with a null tagName.
+ const Tag *tags() const;
+
+protected:
+ // External DTDs registered for "docbook".
+ const std::vector<std::string> &externalDTDs() const;
+};
+
+inline DocBookReader::DocBookReader() {}
+inline DocBookReader::~DocBookReader() {}
+
+#endif /* __DOCBOOKREADER_H__ */
diff --git a/reader/src/formats/dummy/DummyBookReader.cpp b/reader/src/formats/dummy/DummyBookReader.cpp
new file mode 100644
index 0000000..2684ebf
--- /dev/null
+++ b/reader/src/formats/dummy/DummyBookReader.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLInputStream.h>
+
+#include "DummyBookReader.h"
+#include "../../bookmodel/BookModel.h"
+
+// Template constructor: only builds the embedded BookReader for `model`.
+DummyBookReader::DummyBookReader(BookModel &model) : myModelReader(model) {
+}
+
+/*
+void DummyBookReader::characterDataHandler(const char *text, std::size_t len) {
+}
+
+void DummyBookReader::startElementHandler(int tag, const char **xmlattributes) {
+}
+
+void DummyBookReader::endElementHandler(int tag) {
+}
+*/
+
+// Template stub: `stream` is not read and success is always reported.
+// A real plugin is expected to replace this body with actual parsing.
+bool DummyBookReader::readBook(shared_ptr<ZLInputStream> stream) {
+ //return readDocument(stream);
+ return true;
+}
diff --git a/reader/src/formats/dummy/DummyBookReader.h b/reader/src/formats/dummy/DummyBookReader.h
new file mode 100644
index 0000000..ba6bcf8
--- /dev/null
+++ b/reader/src/formats/dummy/DummyBookReader.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __DUMMYBOOKREADER_H__
+#define __DUMMYBOOKREADER_H__
+
+#include "../../bookmodel/BookReader.h"
+
+// Template for a format-specific book reader. createPlugin.sh copies
+// this file and replaces "Dummy" with the new format name.
+class DummyBookReader {
+
+public:
+ DummyBookReader(BookModel &model);
+ ~DummyBookReader();
+ // Stub in the template: always returns true without reading `stream`.
+ bool readBook(shared_ptr<ZLInputStream> stream);
+
+ /*
+ void startElementHandler(int tag, const char **attributes);
+ void endElementHandler(int tag);
+ void characterDataHandler(const char *text, std::size_t len);
+ */
+
+private:
+ // Helper through which a concrete reader would populate the model.
+ BookReader myModelReader;
+};
+
+inline DummyBookReader::~DummyBookReader() {}
+
+#endif /* __DUMMYBOOKREADER_H__ */
diff --git a/reader/src/formats/dummy/DummyMetaInfoReader.cpp b/reader/src/formats/dummy/DummyMetaInfoReader.cpp
new file mode 100644
index 0000000..5dd13c5
--- /dev/null
+++ b/reader/src/formats/dummy/DummyMetaInfoReader.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLInputStream.h>
+
+#include "DummyMetaInfoReader.h"
+
+// Template constructor: only stores the reference to `book`.
+DummyMetaInfoReader::DummyMetaInfoReader(Book &book) : myBook(book) {
+}
+
+/*
+void DummyMetaInfoReader::characterDataHandler(const char *text, std::size_t len) {
+}
+
+void DummyMetaInfoReader::startElementHandler(int tag, const char **) {
+}
+
+void DummyMetaInfoReader::endElementHandler(int tag) {
+}
+*/
+
+// Template stub: `stream` is not read and failure is always reported.
+bool DummyMetaInfoReader::readMetaInfo(shared_ptr<ZLInputStream> stream) {
+ return false;
+}
diff --git a/reader/src/formats/dummy/DummyMetaInfoReader.h b/reader/src/formats/dummy/DummyMetaInfoReader.h
new file mode 100644
index 0000000..818d996
--- /dev/null
+++ b/reader/src/formats/dummy/DummyMetaInfoReader.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __DUMMYMETAINFOREADER_H__
+#define __DUMMYMETAINFOREADER_H__
+
+#include <string>
+
+class Book;
+
+// Template for a format-specific metadata reader. createPlugin.sh
+// copies this file and replaces "Dummy" with the new format name.
+// NOTE(review): shared_ptr and ZLInputStream are used below, but this
+// header includes only <string>; it relies on the including file to
+// have declared them first.
+class DummyMetaInfoReader {
+
+public:
+ DummyMetaInfoReader(Book &book);
+ ~DummyMetaInfoReader();
+ // Stub in the template: always returns false without reading `stream`.
+ bool readMetaInfo(shared_ptr<ZLInputStream> stream);
+
+ /*
+ void startElementHandler(int tag, const char **attributes);
+ void endElementHandler(int tag);
+ void characterDataHandler(const char *text, std::size_t len);
+ */
+
+private:
+ // Book that would receive the metadata.
+ Book &myBook;
+};
+
+inline DummyMetaInfoReader::~DummyMetaInfoReader() {}
+
+#endif /* __DUMMYMETAINFOREADER_H__ */
diff --git a/reader/src/formats/dummy/DummyPlugin.cpp b/reader/src/formats/dummy/DummyPlugin.cpp
new file mode 100644
index 0000000..bfe0662
--- /dev/null
+++ b/reader/src/formats/dummy/DummyPlugin.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLFile.h>
+#include <ZLInputStream.h>
+
+#include "DummyPlugin.h"
+#include "DummyMetaInfoReader.h"
+#include "DummyBookReader.h"
+#include "../../library/Book.h"
+
+// Nothing to set up in the template plugin.
+DummyPlugin::DummyPlugin() {
+}
+
+// Nothing to release in the template plugin.
+DummyPlugin::~DummyPlugin() {
+}
+
+// Always true: the plugin reports that it can supply metadata.
+bool DummyPlugin::providesMetaInfo() const {
+ return true;
+}
+
+// Accepts a file only when its extension is exactly "dummy".
+bool DummyPlugin::acceptsFile(const ZLFile &file) const {
+ return file.extension() == "dummy";
+}
+
+// Reads the metadata of `book` through a DummyMetaInfoReader.
+// NOTE(review): `path` is not a parameter of this function and is not
+// declared in DummyPlugin.h; a plugin generated from this template must
+// replace it with a stream obtained from `book`.
+bool DummyPlugin::readMetaInfo(Book &book) const {
+ return DummyMetaInfoReader(book).readMetaInfo(ZLFile(path).inputStream());
+}
+
+// Template stub: `book` is left untouched and success is always reported.
+bool DummyPlugin::readLanguageAndEncoding(Book &book) const {
+ (void)book;
+ return true;
+}
+
+// Fills `model` through a DummyBookReader.
+// NOTE(review): `book` is not a parameter of this function and is not
+// declared in DummyPlugin.h; a plugin generated from this template must
+// obtain the book from `model` first.
+bool DummyPlugin::readModel(BookModel &model) const {
+ return DummyBookReader(model).readBook(ZLFile(book.fileName()).inputStream());
+}
+
+// Returns the cover image produced by a DummyCoverReader for `file`.
+// NOTE(review): no DummyCoverReader header is included by this file,
+// and createPlugin.sh copies only Dummy*.h / Dummy*.cpp -- a generated
+// plugin has to provide that class itself.
+shared_ptr<const ZLImage> DummyPlugin::coverImage(const ZLFile &file) const {
+ return DummyCoverReader(file).readCover();
+}
diff --git a/reader/src/formats/dummy/DummyPlugin.h b/reader/src/formats/dummy/DummyPlugin.h
new file mode 100644
index 0000000..073449c
--- /dev/null
+++ b/reader/src/formats/dummy/DummyPlugin.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __DUMMYPLUGIN_H__
+#define __DUMMYPLUGIN_H__
+
+#include "../FormatPlugin.h"
+
+// Template format plugin. createPlugin.sh copies this file and replaces
+// "Dummy"/"DUMMY" with the name of the new format.
+class DummyPlugin : public FormatPlugin {
+
+public:
+ DummyPlugin();
+ ~DummyPlugin();
+ // Always true in the template.
+ bool providesMetaInfo() const;
+ // True for files whose extension is "dummy".
+ bool acceptsFile(const ZLFile &file) const;
+ bool readMetaInfo(Book &book) const;
+ // Always true in the template; does not modify `book`.
+ bool readLanguageAndEncoding(Book &book) const;
+ bool readModel(BookModel &model) const;
+ shared_ptr<const ZLImage> coverImage(const ZLFile &file) const;
+};
+
+#endif /* __DUMMYPLUGIN_H__ */
diff --git a/reader/src/formats/dummy/createPlugin.sh b/reader/src/formats/dummy/createPlugin.sh
new file mode 100755
index 0000000..aacc3d4
--- /dev/null
+++ b/reader/src/formats/dummy/createPlugin.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+#
+# Generates the skeleton of a new format plugin from the Dummy* templates.
+#
+# Must be run from the directory that contains the Dummy*.h / Dummy*.cpp
+# files. The templates are copied to ../<short_format_name>/ with "Dummy"
+# replaced by <camel_cased_format_name> and "DUMMY" replaced by
+# <upper_cased_format_name>, both in file contents and in file names.
+#
+# Exit status: 0 on success, 1 on wrong usage or if the target directory
+# cannot be created.
+
+if [ $# -ne 3 ]; then
+    echo "usage: $0 <short_format_name> <camel_cased_format_name> <upper_cased_format_name>" >&2
+    exit 1
+fi
+
+# mkdir fails when the directory already exists, so an existing plugin
+# is never overwritten.
+if mkdir "../$1"; then
+    for file in Dummy*.h Dummy*.cpp; do
+        target=$(echo "$file" | sed "s/Dummy/$2/")
+        sed -e "s/Dummy/$2/g" -e "s/DUMMY/$3/g" "$file" > "../$1/$target"
+    done
+else
+    exit 1
+fi
diff --git a/reader/src/formats/fb2/FB2BookReader.cpp b/reader/src/formats/fb2/FB2BookReader.cpp
new file mode 100644
index 0000000..f689343
--- /dev/null
+++ b/reader/src/formats/fb2/FB2BookReader.cpp
@@ -0,0 +1,336 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cstdlib>
+#include <cstring>
+
+#include <ZLInputStream.h>
+#include <ZLStringUtil.h>
+#include <ZLFileImage.h>
+
+#include <ZLTextParagraph.h>
+
+#include "FB2BookReader.h"
+#include "../../library/Book.h"
+#include "../../bookmodel/BookModel.h"
+
+// Binds the reader to the model through myModelReader and puts every parsing
+// flag and counter into its initial "nothing seen yet" state. The initializers
+// are listed in the members' declaration order.
+FB2BookReader::FB2BookReader(BookModel &model) :
+	mySectionDepth(0),
+	myBodyCounter(0),
+	myReadMainText(false),
+	myInsideCoverpage(false),
+	myParagraphsBeforeBodyNumber((std::size_t)-1),
+	myInsidePoem(false),
+	myModelReader(model),
+	myCurrentImageStart(-1),
+	myCurrentContentType(ZLMimeType::EMPTY),
+	mySectionStarted(false),
+	myInsideTitle(false) {
+}
+
+// Receives a chunk of character data.
+//  - While an accepted <binary> is being read (myCurrentImageId is set) only the
+//    position of its first chunk is recorded; the data itself is not used here.
+//  - Otherwise, if a paragraph is open, the text is added to the model and, inside
+//    a section title, to the contents data as well.
+void FB2BookReader::characterDataHandler(const char *text, std::size_t len) {
+	if (len == 0) {
+		return;
+	}
+	if (!myCurrentImageId.empty()) {
+		// Binary payload: no std::string copy is needed, just remember where it starts.
+		if (myCurrentImageStart == -1) {
+			myCurrentImageStart = getCurrentPosition();
+		}
+		return;
+	}
+	if (!myModelReader.paragraphIsOpen()) {
+		return;
+	}
+	const std::string str(text, len);
+	myModelReader.addData(str);
+	if (myInsideTitle) {
+		myModelReader.addContentsData(str);
+	}
+}
+
+// Always returns true.
+// NOTE(review): presumably this asks the ZLXMLReader base to resolve XML namespaces,
+// which the xlink "href" lookups via myHrefPredicate would need — confirm in ZLXMLReader.
+bool FB2BookReader::processNamespaces() const {
+ return true;
+}
+
+// Handles the start of an FB2 element.
+//   tag           - code produced by FB2Reader::tag() for the element name
+//   xmlattributes - attribute list as accepted by attributeValue()
+// Updates the parsing state (section depth, body counter, title/poem/coverpage
+// flags, pending <binary> image) and forwards the matching paragraph, kind,
+// control, hyperlink and image operations to myModelReader.
+void FB2BookReader::startElementHandler(int tag, const char **xmlattributes) {
+	const char *id = attributeValue(xmlattributes, "id");
+	// Every element carrying an id, except <binary>, gets a hyperlink label;
+	// outside the main text the id also selects the footnote text model.
+	if (id != 0 && tag != _BINARY) {
+		if (!myReadMainText) {
+			myModelReader.setFootnoteTextModel(id);
+		}
+		myModelReader.addHyperlinkLabel(id);
+	}
+	switch (tag) {
+		case _P:
+			if (mySectionStarted) {
+				mySectionStarted = false;
+			} else if (myInsideTitle) {
+				// Separate consecutive title paragraphs in the contents data.
+				static const std::string SPACE = " ";
+				myModelReader.addContentsData(SPACE);
+			}
+			myModelReader.beginParagraph();
+			break;
+		case _V:
+			myModelReader.pushKind(VERSE);
+			myModelReader.beginParagraph();
+			break;
+		case _SUBTITLE:
+			myModelReader.pushKind(SUBTITLE);
+			myModelReader.beginParagraph();
+			break;
+		case _TEXT_AUTHOR:
+			myModelReader.pushKind(AUTHOR);
+			myModelReader.beginParagraph();
+			break;
+		case _DATE:
+			myModelReader.pushKind(DATEKIND);
+			myModelReader.beginParagraph();
+			break;
+		case _CITE:
+			myModelReader.pushKind(CITE);
+			break;
+		case _SECTION:
+			if (myReadMainText) {
+				myModelReader.insertEndOfSectionParagraph();
+				++mySectionDepth;
+				myModelReader.beginContentsParagraph();
+				mySectionStarted = true;
+			}
+			break;
+		case _TITLE:
+			if (myInsidePoem) {
+				myModelReader.pushKind(POEM_TITLE);
+			} else if (mySectionDepth == 0) {
+				// Title outside any section.
+				myModelReader.insertEndOfSectionParagraph();
+				myModelReader.pushKind(TITLE);
+			} else {
+				myModelReader.pushKind(SECTION_TITLE);
+				myModelReader.enterTitle();
+				myInsideTitle = true;
+			}
+			break;
+		case _POEM:
+			myInsidePoem = true;
+			break;
+		case _STANZA:
+			myModelReader.pushKind(STANZA);
+			myModelReader.beginParagraph(ZLTextParagraph::BEFORE_SKIP_PARAGRAPH);
+			myModelReader.endParagraph();
+			break;
+		case _EPIGRAPH:
+			myModelReader.pushKind(EPIGRAPH);
+			break;
+		case _ANNOTATION:
+			// Before the first <body> the annotation goes into the main text model.
+			if (myBodyCounter == 0) {
+				myModelReader.setMainTextModel();
+			}
+			myModelReader.pushKind(ANNOTATION);
+			break;
+		case _COVERPAGE:
+			if (myBodyCounter == 0) {
+				myInsideCoverpage = true;
+				myModelReader.setMainTextModel();
+			}
+			break;
+		case _SUB:
+			myModelReader.addControl(SUB, true);
+			break;
+		case _SUP:
+			myModelReader.addControl(SUP, true);
+			break;
+		case _CODE:
+			myModelReader.addControl(CODE, true);
+			break;
+		case _STRIKETHROUGH:
+			myModelReader.addControl(STRIKETHROUGH, true);
+			break;
+		case _STRONG:
+			myModelReader.addControl(STRONG, true);
+			break;
+		case _EMPHASIS:
+			myModelReader.addControl(EMPHASIS, true);
+			break;
+		case _A:
+		{
+			const char *ref = attributeValue(xmlattributes, myHrefPredicate);
+			if (ref != 0) {
+				if (ref[0] == '#') {
+					// Local reference: a footnote when type="note", otherwise an internal link.
+					const char *type = attributeValue(xmlattributes, "type");
+					static const std::string NOTE = "note";
+					if ((type != 0) && (NOTE == type)) {
+						myHyperlinkType = FOOTNOTE;
+					} else {
+						myHyperlinkType = INTERNAL_HYPERLINK;
+					}
+					++ref; // skip the leading '#'
+				} else {
+					myHyperlinkType = EXTERNAL_HYPERLINK;
+				}
+				myModelReader.addHyperlinkControl(myHyperlinkType, ref);
+			} else {
+				// No href at all: still open a FOOTNOTE control so that the
+				// matching end tag closes the same kind.
+				myHyperlinkType = FOOTNOTE;
+				myModelReader.addControl(myHyperlinkType, true);
+			}
+			break;
+		}
+		case _IMAGE:
+		{
+			const char *ref = attributeValue(xmlattributes, myHrefPredicate);
+			const char *vOffset = attributeValue(xmlattributes, "voffset");
+			// Keep the parsed value as int: plain char is only 8 bits wide and its
+			// signedness is implementation-defined, so negative or large offsets
+			// were platform-dependent.
+			// NOTE(review): confirm the parameter type of BookReader::addImageReference.
+			const int offset = vOffset != 0 ? std::atoi(vOffset) : 0;
+			if (ref != 0 && *ref == '#') {
+				++ref;
+				// True while no paragraph has been added since the last <body> started.
+				const bool isCoverImage =
+					myParagraphsBeforeBodyNumber ==
+					myModelReader.model().bookTextModel()->paragraphsNumber();
+				// Skip the cover image when it is repeated at the very start of a body.
+				if (myCoverImageReference != ref || !isCoverImage) {
+					myModelReader.addImageReference(ref, offset);
+				}
+				if (myInsideCoverpage) {
+					myCoverImageReference = ref;
+				}
+			}
+			break;
+		}
+		case _BINARY:
+		{
+			const char *contentType = attributeValue(xmlattributes, "content-type");
+			if (contentType != 0) {
+				shared_ptr<ZLMimeType> contentMimeType = ZLMimeType::get(contentType);
+				// Accept the binary only if it has an id and a known, non text/xml type;
+				// characterDataHandler() then records where its data starts.
+				if ((!contentMimeType.isNull()) && (id != 0) && (ZLMimeType::TEXT_XML != contentMimeType)) {
+					myCurrentContentType = contentMimeType;
+					myCurrentImageId.assign(id);
+				}
+			}
+			break;
+		}
+		case _EMPTY_LINE:
+			myModelReader.beginParagraph(ZLTextParagraph::EMPTY_LINE_PARAGRAPH);
+			myModelReader.endParagraph();
+			break;
+		case _BODY:
+			++myBodyCounter;
+			myParagraphsBeforeBodyNumber = myModelReader.model().bookTextModel()->paragraphsNumber();
+			// The first body, and any body without a "name" attribute, is main text.
+			if ((myBodyCounter == 1) || (attributeValue(xmlattributes, "name") == 0)) {
+				myModelReader.setMainTextModel();
+				myReadMainText = true;
+			}
+			myModelReader.pushKind(REGULAR);
+			break;
+		default:
+			break;
+	}
+}
+
+// Handles the end of an FB2 element identified by its tag code: closes the
+// paragraphs, kinds and controls opened by startElementHandler(), restores the
+// parsing flags and, at the end of an accepted <binary>, registers the image.
+void FB2BookReader::endElementHandler(int tag) {
+ switch (tag) {
+ case _P:
+ myModelReader.endParagraph();
+ break;
+ case _V:
+ case _SUBTITLE:
+ case _TEXT_AUTHOR:
+ case _DATE:
+ myModelReader.popKind();
+ myModelReader.endParagraph();
+ break;
+ case _CITE:
+ myModelReader.popKind();
+ break;
+ case _SECTION:
+ if (myReadMainText) {
+ myModelReader.endContentsParagraph();
+ --mySectionDepth;
+ mySectionStarted = false;
+ } else {
+ // Outside the main text a section end detaches the current text model.
+ myModelReader.unsetTextModel();
+ }
+ break;
+ case _TITLE:
+ // exitTitle() is called for every title, including those for which
+ // startElementHandler() did not call enterTitle().
+ myModelReader.exitTitle();
+ myModelReader.popKind();
+ myInsideTitle = false;
+ break;
+ case _POEM:
+ myInsidePoem = false;
+ break;
+ case _STANZA:
+ myModelReader.beginParagraph(ZLTextParagraph::AFTER_SKIP_PARAGRAPH);
+ myModelReader.endParagraph();
+ myModelReader.popKind();
+ break;
+ case _EPIGRAPH:
+ myModelReader.popKind();
+ break;
+ case _ANNOTATION:
+ myModelReader.popKind();
+ if (myBodyCounter == 0) {
+ myModelReader.insertEndOfSectionParagraph();
+ myModelReader.unsetTextModel();
+ }
+ break;
+ case _COVERPAGE:
+ if (myBodyCounter == 0) {
+ myInsideCoverpage = false;
+ myModelReader.insertEndOfSectionParagraph();
+ myModelReader.unsetTextModel();
+ }
+ break;
+ case _SUB:
+ myModelReader.addControl(SUB, false);
+ break;
+ case _SUP:
+ myModelReader.addControl(SUP, false);
+ break;
+ case _CODE:
+ myModelReader.addControl(CODE, false);
+ break;
+ case _STRIKETHROUGH:
+ myModelReader.addControl(STRIKETHROUGH, false);
+ break;
+ case _STRONG:
+ myModelReader.addControl(STRONG, false);
+ break;
+ case _EMPHASIS:
+ myModelReader.addControl(EMPHASIS, false);
+ break;
+ case _A:
+ // Closes the kind chosen when the <a> element started.
+ myModelReader.addControl(myHyperlinkType, false);
+ break;
+ case _BINARY:
+ // An image is registered only if the binary was accepted and had data; it
+ // refers to the byte range of the book file holding the base64 text.
+ if (!myCurrentImageId.empty() && myCurrentImageStart != -1) {
+ myModelReader.addImage(myCurrentImageId, new ZLFileImage(
+ ZLFile(myModelReader.model().book()->file().path(), myCurrentContentType),
+ myCurrentImageStart,
+ getCurrentPosition() - myCurrentImageStart,
+ ZLFileImage::ENCODING_BASE64
+ ));
+ }
+ // Reset the pending-image state whether or not an image was added.
+ myCurrentImageId.clear();
+ myCurrentContentType = ZLMimeType::EMPTY;
+ myCurrentImageStart = -1;
+ break;
+ case _BODY:
+ myModelReader.popKind();
+ myModelReader.unsetTextModel();
+ myReadMainText = false;
+ break;
+ default:
+ break;
+ }
+}
+
+// Parses the file of the model's book with readDocument() and returns its result.
+bool FB2BookReader::readBook() {
+ return readDocument(myModelReader.model().book()->file());
+}
diff --git a/reader/src/formats/fb2/FB2BookReader.h b/reader/src/formats/fb2/FB2BookReader.h
new file mode 100644
index 0000000..b9d22d1
--- /dev/null
+++ b/reader/src/formats/fb2/FB2BookReader.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __FB2BOOKREADER_H__
+#define __FB2BOOKREADER_H__
+
+#include <ZLMimeType.h>
+
+#include "FB2Reader.h"
+#include "../../bookmodel/BookReader.h"
+
+class BookModel;
+
+// FB2Reader that fills a BookModel: the element handlers translate FB2 elements
+// into BookReader calls on myModelReader.
+class FB2BookReader : public FB2Reader {
+
+public:
+ FB2BookReader(BookModel &model);
+ // Parses the file of the model's book; returns the result of readDocument().
+ bool readBook();
+
+ bool processNamespaces() const;
+ void startElementHandler(int tag, const char **attributes);
+ void endElementHandler(int tag);
+ void characterDataHandler(const char *text, std::size_t len);
+
+private:
+ // Incremented/decremented on <section> start/end while reading main text.
+ int mySectionDepth;
+ // Number of <body> start tags seen so far.
+ int myBodyCounter;
+ // True between the start and end of a body that is treated as main text.
+ bool myReadMainText;
+ // True inside a <coverpage> that precedes the first <body>.
+ bool myInsideCoverpage;
+ // Paragraph count of the book text model when the latest <body> started;
+ // (std::size_t)-1 until then.
+ std::size_t myParagraphsBeforeBodyNumber;
+ // Id (without '#') of the image referenced inside <coverpage>.
+ std::string myCoverImageReference;
+ bool myInsidePoem;
+ // Receives all model-building calls.
+ BookReader myModelReader;
+
+ // Position where the data of the current <binary> starts, or -1.
+ int myCurrentImageStart;
+ // Id of the <binary> being read; empty when no accepted binary is open.
+ std::string myCurrentImageId;
+ shared_ptr<ZLMimeType> myCurrentContentType;
+
+ // Set at <section> start; cleared by the first <p> or at the section end.
+ bool mySectionStarted;
+ // True inside a title that is neither in a poem nor outside every section.
+ bool myInsideTitle;
+
+ // Kind chosen at <a> start, reused to close the control at </a>.
+ FBTextKind myHyperlinkType;
+};
+
+#endif /* __FB2BOOKREADER_H__ */
diff --git a/reader/src/formats/fb2/FB2CoverReader.cpp b/reader/src/formats/fb2/FB2CoverReader.cpp
new file mode 100644
index 0000000..cc84ac2
--- /dev/null
+++ b/reader/src/formats/fb2/FB2CoverReader.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLFileImage.h>
+
+#include "FB2CoverReader.h"
+
+#include "../../library/Book.h"
+
+// Stores a copy of the file; the remaining state is set up by readCover().
+FB2CoverReader::FB2CoverReader(const ZLFile &file) : myFile(file) {
+}
+
+// Resets the search state, parses the file and returns whatever image the
+// handlers stored in myImage (left untouched if no cover binary was found).
+shared_ptr<const ZLImage> FB2CoverReader::readCover() {
+	myImageStart = -1;
+	myImageId.clear();
+	myLookForImage = false;
+	myReadCoverPage = false;
+
+	// The result of readDocument() is deliberately not used: only myImage matters.
+	readDocument(myFile);
+
+	return myImage;
+}
+
+// Always returns true.
+// NOTE(review): presumably enables namespace processing in the ZLXMLReader base,
+// which the myHrefPredicate lookup would rely on — confirm in ZLXMLReader.
+bool FB2CoverReader::processNamespaces() const {
+ return true;
+}
+
+// Start-of-element hook. Remembers the id referenced by an <image> inside
+// <coverpage>, and arms myLookForImage when the <binary> carrying that id
+// (and a content-type attribute) starts.
+void FB2CoverReader::startElementHandler(int tag, const char **attributes) {
+	if (tag == _COVERPAGE) {
+		myReadCoverPage = true;
+	} else if (tag == _IMAGE) {
+		if (!myReadCoverPage) {
+			return;
+		}
+		const char *href = attributeValue(attributes, myHrefPredicate);
+		// Only a non-empty local reference of the form "#id" is accepted.
+		if (href != 0 && href[0] == '#' && href[1] != '\0') {
+			myImageId = href + 1;
+		}
+	} else if (tag == _BINARY) {
+		const char *binaryId = attributeValue(attributes, "id");
+		const char *mimeType = attributeValue(attributes, "content-type");
+		if (binaryId != 0 && mimeType != 0 && myImageId == binaryId) {
+			myLookForImage = true;
+		}
+	}
+}
+
+// End-of-element hook. Stops parsing after <description> when no cover
+// reference was found, and after the cover <binary> once its data range is known.
+void FB2CoverReader::endElementHandler(int tag) {
+	if (tag == _COVERPAGE) {
+		myReadCoverPage = false;
+		return;
+	}
+	if (tag == _DESCRIPTION) {
+		// No cover reference in the description: nothing left to look for.
+		if (myImageId.empty()) {
+			interrupt();
+		}
+		return;
+	}
+	if (tag == _BINARY) {
+		if (myImageId.empty() || myImageStart < 0) {
+			return;
+		}
+		// The image refers to the base64 text between the recorded start and here.
+		myImage = new ZLFileImage(myFile, myImageStart, getCurrentPosition() - myImageStart, ZLFileImage::ENCODING_BASE64);
+		interrupt();
+	}
+}
+
+// Records the position of the first non-empty data chunk after the cover
+// <binary> started; the text itself is not inspected.
+void FB2CoverReader::characterDataHandler(const char *text, std::size_t len) {
+	if (len == 0 || !myLookForImage) {
+		return;
+	}
+	myImageStart = getCurrentPosition();
+	myLookForImage = false;
+}
diff --git a/reader/src/formats/fb2/FB2CoverReader.h b/reader/src/formats/fb2/FB2CoverReader.h
new file mode 100644
index 0000000..6807aa9
--- /dev/null
+++ b/reader/src/formats/fb2/FB2CoverReader.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __FB2COVERREADER_H__
+#define __FB2COVERREADER_H__
+
+#include <ZLFile.h>
+#include <ZLImage.h>
+
+#include "FB2Reader.h"
+
+// FB2Reader that locates the cover image of an FB2 file: the <image> referenced
+// inside <coverpage> and the <binary> element with the matching id.
+class FB2CoverReader : public FB2Reader {
+
+public:
+ FB2CoverReader(const ZLFile &file);
+ // Parses the file and returns the image stored by the handlers, if any.
+ shared_ptr<const ZLImage> readCover();
+
+private:
+ bool processNamespaces() const;
+ void startElementHandler(int tag, const char **attributes);
+ void endElementHandler(int tag);
+ void characterDataHandler(const char *text, std::size_t len);
+
+private:
+ const ZLFile myFile;
+ // True between the start and end of <coverpage>.
+ bool myReadCoverPage;
+ // True from the start of the cover <binary> until its first data chunk arrives.
+ bool myLookForImage;
+ // Id (without '#') referenced by the cover <image>; empty if none was found.
+ std::string myImageId;
+ // Position where the cover binary data starts, or -1.
+ int myImageStart;
+ shared_ptr<const ZLImage> myImage;
+};
+
+#endif /* __FB2COVERREADER_H__ */
diff --git a/reader/src/formats/fb2/FB2MetaInfoReader.cpp b/reader/src/formats/fb2/FB2MetaInfoReader.cpp
new file mode 100644
index 0000000..3d596ac
--- /dev/null
+++ b/reader/src/formats/fb2/FB2MetaInfoReader.cpp
@@ -0,0 +1,206 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cstdlib>
+
+#include <ZLInputStream.h>
+#include <ZLUnicodeUtil.h>
+
+#include "FB2MetaInfoReader.h"
+#include "FB2TagManager.h"
+
+#include "../../library/Book.h"
+
+// Binds the reader to the book and clears the book's authors, title, language
+// and tags so that they can be refilled from the file.
+// myReadState is not set here; readMetaInfo() initializes it before parsing.
+FB2MetaInfoReader::FB2MetaInfoReader(Book &book) : myBook(book) {
+ myBook.removeAllAuthors();
+ myBook.setTitle(std::string());
+ myBook.setLanguage(std::string());
+ myBook.removeAllTags();
+}
+
+// Accumulates character data according to the current read state: title,
+// language and genre text go to myBuffer, the three author name parts go to
+// their slot in myAuthorNames; everything else is ignored.
+void FB2MetaInfoReader::characterDataHandler(const char *text, std::size_t len) {
+	switch (myReadState) {
+		case READ_TITLE:
+		case READ_LANGUAGE:
+		case READ_GENRE:
+			// All three share the same scratch buffer.
+			myBuffer.append(text, len);
+			break;
+		case READ_AUTHOR_NAME_0:
+			myAuthorNames[0].append(text, len);
+			break;
+		case READ_AUTHOR_NAME_1:
+			myAuthorNames[1].append(text, len);
+			break;
+		case READ_AUTHOR_NAME_2:
+			myAuthorNames[2].append(text, len);
+			break;
+		default:
+			break;
+	}
+}
+
+// Start-of-element part of the metadata state machine. Elements are only
+// honoured in the expected parent state: title/genre/author/lang/sequence inside
+// <title-info> (READ_SOMETHING), name parts inside <author> (READ_AUTHOR).
+void FB2MetaInfoReader::startElementHandler(int tag, const char **attributes) {
+ switch (tag) {
+ case _BODY:
+ // Parsing stops as soon as a <body> starts. myReturnCode is set here but
+ // is not read anywhere in FB2MetaInfoReader.cpp.
+ myReturnCode = true;
+ interrupt();
+ break;
+ case _TITLE_INFO:
+ myReadState = READ_SOMETHING;
+ break;
+ case _BOOK_TITLE:
+ if (myReadState == READ_SOMETHING) {
+ myReadState = READ_TITLE;
+ }
+ break;
+ case _GENRE:
+ if (myReadState == READ_SOMETHING) {
+ myReadState = READ_GENRE;
+ }
+ break;
+ case _AUTHOR:
+ if (myReadState == READ_SOMETHING) {
+ myReadState = READ_AUTHOR;
+ }
+ break;
+ case _LANG:
+ if (myReadState == READ_SOMETHING) {
+ myReadState = READ_LANGUAGE;
+ }
+ break;
+ case _FIRST_NAME:
+ if (myReadState == READ_AUTHOR) {
+ myReadState = READ_AUTHOR_NAME_0;
+ }
+ break;
+ case _MIDDLE_NAME:
+ if (myReadState == READ_AUTHOR) {
+ myReadState = READ_AUTHOR_NAME_1;
+ }
+ break;
+ case _LAST_NAME:
+ if (myReadState == READ_AUTHOR) {
+ myReadState = READ_AUTHOR_NAME_2;
+ }
+ break;
+ case _SEQUENCE:
+ // The series comes from attributes only; a missing "number" is passed on
+ // as an empty string.
+ if (myReadState == READ_SOMETHING) {
+ const char *name = attributeValue(attributes, "name");
+ if (name != 0) {
+ std::string seriesTitle = name;
+ ZLUnicodeUtil::utf8Trim(seriesTitle);
+ const char *number = attributeValue(attributes, "number");
+ myBook.setSeries(seriesTitle, number != 0 ? std::string(number) : std::string());
+ }
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+// End-of-element part of the metadata state machine: commits the accumulated
+// text to the book and returns to the parent state.
+void FB2MetaInfoReader::endElementHandler(int tag) {
+ switch (tag) {
+ case _TITLE_INFO:
+ myReadState = READ_NOTHING;
+ break;
+ case _BOOK_TITLE:
+ if (myReadState == READ_TITLE) {
+ myBook.setTitle(myBuffer);
+ myBuffer.erase();
+ myReadState = READ_SOMETHING;
+ }
+ break;
+ case _GENRE:
+ if (myReadState == READ_GENRE) {
+ ZLUnicodeUtil::utf8Trim(myBuffer);
+ if (!myBuffer.empty()) {
+ // Use the tags supplied by FB2TagManager for this genre text; if it
+ // has none, the raw genre text itself becomes the tag.
+ const std::vector<std::string> &tags =
+ FB2TagManager::Instance().humanReadableTags(myBuffer);
+ if (!tags.empty()) {
+ for (std::vector<std::string>::const_iterator it = tags.begin(); it != tags.end(); ++it) {
+ myBook.addTag(*it);
+ }
+ } else {
+ myBook.addTag(myBuffer);
+ }
+ myBuffer.erase();
+ }
+ myReadState = READ_SOMETHING;
+ }
+ break;
+ case _AUTHOR:
+ if (myReadState == READ_AUTHOR) {
+ ZLUnicodeUtil::utf8Trim(myAuthorNames[0]);
+ ZLUnicodeUtil::utf8Trim(myAuthorNames[1]);
+ ZLUnicodeUtil::utf8Trim(myAuthorNames[2]);
+ // Join the non-empty name parts (first, middle, last) with single spaces.
+ std::string fullName = myAuthorNames[0];
+ if (!fullName.empty() && !myAuthorNames[1].empty()) {
+ fullName += ' ';
+ }
+ fullName += myAuthorNames[1];
+ if (!fullName.empty() && !myAuthorNames[2].empty()) {
+ fullName += ' ';
+ }
+ fullName += myAuthorNames[2];
+ // The last name is passed separately as the second argument.
+ myBook.addAuthor(fullName, myAuthorNames[2]);
+ myAuthorNames[0].erase();
+ myAuthorNames[1].erase();
+ myAuthorNames[2].erase();
+ myReadState = READ_SOMETHING;
+ }
+ break;
+ case _LANG:
+ if (myReadState == READ_LANGUAGE) {
+ myBook.setLanguage(myBuffer);
+ myBuffer.erase();
+ myReadState = READ_SOMETHING;
+ }
+ break;
+ case _FIRST_NAME:
+ if (myReadState == READ_AUTHOR_NAME_0) {
+ myReadState = READ_AUTHOR;
+ }
+ break;
+ case _MIDDLE_NAME:
+ if (myReadState == READ_AUTHOR_NAME_1) {
+ myReadState = READ_AUTHOR;
+ }
+ break;
+ case _LAST_NAME:
+ if (myReadState == READ_AUTHOR_NAME_2) {
+ myReadState = READ_AUTHOR;
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+// Resets the complete parsing state and parses the book's file.
+// Returns the result of readDocument().
+bool FB2MetaInfoReader::readMetaInfo() {
+	myReadState = READ_NOTHING;
+	// Give myReturnCode a defined value; it is otherwise only assigned when a
+	// <body> element is reached.
+	myReturnCode = false;
+	// Drop any text left over from a previous, aborted parse.
+	myBuffer.erase();
+	for (int i = 0; i < 3; ++i) {
+		myAuthorNames[i].erase();
+	}
+	return readDocument(myBook.file());
+}
diff --git a/reader/src/formats/fb2/FB2MetaInfoReader.h b/reader/src/formats/fb2/FB2MetaInfoReader.h
new file mode 100644
index 0000000..cc09909
--- /dev/null
+++ b/reader/src/formats/fb2/FB2MetaInfoReader.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __FB2METAINFOREADER_H__
+#define __FB2METAINFOREADER_H__
+
+#include <string>
+
+#include "FB2Reader.h"
+
+class Book;
+
+// FB2Reader that extracts title, authors, language, genres (as tags) and series
+// from an FB2 file into a Book.
+class FB2MetaInfoReader : public FB2Reader {
+
+public:
+ // Clears the corresponding fields of the book.
+ FB2MetaInfoReader(Book &book);
+ // Parses the book's file; returns the result of readDocument().
+ bool readMetaInfo();
+
+ void startElementHandler(int tag, const char **attributes);
+ void endElementHandler(int tag);
+ void characterDataHandler(const char *text, std::size_t len);
+
+private:
+ Book &myBook;
+
+ // Set to true when a <body> element is reached.
+ bool myReturnCode;
+
+ // Parser state: READ_SOMETHING means "inside <title-info>", READ_AUTHOR means
+ // "inside <author>"; the remaining values select where character data goes.
+ enum {
+ READ_NOTHING,
+ READ_SOMETHING,
+ READ_TITLE,
+ READ_AUTHOR,
+ READ_AUTHOR_NAME_0,
+ READ_AUTHOR_NAME_1,
+ READ_AUTHOR_NAME_2,
+ READ_LANGUAGE,
+ READ_GENRE
+ } myReadState;
+
+ // First, middle and last name of the author being read.
+ std::string myAuthorNames[3];
+ // Scratch text for the title, language or genre being read.
+ std::string myBuffer;
+};
+
+#endif /* __FB2METAINFOREADER_H__ */
diff --git a/reader/src/formats/fb2/FB2Plugin.cpp b/reader/src/formats/fb2/FB2Plugin.cpp
new file mode 100644
index 0000000..f65ddcb
--- /dev/null
+++ b/reader/src/formats/fb2/FB2Plugin.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLFile.h>
+#include <ZLImage.h>
+
+#include "FB2Plugin.h"
+#include "FB2MetaInfoReader.h"
+#include "FB2BookReader.h"
+#include "FB2CoverReader.h"
+
+#include "../../database/booksdb/BooksDBUtil.h"
+
+// Accepts a file only if its extension is exactly "fb2".
+bool FB2Plugin::acceptsFile(const ZLFile &file) const {
+ return file.extension() == "fb2";
+}
+
+// Fills the book's meta information using a FB2MetaInfoReader.
+bool FB2Plugin::readMetaInfo(Book &book) const {
+	FB2MetaInfoReader metaInfoReader(book);
+	return metaInfoReader.readMetaInfo();
+}
+
+// Fills the book model using a FB2BookReader.
+bool FB2Plugin::readModel(BookModel &model) const {
+	FB2BookReader bookReader(model);
+	return bookReader.readBook();
+}
+
+// Returns the cover image found by a FB2CoverReader for the file.
+shared_ptr<const ZLImage> FB2Plugin::coverImage(const ZLFile &file) const {
+	FB2CoverReader coverReader(file);
+	return coverReader.readCover();
+}
+// Does nothing with the book and always reports success.
+bool FB2Plugin::readLanguageAndEncoding(Book &book) const {
+ (void)book;
+ return true;
+}
diff --git a/reader/src/formats/fb2/FB2Plugin.h b/reader/src/formats/fb2/FB2Plugin.h
new file mode 100644
index 0000000..d96558d
--- /dev/null
+++ b/reader/src/formats/fb2/FB2Plugin.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __FB2PLUGIN_H__
+#define __FB2PLUGIN_H__
+
+#include "../FormatPlugin.h"
+
+// FormatPlugin for FB2 files. The constructor, destructor and providesMetaInfo()
+// are defined inline below the class; the other members live in FB2Plugin.cpp.
+class FB2Plugin : public FormatPlugin {
+
+public:
+ FB2Plugin();
+ ~FB2Plugin();
+ bool providesMetaInfo() const;
+ bool acceptsFile(const ZLFile &file) const;
+ bool readMetaInfo(Book &book) const;
+ bool readLanguageAndEncoding(Book &book) const;
+ bool readModel(BookModel &model) const;
+ shared_ptr<const ZLImage> coverImage(const ZLFile &file) const;
+};
+
+// Trivial inline members: empty constructor/destructor, and a plugin that
+// always reports that it provides meta information.
+inline FB2Plugin::FB2Plugin() {}
+inline FB2Plugin::~FB2Plugin() {}
+inline bool FB2Plugin::providesMetaInfo() const { return true; }
+
+#endif /* __FB2PLUGIN_H__ */
diff --git a/reader/src/formats/fb2/FB2Reader.cpp b/reader/src/formats/fb2/FB2Reader.cpp
new file mode 100644
index 0000000..c8e279c
--- /dev/null
+++ b/reader/src/formats/fb2/FB2Reader.cpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cstring>
+
+#include <ZLibrary.h>
+#include <ZLStringUtil.h>
+#include <ZLXMLNamespace.h>
+
+#include "FB2Reader.h"
+
+#include "../util/EntityFilesCollector.h"
+
+// Sets up the predicate used by subclasses to find the "href" attribute in the
+// ZLXMLNamespace::XLink namespace.
+FB2Reader::FB2Reader() : myHrefPredicate(ZLXMLNamespace::XLink, "href") {
+}
+
+// Name-based hook: translates the element name to a tag code and forwards to
+// the int-based handler implemented by subclasses.
+void FB2Reader::startElementHandler(const char *t, const char **attributes) {
+ startElementHandler(tag(t), attributes);
+}
+
+// Name-based hook: translates the element name to a tag code and forwards to
+// the int-based handler implemented by subclasses.
+void FB2Reader::endElementHandler(const char *t) {
+ endElementHandler(tag(t));
+}
+
+// Maps FB2 element names to FB2Reader tag codes; scanned linearly by
+// FB2Reader::tag(). The final {0, _UNKNOWN} entry terminates the scan and
+// supplies the code returned for unrecognized names.
+static const FB2Reader::Tag TAGS[] = {
+ {"p", FB2Reader::_P},
+ {"subtitle", FB2Reader::_SUBTITLE},
+ {"cite", FB2Reader::_CITE},
+ {"text-author", FB2Reader::_TEXT_AUTHOR},
+ {"date", FB2Reader::_DATE},
+ {"section", FB2Reader::_SECTION},
+ {"v", FB2Reader::_V},
+ {"title", FB2Reader::_TITLE},
+ {"poem", FB2Reader::_POEM},
+ {"stanza", FB2Reader::_STANZA},
+ {"epigraph", FB2Reader::_EPIGRAPH},
+ {"annotation", FB2Reader::_ANNOTATION},
+ {"sub", FB2Reader::_SUB},
+ {"sup", FB2Reader::_SUP},
+ {"code", FB2Reader::_CODE},
+ {"strikethrough", FB2Reader::_STRIKETHROUGH},
+ {"strong", FB2Reader::_STRONG},
+ {"emphasis", FB2Reader::_EMPHASIS},
+ {"a", FB2Reader::_A},
+ {"image", FB2Reader::_IMAGE},
+ {"binary", FB2Reader::_BINARY},
+ {"description", FB2Reader::_DESCRIPTION},
+ {"body", FB2Reader::_BODY},
+ {"empty-line", FB2Reader::_EMPTY_LINE},
+ {"title-info", FB2Reader::_TITLE_INFO},
+ {"book-title", FB2Reader::_BOOK_TITLE},
+ {"author", FB2Reader::_AUTHOR},
+ {"lang", FB2Reader::_LANG},
+ {"first-name", FB2Reader::_FIRST_NAME},
+ {"middle-name", FB2Reader::_MIDDLE_NAME},
+ {"last-name", FB2Reader::_LAST_NAME},
+ {"coverpage", FB2Reader::_COVERPAGE},
+ {"sequence", FB2Reader::_SEQUENCE},
+ {"genre", FB2Reader::_GENRE},
+ {0, FB2Reader::_UNKNOWN}
+};
+
+// Returns the tag code for an element name by walking TAGS until the name
+// matches or the terminating entry (tagName == 0) is reached; that entry's
+// code is the result for unknown names.
+int FB2Reader::tag(const char *name) {
+	const Tag *entry = TAGS;
+	while (entry->tagName != 0 && std::strcmp(name, entry->tagName) != 0) {
+		++entry;
+	}
+	return entry->tagCode;
+}
+
+// Returns the external DTD list that EntityFilesCollector provides for "fb2".
+const std::vector<std::string> &FB2Reader::externalDTDs() const {
+ return EntityFilesCollector::Instance().externalDTDs("fb2");
+}
diff --git a/reader/src/formats/fb2/FB2Reader.h b/reader/src/formats/fb2/FB2Reader.h
new file mode 100644
index 0000000..8fa8654
--- /dev/null
+++ b/reader/src/formats/fb2/FB2Reader.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __FB2READER_H__
+#define __FB2READER_H__
+
+#include <ZLXMLReader.h>
+
+// Base class of the FB2 readers: converts element names into integer tag codes
+// so that subclasses can switch on them.
+class FB2Reader : public ZLXMLReader {
+
+public:
+ // One entry of the element-name -> tag-code table.
+ struct Tag {
+ const char *tagName;
+ int tagCode;
+ };
+
+protected:
+ // Returns the TagCode for an element name, _UNKNOWN if it is not in the table.
+ virtual int tag(const char *name);
+
+ // Handlers implemented by subclasses; they receive the tag code.
+ virtual void startElementHandler(int tag, const char **attributes) = 0;
+ virtual void endElementHandler(int tag) = 0;
+
+private:
+ // Name-based handlers; they forward to the int-based ones via tag().
+ void startElementHandler(const char *tag, const char **attributes);
+ void endElementHandler(const char *tag);
+
+ const std::vector<std::string> &externalDTDs() const;
+
+public:
+ // Codes of the recognized FB2 elements.
+ enum TagCode {
+ _P,
+ _SUBTITLE,
+ _CITE,
+ _TEXT_AUTHOR,
+ _DATE,
+ _SECTION,
+ _V,
+ _TITLE,
+ _POEM,
+ _STANZA,
+ _EPIGRAPH,
+ _ANNOTATION,
+ _SUB,
+ _SUP,
+ _CODE,
+ _STRIKETHROUGH,
+ _STRONG,
+ _EMPHASIS,
+ _A,
+ _IMAGE,
+ _BINARY,
+ _DESCRIPTION,
+ _BODY,
+ _EMPTY_LINE,
+ _TITLE_INFO,
+ _BOOK_TITLE,
+ _AUTHOR,
+ _LANG,
+ _FIRST_NAME,
+ _MIDDLE_NAME,
+ _LAST_NAME,
+ _COVERPAGE,
+ _SEQUENCE,
+ _GENRE,
+ _UNKNOWN
+ };
+
+protected:
+ FB2Reader();
+ ~FB2Reader();
+
+protected:
+ // Matches the "href" attribute in the ZLXMLNamespace::XLink namespace.
+ const NamespaceAttributeNamePredicate myHrefPredicate;
+};
+
+// Empty destructor, defined inline.
+inline FB2Reader::~FB2Reader() {}
+
+#endif /* __FB2READER_H__ */
diff --git a/reader/src/formats/fb2/FB2TagManager.cpp b/reader/src/formats/fb2/FB2TagManager.cpp
new file mode 100644
index 0000000..f698ace
--- /dev/null
+++ b/reader/src/formats/fb2/FB2TagManager.cpp
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2008-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <vector>
+
+#include <ZLFile.h>
+#include <ZLXMLReader.h>
+#include <ZLibrary.h>
+#include <ZLUnicodeUtil.h>
+
+#include "FB2TagManager.h"
+
+// XML reader for the genre description file. While parsing it fills the
+// caller-supplied map: genre id -> list of "Category/Subcategory" names.
+class FB2TagInfoReader : public ZLXMLReader {
+
+public:
+ // tagMap is filled during readDocument(); it is held by reference and
+ // must outlive this reader.
+ FB2TagInfoReader(std::map<std::string,std::vector<std::string> > &tagMap);
+
+ void startElementHandler(const char *tag, const char **attributes);
+ void endElementHandler(const char *tag);
+
+private:
+ // Destination map (not owned).
+ std::map<std::string,std::vector<std::string> > &myTagMap;
+
+ // Names collected for the genre / subgenre currently being parsed.
+ std::string myCategoryName;
+ std::string mySubCategoryName;
+ // Genre ids collected for the current subgenre.
+ std::vector<std::string> myGenreIds;
+ // Language whose titles are picked up: "ru" or "en".
+ std::string myLanguage;
+};
+
+// Binds the reader to the map it will fill and selects the language of the
+// genre titles: the library language when it is "ru", otherwise "en".
+FB2TagInfoReader::FB2TagInfoReader(std::map<std::string,std::vector<std::string> > &tagMap) : myTagMap(tagMap) {
+	const std::string libraryLanguage = ZLibrary::Language();
+	myLanguage = (libraryLanguage == "ru") ? libraryLanguage : "en";
+}
+
+// Element names recognised by FB2TagInfoReader in the genre description file.
+// "root-descr" carries a category title, "genre-descr" a subcategory title.
+static const std::string CATEGORY_NAME_TAG = "root-descr";
+static const std::string SUBCATEGORY_NAME_TAG = "genre-descr";
+// "genre" groups subgenres; "subgenre" and "genre-alt" carry genre ids.
+static const std::string GENRE_TAG = "genre";
+static const std::string SUBGENRE_TAG = "subgenre";
+static const std::string SUBGENRE_ALT_TAG = "genre-alt";
+
+// Collects data from an opening element:
+//  - "subgenre" / "genre-alt": the "value" attribute is stored as a genre id;
+//  - "root-descr": "genre-title" becomes the category name;
+//  - "genre-descr": "title" becomes the subcategory name.
+// Titles are taken only when the element's "lang" attribute equals
+// myLanguage, and are trimmed with ZLUnicodeUtil::utf8Trim().
+void FB2TagInfoReader::startElementHandler(const char *tag, const char **attributes) {
+	if ((SUBGENRE_TAG == tag) || (SUBGENRE_ALT_TAG == tag)) {
+		const char *genreId = attributeValue(attributes, "value");
+		if (genreId != 0) {
+			myGenreIds.push_back(genreId);
+		}
+		return;
+	}
+
+	const bool isCategory = (CATEGORY_NAME_TAG == tag);
+	if (!isCategory && !(SUBCATEGORY_NAME_TAG == tag)) {
+		return;
+	}
+
+	// Both description elements are handled alike; only the title
+	// attribute and the destination string differ.
+	const char *language = attributeValue(attributes, "lang");
+	if ((language == 0) || !(myLanguage == language)) {
+		return;
+	}
+	const char *title = isCategory
+		? attributeValue(attributes, "genre-title")
+		: attributeValue(attributes, "title");
+	if (title == 0) {
+		return;
+	}
+	std::string &destination = isCategory ? myCategoryName : mySubCategoryName;
+	destination = title;
+	ZLUnicodeUtil::utf8Trim(destination);
+}
+
+// Handles a closing element.
+//  - "genre": all collected state is discarded.
+//  - "subgenre": when both a category and a subcategory name are known,
+//    "Category/Subcategory" is appended to the map entry of every collected
+//    genre id; the subcategory name and the ids are then discarded.
+// Other elements are ignored.
+void FB2TagInfoReader::endElementHandler(const char *tag) {
+	if (GENRE_TAG == tag) {
+		myCategoryName.erase();
+		mySubCategoryName.erase();
+		myGenreIds.clear();
+		return;
+	}
+	if (!(SUBGENRE_TAG == tag)) {
+		return;
+	}
+
+	const bool namesKnown = !(myCategoryName.empty() || mySubCategoryName.empty());
+	if (namesKnown) {
+		const std::string fullTagName = myCategoryName + '/' + mySubCategoryName;
+		for (std::vector<std::string>::size_type i = 0; i < myGenreIds.size(); ++i) {
+			myTagMap[myGenreIds[i]].push_back(fullTagName);
+		}
+	}
+	mySubCategoryName.erase();
+	myGenreIds.clear();
+}
+
+FB2TagManager *FB2TagManager::ourInstance = 0;
+
+// Returns the single FB2TagManager, creating it on the first call.
+// The instance is never deleted.
+const FB2TagManager &FB2TagManager::Instance() {
+	if (ourInstance != 0) {
+		return *ourInstance;
+	}
+	ourInstance = new FB2TagManager();
+	return *ourInstance;
+}
+
+// Fills myTagMap by parsing <application dir>/formats/fb2/fb2genres.xml.
+// The result of readDocument() is not checked.
+FB2TagManager::FB2TagManager() {
+	const std::string genresPath =
+		ZLibrary::ApplicationDirectory() + ZLibrary::FileNameDelimiter +
+		"formats" + ZLibrary::FileNameDelimiter + "fb2" +
+		ZLibrary::FileNameDelimiter + "fb2genres.xml";
+	FB2TagInfoReader genresReader(myTagMap);
+	genresReader.readDocument(ZLFile(genresPath));
+}
+
+// Returns the tag names recorded for the genre id, or a reference to a
+// shared empty vector when the id is unknown.
+const std::vector<std::string> &FB2TagManager::humanReadableTags(const std::string &id) const {
+	static const std::vector<std::string> EMPTY;
+	const std::map<std::string,std::vector<std::string> >::const_iterator found = myTagMap.find(id);
+	if (found == myTagMap.end()) {
+		return EMPTY;
+	}
+	return found->second;
+}
diff --git a/reader/src/formats/fb2/FB2TagManager.h b/reader/src/formats/fb2/FB2TagManager.h
new file mode 100644
index 0000000..cfbf076
--- /dev/null
+++ b/reader/src/formats/fb2/FB2TagManager.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2008-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __FB2TAGMANAGER_H__
+#define __FB2TAGMANAGER_H__
+
+#include <string>
+#include <map>
+#include <vector>
+
+// Singleton that maps FB2 genre ids to human-readable tag names.
+class FB2TagManager {
+
+private:
+ // The single instance; created on first use by Instance().
+ static FB2TagManager *ourInstance;
+
+public:
+ // Access point for the singleton.
+ static const FB2TagManager &Instance();
+
+private:
+ // Private: instances are created only through Instance().
+ FB2TagManager();
+
+public:
+ // Tag names for the given genre id; an empty vector if the id is unknown.
+ const std::vector<std::string> &humanReadableTags(const std::string &id) const;
+
+private:
+ // genre id -> tag names.
+ std::map<std::string,std::vector<std::string> > myTagMap;
+};
+
+#endif /* __FB2TAGMANAGER_H__ */
diff --git a/reader/src/formats/html/HtmlBookReader.cpp b/reader/src/formats/html/HtmlBookReader.cpp
new file mode 100644
index 0000000..321913d
--- /dev/null
+++ b/reader/src/formats/html/HtmlBookReader.cpp
@@ -0,0 +1,583 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cctype>
+
+#include <ZLFile.h>
+#include <ZLFileImage.h>
+#include <ZLStringUtil.h>
+
+#include "HtmlBookReader.h"
+#include "HtmlTagActions.h"
+#include "../txt/PlainTextFormat.h"
+#include "../util/MiscUtil.h"
+#include "../../bookmodel/BookModel.h"
+#include "../css/StyleSheetParser.h"
+
+HtmlTagAction::HtmlTagAction(HtmlBookReader &reader) : myReader(reader) {
+}
+
+HtmlTagAction::~HtmlTagAction() {
+}
+
+void HtmlTagAction::reset() {
+}
+
+DummyHtmlTagAction::DummyHtmlTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) {
+}
+
+void DummyHtmlTagAction::run(const HtmlReader::HtmlTag&) {
+}
+
+HtmlControlTagAction::HtmlControlTagAction(HtmlBookReader &reader, FBTextKind kind) : HtmlTagAction(reader), myKind(kind) {
+}
+
+// Opens or closes the inline style myKind.
+// Opening tag: if the kind is not already open, it is pushed on the book
+// reader, recorded in the reader's list of open kinds and a start control
+// is emitted. If it is already open, nothing happens.
+// Closing tag: if the kind is open, every kind from the end of the list
+// down to it is closed, the kinds that were opened after it are re-opened,
+// and the kind is removed from the list. This keeps the emitted controls
+// properly nested when the HTML tags overlap.
+void HtmlControlTagAction::run(const HtmlReader::HtmlTag &tag) {
+	std::vector<FBTextKind> &openKinds = myReader.myKindList;
+
+	// Last occurrence of myKind in the list, or -1 when it is not open.
+	int position = static_cast<int>(openKinds.size()) - 1;
+	while ((position >= 0) && (openKinds[position] != myKind)) {
+		--position;
+	}
+
+	if (tag.Start) {
+		if (position >= 0) {
+			return;
+		}
+		bookReader().pushKind(myKind);
+		openKinds.push_back(myKind);
+		bookReader().addControl(myKind, true);
+		return;
+	}
+
+	if (position < 0) {
+		return;
+	}
+	for (int closing = static_cast<int>(openKinds.size()) - 1; closing >= position; --closing) {
+		bookReader().addControl(openKinds[closing], false);
+		bookReader().popKind();
+	}
+	for (std::size_t reopening = position + 1; reopening < openKinds.size(); ++reopening) {
+		bookReader().addControl(openKinds[reopening], true);
+		bookReader().pushKind(openKinds[reopening]);
+	}
+	openKinds.erase(openKinds.begin() + position);
+}
+
+HtmlHeaderTagAction::HtmlHeaderTagAction(HtmlBookReader &reader, FBTextKind kind) : HtmlTagAction(reader), myKind(kind) {
+}
+
+// Handles <H1>..<H6>.
+// Opening tag: when a table of contents is being built and titles are not
+// ignored, a contents paragraph is started (after an end-of-section
+// paragraph) unless one is already open; then the header kind is pushed.
+// Closing tag: the kind is popped and, under the same condition, the
+// contents paragraph and the title are ended.
+// In both cases a new text paragraph is begun and the reader's
+// "text has started" flag is cleared.
+void HtmlHeaderTagAction::run(const HtmlReader::HtmlTag &tag) {
+	myReader.myIsStarted = false;
+
+	const bool titlesGoToContents =
+		myReader.myBuildTableOfContent && !myReader.myIgnoreTitles;
+
+	if (tag.Start) {
+		if (titlesGoToContents && !bookReader().contentsParagraphIsOpen()) {
+			bookReader().insertEndOfSectionParagraph();
+			bookReader().enterTitle();
+			bookReader().beginContentsParagraph();
+		}
+		bookReader().pushKind(myKind);
+	} else {
+		bookReader().popKind();
+		if (titlesGoToContents) {
+			bookReader().endContentsParagraph();
+			bookReader().exitTitle();
+		}
+	}
+
+	bookReader().beginParagraph();
+}
+
+HtmlIgnoreTagAction::HtmlIgnoreTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) {
+}
+
+// Maintains the reader's ignore counter for tags whose content is dropped.
+// A tag name raises the counter only the first time it is opened and lowers
+// it only if it is currently recorded as open, so nested or unbalanced
+// tags of the same name count once.
+void HtmlIgnoreTagAction::run(const HtmlReader::HtmlTag &tag) {
+	const bool recordedAsOpen = (myTagNames.find(tag.Name) != myTagNames.end());
+
+	if (tag.Start) {
+		if (!recordedAsOpen) {
+			++myReader.myIgnoreDataCounter;
+			myTagNames.insert(tag.Name);
+		}
+	} else if (recordedAsOpen) {
+		--myReader.myIgnoreDataCounter;
+		myTagNames.erase(tag.Name);
+	}
+}
+
+// Creates the <A> tag action in the "no hyperlink open" state.
+// HtmlBookReader::tagHandler() creates actions lazily and calls run() at
+// once, while reset() is invoked only from startDocumentHandler() on actions
+// that already exist. A freshly created action would therefore have run()
+// compare an indeterminate hyperlink type against REGULAR unless the state
+// is set here.
+HtmlHrefTagAction::HtmlHrefTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) {
+	setHyperlinkType(REGULAR);
+}
+
+// Handles <A>.
+// Closing tag: if a hyperlink is open, its end control is emitted and the
+// state returns to REGULAR.
+// Opening tag: every NAME attribute adds a hyperlink label. An HREF
+// attribute is considered only while no hyperlink is open:
+//  - a leading copy of the reader's file name is stripped from the target;
+//  - "#label" opens an internal hyperlink to "label";
+//  - any other target opens a hyperlink of the type reported by
+//    MiscUtil::referenceType(), unless that type is INTERNAL_HYPERLINK.
+void HtmlHrefTagAction::run(const HtmlReader::HtmlTag &tag) {
+	if (!tag.Start) {
+		if (hyperlinkType() != REGULAR) {
+			bookReader().addControl(hyperlinkType(), false);
+			setHyperlinkType(REGULAR);
+		}
+		return;
+	}
+
+	const std::string &ownFileName = myReader.myFileName;
+	for (std::size_t i = 0; i < tag.Attributes.size(); ++i) {
+		const std::string &attributeName = tag.Attributes[i].Name;
+		if (attributeName == "NAME") {
+			bookReader().addHyperlinkLabel(tag.Attributes[i].Value);
+			continue;
+		}
+		if ((hyperlinkType() != REGULAR) || (attributeName != "HREF")) {
+			continue;
+		}
+
+		std::string target = tag.Attributes[i].Value;
+		const bool startsWithOwnName =
+			!ownFileName.empty() &&
+			(target.length() > ownFileName.length()) &&
+			(target.compare(0, ownFileName.length(), ownFileName) == 0);
+		if (startsWithOwnName) {
+			target.erase(0, ownFileName.length());
+		}
+		if (target.empty()) {
+			continue;
+		}
+
+		if (target[0] == '#') {
+			setHyperlinkType(INTERNAL_HYPERLINK);
+			bookReader().addHyperlinkControl(INTERNAL_HYPERLINK, target.substr(1));
+			continue;
+		}
+		const FBTextKind externalKind = MiscUtil::referenceType(target);
+		if (externalKind != INTERNAL_HYPERLINK) {
+			setHyperlinkType(externalKind);
+			bookReader().addHyperlinkControl(externalKind, target);
+		}
+	}
+}
+
+void HtmlHrefTagAction::reset() {
+ setHyperlinkType(REGULAR);
+}
+
+FBTextKind HtmlHrefTagAction::hyperlinkType() const {
+ return myHyperlinkType;
+}
+
+void HtmlHrefTagAction::setHyperlinkType(FBTextKind hyperlinkType) {
+ myHyperlinkType = hyperlinkType;
+}
+
+HtmlImageTagAction::HtmlImageTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) {
+}
+
+// Handles <IMG>. Closing tags are ignored.
+// On an opening tag the current paragraph is ended, the first SRC attribute
+// (if any) is URL-decoded and resolved against the reader's base directory,
+// and - only when that file exists - an image reference and the image are
+// added to the model. A new paragraph is then begun.
+void HtmlImageTagAction::run(const HtmlReader::HtmlTag &tag) {
+	if (!tag.Start) {
+		return;
+	}
+
+	bookReader().endParagraph();
+
+	// Only the first SRC attribute is looked at.
+	std::size_t srcIndex = 0;
+	while ((srcIndex < tag.Attributes.size()) && (tag.Attributes[srcIndex].Name != "SRC")) {
+		++srcIndex;
+	}
+	if (srcIndex < tag.Attributes.size()) {
+		const std::string fileName = MiscUtil::decodeHtmlURL(tag.Attributes[srcIndex].Value);
+		const ZLFile file(myReader.myBaseDirPath + fileName);
+		if (file.exists()) {
+			bookReader().addImageReference(fileName);
+			bookReader().addImage(fileName, new ZLFileImage(file, 0));
+		}
+	}
+
+	bookReader().beginParagraph();
+}
+
+HtmlBreakTagAction::HtmlBreakTagAction(HtmlBookReader &reader, BreakType breakType) : HtmlTagAction(reader), myBreakType(breakType) {
+}
+
+// Starts a new paragraph at the opening and/or closing tag, as selected by
+// myBreakType. If the reader's "don't break paragraph" flag is set, the
+// flag is consumed and no break is made. When the kind stack is empty,
+// REGULAR is pushed before the new paragraph begins.
+void HtmlBreakTagAction::run(const HtmlReader::HtmlTag &tag) {
+	if (myReader.myDontBreakParagraph) {
+		myReader.myDontBreakParagraph = false;
+		return;
+	}
+
+	const bool breakHere = tag.Start
+		? ((myBreakType & BREAK_AT_START) != 0)
+		: ((myBreakType & BREAK_AT_END) != 0);
+	if (!breakHere) {
+		return;
+	}
+
+	bookReader().endParagraph();
+	if (bookReader().isKindStackEmpty()) {
+		bookReader().pushKind(REGULAR);
+	}
+	bookReader().beginParagraph();
+}
+
+HtmlPreTagAction::HtmlPreTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) {
+}
+
+// Handles <PRE>: ends the current paragraph, switches the reader's
+// preformatted mode on (opening tag) or off (closing tag) and resets the
+// space and break counters. The PREFORMATTED kind is pushed or popped only
+// when the format's break type is exactly BREAK_PARAGRAPH_AT_NEW_LINE.
+// A new paragraph is begun afterwards.
+void HtmlPreTagAction::run(const HtmlReader::HtmlTag &tag) {
+	bookReader().endParagraph();
+
+	myReader.myIsPreformatted = tag.Start;
+	myReader.mySpaceCounter = -1;
+	myReader.myBreakCounter = 0;
+
+	const bool breaksAtNewLine =
+		(myReader.myFormat.breakType() == PlainTextFormat::BREAK_PARAGRAPH_AT_NEW_LINE);
+	if (breaksAtNewLine && tag.Start) {
+		bookReader().pushKind(PREFORMATTED);
+	} else if (breaksAtNewLine) {
+		bookReader().popKind();
+	}
+
+	bookReader().beginParagraph();
+}
+
+HtmlListTagAction::HtmlListTagAction(HtmlBookReader &reader, int startIndex) : HtmlTagAction(reader), myStartIndex(startIndex) {
+}
+
+// Handles list containers: an opening tag pushes the list's start index on
+// the reader's numbering stack, a closing tag pops it (if the stack is not
+// empty).
+void HtmlListTagAction::run(const HtmlReader::HtmlTag &tag) {
+	std::stack<int> &numbering = myReader.myListNumStack;
+	if (tag.Start) {
+		numbering.push(myStartIndex);
+		return;
+	}
+	if (!numbering.empty()) {
+		numbering.pop();
+	}
+}
+
+HtmlListItemTagAction::HtmlListItemTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) {
+}
+
+// Handles <LI>.
+// Opening tag: starts a new paragraph; inside a list it adds a fixed
+// horizontal space of three units per nesting level followed by the item
+// marker - a bullet when the top-of-stack index is 0, otherwise the index
+// (which is then incremented) followed by ". " - and asks the reader not to
+// break the paragraph at the next break tag.
+// Closing tag: clears that request.
+void HtmlListItemTagAction::run(const HtmlReader::HtmlTag &tag) {
+	if (!tag.Start) {
+		myReader.myDontBreakParagraph = false;
+		return;
+	}
+
+	bookReader().endParagraph();
+	bookReader().beginParagraph();
+
+	std::stack<int> &numbering = myReader.myListNumStack;
+	if (numbering.empty()) {
+		return;
+	}
+
+	bookReader().addFixedHSpace(3 * numbering.size());
+	int &itemNumber = numbering.top();
+	if (itemNumber == 0) {
+		// UTF-8 bullet (three bytes) plus a space.
+		myReader.addConvertedDataToBuffer("\342\200\242 ", 4, false);
+	} else {
+		std::string marker;
+		ZLStringUtil::appendNumber(marker, itemNumber++);
+		marker += ". ";
+		myReader.addConvertedDataToBuffer(marker.data(), marker.length(), false);
+	}
+	// Set after the marker is added: adding data clears the flag.
+	myReader.myDontBreakParagraph = true;
+}
+
+HtmlTableTagAction::HtmlTableTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) {
+}
+
+// Handles <TABLE>: titles are ignored from the opening tag until the
+// closing tag.
+void HtmlTableTagAction::run(const HtmlReader::HtmlTag &tag) {
+	myReader.myIgnoreTitles = tag.Start ? true : false;
+}
+
+HtmlStyleTagAction::HtmlStyleTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) {
+}
+
+// Handles <STYLE>: the opening tag installs a parser that feeds the
+// reader's style sheet table, the closing tag removes it. While a parser is
+// installed, character data goes to it instead of the book text.
+void HtmlStyleTagAction::run(const HtmlReader::HtmlTag &tag) {
+	if (tag.Start) {
+		myReader.myStyleSheetParser = new StyleSheetTableParser(myReader.myStyleSheetTable);
+	} else {
+		myReader.myStyleSheetParser = 0;
+		// Debugging aid, disabled:
+		// myReader.myStyleSheetTable.dump();
+	}
+}
+
+// Creates the action object for an HTML tag name. The names compared here
+// are upper-case. Tags without a dedicated action - including TD, and PRE
+// when myProcessPreTag is false - get a DummyHtmlTagAction, so the result
+// is never null. tagHandler() caches the returned action per tag name.
+shared_ptr<HtmlTagAction> HtmlBookReader::createAction(const std::string &tag) {
+ // Inline text styles.
+ if (tag == "EM") {
+ return new HtmlControlTagAction(*this, EMPHASIS);
+ } else if (tag == "STRONG") {
+ return new HtmlControlTagAction(*this, STRONG);
+ } else if (tag == "B") {
+ return new HtmlControlTagAction(*this, BOLD);
+ } else if (tag == "I") {
+ return new HtmlControlTagAction(*this, ITALIC);
+ } else if (tag == "TT") {
+ return new HtmlControlTagAction(*this, CODE);
+ } else if (tag == "CODE") {
+ return new HtmlControlTagAction(*this, CODE);
+ } else if (tag == "CITE") {
+ return new HtmlControlTagAction(*this, CITE);
+ } else if (tag == "SUB") {
+ return new HtmlControlTagAction(*this, SUB);
+ } else if (tag == "SUP") {
+ return new HtmlControlTagAction(*this, SUP);
+ // Headers.
+ } else if (tag == "H1") {
+ return new HtmlHeaderTagAction(*this, H1);
+ } else if (tag == "H2") {
+ return new HtmlHeaderTagAction(*this, H2);
+ } else if (tag == "H3") {
+ return new HtmlHeaderTagAction(*this, H3);
+ } else if (tag == "H4") {
+ return new HtmlHeaderTagAction(*this, H4);
+ } else if (tag == "H5") {
+ return new HtmlHeaderTagAction(*this, H5);
+ } else if (tag == "H6") {
+ return new HtmlHeaderTagAction(*this, H6);
+ // Tags whose character data is dropped (STYLE is parsed as CSS instead).
+ } else if (tag == "HEAD") {
+ return new HtmlIgnoreTagAction(*this);
+ } else if (tag == "TITLE") {
+ return new HtmlIgnoreTagAction(*this);
+ } else if (tag == "STYLE") {
+ return new HtmlStyleTagAction(*this);
+ } else if (tag == "SELECT") {
+ return new HtmlIgnoreTagAction(*this);
+ } else if (tag == "SCRIPT") {
+ return new HtmlIgnoreTagAction(*this);
+ } else if (tag == "A") {
+ return new HtmlHrefTagAction(*this);
+ // TD deliberately has no action of its own: it falls through to the dummy.
+ } else if (tag == "TD") {
+ //return new HtmlBreakTagAction(*this, HtmlBreakTagAction::BREAK_AT_END);
+ // Paragraph-breaking tags.
+ } else if (tag == "TR") {
+ return new HtmlBreakTagAction(*this, HtmlBreakTagAction::BREAK_AT_END);
+ } else if (tag == "DIV") {
+ return new HtmlBreakTagAction(*this, HtmlBreakTagAction::BREAK_AT_END);
+ } else if (tag == "DT") {
+ return new HtmlBreakTagAction(*this, HtmlBreakTagAction::BREAK_AT_START);
+ } else if (tag == "P") {
+ return new HtmlBreakTagAction(*this, HtmlBreakTagAction::BREAK_AT_START_AND_AT_END);
+ } else if (tag == "BR") {
+ return new HtmlBreakTagAction(*this, HtmlBreakTagAction::BREAK_AT_START_AND_AT_END);
+ } else if (tag == "IMG") {
+ return new HtmlImageTagAction(*this);
+ // Lists: start index 0 produces bullets, 1 produces numbered items.
+ } else if (tag == "UL") {
+ return new HtmlListTagAction(*this, 0);
+ } else if (tag == "MENU") {
+ return new HtmlListTagAction(*this, 0);
+ } else if (tag == "DIR") {
+ return new HtmlListTagAction(*this, 0);
+ } else if (tag == "OL") {
+ return new HtmlListTagAction(*this, 1);
+ } else if (tag == "LI") {
+ return new HtmlListItemTagAction(*this);
+ } else if (tag == "PRE") {
+ // With PRE processing switched off the tag falls through to the dummy.
+ if (myProcessPreTag) {
+ return new HtmlPreTagAction(*this);
+ }
+ } else if (tag == "TABLE") {
+ return new HtmlTableTagAction(*this);
+ }
+ /*
+ } else if (tag == "DD") {
+ return 0;
+ } else if (tag == "DL") {
+ return 0;
+ } else if (tag == "DFN") {
+ return 0;
+ } else if (tag == "SAMP") {
+ return 0;
+ } else if (tag == "KBD") {
+ return 0;
+ } else if (tag == "VAR") {
+ return 0;
+ } else if (tag == "ABBR") {
+ return 0;
+ } else if (tag == "ACRONYM") {
+ return 0;
+ } else if (tag == "BLOCKQUOTE") {
+ return 0;
+ } else if (tag == "Q") {
+ return 0;
+ } else if (tag == "INS") {
+ return 0;
+ } else if (tag == "DEL") {
+ return 0;
+ } else if (tag == "BODY") {
+ return 0;
+ */
+ // Everything else: an action that does nothing.
+ return new DummyHtmlTagAction(*this);
+}
+
+void HtmlBookReader::setBuildTableOfContent(bool build) {
+ myBuildTableOfContent = build;
+}
+
+void HtmlBookReader::setProcessPreTag(bool process) {
+ myProcessPreTag = process;
+}
+
+HtmlBookReader::HtmlBookReader(const std::string &baseDirectoryPath, BookModel &model, const PlainTextFormat &format, const std::string &encoding) : HtmlReader(encoding), myBookReader(model), myBaseDirPath(baseDirectoryPath), myFormat(format), myBuildTableOfContent(true), myProcessPreTag(true) {
+}
+
+HtmlBookReader::~HtmlBookReader() {
+}
+
+// Adds a run of character data to the current text paragraph and to the
+// contents paragraph (BookReader::addData / addContentsData).
+//
+// text, len - the bytes to add; nothing happens when len is 0.
+// convert   - true: the bytes are first converted with myConverter;
+//             false: they are added unchanged.
+//
+// While myDontBreakParagraph is set, leading whitespace is skipped; a run
+// that is whitespace only is dropped and leaves the flag set. Once any data
+// has been added the flag is cleared.
+void HtmlBookReader::addConvertedDataToBuffer(const char *text, std::size_t len, bool convert) {
+	if (len == 0) {
+		return;
+	}
+
+	if (myDontBreakParagraph) {
+		// std::isspace() has undefined behaviour for negative arguments
+		// other than EOF, and plain char may be signed, so bytes >= 0x80
+		// must be passed as unsigned char - as the other isspace() calls
+		// in this file already do.
+		while ((len > 0) && std::isspace(static_cast<unsigned char>(*text))) {
+			--len;
+			++text;
+		}
+		if (len == 0) {
+			return;
+		}
+	}
+
+	if (convert) {
+		myConverter->convert(myConverterBuffer, text, text + len);
+		myBookReader.addData(myConverterBuffer);
+		myBookReader.addContentsData(myConverterBuffer);
+		myConverterBuffer.erase();
+	} else {
+		const std::string strText(text, len);
+		myBookReader.addData(strText);
+		myBookReader.addContentsData(strText);
+	}
+	myDontBreakParagraph = false;
+}
+
+// Processes one HTML tag: resets the converter, registers the first ID
+// attribute as a hyperlink label, then runs the action for the tag name.
+// Actions are created on first use through createAction() and cached in
+// myActionMap. Always returns true.
+bool HtmlBookReader::tagHandler(const HtmlTag &tag) {
+	myConverter->reset();
+
+	std::size_t idIndex = 0;
+	while ((idIndex < tag.Attributes.size()) && (tag.Attributes[idIndex].Name != "ID")) {
+		++idIndex;
+	}
+	if (idIndex < tag.Attributes.size()) {
+		myBookReader.addHyperlinkLabel(tag.Attributes[idIndex].Value);
+	}
+
+	shared_ptr<HtmlTagAction> &cached = myActionMap[tag.Name];
+	if (cached.isNull()) {
+		cached = createAction(tag.Name);
+	}
+	// Run through a copy so the action is kept alive for the whole call.
+	shared_ptr<HtmlTagAction> action = cached;
+	action->run(tag);
+
+	return true;
+}
+
+// Adds character data found inside <PRE>, splitting it into paragraphs
+// according to the break type of myFormat. The first matching mode wins:
+// break at every new line, break at indented lines, or break at empty lines.
+// mySpaceCounter and myBreakCounter carry state between calls, so one
+// logical line may arrive in several chunks.
+// text, len - the chunk of data; convert is passed on to
+// addConvertedDataToBuffer().
+void HtmlBookReader::preformattedCharacterDataHandler(const char *text, std::size_t len, bool convert) {
+ // [start, ptr) is the data scanned so far that has not been added yet.
+ const char *start = text;
+ const char *end = text + len;
+
+ int breakType = myFormat.breakType();
+ if (breakType & PlainTextFormat::BREAK_PARAGRAPH_AT_NEW_LINE) {
+ // Every '\n' ends the paragraph. mySpaceCounter >= 0 means "still in the
+ // leading whitespace of a line"; it counts that whitespace so it can be
+ // emitted as a fixed horizontal space before the first visible character.
+ for (const char *ptr = text; ptr != end; ++ptr) {
+ if (*ptr == '\n') {
+ mySpaceCounter = 0;
+ if (start < ptr) {
+ addConvertedDataToBuffer(start, ptr - start, convert);
+ } else {
+ // Nothing pending on this line: add a single space before ending the paragraph.
+ static const std::string SPACE = " ";
+ myBookReader.addData(SPACE);
+ }
+ myBookReader.endParagraph();
+ myBookReader.beginParagraph();
+ start = ptr + 1;
+ } else if (mySpaceCounter >= 0) {
+ if (std::isspace((unsigned char)*ptr)) {
+ ++mySpaceCounter;
+ } else {
+ myBookReader.addFixedHSpace(mySpaceCounter);
+ mySpaceCounter = -1;
+ }
+ }
+ }
+ addConvertedDataToBuffer(start, end - start, convert);
+ } else if (breakType & PlainTextFormat::BREAK_PARAGRAPH_AT_LINE_WITH_INDENT) {
+ // A line whose indent is larger than myFormat.ignoredIndent() starts a new
+ // paragraph. mySpaceCounter counts whitespace since the last '\n', or is -1
+ // once a visible character has been seen on the line.
+ for (const char *ptr = text; ptr != end; ++ptr) {
+ if (std::isspace((unsigned char)*ptr)) {
+ if (*ptr == '\n') {
+ mySpaceCounter = 0;
+ } else if (mySpaceCounter >= 0) {
+ ++mySpaceCounter;
+ }
+ } else {
+ if (mySpaceCounter > myFormat.ignoredIndent()) {
+ // Flush what precedes the indent, leaving the indent itself out.
+ if (ptr - start > mySpaceCounter) {
+ addConvertedDataToBuffer(start, ptr - start - mySpaceCounter, convert);
+ myBookReader.endParagraph();
+ myBookReader.beginParagraph();
+ }
+ start = ptr;
+ }
+ mySpaceCounter = -1;
+ }
+ }
+ // Trailing whitespace counted as a possible indent is held back from this chunk.
+ mySpaceCounter = std::max(mySpaceCounter, 0);
+ if (end - start > mySpaceCounter) {
+ addConvertedDataToBuffer(start, end - start - mySpaceCounter, convert);
+ }
+ } else if (breakType & PlainTextFormat::BREAK_PARAGRAPH_AT_EMPTY_LINE) {
+ // myBreakCounter counts '\n' characters since the last visible character;
+ // more than one of them in a row ends the paragraph.
+ for (const char *ptr = start; ptr != end; ++ptr) {
+ if (std::isspace((unsigned char)*ptr)) {
+ if (*ptr == '\n') {
+ ++myBreakCounter;
+ }
+ } else {
+ if (myBreakCounter > 1) {
+ addConvertedDataToBuffer(start, ptr - start, convert);
+ myBookReader.endParagraph();
+ myBookReader.beginParagraph();
+ start = ptr;
+ }
+ myBreakCounter = 0;
+ }
+ }
+ addConvertedDataToBuffer(start, end - start, convert);
+ }
+}
+
+// Routes a chunk of character data. In order of precedence:
+//  - inside <STYLE> it is fed to the style sheet parser;
+//  - inside an ignored tag it is dropped;
+//  - inside <PRE> it goes to preformattedCharacterDataHandler();
+//  - otherwise it is added to the book, after skipping leading whitespace
+//    if no text has been seen yet since the flag was last cleared.
+// Always returns true.
+bool HtmlBookReader::characterDataHandler(const char *text, std::size_t len, bool convert) {
+	if (!myStyleSheetParser.isNull()) {
+		myStyleSheetParser->parse(text, len);
+		return true;
+	}
+	if (myIgnoreDataCounter != 0) {
+		return true;
+	}
+	if (myIsPreformatted) {
+		preformattedCharacterDataHandler(text, len, convert);
+		return true;
+	}
+
+	const char *const end = text + len;
+	const char *cursor = text;
+	if (!myIsStarted) {
+		while ((cursor != end) && std::isspace((unsigned char)*cursor)) {
+			++cursor;
+		}
+		// Text has started only if something other than whitespace was found.
+		myIsStarted = (cursor != end);
+	}
+	if (myIsStarted) {
+		addConvertedDataToBuffer(cursor, end - cursor, convert);
+	}
+	return true;
+}
+
+// Prepares the reader for a new document: clears the per-document state,
+// resets the book reader and opens the first REGULAR paragraph in the main
+// text model, and resets every cached tag action.
+void HtmlBookReader::startDocumentHandler() {
+	myListNumStack = std::stack<int>();
+	myConverterBuffer.erase();
+	myKindList.clear();
+
+	myBookReader.reset();
+	myBookReader.setMainTextModel();
+	myBookReader.pushKind(REGULAR);
+	myBookReader.beginParagraph();
+
+	myIgnoreDataCounter = 0;
+	myIsPreformatted = false;
+	myDontBreakParagraph = false;
+
+	typedef std::map<std::string,shared_ptr<HtmlTagAction> > ActionMap;
+	for (ActionMap::const_iterator entry = myActionMap.begin(); entry != myActionMap.end(); ++entry) {
+		entry->second->reset();
+	}
+
+	myIsStarted = false;
+	myIgnoreTitles = false;
+	myStyleSheetParser = 0;
+	mySpaceCounter = -1;
+	myBreakCounter = 0;
+}
+
+void HtmlBookReader::endDocumentHandler() {
+ myBookReader.endParagraph();
+}
+
+void HtmlBookReader::setFileName(const std::string fileName) {
+ myFileName = fileName;
+}
diff --git a/reader/src/formats/html/HtmlBookReader.h b/reader/src/formats/html/HtmlBookReader.h
new file mode 100644
index 0000000..c8d4e32
--- /dev/null
+++ b/reader/src/formats/html/HtmlBookReader.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __HTMLBOOKREADER_H__
+#define __HTMLBOOKREADER_H__
+
+#include <stack>
+
+#include <shared_ptr.h>
+
+#include "HtmlReader.h"
+#include "../../bookmodel/BookReader.h"
+#include "../css/StyleSheetTable.h"
+
+class BookModel;
+class PlainTextFormat;
+class StyleSheetParser;
+
+class HtmlTagAction;
+
+// HtmlReader that builds a book model from an HTML document. Each tag is
+// handled by an HtmlTagAction object (created by createAction() and cached
+// per tag name); the action classes are friends and manipulate the private
+// state below directly.
+class HtmlBookReader : public HtmlReader {
+
+public:
+ // baseDirectoryPath is prepended to image file names; format selects how
+ // preformatted text is split into paragraphs.
+ HtmlBookReader(const std::string &baseDirectoryPath, BookModel &model, const PlainTextFormat &format, const std::string &encoding);
+ ~HtmlBookReader();
+ // Sets the name that is stripped from the beginning of HREF targets.
+ void setFileName(const std::string fileName);
+
+protected:
+ // Factory for tag actions; virtual, so derived readers can substitute their own.
+ virtual shared_ptr<HtmlTagAction> createAction(const std::string &tag);
+ void setBuildTableOfContent(bool build);
+ void setProcessPreTag(bool process);
+
+protected:
+ void startDocumentHandler();
+ void endDocumentHandler();
+ bool tagHandler(const HtmlTag &tag);
+ bool characterDataHandler(const char *text, std::size_t len, bool convert);
+
+private:
+ void preformattedCharacterDataHandler(const char *text, std::size_t len, bool convert);
+ void addConvertedDataToBuffer(const char *text, std::size_t len, bool convert);
+
+protected:
+ BookReader myBookReader;
+ std::string myBaseDirPath;
+
+private:
+ // Held by reference; the format object must outlive the reader.
+ const PlainTextFormat &myFormat;
+ // Non-zero while inside a tag whose character data is dropped.
+ int myIgnoreDataCounter;
+ // True between <PRE> and </PRE>.
+ bool myIsPreformatted;
+ // Set by <LI>: the next break tag must not start a new paragraph and
+ // leading whitespace of the next data is skipped.
+ bool myDontBreakParagraph;
+
+ // False until non-whitespace text has been seen; cleared again by header tags.
+ bool myIsStarted;
+ bool myBuildTableOfContent;
+ bool myProcessPreTag;
+ // True inside <TABLE>: headers there do not go to the table of contents.
+ bool myIgnoreTitles;
+ // One entry per open list: 0 for a bulleted list, otherwise the next item number.
+ std::stack<int> myListNumStack;
+
+ StyleSheetTable myStyleSheetTable;
+ // Non-null only between <STYLE> and </STYLE>.
+ shared_ptr<StyleSheetParser> myStyleSheetParser;
+
+ // State of preformattedCharacterDataHandler(), kept between calls.
+ int mySpaceCounter;
+ int myBreakCounter;
+ // Scratch buffer for converted text.
+ std::string myConverterBuffer;
+
+ // Tag name -> action, filled lazily by tagHandler().
+ std::map<std::string,shared_ptr<HtmlTagAction> > myActionMap;
+ // Inline style kinds currently open, in opening order.
+ std::vector<FBTextKind> myKindList;
+
+ std::string myFileName;
+
+ friend class HtmlTagAction;
+ friend class HtmlControlTagAction;
+ friend class HtmlHeaderTagAction;
+ friend class HtmlIgnoreTagAction;
+ friend class HtmlHrefTagAction;
+ friend class HtmlImageTagAction;
+ friend class HtmlBreakTagAction;
+ friend class HtmlPreTagAction;
+ friend class HtmlListTagAction;
+ friend class HtmlListItemTagAction;
+ friend class HtmlTableTagAction;
+ friend class HtmlStyleTagAction;
+};
+
+#endif /* __HTMLBOOKREADER_H__ */
diff --git a/reader/src/formats/html/HtmlDescriptionReader.cpp b/reader/src/formats/html/HtmlDescriptionReader.cpp
new file mode 100644
index 0000000..6ebcb8b
--- /dev/null
+++ b/reader/src/formats/html/HtmlDescriptionReader.cpp
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include "HtmlDescriptionReader.h"
+
+#include "../../library/Book.h"
+
+HtmlDescriptionReader::HtmlDescriptionReader(Book &book) : HtmlReader(book.encoding()), myBook(book) {
+ myBook.setTitle("");
+}
+
+void HtmlDescriptionReader::startDocumentHandler() {
+ myReadTitle = false;
+}
+
+// At the end of the document the collected title (stored as raw bytes) is
+// passed through the converter and stored back. An empty title is left alone.
+void HtmlDescriptionReader::endDocumentHandler() {
+	const std::string &rawTitle = myBook.title();
+	if (rawTitle.empty()) {
+		return;
+	}
+	const char *const titleStart = rawTitle.data();
+	std::string convertedTitle;
+	myConverter->convert(convertedTitle, titleStart, titleStart + rawTitle.length());
+	myBook.setTitle(convertedTitle);
+}
+
+// Extracts book metadata from tags.
+//  - TITLE: the closing tag stores the buffered text as the book title;
+//    text is collected only for an opening TITLE while the book has no
+//    title yet. Returns true.
+//  - opening META: if its first CONTENT attribute contains "charset=", the
+//    text after it, up to the first ';' or ' ', is set as the book encoding.
+// For every tag other than TITLE the result is false for BODY and true
+// otherwise.
+bool HtmlDescriptionReader::tagHandler(const HtmlTag &tag) {
+	if (tag.Name == "TITLE") {
+		if (myReadTitle && !tag.Start) {
+			myBook.setTitle(myBuffer);
+			myBuffer.erase();
+		}
+		myReadTitle = tag.Start && myBook.title().empty();
+		return true;
+	}
+
+	if (tag.Start && tag.Name == "META") {
+		std::size_t contentIndex = 0;
+		while ((contentIndex < tag.Attributes.size()) && (tag.Attributes[contentIndex].Name != "CONTENT")) {
+			++contentIndex;
+		}
+		if (contentIndex < tag.Attributes.size()) {
+			const std::string &content = tag.Attributes[contentIndex].Value;
+			const std::string prefix = "charset=";
+			const std::size_t prefixPosition = content.find(prefix);
+			if (prefixPosition != std::string::npos) {
+				std::string charset = content.substr(prefixPosition + prefix.length());
+				// The name ends at the first ';' or space, whichever comes first.
+				const std::size_t nameEnd = charset.find_first_of("; ");
+				if (nameEnd != std::string::npos) {
+					charset.erase(nameEnd);
+				}
+				myBook.setEncoding(charset);
+			}
+		}
+	}
+
+	return tag.Name != "BODY";
+}
+
+// Appends character data to the title buffer while a title is being read;
+// other data is ignored. Always returns true.
+bool HtmlDescriptionReader::characterDataHandler(const char *text, std::size_t len, bool) {
+	if (!myReadTitle) {
+		return true;
+	}
+	myBuffer.append(text, len);
+	return true;
+}
diff --git a/reader/src/formats/html/HtmlDescriptionReader.h b/reader/src/formats/html/HtmlDescriptionReader.h
new file mode 100644
index 0000000..159d4b0
--- /dev/null
+++ b/reader/src/formats/html/HtmlDescriptionReader.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __HTMLDESCRIPTIONREADER_H__
+#define __HTMLDESCRIPTIONREADER_H__
+
+#include "HtmlReader.h"
+
+class Book;
+
+// HtmlReader that extracts book metadata - the title and the encoding
+// declared in a META tag - and stores it in a Book.
+class HtmlDescriptionReader : public HtmlReader {
+
+public:
+ // The book is held by reference; its title is cleared on construction.
+ HtmlDescriptionReader(Book &book);
+ ~HtmlDescriptionReader();
+
+protected:
+ void startDocumentHandler();
+ void endDocumentHandler();
+
+ bool tagHandler(const HtmlTag &tag);
+ bool characterDataHandler(const char *text, std::size_t len, bool convert);
+
+private:
+ // True while the text of a TITLE element is being collected.
+ bool myReadTitle;
+ // Title text collected so far.
+ std::string myBuffer;
+ Book &myBook;
+};
+
+inline HtmlDescriptionReader::~HtmlDescriptionReader() {}
+
+#endif /* __HTMLDESCRIPTIONREADER_H__ */
diff --git a/reader/src/formats/html/HtmlEntityCollection.cpp b/reader/src/formats/html/HtmlEntityCollection.cpp
new file mode 100644
index 0000000..bd1bb4e
--- /dev/null
+++ b/reader/src/formats/html/HtmlEntityCollection.cpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cstdlib>
+#include <cctype>
+
+#include <ZLibrary.h>
+#include <ZLFile.h>
+#include <ZLXMLReader.h>
+
+#include "HtmlEntityCollection.h"
+
+// File-local XML reader that fills a name -> number map from <entity>
+// elements (see startElementHandler below).
+class CollectionReader : public ZLXMLReader {
+
+public:
+ // `collection` is the map to populate; it is held by reference.
+ CollectionReader(std::map<std::string,int> &collection);
+ void startElementHandler(const char *tag, const char **attributes);
+
+private:
+ // Destination map; not owned.
+ std::map<std::string,int> &myCollection;
+};
+
+// Entity name -> number table, filled on first use by symbolNumber().
+std::map<std::string,int> HtmlEntityCollection::ourCollection;
+
+// Returns the number registered for the entity `name`, or 0 when the name is
+// unknown. While the table is empty, each call (re)reads
+// <ApplicationDirectory>/formats/html/html.ent before looking up.
+int HtmlEntityCollection::symbolNumber(const std::string &name) {
+    if (ourCollection.empty()) {
+        const std::string entityFilePath =
+            ZLibrary::ApplicationDirectory() + ZLibrary::FileNameDelimiter +
+            "formats" + ZLibrary::FileNameDelimiter +
+            "html" + ZLibrary::FileNameDelimiter + "html.ent";
+        CollectionReader loader(ourCollection);
+        loader.readDocument(ZLFile(entityFilePath));
+    }
+
+    const std::map<std::string,int>::const_iterator found = ourCollection.find(name);
+    if (found == ourCollection.end()) {
+        return 0;
+    }
+    return found->second;
+}
+
+// Binds the reader to the map it will populate; the map must outlive the reader.
+CollectionReader::CollectionReader(std::map<std::string,int> &collection) : myCollection(collection) {
+}
+
+// Registers one entity per <entity> element.
+// Only elements whose first attribute is "name" and second is "number" (in
+// exactly that order) are accepted; the number is parsed with std::atoi.
+// Any other element or attribute layout is ignored.
+void CollectionReader::startElementHandler(const char *tag, const char **attributes) {
+    static const std::string ENTITY = "entity";
+    if (ENTITY != tag) {
+        return;
+    }
+
+    // Two complete name/value pairs are needed, i.e. four non-null slots.
+    int present = 0;
+    while (present < 4 && attributes[present] != 0) {
+        ++present;
+    }
+    if (present < 4) {
+        return;
+    }
+
+    static const std::string _name = "name";
+    static const std::string _number = "number";
+    const bool expectedLayout = (_name == attributes[0]) && (_number == attributes[2]);
+    if (expectedLayout) {
+        myCollection[attributes[1]] = std::atoi(attributes[3]);
+    }
+}
diff --git a/reader/src/formats/html/HtmlEntityCollection.h b/reader/src/formats/html/HtmlEntityCollection.h
new file mode 100644
index 0000000..6f70491
--- /dev/null
+++ b/reader/src/formats/html/HtmlEntityCollection.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __HTMLENTITYCOLLECTION_H__
+#define __HTMLENTITYCOLLECTION_H__
+
+#include <string>
+#include <map>
+
+// Static lookup of HTML entity names. Not instantiable: the constructor is
+// private and only declared here.
+class HtmlEntityCollection {
+
+public:
+ // Returns the number registered for `name`, or 0 if the name is unknown.
+ static int symbolNumber(const std::string &name);
+
+private:
+ // Name -> number table, populated lazily by symbolNumber().
+ static std::map<std::string,int> ourCollection;
+
+private:
+ HtmlEntityCollection();
+};
+
+#endif /* __HTMLENTITYCOLLECTION_H__ */
diff --git a/reader/src/formats/html/HtmlPlugin.cpp b/reader/src/formats/html/HtmlPlugin.cpp
new file mode 100644
index 0000000..279e096
--- /dev/null
+++ b/reader/src/formats/html/HtmlPlugin.cpp
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLStringUtil.h>
+#include <ZLFile.h>
+#include <ZLInputStream.h>
+
+#include "HtmlPlugin.h"
+#include "HtmlDescriptionReader.h"
+#include "HtmlBookReader.h"
+#include "HtmlReaderStream.h"
+#include "../txt/PlainTextFormat.h"
+#include "../util/MiscUtil.h"
+#include "../../library/Book.h"
+#include "../../bookmodel/BookModel.h"
+
+// Accepts files whose extension is exactly "htm" or ends with "html"
+// (so e.g. "xhtml" is accepted as well).
+bool HtmlPlugin::acceptsFile(const ZLFile &file) const {
+    const std::string &ext = file.extension();
+    if (ext == "htm") {
+        return true;
+    }
+    return ZLStringUtil::stringEndsWith(ext, "html");
+}
+
+// Detects encoding/language for `book` and then runs HtmlDescriptionReader
+// over the raw file. Returns false when the file has no input stream or no
+// encoding could be determined.
+bool HtmlPlugin::readMetaInfo(Book &book) const {
+    shared_ptr<ZLInputStream> rawStream = book.file().inputStream();
+    if (rawStream.isNull()) {
+        return false;
+    }
+
+    // Detection runs on a tag-stripped view of the document, capped at 50000 bytes.
+    shared_ptr<ZLInputStream> textOnlyStream = new HtmlReaderStream(rawStream, 50000);
+    detectEncodingAndLanguage(book, *textOnlyStream);
+    if (!book.encoding().empty()) {
+        HtmlDescriptionReader(book).readDocument(*rawStream);
+        return true;
+    }
+    return false;
+}
+
+// Parses the book's HTML file into `model` with HtmlBookReader.
+// Returns false only when the file has no input stream.
+bool HtmlPlugin::readModel(BookModel &model) const {
+    const Book &book = *model.book();
+    const ZLFile &file = book.file();
+
+    shared_ptr<ZLInputStream> input = file.inputStream();
+    if (input.isNull()) {
+        return false;
+    }
+
+    // Use the stored plain-text format if there is one, otherwise detect it
+    // from the stream contents.
+    PlainTextFormat format(file);
+    if (!format.initialized()) {
+        PlainTextFormatDetector detector;
+        detector.detect(*input, format);
+    }
+
+    const std::string &path = file.path();
+    std::string directoryPrefix = MiscUtil::htmlDirectoryPrefix(path);
+    HtmlBookReader reader(directoryPrefix, model, format, book.encoding());
+    reader.setFileName(MiscUtil::htmlFileName(path));
+    reader.readDocument(*input);
+
+    return true;
+}
+
+// Creates the format options page for `file`, reusing the plain-text info
+// page with the "<PRE>" resource key. The caller receives a newly allocated
+// object.
+// NOTE(review): the meaning of the trailing `false` argument is defined by
+// PlainTextInfoPage, which is not visible here - confirm.
+FormatInfoPage *HtmlPlugin::createInfoPage(ZLOptionsDialog &dialog, const ZLFile &file) {
+ return new PlainTextInfoPage(dialog, file, ZLResourceKey("<PRE>"), false);
+}
+
+// No-op for HTML: the book is left untouched and success is reported.
+// (readMetaInfo() performs its own encoding/language detection.)
+bool HtmlPlugin::readLanguageAndEncoding(Book &) const {
+    return true;
+}
diff --git a/reader/src/formats/html/HtmlPlugin.h b/reader/src/formats/html/HtmlPlugin.h
new file mode 100644
index 0000000..c66a108
--- /dev/null
+++ b/reader/src/formats/html/HtmlPlugin.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __HTMLPLUGIN_H__
+#define __HTMLPLUGIN_H__
+
+#include "../FormatPlugin.h"
+
+// FormatPlugin implementation for HTML files.
+class HtmlPlugin : public FormatPlugin {
+
+public:
+ HtmlPlugin();
+ ~HtmlPlugin();
+ // Always false for this plugin (see inline definition below).
+ bool providesMetaInfo() const;
+ // True for "htm" and for extensions ending in "html".
+ bool acceptsFile(const ZLFile &file) const;
+ bool readMetaInfo(Book &book) const;
+ bool readLanguageAndEncoding(Book &book) const;
+ bool readModel(BookModel &model) const;
+ // Returns a newly allocated info page.
+ FormatInfoPage *createInfoPage(ZLOptionsDialog &dialog, const ZLFile &file);
+};
+
+inline HtmlPlugin::HtmlPlugin() {}
+inline HtmlPlugin::~HtmlPlugin() {}
+inline bool HtmlPlugin::providesMetaInfo() const { return false; }
+
+#endif /* __HTMLPLUGIN_H__ */
diff --git a/reader/src/formats/html/HtmlReader.cpp b/reader/src/formats/html/HtmlReader.cpp
new file mode 100644
index 0000000..a5ce7fa
--- /dev/null
+++ b/reader/src/formats/html/HtmlReader.cpp
@@ -0,0 +1,373 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <algorithm>
+#include <cctype>
+
+#include <ZLInputStream.h>
+#include <ZLXMLReader.h>
+#include <ZLFile.h>
+#include <ZLStringUtil.h>
+#include <ZLUnicodeUtil.h>
+
+#include "HtmlReader.h"
+#include "HtmlEntityCollection.h"
+
+// Forwards `encoding` to EncodedTextReader; no other state is set up here.
+HtmlReader::HtmlReader(const std::string &encoding) : EncodedTextReader(encoding) {
+}
+
+// Nothing to release at this level.
+HtmlReader::~HtmlReader() {
+}
+
+// Resets `tag` for a newly parsed tag whose raw name text is `name`.
+//
+// - Attributes are always cleared.
+// - An empty `name` yields an empty tag.Name and leaves tag.Start untouched.
+// - A leading '/' marks a closing tag (tag.Start == false) and is stripped.
+// - The resulting name is upper-cased in place.
+void HtmlReader::setTag(HtmlTag &tag, const std::string &name) {
+    tag.Attributes.clear();
+
+    if (name.length() == 0) {
+        tag.Name = name;
+        return;
+    }
+
+    tag.Start = name[0] != '/';
+    if (tag.Start) {
+        tag.Name = name;
+    } else {
+        tag.Name = name.substr(1);
+    }
+
+    const std::size_t len = tag.Name.length();
+    for (std::size_t i = 0; i < len; ++i) {
+        // <cctype> functions require a value representable as unsigned char;
+        // passing a negative plain char (any non-ASCII byte) is undefined.
+        tag.Name[i] = std::toupper((unsigned char)tag.Name[i]);
+    }
+}
+
+// States of the tokenizer in HtmlReader::readDocument().
+enum ParseState {
+ // Plain character data; '<' starts a tag, '&' starts a character reference.
+ PS_TEXT,
+ // The character right after '<': '!' selects PS_COMMENT, anything else PS_TAGNAME.
+ PS_TAGSTART,
+ // Reading the tag name, up to '>', '/' or whitespace.
+ PS_TAGNAME,
+ // A '/' was seen inside a tag; skipping up to the closing '>'.
+ PS_WAIT_END_OF_TAG,
+ // Reading an attribute name, up to '>', '/', '=' or whitespace.
+ PS_ATTRIBUTENAME,
+ // Reading an attribute value (entered after '=').
+ PS_ATTRIBUTEVALUE,
+ // Tag with an empty name; skipping up to '>'.
+ PS_SKIPTAG,
+ // After "<!": a comment terminated by "-->", or (if no '-' follows) treated as a tag name.
+ PS_COMMENT,
+ // After '&' in character data.
+ PS_SPECIAL,
+ // After '&' inside an attribute value.
+ PS_SPECIAL_IN_ATTRIBUTEVALUE,
+};
+
+// Kind of character reference being read after '&' in readDocument().
+enum SpecialType {
+ // Nothing after '&' has been classified yet.
+ ST_UNKNOWN,
+ // '#' seen; decimal vs. hexadecimal not yet decided.
+ ST_NUM,
+ // Named reference (first character was a letter).
+ ST_NAME,
+ // Decimal numeric reference ("#" followed by a digit).
+ ST_DEC,
+ // Hexadecimal numeric reference ("#x").
+ ST_HEX
+};
+
+// Tells whether `ch` may continue a character reference of kind `type`:
+//   ST_NAME - letters and digits (names such as "frac12" or "sup2" contain
+//             digits; the first character is checked to be a letter by the
+//             caller before ST_NAME is selected),
+//   ST_DEC  - decimal digits,
+//   ST_HEX  - hexadecimal digits.
+// Any other `type` allows nothing.
+static bool allowSymbol(SpecialType type, char ch) {
+    // <cctype> functions are undefined for negative plain-char values.
+    const unsigned char uch = (unsigned char)ch;
+    return
+        (type == ST_NAME && std::isalnum(uch)) ||
+        (type == ST_DEC && std::isdigit(uch)) ||
+        (type == ST_HEX && std::isxdigit(uch));
+}
+
+// Converts the text of a character reference (without the leading '&' and the
+// trailing ';') to its number.
+//   ST_NAME - looked up in HtmlEntityCollection (0 if unknown),
+//   ST_DEC  - `txt` is "#<digits>"; the '#' is skipped,
+//   ST_HEX  - `txt` is "#x<hex digits>"; the "#x" prefix is skipped.
+// Any other type yields 0.
+static int specialSymbolNumber(SpecialType type, const std::string &txt) {
+    if (type == ST_NAME) {
+        return HtmlEntityCollection::symbolNumber(txt);
+    }
+    if (type == ST_DEC) {
+        return std::strtol(txt.c_str() + 1, 0, 10);
+    }
+    if (type == ST_HEX) {
+        return std::strtol(txt.c_str() + 2, 0, 16);
+    }
+    return 0;
+}
+
+// Appends `from` to `to` and empties `from`.
+// With a converter present the bytes go through it (and the converter is
+// reset afterwards); without one they are appended unchanged.
+void HtmlReader::appendString(std::string &to, std::string &from) {
+    if (!myConverter.isNull()) {
+        myConverter->convert(to, from);
+        myConverter->reset();
+    } else {
+        to.append(from);
+    }
+    from.clear();
+}
+
+// Tokenizes the HTML in `stream` and reports it through the handler callbacks.
+//
+// The stream is opened here, consumed in BUFSIZE-byte chunks and closed before
+// returning; if it cannot be opened no handler is called at all. Otherwise
+// startDocumentHandler() runs first and endDocumentHandler() last. A false
+// return from tagHandler(), or from characterDataHandler() for plain text,
+// stops the parse early (endDocumentHandler() is still called).
+//
+// Parser state survives chunk boundaries: a token cut by the end of a chunk is
+// parked in currentString / specialString and completed from the next chunk.
+//
+// Character references ("&name;", "&#NNN;", "&#xHH;"):
+//  - numbers 128..159 are treated as a single byte in the document encoding,
+//  - other non-zero numbers are emitted as UTF-8,
+//  - unknown references are emitted literally ("&name;"),
+//  - a '&' that turns out not to start a reference is emitted literally
+//    together with whatever followed it, and the character that ended it is
+//    re-examined in the enclosing state (so a '<', '"' or '>' directly after
+//    a bare '&' is still recognised).
+void HtmlReader::readDocument(ZLInputStream &stream) {
+    if (!stream.open()) {
+        return;
+    }
+
+    startDocumentHandler();
+
+    ParseState state = PS_TEXT;
+    SpecialType state_special = ST_UNKNOWN;
+    // Raw bytes of the tag name / attribute name / attribute value in progress.
+    std::string currentString;
+    // Attribute value built so far (already converted).
+    std::string attributeValueString;
+    // Text of the character reference in progress, without the leading '&'.
+    std::string specialString;
+    int quotationCounter = 0;
+    HtmlTag currentTag;
+    // Last two characters seen inside a comment, used to detect "-->".
+    char endOfComment[2] = "\0";
+
+    const std::size_t BUFSIZE = 2048;
+    char *buffer = new char[BUFSIZE];
+    std::size_t length;
+    // Stream offset of buffer[0].
+    std::size_t offset = 0;
+    do {
+        length = stream.read(buffer, BUFSIZE);
+        char *start = buffer;
+        char *endOfBuffer = buffer + length;
+        // ptr is advanced at the bottom of the loop so that a state can ask for
+        // the current character to be re-examined with `continue`.
+        char *ptr = buffer;
+        while (ptr < endOfBuffer) {
+            switch (state) {
+                case PS_TEXT:
+                    if (*ptr == '<') {
+                        if (!characterDataHandler(start, ptr - start, true)) {
+                            goto endOfProcessing;
+                        }
+                        start = ptr + 1;
+                        state = PS_TAGSTART;
+                        currentTag.Offset = offset + (ptr - buffer);
+                    } else if (*ptr == '&') {
+                        if (!characterDataHandler(start, ptr - start, true)) {
+                            goto endOfProcessing;
+                        }
+                        start = ptr + 1;
+                        state = PS_SPECIAL;
+                        state_special = ST_UNKNOWN;
+                    }
+                    break;
+                case PS_SPECIAL:
+                case PS_SPECIAL_IN_ATTRIBUTEVALUE:
+                {
+                    const bool inText = (state == PS_SPECIAL);
+                    // Set when the text after '&' is not a character reference.
+                    bool abandoned = false;
+                    if (state_special == ST_UNKNOWN) {
+                        if (*ptr == '#') {
+                            state_special = ST_NUM;
+                        } else if (std::isalpha((unsigned char)*ptr)) {
+                            state_special = ST_NAME;
+                        } else {
+                            abandoned = true;
+                        }
+                    } else if (state_special == ST_NUM) {
+                        if (*ptr == 'x') {
+                            state_special = ST_HEX;
+                        } else if (std::isdigit((unsigned char)*ptr)) {
+                            state_special = ST_DEC;
+                        } else {
+                            abandoned = true;
+                        }
+                    } else {
+                        if (*ptr == ';') {
+                            specialString.append(start, ptr - start);
+                            int number = specialSymbolNumber(state_special, specialString);
+                            if ((128 <= number) && (number <= 159)) {
+                                // Treated as one byte in the document encoding.
+                                char ch = number;
+                                if (inText) {
+                                    characterDataHandler(&ch, 1, true);
+                                } else if (myConverter.isNull()) {
+                                    // No converter: append unchanged, as appendString() does.
+                                    attributeValueString += ch;
+                                } else {
+                                    myConverter->convert(attributeValueString, &ch, &ch + 1);
+                                }
+                            } else if (number != 0) {
+                                char utf8[4];
+                                int len = ZLUnicodeUtil::ucs4ToUtf8(utf8, number);
+                                if (inText) {
+                                    characterDataHandler(utf8, len, false);
+                                } else {
+                                    attributeValueString.append(utf8, len);
+                                }
+                            } else {
+                                // Unknown reference: keep it as written.
+                                specialString = "&" + specialString + ";";
+                                if (inText) {
+                                    characterDataHandler(specialString.c_str(), specialString.length(), false);
+                                } else {
+                                    attributeValueString += specialString;
+                                }
+                            }
+                            specialString.erase();
+                            start = ptr + 1;
+                            state = inText ? PS_TEXT : PS_ATTRIBUTEVALUE;
+                        } else if (!allowSymbol(state_special, *ptr)) {
+                            abandoned = true;
+                        }
+                    }
+                    if (abandoned) {
+                        // Emit "&" plus everything consumed since it as literal
+                        // text, drop the partial reference, and let the enclosing
+                        // state look at the current character again.
+                        specialString.insert(0, "&");
+                        specialString.append(start, ptr - start);
+                        start = ptr;
+                        if (inText) {
+                            state = PS_TEXT;
+                            if (!characterDataHandler(specialString.c_str(), specialString.length(), false)) {
+                                goto endOfProcessing;
+                            }
+                        } else {
+                            state = PS_ATTRIBUTEVALUE;
+                            attributeValueString += specialString;
+                        }
+                        specialString.erase();
+                        continue;
+                    }
+                    break;
+                }
+                case PS_TAGSTART:
+                    state = (*ptr == '!') ? PS_COMMENT : PS_TAGNAME;
+                    break;
+                case PS_COMMENT:
+                    if ((endOfComment[0] == '\0') && (*ptr != '-')) {
+                        // "<!" not followed by '-' (e.g. a DOCTYPE): handle as a tag.
+                        state = PS_TAGNAME;
+                    } else if ((endOfComment[0] == '-') && (endOfComment[1] == '-') && (*ptr == '>')) {
+                        start = ptr + 1;
+                        state = PS_TEXT;
+                        endOfComment[0] = '\0';
+                        endOfComment[1] = '\0';
+                    } else {
+                        endOfComment[0] = endOfComment[1];
+                        endOfComment[1] = *ptr;
+                    }
+                    break;
+                case PS_WAIT_END_OF_TAG:
+                    if (*ptr == '>') {
+                        start = ptr + 1;
+                        state = PS_TEXT;
+                    }
+                    break;
+                case PS_TAGNAME:
+                    if (*ptr == '>' || *ptr == '/' || std::isspace((unsigned char)*ptr)) {
+                        currentString.append(start, ptr - start);
+                        start = ptr + 1;
+                        setTag(currentTag, currentString);
+                        currentString.erase();
+                        if (currentTag.Name == "") {
+                            state = *ptr == '>' ? PS_TEXT : PS_SKIPTAG;
+                        } else {
+                            if (*ptr == '>') {
+                                if (!tagHandler(currentTag)) {
+                                    goto endOfProcessing;
+                                }
+                                state = PS_TEXT;
+                            } else if (*ptr == '/') {
+                                // Self-closing form: report a start and then an end tag.
+                                if (!tagHandler(currentTag)) {
+                                    goto endOfProcessing;
+                                }
+                                currentTag.Start = false;
+                                if (!tagHandler(currentTag)) {
+                                    goto endOfProcessing;
+                                }
+                                state = PS_WAIT_END_OF_TAG;
+                            } else {
+                                state = PS_ATTRIBUTENAME;
+                            }
+                        }
+                    }
+                    break;
+                case PS_ATTRIBUTENAME:
+                    if (*ptr == '>' || *ptr == '/' || *ptr == '=' || std::isspace((unsigned char)*ptr)) {
+                        if (ptr != start || !currentString.empty()) {
+                            currentString.append(start, ptr - start);
+                            for (unsigned int i = 0; i < currentString.length(); ++i) {
+                                currentString[i] = std::toupper((unsigned char)currentString[i]);
+                            }
+                            currentTag.addAttribute(currentString);
+                            currentString.erase();
+                        }
+                        start = ptr + 1;
+                        if (*ptr == '>') {
+                            if (!tagHandler(currentTag)) {
+                                goto endOfProcessing;
+                            }
+                            state = PS_TEXT;
+                        } else if (*ptr == '/') {
+                            if (!tagHandler(currentTag)) {
+                                goto endOfProcessing;
+                            }
+                            currentTag.Start = false;
+                            if (!tagHandler(currentTag)) {
+                                goto endOfProcessing;
+                            }
+                            state = PS_WAIT_END_OF_TAG;
+                        } else {
+                            state = (*ptr == '=') ? PS_ATTRIBUTEVALUE : PS_ATTRIBUTENAME;
+                        }
+                    }
+                    break;
+                case PS_ATTRIBUTEVALUE:
+                    if (*ptr == '"') {
+                        // Count the opening quote (must be the first character of
+                        // the value) and every quote after it.
+                        if (((ptr == start) && currentString.empty()) || (quotationCounter > 0)) {
+                            ++quotationCounter;
+                        }
+                    } else if (*ptr == '&') {
+                        currentString.append(start, ptr - start);
+                        start = ptr + 1;
+                        appendString(attributeValueString, currentString);
+                        state = PS_SPECIAL_IN_ATTRIBUTEVALUE;
+                        state_special = ST_UNKNOWN;
+                    } else if (quotationCounter != 1 && (*ptr == '>' || *ptr == '/' || std::isspace((unsigned char)*ptr))) {
+                        // quotationCounter == 1 means we are inside an open quote,
+                        // where these characters belong to the value.
+                        // The value may consist solely of text already moved to
+                        // attributeValueString (it ended right after a character
+                        // reference), so that has to count as "something was read".
+                        if (ptr != start || !currentString.empty() || !attributeValueString.empty()) {
+                            currentString.append(start, ptr - start);
+                            appendString(attributeValueString, currentString);
+                            if (!attributeValueString.empty() && attributeValueString[0] == '"') {
+                                attributeValueString = attributeValueString.substr(1, attributeValueString.length() - 2);
+                            }
+                            currentTag.setLastAttributeValue(attributeValueString);
+                            attributeValueString.erase();
+                            quotationCounter = 0;
+                        }
+                        start = ptr + 1;
+                        if (*ptr == '>') {
+                            if (!tagHandler(currentTag)) {
+                                goto endOfProcessing;
+                            }
+                            state = PS_TEXT;
+                        } else if (*ptr == '/') {
+                            if (!tagHandler(currentTag)) {
+                                goto endOfProcessing;
+                            }
+                            currentTag.Start = false;
+                            if (!tagHandler(currentTag)) {
+                                goto endOfProcessing;
+                            }
+                            state = PS_WAIT_END_OF_TAG;
+                        } else {
+                            state = PS_ATTRIBUTENAME;
+                        }
+                    }
+                    break;
+                case PS_SKIPTAG:
+                    if (*ptr == '>') {
+                        start = ptr + 1;
+                        state = PS_TEXT;
+                    }
+                    break;
+            }
+            ++ptr;
+        }
+        // Carry the unfinished token over to the next chunk.
+        if (start != endOfBuffer) {
+            switch (state) {
+                case PS_TEXT:
+                    if (!characterDataHandler(start, endOfBuffer - start, true)) {
+                        goto endOfProcessing;
+                    }
+                    break;
+                case PS_TAGNAME:
+                case PS_ATTRIBUTENAME:
+                case PS_ATTRIBUTEVALUE:
+                    currentString.append(start, endOfBuffer - start);
+                    break;
+                case PS_SPECIAL:
+                case PS_SPECIAL_IN_ATTRIBUTEVALUE:
+                    specialString.append(start, endOfBuffer - start);
+                    break;
+                case PS_TAGSTART:
+                case PS_SKIPTAG:
+                case PS_COMMENT:
+                case PS_WAIT_END_OF_TAG:
+                    break;
+            }
+        }
+        offset += length;
+    } while (length == BUFSIZE);
+endOfProcessing:
+    delete[] buffer;
+
+    endDocumentHandler();
+
+    stream.close();
+}
diff --git a/reader/src/formats/html/HtmlReader.h b/reader/src/formats/html/HtmlReader.h
new file mode 100644
index 0000000..876fad8
--- /dev/null
+++ b/reader/src/formats/html/HtmlReader.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __HTMLREADER_H__
+#define __HTMLREADER_H__
+
+#include <string>
+#include <vector>
+
+#include <ZLEncodingConverter.h>
+#include "../EncodedTextReader.h"
+
+class ZLInputStream;
+
+// Callback-driven HTML tokenizer. Subclasses implement the four handlers
+// below; readDocument() drives them while scanning a stream.
+class HtmlReader : public EncodedTextReader {
+
+public:
+ // One attribute of a tag.
+ struct HtmlAttribute {
+ std::string Name;
+ std::string Value;
+ // True once setValue() has been called, i.e. the attribute had a value.
+ bool HasValue;
+
+ HtmlAttribute(const std::string &name);
+ ~HtmlAttribute();
+ void setValue(const std::string &value);
+ };
+
+ // A parsed tag as passed to tagHandler(). Not copyable.
+ struct HtmlTag {
+ // Upper-cased tag name without the leading '/' of a closing tag.
+ std::string Name;
+ // Stream offset of the tag's '<' character (set by readDocument()).
+ std::size_t Offset;
+ // True for an opening tag, false for a closing tag.
+ bool Start;
+ std::vector<HtmlAttribute> Attributes;
+
+ HtmlTag();
+ ~HtmlTag();
+ void addAttribute(const std::string &name);
+ // Sets the value of the most recently added attribute; no-op if there is none.
+ void setLastAttributeValue(const std::string &value);
+
+ private:
+ // Copying is disabled (declared, not defined here).
+ HtmlTag(const HtmlTag&);
+ const HtmlTag &operator = (const HtmlTag&);
+ };
+
+private:
+ // Resets `tag` from the raw tag-name text.
+ static void setTag(HtmlTag &tag, const std::string &fullName);
+
+public:
+ // Opens `stream`, tokenizes it through the handlers and closes it again.
+ virtual void readDocument(ZLInputStream &stream);
+
+protected:
+ HtmlReader(const std::string &encoding);
+ virtual ~HtmlReader();
+
+protected:
+ virtual void startDocumentHandler() = 0;
+ virtual void endDocumentHandler() = 0;
+
+ // returns false iff processing must be stopped
+ virtual bool tagHandler(const HtmlTag &tag) = 0;
+ // returns false iff processing must be stopped
+ // NOTE(review): `convert` appears to tell whether `text` is still in the
+ // document encoding (true) or already UTF-8 (false) - confirm in implementers.
+ virtual bool characterDataHandler(const char *text, std::size_t len, bool convert) = 0;
+
+private:
+ // Appends `from` to `to` (through the encoding converter when there is one)
+ // and clears `from`.
+ void appendString(std::string &to, std::string &from);
+};
+
+// A freshly constructed attribute has a name but no value yet.
+inline HtmlReader::HtmlAttribute::HtmlAttribute(const std::string &name) : Name(name), HasValue(false) {}
+inline HtmlReader::HtmlAttribute::~HtmlAttribute() {}
+// Stores `value` and records that the attribute has one.
+inline void HtmlReader::HtmlAttribute::setValue(const std::string &value) { Value = value; HasValue = true; }
+
+// Offset is zero-initialised so that a tag never carries an indeterminate
+// value, even before the parser assigns the real stream offset.
+inline HtmlReader::HtmlTag::HtmlTag() : Offset(0), Start(true) {}
+inline HtmlReader::HtmlTag::~HtmlTag() {}
+// Appends a new, value-less attribute called `name`.
+inline void HtmlReader::HtmlTag::addAttribute(const std::string &name) { Attributes.push_back(HtmlAttribute(name)); }
+// Sets the value of the most recently added attribute; no-op when the tag has none.
+inline void HtmlReader::HtmlTag::setLastAttributeValue(const std::string &value) { if (!Attributes.empty()) Attributes.back().setValue(value); }
+
+#endif /* __HTMLREADER_H__ */
diff --git a/reader/src/formats/html/HtmlReaderStream.cpp b/reader/src/formats/html/HtmlReaderStream.cpp
new file mode 100644
index 0000000..08c43ae
--- /dev/null
+++ b/reader/src/formats/html/HtmlReaderStream.cpp
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2008-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cstdlib>
+#include <cstring>
+#include <algorithm>
+
+#include "HtmlReaderStream.h"
+#include "HtmlReader.h"
+
+// File-local HtmlReader that copies the document's character data into a
+// caller-supplied buffer of at most maxSize bytes, inserting '\n' at tags and
+// skipping text inside SCRIPT elements (see the handler definitions below).
+class HtmlTextOnlyReader : public HtmlReader {
+
+public:
+ // `buffer` must have room for `maxSize` bytes; it is not owned.
+ HtmlTextOnlyReader(char *buffer, std::size_t maxSize);
+ // Number of bytes written to the buffer so far.
+ std::size_t size() const;
+
+private:
+ void startDocumentHandler();
+ void endDocumentHandler();
+
+ bool tagHandler(const HtmlTag &tag);
+ bool characterDataHandler(const char *text, std::size_t len, bool convert);
+
+private:
+ // Destination buffer (not owned) and its capacity.
+ char *myBuffer;
+ std::size_t myMaxSize;
+ // Bytes of myBuffer already filled.
+ std::size_t myFilledSize;
+ // True while inside a SCRIPT element.
+ bool myIgnoreText;
+};
+
+// Starts with an empty output and an empty encoding name passed to HtmlReader.
+HtmlTextOnlyReader::HtmlTextOnlyReader(char *buffer, std::size_t maxSize) : HtmlReader(std::string()), myBuffer(buffer), myMaxSize(maxSize), myFilledSize(0), myIgnoreText(false) {
+}
+
+// Number of bytes written to the buffer so far.
+std::size_t HtmlTextOnlyReader::size() const {
+ return myFilledSize;
+}
+
+// Nothing to prepare.
+void HtmlTextOnlyReader::startDocumentHandler() {
+}
+
+// Nothing to finalise.
+void HtmlTextOnlyReader::endDocumentHandler() {
+}
+
+// Tag callback: tracks whether we are inside a SCRIPT element and separates
+// text runs with a single '\n' (never at the very start, never doubled).
+// Returns false once the buffer is full, which stops the parser.
+bool HtmlTextOnlyReader::tagHandler(const HtmlTag &tag) {
+    if (tag.Name == "SCRIPT") {
+        myIgnoreText = tag.Start;
+    }
+
+    const bool hasRoom = myFilledSize < myMaxSize;
+    const bool hasText = myFilledSize > 0;
+    if (hasRoom && hasText) {
+        if (myBuffer[myFilledSize - 1] != '\n') {
+            myBuffer[myFilledSize] = '\n';
+            ++myFilledSize;
+        }
+    }
+
+    return myFilledSize < myMaxSize;
+}
+
+// Text callback: copies as much of `text` as still fits into the buffer,
+// unless text is currently being ignored (inside SCRIPT).
+// Returns false once the buffer is full, which stops the parser.
+bool HtmlTextOnlyReader::characterDataHandler(const char *text, std::size_t len, bool) {
+    if (myIgnoreText) {
+        return myFilledSize < myMaxSize;
+    }
+
+    const std::size_t room = myMaxSize - myFilledSize;
+    const std::size_t count = (len < room) ? len : room;
+    std::memcpy(myBuffer + myFilledSize, text, count);
+    myFilledSize += count;
+
+    return myFilledSize < myMaxSize;
+}
+
+// Wraps `base`; at most `maxSize` bytes of text will be extracted on open().
+// No buffer is allocated until open(). myOffset starts at 0 so that offset()
+// is well defined even before the stream has been opened.
+HtmlReaderStream::HtmlReaderStream(shared_ptr<ZLInputStream> base, std::size_t maxSize) : myBase(base), myBuffer(0), mySize(maxSize), myOffset(0) {
+}
+
+// Releases the text buffer, if any.
+HtmlReaderStream::~HtmlReaderStream() {
+    close();
+}
+
+// Reads the base stream through HtmlTextOnlyReader and keeps the extracted
+// text in an internal buffer. Afterwards mySize holds the number of bytes
+// actually extracted and the read position is reset to 0.
+// Returns false when there is no base stream or it cannot be opened.
+bool HtmlReaderStream::open() {
+    if (myBase.isNull() || !myBase->open()) {
+        return false;
+    }
+    // A second open() without an intervening close() must not leak the
+    // previous buffer (delete[] on a null pointer is a no-op).
+    delete[] myBuffer;
+    myBuffer = new char[mySize];
+    HtmlTextOnlyReader reader(myBuffer, mySize);
+    reader.readDocument(*myBase);
+    mySize = reader.size();
+    myOffset = 0;
+    myBase->close();
+    return true;
+}
+
+// Copies up to `maxSize` bytes of the extracted text, starting at the current
+// read position, into `buffer` and advances the position.
+// A null `buffer` skips the bytes without copying.
+// Returns the number of bytes consumed; 0 when the stream is not open.
+std::size_t HtmlReaderStream::read(char *buffer, std::size_t maxSize) {
+    if (myBuffer == 0) {
+        // Not opened (or already closed): there is nothing to read from.
+        return 0;
+    }
+    maxSize = std::min(maxSize, mySize - myOffset);
+    if (buffer != 0) {
+        // Read from the current position, not from the start of the buffer;
+        // otherwise consecutive reads would return the same bytes again.
+        std::memcpy(buffer, myBuffer + myOffset, maxSize);
+    }
+    myOffset += maxSize;
+    return maxSize;
+}
+
+// Frees the extracted-text buffer. Safe to call repeatedly.
+void HtmlReaderStream::close() {
+    // delete[] on a null pointer does nothing, so no explicit check is needed.
+    delete[] myBuffer;
+    myBuffer = 0;
+}
+
+// Moves the read position to `offset` (absolute) or by `offset` (relative to
+// the current position). The result is clamped to the range [0, mySize].
+void HtmlReaderStream::seek(int offset, bool absoluteOffset) {
+    int target = offset;
+    if (!absoluteOffset) {
+        target += myOffset;
+    }
+    const std::size_t clamped = (target > 0) ? (std::size_t)target : 0;
+    myOffset = (clamped < mySize) ? clamped : mySize;
+}
+
+// Current read position within the extracted text.
+std::size_t HtmlReaderStream::offset() const {
+ return myOffset;
+}
+
+// Returns mySize: the number of extracted bytes once open() has run (before
+// that it still holds the maximum size given to the constructor).
+std::size_t HtmlReaderStream::sizeOfOpened() {
+ return mySize;
+}
diff --git a/reader/src/formats/html/HtmlReaderStream.h b/reader/src/formats/html/HtmlReaderStream.h
new file mode 100644
index 0000000..c5c15b8
--- /dev/null
+++ b/reader/src/formats/html/HtmlReaderStream.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2008-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __HTMLREADERSTREAM_H__
+#define __HTMLREADERSTREAM_H__
+
+#include <shared_ptr.h>
+#include <ZLInputStream.h>
+
+// ZLInputStream that exposes the text content of an HTML stream: on open()
+// the base stream is parsed and up to maxSize bytes of text are buffered.
+class HtmlReaderStream : public ZLInputStream {
+
+public:
+ HtmlReaderStream(shared_ptr<ZLInputStream> base, std::size_t maxSize);
+ ~HtmlReaderStream();
+
+private:
+ // NOTE(review): these are declared private here, so they are presumably
+ // only reached through the ZLInputStream interface - confirm.
+ bool open();
+ std::size_t read(char *buffer, std::size_t maxSize);
+ void close();
+
+ void seek(int offset, bool absoluteOffset);
+ std::size_t offset() const;
+ std::size_t sizeOfOpened();
+
+private:
+ // Underlying HTML stream.
+ shared_ptr<ZLInputStream> myBase;
+ // Extracted text; allocated by open(), released by close(). Null when closed.
+ char *myBuffer;
+ // Maximum size before open(); number of extracted bytes afterwards.
+ std::size_t mySize;
+ // Current read position within myBuffer.
+ std::size_t myOffset;
+};
+
+#endif /* __HTMLREADERSTREAM_H__ */
diff --git a/reader/src/formats/html/HtmlTagActions.h b/reader/src/formats/html/HtmlTagActions.h
new file mode 100644
index 0000000..7da3f20
--- /dev/null
+++ b/reader/src/formats/html/HtmlTagActions.h
@@ -0,0 +1,158 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __HTMLTAGACTIONS_H__
+#define __HTMLTAGACTIONS_H__
+
+#include <set>
+
+#include "HtmlBookReader.h"
+
+// Abstract base class for actions attached to an HtmlBookReader.
+// Subclasses implement run() for a parsed tag.
+// NOTE(review): how actions are registered and when reset() is invoked is
+// decided by HtmlBookReader, which is not visible here - confirm.
+class HtmlTagAction {
+
+protected:
+ HtmlTagAction(HtmlBookReader &reader);
+
+public:
+ virtual ~HtmlTagAction();
+ virtual void run(const HtmlReader::HtmlTag &tag) = 0;
+ virtual void reset();
+
+protected:
+ // Shortcut to the owning reader's BookReader.
+ BookReader &bookReader();
+
+protected:
+ // Reader this action belongs to (held by reference, not owned).
+ HtmlBookReader &myReader;
+};
+
+// Concrete action carrying no state of its own.
+// NOTE(review): judging by the name, run() is a no-op - confirm in the .cpp.
+class DummyHtmlTagAction : public HtmlTagAction {
+
+public:
+ DummyHtmlTagAction(HtmlBookReader &reader);
+ void run(const HtmlReader::HtmlTag &tag);
+};
+
+// Action parameterised by an FBTextKind.
+// NOTE(review): presumably run() switches the text control `myKind` on/off
+// for start/end tags - the implementation is not visible here; confirm.
+class HtmlControlTagAction : public HtmlTagAction {
+
+public:
+ HtmlControlTagAction(HtmlBookReader &reader, FBTextKind kind);
+ void run(const HtmlReader::HtmlTag &tag);
+
+private:
+ // Text kind supplied at construction.
+ FBTextKind myKind;
+};
+
+// Action parameterised by an FBTextKind.
+// NOTE(review): presumably used for heading tags - the implementation is not
+// visible here; confirm.
+class HtmlHeaderTagAction : public HtmlTagAction {
+
+public:
+ HtmlHeaderTagAction(HtmlBookReader &reader, FBTextKind kind);
+ void run(const HtmlReader::HtmlTag &tag);
+
+private:
+ // Text kind supplied at construction.
+ FBTextKind myKind;
+};
+
+// Action that keeps a set of tag names.
+// NOTE(review): presumably the set tracks currently open tags whose content
+// is to be ignored - the implementation is not visible here; confirm.
+class HtmlIgnoreTagAction : public HtmlTagAction {
+
+public:
+ HtmlIgnoreTagAction(HtmlBookReader &reader);
+ void run(const HtmlReader::HtmlTag &tag);
+
+private:
+ std::set<std::string> myTagNames;
+};
+
+// Action that remembers a hyperlink kind between calls and overrides reset().
+// Subclasses can read and change the stored kind through the protected
+// accessors.
+// NOTE(review): presumably handles anchor tags - the implementation is not
+// visible here; confirm.
+class HtmlHrefTagAction : public HtmlTagAction {
+
+public:
+ HtmlHrefTagAction(HtmlBookReader &reader);
+ void run(const HtmlReader::HtmlTag &tag);
+ void reset();
+
+protected:
+ FBTextKind hyperlinkType() const;
+ void setHyperlinkType(FBTextKind hyperlinkType);
+
+private:
+ FBTextKind myHyperlinkType;
+};
+
+// Stateless action.
+// NOTE(review): presumably handles image tags - the implementation is not
+// visible here; confirm.
+class HtmlImageTagAction : public HtmlTagAction {
+
+public:
+ HtmlImageTagAction(HtmlBookReader &reader);
+ void run(const HtmlReader::HtmlTag &tag);
+};
+
+// Action configured with a BreakType.
+// NOTE(review): presumably run() inserts a paragraph break at the start
+// and/or end tag according to myBreakType - confirm in the .cpp.
+class HtmlBreakTagAction : public HtmlTagAction {
+
+public:
+ // Bit flags; BREAK_AT_START_AND_AT_END is the union of the other two.
+ enum BreakType {
+ BREAK_AT_START = 1,
+ BREAK_AT_END = 2,
+ BREAK_AT_START_AND_AT_END = BREAK_AT_START | BREAK_AT_END
+ };
+ HtmlBreakTagAction(HtmlBookReader &reader, BreakType breakType);
+ void run(const HtmlReader::HtmlTag &tag);
+
+private:
+ // Break behaviour supplied at construction.
+ BreakType myBreakType;
+};
+
+// Stateless action.
+// NOTE(review): presumably handles preformatted-text tags - the
+// implementation is not visible here; confirm.
+class HtmlPreTagAction : public HtmlTagAction {
+
+public:
+ HtmlPreTagAction(HtmlBookReader &reader);
+ void run(const HtmlReader::HtmlTag &tag);
+};
+
+// Action configured with a start index.
+// NOTE(review): presumably handles list container tags, with startIndex
+// selecting the initial item number - the implementation is not visible
+// here; confirm.
+class HtmlListTagAction : public HtmlTagAction {
+
+public:
+ HtmlListTagAction(HtmlBookReader &reader, int startIndex);
+ void run(const HtmlReader::HtmlTag &tag);
+
+private:
+ // Start index supplied at construction.
+ int myStartIndex;
+};
+
+// Stateless action.
+// NOTE(review): presumably handles list item tags - the implementation is
+// not visible here; confirm.
+class HtmlListItemTagAction : public HtmlTagAction {
+
+public:
+ HtmlListItemTagAction(HtmlBookReader &reader);
+ void run(const HtmlReader::HtmlTag &tag);
+};
+
+// Stateless action.
+// NOTE(review): presumably handles table tags - the implementation is not
+// visible here; confirm.
+class HtmlTableTagAction : public HtmlTagAction {
+
+public:
+ HtmlTableTagAction(HtmlBookReader &reader);
+ void run(const HtmlReader::HtmlTag &tag);
+};
+
+// Stateless action.
+// NOTE(review): presumably handles style tags - the implementation is not
+// visible here; confirm.
+class HtmlStyleTagAction : public HtmlTagAction {
+
+public:
+ HtmlStyleTagAction(HtmlBookReader &reader);
+ void run(const HtmlReader::HtmlTag &tag);
+};
+
+// Returns the BookReader owned by the associated HtmlBookReader. This reaches
+// directly into HtmlBookReader::myBookReader, so HtmlTagAction needs access
+// to that member.
+inline BookReader &HtmlTagAction::bookReader() { return myReader.myBookReader; }
+
+#endif /* __HTMLTAGACTIONS_H__ */
diff --git a/reader/src/formats/oeb/NCXReader.cpp b/reader/src/formats/oeb/NCXReader.cpp
new file mode 100644
index 0000000..e824e16
--- /dev/null
+++ b/reader/src/formats/oeb/NCXReader.cpp
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cstdlib>
+
+#include "NCXReader.h"
+#include "../util/MiscUtil.h"
+#include "../util/EntityFilesCollector.h"
+
+// Starts outside <navMap>. myPlayIndex is the fallback order given to a
+// navPoint that has no playOrder attribute; it starts at -65535 and is
+// incremented each time it is used.
+NCXReader::NCXReader(BookReader &modelReader) :
+	myModelReader(modelReader),
+	myReadState(READ_NONE),
+	myPlayIndex(-65535) {
+}
+
+// Local (prefix-stripped) element names recognised in an NCX document.
+static const std::string TAG_NAVMAP("navMap");
+static const std::string TAG_NAVPOINT("navPoint");
+static const std::string TAG_NAVLABEL("navLabel");
+static const std::string TAG_CONTENT("content");
+static const std::string TAG_TEXT("text");
+
+// Called for every opening tag of the NCX document. Drives the
+// READ_NONE -> READ_MAP -> READ_POINT -> READ_LABEL -> READ_TEXT state machine
+// and pushes a NavPoint for each <navPoint> that is opened.
+void NCXReader::startElementHandler(const char *fullTag, const char **attributes) {
+	// Compare on the local name only: drop everything up to the last ':'.
+	const std::string qualifiedName = fullTag;
+	const std::size_t colon = qualifiedName.rfind(':');
+	const std::string tag =
+		(colon == std::string::npos) ? qualifiedName : qualifiedName.substr(colon + 1);
+
+	if (myReadState == READ_NONE) {
+		if (tag == TAG_NAVMAP) {
+			myReadState = READ_MAP;
+		}
+		return;
+	}
+
+	if (myReadState == READ_LABEL) {
+		if (tag == TAG_TEXT) {
+			myReadState = READ_TEXT;
+		}
+		return;
+	}
+
+	if (myReadState != READ_MAP && myReadState != READ_POINT) {
+		// READ_TEXT: opening tags inside <text> are ignored.
+		return;
+	}
+
+	if (tag == TAG_NAVPOINT) {
+		// A point without playOrder gets the next fallback index.
+		const char *order = attributeValue(attributes, "playOrder");
+		int playOrder;
+		if (order != 0) {
+			playOrder = std::atoi(order);
+		} else {
+			playOrder = myPlayIndex++;
+		}
+		// The current stack depth becomes the nesting level of the new point.
+		myPointStack.push_back(NavPoint(playOrder, myPointStack.size()));
+		myReadState = READ_POINT;
+		return;
+	}
+
+	if (myReadState == READ_POINT) {
+		if (tag == TAG_NAVLABEL) {
+			myReadState = READ_LABEL;
+		} else if (tag == TAG_CONTENT) {
+			const char *src = attributeValue(attributes, "src");
+			if (src != 0) {
+				myPointStack.back().ContentHRef = MiscUtil::decodeHtmlURL(src);
+			}
+		}
+	}
+}
+
+// Called for every closing tag of the NCX document. Unwinds the state machine
+// and, when a <navPoint> closes, stores the finished point in the navigation
+// map under its Order.
+void NCXReader::endElementHandler(const char *fullTag) {
+	// Compare on the local name only: drop everything up to the last ':'.
+	std::string tag = fullTag;
+	const std::size_t index = tag.rfind(':');
+	if (index != std::string::npos) {
+		tag = tag.substr(index + 1);
+	}
+	switch (myReadState) {
+		case READ_NONE:
+			break;
+		case READ_MAP:
+			if (TAG_NAVMAP == tag) {
+				myReadState = READ_NONE;
+			}
+			break;
+		case READ_POINT:
+			if (TAG_NAVPOINT == tag) {
+				NavPoint &finished = myPointStack.back();
+				if (finished.Text.empty()) {
+					// Points without a label still need visible text.
+					finished.Text = "...";
+				}
+				myNavigationMap[finished.Order] = finished;
+				myPointStack.pop_back();
+				// Return to the enclosing point, or to the map at top level.
+				myReadState = myPointStack.empty() ? READ_MAP : READ_POINT;
+			}
+			// The original fell through into READ_LABEL here. That was a no-op
+			// (a navPoint tag never equals navLabel, and a navLabel close in
+			// this state would only re-assign READ_POINT), so an explicit
+			// break keeps the behaviour and states the intent.
+			break;
+		case READ_LABEL:
+			if (TAG_NAVLABEL == tag) {
+				myReadState = READ_POINT;
+			}
+			break;
+		case READ_TEXT:
+			if (TAG_TEXT == tag) {
+				myReadState = READ_LABEL;
+			}
+			break;
+	}
+}
+
+// Accumulates character data into the label of the innermost open navPoint,
+// but only while inside a <text> element.
+void NCXReader::characterDataHandler(const char *text, std::size_t len) {
+	if (myReadState != READ_TEXT) {
+		return;
+	}
+	NavPoint &current = myPointStack.back();
+	current.Text.append(text, len);
+}
+
+// Returns the "xhtml" DTD list from EntityFilesCollector.
+const std::vector<std::string> &NCXReader::externalDTDs() const {
+	const std::vector<std::string> &dtds =
+		EntityFilesCollector::Instance().externalDTDs("xhtml");
+	return dtds;
+}
+
+// Read-only view of the points collected so far, keyed by NavPoint::Order.
+// The reference stays valid only as long as this reader does.
+const std::map<int,NCXReader::NavPoint> &NCXReader::navigationMap() const {
+	return this->myNavigationMap;
+}
+
+// Default constructor, needed because std::map::operator[] default-constructs
+// the mapped value before assignment. Order and Level are zeroed so that a
+// default-constructed point never holds indeterminate values.
+NCXReader::NavPoint::NavPoint() : Order(0), Level(0) {
+}
+
+// Creates a point with the given play order and nesting level; Text and
+// ContentHRef start empty.
+NCXReader::NavPoint::NavPoint(int order, std::size_t level) :
+	Order(order),
+	Level(level) {
+}
diff --git a/reader/src/formats/oeb/NCXReader.h b/reader/src/formats/oeb/NCXReader.h
new file mode 100644
index 0000000..c10d2ab
--- /dev/null
+++ b/reader/src/formats/oeb/NCXReader.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __NCXREADER_H__
+#define __NCXREADER_H__
+
+#include <map>
+#include <vector>
+
+#include <ZLXMLReader.h>
+
+#include "../../bookmodel/BookReader.h"
+
+// XML reader for an NCX table-of-contents document. While parsing it collects
+// one NavPoint per <navPoint> element into a map keyed by NavPoint::Order.
+class NCXReader : public ZLXMLReader {
+
+public:
+ // One table-of-contents entry.
+ struct NavPoint {
+ NavPoint();
+ NavPoint(int order, std::size_t level);
+
+ // Order: the playOrder attribute, or a generated fallback when it is absent.
+ // Level: size of the point stack at the moment the entry was opened.
+ // Text: accumulated <text> content ("..." is substituted if it is empty).
+ // ContentHRef: decoded src attribute of the <content> child.
+ int Order;
+ std::size_t Level;
+ std::string Text;
+ std::string ContentHRef;
+ };
+
+public:
+ NCXReader(BookReader &modelReader);
+ // Points collected by the last parse, ordered by NavPoint::Order.
+ const std::map<int,NavPoint> &navigationMap() const;
+
+private:
+ // ZLXMLReader callbacks.
+ void startElementHandler(const char *tag, const char **attributes);
+ void endElementHandler(const char *tag);
+ void characterDataHandler(const char *text, std::size_t len);
+ const std::vector<std::string> &externalDTDs() const;
+
+private:
+ BookReader &myModelReader;
+ std::map<int,NavPoint> myNavigationMap;
+ // Currently open (not yet closed) navPoints, outermost first.
+ std::vector<NavPoint> myPointStack;
+
+ // Position of the parser within the navMap/navPoint/navLabel/text nesting.
+ enum {
+ READ_NONE,
+ READ_MAP,
+ READ_POINT,
+ READ_LABEL,
+ READ_TEXT
+ } myReadState;
+
+ // Next fallback order for navPoints that carry no playOrder attribute.
+ int myPlayIndex;
+};
+
+#endif /* __NCXREADER_H__ */
diff --git a/reader/src/formats/oeb/OEBBookReader.cpp b/reader/src/formats/oeb/OEBBookReader.cpp
new file mode 100644
index 0000000..c4234a7
--- /dev/null
+++ b/reader/src/formats/oeb/OEBBookReader.cpp
@@ -0,0 +1,273 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <algorithm>
+
+#include <ZLStringUtil.h>
+#include <ZLUnicodeUtil.h>
+#include <ZLFile.h>
+#include <ZLFileImage.h>
+#include <ZLXMLNamespace.h>
+
+#include "OEBBookReader.h"
+#include "XHTMLImageFinder.h"
+#include "NCXReader.h"
+#include "../xhtml/XHTMLReader.h"
+#include "../util/MiscUtil.h"
+#include "../util/EntityFilesCollector.h"
+#include "../../bookmodel/BookModel.h"
+
+// Binds the reader to the model. myState is set to READ_NONE here as well as
+// in readBook(), so the element handlers never switch on an indeterminate
+// value, matching how NCXReader initialises its own state.
+OEBBookReader::OEBBookReader(BookModel &model) : myModelReader(model), myState(READ_NONE) {
+}
+
+// Section elements of an OPF package document.
+static const std::string MANIFEST("manifest");
+static const std::string SPINE("spine");
+static const std::string GUIDE("guide");
+static const std::string TOUR("tour");
+static const std::string SITE("site");
+
+// Child elements handled inside those sections.
+static const std::string ITEM("item");
+static const std::string ITEMREF("itemref");
+static const std::string REFERENCE("reference");
+
+// Values of a guide <reference>'s type attribute that denote a cover.
+static const std::string COVER("cover");
+static const std::string COVER_IMAGE("other.ms-coverimage-standard");
+
+// True if tag is exactly the expected name, or is the expected name inside
+// the Open Packaging Format namespace (as decided by testTag()).
+bool OEBBookReader::isOPFTag(const std::string &expected, const std::string &tag) const {
+	if (expected == tag) {
+		return true;
+	}
+	return testTag(ZLXMLNamespace::OpenPackagingFormat, expected, tag);
+}
+
+// Called for every opening tag of the OPF package document.
+//
+// At top level (READ_NONE) it enters one of the manifest / spine / guide /
+// tour sections. Inside a section it records:
+//  - manifest <item>:     id -> href and href -> media-type
+//  - spine <itemref>:     reading order of content files
+//  - guide <reference>:   guide TOC entries and the cover image
+//  - tour <site>:         tour TOC entries
+//
+// Id lookups use find() rather than operator[], so an unknown id no longer
+// inserts an empty entry into myIdToHref.
+void OEBBookReader::startElementHandler(const char *tag, const char **xmlattributes) {
+	const std::string tagString = ZLUnicodeUtil::toLower(tag);
+
+	switch (myState) {
+		case READ_NONE:
+			if (isOPFTag(MANIFEST, tagString)) {
+				myState = READ_MANIFEST;
+			} else if (isOPFTag(SPINE, tagString)) {
+				const char *toc = attributeValue(xmlattributes, "toc");
+				if (toc != 0) {
+					// An unknown toc id yields an empty file name, as before.
+					const std::map<std::string,std::string>::const_iterator tocIt =
+						myIdToHref.find(toc);
+					myNCXTOCFileName =
+						tocIt != myIdToHref.end() ? tocIt->second : std::string();
+				}
+				myState = READ_SPINE;
+			} else if (isOPFTag(GUIDE, tagString)) {
+				myState = READ_GUIDE;
+			} else if (isOPFTag(TOUR, tagString)) {
+				myState = READ_TOUR;
+			}
+			break;
+		case READ_MANIFEST:
+			if (isOPFTag(ITEM, tagString)) {
+				const char *href = attributeValue(xmlattributes, "href");
+				if (href != 0) {
+					const std::string sHref = MiscUtil::decodeHtmlURL(href);
+					const char *id = attributeValue(xmlattributes, "id");
+					const char *mediaType = attributeValue(xmlattributes, "media-type");
+					if (id != 0) {
+						myIdToHref[id] = sHref;
+					}
+					if (mediaType != 0) {
+						myHrefToMediatype[sHref] = mediaType;
+					}
+				}
+			}
+			break;
+		case READ_SPINE:
+			if (isOPFTag(ITEMREF, tagString)) {
+				const char *id = attributeValue(xmlattributes, "idref");
+				if (id != 0) {
+					// Skip ids that are unknown or map to an empty href.
+					const std::map<std::string,std::string>::const_iterator hrefIt =
+						myIdToHref.find(id);
+					if (hrefIt != myIdToHref.end() && !hrefIt->second.empty()) {
+						myHtmlFileNames.push_back(hrefIt->second);
+					}
+				}
+			}
+			break;
+		case READ_GUIDE:
+			if (isOPFTag(REFERENCE, tagString)) {
+				const char *type = attributeValue(xmlattributes, "type");
+				const char *title = attributeValue(xmlattributes, "title");
+				const char *href = attributeValue(xmlattributes, "href");
+				if (href != 0) {
+					const std::string reference = MiscUtil::decodeHtmlURL(href);
+					if (title != 0) {
+						myGuideTOC.push_back(std::make_pair(std::string(title), reference));
+					}
+					if (type != 0) {
+						if (COVER == type) {
+							// The cover reference is either an image file or an
+							// XHTML page that embeds one; the manifest media
+							// type decides which.
+							ZLFile imageFile(myFilePrefix + reference);
+							myCoverFileName = imageFile.path();
+							const std::map<std::string,std::string>::const_iterator it =
+								myHrefToMediatype.find(reference);
+							const std::string mimeType =
+								it != myHrefToMediatype.end() ? it->second : std::string();
+							shared_ptr<const ZLImage> image;
+							if (ZLStringUtil::stringStartsWith(mimeType, "image/")) {
+								image = new ZLFileImage(imageFile, 0);
+							} else {
+								image = XHTMLImageFinder().readImage(imageFile);
+							}
+							if (!image.isNull()) {
+								const std::string imageName = imageFile.name(false);
+								myModelReader.setMainTextModel();
+								myModelReader.addImageReference(imageName, 0);
+								myModelReader.addImage(imageName, image);
+								myModelReader.insertEndOfSectionParagraph();
+							} else {
+								// No usable image: forget the cover file so that
+								// readBook() does not skip it.
+								myCoverFileName.erase();
+							}
+						} else if (COVER_IMAGE == type) {
+							ZLFile imageFile(myFilePrefix + reference);
+							myCoverFileName = imageFile.path();
+							const std::string imageName = imageFile.name(false);
+							myModelReader.setMainTextModel();
+							myModelReader.addImageReference(imageName, 0);
+							myModelReader.addImage(imageName, new ZLFileImage(imageFile, 0));
+							myModelReader.insertEndOfSectionParagraph();
+						}
+					}
+				}
+			}
+			break;
+		case READ_TOUR:
+			if (isOPFTag(SITE, tagString)) {
+				const char *title = attributeValue(xmlattributes, "title");
+				const char *href = attributeValue(xmlattributes, "href");
+				if ((title != 0) && (href != 0)) {
+					myTourTOC.push_back(std::make_pair(title, MiscUtil::decodeHtmlURL(href)));
+				}
+			}
+			break;
+	}
+}
+
+// Called for every closing tag: leaves the current section (back to
+// READ_NONE) when the tag that closes is the one that opened it.
+void OEBBookReader::endElementHandler(const char *tag) {
+	const std::string tagString = ZLUnicodeUtil::toLower(tag);
+
+	// Name of the element whose end tag terminates the current section.
+	const std::string *sectionName = 0;
+	if (myState == READ_MANIFEST) {
+		sectionName = &MANIFEST;
+	} else if (myState == READ_SPINE) {
+		sectionName = &SPINE;
+	} else if (myState == READ_GUIDE) {
+		sectionName = &GUIDE;
+	} else if (myState == READ_TOUR) {
+		sectionName = &TOUR;
+	}
+
+	// In READ_NONE there is no section to leave.
+	if (sectionName != 0 && isOPFTag(*sectionName, tagString)) {
+		myState = READ_NONE;
+	}
+}
+
+// Reads the OPF package document `file`, then every content file listed in
+// its spine, and finally builds the table of contents.
+// Returns false if the package document itself cannot be parsed.
+bool OEBBookReader::readBook(const ZLFile &file) {
+	myFilePrefix = MiscUtil::htmlDirectoryPrefix(file.path());
+
+	// Reset all per-document state. myHrefToMediatype is cleared along with
+	// the rest so media types from an earlier call cannot leak into this one.
+	myIdToHref.clear();
+	myHrefToMediatype.clear();
+	myHtmlFileNames.clear();
+	myNCXTOCFileName.erase();
+	myCoverFileName.erase();
+	myTourTOC.clear();
+	myGuideTOC.clear();
+	myState = READ_NONE;
+
+	if (!readDocument(file)) {
+		return false;
+	}
+
+	myModelReader.setMainTextModel();
+	myModelReader.pushKind(REGULAR);
+
+	XHTMLReader xhtmlReader(myModelReader);
+	bool firstFile = true;
+	for (std::vector<std::string>::const_iterator it = myHtmlFileNames.begin(); it != myHtmlFileNames.end(); ++it) {
+		const ZLFile xhtmlFile(myFilePrefix + *it);
+		// Skip the cover page while nothing has been read yet; its image was
+		// already added while parsing the guide.
+		if (firstFile && myCoverFileName == xhtmlFile.path()) {
+			continue;
+		}
+		if (!firstFile) {
+			myModelReader.insertEndOfSectionParagraph();
+		}
+		xhtmlReader.readFile(xhtmlFile, *it);
+		firstFile = false;
+	}
+
+	generateTOC(xhtmlReader);
+
+	return true;
+}
+
+// Builds the table of contents. The NCX file named by the spine's toc
+// attribute is preferred; if it is missing, unreadable or empty, the
+// tour entries are used, or the guide entries when there is no tour.
+void OEBBookReader::generateTOC(const XHTMLReader &xhtmlReader) {
+	if (!myNCXTOCFileName.empty()) {
+		NCXReader ncxReader(myModelReader);
+		if (ncxReader.readDocument(ZLFile(myFilePrefix + myNCXTOCFileName))) {
+			// Bound by reference to avoid copying every NavPoint; ncxReader
+			// outlives all uses of the map within this scope.
+			const std::map<int,NCXReader::NavPoint> &navigationMap = ncxReader.navigationMap();
+			if (!navigationMap.empty()) {
+				// `level` is the number of contents paragraphs currently open.
+				std::size_t level = 0;
+				for (std::map<int,NCXReader::NavPoint>::const_iterator it = navigationMap.begin(); it != navigationMap.end(); ++it) {
+					const NCXReader::NavPoint &point = it->second;
+					int index = myModelReader.model().label(xhtmlReader.normalizedReference(point.ContentHRef)).ParagraphNumber;
+					// Close entries that are at the same depth or deeper.
+					while (level > point.Level) {
+						myModelReader.endContentsParagraph();
+						--level;
+					}
+					// If the point is nested deeper than its predecessor
+					// allows, open "..." placeholder entries to fill the gap.
+					while (++level <= point.Level) {
+						myModelReader.beginContentsParagraph(-2);
+						myModelReader.addContentsData("...");
+					}
+					myModelReader.beginContentsParagraph(index);
+					myModelReader.addContentsData(point.Text);
+				}
+				while (level > 0) {
+					myModelReader.endContentsParagraph();
+					--level;
+				}
+				return;
+			}
+		}
+	}
+
+	// Fallback: flat TOC from <tour>, or from <guide> when there is no tour.
+	const std::vector<std::pair<std::string,std::string> > &toc = myTourTOC.empty() ? myGuideTOC : myTourTOC;
+	for (std::vector<std::pair<std::string,std::string> >::const_iterator it = toc.begin(); it != toc.end(); ++it) {
+		int index = myModelReader.model().label(it->second).ParagraphNumber;
+		// Entries whose label lookup yields paragraph -1 are skipped.
+		if (index != -1) {
+			myModelReader.beginContentsParagraph(index);
+			myModelReader.addContentsData(it->first);
+			myModelReader.endContentsParagraph();
+		}
+	}
+}
+
+// Always reports true, so the base reader is asked to process namespaces
+// for OPF documents.
+bool OEBBookReader::processNamespaces() const {
+	const bool namespaceAware = true;
+	return namespaceAware;
+}
+
+// Returns the "xhtml" DTD list from EntityFilesCollector.
+const std::vector<std::string> &OEBBookReader::externalDTDs() const {
+	const std::vector<std::string> &dtds =
+		EntityFilesCollector::Instance().externalDTDs("xhtml");
+	return dtds;
+}
diff --git a/reader/src/formats/oeb/OEBBookReader.h b/reader/src/formats/oeb/OEBBookReader.h
new file mode 100644
index 0000000..092f269
--- /dev/null
+++ b/reader/src/formats/oeb/OEBBookReader.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __OEBBOOKREADER_H__
+#define __OEBBOOKREADER_H__
+
+#include <map>
+#include <vector>
+#include <string>
+
+#include <ZLXMLReader.h>
+
+#include "../../bookmodel/BookReader.h"
+
+class XHTMLReader;
+
+// Reads an OPF package document and the content files it lists into a
+// BookModel, then builds the table of contents.
+class OEBBookReader : public ZLXMLReader {
+
+public:
+ OEBBookReader(BookModel &model);
+ // Parses `file` (the OPF document); returns false if it cannot be parsed.
+ bool readBook(const ZLFile &file);
+
+private:
+ // ZLXMLReader callbacks.
+ void startElementHandler(const char *tag, const char **attributes);
+ void endElementHandler(const char *tag);
+ bool processNamespaces() const;
+ // True if `tag` equals `expected` literally or within the OPF namespace.
+ bool isOPFTag(const std::string &expected, const std::string &tag) const;
+ const std::vector<std::string> &externalDTDs() const;
+
+ // Builds the TOC from the NCX file if available, else from tour/guide entries.
+ void generateTOC(const XHTMLReader &xhtmlReader);
+
+private:
+ // Section of the package document the parser is currently inside.
+ enum ReaderState {
+ READ_NONE,
+ READ_MANIFEST,
+ READ_SPINE,
+ READ_GUIDE,
+ READ_TOUR
+ };
+
+ BookReader myModelReader;
+ ReaderState myState;
+
+ // Directory prefix prepended to every href found in the package document.
+ std::string myFilePrefix;
+ // Manifest data: item id -> href, and href -> media-type.
+ std::map<std::string,std::string> myIdToHref;
+ std::map<std::string,std::string> myHrefToMediatype;
+ // Content files in spine order.
+ std::vector<std::string> myHtmlFileNames;
+ std::string myNCXTOCFileName;
+ std::string myCoverFileName;
+ // (title, href) pairs collected from <tour> and <guide>.
+ std::vector<std::pair<std::string,std::string> > myTourTOC;
+ std::vector<std::pair<std::string,std::string> > myGuideTOC;
+};
+
+#endif /* __OEBBOOKREADER_H__ */
diff --git a/reader/src/formats/oeb/OEBCoverReader.cpp b/reader/src/formats/oeb/OEBCoverReader.cpp
new file mode 100644
index 0000000..842de30
--- /dev/null
+++ b/reader/src/formats/oeb/OEBCoverReader.cpp
@@ -0,0 +1,136 @@
+/*
+ * Copyright (C) 2009-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLFile.h>
+#include <ZLFileImage.h>
+#include <ZLXMLNamespace.h>
+
+#include "OEBCoverReader.h"
+#include "XHTMLImageFinder.h"
+
+#include "../util/MiscUtil.h"
+
+// myReadState is set to READ_NOTHING here as well as in readCover(), so the
+// element handlers never switch on an indeterminate value.
+OEBCoverReader::OEBCoverReader() : myReadState(READ_NOTHING) {
+}
+
+// Looks for the cover image referenced by the OPF document `file`.
+// Returns a null pointer if no cover is found.
+shared_ptr<const ZLImage> OEBCoverReader::readCover(const ZLFile &file) {
+	myPathPrefix = MiscUtil::htmlDirectoryPrefix(file.path());
+	myReadState = READ_NOTHING;
+	myImage.reset();
+	myCoverXHTML.erase();
+	// Reset with the rest of the per-call state: a cover id left over from an
+	// earlier document would otherwise enable manifest matching for this one.
+	myCoverId.erase();
+	readDocument(file);
+	if (myImage.isNull() && !myCoverXHTML.empty()) {
+		// The guide named a cover file but no image was created from it yet:
+		// it is either an image itself or an XHTML page embedding one.
+		const ZLFile coverFile(myCoverXHTML);
+		const std::string ext = coverFile.extension();
+		if (ext == "gif" || ext == "jpeg" || ext == "jpg") {
+			myImage = new ZLFileImage(coverFile, 0);
+		} else {
+			myImage = XHTMLImageFinder().readImage(coverFile);
+		}
+	}
+	return myImage;
+}
+
+// Element names and cover markers looked for in the OPF document.
+static const std::string METADATA("metadata");
+static const std::string META("meta");
+static const std::string MANIFEST("manifest");
+static const std::string ITEM("item");
+static const std::string GUIDE("guide");
+static const std::string REFERENCE("reference");
+static const std::string COVER("cover");
+static const std::string COVER_IMAGE("other.ms-coverimage-standard");
+
+// Always reports true, so the base reader is asked to process namespaces.
+bool OEBCoverReader::processNamespaces() const {
+	const bool namespaceAware = true;
+	return namespaceAware;
+}
+
+// Called for every opening tag of the OPF document. Three sources of a cover
+// are recognised:
+//  - <meta name="cover" content="ID"> in the metadata, resolved later against
+//    the manifest <item> with that id;
+//  - a guide <reference type="cover">, whose href is stored in myCoverXHTML;
+//  - a guide <reference type="other.ms-coverimage-standard">, used directly.
+// Parsing is interrupted as soon as a cover has been located.
+void OEBCoverReader::startElementHandler(const char *tag, const char **attributes) {
+	switch (myReadState) {
+		case READ_NOTHING:
+			if (GUIDE == tag) {
+				myReadState = READ_GUIDE;
+			} else if (MANIFEST == tag && !myCoverId.empty()) {
+				// The manifest is only of interest once a cover id is known.
+				myReadState = READ_MANIFEST;
+			} else if (testTag(ZLXMLNamespace::OpenPackagingFormat, METADATA, tag)) {
+				myReadState = READ_METADATA;
+			}
+			break;
+		case READ_GUIDE:
+			if (REFERENCE == tag) {
+				const char *type = attributeValue(attributes, "type");
+				if (type != 0) {
+					if (COVER == type) {
+						const char *href = attributeValue(attributes, "href");
+						if (href != 0) {
+							myCoverXHTML = myPathPrefix + MiscUtil::decodeHtmlURL(href);
+							interrupt();
+						}
+					} else if (COVER_IMAGE == type) {
+						createImage(attributeValue(attributes, "href"));
+					}
+				}
+			}
+			break;
+		case READ_METADATA:
+			if (testTag(ZLXMLNamespace::OpenPackagingFormat, META, tag)) {
+				const char *name = attributeValue(attributes, "name");
+				if (name != 0 && COVER == name) {
+					// A <meta name="cover"> without a content attribute yields
+					// a null pointer; assigning that to std::string is
+					// undefined behaviour, so such a tag is ignored.
+					const char *content = attributeValue(attributes, "content");
+					if (content != 0) {
+						myCoverId = content;
+					}
+				}
+			}
+			break;
+		case READ_MANIFEST:
+			if (ITEM == tag) {
+				const char *id = attributeValue(attributes, "id");
+				if (id != 0 && myCoverId == id) {
+					createImage(attributeValue(attributes, "href"));
+				}
+			}
+			break;
+	}
+}
+
+// Creates the cover image from `href` (relative to the OPF directory) and
+// stops parsing. A null href is ignored.
+void OEBCoverReader::createImage(const char *href) {
+	if (href == 0) {
+		return;
+	}
+	const ZLFile imageFile(myPathPrefix + MiscUtil::decodeHtmlURL(href));
+	myImage = new ZLFileImage(imageFile, 0);
+	interrupt();
+}
+
+// Called for every closing tag: returns to READ_NOTHING when the element
+// that opened the current section closes.
+void OEBCoverReader::endElementHandler(const char *tag) {
+	bool sectionClosed = false;
+	if (myReadState == READ_GUIDE) {
+		sectionClosed = (GUIDE == tag);
+	} else if (myReadState == READ_METADATA) {
+		sectionClosed = testTag(ZLXMLNamespace::OpenPackagingFormat, METADATA, tag);
+	} else if (myReadState == READ_MANIFEST) {
+		sectionClosed = (MANIFEST == tag);
+	}
+	// READ_NOTHING: there is no section to leave.
+
+	if (sectionClosed) {
+		myReadState = READ_NOTHING;
+	}
+}
diff --git a/reader/src/formats/oeb/OEBCoverReader.h b/reader/src/formats/oeb/OEBCoverReader.h
new file mode 100644
index 0000000..e1f96b5
--- /dev/null
+++ b/reader/src/formats/oeb/OEBCoverReader.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2009-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __OEBCOVERREADER_H__
+#define __OEBCOVERREADER_H__
+
+#include <vector>
+
+#include <shared_ptr.h>
+#include <ZLXMLReader.h>
+
+class ZLImage;
+
+// Scans an OPF package document for its cover image.
+class OEBCoverReader : public ZLXMLReader {
+
+public:
+ OEBCoverReader();
+ // Returns the cover referenced by `file`, or a null pointer if none is found.
+ shared_ptr<const ZLImage> readCover(const ZLFile &file);
+
+private:
+ // ZLXMLReader callbacks.
+ void startElementHandler(const char *tag, const char **attributes);
+ void endElementHandler(const char *tag);
+ bool processNamespaces() const;
+
+ // Sets myImage from `href` and interrupts parsing; a null href is ignored.
+ void createImage(const char *href);
+
+private:
+ shared_ptr<const ZLImage> myImage;
+ // Directory prefix prepended to hrefs found in the document.
+ std::string myPathPrefix;
+ // Path taken from a guide <reference type="cover">.
+ std::string myCoverXHTML;
+ // Manifest item id taken from <meta name="cover" content="...">.
+ std::string myCoverId;
+ // Section of the document the parser is currently inside.
+ enum {
+ READ_NOTHING,
+ READ_METADATA,
+ READ_MANIFEST,
+ READ_GUIDE
+ } myReadState;
+};
+
+#endif /* __OEBCOVERREADER_H__ */
diff --git a/reader/src/formats/oeb/OEBMetaInfoReader.cpp b/reader/src/formats/oeb/OEBMetaInfoReader.cpp
new file mode 100644
index 0000000..f9eb82d
--- /dev/null
+++ b/reader/src/formats/oeb/OEBMetaInfoReader.cpp
@@ -0,0 +1,194 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cstdlib>
+
+#include <ZLStringUtil.h>
+#include <ZLUnicodeUtil.h>
+#include <ZLLogger.h>
+#include <ZLXMLNamespace.h>
+
+#include "OEBMetaInfoReader.h"
+#include "../util/EntityFilesCollector.h"
+
+#include "../../library/Book.h"
+
+// Binds the reader to `book` and wipes the fields this reader fills in
+// (authors, title, tags). myReadState is initialised here as well as in
+// readMetaInfo(), because the element handlers are public and must never
+// switch on an indeterminate value.
+OEBMetaInfoReader::OEBMetaInfoReader(Book &book) : myBook(book), myReadState(READ_NONE) {
+	myBook.removeAllAuthors();
+	myBook.setTitle("");
+	myBook.removeAllTags();
+}
+
+// Element names that open the metadata block, the <meta> element, and the
+// creator role value that marks an author.
+static const std::string METADATA("metadata");
+static const std::string DC_METADATA("dc-metadata");
+static const std::string META("meta");
+static const std::string AUTHOR_ROLE("aut");
+
+// Collects character data into myBuffer while inside one of the text-bearing
+// metadata elements (author, subject, language, title); ignored elsewhere.
+void OEBMetaInfoReader::characterDataHandler(const char *text, std::size_t len) {
+	const bool insideTextElement =
+		myReadState == READ_AUTHOR ||
+		myReadState == READ_AUTHOR2 ||
+		myReadState == READ_SUBJECT ||
+		myReadState == READ_LANGUAGE ||
+		myReadState == READ_TITLE;
+	if (insideTextElement) {
+		myBuffer.append(text, len);
+	}
+}
+
+// True if `tag` is element `name` in either the current or the legacy
+// Dublin Core namespace.
+bool OEBMetaInfoReader::testDCTag(const std::string &name, const std::string &tag) const {
+	if (testTag(ZLXMLNamespace::DublinCore, name, tag)) {
+		return true;
+	}
+	return testTag(ZLXMLNamespace::DublinCoreLegacy, name, tag);
+}
+
+// True if fullName has the form "<prefix>:<shortName>" with a non-empty
+// prefix that the document's namespace map binds to fullNSId.
+bool OEBMetaInfoReader::isNSName(const std::string &fullName, const std::string &shortName, const std::string &fullNSId) const {
+	// Room is needed for at least one prefix character plus the ':'.
+	const std::size_t suffixLength = shortName.length() + 1;
+	if (fullName.length() <= suffixLength) {
+		return false;
+	}
+	const std::size_t prefixLength = fullName.length() - suffixLength;
+	if (fullName[prefixLength] != ':') {
+		return false;
+	}
+	if (!ZLStringUtil::stringEndsWith(fullName, shortName)) {
+		return false;
+	}
+
+	const std::map<std::string,std::string> &namespaceMap = namespaces();
+	const std::map<std::string,std::string>::const_iterator binding =
+		namespaceMap.find(fullName.substr(0, prefixLength));
+	if (binding == namespaceMap.end()) {
+		return false;
+	}
+	return binding->second == fullNSId;
+}
+
+// Called for every opening tag. Outside the metadata block it waits for
+// <metadata> / <dc-metadata>; inside it, it selects which field the following
+// character data belongs to, and handles calibre series <meta> tags directly.
+void OEBMetaInfoReader::startElementHandler(const char *tag, const char **attributes) {
+	const std::string tagString = ZLUnicodeUtil::toLower(tag);
+
+	if (myReadState == READ_NONE) {
+		const bool opensMetadata =
+			testTag(ZLXMLNamespace::OpenPackagingFormat, METADATA, tagString) ||
+			DC_METADATA == tagString;
+		if (opensMetadata) {
+			myReadState = READ_METADATA;
+		}
+		return;
+	}
+	if (myReadState != READ_METADATA) {
+		// Already inside a text-bearing element: nested tags are ignored.
+		return;
+	}
+
+	if (testDCTag("title", tagString)) {
+		myReadState = READ_TITLE;
+		return;
+	}
+	if (testDCTag("creator", tagString)) {
+		// Creators with role "aut" go to the primary author list, creators
+		// without a role to the secondary one; other roles are skipped.
+		const char *role = attributeValue(attributes, "role");
+		if (role == 0) {
+			myReadState = READ_AUTHOR2;
+		} else if (AUTHOR_ROLE == role) {
+			myReadState = READ_AUTHOR;
+		}
+		return;
+	}
+	if (testDCTag("subject", tagString)) {
+		myReadState = READ_SUBJECT;
+		return;
+	}
+	if (testDCTag("language", tagString)) {
+		myReadState = READ_LANGUAGE;
+		return;
+	}
+	if (!testTag(ZLXMLNamespace::OpenPackagingFormat, META, tagString)) {
+		return;
+	}
+
+	const char *name = attributeValue(attributes, "name");
+	const char *content = attributeValue(attributes, "content");
+	if (name == 0 || content == 0) {
+		return;
+	}
+	std::string sName = name;
+	if (sName == "calibre:series" || isNSName(sName, "series", ZLXMLNamespace::CalibreMetadata)) {
+		myBook.setSeries(content, myBook.indexInSeries());
+	} else if (sName == "calibre:series_index" || isNSName(sName, "series_index", ZLXMLNamespace::CalibreMetadata)) {
+		myBook.setSeries(myBook.seriesTitle(), std::string(content));
+	}
+}
+
+// Called for every closing tag. Commits the trimmed text collected in
+// myBuffer to the field selected by the current state, then returns to
+// READ_METADATA. Closing the metadata block itself stops the parse.
+//
+// NOTE(review): every path except the metadata-close one ends by setting
+// READ_METADATA, including when the state was READ_NONE. That is the original
+// behaviour and is kept as is; confirm whether it is intended.
+void OEBMetaInfoReader::endElementHandler(const char *tag) {
+	const std::string tagString = ZLUnicodeUtil::toLower(tag);
+	ZLUnicodeUtil::utf8Trim(myBuffer);
+	switch (myReadState) {
+		case READ_NONE:
+			break;
+		case READ_METADATA:
+			if (testTag(ZLXMLNamespace::OpenPackagingFormat, METADATA, tagString) || DC_METADATA == tagString) {
+				// Nothing after the metadata block is of interest.
+				interrupt();
+				myReadState = READ_NONE;
+				return;
+			}
+			break;
+		case READ_AUTHOR:
+			if (!myBuffer.empty()) {
+				myAuthorList.push_back(myBuffer);
+			}
+			break;
+		case READ_AUTHOR2:
+			if (!myBuffer.empty()) {
+				myAuthorList2.push_back(myBuffer);
+			}
+			break;
+		case READ_SUBJECT:
+			if (!myBuffer.empty()) {
+				myBook.addTag(myBuffer);
+			}
+			break;
+		case READ_TITLE:
+			if (!myBuffer.empty()) {
+				myBook.setTitle(myBuffer);
+			}
+			break;
+		case READ_LANGUAGE:
+			if (!myBuffer.empty()) {
+				// Keep only the primary language subtag: cut at the first '-'
+				// or '_'. The position stays a std::size_t and is compared
+				// with npos instead of being narrowed to int.
+				const std::size_t cut = myBuffer.find_first_of("-_");
+				if (cut != std::string::npos) {
+					myBuffer.erase(cut);
+				}
+				myBook.setLanguage(myBuffer);
+			}
+			break;
+	}
+	myBuffer.erase();
+	myReadState = READ_METADATA;
+}
+
+// Always reports true, so the base reader is asked to process namespaces.
+bool OEBMetaInfoReader::processNamespaces() const {
+	const bool namespaceAware = true;
+	return namespaceAware;
+}
+
+// Parses the OPF document `file` and transfers the collected authors to the
+// book. Returns false (after logging) if the document cannot be read.
+bool OEBMetaInfoReader::readMetaInfo(const ZLFile &file) {
+	myReadState = READ_NONE;
+	if (!readDocument(file)) {
+		ZLLogger::Instance().println("epub", "Failure while reading info from " + file.path());
+		return false;
+	}
+
+	// Creators explicitly marked with the author role win; creators without
+	// any role are used only when there are none of those.
+	const std::vector<std::string> &authors =
+		myAuthorList.empty() ? myAuthorList2 : myAuthorList;
+	typedef std::vector<std::string>::const_iterator AuthorIterator;
+	for (AuthorIterator author = authors.begin(); author != authors.end(); ++author) {
+		myBook.addAuthor(*author);
+	}
+	return true;
+}
+
+// Returns the "xhtml" DTD list from EntityFilesCollector.
+const std::vector<std::string> &OEBMetaInfoReader::externalDTDs() const {
+	const std::vector<std::string> &dtds =
+		EntityFilesCollector::Instance().externalDTDs("xhtml");
+	return dtds;
+}
diff --git a/reader/src/formats/oeb/OEBMetaInfoReader.h b/reader/src/formats/oeb/OEBMetaInfoReader.h
new file mode 100644
index 0000000..2337c50
--- /dev/null
+++ b/reader/src/formats/oeb/OEBMetaInfoReader.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __OEBMETAINFOREADER_H__
+#define __OEBMETAINFOREADER_H__
+
+#include <vector>
+
+#include <ZLXMLReader.h>
+
+class Book;
+
+// Reads title, authors, tags, language and calibre series information from
+// an OPF package document into a Book.
+class OEBMetaInfoReader : public ZLXMLReader {
+
+public:
+ // Clears the book's authors, title and tags.
+ OEBMetaInfoReader(Book &book);
+ // Parses `file`; returns false if it cannot be read.
+ bool readMetaInfo(const ZLFile &file);
+
+ // ZLXMLReader callbacks.
+ void startElementHandler(const char *tag, const char **attributes);
+ void endElementHandler(const char *tag);
+ void characterDataHandler(const char *text, std::size_t len);
+ bool processNamespaces() const;
+ const std::vector<std::string> &externalDTDs() const;
+
+private:
+ // True if `tag` is `name` in the current or legacy Dublin Core namespace.
+ bool testDCTag(const std::string &name, const std::string &tag) const;
+ // True if fullName is "<prefix>:<shortName>" and the prefix maps to fullNSId.
+ bool isNSName(const std::string &fullName, const std::string &shortName, const std::string &fullNSId) const;
+
+private:
+ Book &myBook;
+
+ // READ_AUTHOR: creator with role "aut"; READ_AUTHOR2: creator without a role.
+ enum {
+ READ_NONE,
+ READ_METADATA,
+ READ_AUTHOR,
+ READ_AUTHOR2,
+ READ_TITLE,
+ READ_SUBJECT,
+ READ_LANGUAGE,
+ } myReadState;
+
+ // Character data of the element currently being read.
+ std::string myBuffer;
+ // myAuthorList2 is used only when myAuthorList is empty.
+ std::vector<std::string> myAuthorList;
+ std::vector<std::string> myAuthorList2;
+};
+
+#endif /* __OEBMETAINFOREADER_H__ */
diff --git a/reader/src/formats/oeb/OEBPlugin.cpp b/reader/src/formats/oeb/OEBPlugin.cpp
new file mode 100644
index 0000000..96970c1
--- /dev/null
+++ b/reader/src/formats/oeb/OEBPlugin.cpp
@@ -0,0 +1,149 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLFile.h>
+#include <ZLImage.h>
+#include <ZLStringUtil.h>
+#include <ZLUnicodeUtil.h>
+#include <ZLDir.h>
+#include <ZLInputStream.h>
+#include <ZLLogger.h>
+#include <ZLMimeType.h>
+
+#include "OEBPlugin.h"
+#include "OEBMetaInfoReader.h"
+#include "OEBBookReader.h"
+#include "OEBCoverReader.h"
+#include "OEBTextStream.h"
+#include "../../bookmodel/BookModel.h"
+#include "../../library/Book.h"
+
+// File extensions compared against ZLFile::extension() by
+// OEBPlugin::acceptsFile() and OEBPlugin::opfFile().
+static const std::string OPF = "opf";
+static const std::string OEBZIP = "oebzip";
+static const std::string EPUB = "epub";
+
+// XML reader for META-INF/container.xml: remembers the "full-path" attribute
+// of the first <rootfile> element, which OEBPlugin::opfFile() then uses as the
+// location of the OPF package document.
+class ContainerFileReader : public ZLXMLReader {
+
+public:
+ // Path captured from <rootfile>; empty if no such element was seen.
+ const std::string &rootPath() const;
+
+private:
+ void startElementHandler(const char *tag, const char **attributes);
+
+private:
+ std::string myRootPath;
+};
+
+// Returns the "full-path" value captured from <rootfile>, or an empty string
+// when the parsed document contained none.
+const std::string &ContainerFileReader::rootPath() const {
+	return this->myRootPath;
+}
+
+// Looks for a <rootfile> element (tag name compared after lower-casing).
+// The first one that carries a "full-path" attribute supplies the root path,
+// and parsing is interrupted because nothing else is needed.
+void ContainerFileReader::startElementHandler(const char *tag, const char **attributes) {
+	if (ZLUnicodeUtil::toLower(tag) != "rootfile") {
+		return;
+	}
+	const char *fullPath = attributeValue(attributes, "full-path");
+	if (fullPath == 0) {
+		return;
+	}
+	myRootPath = fullPath;
+	interrupt();
+}
+
+// OEBPlugin declares no data members, so there is nothing to release here.
+OEBPlugin::~OEBPlugin() {}
+
+// This plugin can read meta information (see readMetaInfo()).
+bool OEBPlugin::providesMetaInfo() const {
+	const bool hasMetaInfo = true;
+	return hasMetaInfo;
+}
+
+// Decides whether `file` looks like an OEB/ePub book.
+// When the file has a non-empty MIME type, that type decides:
+//   - application/epub+zip is always accepted;
+//   - XML is accepted only with the "opf" extension;
+//   - ZIP is accepted only with the "oebzip" extension.
+// Without a usable MIME type the extension alone decides
+// ("opf", "oebzip" or "epub").
+bool OEBPlugin::acceptsFile(const ZLFile &file) const {
+	shared_ptr<ZLMimeType> type = file.mimeType();
+	const std::string &ext = file.extension();
+
+	const bool typeKnown = !type.isNull() && type != ZLMimeType::EMPTY;
+	if (!typeKnown) {
+		return ext == OPF || ext == OEBZIP || ext == EPUB;
+	}
+
+	if (type == ZLMimeType::APPLICATION_EPUB_ZIP) {
+		return true;
+	}
+	if (type == ZLMimeType::APPLICATION_XML && ext == OPF) {
+		return true;
+	}
+	return type == ZLMimeType::APPLICATION_ZIP && ext == OEBZIP;
+}
+
+// Locates the OPF package document that belongs to `oebFile`.
+//   1. A file with the "opf" extension is returned as is.
+//   2. Otherwise META-INF/container.xml inside the file's directory is
+//      consulted; a non-empty <rootfile full-path> wins.
+//   3. Otherwise the file is forced to be treated as a ZIP archive and the
+//      first entry whose name ends with ".opf" is returned.
+// Returns ZLFile::NO_FILE when the archive cannot be opened or contains no
+// such entry. Progress is reported to the "epub" logger class.
+ZLFile OEBPlugin::opfFile(const ZLFile &oebFile) {
+	//ZLLogger::Instance().registerClass("epub");
+
+	if (oebFile.extension() == OPF) {
+		return oebFile;
+	}
+
+	ZLLogger::Instance().println("epub", "Looking for opf file in " + oebFile.path());
+
+	// Step 2: ask the container descriptor where the package document is.
+	shared_ptr<ZLDir> containerDir = oebFile.directory();
+	if (!containerDir.isNull()) {
+		const ZLFile containerXml(containerDir->itemPath("META-INF/container.xml"));
+		if (containerXml.exists()) {
+			ZLLogger::Instance().println("epub", "Found container file " + containerXml.path());
+			ContainerFileReader containerReader;
+			containerReader.readDocument(containerXml);
+			const std::string &packagePath = containerReader.rootPath();
+			ZLLogger::Instance().println("epub", "opf path = " + packagePath);
+			if (!packagePath.empty()) {
+				return ZLFile(containerDir->itemPath(packagePath));
+			}
+		}
+	}
+
+	// Step 3: scan the archive entries for a *.opf file.
+	oebFile.forceArchiveType(ZLFile::ZIP);
+	shared_ptr<ZLDir> archiveDir = oebFile.directory(false);
+	if (archiveDir.isNull()) {
+		ZLLogger::Instance().println("epub", "Couldn't open zip archive");
+		return ZLFile::NO_FILE;
+	}
+
+	std::vector<std::string> entries;
+	archiveDir->collectFiles(entries, false);
+	std::vector<std::string>::const_iterator entry = entries.begin();
+	while (entry != entries.end()) {
+		ZLLogger::Instance().println("epub", "Item: " + *entry);
+		if (ZLStringUtil::stringEndsWith(*entry, ".opf")) {
+			return ZLFile(archiveDir->itemPath(*entry));
+		}
+		++entry;
+	}
+
+	ZLLogger::Instance().println("epub", "Opf file not found");
+	return ZLFile::NO_FILE;
+}
+
+// Fills `book` with the meta information found in its OPF package document.
+bool OEBPlugin::readMetaInfo(Book &book) const {
+	const ZLFile &bookFile = book.file();
+	OEBMetaInfoReader metaReader(book);
+	return metaReader.readMetaInfo(opfFile(bookFile));
+}
+
+// Builds the book model from the OPF package document of the model's book.
+bool OEBPlugin::readModel(BookModel &model) const {
+	const ZLFile &bookFile = model.book()->file();
+	OEBBookReader bookReader(model);
+	return bookReader.readBook(opfFile(bookFile));
+}
+
+// Returns whatever OEBCoverReader finds as the cover of the book whose OPF
+// package document is located via opfFile().
+shared_ptr<const ZLImage> OEBPlugin::coverImage(const ZLFile &file) const {
+	const ZLFile packageFile = opfFile(file);
+	return OEBCoverReader().readCover(packageFile);
+}
+
+// Runs language detection over the book text, but only when the book does not
+// already have a language set. Always reports success.
+bool OEBPlugin::readLanguageAndEncoding(Book &book) const {
+	if (!book.language().empty()) {
+		return true;
+	}
+	shared_ptr<ZLInputStream> textStream = new OEBTextStream(opfFile(book.file()));
+	detectLanguage(book, *textStream, book.encoding());
+	return true;
+}
diff --git a/reader/src/formats/oeb/OEBPlugin.h b/reader/src/formats/oeb/OEBPlugin.h
new file mode 100644
index 0000000..a515208
--- /dev/null
+++ b/reader/src/formats/oeb/OEBPlugin.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __OEBPLUGIN_H__
+#define __OEBPLUGIN_H__
+
+#include "../FormatPlugin.h"
+
+// Format plugin for OEB/ePub books: accepts .opf, .oebzip and .epub files
+// (see acceptsFile()) and delegates the actual reading to the OEB*Reader
+// classes.
+class OEBPlugin : public FormatPlugin {
+
+public:
+ // Locates the OPF package document belonging to `oebFile`; returns
+ // ZLFile::NO_FILE when none can be found.
+ static ZLFile opfFile(const ZLFile &oebFile);
+
+public:
+ ~OEBPlugin();
+ // Always true for this plugin.
+ bool providesMetaInfo() const;
+ bool acceptsFile(const ZLFile &file) const;
+ bool readMetaInfo(Book &book) const;
+ // Detects the language only if the book has none set; always returns true.
+ bool readLanguageAndEncoding(Book &book) const;
+ bool readModel(BookModel &model) const;
+ shared_ptr<const ZLImage> coverImage(const ZLFile &file) const;
+};
+
+#endif /* __OEBPLUGIN_H__ */
diff --git a/reader/src/formats/oeb/OEBTextStream.cpp b/reader/src/formats/oeb/OEBTextStream.cpp
new file mode 100644
index 0000000..4dbfa47
--- /dev/null
+++ b/reader/src/formats/oeb/OEBTextStream.cpp
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2008-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <map>
+
+#include <ZLFile.h>
+#include <ZLXMLReader.h>
+#include <ZLUnicodeUtil.h>
+
+#include "OEBTextStream.h"
+#include "../util/MiscUtil.h"
+#include "../util/XMLTextStream.h"
+
+// XML reader that walks an OPF package document and appends, in spine order,
+// the hrefs of the manifest items referenced by <itemref> elements to the
+// vector supplied by the caller. Parsing stops at the end of <spine>.
+class XHTMLFilesCollector : public ZLXMLReader {
+
+public:
+ // `xhtmlFileNames` receives the collected hrefs; it is held by reference.
+ XHTMLFilesCollector(std::vector<std::string> &xhtmlFileNames);
+
+private:
+ void startElementHandler(const char *tag, const char **attributes);
+ void endElementHandler(const char *tag);
+
+private:
+ // Output vector owned by the caller.
+ std::vector<std::string> &myXHTMLFileNames;
+ // Manifest item id -> href, filled while inside <manifest>.
+ std::map<std::string,std::string> myIdToHref;
+ // Which section was opened last.
+ enum {
+ READ_NONE,
+ READ_MANIFEST,
+ READ_SPINE
+ } myState;
+};
+
+// Keeps a reference to the caller's output vector and starts outside of any
+// <manifest>/<spine> section.
+XHTMLFilesCollector::XHTMLFilesCollector(std::vector<std::string> &xhtmlFileNames)
+	: myXHTMLFileNames(xhtmlFileNames),
+	  myState(READ_NONE) {
+}
+
+// Lower-case OPF element names recognised by XHTMLFilesCollector; incoming
+// tag names are lower-cased before being compared against these.
+static const std::string MANIFEST = "manifest";
+static const std::string SPINE = "spine";
+static const std::string ITEM = "item";
+static const std::string ITEMREF = "itemref";
+
+// Tracks the <manifest> and <spine> sections of an OPF document.
+//   - <item id=... href=...> inside <manifest> records the id -> href mapping.
+//   - <itemref idref=...> inside <spine> appends the href registered for that
+//     id to the output vector; unknown ids and empty hrefs are ignored.
+// Tag names are compared after lower-casing.
+void XHTMLFilesCollector::startElementHandler(const char *tag, const char **xmlattributes) {
+	const std::string tagString = ZLUnicodeUtil::toLower(tag);
+	if (MANIFEST == tagString) {
+		myState = READ_MANIFEST;
+	} else if (SPINE == tagString) {
+		myState = READ_SPINE;
+	} else if ((myState == READ_MANIFEST) && (ITEM == tagString)) {
+		const char *id = attributeValue(xmlattributes, "id");
+		const char *href = attributeValue(xmlattributes, "href");
+		if ((id != 0) && (href != 0)) {
+			myIdToHref[id] = href;
+		}
+	} else if ((myState == READ_SPINE) && (ITEMREF == tagString)) {
+		const char *id = attributeValue(xmlattributes, "idref");
+		if (id != 0) {
+			// Use find() rather than operator[]: a lookup must not insert an
+			// empty entry for every idref that has no manifest item.
+			const std::map<std::string,std::string>::const_iterator known = myIdToHref.find(id);
+			if ((known != myIdToHref.end()) && !known->second.empty()) {
+				myXHTMLFileNames.push_back(known->second);
+			}
+		}
+	}
+}
+
+// Once </spine> is reached nothing further is needed, so parsing stops.
+void XHTMLFilesCollector::endElementHandler(const char *tag) {
+	const std::string closedTag = ZLUnicodeUtil::toLower(tag);
+	if (closedTag != SPINE) {
+		return;
+	}
+	interrupt();
+}
+
+// Collects, in spine order, the XHTML files listed by the OPF package
+// document `opfFile`; their names are later resolved against the directory
+// prefix of that file.
+// myIndex is initialised here so that it never holds an indeterminate value,
+// even if nextStream() were reached before resetToStart().
+OEBTextStream::OEBTextStream(const ZLFile &opfFile) : myIndex(0) {
+	myFilePrefix = MiscUtil::htmlDirectoryPrefix(opfFile.path());
+	XHTMLFilesCollector(myXHTMLFileNames).readDocument(opfFile);
+}
+
+// Rewinds to the first collected XHTML file.
+void OEBTextStream::resetToStart() {
+	this->myIndex = 0;
+}
+
+// Returns a text stream over the "body" element of the next XHTML file and
+// advances to the following one; returns a null pointer when all files have
+// been handed out.
+shared_ptr<ZLInputStream> OEBTextStream::nextStream() {
+	if (myIndex < myXHTMLFileNames.size()) {
+		ZLFile xhtmlFile(myFilePrefix + myXHTMLFileNames[myIndex]);
+		++myIndex;
+		return new XMLTextStream(xhtmlFile.inputStream(), "body");
+	}
+	return 0;
+}
diff --git a/reader/src/formats/oeb/OEBTextStream.h b/reader/src/formats/oeb/OEBTextStream.h
new file mode 100644
index 0000000..6ddd2c9
--- /dev/null
+++ b/reader/src/formats/oeb/OEBTextStream.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2008-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __OEBTEXTSTREAM_H__
+#define __OEBTEXTSTREAM_H__
+
+#include <vector>
+#include <string>
+
+#include "../util/MergedStream.h"
+
+// MergedStream that presents the text of an OEB/ePub book as one stream: the
+// XHTML files listed in the OPF spine are opened one after another.
+class OEBTextStream : public MergedStream {
+
+public:
+ // `opfFile` is the OPF package document of the book.
+ OEBTextStream(const ZLFile &opfFile);
+
+private:
+ // Rewinds to the first XHTML file.
+ void resetToStart();
+ // Stream over the next XHTML file, or null when none is left.
+ shared_ptr<ZLInputStream> nextStream();
+
+private:
+ // Directory prefix of the OPF file; prepended to every XHTML file name.
+ std::string myFilePrefix;
+ // XHTML file names in spine order.
+ std::vector<std::string> myXHTMLFileNames;
+ // Index of the file that nextStream() will open next.
+ std::size_t myIndex;
+};
+
+#endif /* __OEBTEXTSTREAM_H__ */
diff --git a/reader/src/formats/oeb/XHTMLImageFinder.cpp b/reader/src/formats/oeb/XHTMLImageFinder.cpp
new file mode 100644
index 0000000..6a449c9
--- /dev/null
+++ b/reader/src/formats/oeb/XHTMLImageFinder.cpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2009-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLFile.h>
+#include <ZLFileImage.h>
+#include <ZLXMLNamespace.h>
+
+#include "XHTMLImageFinder.h"
+#include "../util/MiscUtil.h"
+
+// Element names that XHTMLImageFinder treats as image references:
+// <img src=...> and <image xlink:href=...>.
+static const std::string TAG_IMG = "img";
+static const std::string TAG_IMAGE = "image";
+
+// Parses `file` and returns the first image it references, or a null pointer
+// when the document contains no image reference. Relative references are
+// resolved against the directory prefix of `file`.
+shared_ptr<const ZLImage> XHTMLImageFinder::readImage(const ZLFile &file) {
+	myPathPrefix = MiscUtil::htmlDirectoryPrefix(file.path());
+	myImage.reset();
+
+	readDocument(file);
+
+	return myImage;
+}
+
+// Namespace processing is requested because the xlink:href attribute is
+// looked up by namespace in startElementHandler().
+bool XHTMLImageFinder::processNamespaces() const {
+	const bool needsNamespaces = true;
+	return needsNamespaces;
+}
+
+// Picks up the first image reference in the document: the "src" attribute of
+// an <img> element or the XLink "href" attribute of an <image> element.
+// On success the image is stored and parsing is interrupted.
+void XHTMLImageFinder::startElementHandler(const char *tag, const char **attributes) {
+	const char *imagePath = 0;
+	if (TAG_IMAGE == tag) {
+		imagePath = attributeValue(
+			attributes, NamespaceAttributeNamePredicate(ZLXMLNamespace::XLink, "href")
+		);
+	} else if (TAG_IMG == tag) {
+		imagePath = attributeValue(attributes, "src");
+	}
+
+	if (imagePath == 0) {
+		return;
+	}
+	myImage = new ZLFileImage(ZLFile(myPathPrefix + imagePath), 0);
+	interrupt();
+}
diff --git a/reader/src/formats/oeb/XHTMLImageFinder.h b/reader/src/formats/oeb/XHTMLImageFinder.h
new file mode 100644
index 0000000..28e53f2
--- /dev/null
+++ b/reader/src/formats/oeb/XHTMLImageFinder.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2009-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __XHTMLIMAGEFINDER_H__
+#define __XHTMLIMAGEFINDER_H__
+
+#include <shared_ptr.h>
+#include <ZLXMLReader.h>
+
+class ZLFile;
+class ZLImage;
+
+// XML reader that finds the first image referenced by an XHTML document.
+// NOTE(review): std::string is used below without a direct <string> include;
+// presumably it arrives through <ZLXMLReader.h>.
+class XHTMLImageFinder : public ZLXMLReader {
+
+public:
+ // Parses `file`; returns the first referenced image or a null pointer.
+ shared_ptr<const ZLImage> readImage(const ZLFile &file);
+
+private:
+ bool processNamespaces() const;
+ void startElementHandler(const char *tag, const char **attributes);
+
+private:
+ // Directory prefix of the document being parsed; prepended to references.
+ std::string myPathPrefix;
+ // Result of the current readImage() call.
+ shared_ptr<const ZLImage> myImage;
+};
+
+#endif /* __XHTMLIMAGEFINDER_H__ */
diff --git a/reader/src/formats/openreader/ORBookReader.cpp b/reader/src/formats/openreader/ORBookReader.cpp
new file mode 100644
index 0000000..d494b7f
--- /dev/null
+++ b/reader/src/formats/openreader/ORBookReader.cpp
@@ -0,0 +1,185 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cstring>
+#include <cstdlib>
+#include <algorithm>
+
+#include <ZLUnicodeUtil.h>
+#include <ZLFileImage.h>
+
+#include "ORBookReader.h"
+#include "../xhtml/XHTMLReader.h"
+#include "../util/MiscUtil.h"
+#include "../../bookmodel/BookModel.h"
+#include "../../library/Book.h"
+
+// Binds the embedded BookReader to `model` and gives the parser state defined
+// initial values. readBook() resets myState before parsing, but the XML
+// handlers are public and read myState, so it must never be indeterminate.
+ORBookReader::ORBookReader(BookModel &model) : myModelReader(model), myState(READ_NONE), myTOCLevel(0) {
+}
+
+// Character data is only of interest inside a TOC <title>; it is accumulated
+// there until the element is closed.
+void ORBookReader::characterDataHandler(const char *data, std::size_t len) {
+	if (myState != READ_TOCTITLE) {
+		return;
+	}
+	myTOCTitle.append(data, len);
+}
+
+// Lower-case element names of the OpenReader package file handled by
+// ORBookReader; incoming tag names are lower-cased before comparison.
+
+// Top-level sections and the navigation element inside <userset>.
+static const std::string TAG_RESOURCES = "resources";
+static const std::string TAG_USERSET = "userset";
+static const std::string TAG_NAVIGATION = "primarynav";
+
+// Children of <userset> that carry "residrefs".
+static const std::string TAG_SPINE = "spine";
+static const std::string TAG_COVER = "cover";
+
+// NOTE(review): TAG_ITEMREF is not referenced anywhere in this file.
+static const std::string TAG_ITEM = "item";
+static const std::string TAG_ITEMREF = "itemref";
+static const std::string TAG_POINTER = "pointer";
+static const std::string TAG_TITLE = "title";
+
+// NOTE(review): not referenced in this file; the handler compares against
+// ZLMimeType::APPLICATION_OR_XML instead.
+static const std::string xhtmlMediaType = "application/x-orp-bcd1+xml";
+
+// Start-tag half of the package-file state machine.
+//   <resources> / <userset>        switch section, whatever the current state;
+//   <item> in <resources>          registers resid -> resource and classifies
+//                                  the resource as XHTML or image by media type;
+//   <primarynav> in <userset>      enters the navigation section;
+//   <spine residrefs> in <userset> records the reading order (space separated
+//                                  resids, empty tokens skipped);
+//   <cover residrefs> in <userset> records the cover reference;
+//   <pointer elemrefs level>       starts a TOC entry (inside navigation);
+//   <title> inside <pointer>       starts collecting the TOC entry title.
+void ORBookReader::startElementHandler(const char *tag, const char **xmlattributes) {
+	const std::string openedTag = ZLUnicodeUtil::toLower(tag);
+
+	if (TAG_RESOURCES == openedTag) {
+		myState = READ_RESOURCES;
+		return;
+	}
+	if (TAG_USERSET == openedTag) {
+		myState = READ_USERSET;
+		return;
+	}
+
+	switch (myState) {
+		case READ_RESOURCES:
+		{
+			if (TAG_ITEM != openedTag) {
+				break;
+			}
+			const char *resid = attributeValue(xmlattributes, "resid");
+			const char *resource = attributeValue(xmlattributes, "resource");
+			shared_ptr<ZLMimeType> mediaType = ZLMimeType::get(attributeValue(xmlattributes, "media-type"));
+			if ((resid == 0) || (resource == 0)) {
+				break;
+			}
+			myResources[resid] = resource;
+			if (mediaType.isNull() || !(mediaType != ZLMimeType::EMPTY)) {
+				break;
+			}
+			if (ZLMimeType::APPLICATION_OR_XML == mediaType) {
+				myHtmlFileIDs.insert(resid);
+			} else if (ZLMimeType::isImage(mediaType)) {
+				myImageIDs[resid] = mediaType;
+			}
+			break;
+		}
+		case READ_USERSET:
+		{
+			if (TAG_NAVIGATION == openedTag) {
+				myState = READ_NAVIGATION;
+			} else if (TAG_SPINE == openedTag) {
+				const char *residrefs = attributeValue(xmlattributes, "residrefs");
+				if (residrefs != 0) {
+					// Split on single spaces, dropping empty tokens.
+					const std::string refs(residrefs);
+					std::string::size_type from = 0;
+					while (from < refs.size()) {
+						std::string::size_type to = refs.find(' ', from);
+						if (to == std::string::npos) {
+							to = refs.size();
+						}
+						if (to > from) {
+							myHtmlFilesOrder.push_back(refs.substr(from, to - from));
+						}
+						from = to + 1;
+					}
+				}
+			} else if (TAG_COVER == openedTag) {
+				const char *coverRef = attributeValue(xmlattributes, "residrefs");
+				if (coverRef != 0) {
+					myCoverReference = coverRef;
+				}
+			}
+			break;
+		}
+		case READ_NAVIGATION:
+		{
+			if (TAG_POINTER != openedTag) {
+				break;
+			}
+			const char *ref = attributeValue(xmlattributes, "elemrefs");
+			const char *level = attributeValue(xmlattributes, "level");
+			if (ref != 0 && level != 0) {
+				myTOCReference = ref;
+				myTOCLevel = std::atoi(level);
+				myState = READ_POINTER;
+			}
+			break;
+		}
+		case READ_POINTER:
+			if (TAG_TITLE == openedTag) {
+				myState = READ_TOCTITLE;
+			}
+			break;
+		default:
+			break;
+	}
+}
+
+// End-tag half of the state machine: unwinds one nesting level per closed
+// element. Closing a TOC <title> stores the finished entry (reference, title,
+// level) and clears the title buffer for the next one.
+void ORBookReader::endElementHandler(const char *tag) {
+	const std::string closedTag = ZLUnicodeUtil::toLower(tag);
+
+	if (TAG_RESOURCES == closedTag || TAG_USERSET == closedTag) {
+		myState = READ_NONE;
+		return;
+	}
+
+	switch (myState) {
+		case READ_NAVIGATION:
+			if (TAG_NAVIGATION == closedTag) {
+				myState = READ_USERSET;
+			}
+			break;
+		case READ_POINTER:
+			if (TAG_POINTER == closedTag) {
+				myState = READ_NAVIGATION;
+			}
+			break;
+		case READ_TOCTITLE:
+			if (TAG_TITLE == closedTag) {
+				myTOC.push_back(TOCItem(myTOCReference, myTOCTitle, myTOCLevel));
+				myTOCTitle.erase();
+				myState = READ_POINTER;
+			}
+			break;
+		default:
+			break;
+	}
+}
+
+// Parses the package file of the model's book and fills the model from it:
+// main text in spine order, table of contents, the remaining XHTML resources
+// as footnote text models, and images. Returns false when readDocument()
+// fails on the package file, true otherwise.
+bool ORBookReader::readBook() {
+ const ZLFile &file = myModelReader.model().book()->file();
+ // Resource paths from the package file are resolved against this prefix.
+ myFilePrefix = MiscUtil::htmlDirectoryPrefix(file.path());
+
+ // Drop whatever an earlier run collected before parsing again.
+ myResources.clear();
+ myCoverReference.erase();
+ myHtmlFileIDs.clear();
+ myImageIDs.clear();
+ myHtmlFilesOrder.clear();
+ myTOC.clear();
+ myState = READ_NONE;
+
+ // The XML handlers fill the containers above.
+ if (!readDocument(file)) {
+ return false;
+ }
+
+ myModelReader.setMainTextModel();
+ myModelReader.pushKind(REGULAR);
+
+ if (!myCoverReference.empty()) {
+ myModelReader.addImageReference(myCoverReference);
+ }
+
+ // Main text: every spine entry in order. Each id is removed from
+ // myHtmlFileIDs so that only XHTML resources outside the spine remain for
+ // the footnote pass below.
+ for (std::vector<std::string>::const_iterator it = myHtmlFilesOrder.begin(); it != myHtmlFilesOrder.end(); ++it) {
+ myHtmlFileIDs.erase(*it);
+ XHTMLReader(myModelReader).readFile(ZLFile(myFilePrefix + myResources[*it]), *it);
+ }
+
+ // Table of contents. `level` is one more than the number of contents
+ // paragraphs currently open; before an entry is opened, paragraphs are
+ // closed until `level` is no greater than the entry's level. Entries whose
+ // reference has no label in the model (ParagraphNumber == -1) are skipped.
+ int level = 1;
+ for (std::vector<TOCItem>::const_iterator it = myTOC.begin(); it != myTOC.end(); ++it) {
+ int index = myModelReader.model().label(it->Reference).ParagraphNumber;
+ if (index != -1) {
+ for (; level > it->Level; --level) {
+ myModelReader.endContentsParagraph();
+ }
+ ++level;
+ myModelReader.beginContentsParagraph(index);
+ myModelReader.addContentsData(it->Text);
+ }
+ }
+ // Close the contents paragraphs that are still open.
+ for (; level > 1; --level) {
+ myModelReader.endContentsParagraph();
+ }
+
+ // XHTML resources that were not part of the spine: each one is read into a
+ // footnote text model selected by its resid.
+ for (std::set<std::string>::const_iterator it = myHtmlFileIDs.begin(); it != myHtmlFileIDs.end(); ++it) {
+ myModelReader.setFootnoteTextModel(*it);
+ myModelReader.pushKind(REGULAR);
+ XHTMLReader(myModelReader).readFile(ZLFile(myFilePrefix + myResources[*it]), *it);
+ }
+
+ // Register every image resource under its resid.
+ for (std::map<std::string,shared_ptr<ZLMimeType> >::const_iterator it = myImageIDs.begin(); it != myImageIDs.end(); ++it) {
+ myModelReader.addImage(it->first, new ZLFileImage(ZLFile(myFilePrefix + myResources[it->first], it->second), 0));
+ }
+
+ return true;
+}
diff --git a/reader/src/formats/openreader/ORBookReader.h b/reader/src/formats/openreader/ORBookReader.h
new file mode 100644
index 0000000..160c9f1
--- /dev/null
+++ b/reader/src/formats/openreader/ORBookReader.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __ORBOOKREADER_H__
+#define __ORBOOKREADER_H__
+
+#include <map>
+#include <set>
+#include <vector>
+#include <string>
+
+#include <ZLXMLReader.h>
+
+#include "../../bookmodel/BookReader.h"
+
+// Reads an OpenReader book: parses the package file with the ZLXMLReader
+// callbacks, then builds the book model in readBook().
+class ORBookReader : public ZLXMLReader {
+
+public:
+ ORBookReader(BookModel &model);
+ // Parses the package file and fills the model; false if parsing failed.
+ bool readBook();
+
+ // ZLXMLReader callbacks implementing the package-file state machine.
+ void startElementHandler(const char *tag, const char **attributes);
+ void endElementHandler(const char *tag);
+ void characterDataHandler(const char *text, std::size_t len);
+
+private:
+ // Position inside the package file; each state is entered from the one
+ // above it, except READ_RESOURCES and READ_USERSET, which are entered from
+ // any state.
+ enum ReaderState {
+ READ_NONE,
+ READ_RESOURCES,
+ READ_USERSET,
+ READ_NAVIGATION,
+ READ_POINTER,
+ READ_TOCTITLE
+ };
+
+ BookReader myModelReader;
+ ReaderState myState;
+
+ // Directory prefix of the package file; prepended to resource paths.
+ std::string myFilePrefix;
+ // resid -> resource path, from <item> elements.
+ std::map<std::string,std::string> myResources;
+ // "residrefs" value of the <cover> element, if any.
+ std::string myCoverReference;
+ // resids whose media type is ZLMimeType::APPLICATION_OR_XML.
+ std::set<std::string> myHtmlFileIDs;
+ // resid -> media type for image resources.
+ std::map<std::string,shared_ptr<ZLMimeType> > myImageIDs;
+ // resids listed by <spine residrefs>, in reading order.
+ std::vector<std::string> myHtmlFilesOrder;
+
+ // One table-of-contents entry collected from a <pointer> element.
+ struct TOCItem {
+ TOCItem(const std::string &reference, const std::string &text, int level) : Reference(reference), Text(text), Level(level) {
+ }
+
+ std::string Reference;
+ std::string Text;
+ int Level;
+ };
+ std::vector<TOCItem> myTOC;
+
+ // Fields of the TOC entry currently being parsed.
+ std::string myTOCReference;
+ int myTOCLevel;
+ std::string myTOCTitle;
+};
+
+#endif /* __ORBOOKREADER_H__ */
diff --git a/reader/src/formats/openreader/ORDescriptionReader.cpp b/reader/src/formats/openreader/ORDescriptionReader.cpp
new file mode 100644
index 0000000..8c80dfa
--- /dev/null
+++ b/reader/src/formats/openreader/ORDescriptionReader.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLUnicodeUtil.h>
+
+#include "ORDescriptionReader.h"
+
+#include "../util/EntityFilesCollector.h"
+#include "../../library/Book.h"
+
+// Prepares `book` for a fresh read of its meta information: existing authors
+// are removed and the title is cleared.
+// The parser flags are initialised here as well; readMetaInfo() resets them
+// before parsing, but they must not be indeterminate in between.
+ORDescriptionReader::ORDescriptionReader(Book &book) : myBook(book), myReadMetaData(false), myReadState(READ_NONE) {
+	myBook.removeAllAuthors();
+	myBook.setTitle("");
+}
+
+// Lower-case element names and the author role value recognised by
+// ORDescriptionReader. The "dc:" prefix is matched literally.
+// TODO: replace "dc" by real DC scheme name
+static const std::string METADATA = "metadata";
+static const std::string TITLE = "dc:title";
+static const std::string AUTHOR_TAG = "dc:creator";
+static const std::string AUTHOR_ROLE = "aut";
+
+// Routes character data to the value being read: author text is buffered
+// until the element ends, title text is appended to the book title directly.
+void ORDescriptionReader::characterDataHandler(const char *text, std::size_t len) {
+	if (myReadState == READ_AUTHOR) {
+		myCurrentAuthor.append(text, len);
+	} else if (myReadState == READ_TITLE) {
+		std::string title = myBook.title();
+		title.append(text, len);
+		myBook.setTitle(title);
+	}
+}
+
+// <metadata> switches metadata reading on. Inside it, <dc:title> starts the
+// title and <dc:creator role="aut"> starts an author; creators with any other
+// role, or with no role, are ignored.
+void ORDescriptionReader::startElementHandler(const char *tag, const char **attributes) {
+	const std::string openedTag = ZLUnicodeUtil::toLower(tag);
+
+	if (METADATA == openedTag) {
+		myReadMetaData = true;
+		return;
+	}
+	if (!myReadMetaData) {
+		return;
+	}
+
+	if (TITLE == openedTag) {
+		myReadState = READ_TITLE;
+		return;
+	}
+	if (AUTHOR_TAG != openedTag) {
+		return;
+	}
+	const char *role = attributeValue(attributes, "role");
+	if ((role != 0) && (AUTHOR_ROLE == role)) {
+		myReadState = READ_AUTHOR;
+	}
+}
+
+// </metadata> ends the parse. Any other end tag flushes a buffered author
+// name into the book and returns the reader to the idle state.
+void ORDescriptionReader::endElementHandler(const char *tag) {
+	if (METADATA == ZLUnicodeUtil::toLower(tag)) {
+		interrupt();
+		return;
+	}
+
+	if (!myCurrentAuthor.empty()) {
+		myBook.addAuthor(myCurrentAuthor);
+		myCurrentAuthor.clear();
+	}
+	myReadState = READ_NONE;
+}
+
+// Resets the parser flags and parses the book's own file; returns the result
+// of readDocument().
+bool ORDescriptionReader::readMetaInfo() {
+	myReadState = READ_NONE;
+	myReadMetaData = false;
+
+	const bool parsed = readDocument(myBook.file());
+	return parsed;
+}
+
+// Uses the external DTD list that EntityFilesCollector keeps for "xhtml".
+const std::vector<std::string> &ORDescriptionReader::externalDTDs() const {
+	return EntityFilesCollector::Instance()
+		.externalDTDs("xhtml");
+}
diff --git a/reader/src/formats/openreader/ORDescriptionReader.h b/reader/src/formats/openreader/ORDescriptionReader.h
new file mode 100644
index 0000000..a4f6b2a
--- /dev/null
+++ b/reader/src/formats/openreader/ORDescriptionReader.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __ORDESCRIPTIONREADER_H__
+#define __ORDESCRIPTIONREADER_H__
+
+#include <ZLXMLReader.h>
+
+class Book;
+
+// Reads title and authors of an OpenReader book from the <metadata> section
+// of the book's file into a Book object.
+// NOTE(review): std::vector and std::string are used below without direct
+// includes; presumably they arrive through <ZLXMLReader.h>.
+class ORDescriptionReader : public ZLXMLReader {
+
+public:
+ // Removes all authors from `book` and clears its title.
+ ORDescriptionReader(Book &book);
+ // Parses the book's file; returns the result of readDocument().
+ bool readMetaInfo();
+
+private:
+ void startElementHandler(const char *tag, const char **attributes);
+ void endElementHandler(const char *tag);
+ void characterDataHandler(const char *text, std::size_t len);
+
+ const std::vector<std::string> &externalDTDs() const;
+
+private:
+ Book &myBook;
+
+ // True once the <metadata> start tag has been seen.
+ bool myReadMetaData;
+ // Which value the character data currently belongs to.
+ enum {
+ READ_NONE,
+ READ_AUTHOR,
+ READ_TITLE
+ } myReadState;
+
+ // Author name accumulated until the next end tag.
+ std::string myCurrentAuthor;
+};
+
+#endif /* __ORDESCRIPTIONREADER_H__ */
diff --git a/reader/src/formats/openreader/OpenReaderPlugin.cpp b/reader/src/formats/openreader/OpenReaderPlugin.cpp
new file mode 100644
index 0000000..545f83b
--- /dev/null
+++ b/reader/src/formats/openreader/OpenReaderPlugin.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLFile.h>
+#include <ZLStringUtil.h>
+#include <ZLDir.h>
+
+#include "OpenReaderPlugin.h"
+#include "ORDescriptionReader.h"
+#include "ORBookReader.h"
+
+#include "../../library/Book.h"
+
+// OpenReaderPlugin declares no data members, so there is nothing to release.
+OpenReaderPlugin::~OpenReaderPlugin() {}
+
+// This plugin can read meta information (see readMetaInfo()).
+bool OpenReaderPlugin::providesMetaInfo() const {
+	const bool hasMetaInfo = true;
+	return hasMetaInfo;
+}
+
+// Only files with the "orb" extension are handled by this plugin.
+bool OpenReaderPlugin::acceptsFile(const ZLFile &file) const {
+	const std::string &ext = file.extension();
+	return ext == "orb";
+}
+
+// Reads title and authors of `book` via ORDescriptionReader.
+bool OpenReaderPlugin::readMetaInfo(Book &book) const {
+	ORDescriptionReader descriptionReader(book);
+	return descriptionReader.readMetaInfo();
+}
+
+// No language or encoding detection is performed for this format; the call
+// always reports success and leaves `book` untouched.
+bool OpenReaderPlugin::readLanguageAndEncoding(Book &book) const {
+	static_cast<void>(book);
+	return true;
+}
+
+// Builds the book model via ORBookReader.
+bool OpenReaderPlugin::readModel(BookModel &model) const {
+	ORBookReader bookReader(model);
+	return bookReader.readBook();
+}
diff --git a/reader/src/formats/openreader/OpenReaderPlugin.h b/reader/src/formats/openreader/OpenReaderPlugin.h
new file mode 100644
index 0000000..fcfaa11
--- /dev/null
+++ b/reader/src/formats/openreader/OpenReaderPlugin.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __OPENREADERPLUGIN_H__
+#define __OPENREADERPLUGIN_H__
+
+#include "../FormatPlugin.h"
+
+// Format plugin for OpenReader books (files with the "orb" extension); reading
+// is delegated to ORDescriptionReader and ORBookReader.
+class OpenReaderPlugin : public FormatPlugin {
+
+public:
+ ~OpenReaderPlugin();
+ // Always true for this plugin.
+ bool providesMetaInfo() const;
+ bool acceptsFile(const ZLFile &file) const;
+ bool readMetaInfo(Book &book) const;
+ // Does nothing and returns true.
+ bool readLanguageAndEncoding(Book &book) const;
+ bool readModel(BookModel &model) const;
+};
+
+#endif /* __OPENREADERPLUGIN_H__ */
diff --git a/reader/src/formats/pdb/BitReader.cpp b/reader/src/formats/pdb/BitReader.cpp
new file mode 100644
index 0000000..551aaf3
--- /dev/null
+++ b/reader/src/formats/pdb/BitReader.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cstring>
+#include <string>
+
+#include "BitReader.h"
+
+// Takes a private copy of the `size` bytes at `data` and appends four zero
+// bytes after it (presumably so that peek() may load whole bytes a little
+// beyond the payload). The readable length is recorded in bits.
+BitReader::BitReader(const unsigned char* data, std::size_t size) :
+	myData(new unsigned char[size + 4]),
+	myOffset(0),
+	myLength(size * 8) {
+	unsigned char *const padding = myData + size;
+	std::memcpy(myData, data, size);
+	std::memset(padding, 0x00, 4);
+}
+
+BitReader::~BitReader() { // releases the padded private copy allocated by the constructor
+ delete[] myData;
+}
+
+// Returns the next `n` bits starting at the current bit offset without
+// consuming them. Bits are taken most-significant-first within each byte and
+// the result is right-aligned. Returns 0 when `n` exceeds 32 or when the
+// current offset lies beyond the payload.
+unsigned long long BitReader::peek(std::size_t n) {
+	// The four zero bytes appended by the constructor only cover reads that
+	// start at or before myLength; eat() can move myOffset further than that,
+	// so such a state must be rejected instead of indexing past the buffer.
+	if (n > 32 || myOffset > myLength) {
+		return 0;
+	}
+	unsigned long long r = 0;
+	std::size_t g = 0; // bits gathered so far, counted from myOffset
+	while (g < n) {
+		r = (r << 8) | myData[(myOffset + g) >> 3];
+		// Step to the next byte boundary; only the first step can be partial.
+		g = g + 8 - ((myOffset + g) & 7);
+	}
+	unsigned long long mask = 1;
+	mask = (mask << n) - 1;
+	// Drop the surplus low bits of the last byte, then mask off the bits of
+	// the first byte that precede myOffset.
+	return (r >> (g - n)) & mask;
+}
+
+// Consumes `n` bits. Returns true when that many bits were available.
+// When fewer remain, the reader is moved to the end of the payload and false
+// is returned, so the offset can never overrun myLength (or wrap around) and
+// later peek()/left() calls stay within bounds.
+bool BitReader::eat(std::size_t n) {
+	// myOffset <= myLength always holds here, so the subtraction cannot wrap.
+	if (n > myLength - myOffset) {
+		myOffset = myLength;
+		return false;
+	}
+	myOffset += n;
+	return true;
+}
+
+// Returns the number of unread bits, or 0 once the offset has reached or
+// passed the end of the payload (instead of wrapping to a huge value).
+std::size_t BitReader::left() const {
+	return (myOffset < myLength) ? (myLength - myOffset) : 0;
+}
diff --git a/reader/src/formats/pdb/BitReader.h b/reader/src/formats/pdb/BitReader.h
new file mode 100644
index 0000000..a8a3d2d
--- /dev/null
+++ b/reader/src/formats/pdb/BitReader.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __BITREADER_H__
+#define __BITREADER_H__
+
+// Bit-level reader over a private copy of a byte buffer.
+// peek(n) returns the next n (<= 32) bits without consuming them, eat(n)
+// consumes n bits and reports whether the reader is still inside the data,
+// left() returns the number of bits not yet consumed.
+class BitReader {
+
+public:
+	BitReader(const unsigned char* data, std::size_t size);
+	~BitReader();
+
+	unsigned long long peek(std::size_t n);
+	bool eat(std::size_t n);
+	std::size_t left() const;
+
+private:
+	// The object owns myData (allocated with new[] in the constructor and
+	// released with delete[] in the destructor). A compiler-generated copy
+	// would share that pointer and free it twice, so copying is disabled:
+	// these two members are declared but intentionally never defined.
+	BitReader(const BitReader&);
+	BitReader &operator = (const BitReader&);
+
+private:
+	unsigned char* myData;   // private copy of the input followed by 4 zero bytes
+	std::size_t myOffset;    // current read position, in bits
+	std::size_t myLength;    // payload length, in bits
+};
+
+#endif //__BITREADER_H__
diff --git a/reader/src/formats/pdb/DocDecompressor.cpp b/reader/src/formats/pdb/DocDecompressor.cpp
new file mode 100644
index 0000000..9175bc9
--- /dev/null
+++ b/reader/src/formats/pdb/DocDecompressor.cpp
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cstring>
+
+#include <ZLInputStream.h>
+
+#include "DocDecompressor.h"
+
+static unsigned char TOKEN_CODE[256] = { // maps every input byte to the switch case in DocDecompressor::decompress() that handles it
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, // 0x00: literal; 0x01-0x08: copy the next `token` input bytes verbatim; 0x09-0x0F: literal
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10-0x7F: literal bytes, copied as they are
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0x80-0xBF: first byte of a two-byte back-reference (distance and length)
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xC0-0xFF: expands to a space followed by (token ^ 0x80)
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+};
+
+// Reads `compressedSize` bytes from `stream` and expands them into
+// `targetBuffer`, writing at most `maxUncompressedSize` bytes.
+// Returns the number of bytes written; this is 0 when the stream delivers
+// fewer than `compressedSize` bytes. Decoding stops early, keeping what was
+// produced so far, as soon as a token would need more input or more output
+// space than is available. A back-reference that points before the start of
+// the output is skipped.
+//
+// All bounds checks compare sizes/indices, so no pointer outside the two
+// buffers is ever formed.
+std::size_t DocDecompressor::decompress(ZLInputStream &stream, char *targetBuffer, std::size_t compressedSize, std::size_t maxUncompressedSize) {
+	unsigned char *sourceBuffer = new unsigned char[compressedSize];
+	unsigned char *target = (unsigned char*)targetBuffer;
+	std::size_t sourcePos = 0; // next unread index in sourceBuffer
+	std::size_t targetPos = 0; // number of bytes written to target
+
+	if (stream.read((char*)sourceBuffer, compressedSize) == compressedSize) {
+		bool stop = false;
+		while (!stop && (sourcePos < compressedSize) && (targetPos < maxUncompressedSize)) {
+			const unsigned char token = sourceBuffer[sourcePos++];
+			switch (TOKEN_CODE[token]) {
+				case 0:
+					// literal byte
+					target[targetPos++] = token;
+					break;
+				case 1:
+					// `token` raw bytes follow and are copied verbatim
+					if ((token > compressedSize - sourcePos) || (token > maxUncompressedSize - targetPos)) {
+						stop = true;
+						break;
+					}
+					std::memcpy(target + targetPos, sourceBuffer + sourcePos, token);
+					sourcePos += token;
+					targetPos += token;
+					break;
+				case 2:
+					// space followed by the token with its top bit flipped
+					if (maxUncompressedSize - targetPos < 2) {
+						stop = true;
+						break;
+					}
+					target[targetPos++] = ' ';
+					target[targetPos++] = token ^ 0x80;
+					break;
+				case 3:
+				{
+					// two-byte back-reference: low 3 bits give length - 3,
+					// bits 3..13 give the distance back into the output
+					if (sourcePos >= compressedSize) {
+						stop = true;
+						break;
+					}
+					const unsigned int N = 256 * token + sourceBuffer[sourcePos++];
+					const std::size_t copyLength = (N & 7) + 3;
+					if (copyLength > maxUncompressedSize - targetPos) {
+						stop = true;
+						break;
+					}
+					const std::size_t shift = (N & 0x3fff) / 8;
+					if (shift <= targetPos) {
+						// Byte-by-byte on purpose: source and destination may
+						// overlap, which repeats the most recent bytes.
+						for (std::size_t i = 0; i < copyLength; ++i) {
+							target[targetPos] = target[targetPos - shift];
+							++targetPos;
+						}
+					}
+					break;
+				}
+			}
+		}
+	}
+
+	delete[] sourceBuffer;
+	return targetPos;
+}
diff --git a/reader/src/formats/pdb/DocDecompressor.h b/reader/src/formats/pdb/DocDecompressor.h
new file mode 100644
index 0000000..820bb0a
--- /dev/null
+++ b/reader/src/formats/pdb/DocDecompressor.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __DOCDECOMPRESSOR_H__
+#define __DOCDECOMPRESSOR_H__
+
+#include <string>
+
+class ZLInputStream;
+
+class DocDecompressor { // stateless helper; the only operation is decompress()
+
+public:
+ DocDecompressor() {}
+ ~DocDecompressor() {}
+
+ std::size_t decompress(ZLInputStream &stream, char *buffer, std::size_t compressedSize, std::size_t maxUncompressedSize); // reads compressedSize bytes from stream, writes at most maxUncompressedSize bytes to buffer, returns the number written
+};
+
+#endif /* __DOCDECOMPRESSOR_H__ */
diff --git a/reader/src/formats/pdb/EReaderPlugin.cpp b/reader/src/formats/pdb/EReaderPlugin.cpp
new file mode 100644
index 0000000..8420c7f
--- /dev/null
+++ b/reader/src/formats/pdb/EReaderPlugin.cpp
@@ -0,0 +1,125 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLFile.h>
+#include <ZLInputStream.h>
+#include <ZLEncodingConverter.h>
+#include <ZLStringUtil.h>
+#include <ZLLanguageUtil.h>
+#include <ZLFileImage.h>
+
+#include "PdbPlugin.h"
+#include "EReaderStream.h"
+#include "PmlBookReader.h"
+
+#include "../../library/Book.h"
+
+bool EReaderPlugin::providesMetaInfo() const { // always true; see readMetaInfo() in this file
+ return true;
+}
+
+// A file is accepted when PdbPlugin::fileType() reports exactly "PNRdPPrs".
+bool EReaderPlugin::acceptsFile(const ZLFile &file) const {
+	const std::string type = PdbPlugin::fileType(file);
+	return type == "PNRdPPrs";
+}
+
+// Reads the whole document from `stream` into `model`:
+//  1. the main text is parsed with a PmlBookReader,
+//  2. every image known to the stream is registered as a ZLFileImage located
+//     by offset/size inside the book file,
+//  3. every footnote known to the stream is parsed into its own text model.
+// Does nothing when the stream cannot be opened. `stream` is treated as an
+// EReaderStream.
+void EReaderPlugin::readDocumentInternal(const ZLFile &file, BookModel &model, const PlainTextFormat &format, const std::string &encoding, ZLInputStream &stream) const {
+	typedef std::map<std::string, EReaderStream::ImageInfo> ImageMap;
+	typedef std::map<std::string, unsigned short> FootnoteMap;
+
+	if (!stream.open()) {
+		//TODO maybe anything else opens stream
+		return;
+	}
+
+	BookReader bookReader(model);
+	PmlBookReader pmlBookReader(bookReader, format, encoding);
+	bookReader.setMainTextModel();
+	pmlBookReader.readDocument(stream);
+
+	EReaderStream &estream = (EReaderStream&)stream;
+
+	const ImageMap &images = estream.images();
+	ImageMap::const_iterator imageIt = images.begin();
+	while (imageIt != images.end()) {
+		const std::string &imageId = imageIt->first;
+		const EReaderStream::ImageInfo &info = imageIt->second;
+		bookReader.addImage(imageId, new ZLFileImage(ZLFile(file.path(), info.Type), info.Offset, info.Size));
+		++imageIt;
+	}
+
+	const FootnoteMap &footnotes = estream.footnotes();
+	FootnoteMap::const_iterator footnoteIt = footnotes.begin();
+	while (footnoteIt != footnotes.end()) {
+		const std::string &footnoteId = footnoteIt->first;
+		// Footnotes the stream cannot switch to are silently skipped.
+		if (estream.switchStreamDestination(EReaderStream::FOOTNOTE, footnoteId)) {
+			bookReader.setFootnoteTextModel(footnoteId);
+			bookReader.addHyperlinkLabel(footnoteId);
+			pmlBookReader.readDocument(estream);
+		}
+		++footnoteIt;
+	}
+
+	stream.close();
+}
+
+shared_ptr<ZLInputStream> EReaderPlugin::createStream(const ZLFile &file) const { // hands a newly allocated EReaderStream for `file` to the returned shared_ptr
+ return new EReaderStream(file);
+}
+
+const std::string &EReaderPlugin::tryOpen(const ZLFile &file) const { // opens a temporary EReaderStream on `file` and returns its error() text
+ EReaderStream stream(file);
+ stream.open(); // the result is ignored on purpose: only the error text is reported
+ return stream.error(); // NOTE(review): `stream` is destroyed on return, so this is only safe if error() refers to storage that outlives the stream - confirm
+}
+
+// Reads title and author from the metadata record of the book file and then
+// lets SimplePdbPlugin::readMetaInfo() complete the job.
+//
+// The 16-bit value at byte 46 of record 0 is used as the index of the metadata
+// record. That record is treated as a sequence of NUL-terminated strings in
+// the order Title, Author, Rights, Publisher, ISBN; only the first two are
+// used.
+//
+// Returns false when the stream cannot be opened, the PDB header cannot be
+// read or has no records, or the metadata record index/extent is invalid.
+bool EReaderPlugin::readMetaInfo(Book &book) const {
+	shared_ptr<ZLInputStream> stream = book.file().inputStream();
+	if (stream.isNull() || ! stream->open()) {
+		return false;
+	}
+	PdbHeader header;
+	if (!header.read(stream) || header.Offsets.empty()) {
+		stream->close();
+		return false;
+	}
+	stream->seek(header.Offsets[0] + 46, true);
+	// Start from 0 so that a failed read is handled like "no metadata record".
+	unsigned short metaInfoOffset = 0;
+	PdbUtil::readUnsignedShort(*stream, metaInfoOffset);
+	if (metaInfoOffset == 0 || metaInfoOffset >= header.Offsets.size()) {
+		stream->close();
+		return false;
+	}
+	const std::size_t currentOffset = header.Offsets[metaInfoOffset];
+	// The record ends where the next one starts, or at the end of the file
+	// when it is the last record.
+	const std::size_t nextOffset =
+		(metaInfoOffset + 1 < (unsigned short)header.Offsets.size()) ?
+			header.Offsets[metaInfoOffset + 1] : stream->sizeOfOpened();
+	if (nextOffset <= currentOffset) {
+		stream->close();
+		return false;
+	}
+	const std::size_t length = nextOffset - currentOffset;
+
+	char* metaInfoBuffer = new char[length];
+	stream->seek(currentOffset, true);
+	// Keep only the bytes that were really read; a short read must not leak
+	// uninitialised buffer content into the metadata.
+	const std::size_t readLength = stream->read(metaInfoBuffer, length);
+	const std::string metaInfoStr(metaInfoBuffer, readLength);
+	delete[] metaInfoBuffer;
+
+	std::string metaInfoData[5]; // Title; Author; Rights; Publisher; isbn;
+	std::size_t fieldStart = 0;
+	for (std::size_t i = 0; i < 5; ++i) {
+		const std::size_t fieldEnd = metaInfoStr.find('\0', fieldStart);
+		if (fieldEnd == std::string::npos) {
+			// Unterminated last field: take the remainder once and stop, so
+			// that it is not repeated in the following fields.
+			metaInfoData[i] = metaInfoStr.substr(fieldStart);
+			break;
+		}
+		metaInfoData[i] = metaInfoStr.substr(fieldStart, fieldEnd - fieldStart);
+		fieldStart = fieldEnd + 1;
+	}
+
+	if (!metaInfoData[0].empty()) {
+		book.setTitle(metaInfoData[0]);
+	}
+
+	if (!metaInfoData[1].empty()) {
+		book.addAuthor(metaInfoData[1]);
+	}
+
+	stream->close();
+	return SimplePdbPlugin::readMetaInfo(book);
+}
diff --git a/reader/src/formats/pdb/EReaderStream.cpp b/reader/src/formats/pdb/EReaderStream.cpp
new file mode 100644
index 0000000..9775773
--- /dev/null
+++ b/reader/src/formats/pdb/EReaderStream.cpp
@@ -0,0 +1,289 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <algorithm>
+#include <cctype>
+
+#include <ZLFile.h>
+#include <ZLResource.h>
+#include <ZLZDecompressor.h>
+
+#include "EReaderStream.h"
+#include "DocDecompressor.h"
+
+
+// Creates a stream over `file` that initially delivers the main text.
+EReaderStream::EReaderStream(const ZLFile &file) : PalmDocLikeStream(file), myDestination(TEXT) {
+}
+
+EReaderStream::~EReaderStream() { // calls close() so the stream is closed before the object goes away
+ close();
+}
+
+// Selects what the stream delivers next.
+//  TEXT:     the main text, restarting at record 1; `id` is ignored.
+//  FOOTNOTE: the record registered for footnote `id`.
+// Returns false, leaving the stream unchanged, when `id` is not a known
+// footnote; returns true in every other case.
+bool EReaderStream::switchStreamDestination(StreamDestination destination, const std::string& id) {
+	if (destination == TEXT) {
+		myDestination = TEXT;
+		myRecordIndex = 1;
+		return true;
+	}
+	if (destination != FOOTNOTE) {
+		return true;
+	}
+
+	const std::map<std::string, unsigned short>::const_iterator footnoteIt = myFootnotes.find(id);
+	if (footnoteIt == myFootnotes.end()) {
+		return false;
+	}
+	myDestination = FOOTNOTE;
+	myRecordIndex = footnoteIt->second;
+	return true;
+}
+
+// Makes sure unread data is available in the buffer.
+// For the main text the work is delegated to PalmDocLikeStream::fillBuffer().
+// For any other destination the current record is loaded once the buffer has
+// been used up; false is returned when the record cannot be processed or
+// yields no data.
+bool EReaderStream::fillBuffer() {
+	if (myDestination == TEXT) {
+		return PalmDocLikeStream::fillBuffer();
+	}
+	if (myBufferOffset != myBufferLength) {
+		return true;
+	}
+	if (!processRecord()) {
+		return false;
+	}
+	// processRecord() does not advance myRecordIndex, so calling it again for
+	// an empty result can only fail (input was consumed) or repeat the very
+	// same step forever (nothing was consumed). Report "no data" right away
+	// instead of looping.
+	return myBufferOffset != myBufferLength;
+}
+
+// Decompresses record myRecordIndex into myBuffer and resets the read
+// position to the start of the buffer. NUL bytes are removed from the result.
+// Returns false when the record starts before the current position of the
+// underlying stream, when its end offset precedes its start, or when an
+// inflate-compressed record is shorter than its 2-byte prefix.
+bool EReaderStream::processRecord() {
+	const std::size_t currentOffset = recordOffset(myRecordIndex);
+	if (currentOffset < myBase->offset()) {
+		return false;
+	}
+	myBase->seek(currentOffset, true);
+	const std::size_t nextOffset = recordOffset(myRecordIndex + 1);
+	if (nextOffset < currentOffset) {
+		return false;
+	}
+
+	const unsigned short compressedSize = nextOffset - currentOffset;
+
+	switch (myCompressionVersion) {
+		case 10: // Inflate compression
+			// The first two bytes of the record are skipped; without this
+			// check `compressedSize - 2` would go negative and turn into a
+			// huge length.
+			if (compressedSize < 2) {
+				return false;
+			}
+			myBase->seek(2, false);
+			myBufferLength = ZLZDecompressor(compressedSize - 2).decompress(*myBase, myBuffer, myMaxRecordSize);
+			break;
+		case 2: // PalmDoc compression
+			myBufferLength = DocDecompressor().decompress(*myBase, myBuffer, compressedSize, myMaxRecordSize);
+			break;
+	}
+	clearBuffer('\0');
+	myBufferOffset = 0;
+	return true;
+}
+
+bool EReaderStream::processZeroRecord() {
+ /* Parses record 0. The caller must already have positioned myBase at the
+ * start of record 0; the "^ + N" notes below give the stream position
+ * relative to that start after each step.
+ * Returns false, with myErrorCode set, when the compression field is above
+ * 255 (ERROR_ENCRYPTION) or is neither 2 nor 10 (ERROR_COMPRESSION).
+ * Otherwise returns true, even when the footnote-id or image records could
+ * not be processed completely. On success myBase is left at the start of
+ * record 1. */
+ PdbUtil::readUnsignedShort(*myBase, myCompressionVersion); // myBase offset: ^ + 2
+ if (myCompressionVersion > 255) {
+ myErrorCode = ERROR_ENCRYPTION;
+ return false;
+ } else {
+ switch (myCompressionVersion) {
+ case 2:
+ case 10:
+ break;
+ default:
+ myErrorCode = ERROR_COMPRESSION;
+ return false;
+ }
+ }
+ myBase->seek(10, false); // myBase offset: ^ + 12
+ PdbUtil::readUnsignedShort(*myBase, myNonTextOffset); // myBase offset: ^ + 14
+ PdbUtil::readUnsignedShort(*myBase, myNonTextOffsetReserved); // myBase offset: ^ + 16
+ myBase->seek(12, false); // myBase offset: ^ + 28
+ PdbUtil::readUnsignedShort(*myBase, myFootnoteRecords); // myBase offset: ^ + 30
+ PdbUtil::readUnsignedShort(*myBase, mySidebarRecords); // myBase offset: ^ + 32
+ PdbUtil::readUnsignedShort(*myBase, myBookmarksOffset); // myBase offset: ^ + 34
+ myBase->seek(2, false); // myBase offset: ^ + 36
+ PdbUtil::readUnsignedShort(*myBase, myNonTextOffsetExtraReserved); // myBase offset: ^ + 38
+ myBase->seek(2, false); // myBase offset: ^ + 40
+ PdbUtil::readUnsignedShort(*myBase, myImagedataOffset); // myBase offset: ^ + 42
+ PdbUtil::readUnsignedShort(*myBase, myImagedataOffsetReserved); // myBase offset: ^ + 44
+ PdbUtil::readUnsignedShort(*myBase, myMetadataOffset); // myBase offset: ^ + 46
+ PdbUtil::readUnsignedShort(*myBase, myMetadataOffsetReserved); // myBase offset: ^ + 48
+ PdbUtil::readUnsignedShort(*myBase, myFootnoteOffset); // myBase offset: ^ + 50
+ PdbUtil::readUnsignedShort(*myBase, mySidebarOffset); // myBase offset: ^ + 52
+ PdbUtil::readUnsignedShort(*myBase, myLastdataOffset); // myBase offset: ^ + 54
+
+ unsigned short endSectionIndex = header().Offsets.size(); // number of records in the PDB header, narrowed to 16 bits
+ myMaxRecordIndex = std::min((unsigned short) (myNonTextOffset - 1), (unsigned short) (endSectionIndex - 1)); // the smaller of "record before the first non-text record" and "last record of the file"
+
+ myMaxRecordSize = 65535; // Maximum size of addressable space in PalmOS
+ // no record larger than 8192 bytes was seen in the tested examples
+
+ if (myFootnoteRecords) {
+ bool isSuccess = processFootnoteIdsRecord();
+ if (!isSuccess) {
+ //TODO take the returned bool value into account:
+ //false if a wrong number of footnotes is announced in the zero record
+ //or the footnote ids record is corrupted or wrong
+ }
+ }
+
+ if (myImagedataOffset != myMetadataOffset) { // image headers are scanned only when these two record indices differ
+ bool isSuccess = processImageHeaders();
+ if (!isSuccess) {
+ //TODO take the returned bool value into account:
+ //false if one of the image records is corrupted
+ }
+ }
+
+ myBase->seek(header().Offsets[1], true); // leave the stream at the start of record 1; NOTE(review): assumes the file has at least two records - confirm
+
+ /*
+ std::cerr << "EReaderStream::processZeroRecord():\n";
+ std::cerr << "PDB header indentificator : " << header().Id << "\n";
+ std::cerr << "PDB file system: sizeof opened : " << myBaseSize << "\n";
+ std::cerr << "PDB header/record[0] max index : " << myMaxRecordIndex << "\n";
+ std::cerr << "PDB record[0][0..2] compression : " << myCompressionVersion << "\n";
+ std::cerr << "EReader record[0] myNonTextOffset : " << myNonTextOffset << std::endl;
+ std::cerr << "EReader record[0] myNonTextOffset2 : " << myNonTextOffsetReserved << std::endl;
+ std::cerr << "EReader record[0] myFootnoteRecords : " << myFootnoteRecords << std::endl;
+ std::cerr << "EReader record[0] mySidebarRecords : " << mySidebarRecords << std::endl;
+ std::cerr << "EReader record[0] myBookmarksOffset : " << myBookmarksOffset << std::endl;
+ std::cerr << "EReader record[0] myNonTextOffset3 : " << myNonTextOffsetExtraReserved << std::endl;
+ std::cerr << "EReader record[0] myImagedataOffset : " << myImagedataOffset << std::endl;
+ std::cerr << "EReader record[0] myImagedataOffset2 : " << myImagedataOffsetReserved << std::endl;
+ std::cerr << "EReader record[0] myMetadataOffset : " << myMetadataOffset << std::endl;
+ std::cerr << "EReader record[0] myMetadataOffset2 : " << myMetadataOffsetReserved << std::endl;
+ std::cerr << "EReader record[0] myFootnoteOffset : " << myFootnoteOffset << std::endl;
+ std::cerr << "EReader record[0] mySidebarOffset : " << mySidebarOffset << std::endl;
+ std::cerr << "EReader record[0] myLastdataOffset : " << myLastdataOffset << std::endl;
+ std::cerr << "PDB header lastSectionIndex : " << endSectionIndex - 1 << "\n";
+ */
+ return true;
+}
+
+// Removes every occurrence of `symbol` from the filled part of myBuffer,
+// closing the gaps, and shortens myBufferLength accordingly.
+void EReaderStream::clearBuffer(unsigned char symbol) {
+	char *const bufferEnd = myBuffer + myBufferLength;
+	char *const compactedEnd = std::remove(myBuffer, bufferEnd, symbol);
+	myBufferLength = compactedEnd - myBuffer;
+}
+
+// Reads the footnote-id record (record myFootnoteOffset) and fills
+// myFootnotes: the first id found is mapped to record myFootnoteOffset + 1,
+// the second to the record after that, and so on, stopping before record
+// myLastdataOffset.
+// Returns true when the number of ids collected equals myFootnoteRecords - 1;
+// returns false when it differs or when the record index/extent is invalid.
+bool EReaderStream::processFootnoteIdsRecord() {
+	if (myFootnoteOffset >= header().Offsets.size()) {
+		return false;
+	}
+	const std::size_t currentOffset = recordOffset(myFootnoteOffset);
+	const std::size_t nextOffset = recordOffset(myFootnoteOffset + 1);
+	if (nextOffset < currentOffset) {
+		return false;
+	}
+	const std::size_t length = nextOffset - currentOffset;
+
+	// The buffer is sized from the record itself: a fixed myMaxRecordSize
+	// buffer would be overrun by a longer record.
+	char* footnoteIdBuffer = new char[length];
+	myBase->seek(header().Offsets[myFootnoteOffset], true);
+	const std::size_t readLength = myBase->read(footnoteIdBuffer, length);
+	// Only the bytes that were really read take part in the parsing.
+	std::string footnoteIdStr(footnoteIdBuffer, readLength);
+	delete[] footnoteIdBuffer;
+
+	unsigned short footnoteIndex = myFootnoteOffset + 1;
+	while (!footnoteIdStr.empty() && (footnoteIndex < myLastdataOffset)) {
+		// findFootnoteId() also removes the consumed part from footnoteIdStr.
+		const std::string id = findFootnoteId(footnoteIdStr);
+		if (!id.empty()) {
+			myFootnotes[id] = footnoteIndex;
+			++footnoteIndex;
+		}
+	}
+	return (myFootnoteRecords - 1 == (unsigned short)myFootnotes.size());
+}
+
+// Extracts the next id from `footnoteIdStr`: everything before the first
+// alphanumeric character is skipped, and the id runs from there up to (not
+// including) the next NUL byte or the end of the string.
+// `footnoteIdStr` is shortened to what follows the id (starting at that NUL
+// byte, if any). Returns an empty string when no id is left; in that case
+// `footnoteIdStr` ends up empty as well.
+std::string EReaderStream::findFootnoteId(std::string &footnoteIdStr) const {
+	std::string resultStr;
+	if (!footnoteIdStr.empty()) {
+		std::size_t counter = 0;
+		for (; counter < footnoteIdStr.length(); ++counter) {
+			// std::isalnum() requires a value representable as unsigned char;
+			// passing a negative plain char is undefined behaviour.
+			if (std::isalnum((unsigned char)footnoteIdStr[counter])) {
+				break;
+			}
+		}
+		const std::size_t startIdIndex = counter;
+		for (; counter < footnoteIdStr.length(); ++counter) {
+			if (footnoteIdStr[counter] == '\0') {
+				break;
+			}
+		}
+		const std::size_t endIdIndex = counter;
+		resultStr = footnoteIdStr.substr(startIdIndex, endIdIndex - startIdIndex);
+		footnoteIdStr = footnoteIdStr.substr(endIdIndex);
+	}
+	return resultStr;
+}
+
+const std::map<std::string, unsigned short>& EReaderStream::footnotes() const { // footnote id -> record index, as filled by processFootnoteIdsRecord()
+ return myFootnotes;
+}
+
+// Registers the images stored in the records from myImagedataOffset up to
+// (not including) the smaller of myMetadataOffset and myLastdataOffset.
+// Returns false when the first image record index is not a valid record or
+// when at least one record could not be registered; the remaining records are
+// still processed in that case.
+bool EReaderStream::processImageHeaders() {
+	const std::size_t recordCount = header().Offsets.size();
+	unsigned short recordIndex = myImagedataOffset;
+	if (recordIndex >= recordCount) {
+		// The index comes straight from the file; do not index past the table.
+		return false;
+	}
+	bool result = true;
+	myBase->seek(header().Offsets[recordIndex], true);
+	while (recordIndex < myMetadataOffset && recordIndex < myLastdataOffset && recordIndex < recordCount) {
+		// addImageInfo() is evaluated first on purpose: with `result && ...`
+		// the first bad record would stop all later images from being read.
+		result = addImageInfo(recordIndex) && result;
+		++recordIndex;
+	}
+	return result;
+}
+
+// Inspects the first bytes (at most 128) of record `recordIndex` and, when
+// they look like an image header, stores the image location in myImages.
+// The header is expected to be "<type> <id>\0..." with the PNG signature
+// ("\x89PNG") somewhere after it; the image data is taken to run from that
+// signature to the start of the next record.
+// Returns false, registering nothing, when the record is empty, when the
+// signature or the space after the type is missing, or when the id is empty.
+// Unless the record is empty, myBase is left at the start of the next record.
+bool EReaderStream::addImageInfo(const unsigned short recordIndex) {
+	const std::size_t currentOffset = recordOffset(recordIndex);
+	const std::size_t nextOffset = recordOffset(recordIndex + 1);
+	if (nextOffset <= currentOffset) {
+		return false;
+	}
+
+	// Never look beyond the end of this record, even if it is shorter than
+	// the usual header window.
+	char buffer[128]; //TODO may be it's needed here more bytes
+	const std::size_t headerLength = std::min((std::size_t)sizeof(buffer), nextOffset - currentOffset);
+	myBase->seek(currentOffset, true);
+	const std::size_t readLength = myBase->read(buffer, headerLength);
+	myBase->seek(nextOffset, true);
+	const std::string recordHeader(buffer, readLength);
+
+	const std::size_t pngIndex = recordHeader.find("\x89PNG");
+	const std::size_t endType = recordHeader.find(' ');
+	if (pngIndex == std::string::npos || endType == std::string::npos) {
+		// Without these the offset and type below would be garbage.
+		return false;
+	}
+
+	const std::size_t idStart = endType + 1;
+	const std::size_t endId = recordHeader.find('\0', idStart);
+	const std::string id = (endId == std::string::npos) ?
+		recordHeader.substr(idStart) : recordHeader.substr(idStart, endId - idStart);
+	if (id.empty()) {
+		return false;
+	}
+
+	ImageInfo image;
+	image.Offset = currentOffset + pngIndex;
+	image.Size = nextOffset - image.Offset;
+	image.Type = ZLMimeType::get(recordHeader.substr(0, endType));
+	myImages[id] = image;
+	return true;
+}
+
+
+/*bool EReaderStream::hasExtraSections() const {
+ return false;
+ //return myMaxRecordIndex < header().Offsets.size() - 1;
+}*/
+
+// Returns the stored location of image `id`, or a default-constructed
+// ImageInfo when the id is unknown. The image headers are scanned first if
+// image records exist but none has been registered yet.
+EReaderStream::ImageInfo EReaderStream::imageLocation(const std::string& id) {
+	const bool hasImageRecords = myImagedataOffset != myMetadataOffset;
+	if (hasImageRecords && myImages.empty()) {
+		processImageHeaders();
+	}
+
+	const std::map<std::string, ImageInfo>::const_iterator found = myImages.find(id);
+	if (found == myImages.end()) {
+		return ImageInfo();
+	}
+	return found->second;
+}
+
+const std::map<std::string, EReaderStream::ImageInfo>& EReaderStream::images() const { // image id -> location, as filled by addImageInfo()
+ return myImages;
+}
diff --git a/reader/src/formats/pdb/EReaderStream.h b/reader/src/formats/pdb/EReaderStream.h
new file mode 100644
index 0000000..990c6ba
--- /dev/null
+++ b/reader/src/formats/pdb/EReaderStream.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __EREADERSTREAM_H__
+#define __EREADERSTREAM_H__
+
+#include <map>
+
+#include "PalmDocLikeStream.h"
+#include <ZLMimeType.h>
+
+class ZLFile;
+
+class EReaderStream : public PalmDocLikeStream { // stream over a PDB file that can deliver either the main text or a single footnote
+
+public:
+ EReaderStream(const ZLFile &file);
+ ~EReaderStream();
+
+ enum StreamDestination {
+ TEXT, // main text; selecting it restarts reading at record 1
+ FOOTNOTE, // the record registered for one footnote id
+ };
+
+ struct ImageInfo {
+ unsigned long Offset; // file offset of the "\x89PNG" signature inside the image record
+ unsigned short Size; // number of bytes from Offset to the start of the next record
+ shared_ptr<ZLMimeType> Type; // ZLMimeType::get() of the text before the first space of the record header
+ };
+
+ ImageInfo imageLocation(const std::string& id); // returns a default-constructed ImageInfo for an unknown id
+ //bool hasExtraSections() const;
+ bool switchStreamDestination(StreamDestination destination, const std::string &footnoteId); // false when FOOTNOTE is requested for an unknown id
+ const std::map<std::string, unsigned short>& footnotes() const;
+ const std::map<std::string, ImageInfo>& images() const;
+
+private:
+ bool processRecord();
+ bool processZeroRecord();
+ bool processFootnoteIdsRecord();
+ bool processImageHeaders();
+
+ void clearBuffer(unsigned char symbol); // removes every `symbol` byte from the filled part of the buffer
+ std::string findFootnoteId(std::string &footnoteIdStr) const; // also removes the consumed part from footnoteIdStr
+ bool addImageInfo(const unsigned short recordIndex);
+
+ bool fillBuffer();
+
+private:
+ unsigned short myCompressionVersion; // 2 = PalmDoc, 10 = inflate; anything else is rejected by processZeroRecord()
+ unsigned short myNonTextOffset;
+ unsigned short myNonTextOffsetReserved; //TODO: Warning: isn't used
+ unsigned short myFootnoteRecords;
+ unsigned short mySidebarRecords;
+ unsigned short myBookmarksOffset;
+ unsigned short myNonTextOffsetExtraReserved; //TODO: Warning: isn't used
+ unsigned short myImagedataOffset; // index of the first image record
+ unsigned short myImagedataOffsetReserved; //TODO: Warning: isn't used
+ unsigned short myMetadataOffset;
+ unsigned short myMetadataOffsetReserved; //TODO: Warning: isn't used
+ unsigned short myFootnoteOffset; // index of the record holding the footnote ids
+ unsigned short mySidebarOffset;
+ unsigned short myLastdataOffset;
+
+
+ StreamDestination myDestination; // what fillBuffer() currently delivers
+ std::map<std::string, unsigned short> myFootnotes; // footnote id -> record index
+ std::map<std::string, ImageInfo> myImages; // image id -> location
+
+};
+
+#endif /* __EREADERSTREAM_H__ */
diff --git a/reader/src/formats/pdb/HtmlMetainfoReader.cpp b/reader/src/formats/pdb/HtmlMetainfoReader.cpp
new file mode 100644
index 0000000..8829591
--- /dev/null
+++ b/reader/src/formats/pdb/HtmlMetainfoReader.cpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLUnicodeUtil.h>
+
+#include "HtmlMetainfoReader.h"
+
+#include "../../library/Book.h"
+
+HtmlMetainfoReader::HtmlMetainfoReader(Book &book, ReadType readType) :
+ HtmlReader(book.encoding()), myBook(book), myReadType(readType) { // the myRead* flags are not set here; startDocumentHandler() resets them
+}
+
+// Handles one tag of the document.
+//  BODY:       returns false; nothing else is done.
+//  DC:SUBJECT: (only if TAGS was requested) text between the opening and the
+//              closing tag is added to the book as a tag.
+//  DC:TITLE:   (only if TITLE was requested) same, stored as the title.
+//  DC:CREATOR: (only if AUTHOR was requested) text is collected only when the
+//              opening tag has a ROLE attribute equal to "AUT" (compared in
+//              upper case); the closing tag adds the collected text as an
+//              author and always clears the buffer.
+// Every tag other than BODY yields true.
+bool HtmlMetainfoReader::tagHandler(const HtmlReader::HtmlTag &tag) {
+	if (tag.Name == "BODY") {
+		return false;
+	}
+
+	if (((myReadType & TAGS) == TAGS) && (tag.Name == "DC:SUBJECT")) {
+		myReadTags = tag.Start;
+		if (!tag.Start && !myBuffer.empty()) {
+			myBook.addTag(myBuffer);
+			myBuffer.erase();
+		}
+		return true;
+	}
+
+	if (((myReadType & TITLE) == TITLE) && (tag.Name == "DC:TITLE")) {
+		myReadTitle = tag.Start;
+		if (!tag.Start && !myBuffer.empty()) {
+			myBook.setTitle(myBuffer);
+			myBuffer.erase();
+		}
+		return true;
+	}
+
+	if (((myReadType & AUTHOR) != AUTHOR) || !(tag.Name == "DC:CREATOR")) {
+		return true;
+	}
+
+	if (!tag.Start) {
+		// closing tag: flush whatever was collected
+		myReadAuthor = false;
+		if (!myBuffer.empty()) {
+			myBook.addAuthor(myBuffer);
+		}
+		myBuffer.erase();
+		return true;
+	}
+
+	// opening tag: only the first ROLE attribute decides
+	bool isAuthorRole = false;
+	for (std::size_t index = 0; index < tag.Attributes.size(); ++index) {
+		if (tag.Attributes[index].Name == "ROLE") {
+			isAuthorRole = ZLUnicodeUtil::toUpper(tag.Attributes[index].Value) == "AUT";
+			break;
+		}
+	}
+	if (isAuthorRole) {
+		if (!myBuffer.empty()) {
+			myBuffer += ", ";
+		}
+		myReadAuthor = true;
+	}
+	return true;
+}
+
+// At the start of a document nothing is being collected yet.
+void HtmlMetainfoReader::startDocumentHandler() {
+	myReadAuthor = myReadTitle = myReadTags = false;
+}
+
+void HtmlMetainfoReader::endDocumentHandler() { // intentionally empty: nothing is done at the end of the document
+}
+
+// Appends the `len` bytes at `text` to the collection buffer while a title,
+// author or tag element is being read; the text goes through myConverter
+// first when `convert` is set. Text outside those elements is ignored.
+// Always returns true.
+bool HtmlMetainfoReader::characterDataHandler(const char *text, std::size_t len, bool convert) {
+	const bool collecting = myReadTitle || myReadAuthor || myReadTags;
+	if (!collecting) {
+		return true;
+	}
+	if (!convert) {
+		myBuffer.append(text, len);
+		return true;
+	}
+	myConverter->convert(myBuffer, text, text + len);
+	return true;
+}
diff --git a/reader/src/formats/pdb/HtmlMetainfoReader.h b/reader/src/formats/pdb/HtmlMetainfoReader.h
new file mode 100644
index 0000000..119c72e
--- /dev/null
+++ b/reader/src/formats/pdb/HtmlMetainfoReader.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __HTMLMETAINFOREADER_H__
+#define __HTMLMETAINFOREADER_H__
+
+#include "../html/HtmlReader.h"
+
+class Book;
+
+class HtmlMetainfoReader : public HtmlReader { // HtmlReader that copies DC:TITLE / DC:CREATOR / DC:SUBJECT text into a Book
+
+public:
+ enum ReadType { // bit flags selecting what tagHandler() extracts
+ NONE = 0,
+ TITLE = 1,
+ AUTHOR = 2,
+ TITLE_AND_AUTHOR = TITLE | AUTHOR,
+ TAGS = 4,
+ ALL = TITLE | AUTHOR | TAGS
+ };
+
+public:
+ HtmlMetainfoReader(Book &book, ReadType readType);
+
+private:
+ void startDocumentHandler(); // clears the three myRead* flags
+ void endDocumentHandler(); // does nothing
+
+ bool tagHandler(const HtmlTag &tag); // returns false for BODY and true for every other tag
+ bool characterDataHandler(const char *text, std::size_t len, bool convert); // always returns true
+
+private:
+ Book &myBook; // receives the extracted title, authors and tags
+ const ReadType myReadType;
+
+ bool myReadTitle; // true while inside DC:TITLE
+ bool myReadAuthor; // true while inside a DC:CREATOR whose ROLE is "AUT"
+ bool myReadTags; // true while inside DC:SUBJECT
+
+ std::string myBuffer; // text collected for the element currently being read
+};
+
+#endif /* __HTMLMETAINFOREADER_H__ */
diff --git a/reader/src/formats/pdb/HuffDecompressor.cpp b/reader/src/formats/pdb/HuffDecompressor.cpp
new file mode 100644
index 0000000..9b6f285
--- /dev/null
+++ b/reader/src/formats/pdb/HuffDecompressor.cpp
@@ -0,0 +1,192 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cstring>
+
+#include <ZLInputStream.h>
+
+#include "PdbReader.h"
+#include "BitReader.h"
+#include "HuffDecompressor.h"
+
+/*
+ * Loads the Huffman tables and the dictionary records used by decompress().
+ *
+ * stream            - source stream; it is repositioned by this constructor
+ * beginIt, endIt    - range of absolute record offsets: *beginIt is the
+ *                     Huffman header record, every following offset is the
+ *                     start of one dictionary record
+ * endHuffDataOffset - absolute offset just past the last dictionary record
+ * extraFlags        - flags consumed later by sizeOfTrailingEntries()
+ *
+ * Every member is initialized up front so that the destructor is safe on all
+ * exit paths. Any inconsistency in the input sets ERROR_CORRUPTED_FILE;
+ * error() then returns true and decompress() returns 0.
+ */
+HuffDecompressor::HuffDecompressor(ZLInputStream& stream,
+	const std::vector<unsigned long>::const_iterator beginIt,
+	const std::vector<unsigned long>::const_iterator endIt,
+	const unsigned long endHuffDataOffset, const unsigned long extraFlags) :
+	myEntryBits(0),
+	myExtraFlags(extraFlags),
+	myCacheTable(0),
+	myBaseTable(0),
+	myData(0),
+	myDicts(0),
+	myTargetBuffer(0),
+	myTargetBufferEnd(0),
+	myTargetBufferPtr(0),
+	myErrorCode(ERROR_NONE) {
+
+	// The header record plus at least one dictionary record are required;
+	// otherwise *(beginIt + 1) below would be out of range.
+	if (endIt - beginIt < 2) {
+		myErrorCode = ERROR_CORRUPTED_FILE;
+		return;
+	}
+
+	const unsigned long huffHeaderOffset = *beginIt;
+	const unsigned long huffRecordsNumber = endIt - beginIt;
+	const unsigned long huffDataOffset = *(beginIt + 1);
+
+	// An end offset at or before the data start would make the size below wrap.
+	if (endHuffDataOffset <= huffDataOffset) {
+		myErrorCode = ERROR_CORRUPTED_FILE;
+		return;
+	}
+
+	stream.seek(huffHeaderOffset, true);
+	stream.seek(16, false);
+	unsigned long cacheTableOffset = 0;
+	unsigned long baseTableOffset = 0;
+	PdbUtil::readUnsignedLongBE(stream, cacheTableOffset);
+	PdbUtil::readUnsignedLongBE(stream, baseTableOffset);
+
+	// Tables are zero-filled first so a truncated file cannot leave garbage in them.
+	myCacheTable = new unsigned long[256]();
+	stream.seek(huffHeaderOffset + cacheTableOffset, true);
+	for (std::size_t i = 0; i < 256; ++i) {
+		PdbUtil::readUnsignedLongLE(stream, myCacheTable[i]); //LE
+	}
+
+	myBaseTable = new unsigned long[64]();
+	stream.seek(huffHeaderOffset + baseTableOffset, true);
+	for (std::size_t i = 0; i < 64; ++i) {
+		PdbUtil::readUnsignedLongLE(stream, myBaseTable[i]); //LE
+	}
+
+	stream.seek(huffDataOffset + 12, true);
+	PdbUtil::readUnsignedLongBE(stream, myEntryBits);
+	// myEntryBits is used as a shift count in bitsDecompress(); a count that is
+	// not smaller than the value width would be undefined behaviour.
+	if (myEntryBits >= 32) {
+		myErrorCode = ERROR_CORRUPTED_FILE;
+		return;
+	}
+
+	const std::size_t huffDataSize = endHuffDataOffset - huffDataOffset;
+	myData = new unsigned char[huffDataSize];
+	stream.seek(huffDataOffset, true);
+	if (huffDataSize != stream.read((char*)myData, huffDataSize)) {
+		myErrorCode = ERROR_CORRUPTED_FILE;
+		return;
+	}
+
+	// Each dictionary pointer aliases myData; reject offsets outside the loaded range.
+	const std::size_t dictsNumber = huffRecordsNumber - 1;
+	myDicts = new unsigned char* [dictsNumber];
+	for (std::size_t i = 0; i < dictsNumber; ++i) {
+		const unsigned long recordOffset = *(beginIt + i + 1);
+		if ((recordOffset < huffDataOffset) || (recordOffset >= endHuffDataOffset)) {
+			myErrorCode = ERROR_CORRUPTED_FILE;
+			break;
+		}
+		myDicts[i] = myData + (recordOffset - huffDataOffset);
+	}
+}
+
+// Releases the four arrays allocated by the constructor.
+HuffDecompressor::~HuffDecompressor() {
+	// myDicts only holds pointers into myData, so its elements are not freed individually.
+	delete[] myDicts;
+	delete[] myData;
+	delete[] myBaseTable;
+	delete[] myCacheTable;
+}
+
+// Tells whether the decompressor has flagged the file as corrupted.
+bool HuffDecompressor::error() const {
+	const bool corrupted = (ERROR_CORRUPTED_FILE == myErrorCode);
+	return corrupted;
+}
+
+/*
+ * Reads compressedSize bytes from stream and decodes them into targetBuffer.
+ *
+ * targetBuffer        - output buffer; when it is 0 nothing is read or decoded
+ * compressedSize      - number of bytes to read from stream
+ * maxUncompressedSize - capacity of targetBuffer
+ *
+ * Returns the number of bytes written to targetBuffer. Returns 0 when
+ * compressedSize is 0 or the decompressor is already in the corrupted state.
+ * A short read yields 0 without changing the error state. If the trailing
+ * entries cover the whole record, ERROR_CORRUPTED_FILE is set.
+ */
+std::size_t HuffDecompressor::decompress(ZLInputStream &stream, char *targetBuffer, std::size_t compressedSize, std::size_t maxUncompressedSize) {
+	if ((myErrorCode == ERROR_CORRUPTED_FILE) || (compressedSize == 0)) {
+		return 0;
+	}
+
+	if (targetBuffer == 0) {
+		myTargetBuffer = 0;
+		myTargetBufferEnd = 0;
+		myTargetBufferPtr = 0;
+		return myTargetBufferPtr - myTargetBuffer;
+	}
+
+	unsigned char *compressed = new unsigned char[compressedSize];
+	myTargetBuffer = targetBuffer;
+	myTargetBufferEnd = targetBuffer + maxUncompressedSize;
+	myTargetBufferPtr = targetBuffer;
+
+	const std::size_t bytesRead = stream.read((char*)compressed, compressedSize);
+	if (bytesRead == compressedSize) {
+		// The tail of the record holds trailing entries that are not Huffman data.
+		const std::size_t tail = sizeOfTrailingEntries(compressed, compressedSize);
+		if (tail >= compressedSize) {
+			myErrorCode = ERROR_CORRUPTED_FILE;
+		} else {
+			bitsDecompress(BitReader(compressed, compressedSize - tail));
+		}
+	}
+	delete[] compressed;
+
+	return myTargetBufferPtr - myTargetBuffer;
+}
+
+/*
+ * Decodes the Huffman-coded bits of `bits` into the target buffer.
+ *
+ * Every decoded code selects a dictionary entry. An entry whose length word
+ * has bit 0x8000 set is literal data and is appended to the output; any other
+ * entry is decoded recursively, `depth` counting the nesting level.
+ *
+ * Decoding stops quietly when the input runs out or the output buffer cannot
+ * take the next slice. A zero code length, a code length above 32 or nesting
+ * deeper than 32 marks the file as corrupted (ERROR_CORRUPTED_FILE).
+ */
+void HuffDecompressor::bitsDecompress(BitReader bits, std::size_t depth) {
+	if (depth > 32) {
+		myErrorCode = ERROR_CORRUPTED_FILE;
+		return;
+	}
+
+	while (bits.left()) {
+		const unsigned long dw = (unsigned long)bits.peek(32);
+		// The cache table has 256 entries, indexed by the top byte of the window.
+		const unsigned long v = myCacheTable[(dw >> 24) & 0xFF];
+		unsigned long codelen = v & 0x1F;
+		// A zero length would shift by 32 and index the base table at (0 - 1) * 2.
+		if (codelen == 0) {
+			myErrorCode = ERROR_CORRUPTED_FILE;
+			return;
+		}
+		unsigned long code = dw >> (32 - codelen);
+		unsigned long r = (v >> 8);
+		if (!(v & 0x80)) {
+			while (code < myBaseTable[(codelen - 1) * 2]) {
+				codelen += 1;
+				// The base table has 64 entries, i.e. code lengths 1..32 only.
+				if (codelen > 32) {
+					myErrorCode = ERROR_CORRUPTED_FILE;
+					return;
+				}
+				code = dw >> (32 - codelen);
+			}
+			r = myBaseTable[(codelen - 1) * 2 + 1];
+		}
+		r -= code;
+		if (!bits.eat(codelen)) {
+			return;
+		}
+		const unsigned long dicno = r >> myEntryBits;
+		const unsigned long off1 = 16 + (r - (dicno << myEntryBits)) * 2;
+		// TODO: dicno, off1 and off2 still need range checks. The class stores
+		// neither the dictionary count nor the dictionary sizes, so they cannot
+		// be validated here.
+		const unsigned char* dict = myDicts[dicno];
+		const unsigned long off2 = 16 + dict[off1] * 256 + dict[off1 + 1];
+		const unsigned long blen = dict[off2] * 256 + dict[off2 + 1];
+		const unsigned char* slice = dict + off2 + 2;
+		const unsigned long sliceSize = blen & 0x7fff;
+		if (blen & 0x8000) {
+			// Compare against the remaining room so that a slice which exactly
+			// fills the buffer is accepted and no out-of-range pointer is formed.
+			const std::size_t room = myTargetBufferEnd - myTargetBufferPtr;
+			if (sliceSize <= room) {
+				std::memcpy(myTargetBufferPtr, slice, sliceSize);
+				myTargetBufferPtr += sliceSize;
+			} else {
+				return;
+			}
+		} else {
+			bitsDecompress(BitReader(slice, sliceSize), depth + 1);
+		}
+	}
+}
+
+/*
+ * Sums the sizes of the trailing entries stored at the end of a record.
+ * One variable-width size is read, working backwards from the end of `data`,
+ * for every bit set in myExtraFlags above bit 0 (bit 0 itself is discarded).
+ * Reading stops contributing once the sum reaches `size`.
+ */
+std::size_t HuffDecompressor::sizeOfTrailingEntries(unsigned char* data, std::size_t size) const {
+	std::size_t total = 0;
+	for (std::size_t pending = myExtraFlags >> 1; pending != 0; pending >>= 1) {
+		if ((pending & 1) == 0) {
+			continue;
+		}
+		if (total >= size) {
+			continue;
+		}
+		total += readVariableWidthIntegerBE(data, size - total);
+	}
+	return total;
+}
+
+
+/*
+ * Decodes a variable-width integer that ends at ptr[psize - 1], reading
+ * backwards. Each byte contributes its low 7 bits, the last byte of the buffer
+ * being the least significant group. Reading stops after a byte with bit 0x80
+ * set, after four bytes, or when the start of the buffer is reached.
+ * psize must be at least 1.
+ */
+std::size_t HuffDecompressor::readVariableWidthIntegerBE(unsigned char* ptr, std::size_t psize) const {
+	std::size_t value = 0;
+	unsigned char shift = 0;
+	unsigned char current;
+	do {
+		current = ptr[psize - 1];
+		--psize;
+		value |= (current & 0x7F) << shift;
+		shift += 7;
+	} while (((current & 0x80) == 0) && (shift < 28) && (psize != 0));
+	return value;
+}
diff --git a/reader/src/formats/pdb/HuffDecompressor.h b/reader/src/formats/pdb/HuffDecompressor.h
new file mode 100644
index 0000000..76539e9
--- /dev/null
+++ b/reader/src/formats/pdb/HuffDecompressor.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __HUFFDECOMPRESSOR_H__
+#define __HUFFDECOMPRESSOR_H__
+
+#include <cstddef>
+#include <string>
+#include <vector>
+
+class ZLInputStream;
+class BitReader;
+
+/*
+ * Decoder for Huffman/dictionary-compressed text records.
+ * The constructor loads the code tables and the dictionary data from the
+ * stream; decompress() then decodes one record at a time into a
+ * caller-supplied buffer. error() reports whether the input was found to be
+ * corrupted.
+ */
+class HuffDecompressor {
+
+public:
+	/*
+	 * beginHuffRecordOffsetIt points at the offset of the Huffman header record,
+	 * the offsets up to endHuffRecordOffsetIt are the dictionary records, and
+	 * endHuffDataOffset is the offset just past the last dictionary record.
+	 */
+	HuffDecompressor(ZLInputStream& stream,
+		const std::vector<unsigned long>::const_iterator beginHuffRecordOffsetIt,
+		const std::vector<unsigned long>::const_iterator endHuffRecordOffsetIt,
+		const unsigned long endHuffDataOffset, const unsigned long extraFlags);
+	~HuffDecompressor();
+
+	// Decodes compressedSize bytes read from stream; returns the number of bytes written to buffer.
+	std::size_t decompress(ZLInputStream &stream, char *buffer, std::size_t compressedSize, std::size_t maxUncompressedSize);
+	bool error() const;
+
+private:
+	// The object owns raw arrays released in the destructor, so copying would
+	// free them twice. Copying is disabled: declared, intentionally not defined.
+	HuffDecompressor(const HuffDecompressor&);
+	HuffDecompressor &operator = (const HuffDecompressor&);
+
+	std::size_t sizeOfTrailingEntries(unsigned char* data, std::size_t size) const;
+	std::size_t readVariableWidthIntegerBE(unsigned char* ptr, std::size_t psize) const;
+	void bitsDecompress(BitReader bits, std::size_t depth = 0);
+
+private:
+	unsigned long myEntryBits;   // shift count splitting a decoded value into dictionary number and entry index
+	unsigned long myExtraFlags;  // selects the trailing entries to strip from each record
+
+	unsigned long* myCacheTable; // 256 entries, indexed by the top byte of the bit window
+	unsigned long* myBaseTable;  // 64 entries, two per code length
+	unsigned char* myData;       // raw dictionary records
+	unsigned char** myDicts;     // start of each dictionary record inside myData
+
+	// Output window of the decompress() call in progress.
+	char* myTargetBuffer;
+	char* myTargetBufferEnd;
+	char* myTargetBufferPtr;
+
+	enum {
+		ERROR_NONE,
+		ERROR_CORRUPTED_FILE
+	} myErrorCode;
+};
+
+#endif /* __HUFFDECOMPRESSOR_H__ */
diff --git a/reader/src/formats/pdb/MobipocketHtmlBookReader.cpp b/reader/src/formats/pdb/MobipocketHtmlBookReader.cpp
new file mode 100644
index 0000000..cecbfbc
--- /dev/null
+++ b/reader/src/formats/pdb/MobipocketHtmlBookReader.cpp
@@ -0,0 +1,356 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cstdlib>
+#include <algorithm>
+
+#include <ZLFile.h>
+#include <ZLFileImage.h>
+#include <ZLStringUtil.h>
+#include <ZLUnicodeUtil.h>
+
+#include "MobipocketHtmlBookReader.h"
+#include "PalmDocStream.h"
+#include "../html/HtmlTagActions.h"
+#include "../../bookmodel/BookModel.h"
+
+class MobipocketHtmlImageTagAction : public HtmlTagAction { // created for "IMG" tags: turns a positive RECINDEX attribute into an image reference
+
+public:
+ MobipocketHtmlImageTagAction(HtmlBookReader &reader); // only forwards the reader to HtmlTagAction
+ void run(const HtmlReader::HtmlTag &tag); // acts on start tags only; also raises the reader's image counter to the largest index seen
+};
+
+class MobipocketHtmlHrTagAction : public HtmlTagAction { // created for "HR" tags: ends the current section
+
+public:
+ MobipocketHtmlHrTagAction(HtmlBookReader &reader); // only forwards the reader to HtmlTagAction
+ void run(const HtmlReader::HtmlTag &tag); // on a start tag: closes an open contents paragraph, then inserts an end-of-section paragraph
+};
+
+class MobipocketHtmlHrefTagAction : public HtmlHrefTagAction { // created for "A" tags: adds FILEPOS-based internal links on top of the base href handling
+
+public:
+ MobipocketHtmlHrefTagAction(HtmlBookReader &reader); // only forwards the reader to HtmlHrefTagAction
+ void run(const HtmlReader::HtmlTag &tag); // a positive FILEPOS becomes an internal hyperlink labelled "&<position>"; otherwise defers to HtmlHrefTagAction::run
+};
+
+class MobipocketHtmlGuideTagAction : public HtmlTagAction { // created for "GUIDE" tags: tracks whether parsing is inside the element
+
+public:
+ MobipocketHtmlGuideTagAction(HtmlBookReader &reader); // only forwards the reader to HtmlTagAction
+ void run(const HtmlReader::HtmlTag &tag); // stores tag.Start in the reader's myInsideGuide flag
+};
+
+class MobipocketHtmlReferenceTagAction : public HtmlTagAction { // created for "REFERENCE" tags: feeds guide references into the reader's TOCReader
+
+public:
+ MobipocketHtmlReferenceTagAction(HtmlBookReader &reader); // only forwards the reader to HtmlTagAction
+ void run(const HtmlReader::HtmlTag &tag); // inside a guide only: registers TITLE at FILEPOS; TYPE "toc" also sets the TOC start offset
+};
+
+class MobipocketHtmlPagebreakTagAction : public HtmlTagAction { // created for "MBP:PAGEBREAK" tags: ends the current section
+
+public:
+ MobipocketHtmlPagebreakTagAction(HtmlBookReader &reader); // only forwards the reader to HtmlTagAction
+ void run(const HtmlReader::HtmlTag &tag); // on a start tag: closes an open contents paragraph, then inserts an end-of-section paragraph
+};
+
+// The action keeps no state of its own; the reader is handed to the base class.
+MobipocketHtmlImageTagAction::MobipocketHtmlImageTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) {
+}
+
+/*
+ * Handles an image start tag. Only the first RECINDEX attribute is examined:
+ * when it parses to a positive number, the reader's image counter is raised to
+ * that number if it is larger, and an image reference named by the number is
+ * added. An open paragraph is closed before the reference and reopened after it.
+ */
+void MobipocketHtmlImageTagAction::run(const HtmlReader::HtmlTag &tag) {
+	if (!tag.Start) {
+		return;
+	}
+
+	// Locate the first RECINDEX attribute; later ones are ignored.
+	const std::size_t attributeCount = tag.Attributes.size();
+	std::size_t pos = 0;
+	while ((pos < attributeCount) && (tag.Attributes[pos].Name != "RECINDEX")) {
+		++pos;
+	}
+	if (pos == attributeCount) {
+		return;
+	}
+
+	const int recordIndex = std::atoi(tag.Attributes[pos].Value.c_str());
+	if (recordIndex <= 0) {
+		return;
+	}
+
+	int &counter = ((MobipocketHtmlBookReader&)myReader).myImageCounter;
+	if (counter < recordIndex) {
+		counter = recordIndex;
+	}
+
+	const bool paragraphWasOpen = bookReader().paragraphIsOpen();
+	if (paragraphWasOpen) {
+		bookReader().endParagraph();
+	}
+	std::string imageId;
+	ZLStringUtil::appendNumber(imageId, recordIndex);
+	bookReader().addImageReference(imageId);
+	if (paragraphWasOpen) {
+		bookReader().beginParagraph();
+	}
+}
+
+// The action keeps no state of its own; the reader is handed to the base class.
+MobipocketHtmlHrTagAction::MobipocketHtmlHrTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) {
+}
+
+/*
+ * Treats a horizontal rule as a section boundary: on the start tag an open
+ * contents paragraph is finished (leaving title mode) and an end-of-section
+ * paragraph is inserted. End tags are ignored.
+ */
+void MobipocketHtmlHrTagAction::run(const HtmlReader::HtmlTag &tag) {
+	if (!tag.Start) {
+		return;
+	}
+	const bool contentsOpen = bookReader().contentsParagraphIsOpen();
+	if (contentsOpen) {
+		bookReader().endContentsParagraph();
+		bookReader().exitTitle();
+	}
+	bookReader().insertEndOfSectionParagraph();
+}
+
+MobipocketHtmlHrefTagAction::MobipocketHtmlHrefTagAction(HtmlBookReader &reader) : HtmlHrefTagAction(reader) { // no state of its own; only forwards the reader to the base href action
+}
+
+// The action keeps no state of its own; the reader is handed to the base class.
+MobipocketHtmlPagebreakTagAction::MobipocketHtmlPagebreakTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) {
+}
+
+/*
+ * Treats a page break as a section boundary: on the start tag an open
+ * contents paragraph is finished (leaving title mode) and an end-of-section
+ * paragraph is inserted. End tags are ignored.
+ */
+void MobipocketHtmlPagebreakTagAction::run(const HtmlReader::HtmlTag &tag) {
+	if (!tag.Start) {
+		return;
+	}
+	const bool contentsOpen = bookReader().contentsParagraphIsOpen();
+	if (contentsOpen) {
+		bookReader().endContentsParagraph();
+		bookReader().exitTitle();
+	}
+	bookReader().insertEndOfSectionParagraph();
+}
+
+// Binds the TOC reader to its owning book reader and starts from the reset state.
+MobipocketHtmlBookReader::TOCReader::TOCReader(MobipocketHtmlBookReader &reader) : myReader(reader) {
+	reset();
+}
+
+/*
+ * Drops all collected entries and any partially read entry text.
+ * Both range offsets are set to (std::size_t)-1, which makes
+ * rangeContainsPosition() false for every position.
+ */
+void MobipocketHtmlBookReader::TOCReader::reset() {
+	const std::size_t unset = (std::size_t)-1;
+
+	myIsActive = false;
+	myCurrentEntryText.erase();
+	myEntries.clear();
+	myStartOffset = unset;
+	myEndOffset = unset;
+}
+
+// True when position lies in the half-open range [myStartOffset, myEndOffset).
+bool MobipocketHtmlBookReader::TOCReader::rangeContainsPosition(std::size_t position) {
+	const bool notBeforeStart = position >= myStartOffset;
+	const bool beforeEnd = position < myEndOffset;
+	return notBeforeStart && beforeEnd;
+}
+
+// Starts collecting the text of a TOC entry that will be stored under `position`.
+void MobipocketHtmlBookReader::TOCReader::startReadEntry(std::size_t position) {
+	myIsActive = true;
+	myCurrentReference = position;
+}
+
+/*
+ * Finishes the entry started by startReadEntry(). If any text was collected,
+ * it is run through the owning reader's converter and stored under the
+ * position given to startReadEntry(). Collection is switched off in all cases.
+ */
+void MobipocketHtmlBookReader::TOCReader::endReadEntry() {
+	if (!myIsActive || myCurrentEntryText.empty()) {
+		myIsActive = false;
+		return;
+	}
+
+	std::string entryTitle;
+	myReader.myConverter->convert(entryTitle, myCurrentEntryText);
+	myReader.myConverter->reset();
+	myEntries[myCurrentReference] = entryTitle;
+	myCurrentEntryText.erase();
+	myIsActive = false;
+}
+
+// Adds character data to the entry being collected; ignored while no entry is active.
+void MobipocketHtmlBookReader::TOCReader::appendText(const char *text, std::size_t len) {
+	if (!myIsActive) {
+		return;
+	}
+	myCurrentEntryText.append(text, len);
+}
+
+/*
+ * Stores (or overwrites) the entry title for `position`. When the position
+ * falls inside the current [start, end) range, the range is cut short so that
+ * it ends at this position.
+ */
+void MobipocketHtmlBookReader::TOCReader::addReference(std::size_t position, const std::string &text) {
+	myEntries[position] = text;
+	const bool insideRange = (myStartOffset <= position) && (myEndOffset > position);
+	if (insideRange) {
+		myEndOffset = position;
+	}
+}
+
+/*
+ * Sets the start of the TOC range. The end of the range is moved to the key
+ * of the entry that follows the first entry at or after `position`; when no
+ * such entry exists the end offset is left unchanged.
+ */
+void MobipocketHtmlBookReader::TOCReader::setStartOffset(std::size_t position) {
+	myStartOffset = position;
+
+	std::map<std::size_t,std::string>::const_iterator entry = myEntries.lower_bound(position);
+	if (entry == myEntries.end()) {
+		return;
+	}
+	++entry;
+	if (entry == myEntries.end()) {
+		return;
+	}
+	myEndOffset = entry->first;
+}
+
+// Sets the (exclusive) end of the TOC range.
+void MobipocketHtmlBookReader::TOCReader::setEndOffset(std::size_t position) {
+	myEndOffset = position;
+}
+
+const std::map<std::size_t,std::string> &MobipocketHtmlBookReader::TOCReader::entries() const { // read-only access to the collected position -> title map
+ return myEntries;
+}
+
+/*
+ * Handles anchor tags.
+ *
+ * Start tag: the first FILEPOS attribute with a non-empty value that parses to
+ * a positive number becomes an internal hyperlink labelled "&<number>", and
+ * the number is remembered as a link target. If the tag itself lies inside the
+ * TOC range, the link text is collected as a TOC entry for that target.
+ * The base href handling is skipped in this case.
+ *
+ * End tag: finishes a TOC entry that may be in progress.
+ *
+ * In every other case the tag is passed on to HtmlHrefTagAction::run().
+ */
+void MobipocketHtmlHrefTagAction::run(const HtmlReader::HtmlTag &tag) {
+	MobipocketHtmlBookReader &mobiReader = (MobipocketHtmlBookReader&)myReader;
+
+	if (!tag.Start) {
+		mobiReader.myTocReader.endReadEntry();
+		HtmlHrefTagAction::run(tag);
+		return;
+	}
+
+	for (unsigned int n = 0; n < tag.Attributes.size(); ++n) {
+		if (tag.Attributes[n].Name != "FILEPOS") {
+			continue;
+		}
+		const std::string &rawPosition = tag.Attributes[n].Value;
+		if (rawPosition.empty()) {
+			continue;
+		}
+		const int target = std::atoi(rawPosition.c_str());
+		if (target <= 0) {
+			continue;
+		}
+
+		MobipocketHtmlBookReader::TOCReader &toc = mobiReader.myTocReader;
+		if (toc.rangeContainsPosition(tag.Offset)) {
+			toc.startReadEntry(target);
+			// A link pointing into the TOC range itself shortens the range.
+			if (toc.rangeContainsPosition(target)) {
+				toc.setEndOffset(target);
+			}
+		}
+
+		mobiReader.myFileposReferences.insert(target);
+		std::string linkLabel = "&";
+		ZLStringUtil::appendNumber(linkLabel, target);
+		setHyperlinkType(INTERNAL_HYPERLINK);
+		bookReader().addHyperlinkControl(INTERNAL_HYPERLINK, linkLabel);
+		return;
+	}
+
+	HtmlHrefTagAction::run(tag);
+}
+
+// The action keeps no state of its own; the reader is handed to the base class.
+MobipocketHtmlGuideTagAction::MobipocketHtmlGuideTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) {
+}
+
+// Records in the reader whether parsing is currently inside the guide element:
+// true after its start tag, false after its end tag.
+void MobipocketHtmlGuideTagAction::run(const HtmlReader::HtmlTag &tag) {
+	((MobipocketHtmlBookReader&)myReader).myInsideGuide = tag.Start;
+}
+
+// The action keeps no state of its own; the reader is handed to the base class.
+MobipocketHtmlReferenceTagAction::MobipocketHtmlReferenceTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) {
+}
+
+/*
+ * Handles a reference tag found inside a guide element. A reference with a
+ * non-empty TITLE and a FILEPOS that parses to a positive number is registered
+ * as a TOC entry; if its TYPE equals "TOC" (case-insensitively) the position
+ * also becomes the start of the TOC range. Tags outside a guide are ignored.
+ */
+void MobipocketHtmlReferenceTagAction::run(const HtmlReader::HtmlTag &tag) {
+	MobipocketHtmlBookReader &mobiReader = (MobipocketHtmlBookReader&)myReader;
+	if (!mobiReader.myInsideGuide) {
+		return;
+	}
+
+	std::string entryTitle;
+	std::string rawPosition;
+	bool pointsToToc = false;
+	for (std::size_t n = 0; n < tag.Attributes.size(); ++n) {
+		const std::string &attrName = tag.Attributes[n].Name;
+		const std::string &attrValue = tag.Attributes[n].Value;
+		if (attrName == "TITLE") {
+			entryTitle = attrValue;
+			continue;
+		}
+		if (attrName == "FILEPOS") {
+			rawPosition = attrValue;
+			continue;
+		}
+		if ((attrName == "TYPE") && (ZLUnicodeUtil::toUpper(attrValue) == "TOC")) {
+			pointsToToc = true;
+		}
+	}
+
+	if (entryTitle.empty() || rawPosition.empty()) {
+		return;
+	}
+	const int position = std::atoi(rawPosition.c_str());
+	if (position <= 0) {
+		return;
+	}
+
+	mobiReader.myTocReader.addReference(position, entryTitle);
+	if (pointsToToc) {
+		mobiReader.myTocReader.setStartOffset(position);
+	}
+}
+
+/*
+ * Returns the Mobipocket-specific action for the tags this reader handles
+ * itself (IMG, HR, A, GUIDE, REFERENCE, MBP:PAGEBREAK) and falls back to
+ * HtmlBookReader::createAction() for every other tag name.
+ */
+shared_ptr<HtmlTagAction> MobipocketHtmlBookReader::createAction(const std::string &tag) {
+	HtmlTagAction *action = 0;
+	if (tag == "IMG") {
+		action = new MobipocketHtmlImageTagAction(*this);
+	} else if (tag == "HR") {
+		action = new MobipocketHtmlHrTagAction(*this);
+	} else if (tag == "A") {
+		action = new MobipocketHtmlHrefTagAction(*this);
+	} else if (tag == "GUIDE") {
+		action = new MobipocketHtmlGuideTagAction(*this);
+	} else if (tag == "REFERENCE") {
+		action = new MobipocketHtmlReferenceTagAction(*this);
+	} else if (tag == "MBP:PAGEBREAK") {
+		action = new MobipocketHtmlPagebreakTagAction(*this);
+	}
+
+	if (action != 0) {
+		return action;
+	}
+	return HtmlBookReader::createAction(tag);
+}
+
+// Runs the base start-of-document handling, then clears all per-document
+// Mobipocket state: image counter, guide flag, link targets, position map and TOC.
+void MobipocketHtmlBookReader::startDocumentHandler() {
+	HtmlBookReader::startDocumentHandler();
+
+	myInsideGuide = false;
+	myImageCounter = 0;
+	myTocReader.reset();
+	myPositionToParagraphMap.clear();
+	myFileposReferences.clear();
+}
+
+/*
+ * Records, for every tag, the pair (tag offset, index of the paragraph the tag
+ * belongs to) before passing the tag to HtmlBookReader::tagHandler(). While a
+ * paragraph is open, the index is that of the open (last) paragraph.
+ */
+bool MobipocketHtmlBookReader::tagHandler(const HtmlTag &tag) {
+	const std::size_t paragraphCount = myBookReader.model().bookTextModel()->paragraphsNumber();
+	const std::size_t paragraphIndex = myBookReader.paragraphIsOpen() ? paragraphCount - 1 : paragraphCount;
+	myPositionToParagraphMap.push_back(std::pair<std::size_t,std::size_t>(tag.Offset, paragraphIndex));
+	return HtmlBookReader::tagHandler(tag);
+}
+
+/*
+ * Creates a reader for the Mobipocket file `file`, filling `model`.
+ * The base reader is given an empty base directory path, and its own
+ * table-of-contents building and PRE-tag processing are switched off.
+ *
+ * myImageCounter and myInsideGuide are initialized here as well as in
+ * startDocumentHandler(), so that readDocument() never reads an indeterminate
+ * counter if the start-of-document handler has not run.
+ */
+MobipocketHtmlBookReader::MobipocketHtmlBookReader(const ZLFile &file, BookModel &model, const PlainTextFormat &format, const std::string &encoding) :
+	HtmlBookReader("", model, format, encoding),
+	myImageCounter(0),
+	myFileName(file.path()),
+	myInsideGuide(false),
+	myTocReader(*this) {
+	setBuildTableOfContent(false);
+	setProcessPreTag(false);
+}
+
+bool MobipocketHtmlBookReader::characterDataHandler(const char *text, std::size_t len, bool convert) { // feeds the raw text to the TOC reader first, then to the base handler
+ myTocReader.appendText(text, len); // kept only while a TOC entry is being collected
+ return HtmlBookReader::characterDataHandler(text, len, convert);
+}
+
+/*
+ * Parses the document and then resolves everything that depends on positions
+ * collected during parsing:
+ *  1. registers images 1..myImageCounter from the file's image records;
+ *  2. attaches a hyperlink label "&<position>" to the paragraph of the first
+ *     recorded tag at or after each link target;
+ *  3. creates a contents paragraph for every TOC entry in the same way.
+ *
+ * `stream` is cast to PalmDocStream without a check, so callers must pass one.
+ */
+void MobipocketHtmlBookReader::readDocument(ZLInputStream &stream) {
+	HtmlBookReader::readDocument(stream);
+
+	PalmDocStream &palmStream = (PalmDocStream&)stream;
+	const int firstImageIndex = palmStream.firstImageLocationIndex(myFileName);
+	for (int n = 0; (firstImageIndex >= 0) && (n < myImageCounter); ++n) {
+		const std::pair<int,int> location = palmStream.imageLocation(palmStream.header(), n + firstImageIndex);
+		if ((location.first <= 0) || (location.second <= 0)) {
+			continue;
+		}
+		std::string imageId;
+		ZLStringUtil::appendNumber(imageId, n + 1);
+		myBookReader.addImage(imageId, new ZLFileImage(ZLFile(myFileName), location.first, location.second));
+	}
+
+	typedef std::vector<std::pair<std::size_t,std::size_t> >::const_iterator PositionIterator;
+	const PositionIterator mapEnd = myPositionToParagraphMap.end();
+
+	// Both containers are ordered by position, so one forward pass is enough.
+	PositionIterator cursor = myPositionToParagraphMap.begin();
+	std::set<std::size_t>::const_iterator target = myFileposReferences.begin();
+	while (target != myFileposReferences.end()) {
+		while ((cursor != mapEnd) && (cursor->first < *target)) {
+			++cursor;
+		}
+		if (cursor == mapEnd) {
+			break;
+		}
+		std::string linkLabel = "&";
+		ZLStringUtil::appendNumber(linkLabel, *target);
+		myBookReader.addHyperlinkLabel(linkLabel, cursor->second);
+		++target;
+	}
+
+	cursor = myPositionToParagraphMap.begin();
+	const std::map<std::size_t,std::string> &tocEntries = myTocReader.entries();
+	std::map<std::size_t,std::string>::const_iterator entry = tocEntries.begin();
+	while (entry != tocEntries.end()) {
+		while ((cursor != mapEnd) && (cursor->first < entry->first)) {
+			++cursor;
+		}
+		if (cursor == mapEnd) {
+			break;
+		}
+		myBookReader.beginContentsParagraph(cursor->second);
+		myBookReader.addContentsData(entry->second);
+		myBookReader.endContentsParagraph();
+		++entry;
+	}
+}
diff --git a/reader/src/formats/pdb/MobipocketHtmlBookReader.h b/reader/src/formats/pdb/MobipocketHtmlBookReader.h
new file mode 100644
index 0000000..7a35523
--- /dev/null
+++ b/reader/src/formats/pdb/MobipocketHtmlBookReader.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __MOBIPOCKETHTMLBOOKREADER_H__
+#define __MOBIPOCKETHTMLBOOKREADER_H__
+
+#include <cstddef>
+#include <map>
+#include <set>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "../html/HtmlBookReader.h"
+
+/*
+ * HtmlBookReader specialisation for the HTML text of Mobipocket books.
+ * On top of the base HTML handling it resolves FILEPOS-based links, record
+ * index based images and the table of contents described by the guide section.
+ */
+class MobipocketHtmlBookReader : public HtmlBookReader {
+
+public:
+	MobipocketHtmlBookReader(const ZLFile &file, BookModel &model, const PlainTextFormat &format, const std::string &encoding);
+	// Parses the document, then registers images, link labels and TOC paragraphs.
+	void readDocument(ZLInputStream &stream);
+
+private:
+	void startDocumentHandler();
+	bool tagHandler(const HtmlTag &tag);
+	bool characterDataHandler(const char *text, std::size_t len, bool convert);
+	shared_ptr<HtmlTagAction> createAction(const std::string &tag);
+
+public:
+	/*
+	 * Collects table-of-contents entries as a map from file position to title.
+	 * Entries come either from guide references (addReference) or from the text
+	 * of links located inside the TOC range [start offset, end offset).
+	 */
+	class TOCReader {
+
+	public:
+		TOCReader(MobipocketHtmlBookReader &reader);
+		// Clears the entries and puts the range into its "empty" state.
+		void reset();
+
+		void addReference(std::size_t position, const std::string &text);
+
+		void setStartOffset(std::size_t position);
+		void setEndOffset(std::size_t position);
+
+		bool rangeContainsPosition(std::size_t position);
+
+		// startReadEntry/appendText/endReadEntry collect the text of one entry.
+		void startReadEntry(std::size_t position);
+		void endReadEntry();
+		void appendText(const char *text, std::size_t len);
+
+		const std::map<std::size_t,std::string> &entries() const;
+
+	private:
+		MobipocketHtmlBookReader &myReader;
+
+		std::map<std::size_t,std::string> myEntries;
+
+		bool myIsActive;
+		std::size_t myStartOffset;
+		std::size_t myEndOffset;
+
+		// Meaningful only while myIsActive is true (set by startReadEntry).
+		std::size_t myCurrentReference;
+		std::string myCurrentEntryText;
+	};
+
+private:
+	// Largest image record index referenced by the document so far.
+	int myImageCounter;
+	const std::string myFileName;
+
+	// (tag offset, paragraph index) pairs in the order tags were encountered.
+	std::vector<std::pair<std::size_t,std::size_t> > myPositionToParagraphMap;
+	// File positions used as internal link targets.
+	std::set<std::size_t> myFileposReferences;
+	bool myInsideGuide;
+	TOCReader myTocReader;
+
+friend class MobipocketHtmlImageTagAction;
+friend class MobipocketHtmlHrefTagAction;
+friend class MobipocketHtmlGuideTagAction;
+friend class MobipocketHtmlReferenceTagAction;
+friend class MobipocketHtmlPagebreakTagAction;
+friend class TOCReader;
+};
+
+#endif /* __MOBIPOCKETHTMLBOOKREADER_H__ */
diff --git a/reader/src/formats/pdb/MobipocketPlugin.cpp b/reader/src/formats/pdb/MobipocketPlugin.cpp
new file mode 100644
index 0000000..4832b43
--- /dev/null
+++ b/reader/src/formats/pdb/MobipocketPlugin.cpp
@@ -0,0 +1,229 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLFile.h>
+#include <ZLInputStream.h>
+#include <ZLEncodingConverter.h>
+#include <ZLUnicodeUtil.h>
+#include <ZLLanguageUtil.h>
+#include <ZLImage.h>
+#include <ZLFileImage.h>
+
+#include "PdbPlugin.h"
+#include "PalmDocStream.h"
+#include "MobipocketHtmlBookReader.h"
+
+#include "../../library/Book.h"
+
+// A file is accepted when its PDB type string is exactly "BOOKMOBI".
+bool MobipocketPlugin::acceptsFile(const ZLFile &file) const {
+	const bool isMobipocket = (PdbPlugin::fileType(file) == "BOOKMOBI");
+	return isMobipocket;
+}
+
+// Parses the already opened stream into the model with a Mobipocket HTML reader.
+void MobipocketPlugin::readDocumentInternal(const ZLFile &file, BookModel &model, const PlainTextFormat &format, const std::string &encoding, ZLInputStream &stream) const {
+	MobipocketHtmlBookReader bookReader(file, model, format, encoding);
+	bookReader.readDocument(stream);
+}
+
+// Reads up to `length` bytes from the stream into `target`, replacing its
+// previous content. The data is read in small chunks so that a bogus length
+// taken from a damaged file cannot trigger one huge allocation.
+// Returns false when the stream ends early; `target` then holds what was read.
+static bool readMobiString(ZLInputStream &stream, std::size_t length, std::string &target) {
+	target.erase();
+	char chunk[1024];
+	while (length > 0) {
+		const std::size_t portion = (length < sizeof(chunk)) ? length : sizeof(chunk);
+		const std::size_t got = stream.read(chunk, portion);
+		target.append(chunk, got);
+		if (got != portion) {
+			return false;
+		}
+		length -= portion;
+	}
+	return true;
+}
+
+/*
+ * Fills `book` with the metadata stored in the first record of the file:
+ * encoding (only if the book has none yet), language, authors and tags from
+ * the EXTH block, and the full name as title. Afterwards, and also when the
+ * record carries no "MOBI" signature, the generic
+ * PalmDocLikePlugin::readMetaInfo() is run.
+ *
+ * Returns false when the file cannot be opened or its PDB header is unusable;
+ * otherwise returns the result of PalmDocLikePlugin::readMetaInfo().
+ */
+bool MobipocketPlugin::readMetaInfo(Book &book) const {
+	shared_ptr<ZLInputStream> stream = book.file().inputStream();
+	if (stream.isNull() || ! stream->open()) {
+		return false;
+	}
+	PdbHeader header;
+	// Offsets[0] is dereferenced below, so a header without records is unusable.
+	if (!header.read(stream) || header.Offsets.empty()) {
+		stream->close();
+		return false;
+	}
+	stream->seek(header.Offsets[0] + 16, true);
+	char test[5] = { 0, 0, 0, 0, 0 };
+	stream->read(test, 4);
+	static const std::string MOBI = "MOBI";
+	if (MOBI != test) {
+		stream->close();
+		return PalmDocLikePlugin::readMetaInfo(book);
+	}
+
+	// Length of the MOBI header; the EXTH block, if any, starts right after it.
+	unsigned long length = 0;
+	PdbUtil::readUnsignedLongBE(*stream, length);
+
+	stream->seek(4, false);
+
+	unsigned long encodingCode = 0;
+	PdbUtil::readUnsignedLongBE(*stream, encodingCode);
+	if (book.encoding().empty()) {
+		ZLEncodingConverterInfoPtr info = ZLEncodingCollection::Instance().info(encodingCode);
+		if (!info.isNull()) {
+			book.setEncoding(info->name());
+		}
+	}
+
+	stream->seek(52, false);
+
+	unsigned long fullNameOffset = 0;
+	PdbUtil::readUnsignedLongBE(*stream, fullNameOffset);
+	unsigned long fullNameLength = 0;
+	PdbUtil::readUnsignedLongBE(*stream, fullNameLength);
+
+	unsigned long languageCode = 0;
+	PdbUtil::readUnsignedLongBE(*stream, languageCode);
+	book.setLanguage(ZLLanguageUtil::languageByCode(languageCode & 0xFF, (languageCode >> 8) & 0xFF));
+
+	stream->seek(32, false);
+
+	unsigned long exthFlags = 0;
+	PdbUtil::readUnsignedLongBE(*stream, exthFlags);
+	if (exthFlags & 0x40) {
+		stream->seek(header.Offsets[0] + 16 + length, true);
+
+		stream->read(test, 4);
+		static const std::string EXTH = "EXTH";
+		if (EXTH == test) {
+			stream->seek(4, false);
+			unsigned long recordsNum = 0;
+			PdbUtil::readUnsignedLongBE(*stream, recordsNum);
+			for (unsigned long i = 0; i < recordsNum; ++i) {
+				unsigned long type = 0;
+				PdbUtil::readUnsignedLongBE(*stream, type);
+				// The record size includes the 8 bytes of type and size.
+				unsigned long size = 0;
+				PdbUtil::readUnsignedLongBE(*stream, size);
+				if (size > 8) {
+					std::string value;
+					if (!readMobiString(*stream, size - 8, value)) {
+						// Truncated file: nothing sensible can follow.
+						break;
+					}
+					switch (type) {
+						case 100: // author
+						{
+							// "Last, First" is turned into "First Last".
+							const std::string::size_type comma = value.find(',');
+							if (comma != std::string::npos) {
+								std::string part0 = value.substr(0, comma);
+								std::string part1 = value.substr(comma + 1);
+								ZLUnicodeUtil::utf8Trim(part0);
+								ZLUnicodeUtil::utf8Trim(part1);
+								value = part1 + ' ' + part0;
+							} else {
+								ZLUnicodeUtil::utf8Trim(value);
+							}
+							book.addAuthor(value);
+							break;
+						}
+						case 105: // subject
+							book.addTag(value);
+							break;
+					}
+				}
+			}
+		}
+	}
+
+	stream->seek(header.Offsets[0] + fullNameOffset, true);
+	std::string title;
+	readMobiString(*stream, fullNameLength, title);
+	book.setTitle(title);
+
+	stream->close();
+	return PalmDocLikePlugin::readMetaInfo(book);
+}
+
+/*
+ * Returns the cover image of a Mobipocket file, or 0 when there is none.
+ * The image index is taken from EXTH record 201 (cover), falling back to
+ * record 202 (thumbnail); it is relative to the first image record of the file.
+ * 0 is also returned when the file cannot be opened, has no usable PDB header,
+ * lacks the "MOBI" signature, or announces an EXTH block that is not there.
+ */
+shared_ptr<const ZLImage> MobipocketPlugin::coverImage(const ZLFile &file) const {
+	shared_ptr<ZLInputStream> stream = file.inputStream();
+	if (stream.isNull() || ! stream->open()) {
+		return 0;
+	}
+	PdbHeader header;
+	// Offsets[0] is dereferenced below, so a header without records is unusable.
+	if (!header.read(stream) || header.Offsets.empty()) {
+		stream->close();
+		return 0;
+	}
+	stream->seek(header.Offsets[0] + 16, true);
+	char test[5] = { 0, 0, 0, 0, 0 };
+	stream->read(test, 4);
+	static const std::string MOBI = "MOBI";
+	if (MOBI != test) {
+		stream->close();
+		return 0;
+	}
+
+	// Length of the MOBI header; the EXTH block, if any, starts right after it.
+	unsigned long length = 0;
+	PdbUtil::readUnsignedLongBE(*stream, length);
+
+	stream->seek(104, false);
+
+	unsigned long exthFlags = 0;
+	unsigned long coverIndex = (unsigned long)-1;
+	unsigned long thumbIndex = (unsigned long)-1;
+	PdbUtil::readUnsignedLongBE(*stream, exthFlags);
+	if (exthFlags & 0x40) {
+		stream->seek(header.Offsets[0] + 16 + length, true);
+
+		stream->read(test, 4);
+		static const std::string EXTH = "EXTH";
+		if (EXTH != test) {
+			stream->close();
+			return 0;
+		}
+		stream->seek(4, false);
+		unsigned long recordsNum = 0;
+		PdbUtil::readUnsignedLongBE(*stream, recordsNum);
+		for (unsigned long i = 0; i < recordsNum; ++i) {
+			unsigned long type = 0;
+			PdbUtil::readUnsignedLongBE(*stream, type);
+			// The record size includes the 8 bytes of type and size.
+			unsigned long size = 0;
+			PdbUtil::readUnsignedLongBE(*stream, size);
+			// A size below 8 would wrap in "size - 8", and a payload that does
+			// not fit the int offset of seek() would move backwards. Both can
+			// only come from a damaged file, so scanning stops.
+			if ((size < 8) || (size - 8 > 0x7FFFFFFFUL)) {
+				break;
+			}
+			if ((type == 201) && (size == 12)) { // coveroffset
+				PdbUtil::readUnsignedLongBE(*stream, coverIndex);
+			} else if ((type == 202) && (size == 12)) { // thumboffset
+				PdbUtil::readUnsignedLongBE(*stream, thumbIndex);
+			} else {
+				stream->seek(size - 8, false);
+			}
+		}
+	}
+	stream->close();
+
+	if (coverIndex == (unsigned long)-1) {
+		if (thumbIndex == (unsigned long)-1) {
+			return 0;
+		}
+		coverIndex = thumbIndex;
+	}
+
+	PalmDocStream pbStream(file);
+	if (!pbStream.open()) {
+		return 0;
+	}
+	int index = pbStream.firstImageLocationIndex(file.path());
+	if (index >= 0) {
+		std::pair<int,int> imageLocation = pbStream.imageLocation(pbStream.header(), index + coverIndex);
+		if ((imageLocation.first > 0) && (imageLocation.second > 0)) {
+			return new ZLFileImage(
+				file,
+				imageLocation.first,
+				imageLocation.second
+			);
+		}
+	}
+	return 0;
+}
diff --git a/reader/src/formats/pdb/PalmDocLikePlugin.cpp b/reader/src/formats/pdb/PalmDocLikePlugin.cpp
new file mode 100644
index 0000000..27c03a1
--- /dev/null
+++ b/reader/src/formats/pdb/PalmDocLikePlugin.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLFile.h>
+
+#include "PdbPlugin.h"
+#include "PalmDocStream.h"
+#include "PalmDocLikeStream.h"
+
+#include "../../library/Book.h"
+
+bool PalmDocLikePlugin::providesMetaInfo() const { // always reports true
+ return true;
+}
+
+shared_ptr<ZLInputStream> PalmDocLikePlugin::createStream(const ZLFile &file) const { // returns a new, not yet opened PalmDocStream over the file
+ return new PalmDocStream(file);
+}
+
+const std::string &PalmDocLikePlugin::tryOpen(const ZLFile &file) const { // opens the file once and returns the stream's error text
+ PalmDocStream stream(file);
+ stream.open(); // result ignored on purpose: only the error text matters here
+ return stream.error(); // NOTE(review): the reference outlives the local stream; PalmDocLikeStream::error() returns static/resource strings, confirm PalmDocStream does not override it
+}
diff --git a/reader/src/formats/pdb/PalmDocLikeStream.cpp b/reader/src/formats/pdb/PalmDocLikeStream.cpp
new file mode 100644
index 0000000..8b99d4d
--- /dev/null
+++ b/reader/src/formats/pdb/PalmDocLikeStream.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLFile.h>
+#include <ZLResource.h>
+
+#include "PalmDocLikeStream.h"
+
+
+// Forwards `file` to the PdbStream base; no other member is set here.
+PalmDocLikeStream::PalmDocLikeStream(const ZLFile &file)
+	: PdbStream(file) {
+}
+
+// Closes the stream on destruction (PdbStream's own destructor does not).
+PalmDocLikeStream::~PalmDocLikeStream() {
+	this->close();
+}
+
+// Opens the PDB container, lets the subclass parse record 0 and allocates
+// the per-record buffer. Returns false on failure; failure of the base
+// open is reported as ERROR_UNKNOWN, while processZeroRecord() is expected
+// to set its own error code.
+bool PalmDocLikeStream::open() {
+	myErrorCode = ERROR_NONE;
+
+	const bool baseOpened = PdbStream::open();
+	if (!baseOpened) {
+		myErrorCode = ERROR_UNKNOWN;
+		return false;
+	}
+
+	if (processZeroRecord()) {
+		// myMaxRecordSize is filled in by processZeroRecord().
+		myBuffer = new char[myMaxRecordSize];
+		myRecordIndex = 0;
+		return true;
+	}
+	return false;
+}
+
+// Ensures the record buffer holds unread bytes, advancing to following
+// records while the current one is exhausted. Returns false when there
+// are no more records or a record cannot be processed.
+bool PalmDocLikeStream::fillBuffer() {
+	for (;;) {
+		if (myBufferOffset != myBufferLength) {
+			// Unread data is still available in the buffer.
+			return true;
+		}
+		if (myRecordIndex + 1 > myMaxRecordIndex) {
+			return false;
+		}
+		++myRecordIndex;
+		if (!processRecord()) {
+			return false;
+		}
+	}
+}
+
+const std::string &PalmDocLikeStream::error() const {
+ static const ZLResource &resource = ZLResource::resource("mobipocketPlugin");
+ switch (myErrorCode) {
+ default:
+ {
+ static const std::string EMPTY;
+ return EMPTY;
+ }
+ case ERROR_UNKNOWN:
+ return resource["unknown"].value();
+ case ERROR_COMPRESSION:
+ return resource["unsupportedCompressionMethod"].value();
+ case ERROR_ENCRYPTION:
+ return resource["encryptedFile"].value();
+ }
+}
diff --git a/reader/src/formats/pdb/PalmDocLikeStream.h b/reader/src/formats/pdb/PalmDocLikeStream.h
new file mode 100644
index 0000000..623a493
--- /dev/null
+++ b/reader/src/formats/pdb/PalmDocLikeStream.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __PALMDOCLIKESTREAM_H__
+#define __PALMDOCLIKESTREAM_H__
+
+#include "PdbStream.h"
+
+class ZLFile;
+
+class PalmDocLikeStream : public PdbStream {
+ // Record-by-record PDB stream; subclasses supply record 0 parsing and per-record decoding.
+public:
+ PalmDocLikeStream(const ZLFile &file);
+ ~PalmDocLikeStream(); // calls close()
+ bool open(); // opens the base stream, runs processZeroRecord(), allocates myBuffer
+
+ const std::string &error() const; // message for myErrorCode; empty string for ERROR_NONE
+ //std::pair<int,int> imageLocation(int index);
+ //bool hasExtraSections() const;
+
+protected:
+ bool fillBuffer(); // advances through records until the buffer has unread bytes
+
+private:
+ virtual bool processRecord() = 0; // load record myRecordIndex into myBuffer
+ virtual bool processZeroRecord() = 0; // parse record 0; expected to set myMaxRecordSize and myMaxRecordIndex
+
+protected:
+ unsigned short myMaxRecordSize; // size of the buffer allocated in open()
+ std::size_t myRecordIndex; // index of the record currently in the buffer (0 right after open())
+ std::size_t myMaxRecordIndex; // fillBuffer() stops once myRecordIndex reaches this value
+ // Error state reported through error().
+ enum {
+ ERROR_NONE,
+ ERROR_UNKNOWN,
+ ERROR_COMPRESSION,
+ ERROR_ENCRYPTION,
+ } myErrorCode;
+};
+
+#endif /* __PALMDOCLIKESTREAM_H__ */
diff --git a/reader/src/formats/pdb/PalmDocPlugin.cpp b/reader/src/formats/pdb/PalmDocPlugin.cpp
new file mode 100644
index 0000000..c23f11c
--- /dev/null
+++ b/reader/src/formats/pdb/PalmDocPlugin.cpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLFile.h>
+#include <ZLInputStream.h>
+
+#include "PdbPlugin.h"
+#include "PalmDocStream.h"
+#include "MobipocketHtmlBookReader.h"
+#include "../txt/PlainTextFormat.h"
+#include "../util/TextFormatDetector.h"
+
+// Accepts files whose PDB type identifier is "TEXtREAd".
+bool PalmDocPlugin::acceptsFile(const ZLFile &file) const {
+	const std::string type = PdbPlugin::fileType(file);
+	return type == "TEXtREAd";
+}
+
+// Reads the document either with the Mobipocket HTML reader (when the
+// stream reports extra sections) or with the plain SimplePdbPlugin path.
+// `stream` must actually be a PalmDocStream.
+void PalmDocPlugin::readDocumentInternal(const ZLFile &file, BookModel &model, const PlainTextFormat &format, const std::string &encoding, ZLInputStream &stream) const {
+	// The stream is opened only to probe the record layout, then closed again.
+	stream.open();
+	const bool hasExtra = static_cast<PalmDocStream&>(stream).hasExtraSections();
+	stream.close();
+
+	if (!hasExtra) {
+		SimplePdbPlugin::readDocumentInternal(file, model, format, encoding, stream);
+		return;
+	}
+	MobipocketHtmlBookReader(file, model, format, encoding).readDocument(stream);
+}
+
+// Builds the plain-text info page for files without extra sections;
+// returns 0 for files that have them.
+FormatInfoPage *PalmDocPlugin::createInfoPage(ZLOptionsDialog &dialog, const ZLFile &file) {
+	shared_ptr<ZLInputStream> stream = createStream(file);
+	// Open only long enough to inspect the record layout.
+	stream->open();
+	const bool hasExtra = static_cast<PalmDocStream&>(*stream).hasExtraSections();
+	stream->close();
+
+	if (hasExtra) {
+		return 0;
+	}
+	const bool notHtml = !TextFormatDetector().isHtml(*stream);
+	return new PlainTextInfoPage(dialog, file, ZLResourceKey("Text"), notHtml);
+}
diff --git a/reader/src/formats/pdb/PalmDocStream.cpp b/reader/src/formats/pdb/PalmDocStream.cpp
new file mode 100644
index 0000000..e699d47
--- /dev/null
+++ b/reader/src/formats/pdb/PalmDocStream.cpp
@@ -0,0 +1,209 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cstring>
+#include <algorithm>
+
+#include <ZLFile.h>
+#include <ZLResource.h>
+#include <ZLZDecompressor.h>
+
+#include "PalmDocStream.h"
+#include "DocDecompressor.h"
+#include "HuffDecompressor.h"
+
+// Forwards `file` to PalmDocLikeStream; record 0 is parsed later, in open().
+PalmDocStream::PalmDocStream(const ZLFile &file)
+	: PalmDocLikeStream(file) {
+}
+
+// Closes the stream on destruction.
+PalmDocStream::~PalmDocStream() {
+	this->close();
+}
+
+// Loads record myRecordIndex into myBuffer, decoding it according to
+// myCompressionVersion. Returns false when the record would require
+// seeking backwards or when the record offsets are not increasing.
+bool PalmDocStream::processRecord() {
+	const std::size_t recordStart = recordOffset(myRecordIndex);
+	if (recordStart < myBase->offset()) {
+		// The base stream is only ever moved forward here.
+		return false;
+	}
+	myBase->seek(recordStart, true);
+
+	const std::size_t recordEnd = recordOffset(myRecordIndex + 1);
+	if (recordEnd < recordStart) {
+		return false;
+	}
+	const unsigned short recordSize = recordEnd - recordStart;
+
+	if (myCompressionVersion == 17480) {
+		// 'DH': HuffCDic compression
+		myBufferLength = myHuffDecompressorPtr->decompress(*myBase, myBuffer, recordSize, myMaxRecordSize);
+	} else if (myCompressionVersion == 2) {
+		// PalmDoc compression
+		myBufferLength = DocDecompressor().decompress(*myBase, myBuffer, recordSize, myMaxRecordSize);
+	} else if (myCompressionVersion == 1) {
+		// No compression: copy at most one buffer's worth of raw bytes.
+		myBufferLength = myBase->read(myBuffer, std::min(recordSize, myMaxRecordSize));
+	}
+
+	myBufferOffset = 0;
+	return true;
+}
+
+bool PalmDocStream::processZeroRecord() { // Parses record 0: compression type, text record count, max record size, encryption flag and (for HuffCDic) the Huffman sections. Returns false with myErrorCode set on unsupported input.
+ // Expects myBase to be positioned at the start of record 0 (written ^ below); PdbStream::open() seeks to Offsets[0] before PalmDocLikeStream::open() calls this.
+ PdbUtil::readUnsignedShort(*myBase, myCompressionVersion); // myBase offset: ^ + 2
+ switch (myCompressionVersion) {
+ case 1:
+ case 2:
+ case 17480:
+ break;
+ default:
+ myErrorCode = ERROR_COMPRESSION;
+ return false;
+ }
+ myBase->seek(2, false); // myBase offset: ^ + 4
+ PdbUtil::readUnsignedLongBE(*myBase, myTextLength); // myBase offset: ^ + 8
+ PdbUtil::readUnsignedShort(*myBase, myTextRecordNumber); // myBase offset: ^ + 10
+ // Clamp the text record count from record 0 to the last record index listed in the PDB header.
+ unsigned short endSectionIndex = header().Offsets.size();
+ myMaxRecordIndex = std::min(myTextRecordNumber, (unsigned short)(endSectionIndex - 1));
+ //TODO: report an error here when the text record count is incompatible with numRecords from the header
+
+ PdbUtil::readUnsignedShort(*myBase, myMaxRecordSize); // myBase offset: ^ + 12
+ if (myMaxRecordSize == 0) {
+ myErrorCode = ERROR_UNKNOWN;
+ return false;
+ }
+
+ /*
+ std::cerr << "PalmDocStream::processRecord0():\n";
+ std::cerr << "PDB header indentificator : " << header().Id << "\n";
+ std::cerr << "PDB file system: sizeof opened : " << myBaseSize << "\n";
+ std::cerr << "PDB header/record[0] max index : " << myMaxRecordIndex << "\n";
+ std::cerr << "PDB record[0][0..2] compression : " << myCompressionVersion << "\n";
+ std::cerr << "PDB record[0][2..4] spare : " << mySpare << "\n";
+ std::cerr << "PDB record[0][4..8] text length : " << myTextLength << "\n";
+ std::cerr << "PDB record[0][8..10] text records : " << myTextRecords << "\n";
+ std::cerr << "PDB record[0][10..12] max record size: " << myMaxRecordSize << "\n";
+ */
+
+ if (header().Id == "BOOKMOBI") {
+ unsigned short encrypted = 0;
+ PdbUtil::readUnsignedShort(*myBase, encrypted); // myBase offset: ^ + 14
+ if (encrypted) { //Always = 2, if encrypted
+ myErrorCode = ERROR_ENCRYPTION;
+ return false;
+ }
+ } else {
+ myBase->seek(2, false); // skip the same two bytes so both branches end at ^ + 14
+ }
+
+ // 17480 is 'DH' (HuffCDic compression, see processRecord()): locate the Huffman sections and build the decompressor.
+ if (myCompressionVersion == 17480) {
+ unsigned long mobiHeaderLength;
+ unsigned long huffSectionIndex;
+ unsigned long huffSectionNumber;
+ unsigned short extraFlags;
+ unsigned long initialOffset = header().Offsets[0]; // myBase offset: ^
+
+ myBase->seek(6, false); // myBase offset: ^ + 20
+ PdbUtil::readUnsignedLongBE(*myBase, mobiHeaderLength); // myBase offset: ^ + 24
+
+ myBase->seek(0x70 - 24, false); // myBase offset: ^ + 112 (0x70)
+ PdbUtil::readUnsignedLongBE(*myBase, huffSectionIndex); // myBase offset: ^ + 116 (0x74)
+ PdbUtil::readUnsignedLongBE(*myBase, huffSectionNumber); // myBase offset: ^ + 120 (0x78)
+ // The extra-flags field is read only when the header length covers offset 0xF4.
+ if (mobiHeaderLength >= 244) {
+ myBase->seek(0xF2 - 0x78, false); // myBase offset: ^ + 242 (0xF2)
+ PdbUtil::readUnsignedShort(*myBase, extraFlags); // myBase offset: ^ + 244 (0xF4)
+ } else {
+ extraFlags = 0;
+ }
+ /*
+ std::cerr << "mobi header length: " << mobiHeaderLength << "\n";
+ std::cerr << "Huff's start record : " << huffSectionIndex << " from " << endSectionIndex - 1 << "\n";
+ std::cerr << "Huff's records number: " << huffSectionNumber << "\n";
+ std::cerr << "Huff's extraFlags : " << extraFlags << "\n";
+ */
+ const unsigned long endHuffSectionIndex = huffSectionIndex + huffSectionNumber;
+ if (endHuffSectionIndex > endSectionIndex || huffSectionNumber <= 1) { // sections must lie inside the record table and number at least two
+ myErrorCode = ERROR_COMPRESSION;
+ return false;
+ }
+ const unsigned long endHuffDataOffset = recordOffset(endHuffSectionIndex);
+ std::vector<unsigned long>::const_iterator beginHuffSectionOffsetIt = header().Offsets.begin() + huffSectionIndex;
+ // point to first Huff section
+ std::vector<unsigned long>::const_iterator endHuffSectionOffsetIt = header().Offsets.begin() + endHuffSectionIndex;
+ // point behind last Huff section
+
+ // NOTE(review): the HuffDecompressor constructor presumably moves myBase while reading the Huff sections - confirm.
+ myHuffDecompressorPtr = new HuffDecompressor(*myBase, beginHuffSectionOffsetIt, endHuffSectionOffsetIt, endHuffDataOffset, extraFlags);
+ myBase->seek(initialOffset, true); // myBase offset: ^ (absolute seek back to the start of record 0; processRecord() refuses to seek backwards)
+ }
+ return true;
+}
+
+// True when the PDB header lists records after the last text record.
+bool PalmDocStream::hasExtraSections() const {
+	const std::size_t lastRecordIndex = header().Offsets.size() - 1;
+	return myMaxRecordIndex < lastRecordIndex;
+}
+
+// Returns (offset, size) of the `index`-th record following the text
+// records, or (-1, -1) when that record does not exist. For the last
+// record the end position is taken from the base stream's current offset.
+std::pair<int,int> PalmDocStream::imageLocation(const PdbHeader &header, int index) const {
+	index += myMaxRecordIndex + 1;
+	const int recordNumber = header.Offsets.size();
+	const int lastIndex = recordNumber - 1;
+	if (index > lastIndex) {
+		return std::make_pair(-1, -1);
+	}
+
+	const int start = header.Offsets[index];
+	int end;
+	if (index < lastIndex) {
+		end = header.Offsets[index + 1];
+	} else {
+		end = myBase->offset();
+	}
+	return std::make_pair(start, end - start);
+}
+
+// Scans the records after the text records for the first one that starts
+// with a BMP ("BM"), GIF ("GIF8") or JPEG (0xFF 0xD8) signature.
+// Returns its index as understood by imageLocation(), or -1 when the file
+// cannot be opened or no such record is found.
+int PalmDocStream::firstImageLocationIndex(const std::string &fileName) {
+	shared_ptr<ZLInputStream> fileStream = ZLFile(fileName).inputStream();
+	if (fileStream.isNull() || !fileStream->open()) {
+		return -1;
+	}
+
+	static const char jpegStart[2] = { (char)0xFF, (char)0xd8 };
+	char signature[5] = { 0 };
+	int foundIndex = -1;
+	int index = 0;
+
+	std::pair<int,int> location = imageLocation(header(), index);
+	fileStream->seek(location.first, false);
+	for (; (location.first > 0) && (location.second > 0); location = imageLocation(header(), ++index)) {
+		// Number of bytes to skip to reach the next record.
+		int toSkip = location.second;
+		if (location.second > 4) {
+			fileStream->read(signature, 4);
+			const bool isBmp = std::strncmp(signature, "BM", 2) == 0;
+			const bool isGif = std::strncmp(signature, "GIF8", 4) == 0;
+			const bool isJpeg = std::strncmp(signature, jpegStart, 2) == 0;
+			if (isBmp || isGif || isJpeg) {
+				foundIndex = index;
+				break;
+			}
+			// Four bytes of this record have already been consumed.
+			toSkip -= 4;
+		}
+		fileStream->seek(toSkip, false);
+	}
+
+	fileStream->close();
+	return foundIndex;
+}
diff --git a/reader/src/formats/pdb/PalmDocStream.h b/reader/src/formats/pdb/PalmDocStream.h
new file mode 100644
index 0000000..4782a7b
--- /dev/null
+++ b/reader/src/formats/pdb/PalmDocStream.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __PALMDOCSTREAM_H__
+#define __PALMDOCSTREAM_H__
+
+#include "PalmDocLikeStream.h"
+
+class ZLFile;
+class HuffDecompressor;
+
+class PalmDocStream : public PalmDocLikeStream {
+ // Text stream whose record 0 selects the record encoding: 1 = none, 2 = PalmDoc, 17480 = HuffCDic.
+public:
+ PalmDocStream(const ZLFile &file);
+ ~PalmDocStream(); // calls close()
+
+ std::pair<int,int> imageLocation(const PdbHeader &header, int index) const; // (offset, size) of the index-th record after the text records; (-1, -1) if out of range
+ bool hasExtraSections() const; // true when the header lists records beyond the last text record
+ int firstImageLocationIndex(const std::string &fileName); // index of the first record with a BMP/GIF/JPEG signature, or -1
+
+private:
+ bool processRecord(); // decodes record myRecordIndex into myBuffer
+ bool processZeroRecord(); // parses record 0 and sets up Huffman decoding when required
+
+private:
+ unsigned short myCompressionVersion; // first 16-bit field of record 0
+ unsigned long myTextLength; //TODO: Warning: isn't used
+ unsigned short myTextRecordNumber; // text record count as stored in record 0
+ // Created in processZeroRecord() only when the compression version is 17480.
+ shared_ptr<HuffDecompressor> myHuffDecompressorPtr;
+};
+
+#endif /* __PALMDOCSTREAM_H__ */
diff --git a/reader/src/formats/pdb/PdbPlugin.cpp b/reader/src/formats/pdb/PdbPlugin.cpp
new file mode 100644
index 0000000..69ef233
--- /dev/null
+++ b/reader/src/formats/pdb/PdbPlugin.cpp
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLFile.h>
+#include <ZLInputStream.h>
+#include <ZLOptions.h>
+
+#include "PdbPlugin.h"
+#include "../../options/FBCategoryKey.h"
+
+#include "../../database/booksdb/BooksDBUtil.h"
+#include "../../database/booksdb/BooksDB.h"
+
+// Out-of-line virtual destructor; there is nothing to release.
+PdbPlugin::~PdbPlugin()
+{
+}
+
+// Returns the 8-byte identifier stored at offset 60 of a .prc/.pdb/.mobi
+// file, or an empty string when the extension is not one of those or the
+// identifier cannot be read.
+//
+// The identifier is cached in BooksDB under the file path and is re-read
+// from the file when no 8-character value is cached or when
+// BooksDBUtil::checkInfo() reports the stored file info as out of date.
+std::string PdbPlugin::fileType(const ZLFile &file) {
+	const std::string &extension = file.extension();
+	if ((extension != "prc") && (extension != "pdb") && (extension != "mobi")) {
+		return "";
+	}
+
+	const std::string &fileName = file.path();
+	ZLFile baseFile(file.physicalFilePath());
+	bool upToDate = BooksDBUtil::checkInfo(baseFile);
+
+	std::string palmType = BooksDB::Instance().getPalmType(fileName);
+	if ((palmType.length() != 8) || !upToDate) {
+		shared_ptr<ZLInputStream> stream = file.inputStream();
+		if (stream.isNull() || !stream->open()) {
+			return "";
+		}
+		stream->seek(60, false);
+		char id[8] = { 0 };
+		const std::size_t bytesRead = stream->read(id, 8);
+		stream->close();
+		if (bytesRead != 8) {
+			// Truncated file: never cache an identifier built from bytes
+			// that were not actually read.
+			return "";
+		}
+		palmType = std::string(id, 8);
+		if (!upToDate) {
+			BooksDBUtil::saveInfo(baseFile);
+		}
+		BooksDB::Instance().setPalmType(fileName, palmType);
+	}
+	return palmType;
+}
+
+// No-op implementation: the book is left untouched and success is reported.
+bool PdbPlugin::readLanguageAndEncoding(Book &book) const {
+	static_cast<void>(book);
+	const bool ok = true;
+	return ok;
+}
diff --git a/reader/src/formats/pdb/PdbPlugin.h b/reader/src/formats/pdb/PdbPlugin.h
new file mode 100644
index 0000000..9f8600b
--- /dev/null
+++ b/reader/src/formats/pdb/PdbPlugin.h
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __PDBPLUGIN_H__
+#define __PDBPLUGIN_H__
+
+#include <shared_ptr.h>
+
+#include "../FormatPlugin.h"
+
+class PdbPlugin : public FormatPlugin {
+ // Common base of the PDB-container format plugins declared in this header.
+public:
+ static std::string fileType(const ZLFile &file); // 8-byte type identifier of a prc/pdb/mobi file, "" otherwise
+ bool readLanguageAndEncoding(Book &book) const; // ignores `book` and returns true
+
+protected:
+ PdbPlugin(); // protected: only subclasses can construct it
+
+public:
+ virtual ~PdbPlugin();
+};
+
+class PluckerPlugin : public PdbPlugin {
+ // NOTE(review): presumably the plugin for Plucker documents; its implementation is not part of this section.
+public:
+ bool providesMetaInfo() const;
+ bool acceptsFile(const ZLFile &file) const;
+ bool readMetaInfo(Book &book) const;
+ bool readModel(BookModel &model) const;
+};
+
+class SimplePdbPlugin : public PdbPlugin {
+ // Base for plugins that read the book through a stream obtained from createStream().
+public:
+ bool readMetaInfo(Book &book) const;
+ bool readModel(BookModel &model) const;
+
+protected:
+ virtual shared_ptr<ZLInputStream> createStream(const ZLFile &file) const = 0; // format-specific stream factory
+ virtual void readDocumentInternal(const ZLFile &file, BookModel &model, const class PlainTextFormat &format, const std::string &encoding, ZLInputStream &stream) const; // default document reader; overridden by several subclasses
+};
+
+class PalmDocLikePlugin : public SimplePdbPlugin {
+ // Shared behaviour of the PalmDoc and Mobipocket plugins; streams are PalmDocStream instances.
+public:
+ bool providesMetaInfo() const; // always true
+ const std::string &tryOpen(const ZLFile &file) const; // opens a PalmDocStream and returns its error text (empty if none)
+
+protected:
+ shared_ptr<ZLInputStream> createStream(const ZLFile &file) const; // returns a new PalmDocStream
+};
+
+class PalmDocPlugin : public PalmDocLikePlugin {
+ // Plugin for PDB files whose type identifier is "TEXtREAd".
+public:
+ bool acceptsFile(const ZLFile &file) const;
+ // Uses the Mobipocket HTML reader when the stream has extra sections, the plain reader otherwise.
+ void readDocumentInternal(const ZLFile &file, BookModel &model, const class PlainTextFormat &format, const std::string &encoding, ZLInputStream &stream) const;
+
+private:
+ FormatInfoPage *createInfoPage(ZLOptionsDialog &dialog, const ZLFile &file); // plain-text info page, or 0 when the file has extra sections
+};
+
+class MobipocketPlugin : public PalmDocLikePlugin {
+ // NOTE(review): presumably the plugin for Mobipocket books; its implementation is not part of this section.
+private:
+ bool acceptsFile(const ZLFile &file) const;
+ bool readMetaInfo(Book &book) const;
+
+ void readDocumentInternal(const ZLFile &file, BookModel &model, const class PlainTextFormat &format, const std::string &encoding, ZLInputStream &stream) const;
+ shared_ptr<const ZLImage> coverImage(const ZLFile &file) const;
+};
+
+class EReaderPlugin : public SimplePdbPlugin {
+ // NOTE(review): presumably the plugin for eReader PDB books; its implementation is not part of this section.
+public:
+ bool providesMetaInfo() const;
+ bool acceptsFile(const ZLFile &file) const;
+ bool readMetaInfo(Book &book) const;
+ const std::string &tryOpen(const ZLFile &file) const;
+
+ void readDocumentInternal(const ZLFile &file, BookModel &model, const class PlainTextFormat &format, const std::string &encoding, ZLInputStream &stream) const;
+protected:
+ shared_ptr<ZLInputStream> createStream(const ZLFile &file) const;
+};
+
+class ZTXTPlugin : public SimplePdbPlugin {
+ // NOTE(review): presumably the plugin for zTXT documents; its implementation is not part of this section.
+public:
+ bool providesMetaInfo() const;
+ bool acceptsFile(const ZLFile &file) const;
+
+protected:
+ shared_ptr<ZLInputStream> createStream(const ZLFile &file) const;
+
+private:
+ FormatInfoPage *createInfoPage(ZLOptionsDialog &dialog, const ZLFile &file);
+};
+
+// Trivial constructor, kept inline in the header.
+inline PdbPlugin::PdbPlugin() {
+}
+
+#endif /* __PDBPLUGIN_H__ */
diff --git a/reader/src/formats/pdb/PdbReader.cpp b/reader/src/formats/pdb/PdbReader.cpp
new file mode 100644
index 0000000..54dc654
--- /dev/null
+++ b/reader/src/formats/pdb/PdbReader.cpp
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLFile.h>
+
+#include "PdbReader.h"
+
+// Reads a 16-bit big-endian unsigned value from `stream` into N.
+// The buffer is zero-initialised so that a short read produces a
+// deterministic value (missing bytes count as 0) rather than one built
+// from uninitialised memory.
+void PdbUtil::readUnsignedShort(ZLInputStream &stream, unsigned short &N) {
+	unsigned char data[2] = { 0, 0 };
+	stream.read(reinterpret_cast<char*>(data), 2);
+	N = (((unsigned short)data[0]) << 8) + data[1];
+}
+
+// Reads a 32-bit big-endian unsigned value from `stream` into N.
+// The buffer is zero-initialised so that a short read produces a
+// deterministic value (missing bytes count as 0) rather than one built
+// from uninitialised memory.
+void PdbUtil::readUnsignedLongBE(ZLInputStream &stream, unsigned long &N) {
+	unsigned char data[4] = { 0, 0, 0, 0 };
+	stream.read(reinterpret_cast<char*>(data), 4);
+	N = (((unsigned long)data[0]) << 24) +
+		(((unsigned long)data[1]) << 16) +
+		(((unsigned long)data[2]) << 8) +
+		(unsigned long)data[3];
+}
+
+// Reads a 32-bit little-endian unsigned value from `stream` into N.
+// The buffer is zero-initialised so that a short read produces a
+// deterministic value (missing bytes count as 0) rather than one built
+// from uninitialised memory.
+void PdbUtil::readUnsignedLongLE(ZLInputStream &stream, unsigned long &N) {
+	unsigned char data[4] = { 0, 0, 0, 0 };
+	stream.read(reinterpret_cast<char*>(data), 4);
+	N = (((unsigned long)data[3]) << 24) +
+		(((unsigned long)data[2]) << 16) +
+		(((unsigned long)data[1]) << 8) +
+		(unsigned long)data[0];
+}
+
+// Parses the PDB header starting at the stream's current position:
+// 32-byte name, 16-bit flags, 8-byte identifier at +60, record count at
+// +76, then one 8-byte entry per record (a 32-bit big-endian offset
+// followed by 4 skipped bytes).
+// Returns true when the stream ends up exactly behind the record table.
+bool PdbHeader::read(shared_ptr<ZLInputStream> stream) {
+	const std::size_t startOffset = stream->offset();
+
+	DocName.assign(32, '\0');
+	stream->read(&DocName[0], 32); // stream offset: +32
+
+	PdbUtil::readUnsignedShort(*stream, Flags); // stream offset: +34
+	stream->seek(26, false); // stream offset: +60
+
+	Id.assign(8, '\0');
+	stream->read(&Id[0], 8); // stream offset: +68
+	stream->seek(8, false); // stream offset: +76
+
+	unsigned short numRecords;
+	PdbUtil::readUnsignedShort(*stream, numRecords); // stream offset: +78
+
+	Offsets.clear();
+	Offsets.reserve(numRecords);
+	for (unsigned short remaining = numRecords; remaining > 0; --remaining) {
+		unsigned long recordOffset;
+		PdbUtil::readUnsignedLongBE(*stream, recordOffset);
+		Offsets.push_back(recordOffset);
+		// Skip the rest of the 8-byte record entry.
+		stream->seek(4, false);
+	}
+
+	const std::size_t expectedEnd = startOffset + 78 + 8 * numRecords;
+	return stream->offset() == expectedEnd;
+}
+
+/*bool PdbRecord0::read(shared_ptr<ZLInputStream> stream) {
+ std::size_t startOffset = stream->offset();
+
+ PdbUtil::readUnsignedShort(*stream, CompressionType);
+ PdbUtil::readUnsignedShort(*stream, Spare);
+ PdbUtil::readUnsignedLongBE(*stream, TextLength);
+ PdbUtil::readUnsignedShort(*stream, TextRecords);
+ PdbUtil::readUnsignedShort(*stream, MaxRecordSize);
+ PdbUtil::readUnsignedShort(*stream, NontextOffset);
+ PdbUtil::readUnsignedShort(*stream, NontextOffset2);
+
+ PdbUtil::readUnsignedLongBE(*stream, MobipocketID);
+ PdbUtil::readUnsignedLongBE(*stream, MobipocketHeaderSize);
+ PdbUtil::readUnsignedLongBE(*stream, Unknown24);
+ PdbUtil::readUnsignedShort(*stream, FootnoteRecs);
+ PdbUtil::readUnsignedShort(*stream, SidebarRecs);
+
+ PdbUtil::readUnsignedShort(*stream, BookmarkOffset);
+ PdbUtil::readUnsignedShort(*stream, Unknown34);
+ PdbUtil::readUnsignedShort(*stream, NontextOffset3);
+ PdbUtil::readUnsignedShort(*stream, Unknown38);
+ PdbUtil::readUnsignedShort(*stream, ImagedataOffset);
+ PdbUtil::readUnsignedShort(*stream, ImagedataOffset2);
+ PdbUtil::readUnsignedShort(*stream, MetadataOffset);
+ PdbUtil::readUnsignedShort(*stream, MetadataOffset2);
+ PdbUtil::readUnsignedShort(*stream, FootnoteOffset);
+ PdbUtil::readUnsignedShort(*stream, SidebarOffset);
+ PdbUtil::readUnsignedShort(*stream, LastDataOffset);
+ PdbUtil::readUnsignedShort(*stream, Unknown54);
+
+ return stream->offset() == startOffset + 56;
+}*/
diff --git a/reader/src/formats/pdb/PdbReader.h b/reader/src/formats/pdb/PdbReader.h
new file mode 100644
index 0000000..f32ebf5
--- /dev/null
+++ b/reader/src/formats/pdb/PdbReader.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __PDBREADER_H__
+#define __PDBREADER_H__
+
+#include <vector>
+
+#include <shared_ptr.h>
+#include <ZLInputStream.h>
+
+//class BookModel;
+
+class PdbUtil {
+ // Helpers that read fixed-width unsigned integers from a ZLInputStream.
+public:
+ static void readUnsignedShort(ZLInputStream &stream, unsigned short &N); // 2 bytes, most significant byte first
+ static void readUnsignedLongBE(ZLInputStream &stream, unsigned long &N); // 4 bytes, big-endian
+ static void readUnsignedLongLE(ZLInputStream &stream, unsigned long &N); // 4 bytes, little-endian
+};
+
+struct PdbHeader { // PDB file header fields filled in by read()
+ std::string DocName; // 32 bytes read from the start of the header
+ unsigned short Flags; // 16-bit value following the name
+ std::string Id; // 8 bytes read at header offset 60
+ std::vector<unsigned long> Offsets; // one offset per record, in file order
+ // Parses the header from the stream's current position; true when exactly 78 + 8 * record count bytes were consumed.
+ bool read(shared_ptr<ZLInputStream> stream);
+};
+
+struct PdbRecord0 { // Field layout of PDB record 0; the bracketed ranges are byte offsets within the record
+ unsigned short CompressionType; //[0..2] PalmDoc, Mobipocket, Ereader:version
+ unsigned short Spare; //[2..4] PalmDoc, Mobipocket
+ unsigned long TextLength; //[4..8] PalmDoc, Mobipocket
+ unsigned short TextRecords; //[8..10] PalmDoc, Mobipocket
+ unsigned short MaxRecordSize; //[10..12] PalmDoc, Mobipocket
+ unsigned short NontextOffset; //[12..14] Ereader
+ unsigned short NontextOffset2; //[14..16] Ereader //PalmDoc, Mobipocket: encrypted - there is conflict !!!!
+
+ unsigned long MobipocketID; //[16..20] Mobipocket
+ unsigned long MobipocketHeaderSize;//[20..24] Mobipocket
+ unsigned long Unknown24; //[24..28]
+ unsigned short FootnoteRecs; //[28..30] Ereader
+ unsigned short SidebarRecs; //[30..32] Ereader
+
+// Following fields are specific for EReader pdb document specification
+
+ unsigned short BookmarkOffset; //[32..34]
+ unsigned short Unknown34; //[34..36]
+ unsigned short NontextOffset3; //[36..38]
+ unsigned short Unknown38; //[38..40]
+ unsigned short ImagedataOffset; //[40..42]
+ unsigned short ImagedataOffset2; //[42..44]
+ unsigned short MetadataOffset; //[44..46]
+ unsigned short MetadataOffset2; //[46..48]
+ unsigned short FootnoteOffset; //[48..50]
+ unsigned short SidebarOffset; //[50..52]
+ unsigned short LastDataOffset; //[52..54]
+ unsigned short Unknown54; //[54..56]
+ // NOTE(review): the only definition of read() visible in PdbReader.cpp is commented out - confirm it is defined elsewhere before calling it.
+ bool read(shared_ptr<ZLInputStream> stream);
+//private:
+// static bool readNumberBE(unsigned char* buffer, std::size_t offset, std::size_t size);
+};
+
+#endif /* __PDBREADER_H__ */
diff --git a/reader/src/formats/pdb/PdbStream.cpp b/reader/src/formats/pdb/PdbStream.cpp
new file mode 100644
index 0000000..219a0de
--- /dev/null
+++ b/reader/src/formats/pdb/PdbStream.cpp
@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cstring>
+
+#include <ZLFile.h>
+
+#include "PdbStream.h"
+
+// Wraps the input stream of `file`; nothing is opened or parsed until
+// open(). All counters start at zero so that offset() and read() behave
+// deterministically even if they are called before a successful open().
+PdbStream::PdbStream(const ZLFile &file) :
+	myBase(file.inputStream()),
+	myOffset(0),
+	myBuffer(0),
+	myBufferLength(0),
+	myBufferOffset(0) {
+}
+
+// Does not call close() itself; the PalmDoc subclasses in this directory
+// call it from their own destructors.
+PdbStream::~PdbStream()
+{
+}
+
+// (Re)opens the underlying stream, parses the PDB header and positions
+// the base stream at the start of record 0.
+// Returns false when there is no base stream, it cannot be opened, the
+// header is malformed, or the header declares no records at all.
+bool PdbStream::open() {
+	close();
+	if (myBase.isNull() || !myBase->open() || !myHeader.read(myBase)) {
+		return false;
+	}
+	// myBase offset: startOffset + 78 + 8 * records number ( myHeader.Offsets.size() )
+
+	if (myHeader.Offsets.empty()) {
+		// A header with zero records has no record 0; indexing Offsets[0]
+		// would read past the end of the vector.
+		return false;
+	}
+
+	myBase->seek(myHeader.Offsets[0], true);
+	// myBase offset: Offset[0] - zero record
+
+	myBufferLength = 0;
+	myBufferOffset = 0;
+
+	myOffset = 0;
+
+	return true;
+}
+
+// Copies up to maxSize decoded bytes into `buffer`, refilling the record
+// buffer through fillBuffer() as needed. A null `buffer` discards the
+// data, which seek() uses to skip forward. Returns the number of bytes
+// consumed and advances the logical offset by that amount.
+std::size_t PdbStream::read(char *buffer, std::size_t maxSize) {
+	std::size_t consumed = 0;
+	while (consumed < maxSize && fillBuffer()) {
+		const std::size_t wanted = maxSize - consumed;
+		const std::size_t available = myBufferLength - myBufferOffset;
+		const std::size_t chunk = std::min(wanted, available);
+		if (chunk == 0) {
+			continue;
+		}
+		if (buffer != 0) {
+			std::memcpy(buffer + consumed, myBuffer + myBufferOffset, chunk);
+		}
+		consumed += chunk;
+		myBufferOffset += chunk;
+	}
+
+	myOffset += consumed;
+	return consumed;
+}
+
+// Closes the base stream (if any) and frees the record buffer.
+void PdbStream::close() {
+	if (!myBase.isNull()) {
+		myBase->close();
+	}
+	// delete[] on a null pointer is a no-op, so no guard is needed.
+	delete[] myBuffer;
+	myBuffer = 0;
+}
+
+// Moves the logical read position. Forward moves are done by reading and
+// discarding data; backward moves reopen the stream and skip forward
+// from the beginning.
+void PdbStream::seek(int offset, bool absoluteOffset) {
+	if (absoluteOffset) {
+		// Convert to a displacement relative to the current position.
+		offset -= this->offset();
+	}
+	if (offset == 0) {
+		return;
+	}
+	if (offset > 0) {
+		read(0, offset);
+		return;
+	}
+
+	// Backward seek: compute the absolute target, then restart from zero.
+	offset += this->offset();
+	open();
+	if (offset >= 0) {
+		read(0, offset);
+	}
+}
+
+// Number of decoded bytes consumed since the last open().
+std::size_t PdbStream::offset() const {
+	const std::size_t position = myOffset;
+	return position;
+}
+
+// Not implemented yet: always reports a size of 0.
+std::size_t PdbStream::sizeOfOpened() {
+	// TODO: implement
+	const std::size_t unknownSize = 0;
+	return unknownSize;
+}
+
+// File offset of record `index`; for an index past the last record the
+// size reported by the base stream is returned instead, so it can serve
+// as the end position of the final record.
+std::size_t PdbStream::recordOffset(std::size_t index) const {
+	if (index >= myHeader.Offsets.size()) {
+		return myBase->sizeOfOpened();
+	}
+	return myHeader.Offsets[index];
+}
diff --git a/reader/src/formats/pdb/PdbStream.h b/reader/src/formats/pdb/PdbStream.h
new file mode 100644
index 0000000..f2c58f1
--- /dev/null
+++ b/reader/src/formats/pdb/PdbStream.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __PDBSTREAM_H__
+#define __PDBSTREAM_H__
+
+#include <ZLInputStream.h>
+
+#include "PdbReader.h"
+
+class ZLFile;
+
+// Abstract input stream over the records of a PDB file. The PDB header is
+// parsed in open(); subclasses supply fillBuffer() to turn records into the
+// bytes that read() hands out.
+class PdbStream : public ZLInputStream {
+
+public:
+ PdbStream(const ZLFile &file);
+ virtual ~PdbStream();
+
+protected:
+ // Opens the base stream, reads the header and seeks to record 0.
+ virtual bool open();
+ // Closes the base stream and frees myBuffer.
+ virtual void close();
+
+private:
+ // Copies up to maxSize bytes out of myBuffer; a null buffer discards them.
+ std::size_t read(char *buffer, std::size_t maxSize);
+
+ // Forward seeks read and discard; backward seeks reopen the stream.
+ void seek(int offset, bool absoluteOffset);
+ std::size_t offset() const;
+ // Currently always returns 0 (not implemented).
+ std::size_t sizeOfOpened();
+
+protected:
+ // Called by read() before each copy. Returning false ends the read;
+ // after returning true, read() consumes the bytes between myBufferOffset
+ // and myBufferLength in myBuffer.
+ virtual bool fillBuffer() = 0;
+
+protected:
+ // Offset of record `index`, or the base stream size past the last record.
+ std::size_t recordOffset(std::size_t index) const;
+
+public:
+ const PdbHeader &header() const;
+
+protected:
+ // Underlying stream obtained from the file passed to the constructor.
+ shared_ptr<ZLInputStream> myBase;
+ // Number of bytes handed out by read() since open().
+ std::size_t myOffset;
+
+private:
+ PdbHeader myHeader;
+
+protected:
+ // Buffer read() copies from; freed in close(). PluckerTextStream allocates
+ // it in its own open().
+ char *myBuffer;
+ // Number of valid bytes in myBuffer.
+ unsigned short myBufferLength;
+ // Index of the next unread byte in myBuffer.
+ unsigned short myBufferOffset;
+};
+
+// Read-only access to the PDB header filled in by open().
+inline const PdbHeader &PdbStream::header() const { return this->myHeader; }
+
+#endif /* __PDBSTREAM_H__ */
diff --git a/reader/src/formats/pdb/PluckerBookReader.cpp b/reader/src/formats/pdb/PluckerBookReader.cpp
new file mode 100644
index 0000000..61bc311
--- /dev/null
+++ b/reader/src/formats/pdb/PluckerBookReader.cpp
@@ -0,0 +1,528 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <algorithm>
+#include <vector>
+#include <cctype>
+
+#include <ZLZDecompressor.h>
+#include <ZLStringUtil.h>
+#include <ZLUnicodeUtil.h>
+#include <ZLImage.h>
+#include <ZLFileImage.h>
+#include <ZLFile.h>
+#include <ZLTextStyleEntry.h>
+
+#include "PdbReader.h"
+#include "PluckerBookReader.h"
+#include "DocDecompressor.h"
+#include "PluckerImages.h"
+#include "../../bookmodel/BookModel.h"
+#include "../../library/Book.h"
+
+// Binds the reader to `model`, takes encoding and file from the model's book
+// and allocates the 65535-byte scratch buffer that record text is read or
+// decompressed into. All scalar members are given defined values; in
+// particular myCompressionVersion starts at 0 so that a compressed record
+// met before the uid-1 record is skipped instead of being decoded with an
+// indeterminate compression type.
+PluckerBookReader::PluckerBookReader(BookModel &model) :
+    BookReader(model),
+    EncodedTextReader(model.book()->encoding()),
+    myFile(model.book()->file()),
+    myFont(FT_REGULAR),
+    myCharBuffer(new char[65535]),
+    myParagraphStarted(false),
+    myBufferIsEmpty(true),
+    myForcedEntry(0),
+    myCompressionVersion(0),
+    myBytesToSkip(0),
+    myParagraphVector(0),
+    myParagraphStored(false) {
+}
+
+// Frees the scratch buffer. myForcedEntry is normally deleted at the end of
+// processTextParagraph(); it is released here as well so that it cannot leak
+// if paragraph processing was left early (deleting a null pointer is a no-op).
+PluckerBookReader::~PluckerBookReader() {
+    delete myForcedEntry;
+    delete[] myCharBuffer;
+}
+
+// Adds a style control now if a paragraph is open; otherwise queues it so
+// that safeBeginParagraph() can emit it once the paragraph starts.
+void PluckerBookReader::safeAddControl(FBTextKind kind, bool start) {
+    if (!myParagraphStarted) {
+        myDelayedControls.push_back(std::pair<FBTextKind,bool>(kind, start));
+        return;
+    }
+    addControl(kind, start);
+}
+
+// Adds an internal-hyperlink control now if a paragraph is open; otherwise
+// queues the target id until safeBeginParagraph() opens one.
+void PluckerBookReader::safeAddHyperlinkControl(const std::string &id) {
+    if (!myParagraphStarted) {
+        myDelayedHyperlinks.push_back(id);
+        return;
+    }
+    addHyperlinkControl(INTERNAL_HYPERLINK, id);
+}
+
+// Opens a model paragraph if none is open. On opening it records the
+// paragraph index for the current record (once per Plucker paragraph), then
+// emits the queued controls, the forced style entry (or the REGULAR control
+// when there is none) and the queued hyperlinks. Queued hyperlinks are
+// cleared here; queued controls stay until processTextParagraph() ends.
+void PluckerBookReader::safeBeginParagraph() {
+    if (myParagraphStarted) {
+        return;
+    }
+    myParagraphStarted = true;
+    myBufferIsEmpty = true;
+    beginParagraph();
+
+    if (!myParagraphStored) {
+        myParagraphVector->push_back(model().bookTextModel()->paragraphsNumber() - 1);
+        myParagraphStored = true;
+    }
+
+    typedef std::vector<std::pair<FBTextKind,bool> >::const_iterator ControlIterator;
+    for (ControlIterator control = myDelayedControls.begin(); control != myDelayedControls.end(); ++control) {
+        addControl(control->first, control->second);
+    }
+
+    if (myForcedEntry == 0) {
+        addControl(REGULAR, true);
+    } else {
+        addStyleEntry(*myForcedEntry);
+    }
+
+    for (std::size_t i = 0; i < myDelayedHyperlinks.size(); ++i) {
+        addHyperlinkControl(INTERNAL_HYPERLINK, myDelayedHyperlinks[i]);
+    }
+    myDelayedHyperlinks.clear();
+}
+
+
+// Closes the open paragraph, if any. A paragraph that received no text gets
+// a single space first.
+void PluckerBookReader::safeEndParagraph() {
+    if (!myParagraphStarted) {
+        return;
+    }
+    if (myBufferIsEmpty) {
+        addData(std::string(" "));
+    }
+    endParagraph();
+    myParagraphStarted = false;
+}
+
+// Starts or ends a heading. On start it enters title mode and pushes the
+// text kind matching the heading level (anything outside FT_H1..FT_H6 is
+// treated as H6); on end it pops the kind and leaves title mode.
+void PluckerBookReader::processHeader(FontType font, bool start) {
+    if (!start) {
+        popKind();
+        exitTitle();
+        return;
+    }
+
+    static const FBTextKind HEADER_KINDS[] = { H1, H2, H3, H4, H5, H6 };
+
+    enterTitle();
+    const bool isKnownLevel = (font >= FT_H1) && (font <= FT_H6);
+    pushKind(isKnownLevel ? HEADER_KINDS[font - FT_H1] : H6);
+}
+
+// Switches one Plucker font style on (start == true) or off. Heading fonts
+// go through processHeader(); bold, teletype, subscript and superscript map
+// to text controls; FT_REGULAR, FT_SMALL and unknown values do nothing.
+void PluckerBookReader::setFont(FontType font, bool start) {
+    if (font >= FT_H1 && font <= FT_H6) {
+        processHeader(font, start);
+        return;
+    }
+    switch (font) {
+        case FT_BOLD:
+            safeAddControl(BOLD, start);
+            break;
+        case FT_TT:
+            safeAddControl(CODE, start);
+            break;
+        case FT_SUB:
+            safeAddControl(SUB, start);
+            break;
+        case FT_SUP:
+            safeAddControl(SUP, start);
+            break;
+        default:
+            break;
+    }
+}
+
+// Makes `font` the current font: switches the previous one off and the new
+// one on. Does nothing when the font is unchanged.
+void PluckerBookReader::changeFont(FontType font) {
+    if (myFont != font) {
+        setFont(myFont, false);
+        myFont = font;
+        setFont(font, true);
+    }
+}
+
+/*
+static void listParameters(char *ptr) {
+ int argc = ((unsigned char)*ptr) % 8;
+ std::cerr << (int)(unsigned char)*ptr << "(";
+ for (int i = 0; i < argc - 1; ++i) {
+ ++ptr;
+ std::cerr << (int)*ptr << ", ";
+ }
+ if (argc > 0) {
+ ++ptr;
+ std::cerr << (int)*ptr;
+ }
+ std::cerr << ")\n";
+}
+*/
+
+// Decodes the two bytes at `ptr` as a big-endian unsigned 16-bit value.
+static unsigned int twoBytes(char *ptr) {
+    const unsigned int high = (unsigned char)ptr[0];
+    const unsigned int low = (unsigned char)ptr[1];
+    return (high << 8) | low;
+}
+
+// Decodes the four bytes at `ptr` as a big-endian unsigned 32-bit value.
+static unsigned int fourBytes(char *ptr) {
+    const unsigned int upperHalf = twoBytes(ptr);
+    const unsigned int lowerHalf = twoBytes(ptr + 2);
+    return (upperHalf << 16) | lowerHalf;
+}
+
+// Returns `num` as text, as produced by ZLStringUtil::appendNumber on an
+// empty string.
+static std::string fromNumber(unsigned int num) {
+    std::string text;
+    ZLStringUtil::appendNumber(text, num);
+    return text;
+}
+
+// Interprets one Plucker text function. `ptr` points at the function code;
+// its low three bits give the number of parameter bytes that follow, and the
+// caller guarantees those bytes lie inside the paragraph.
+// Parameter bytes are read through unsigned char so that values of 128 and
+// above (e.g. margins) are not turned negative where plain char is signed.
+void PluckerBookReader::processTextFunction(char *ptr) {
+    switch ((unsigned char)*ptr) {
+        case 0x08: // end of hyperlink
+            safeAddControl(INTERNAL_HYPERLINK, false);
+            break;
+        case 0x0A: // hyperlink to a record
+            safeAddHyperlinkControl(fromNumber(twoBytes(ptr + 1)));
+            break;
+        case 0x0C: // hyperlink to a paragraph inside a record
+        {
+            int sectionNum = twoBytes(ptr + 1);
+            int paragraphNum = twoBytes(ptr + 3);
+            safeAddHyperlinkControl(fromNumber(sectionNum) + '#' + fromNumber(paragraphNum));
+            // Remembered so that readDocument() can create the label later.
+            myReferencedParagraphs.insert(std::make_pair(sectionNum, paragraphNum));
+            break;
+        }
+        case 0x11: // font change
+            changeFont((FontType)(unsigned char)*(ptr + 1));
+            break;
+        case 0x1A: // image
+            safeBeginParagraph();
+            addImageReference(fromNumber(twoBytes(ptr + 1)));
+            break;
+        case 0x22: // left/right indent; only honoured before the paragraph starts
+            if (!myParagraphStarted) {
+                if (myForcedEntry == 0) {
+                    myForcedEntry = new ZLTextStyleEntry(ZLTextStyleEntry::STYLE_OTHER_ENTRY);
+                }
+                myForcedEntry->setLength(
+                    ZLTextStyleEntry::LENGTH_LEFT_INDENT,
+                    (unsigned char)*(ptr + 1), ZLTextStyleEntry::SIZE_UNIT_PIXEL
+                );
+                myForcedEntry->setLength(
+                    ZLTextStyleEntry::LENGTH_RIGHT_INDENT,
+                    (unsigned char)*(ptr + 2), ZLTextStyleEntry::SIZE_UNIT_PIXEL
+                );
+            }
+            break;
+        case 0x29: // alignment; only honoured before the paragraph starts
+            if (!myParagraphStarted) {
+                if (myForcedEntry == 0) {
+                    myForcedEntry = new ZLTextStyleEntry(ZLTextStyleEntry::STYLE_OTHER_ENTRY);
+                }
+                switch ((unsigned char)*(ptr + 1)) {
+                    case 0: myForcedEntry->setAlignmentType(ALIGN_LEFT); break;
+                    case 1: myForcedEntry->setAlignmentType(ALIGN_RIGHT); break;
+                    case 2: myForcedEntry->setAlignmentType(ALIGN_CENTER); break;
+                    case 3: myForcedEntry->setAlignmentType(ALIGN_JUSTIFY); break;
+                }
+            }
+            break;
+        case 0x33: // just break line instead of horizontal rule (TODO: draw horizontal rule?)
+            safeEndParagraph();
+            break;
+        case 0x38: // line break
+            safeEndParagraph();
+            break;
+        case 0x40:
+            safeAddControl(EMPHASIS, true);
+            break;
+        case 0x48:
+            safeAddControl(EMPHASIS, false);
+            break;
+        case 0x53: // color setting is ignored
+            break;
+        case 0x5C: // image; the id used is the second 16-bit parameter
+            addImageReference(fromNumber(twoBytes(ptr + 3)));
+            break;
+        case 0x60: // underlined text is ignored
+            break;
+        case 0x68: // underlined text is ignored
+            break;
+        case 0x70: // strike-through text is ignored
+            break;
+        case 0x78: // strike-through text is ignored
+            break;
+        case 0x83: // 16-bit Unicode character
+        case 0x85: // 32-bit Unicode character
+        {
+            ZLUnicodeUtil::Ucs4Char symbol =
+                (((unsigned char)*ptr) == 0x83) ? twoBytes(ptr + 2) : fourBytes(ptr + 2);
+            char utf8[6];
+            int len = ZLUnicodeUtil::ucs4ToUtf8(utf8, symbol);
+            safeBeginParagraph();
+            addData(std::string(utf8, len));
+            myBufferIsEmpty = false;
+            // The first parameter is the number of following text bytes the
+            // caller has to skip.
+            myBytesToSkip = (unsigned char)*(ptr + 1);
+            break;
+        }
+        case 0x8E: // custom font operations are ignored
+        case 0x8C:
+        case 0x8A:
+        case 0x88:
+            break;
+        case 0x90: // TODO: add table processing
+        case 0x92: // TODO: process table
+        case 0x97: // TODO: process table
+            break;
+        default: // this should be impossible
+            //std::cerr << "Oops... function #" << (int)(unsigned char)*ptr << "\n";
+            break;
+    }
+}
+
+// Converts one Plucker paragraph, the bytes [start, end), into model
+// paragraphs. A zero byte introduces a function: the byte after it is the
+// function code, whose low three bits give the number of parameter bytes.
+// Everything else is text in the book's encoding. 0xA0 bytes are replaced by
+// spaces in place, and leading whitespace of a not-yet-started paragraph is
+// dropped.
+void PluckerBookReader::processTextParagraph(char *start, char *end) {
+    changeFont(FT_REGULAR);
+    while (popKind()) {}
+
+    myParagraphStarted = false;
+    myBytesToSkip = 0;
+
+    char *textStart = start;
+    bool functionFlag = false;
+    for (char *ptr = start; ptr < end; ++ptr) {
+        if (*ptr == 0) {
+            functionFlag = true;
+            if (ptr > textStart) {
+                safeBeginParagraph();
+                myConvertedTextBuffer.erase();
+                myConverter->convert(myConvertedTextBuffer, textStart, ptr);
+                addData(myConvertedTextBuffer);
+                myBufferIsEmpty = false;
+            }
+            // The text before this marker has been emitted. Advance past it
+            // so that a trailing or repeated zero byte (malformed data)
+            // cannot cause the same run to be emitted a second time.
+            textStart = ptr + 1;
+        } else if (functionFlag) {
+            int paramCounter = ((unsigned char)*ptr) % 8;
+            if (end - ptr > paramCounter) {
+                processTextFunction(ptr);
+                ptr += paramCounter;
+            } else {
+                // Truncated function: ignore the rest of the paragraph.
+                ptr = end - 1;
+            }
+            functionFlag = false;
+            if (myBytesToSkip > 0) {
+                // Never step beyond the paragraph, whatever the data claims.
+                if (end - ptr > myBytesToSkip) {
+                    ptr += myBytesToSkip;
+                } else {
+                    ptr = end - 1;
+                }
+                myBytesToSkip = 0;
+            }
+            textStart = ptr + 1;
+        } else {
+            if ((unsigned char)*ptr == 0xA0) {
+                *ptr = 0x20;
+            }
+            if (!myParagraphStarted && textStart == ptr && std::isspace((unsigned char)*ptr)) {
+                ++textStart;
+            }
+        }
+    }
+    if (end > textStart) {
+        safeBeginParagraph();
+        myConvertedTextBuffer.erase();
+        myConverter->convert(myConvertedTextBuffer, textStart, end);
+        addData(myConvertedTextBuffer);
+        myBufferIsEmpty = false;
+    }
+    safeEndParagraph();
+    if (myForcedEntry != 0) {
+        delete myForcedEntry;
+        myForcedEntry = 0;
+    }
+    myDelayedControls.clear();
+}
+
+// Splits the first `size` bytes of myCharBuffer into consecutive paragraphs
+// of the lengths listed in `pars` and processes each one. Stops at the
+// first paragraph that would extend past `size`. A paragraph that produced
+// no model paragraph is recorded as -1 in the current paragraph vector.
+void PluckerBookReader::processTextRecord(std::size_t size, const std::vector<int> &pars) {
+    char *const limit = myCharBuffer + size;
+    char *paragraphEnd = myCharBuffer;
+
+    for (std::size_t i = 0; i < pars.size(); ++i) {
+        char *const paragraphStart = paragraphEnd;
+        paragraphEnd = paragraphStart + pars[i];
+        if (paragraphEnd > limit) {
+            return;
+        }
+        myParagraphStored = false;
+        processTextParagraph(paragraphStart, paragraphEnd);
+        if (!myParagraphStored) {
+            myParagraphVector->push_back(-1);
+        }
+    }
+}
+
+// Reads the record that starts at the current stream position and is
+// `recordSize` bytes long. The record with uid 1 only supplies the
+// compression version. Every other record starts with an 8-byte header
+// (uid, paragraph count, data size, type, flags) and is dispatched on its
+// type. Sizes derived from recordSize are checked before subtracting, so a
+// record shorter than its own header is skipped instead of producing a huge
+// unsigned length.
+void PluckerBookReader::readRecord(std::size_t recordSize) {
+    unsigned short uid = 0;
+    PdbUtil::readUnsignedShort(*myStream, uid);
+    if (uid == 1) {
+        PdbUtil::readUnsignedShort(*myStream, myCompressionVersion);
+        return;
+    }
+
+    unsigned short paragraphs = 0;
+    PdbUtil::readUnsignedShort(*myStream, paragraphs);
+
+    unsigned short size = 0;
+    PdbUtil::readUnsignedShort(*myStream, size);
+
+    unsigned char type = 0;
+    unsigned char flags = 0;
+    if (myStream->read((char*)&type, 1) != 1 ||
+        myStream->read((char*)&flags, 1) != 1) {
+        // Truncated header: nothing sensible can be done with this record.
+        return;
+    }
+
+    switch (type) {
+        case 0: // text (TODO: found sample file and test this code)
+        case 1: // compressed text
+        {
+            std::vector<int> pars;
+            for (int i = 0; i < paragraphs; ++i) {
+                unsigned short pSize = 0;
+                PdbUtil::readUnsignedShort(*myStream, pSize);
+                pars.push_back(pSize);
+                myStream->seek(2, false);
+            }
+
+            // Record header plus one 4-byte entry per paragraph.
+            const std::size_t headerSize = 8 + 4 * (std::size_t)paragraphs;
+
+            bool doProcess = false;
+            if (type == 0) {
+                doProcess = myStream->read(myCharBuffer, size) == size;
+            } else if (myCompressionVersion == 1) {
+                if (recordSize >= headerSize) {
+                    doProcess =
+                        DocDecompressor().decompress(*myStream, myCharBuffer, recordSize - headerSize, size) == size;
+                }
+            } else if (myCompressionVersion == 2) {
+                // Two extra bytes precede the compressed data.
+                if (recordSize >= headerSize + 2) {
+                    myStream->seek(2, false);
+                    doProcess =
+                        ZLZDecompressor(recordSize - headerSize - 2).
+                            decompress(*myStream, myCharBuffer, size) == size;
+                }
+            }
+            if (doProcess) {
+                addHyperlinkLabel(fromNumber(uid));
+                myParagraphVector = &myParagraphMap[uid];
+                processTextRecord(size, pars);
+                if ((flags & 0x1) == 0) {
+                    insertEndOfTextParagraph();
+                }
+            }
+            break;
+        }
+        case 2: // image
+        case 3: // compressed image
+        {
+            ZLImage *image = 0;
+            const ZLFile imageFile(myFile.path(), ZLMimeType::IMAGE_PALM);
+            if (type == 2) {
+                if (recordSize >= 8) {
+                    image = new ZLFileImage(imageFile, myStream->offset(), recordSize - 8);
+                }
+            } else if (myCompressionVersion == 1) {
+                if (recordSize >= 8) {
+                    image = new DocCompressedFileImage(imageFile, myStream->offset(), recordSize - 8);
+                }
+            } else if (myCompressionVersion == 2) {
+                if (recordSize >= 10) {
+                    image = new ZCompressedFileImage(imageFile, myStream->offset() + 2, recordSize - 10);
+                }
+            }
+            if (image != 0) {
+                addImage(fromNumber(uid), image);
+            }
+            break;
+        }
+        case 9: // category record is ignored
+            break;
+        case 10:
+        {
+            unsigned short typeCode = 0;
+            PdbUtil::readUnsignedShort(*myStream, typeCode);
+            //std::cerr << "type = " << (int)type << "; ";
+            //std::cerr << "typeCode = " << typeCode << "\n";
+            break;
+        }
+        case 11: // style sheet record is ignored
+            break;
+        case 12: // font page record is ignored
+            break;
+        case 13: // TODO: process tables
+        case 14: // TODO: process tables
+            break;
+        case 15: // multiimage
+        {
+            unsigned short columns = 0;
+            unsigned short rows = 0;
+            PdbUtil::readUnsignedShort(*myStream, columns);
+            PdbUtil::readUnsignedShort(*myStream, rows);
+            PluckerMultiImage *image = new PluckerMultiImage(rows, columns, model().imageMap());
+            // `size` covers the two dimension words plus one 16-bit id per tile.
+            for (int i = 0; i < size / 2 - 2; ++i) {
+                unsigned short us = 0;
+                PdbUtil::readUnsignedShort(*myStream, us);
+                image->addId(fromNumber(us));
+            }
+            addImage(fromNumber(uid), image);
+            break;
+        }
+        default:
+            //std::cerr << "type = " << (int)type << "\n";
+            break;
+    }
+}
+
+// Reads the whole book: opens the file, parses the PDB header, processes
+// every record in offset order and finally creates the "record#paragraph"
+// hyperlink labels collected while reading. Returns false only if the file
+// cannot be opened or its header cannot be read.
+bool PluckerBookReader::readDocument() {
+    myStream = myFile.inputStream();
+    if (myStream.isNull() || !myStream->open()) {
+        return false;
+    }
+
+    PdbHeader header;
+    if (!header.read(myStream)) {
+        myStream->close();
+        return false;
+    }
+
+    setMainTextModel();
+    myFont = FT_REGULAR;
+
+    for (std::vector<unsigned long>::const_iterator it = header.Offsets.begin(); it != header.Offsets.end(); ++it) {
+        std::size_t currentOffset = myStream->offset();
+        if (currentOffset > *it) {
+            break;
+        }
+        myStream->seek(*it - currentOffset, false);
+        if (myStream->offset() != *it) {
+            break;
+        }
+        // A record ends where the next one starts, or at the end of the file.
+        const std::size_t nextOffset =
+            (it + 1 != header.Offsets.end()) ? (std::size_t)*(it + 1) : myStream->sizeOfOpened();
+        if (nextOffset < *it) {
+            // Offsets are not ascending: the unsigned difference would wrap
+            // around. Stop here, as PluckerTextStream::fillBuffer() does.
+            break;
+        }
+        readRecord(nextOffset - *it);
+    }
+    myStream->close();
+
+    for (std::set<std::pair<int,int> >::const_iterator it = myReferencedParagraphs.begin(); it != myReferencedParagraphs.end(); ++it) {
+        std::map<int,std::vector<int> >::const_iterator jt = myParagraphMap.find(it->first);
+        if (jt != myParagraphMap.end()) {
+            // Use the first paragraph at or after the referenced one that
+            // actually produced a model paragraph (-1 marks "none").
+            for (unsigned int k = it->second; k < jt->second.size(); ++k) {
+                if (jt->second[k] != -1) {
+                    addHyperlinkLabel(fromNumber(it->first) + '#' + fromNumber(it->second), jt->second[k]);
+                    break;
+                }
+            }
+        }
+    }
+    myReferencedParagraphs.clear();
+    myParagraphMap.clear();
+    return true;
+}
diff --git a/reader/src/formats/pdb/PluckerBookReader.h b/reader/src/formats/pdb/PluckerBookReader.h
new file mode 100644
index 0000000..1078f37
--- /dev/null
+++ b/reader/src/formats/pdb/PluckerBookReader.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __PLUCKERBOOKREADER_H__
+#define __PLUCKERBOOKREADER_H__
+
+#include <set>
+#include <map>
+
+#include <ZLEncodingConverter.h>
+
+#include "../../bookmodel/BookReader.h"
+#include "../EncodedTextReader.h"
+
+// Builds a BookModel from a Plucker PDB file: walks the records, decodes
+// text records (plain, DOC- or zlib-compressed) into paragraphs and
+// registers image records with the model.
+class PluckerBookReader : public BookReader, public EncodedTextReader {
+
+public:
+ PluckerBookReader(BookModel &model);
+ ~PluckerBookReader();
+
+ // Reads the whole file; false if it cannot be opened or has no valid header.
+ bool readDocument();
+
+private:
+ // Font codes carried by text function 0x11; the values are cast directly
+ // from the parameter byte.
+ enum FontType {
+ FT_REGULAR = 0,
+ FT_H1 = 1,
+ FT_H2 = 2,
+ FT_H3 = 3,
+ FT_H4 = 4,
+ FT_H5 = 5,
+ FT_H6 = 6,
+ FT_BOLD = 7,
+ FT_TT = 8,
+ FT_SMALL = 9,
+ FT_SUB = 10,
+ FT_SUP = 11
+ };
+
+ void readRecord(std::size_t recordSize);
+ void processTextRecord(std::size_t size, const std::vector<int> &pars);
+ void processTextParagraph(char *start, char *end);
+ void processTextFunction(char *ptr);
+ void setFont(FontType font, bool start);
+ void changeFont(FontType font);
+
+ // The safe* helpers defer controls and hyperlinks until a paragraph is open.
+ void safeAddControl(FBTextKind kind, bool start);
+ void safeAddHyperlinkControl(const std::string &id);
+ void safeBeginParagraph();
+ void safeEndParagraph();
+
+ void processHeader(FontType font, bool start);
+
+private:
+ const ZLFile myFile;
+ shared_ptr<ZLInputStream> myStream;
+ // Font currently in effect.
+ FontType myFont;
+ // 65535-byte scratch buffer holding the text of the record being processed.
+ char *myCharBuffer;
+ std::string myConvertedTextBuffer;
+ bool myParagraphStarted;
+ // True while the open paragraph has received no text yet.
+ bool myBufferIsEmpty;
+ // Indent/alignment style for the paragraph being built; owned, may be null.
+ ZLTextStyleEntry *myForcedEntry;
+ // Controls and hyperlinks requested while no paragraph was open.
+ std::vector<std::pair<FBTextKind,bool> > myDelayedControls;
+ std::vector<std::string> myDelayedHyperlinks;
+ // Read from the record with uid 1: 1 selects DocDecompressor, 2 ZLZDecompressor.
+ unsigned short myCompressionVersion;
+ // Set by functions 0x83/0x85: text bytes to skip after the function.
+ unsigned char myBytesToSkip;
+
+ // (record uid, paragraph number) pairs referenced by hyperlinks.
+ std::set<std::pair<int, int> > myReferencedParagraphs;
+ // Per record uid: model paragraph index of each Plucker paragraph, or -1.
+ std::map<int, std::vector<int> > myParagraphMap;
+ // Points at the myParagraphMap entry of the record being processed.
+ std::vector<int> *myParagraphVector;
+ bool myParagraphStored;
+};
+
+#endif /* __PLUCKERBOOKREADER_H__ */
diff --git a/reader/src/formats/pdb/PluckerImages.cpp b/reader/src/formats/pdb/PluckerImages.cpp
new file mode 100644
index 0000000..db291ab
--- /dev/null
+++ b/reader/src/formats/pdb/PluckerImages.cpp
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLFile.h>
+#include <ZLInputStream.h>
+#include <ZLZDecompressor.h>
+#include <ZLStringUtil.h>
+
+#include "PluckerImages.h"
+#include "DocDecompressor.h"
+
+// Returns the decompressed image bytes: opens the file, moves to myOffset
+// and inflates myCompressedSize bytes in 2048-byte chunks. The result is an
+// empty string when the file cannot be opened.
+// The chunk buffer lives on the stack, so it cannot leak if collecting the
+// chunks throws.
+const shared_ptr<std::string> ZCompressedFileImage::stringData() const {
+    shared_ptr<std::string> imageData = new std::string();
+
+    shared_ptr<ZLInputStream> stream = myFile.inputStream();
+    if (stream.isNull() || !stream->open()) {
+        return imageData;
+    }
+
+    stream->seek(myOffset, false);
+    ZLZDecompressor decompressor(myCompressedSize);
+
+    char chunk[2048];
+    std::vector<std::string> pieces;
+
+    std::size_t produced;
+    do {
+        produced = decompressor.decompress(*stream, chunk, sizeof(chunk));
+        if (produced != 0) {
+            pieces.push_back(std::string(chunk, produced));
+        }
+        // A short chunk means the decompressor has run out of data.
+    } while (produced == sizeof(chunk));
+    ZLStringUtil::append(*imageData, pieces);
+
+    return imageData;
+}
+
+// Returns the decompressed image bytes: opens the file, moves to myOffset
+// and DOC-decompresses myCompressedSize bytes into at most 65535 bytes of
+// output. The result is an empty string when the file cannot be opened.
+// The data is decompressed straight into the result string, so there is no
+// manually managed scratch buffer and no second copy.
+const shared_ptr<std::string> DocCompressedFileImage::stringData() const {
+    shared_ptr<std::string> imageData = new std::string();
+
+    shared_ptr<ZLInputStream> stream = myFile.inputStream();
+    if (stream.isNull() || !stream->open()) {
+        return imageData;
+    }
+
+    static const std::size_t maxUncompressedSize = 65535;
+
+    stream->seek(myOffset, false);
+    imageData->resize(maxUncompressedSize);
+    const std::size_t uncompressedSize =
+        DocDecompressor().decompress(*stream, &(*imageData)[0], myCompressedSize, maxUncompressedSize);
+    // Trim to what was actually produced.
+    imageData->resize(uncompressedSize);
+
+    return imageData;
+}
+
+// Looks up the tile at (row, column). Tile ids are stored row by row; a
+// null pointer is returned when the position has no id or the id is not in
+// the image map.
+shared_ptr<const ZLImage> PluckerMultiImage::subImage(unsigned int row, unsigned int column) const {
+    const unsigned int position = row * myColumns + column;
+    if (position >= myIds.size()) {
+        return 0;
+    }
+    ZLImageMap::const_iterator found = myImageMap.find(myIds[position]);
+    if (found == myImageMap.end()) {
+        return 0;
+    }
+    return found->second;
+}
diff --git a/reader/src/formats/pdb/PluckerImages.h b/reader/src/formats/pdb/PluckerImages.h
new file mode 100644
index 0000000..3269a29
--- /dev/null
+++ b/reader/src/formats/pdb/PluckerImages.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __PLUCKERIMAGES_H__
+#define __PLUCKERIMAGES_H__
+
+#include <string>
+
+#include <ZLImage.h>
+#include <ZLFile.h>
+#include "../../bookmodel/BookModel.h"
+
+// Image whose data is stored compressed inside a file and is inflated with
+// ZLZDecompressor when stringData() is called.
+class ZCompressedFileImage : public ZLSingleImage {
+
+public:
+ ZCompressedFileImage(const ZLFile &file, std::size_t offset, std::size_t size);
+ // Decompressed image bytes; empty if the file cannot be opened.
+ const shared_ptr<std::string> stringData() const;
+
+private:
+ const ZLFile myFile;
+ // Position of the compressed data within the file.
+ const std::size_t myOffset;
+ // Number of compressed bytes to read.
+ const std::size_t myCompressedSize;
+};
+
+// Image whose data is stored DOC-compressed inside a file and is expanded
+// with DocDecompressor when stringData() is called.
+class DocCompressedFileImage : public ZLSingleImage {
+
+public:
+ DocCompressedFileImage(const ZLFile &file, std::size_t offset, std::size_t compressedSize);
+ // Decompressed image bytes (at most 65535); empty if the file cannot be opened.
+ const shared_ptr<std::string> stringData() const;
+
+private:
+ const ZLFile myFile;
+ // Position of the compressed data within the file.
+ const std::size_t myOffset;
+ // Number of compressed bytes to read.
+ const std::size_t myCompressedSize;
+};
+
+// Image composed of a grid of tiles. Tiles are referred to by id and looked
+// up in an image map each time subImage() is called.
+class PluckerMultiImage : public ZLMultiImage {
+
+public:
+ PluckerMultiImage(unsigned int rows, unsigned int columns, const ZLImageMap &imageMap);
+
+ // Appends the id of the next tile; ids are stored row by row.
+ void addId(const std::string &id);
+
+ unsigned int rows() const;
+ unsigned int columns() const;
+ // Tile at (row, column), or null if it has no id or the id is unknown.
+ shared_ptr<const ZLImage> subImage(unsigned int row, unsigned int column) const;
+
+private:
+ unsigned int myRows, myColumns;
+ // Held by reference, not copied.
+ // NOTE(review): the map must therefore outlive this image - confirm at the call site.
+ const ZLImageMap &myImageMap;
+ std::vector<std::string> myIds;
+};
+
+// Remembers the file, the offset of the compressed data and its size; the
+// image takes its MIME type from the file.
+inline ZCompressedFileImage::ZCompressedFileImage(const ZLFile &file, std::size_t offset, std::size_t compressedSize) :
+    ZLSingleImage(file.mimeType()),
+    myFile(file),
+    myOffset(offset),
+    myCompressedSize(compressedSize) {
+}
+
+// Remembers the file, the offset of the compressed data and its size; the
+// image takes its MIME type from the file.
+inline DocCompressedFileImage::DocCompressedFileImage(const ZLFile &file, std::size_t offset, std::size_t compressedSize) :
+    ZLSingleImage(file.mimeType()),
+    myFile(file),
+    myOffset(offset),
+    myCompressedSize(compressedSize) {
+}
+
+// Creates an empty rows x columns grid; tile ids are added with addId().
+inline PluckerMultiImage::PluckerMultiImage(unsigned int rows, unsigned int columns, const ZLImageMap &imageMap) :
+    myRows(rows),
+    myColumns(columns),
+    myImageMap(imageMap) {
+}
+// Appends the id of the next tile.
+inline void PluckerMultiImage::addId(const std::string &id) {
+    myIds.push_back(id);
+}
+// Number of tile rows given at construction.
+inline unsigned int PluckerMultiImage::rows() const {
+    return myRows;
+}
+// Number of tile columns given at construction.
+inline unsigned int PluckerMultiImage::columns() const {
+    return myColumns;
+}
+
+#endif /* __PLUCKERIMAGES_H__ */
diff --git a/reader/src/formats/pdb/PluckerPlugin.cpp b/reader/src/formats/pdb/PluckerPlugin.cpp
new file mode 100644
index 0000000..1ec89ba
--- /dev/null
+++ b/reader/src/formats/pdb/PluckerPlugin.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLFile.h>
+
+#include "PdbPlugin.h"
+#include "PluckerBookReader.h"
+#include "PluckerTextStream.h"
+
+#include "../../library/Book.h"
+
+// This plugin reports that it provides no meta information.
+bool PluckerPlugin::providesMetaInfo() const {
+    const bool provides = false;
+    return provides;
+}
+
+// Accepts files whose PDB type string is "DataPlkr".
+bool PluckerPlugin::acceptsFile(const ZLFile &file) const {
+    const std::string type = PdbPlugin::fileType(file);
+    return type == "DataPlkr";
+}
+
+// Detects encoding and language from the book's text stream. Succeeds only
+// if the book has a non-empty encoding afterwards.
+bool PluckerPlugin::readMetaInfo(Book &book) const {
+    shared_ptr<ZLInputStream> stream = new PluckerTextStream(book.file());
+    detectEncodingAndLanguage(book, *stream);
+    return !book.encoding().empty();
+}
+
+// Fills `model` by running a PluckerBookReader over the book's file.
+bool PluckerPlugin::readModel(BookModel &model) const {
+    PluckerBookReader reader(model);
+    return reader.readDocument();
+}
diff --git a/reader/src/formats/pdb/PluckerTextStream.cpp b/reader/src/formats/pdb/PluckerTextStream.cpp
new file mode 100644
index 0000000..01291eb
--- /dev/null
+++ b/reader/src/formats/pdb/PluckerTextStream.cpp
@@ -0,0 +1,159 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cstring>
+
+#include <ZLFile.h>
+#include <ZLZDecompressor.h>
+
+#include "PluckerTextStream.h"
+#include "PdbReader.h"
+#include "DocDecompressor.h"
+
+// Creates a closed stream over `file`; the buffers are allocated in open().
+PluckerTextStream::PluckerTextStream(const ZLFile &file) : PdbStream(file), myFullBuffer(0) {
+}
+
+// Frees the buffers and closes the base stream. The call is spelled with
+// its class name; inside this destructor that is the function a plain
+// close() call resolves to.
+PluckerTextStream::~PluckerTextStream() {
+    PluckerTextStream::close();
+}
+
+// Opens the PDB stream, reads the compression version from record 0 and
+// allocates the working buffers.
+// NOTE(review): record 0 is read with the layout that
+// PluckerBookReader::readRecord uses - a 16-bit uid and, when the uid is 1,
+// the 16-bit compression version. Reading only the first word would store
+// the uid as the version. When record 0 has another uid the version is left
+// at 0, so compressed records are skipped. Confirm against the Plucker
+// format description.
+bool PluckerTextStream::open() {
+    if (!PdbStream::open()) {
+        return false;
+    }
+
+    unsigned short uid = 0;
+    PdbUtil::readUnsignedShort(*myBase, uid);
+    myCompressionVersion = 0;
+    if (uid == 1) {
+        PdbUtil::readUnsignedShort(*myBase, myCompressionVersion);
+    }
+
+    myBuffer = new char[65536];
+    myFullBuffer = new char[65536];
+
+    myRecordIndex = 0;
+
+    return true;
+}
+
+// Makes sure myBuffer holds unread text. While it is exhausted, moves on to
+// the next record and extracts its text. Returns false when no records are
+// left or when record offsets do not ascend.
+bool PluckerTextStream::fillBuffer() {
+    for (;;) {
+        if (myBufferOffset != myBufferLength) {
+            return true;
+        }
+        const std::size_t lastIndex = header().Offsets.size() - 1;
+        if (myRecordIndex + 1 > lastIndex) {
+            return false;
+        }
+        ++myRecordIndex;
+
+        const std::size_t recordStart = recordOffset(myRecordIndex);
+        if (recordStart < myBase->offset()) {
+            return false;
+        }
+        myBase->seek(recordStart, true);
+
+        const std::size_t recordEnd = recordOffset(myRecordIndex + 1);
+        if (recordEnd < recordStart) {
+            return false;
+        }
+        processRecord(recordEnd - recordStart);
+    }
+}
+
+// Frees the record buffer, then lets PdbStream close the base stream and
+// free the read buffer.
+void PluckerTextStream::close() {
+    // delete[] on a null pointer is a no-op, so no explicit test is needed.
+    delete[] myFullBuffer;
+    myFullBuffer = 0;
+    PdbStream::close();
+}
+
+// Reads the record at the current base-stream position (`recordSize` bytes
+// long). For a text record (type 0 = plain, 1 = compressed) the data is
+// loaded into myFullBuffer and its text is copied, paragraph by paragraph,
+// into myBuffer. Other records, truncated headers and records shorter than
+// their own header leave the buffers untouched.
+void PluckerTextStream::processRecord(std::size_t recordSize) {
+    // Skip the 2-byte record uid.
+    myBase->seek(2, false);
+
+    unsigned short paragraphs = 0;
+    PdbUtil::readUnsignedShort(*myBase, paragraphs);
+
+    unsigned short size = 0;
+    PdbUtil::readUnsignedShort(*myBase, size);
+
+    unsigned char type = 0xFF;
+    if (myBase->read((char*)&type, 1) != 1 || type > 1) {
+        // Truncated header, or this record is not text record.
+        return;
+    }
+
+    // Skip the byte that PluckerBookReader::readRecord reads as `flags`.
+    myBase->seek(1, false);
+
+    std::vector<int> pars;
+    for (int i = 0; i < paragraphs; ++i) {
+        unsigned short pSize = 0;
+        PdbUtil::readUnsignedShort(*myBase, pSize);
+        pars.push_back(pSize);
+        myBase->seek(2, false);
+    }
+
+    // Record header plus one 4-byte entry per paragraph. Checked against
+    // recordSize before subtracting so the unsigned length cannot wrap.
+    const std::size_t headerSize = 8 + 4 * (std::size_t)paragraphs;
+
+    bool doProcess = false;
+    if (type == 0) {
+        doProcess = myBase->read(myFullBuffer, size) == size;
+    } else if (myCompressionVersion == 1) {
+        if (recordSize >= headerSize) {
+            doProcess =
+                DocDecompressor().decompress(*myBase, myFullBuffer, recordSize - headerSize, size) == size;
+        }
+    } else if (myCompressionVersion == 2) {
+        // Two extra bytes precede the compressed data.
+        if (recordSize >= headerSize + 2) {
+            myBase->seek(2, false);
+            doProcess =
+                ZLZDecompressor(recordSize - headerSize - 2).decompress(*myBase, myFullBuffer, size) == size;
+        }
+    }
+    if (!doProcess) {
+        return;
+    }
+
+    myBufferLength = 0;
+    myBufferOffset = 0;
+
+    char *start = myFullBuffer;
+    char *end = myFullBuffer;
+
+    for (std::vector<int>::const_iterator it = pars.begin(); it != pars.end(); ++it) {
+        start = end;
+        end = start + *it;
+        if (end > myFullBuffer + size) {
+            break;
+        }
+        processTextParagraph(start, end);
+    }
+}
+
+// Appends the bytes [from, to) to `buffer` at position `length` and returns
+// the new length. The total is capped at 65535 bytes: that is the largest
+// value the unsigned short length can hold, and it stays inside the
+// 65536-byte buffer allocated in open().
+static unsigned short appendClamped(char *buffer, unsigned short length, const char *from, const char *to) {
+    if (to <= from) {
+        return length;
+    }
+    std::size_t count = to - from;
+    const std::size_t room = 65535 - (std::size_t)length;
+    if (count > room) {
+        count = room;
+    }
+    std::memcpy(buffer + length, from, count);
+    return (unsigned short)(length + count);
+}
+
+// Copies the plain text of one Plucker paragraph, the bytes [start, end),
+// to the end of myBuffer, leaving out functions. A zero byte introduces a
+// function: the byte after it is the function code, whose low three bits
+// give the number of parameter bytes to skip.
+void PluckerTextStream::processTextParagraph(char *start, char *end) {
+    char *textStart = start;
+    bool functionFlag = false;
+    for (char *ptr = start; ptr < end; ++ptr) {
+        if (*ptr == 0) {
+            functionFlag = true;
+            myBufferLength = appendClamped(myBuffer, myBufferLength, textStart, ptr);
+            // The run before this marker has been copied. Advance past it
+            // so that a trailing or repeated zero byte (malformed data)
+            // cannot copy the same, ever longer run again and overflow
+            // myBuffer.
+            textStart = ptr + 1;
+        } else if (functionFlag) {
+            int paramCounter = ((unsigned char)*ptr) % 8;
+            if (end - ptr > paramCounter + 1) {
+                ptr += paramCounter;
+            } else {
+                ptr = end - 1;
+            }
+            functionFlag = false;
+            textStart = ptr + 1;
+        }
+    }
+    myBufferLength = appendClamped(myBuffer, myBufferLength, textStart, end);
+}
diff --git a/reader/src/formats/pdb/PluckerTextStream.h b/reader/src/formats/pdb/PluckerTextStream.h
new file mode 100644
index 0000000..70c1182
--- /dev/null
+++ b/reader/src/formats/pdb/PluckerTextStream.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __PLUCKERTEXTSTREAM_H__
+#define __PLUCKERTEXTSTREAM_H__
+
+#include "PdbStream.h"
+
+class ZLFile;
+
+// PdbStream that yields only the text of a Plucker file: text records are
+// decompressed if necessary and their functions are left out.
+class PluckerTextStream : public PdbStream {
+
+public:
+ PluckerTextStream(const ZLFile &file);
+ ~PluckerTextStream();
+ // Opens the PDB stream, reads the compression version, allocates buffers.
+ bool open();
+ // Frees myFullBuffer, then closes the PdbStream part.
+ void close();
+
+private:
+ // Advances record by record until myBuffer has unread text.
+ bool fillBuffer();
+
+private:
+ void processRecord(std::size_t recordSize);
+ void processTextParagraph(char *start, char *end);
+
+private:
+ // 1 selects DocDecompressor, 2 selects ZLZDecompressor in processRecord().
+ unsigned short myCompressionVersion;
+ // Holds the data of the current record (65536 bytes, allocated in open()).
+ char *myFullBuffer;
+ // Index of the record processed most recently.
+ std::size_t myRecordIndex;
+};
+
+#endif /* __PLUCKERTEXTSTREAM_H__ */
diff --git a/reader/src/formats/pdb/PmlBookReader.cpp b/reader/src/formats/pdb/PmlBookReader.cpp
new file mode 100644
index 0000000..e365983
--- /dev/null
+++ b/reader/src/formats/pdb/PmlBookReader.cpp
@@ -0,0 +1,227 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLTextParagraph.h>
+#include <ZLUnicodeUtil.h>
+#include <ZLStringUtil.h>
+#include <ZLTextStyleEntry.h>
+
+#include "PmlBookReader.h"
+#include "../../bookmodel/BookModel.h"
+
+// Creates a PML reader that forwards parsed content to bookReader.
+// The PlainTextFormat argument is accepted but not used; encoding is handed to PmlReader.
+PmlBookReader::PmlBookReader(BookReader &bookReader, const PlainTextFormat&, const std::string &encoding) : PmlReader(encoding), myBookReader(bookReader) {
+}
+
+// Nothing to release: myBookReader is only a reference.
+PmlBookReader::~PmlBookReader() {
+}
+
+// Reads a whole PML stream into the book model.
+// A REGULAR kind is pushed and a paragraph is opened before the base-class parser
+// runs; the paragraph is ended once parsing returns.
+// Returns whatever PmlReader::readDocument() reports.
+bool PmlBookReader::readDocument(ZLInputStream& stream) {
+	myParagraphIsEmpty = true;
+
+	myBookReader.pushKind(REGULAR);
+	myBookReader.beginParagraph();
+	const bool parseResult = PmlReader::readDocument(stream);
+	myBookReader.endParagraph();
+
+	return parseResult;
+}
+
+// Adds len bytes of text starting at data to the current paragraph, opening a
+// paragraph first if none is open.
+// When convert is true and an encoding converter is available the bytes are passed
+// through it; otherwise they are appended unchanged. While the small-caps state is
+// active the text is upper-cased before being added.
+// A zero length only guarantees that a paragraph is open.
+void PmlBookReader::addCharData(const char *data, std::size_t len, bool convert) {
+	if (!myBookReader.paragraphIsOpen()) {
+		myBookReader.beginParagraph();
+	}
+	if (len == 0) {
+		return;
+	}
+
+	// Scratch string shared by all calls; it is emptied again before returning.
+	static std::string text;
+	const bool useConverter = !myConverter.isNull() && convert;
+	if (useConverter) {
+		myConverter->convert(text, data, data + len);
+	} else {
+		text.append(data, len);
+	}
+
+	if (myState.SmallCaps) {
+		myBookReader.addData(ZLUnicodeUtil::toUpper(text));
+	} else {
+		myBookReader.addData(text);
+	}
+	text.erase();
+
+	// The paragraph now holds text, so newLine() must not emit an empty-line paragraph for it.
+	myParagraphIsEmpty = false;
+}
+
+// Emits the kind push/pop and control entries for a font property whose flag in
+// myState has ALREADY been toggled by PmlReader::processFontProperty(); a true flag
+// therefore means "the property has just been switched on".
+// A paragraph is opened first if none is open.
+void PmlBookReader::switchFontProperty(FontProperty property) {
+ if (!myBookReader.paragraphIsOpen()) {
+ myBookReader.beginParagraph();
+ }
+ switch (property) {
+ case FONT_BOLD:
+ if (myState.Bold) {
+ myBookReader.pushKind(STRONG);
+ } else {
+ myBookReader.popKind();
+ }
+ myBookReader.addControl(STRONG, myState.Bold);
+ break;
+ case FONT_ITALIC:
+ if (myState.Italic) {
+ if (!myState.Bold) {
+ myBookReader.pushKind(EMPHASIS);
+ myBookReader.addControl(EMPHASIS, true);
+ } else {
+ // Bold is active: close STRONG, open EMPHASIS, then reopen STRONG on top of it.
+ myBookReader.popKind();
+ myBookReader.addControl(STRONG, false);
+
+ myBookReader.pushKind(EMPHASIS);
+ myBookReader.addControl(EMPHASIS, true);
+ myBookReader.pushKind(STRONG);
+ myBookReader.addControl(STRONG, true);
+ }
+ } else {
+ if (!myState.Bold) {
+ myBookReader.addControl(EMPHASIS, false);
+ myBookReader.popKind();
+ } else {
+ // Bold is active: close STRONG and EMPHASIS, then reopen STRONG alone.
+ myBookReader.addControl(STRONG, false);
+ myBookReader.popKind();
+ myBookReader.addControl(EMPHASIS, false);
+ myBookReader.popKind();
+
+ myBookReader.pushKind(STRONG);
+ myBookReader.addControl(STRONG, true);
+ }
+ }
+ break;
+ case FONT_UNDERLINED:
+ // Underlining produces no output here; only the state flag is tracked by the caller.
+ break;
+ case FONT_SUBSCRIPT: //don't have to be mixed with other style tags
+ if (myState.Subscript) {
+ myBookReader.pushKind(SUB);
+ } else {
+ myBookReader.popKind();
+ }
+ myBookReader.addControl(SUB, myState.Subscript);
+ break;
+ case FONT_SUPERSCRIPT: //Should not be mixed with other style tags
+ if (myState.Superscript) {
+ myBookReader.pushKind(SUP);
+ } else {
+ myBookReader.popKind();
+ }
+ myBookReader.addControl(SUP, myState.Superscript);
+ break;
+ }
+}
+
+// Handles a line break in the PML source.
+// The open paragraph (if any) is ended. If that paragraph received no text, an
+// EMPTY_LINE_PARAGRAPH is emitted; otherwise the "empty" marker is re-armed for the
+// next paragraph. A new paragraph is then started through newParagraph().
+void PmlBookReader::newLine() {
+	if (myBookReader.paragraphIsOpen()) {
+		myBookReader.endParagraph();
+	}
+
+	if (!myParagraphIsEmpty) {
+		myParagraphIsEmpty = true;
+	} else {
+		myBookReader.beginParagraph(ZLTextParagraph::EMPTY_LINE_PARAGRAPH);
+		myBookReader.endParagraph();
+	}
+
+	newParagraph();
+}
+
+// Handles a page break: ends the open paragraph (if any) and starts a new one.
+// No page-break entry is emitted; the break only results in a new paragraph.
+void PmlBookReader::newPage() {
+ if (myBookReader.paragraphIsOpen()) {
+ myBookReader.endParagraph();
+ }
+ //newLine();
+ newParagraph();
+}
+
+// Starts a fresh paragraph, ending the current one first when it is still open,
+// and re-applies the formatting that is active in myState: alignment, font size
+// and block indent, in that order.
+void PmlBookReader::newParagraph() {
+	if (myBookReader.paragraphIsOpen()) {
+		myBookReader.endParagraph();
+	}
+	myBookReader.beginParagraph();
+
+	const bool hasAlignment = (myState.Alignment != ALIGN_UNDEFINED);
+	const bool hasFontSize = (myState.FontSize != NORMAL);
+	const bool hasIndent = myState.IndentBlockOn && (myState.Indent != 0);
+
+	if (hasAlignment) {
+		setAlignment();
+	}
+	if (hasFontSize) {
+		setFontSize();
+	}
+	if (hasIndent) {
+		setIndent();
+	}
+}
+
+// Adds a style entry carrying the alignment currently stored in myState.
+void PmlBookReader::setAlignment() {
+ ZLTextStyleEntry entry(ZLTextStyleEntry::STYLE_OTHER_ENTRY);
+ entry.setAlignmentType(myState.Alignment);
+ myBookReader.addStyleEntry(entry);
+}
+
+// Adds a style entry for the active indent block: the first-line indent delta is
+// set to 0 percent and the left indent to myState.Indent percent.
+void PmlBookReader::setIndent() {
+	const short leftIndentPercent = static_cast<short>(myState.Indent);
+
+	ZLTextStyleEntry indentEntry(ZLTextStyleEntry::STYLE_OTHER_ENTRY);
+	indentEntry.setLength(ZLTextStyleEntry::LENGTH_FIRST_LINE_INDENT_DELTA, 0, ZLTextStyleEntry::SIZE_UNIT_PERCENT);
+	indentEntry.setLength(ZLTextStyleEntry::LENGTH_LEFT_INDENT, leftIndentPercent, ZLTextStyleEntry::SIZE_UNIT_PERCENT);
+	myBookReader.addStyleEntry(indentEntry);
+}
+
+// Adds a style entry reflecting myState.FontSize, opening a paragraph first if
+// none is open. SMALLER and LARGER set the matching font modifier; for any other
+// size the entry is added without a modifier.
+void PmlBookReader::setFontSize() {
+	if (!myBookReader.paragraphIsOpen()) {
+		myBookReader.beginParagraph();
+	}
+
+	ZLTextStyleEntry sizeEntry(ZLTextStyleEntry::STYLE_OTHER_ENTRY);
+	if (myState.FontSize == SMALLER) {
+		sizeEntry.setFontModifier(ZLTextStyleEntry::FONT_MODIFIER_SMALLER, true);
+	} else if (myState.FontSize == LARGER) {
+		sizeEntry.setFontModifier(ZLTextStyleEntry::FONT_MODIFIER_LARGER, true);
+	}
+	myBookReader.addStyleEntry(sizeEntry);
+}
+
+// Opens (on == true) or closes (on == false) a hyperlink of the given kind.
+// Only INTERNAL_HYPERLINK and FOOTNOTE are handled; every other kind, including
+// EXTERNAL_HYPERLINK and BOOK_HYPERLINK, is ignored.
+// id is the link target and is used only when the link is opened.
+void PmlBookReader::addLink(FBTextKind kind, const std::string &id, bool on) {
+	const bool supportedKind = (kind == INTERNAL_HYPERLINK) || (kind == FOOTNOTE);
+	if (!supportedKind) {
+		return;
+	}
+
+	if (on) {
+		myBookReader.addHyperlinkControl(kind, id);
+	} else {
+		myBookReader.addControl(kind, false);
+	}
+}
+
+// Registers label as a hyperlink target at the current position in the book model.
+void PmlBookReader::addLinkLabel(const std::string &label) {
+ myBookReader.addHyperlinkLabel(label);
+}
+
+// Inserts a reference to the image named id.
+// The reference is added outside of any paragraph: an open paragraph is ended
+// before the call and a new one is begun afterwards; if no paragraph was open,
+// none is started.
+void PmlBookReader::addImageReference(const std::string &id) {
+	const bool paragraphWasOpen = myBookReader.paragraphIsOpen();
+	if (!paragraphWasOpen) {
+		myBookReader.addImageReference(id);
+		return;
+	}
+
+	myBookReader.endParagraph();
+	myBookReader.addImageReference(id);
+	myBookReader.beginParagraph();
+}
diff --git a/reader/src/formats/pdb/PmlBookReader.h b/reader/src/formats/pdb/PmlBookReader.h
new file mode 100644
index 0000000..22944b4
--- /dev/null
+++ b/reader/src/formats/pdb/PmlBookReader.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __PMLBOOKREADER_H__
+#define __PMLBOOKREADER_H__
+
+#include <string>
+
+#include "PmlReader.h"
+#include "../../bookmodel/BookReader.h"
+#include "../txt/PlainTextFormat.h"
+
+// PmlReader implementation that turns the parser callbacks into calls on a
+// BookReader (paragraphs, text, style entries, hyperlinks and image references).
+class PmlBookReader : public PmlReader {
+
+public:
+ // The format argument is currently not used by the implementation.
+ PmlBookReader(BookReader &bookReader, const PlainTextFormat &format, const std::string &encoding);
+ ~PmlBookReader();
+
+ // Wraps PmlReader::readDocument() in a paragraph of kind REGULAR.
+ bool readDocument(ZLInputStream &stream);
+
+protected:
+ // Implementations of the PmlReader callbacks.
+ void addCharData(const char *data, std::size_t len, bool convert);
+ void addLink(FBTextKind kind, const std::string &id, bool on);
+ void addLinkLabel(const std::string &label);
+ void addImageReference(const std::string &id);
+ void switchFontProperty(FontProperty property);
+ void setFontSize();
+ void newLine();
+ void newPage();
+ void newParagraph();
+
+private:
+ // Helpers that add a style entry built from the current myState.
+ void setAlignment();
+ void setIndent();
+
+private:
+ // Not owned; must outlive this reader.
+ BookReader& myBookReader;
+ // True until text is added to the current paragraph; newLine() uses it to decide
+ // whether to emit an empty-line paragraph.
+ bool myParagraphIsEmpty;
+
+ // The members below are commented out and are not referenced by PmlBookReader.cpp.
+ /*FontType myFont;
+ char *myCharBuffer;
+ std::string myConvertedTextBuffer;
+ bool myParagraphStarted;
+ bool myBufferIsEmpty;
+ ZLTextStyleEntry *myForcedEntry;
+ std::vector<std::pair<FBTextKind,bool> > myDelayedControls;
+ std::vector<std::string> myDelayedHyperlinks;
+ unsigned short myCompressionVersion;
+ unsigned char myBytesToSkip;
+
+ std::set<std::pair<int, int> > myReferencedParagraphs;
+ std::map<int, std::vector<int> > myParagraphMap;
+ std::vector<int> *myParagraphVector;
+ bool myParagraphStored;*/
+};
+
+#endif /* __PMLBOOKREADER_H__ */
diff --git a/reader/src/formats/pdb/PmlReader.cpp b/reader/src/formats/pdb/PmlReader.cpp
new file mode 100644
index 0000000..712a6e0
--- /dev/null
+++ b/reader/src/formats/pdb/PmlReader.cpp
@@ -0,0 +1,407 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+/*
+ * Information about Palm Markup Language was taken from:
+ * http://www.m.ereader.com/ereader/help/dropbook/pml.htm
+ * http://ccit205.wikispaces.com/Palm+Markup+Language+(PML)
+ */
+
+#include <cstdlib>
+#include <cctype>
+
+#include <ZLFile.h>
+#include <ZLInputStream.h>
+
+#include "PmlReader.h"
+
+// Size in bytes of the chunk buffer that readDocument() allocates and parseDocument() fills.
+static const int pmlStreamBufferSize = 4096;
+
+// Empty string used as the default tag parameter by processTag() and processIndent().
+const std::string PmlReader::ourDefaultParameter = "";
+
+// Creates a reader for text in the given encoding.
+// All members get a defined value here so that the object is in a known state
+// even before readDocument() performs its per-document reset (myState is
+// value-initialised, the chunk buffer pointer is null, no interrupt is pending).
+PmlReader::PmlReader(const std::string &encoding) : EncodedTextReader(encoding), myState(), myStreamBuffer(0), myIsInterrupted(false) {
+}
+
+// Nothing to release here: the chunk buffer is allocated and freed inside readDocument().
+PmlReader::~PmlReader() {
+}
+
+// Parses one PML document from stream.
+// Allocates the chunk buffer, resets the interrupt flag and the whole formatting
+// state, runs parseDocument() and returns its result.
+// The buffer is released by a scope guard, so it is freed (and the member pointer
+// reset to null instead of being left dangling) on every exit path, including an
+// exception escaping from one of the callbacks.
+bool PmlReader::readDocument(ZLInputStream& stream) {
+	// Deletes the chunk buffer and clears the owning pointer when the scope is left.
+	struct StreamBufferGuard {
+		char *&Buffer;
+		StreamBufferGuard(char *&buffer) : Buffer(buffer) {}
+		~StreamBufferGuard() {
+			delete[] Buffer;
+			Buffer = 0;
+		}
+	};
+
+	myStreamBuffer = new char[pmlStreamBufferSize];
+	StreamBufferGuard guard(myStreamBuffer);
+
+	myIsInterrupted = false;
+
+	myState.Italic = false;
+	myState.Bold = false;
+	myState.Underlined = false;
+	myState.SmallCaps = false;
+	myState.Subscript = false;
+	myState.Superscript = false;
+	myState.Alignment = ALIGN_UNDEFINED;
+	myState.FontSize = NORMAL;
+	myState.Indent = 0;
+	myState.IndentBlockOn = false;
+	myState.BoldBlockOn = false;
+	myState.FootnoteLinkOn = false;
+	myState.InternalLinkOn = false;
+	myState.InvisibleText = false;
+
+	return parseDocument(stream);
+}
+
+// Tokenises the PML stream and dispatches text, line breaks and tags.
+//
+// The stream is read in chunks of pmlStreamBufferSize bytes into myStreamBuffer and
+// scanned by a three-state machine whose state survives chunk boundaries:
+//   READ_NORMAL_DATA   - plain text; '\n' triggers newLine(), '\\' starts a tag;
+//   READ_TAG           - collecting the tag name that follows a backslash;
+//   READ_TAG_PARAMETER - collecting the quoted parameter of a `\tag="..."` form.
+// Text is forwarded with processCharData() (whitespace included), tags with
+// processTag(). Data left at the end of a chunk is flushed (text) or stashed in
+// tagName / parameterString (partial tag or parameter).
+//
+// Returns myIsInterrupted: false when the stream was read to its end, true when
+// interrupt() stopped the loop.
+// NOTE(review): this reads like an inverted success flag; confirm what callers
+// expect before changing it.
+//
+// Changes from the previous version:
+//  - the end-of-chunk flush for READ_NORMAL_DATA no longer falls through into the
+//    READ_TAG case, which used to append ordinary text to tagName;
+//  - the function-static whitespace flag was dropped: both branches it chose
+//    between did nothing for whitespace, so it had no observable effect, and its
+//    std::isspace() call on a plain char is undefined for negative values;
+//  - the write-only tagCounter was removed.
+bool PmlReader::parseDocument(ZLInputStream &stream) {
+	enum {
+		READ_NORMAL_DATA,
+		READ_TAG,
+		READ_TAG_PARAMETER
+	} parserState = READ_NORMAL_DATA;
+
+	// Number of tag-name characters still to be consumed after the current one.
+	std::size_t tagNameLength = 0;
+	std::string tagName;
+	std::string parameterString;
+
+	// True once the opening quote of a tag parameter has been seen.
+	bool startParameterReading = false;
+
+	while (!myIsInterrupted) {
+		const char *ptr = myStreamBuffer;
+		const char *end = myStreamBuffer + stream.read(myStreamBuffer, pmlStreamBufferSize);
+		if (ptr == end) {
+			break;
+		}
+		// Start of the not yet consumed text / tag name / parameter in this chunk.
+		const char *dataStart = ptr;
+		// Cleared when the current character must be examined again in a new state.
+		bool readNextChar = true;
+		while (ptr != end) {
+			switch (parserState) {
+				case READ_NORMAL_DATA:
+					if (*ptr == '\n') {
+						if (ptr > dataStart) {
+							processCharData(dataStart, ptr - dataStart);
+						}
+						newLine();
+						dataStart = ptr + 1;
+					} else if (*ptr == '\\') {
+						if (ptr > dataStart) {
+							processCharData(dataStart, ptr - dataStart);
+						}
+						dataStart = ptr + 1;
+						tagName.erase();
+						parserState = READ_TAG;
+					}
+					break;
+				case READ_TAG:
+					if ((ptr == dataStart) && (tagName.empty())) {
+						// First character after the backslash.
+						if (*ptr == '\\') {
+							// "\\\\" is an escaped backslash: emit it as text.
+							processCharData(ptr, 1);
+							dataStart = ptr + 1;
+							parserState = READ_NORMAL_DATA;
+						} else {
+							tagNameLength = findTagLength(ptr);
+							if (tagNameLength == 0) {
+								// Unknown tag character: drop it and go back to text.
+								dataStart = ptr + 1;
+								parserState = READ_NORMAL_DATA;
+							} else {
+								--tagNameLength;
+							}
+						}
+					} else {
+						if (tagNameLength == 0) {
+							// The whole name has been passed; *ptr is the character after it.
+							tagName.append(dataStart, ptr - dataStart);
+							if (*ptr == '=') {
+								dataStart = ptr + 1;
+								parameterString.erase();
+								parserState = READ_TAG_PARAMETER;
+							} else {
+								// Parameterless tag: handle it and re-scan *ptr as normal data.
+								readNextChar = false;
+								processTag(tagName);
+								dataStart = ptr;
+								parserState = READ_NORMAL_DATA;
+							}
+						} else {
+							--tagNameLength;
+						}
+					}
+					break;
+				case READ_TAG_PARAMETER:
+					if (*ptr == '"') {
+						if (!startParameterReading) {
+							startParameterReading = true;
+							dataStart = ptr + 1;
+						} else {
+							parameterString.append(dataStart, ptr - dataStart);
+							processTag(tagName, parameterString);
+							parserState = READ_NORMAL_DATA;
+							dataStart = ptr + 1;
+							startParameterReading = false;
+						}
+					}
+					break;
+			}
+			if (readNextChar) {
+				++ptr;
+			} else {
+				readNextChar = true;
+			}
+		}
+		// Carry over whatever is left in this chunk.
+		if (dataStart < end) {
+			switch (parserState) {
+				case READ_NORMAL_DATA:
+					processCharData(dataStart, end - dataStart);
+					break;
+				case READ_TAG:
+					tagName.append(dataStart, end - dataStart);
+					break;
+				case READ_TAG_PARAMETER:
+					parameterString.append(dataStart, end - dataStart);
+					break;
+				default:
+					break;
+			}
+		}
+	}
+	return myIsInterrupted;
+}
+
+// Returns the length, in characters, of the tag name that starts at *ptr (the
+// character right after the backslash): 1 or 2 for a known tag, 0 for an unknown
+// one. Only the first character is inspected.
+// Table columns: tag | action | has closing tag | supported by this reader.
+std::size_t PmlReader::findTagLength(const char* ptr) {
+	std::size_t length = 0;
+	switch (*ptr) {
+		case 'p': // new page                         | -       | + |
+		case 'x': // new chapter and new page         | +       | + |
+		case 'c': // center alignment block           | +       | + |
+		case 'r': // right alignment block            | +       | + |
+		case 'i': // italicize block                  | +       | + |
+		case 'u': // underlined block                 | +       | + |
+		case 'o': // overstrike block                 | +       | - |
+		case 'v': // invisible text block             | +       | + |
+		case 't': // indent block                     | +       | + |
+		case 'T': // indent with value                | -       | + |
+		case 'w': // embed text width rule            | -       | - |
+		case 'n': // switch to normal font            | -       | + |
+		case 's': // switch to std font               | + or \n | + |
+		case 'b': // switch to bold font (deprecated) | + or \n | - |
+		case 'l': // switch to large font             | + or \n | + |
+		case 'B': // mark text as bold                | +       | + |
+		case 'k': // smaller font size and uppercase  | +       | + |
+		case 'm': // insert named image               | -       | + |
+		case 'q': // reference to another spot        | +       | + |
+		case 'Q': // link anchor for \q reference     | -       | + |
+		case '-': // soft hyphen                      | -       | - |
+		case 'I': // reference index item             | -       | - |
+			length = 1;
+			break;
+		case 'X': // XN - new chapter, n indent level | +       | - |
+		case 'S': // Sp - mark text as superscript    | +       | + |
+		          // Sb - mark text as subscript      | +       | + |
+		          // Sd - link to a sidebar           | +       | - |
+		case 'C': // CN - chapter title + indent level| -       | - |
+		case 'F': // Fn - link to a footnote          | +       | + |
+			length = 2;
+			break;
+		default:
+			break;
+	}
+	return length;
+}
+
+
+// Requests that parsing stops: parseDocument() checks the flag before reading each new chunk.
+void PmlReader::interrupt() {
+ myIsInterrupted = true;
+}
+
+
+// Dispatches one parsed tag.
+// tagName is the name without the leading backslash; parameter is the quoted value
+// of a `\tag="..."` form, or the empty default for parameterless tags.
+// The dispatch looks only at the first character of tagName, except for the 'S'
+// family where the full name is compared. Unknown tags are ignored.
+void PmlReader::processTag(std::string &tagName, const std::string &parameter) {
+ const char tagDeterminant = *tagName.data();
+ switch (tagDeterminant) {
+ case 'p':
+ newPage();
+ break;
+ case 'x':
+ //TODO add close tag processing
+ newPage();
+ break;
+ case 'B':
+ // Ignored while a deprecated \b bold block is active.
+ if (!myState.BoldBlockOn) {
+ processFontProperty(FONT_BOLD);
+ }
+ break;
+ case 'i':
+ processFontProperty(FONT_ITALIC);
+ break;
+ case 'u':
+ processFontProperty(FONT_UNDERLINED);
+ break;
+ case 'v':
+ // Toggles suppression of text in processCharData().
+ myState.InvisibleText = !myState.InvisibleText;;
+ break;
+ case 'c':
+ processAlignment(ALIGN_CENTER);
+ break;
+ case 'r':
+ processAlignment(ALIGN_RIGHT);
+ break;
+ case 'n':
+ processFontSize(NORMAL);
+ break;
+ case 'b':
+ myState.BoldBlockOn = !myState.BoldBlockOn;
+ processFontProperty(FONT_BOLD);
+ break;
+ case 's':
+ processFontSize(SMALLER);
+ break;
+ case 'l':
+ processFontSize(LARGER);
+ break;
+ case 'k':
+ // Small caps: upper-casing is applied by the subclass, combined with a smaller font.
+ myState.SmallCaps = !myState.SmallCaps;
+ processFontSize(SMALLER);
+ break;
+ case 'S':
+ if (tagName == "Sb") {
+ processFontProperty(FONT_SUBSCRIPT);
+ } else if (tagName == "Sp") {
+ processFontProperty(FONT_SUPERSCRIPT);
+ } else if (tagName == "Sd") {
+ //processSidebarLink();
+ }
+ break;
+ case 't':
+ processIndent();
+ break;
+ case 'T':
+ // processIndent() starts the new paragraph itself; the block flag is cleared
+ // right after, so later paragraphs do not get this indent re-applied.
+ processIndent(parameter);
+ myState.IndentBlockOn = false;
+ break;
+ case 'w':
+ //addHorizontalRule(parameter);
+ break;
+ case 'F':
+ processLink(FOOTNOTE, parameter);
+ break;
+ case 'q':
+ processLink(INTERNAL_HYPERLINK, parameter);
+ break;
+ case 'Q':
+ addLinkLabel(parameter);
+ break;
+ case 'm':
+ addImageReference(parameter);
+ break;
+ default:
+ //std::cerr << "PmlReader: unsupported tag: name: " << tagName << " parameter: " << parameter << "\n";
+ break;
+ }
+}
+
+// Forwards len bytes of text to addCharData(), unless an invisible-text block
+// (\v) is currently active, in which case the text is dropped.
+void PmlReader::processCharData(const char* data, std::size_t len, bool convert) {
+	if (myState.InvisibleText) {
+		return;
+	}
+	addCharData(data, len, convert);
+}
+
+// Toggles the myState flag that belongs to property and then notifies the
+// subclass through switchFontProperty(), which therefore sees the NEW value.
+// A value outside the FontProperty enumeration is ignored.
+void PmlReader::processFontProperty(PmlReader::FontProperty property) {
+	bool *stateFlag = 0;
+	switch (property) {
+		case FONT_BOLD:
+			stateFlag = &myState.Bold;
+			break;
+		case FONT_ITALIC:
+			stateFlag = &myState.Italic;
+			break;
+		case FONT_UNDERLINED:
+			stateFlag = &myState.Underlined;
+			break;
+		case FONT_SUBSCRIPT:
+			stateFlag = &myState.Subscript;
+			break;
+		case FONT_SUPERSCRIPT:
+			stateFlag = &myState.Superscript;
+			break;
+	}
+	if (stateFlag == 0) {
+		return;
+	}
+	*stateFlag = !*stateFlag;
+	switchFontProperty(property);
+}
+
+// Toggles an alignment block: a tag for the alignment that is already active
+// switches alignment off (ALIGN_UNDEFINED), any other tag makes its alignment
+// the active one. A new paragraph is started in both cases.
+void PmlReader::processAlignment(ZLTextAlignmentType alignment) {
+	const bool closesCurrentBlock = (myState.Alignment == alignment);
+	myState.Alignment = closesCurrentBlock ? ALIGN_UNDEFINED : alignment;
+	newParagraph();
+}
+
+// Toggles a font-size block: a tag for the size that is already active resets
+// the size to NORMAL, any other tag makes its size the active one. The subclass
+// is then asked to apply the resulting size through setFontSize().
+void PmlReader::processFontSize(FontSizeType sizeType) {
+	const bool closesCurrentBlock = (myState.FontSize == sizeType);
+	myState.FontSize = closesCurrentBlock ? NORMAL : sizeType;
+	setFontSize();
+}
+
+// Toggles the indent block and starts a new paragraph.
+// parameter may carry a percentage such as "10%"; the digits before the '%' are
+// used as the indent. Without a '%' (including the empty default parameter) the
+// indent is 5 percent.
+// When an indent block is opened the value is stored in myState.Indent; when one
+// is closed the indent is reset to 0.
+//
+// The '%' position is kept as std::string::size_type and compared with npos (it
+// was narrowed to int and compared with -1), and the number is parsed from
+// c_str(), which is NUL-terminated in every language version, instead of data().
+void PmlReader::processIndent(const std::string& parameter) {
+	static const int defaultIndentPercent = 5;
+
+	int indentPercentSize = defaultIndentPercent;
+	const std::string::size_type percentPosition = parameter.find('%');
+	if (percentPosition != std::string::npos) {
+		const std::string indentValueStr = parameter.substr(0, percentPosition);
+		indentPercentSize = std::atoi(indentValueStr.c_str());
+	}
+
+	if (!myState.IndentBlockOn) {
+		myState.Indent = indentPercentSize;
+	} else {
+		myState.Indent = 0;
+	}
+	myState.IndentBlockOn = !myState.IndentBlockOn;
+	newParagraph();
+}
+
+// Toggles a footnote or internal hyperlink and reports it through addLink().
+// The matching "link is on" flag in myState is flipped first, so addLink()
+// receives the new state. Other link kinds are ignored.
+void PmlReader::processLink(FBTextKind kind, const std::string &parameter) {
+	if (kind == FOOTNOTE) {
+		myState.FootnoteLinkOn = !myState.FootnoteLinkOn;
+		addLink(FOOTNOTE, parameter, myState.FootnoteLinkOn);
+	} else if (kind == INTERNAL_HYPERLINK) {
+		myState.InternalLinkOn = !myState.InternalLinkOn;
+		// The label is expected to be preceded by '#', which is dropped. A parameter
+		// of at most one character (closing or corrupted tag) is passed on unchanged.
+		const std::string linkId = (parameter.size() > 1) ? parameter.substr(1) : parameter;
+		addLink(INTERNAL_HYPERLINK, linkId, myState.InternalLinkOn);
+	}
+}
diff --git a/reader/src/formats/pdb/PmlReader.h b/reader/src/formats/pdb/PmlReader.h
new file mode 100644
index 0000000..496c8d9
--- /dev/null
+++ b/reader/src/formats/pdb/PmlReader.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+/*
+ * Information about Palm Markup Language was taken from the following sources:
+ * http://www.m.ereader.com/ereader/help/dropbook/pml.htm
+ * http://ccit205.wikispaces.com/Palm+Markup+Language+(PML)
+ */
+
+#ifndef __PMLREADER_H__
+#define __PMLREADER_H__
+
+#include <string>
+
+#include <ZLEncodingConverter.h>
+#include <ZLTextAlignmentType.h>
+
+#include "../EncodedTextReader.h"
+#include "../../bookmodel/FBTextKind.h"
+
+class ZLInputStream;
+
+// Base class for Palm Markup Language (PML) parsers.
+// readDocument() tokenises a stream into text, line breaks and tags, keeps the
+// current formatting in myState and reports everything through the pure virtual
+// callbacks below, which a concrete reader implements.
+class PmlReader : public EncodedTextReader {
+
+public:
+ // Resets the state and parses the whole stream; see parseDocument() for the return value.
+ virtual bool readDocument(ZLInputStream &stream);
+
+protected:
+ PmlReader(const std::string &encoding);
+ virtual ~PmlReader();
+
+protected:
+ enum FontProperty {
+ FONT_BOLD,
+ FONT_ITALIC,
+ FONT_UNDERLINED,
+ FONT_SUBSCRIPT,
+ FONT_SUPERSCRIPT
+ };
+
+ enum FontSizeType {
+ NORMAL,
+ SMALLER,
+ LARGER
+ };
+
+
+ // Callbacks invoked while parsing. The set*/switch* callbacks are called AFTER the
+ // corresponding field of myState has been updated.
+ virtual void addCharData(const char *data, std::size_t len, bool convert) = 0;
+ virtual void addLink(FBTextKind kind, const std::string &id, bool on) = 0;
+ virtual void addLinkLabel(const std::string &label) = 0;
+ virtual void addImageReference(const std::string &id) = 0;
+ virtual void setFontSize() = 0;
+ virtual void switchFontProperty(FontProperty property) = 0;
+ virtual void newLine() = 0;
+ virtual void newPage() = 0;
+ virtual void newParagraph() = 0;
+
+ // Makes parseDocument() stop before it reads the next chunk.
+ void interrupt();
+
+private:
+ bool parseDocument(ZLInputStream &stream);
+ void processTag(std::string &tagName, const std::string &parameter = ourDefaultParameter);
+ void processCharData(const char* data, std::size_t len, bool convert = true);
+ void processFontProperty(FontProperty property);
+ void processAlignment(ZLTextAlignmentType alignment);
+ void processFontSize(FontSizeType sizeType);
+ void processIndent(const std::string &parameter =ourDefaultParameter);
+ void processLink(FBTextKind kind, const std::string &parameter);
+
+ // Length (1 or 2) of the tag name starting at *ptr, or 0 for an unknown tag.
+ static std::size_t findTagLength(const char* ptr);
+
+protected:
+ // Formatting state of the parser; reset at the start of every readDocument().
+ struct PmlReaderState {
+ bool Bold;
+ bool Italic;
+ bool Underlined;
+ bool SmallCaps;
+ bool Subscript;
+ bool Superscript;
+
+ ZLTextAlignmentType Alignment;
+ FontSizeType FontSize;
+
+ // Left indent in percent; meaningful while IndentBlockOn is set.
+ unsigned short Indent;
+ bool IndentBlockOn;
+ // Set while a deprecated \b bold block is open; \B tags are ignored meanwhile.
+ bool BoldBlockOn;
+ bool FootnoteLinkOn;
+ bool InternalLinkOn;
+ // While set, character data is dropped instead of being reported.
+ bool InvisibleText;
+ };
+
+ PmlReaderState myState;
+
+private:
+ // Chunk buffer; allocated and freed inside readDocument().
+ char* myStreamBuffer;
+
+ bool myIsInterrupted;
+ const static std::string ourDefaultParameter;
+};
+
+#endif /* __PMLREADER_H__ */
diff --git a/reader/src/formats/pdb/SimplePdbPlugin.cpp b/reader/src/formats/pdb/SimplePdbPlugin.cpp
new file mode 100644
index 0000000..f4b5c30
--- /dev/null
+++ b/reader/src/formats/pdb/SimplePdbPlugin.cpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLFile.h>
+#include <ZLInputStream.h>
+
+#include "PdbPlugin.h"
+#include "../txt/TxtBookReader.h"
+#include "../html/HtmlBookReader.h"
+#include "HtmlMetainfoReader.h"
+#include "../util/TextFormatDetector.h"
+
+#include "../../bookmodel/BookModel.h"
+#include "../../library/Book.h"
+
+// Fills in the metadata of book from its PDB file.
+// Encoding and language are detected first; the call fails (returns false) when
+// no encoding could be determined. Title and authors are looked up only if they
+// are still missing and the content is detected as HTML; tags are then read in
+// the same pass. Returns true in every other case.
+// NOTE(review): the stream returned by createStream() is dereferenced without a
+// null check; confirm that createStream() never returns an empty pointer.
+bool SimplePdbPlugin::readMetaInfo(Book &book) const {
+	shared_ptr<ZLInputStream> stream = createStream(book.file());
+	detectEncodingAndLanguage(book, *stream);
+	if (book.encoding().empty()) {
+		return false;
+	}
+
+	int wantedInfo = HtmlMetainfoReader::NONE;
+	if (book.title().empty()) {
+		wantedInfo |= HtmlMetainfoReader::TITLE;
+	}
+	if (book.authors().empty()) {
+		wantedInfo |= HtmlMetainfoReader::AUTHOR;
+	}
+
+	// Nothing is missing: the HTML check is not even attempted.
+	if (wantedInfo == HtmlMetainfoReader::NONE) {
+		return true;
+	}
+	if (!TextFormatDetector().isHtml(*stream)) {
+		return true;
+	}
+
+	wantedInfo |= HtmlMetainfoReader::TAGS;
+	HtmlMetainfoReader metainfoReader(book, static_cast<HtmlMetainfoReader::ReadType>(wantedInfo));
+	metainfoReader.readDocument(*stream);
+	return true;
+}
+
+// Builds the text model of the book stored in model.
+// The plain-text format settings are taken from the file; if they are not yet
+// initialised they are detected from the stream content. The document is then
+// read with readDocumentInternal(). Always returns true.
+bool SimplePdbPlugin::readModel(BookModel &model) const {
+	const Book &book = *model.book();
+	const ZLFile &file = book.file();
+	shared_ptr<ZLInputStream> input = createStream(file);
+
+	PlainTextFormat format(file);
+	const bool needsDetection = !format.initialized();
+	if (needsDetection) {
+		PlainTextFormatDetector formatDetector;
+		formatDetector.detect(*input, format);
+	}
+
+	readDocumentInternal(file, model, format, book.encoding(), *input);
+	return true;
+}
+
+// Reads the document from stream into model, choosing the reader by content:
+// HtmlBookReader (with an empty first argument) when the stream is detected as
+// HTML, TxtBookReader otherwise. The file argument is unused.
+void SimplePdbPlugin::readDocumentInternal(const ZLFile&, BookModel &model, const PlainTextFormat &format, const std::string &encoding, ZLInputStream &stream) const {
+	const bool contentIsHtml = TextFormatDetector().isHtml(stream);
+	if (!contentIsHtml) {
+		TxtBookReader(model, format, encoding).readDocument(stream);
+		return;
+	}
+	HtmlBookReader("", model, format, encoding).readDocument(stream);
+}
diff --git a/reader/src/formats/pdb/ZTXTPlugin.cpp b/reader/src/formats/pdb/ZTXTPlugin.cpp
new file mode 100644
index 0000000..1465856
--- /dev/null
+++ b/reader/src/formats/pdb/ZTXTPlugin.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLFile.h>
+#include <ZLInputStream.h>
+
+#include "PdbPlugin.h"
+#include "ZTXTStream.h"
+#include "../txt/PlainTextFormat.h"
+#include "../util/TextFormatDetector.h"
+
+// zTXT files are reported as not providing metadata of their own.
+bool ZTXTPlugin::providesMetaInfo() const {
+ return false;
+}
+
+// Accepts a file when PdbPlugin::fileType() reports the type string "zTXTGPlm".
+bool ZTXTPlugin::acceptsFile(const ZLFile &file) const {
+ return PdbPlugin::fileType(file) == "zTXTGPlm";
+}
+
+// Returns a newly allocated ZTXTStream for file, owned by the returned shared_ptr.
+shared_ptr<ZLInputStream> ZTXTPlugin::createStream(const ZLFile &file) const {
+ return new ZTXTStream(file);
+}
+
+// Creates the "Text" options page for file. The last constructor argument of
+// PlainTextInfoPage is true when the content is NOT detected as HTML.
+// The caller receives a raw pointer to a newly allocated page.
+FormatInfoPage *ZTXTPlugin::createInfoPage(ZLOptionsDialog &dialog, const ZLFile &file) {
+	shared_ptr<ZLInputStream> input = createStream(file);
+	const bool contentIsNotHtml = !TextFormatDetector().isHtml(*input);
+	return new PlainTextInfoPage(dialog, file, ZLResourceKey("Text"), contentIsNotHtml);
+}
diff --git a/reader/src/formats/pdb/ZTXTStream.cpp b/reader/src/formats/pdb/ZTXTStream.cpp
new file mode 100644
index 0000000..2dc549c
--- /dev/null
+++ b/reader/src/formats/pdb/ZTXTStream.cpp
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLFile.h>
+#include <ZLZDecompressor.h>
+
+#include "ZTXTStream.h"
+
+// Creates a stream over the zTXT file; nothing is read until open() is called.
+ZTXTStream::ZTXTStream(const ZLFile &file) : PdbStream(file) {
+}
+
+// Closes the stream on destruction.
+ZTXTStream::~ZTXTStream() {
+ close();
+}
+
+// Opens the underlying PDB stream and reads the zTXT header fields needed for
+// decompression.
+// After skipping 2 bytes the record count is read and clamped to the index of the
+// last entry in the PDB offset table; after skipping 4 more bytes the maximum
+// record size is read and a buffer of that size is allocated.
+// Returns false if the base stream cannot be opened or the maximum record size is 0.
+// NOTE(review): the results of the two readUnsignedShort() calls are not checked,
+// and an empty offset table would make `size() - 1` wrap around; confirm that
+// PdbStream::open() rules both out.
+bool ZTXTStream::open() {
+	if (!PdbStream::open()) {
+		return false;
+	}
+
+	unsigned short recordNumber;
+	myBase->seek(2, false);
+	PdbUtil::readUnsignedShort(*myBase, recordNumber);
+	const unsigned short lastOffsetIndex = static_cast<unsigned short>(header().Offsets.size() - 1);
+	myMaxRecordIndex = (lastOffsetIndex < recordNumber) ? lastOffsetIndex : recordNumber;
+
+	myBase->seek(4, false);
+	PdbUtil::readUnsignedShort(*myBase, myMaxRecordSize);
+	if (myMaxRecordSize == 0) {
+		return false;
+	}
+
+	myBuffer = new char[myMaxRecordSize];
+	myRecordIndex = 0;
+	return true;
+}
+
+// Makes sure unread data is available in myBuffer.
+// While the buffer is exhausted (offset == length) the next record is located
+// through recordOffset(), the base stream is positioned on it and the record is
+// decompressed into myBuffer.
+// Returns false when there are no more records or the record offsets are
+// inconsistent; true once the buffer holds unread data.
+bool ZTXTStream::fillBuffer() {
+ while (myBufferOffset == myBufferLength) {
+ // All records up to myMaxRecordIndex have been consumed.
+ if (myRecordIndex + 1 > myMaxRecordIndex) {
+ return false;
+ }
+ ++myRecordIndex;
+ std::size_t currentOffset = recordOffset(myRecordIndex);
+ // Hmm, this works on examples from manybooks.net,
+ // but I don't understand what this code means :((
+ if (myRecordIndex == 1) {
+ currentOffset += 2;
+ }
+ // The record would start before the current position of the base stream.
+ if (currentOffset < myBase->offset()) {
+ return false;
+ }
+ // NOTE(review): the second argument presumably selects absolute positioning -- confirm.
+ myBase->seek(currentOffset, true);
+ const std::size_t nextOffset = recordOffset(myRecordIndex + 1);
+ if (nextOffset < currentOffset) {
+ return false;
+ }
+ // The compressed record spans [currentOffset, nextOffset); myMaxRecordSize is the size of myBuffer.
+ myBufferLength = ZLZDecompressor(nextOffset - currentOffset).decompress(*myBase, myBuffer, myMaxRecordSize);
+ myBufferOffset = 0;
+ }
+ return true;
+}
diff --git a/reader/src/formats/pdb/ZTXTStream.h b/reader/src/formats/pdb/ZTXTStream.h
new file mode 100644
index 0000000..f89d3a0
--- /dev/null
+++ b/reader/src/formats/pdb/ZTXTStream.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __ZTXTSTREAM_H__
+#define __ZTXTSTREAM_H__
+
+#include <ZLInputStream.h>
+
+#include "PdbStream.h"
+
+class ZLFile;
+
+// PdbStream that exposes the decompressed text of a zTXT file, one record at a time.
+class ZTXTStream : public PdbStream {
+
+public:
+ ZTXTStream(const ZLFile &file);
+ ~ZTXTStream();
+ // Reads the zTXT header fields and allocates the record buffer.
+ bool open();
+
+private:
+ // Decompresses the next record(s) into the buffer; false when no more data is available.
+ bool fillBuffer();
+
+private:
+ // Index of the last record that may be read.
+ std::size_t myMaxRecordIndex;
+ // Size of the decompression buffer, as read from the file in open().
+ unsigned short myMaxRecordSize;
+ // Index of the record most recently loaded by fillBuffer(); 0 right after open().
+ std::size_t myRecordIndex;
+};
+
+#endif /* __ZTXTSTREAM_H__ */
diff --git a/reader/src/formats/pdf/PdfBookReader.cpp b/reader/src/formats/pdf/PdfBookReader.cpp
new file mode 100644
index 0000000..bd84452
--- /dev/null
+++ b/reader/src/formats/pdf/PdfBookReader.cpp
@@ -0,0 +1,261 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cstdlib>
+#include <iostream>
+
+#include <ZLStringUtil.h>
+#include <ZLInputStream.h>
+
+#include "PdfBookReader.h"
+#include "PdfObject.h"
+#include "../../bookmodel/BookModel.h"
+
+// Reads one non-empty line from stream into buffer, byte by byte.
+// CR and LF both act as terminators; terminators seen before any payload are
+// skipped, so buffer is empty on return only when the stream ran out of data.
+static void readLine(ZLInputStream &stream, std::string &buffer) {
+	buffer.clear();
+	for (char symbol; stream.read(&symbol, 1) == 1; ) {
+		const bool isLineBreak = (symbol == 10) || (symbol == 13);
+		if (!isLineBreak) {
+			buffer += symbol;
+			continue;
+		}
+		if (!buffer.empty()) {
+			// end of a line that carries content
+			return;
+		}
+	}
+}
+
+// Binds the internal BookReader (myModelReader) to the model that will be filled.
+PdfBookReader::PdfBookReader(BookModel &model) : myModelReader(model) {
+}
+
+// Nothing to release explicitly; members clean up after themselves.
+PdfBookReader::~PdfBookReader() {
+}
+
+// Loads the indirect object identified by address (object number, generation)
+// from the byte offset recorded in myObjectLocationMap.
+// The "<number> <generation> obj" header found at that offset must match the
+// requested address; otherwise, or when the address is unknown, null is returned.
+shared_ptr<PdfObject> PdfBookReader::readObjectFromLocation(ZLInputStream &stream, const std::pair<int,int> &address) {
+	std::map<std::pair<int,int>,int>::const_iterator location = myObjectLocationMap.find(address);
+	if (location == myObjectLocationMap.end()) {
+		return 0;
+	}
+
+	stream.seek(location->second, true);
+	char lookAhead = 0;
+
+	// object number
+	PdfObject::readToken(stream, myBuffer, lookAhead);
+	const int objectNumber = atoi(myBuffer.c_str());
+	if (objectNumber != address.first) {
+		return 0;
+	}
+
+	// generation number
+	PdfObject::readToken(stream, myBuffer, lookAhead);
+	const int objectGeneration = atoi(myBuffer.c_str());
+	if (objectGeneration != address.second) {
+		return 0;
+	}
+
+	// "obj" keyword
+	PdfObject::readToken(stream, myBuffer, lookAhead);
+	const bool hasObjKeyword = (myBuffer == "obj");
+	if (!hasObjKeyword) {
+		return 0;
+	}
+
+	return PdfObject::readObject(stream, lookAhead);
+}
+
+// Returns the object that ref points to.
+// Anything that is null or not a REFERENCE is returned unchanged. Resolved
+// objects are cached in myObjectMap (including null results, so a failed
+// lookup is not retried); cache misses are loaded via readObjectFromLocation().
+shared_ptr<PdfObject> PdfBookReader::resolveReference(shared_ptr<PdfObject> ref, ZLInputStream &stream) {
+	if (ref.isNull() || (ref->type() != PdfObject::REFERENCE)) {
+		return ref;
+	}
+	const PdfObjectReference &reference = (const PdfObjectReference&)*ref;
+	const std::pair<int,int> address(reference.number(), reference.generation());
+	std::map<std::pair<int,int>,shared_ptr<PdfObject> >::const_iterator it = myObjectMap.find(address);
+	if (it != myObjectMap.end()) {
+		return it->second;
+	}
+	// readObjectFromLocation() performs the location lookup itself, so no
+	// separate search in myObjectLocationMap is needed here.
+	shared_ptr<PdfObject> object = readObjectFromLocation(stream, address);
+	myObjectMap.insert(std::make_pair(address, object));
+	return object;
+}
+
+// Removes a trailing '%' comment from buffer (everything from the first '%'
+// onwards) and then strips surrounding white space.
+static void stripBuffer(std::string &buffer) {
+	// Compare against npos instead of narrowing the position to int, which
+	// relied on the unsigned-to-signed conversion yielding a negative value.
+	const std::string::size_type index = buffer.find('%');
+	if (index != std::string::npos) {
+		buffer.erase(index);
+	}
+	ZLStringUtil::stripWhiteSpaces(buffer);
+}
+
+// Parses the cross-reference table at xrefOffset and then every earlier table
+// reachable through the trailer's "Prev" entry.
+// Offsets of in-use objects are recorded in myObjectLocationMap and the first
+// trailer dictionary encountered is kept in myTrailer.
+// Returns true once a trailer without "Prev" has been processed, false on
+// malformed, truncated or cyclic input.
+bool PdfBookReader::readReferenceTable(ZLInputStream &stream, int xrefOffset) {
+	// Table offsets that were already parsed. Without this a "Prev" chain that
+	// points back to an earlier table would never terminate. (std::map serves
+	// as a set here because <map> is already in scope for this class.)
+	std::map<int,bool> visitedOffsets;
+
+	while (true) {
+		if (!visitedOffsets.insert(std::make_pair(xrefOffset, true)).second) {
+			return false;
+		}
+
+		stream.seek(xrefOffset, true);
+		readLine(stream, myBuffer);
+		stripBuffer(myBuffer);
+		if (myBuffer != "xref") {
+			return false;
+		}
+
+		while (true) {
+			readLine(stream, myBuffer);
+			if (myBuffer.empty()) {
+				// readLine() yields an empty line only at end of stream:
+				// the table is truncated, stop instead of spinning forever.
+				return false;
+			}
+			stripBuffer(myBuffer);
+			if (myBuffer == "trailer") {
+				break;
+			}
+			if (myBuffer.empty()) {
+				// the line consisted of a comment only
+				continue;
+			}
+			// subsection header: "<first object number> <entry count>"
+			const std::string::size_type index = myBuffer.find(' ');
+			if (index == std::string::npos) {
+				return false;
+			}
+			const int start = atoi(myBuffer.c_str());
+			const int len = atoi(myBuffer.c_str() + index + 1);
+			for (int i = 0; i < len; ++i) {
+				readLine(stream, myBuffer);
+				stripBuffer(myBuffer);
+				// entry layout after stripping: "oooooooooo ggggg n" (18 chars)
+				if (myBuffer.length() != 18) {
+					return false;
+				}
+				const int objectOffset = atoi(myBuffer.c_str());
+				const int objectGeneration = atoi(myBuffer.c_str() + 11);
+				const bool objectInUse = myBuffer[17] == 'n';
+				if (objectInUse) {
+					// Tables are visited newest first, so an entry that is
+					// already present must not be replaced by an older one
+					// (same rule as for myTrailer below).
+					myObjectLocationMap.insert(std::make_pair(std::make_pair(start + i, objectGeneration), objectOffset));
+				}
+			}
+		}
+
+		char ch = 0;
+		shared_ptr<PdfObject> trailer = PdfObject::readObject(stream, ch);
+		if (trailer.isNull() || (trailer->type() != PdfObject::DICTIONARY)) {
+			return false;
+		}
+		if (myTrailer.isNull()) {
+			myTrailer = trailer;
+		}
+		PdfDictionaryObject &trailerDictionary = (PdfDictionaryObject&)*trailer;
+		shared_ptr<PdfObject> previous = trailerDictionary["Prev"];
+		if (previous.isNull()) {
+			return true;
+		}
+
+		if (previous->type() != PdfObject::INTEGER_NUMBER) {
+			return false;
+		}
+		xrefOffset = ((PdfIntegerObject&)*previous).value();
+	}
+}
+
+// Reads the structure of a PDF document from stream.
+// Steps: check the "%PDF-" header, locate "startxref" within the last 100
+// bytes, load the cross-reference tables, then walk trailer -> Root (Catalog)
+// -> Pages -> Kids and hand every kid to processPage().
+// Returns false when the stream cannot be opened or the structure is not as
+// expected.
+bool PdfBookReader::readBook(shared_ptr<ZLInputStream> stream) {
+	if (stream.isNull() || !stream->open()) {
+		return false;
+	}
+
+	readLine(*stream, myBuffer);
+	if (!ZLStringUtil::stringStartsWith(myBuffer, "%PDF-")) {
+		return false;
+	}
+
+	std::string version = myBuffer.substr(5);
+	std::cerr << "version = " << version << "\n";
+
+	std::size_t eofOffset = stream->sizeOfOpened();
+	if (eofOffset < 100) {
+		return false;
+	}
+
+	stream->seek(eofOffset - 100, true);
+	bool readXrefOffset = false;
+	// -1 means "no offset found"
+	int xrefOffset = -1;
+	while (true) {
+		readLine(*stream, myBuffer);
+		if (myBuffer.empty()) {
+			break;
+		}
+		stripBuffer(myBuffer);
+		if (readXrefOffset) {
+			// first non-empty line after the "startxref" keyword
+			if (!myBuffer.empty()) {
+				xrefOffset = atoi(myBuffer.c_str());
+				break;
+			}
+		} else if (myBuffer == "startxref") {
+			readXrefOffset = true;
+		}
+	}
+
+	// Without a usable offset there is nothing to seek to; previously the
+	// sentinel value itself was handed on as a stream position.
+	if (xrefOffset < 0) {
+		return false;
+	}
+
+	if (!readReferenceTable(*stream, xrefOffset)) {
+		return false;
+	}
+
+	PdfDictionaryObject &trailerDictionary = (PdfDictionaryObject&)*myTrailer;
+	shared_ptr<PdfObject> root = resolveReference(trailerDictionary["Root"], *stream);
+	if (root.isNull() || (root->type() != PdfObject::DICTIONARY)) {
+		return false;
+	}
+
+	PdfDictionaryObject &rootDictionary = (PdfDictionaryObject&)*root;
+	// name objects are interned, so comparing the pointers compares the names
+	if (rootDictionary["Type"] != PdfNameObject::nameObject("Catalog")) {
+		return false;
+	}
+	shared_ptr<PdfObject> pageRootNode = resolveReference(rootDictionary["Pages"], *stream);
+	if (pageRootNode.isNull() || (pageRootNode->type() != PdfObject::DICTIONARY)) {
+		return false;
+	}
+	PdfDictionaryObject &pageRootNodeDictionary = (PdfDictionaryObject&)*pageRootNode;
+	if (pageRootNodeDictionary["Type"] != PdfNameObject::nameObject("Pages")) {
+		return false;
+	}
+
+	/*
+	shared_ptr<PdfObject> count = pageRootNodeDictionary["Count"];
+	if (!count.isNull() && (count->type() == PdfObject::INTEGER_NUMBER)) {
+		std::cerr << "count = " << ((PdfIntegerObject&)*count).value() << "\n";
+	}
+	*/
+	shared_ptr<PdfObject> pages = pageRootNodeDictionary["Kids"];
+	if (pages.isNull() || (pages->type() != PdfObject::ARRAY)) {
+		return false;
+	}
+	const PdfArrayObject& pagesArray = (const PdfArrayObject&)*pages;
+	const std::size_t pageNumber = pagesArray.size();
+	for (std::size_t i = 0; i < pageNumber; ++i) {
+		processPage(pagesArray[i], *stream);
+	}
+
+	return true;
+}
+
+// Resolves contentsObject if it is a reference. The resolved object is not
+// used any further; as a side effect it ends up in the resolveReference() cache.
+void PdfBookReader::processContents(shared_ptr<PdfObject> contentsObject, ZLInputStream &stream) {
+ contentsObject = resolveReference(contentsObject, stream);
+}
+
+// Handles one entry of the "Kids" array.
+// The entry is resolved and must be a dictionary; its "Contents" value is
+// passed to processContents() — directly when it is a reference, element by
+// element when it is an array. Any other kind of value is ignored.
+void PdfBookReader::processPage(shared_ptr<PdfObject> pageObject, ZLInputStream &stream) {
+	pageObject = resolveReference(pageObject, stream);
+	const bool isDictionary =
+		!pageObject.isNull() && (pageObject->type() == PdfObject::DICTIONARY);
+	if (!isDictionary) {
+		return;
+	}
+
+	const PdfDictionaryObject &page = (const PdfDictionaryObject&)*pageObject;
+	shared_ptr<PdfObject> contents = page["Contents"];
+	if (contents.isNull()) {
+		return;
+	}
+
+	const PdfObject::Type kind = contents->type();
+	if (kind == PdfObject::REFERENCE) {
+		processContents(contents, stream);
+	} else if (kind == PdfObject::ARRAY) {
+		const PdfArrayObject &parts = (const PdfArrayObject&)*contents;
+		const std::size_t count = parts.size();
+		for (std::size_t index = 0; index < count; ++index) {
+			processContents(parts[index], stream);
+		}
+	}
+}
diff --git a/reader/src/formats/pdf/PdfBookReader.h b/reader/src/formats/pdf/PdfBookReader.h
new file mode 100644
index 0000000..9488dcf
--- /dev/null
+++ b/reader/src/formats/pdf/PdfBookReader.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __PdfBOOKREADER_H__
+#define __PdfBOOKREADER_H__
+
+#include <map>
+
+#include "../../bookmodel/BookReader.h"
+
+class PdfObject;
+class PdfObjectReference;
+
+// Reads the object structure of a PDF file (cross-reference tables, trailer,
+// page tree) for a BookModel.
+class PdfBookReader {
+
+public:
+ PdfBookReader(BookModel &model);
+ ~PdfBookReader();
+ // Parses the document; returns false if the stream is unusable or malformed.
+ bool readBook(shared_ptr<ZLInputStream> stream);
+
+private:
+ // Loads the xref table at offset and all tables chained through "Prev".
+ bool readReferenceTable(ZLInputStream &stream, int offset);
+ // Returns the target of an indirect reference; other objects pass through unchanged.
+ shared_ptr<PdfObject> resolveReference(shared_ptr<PdfObject> reference, ZLInputStream &stream);
+ // Reads the object stored at the file offset known for address; null on mismatch.
+ shared_ptr<PdfObject> readObjectFromLocation(ZLInputStream &stream, const std::pair<int,int> &address);
+ void processPage(shared_ptr<PdfObject> pageObject, ZLInputStream &stream);
+ void processContents(shared_ptr<PdfObject> contentsObject, ZLInputStream &stream);
+
+private:
+ BookReader myModelReader;
+ // Scratch buffer for lines and tokens.
+ std::string myBuffer;
+ // (object number, generation) -> byte offset, filled from the xref tables.
+ std::map<std::pair<int,int>,int> myObjectLocationMap;
+ // (object number, generation) -> already resolved object.
+ std::map<std::pair<int,int>,shared_ptr<PdfObject> > myObjectMap;
+ // First trailer dictionary encountered while reading the xref tables.
+ shared_ptr<PdfObject> myTrailer;
+};
+
+#endif /* __PdfBOOKREADER_H__ */
diff --git a/reader/src/formats/pdf/PdfDescriptionReader.cpp b/reader/src/formats/pdf/PdfDescriptionReader.cpp
new file mode 100644
index 0000000..98937fa
--- /dev/null
+++ b/reader/src/formats/pdf/PdfDescriptionReader.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLInputStream.h>
+
+#include "PdfDescriptionReader.h"
+
+// Keeps a reference to book; the caller must keep the Book alive.
+PdfDescriptionReader::PdfDescriptionReader(Book &book) : myBook(book) {
+}
+
+// Placeholder: reports success without reading from stream or touching myBook.
+bool PdfDescriptionReader::readMetaInfo(shared_ptr<ZLInputStream> stream) {
+ return true;
+}
diff --git a/reader/src/formats/pdf/PdfDescriptionReader.h b/reader/src/formats/pdf/PdfDescriptionReader.h
new file mode 100644
index 0000000..004cdfa
--- /dev/null
+++ b/reader/src/formats/pdf/PdfDescriptionReader.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __PDFDESCRIPTIONREADER_H__
+#define __PDFDESCRIPTIONREADER_H__
+
+#include <string>
+
+class Book;
+
+// Reader intended to fill a Book with metadata taken from a PDF stream.
+// NOTE(review): shared_ptr and ZLInputStream are used below, but this header
+// only includes <string>; it relies on the including file to declare them first.
+class PdfDescriptionReader {
+
+public:
+ PdfDescriptionReader(Book &book);
+ ~PdfDescriptionReader();
+ bool readMetaInfo(shared_ptr<ZLInputStream> stream);
+
+private:
+ // Book to receive the metadata (not owned).
+ Book &myBook;
+};
+
+// Nothing to release: the Book is only referenced.
+inline PdfDescriptionReader::~PdfDescriptionReader() {}
+
+#endif /* __PDFDESCRIPTIONREADER_H__ */
diff --git a/reader/src/formats/pdf/PdfObject.cpp b/reader/src/formats/pdf/PdfObject.cpp
new file mode 100644
index 0000000..374a618
--- /dev/null
+++ b/reader/src/formats/pdf/PdfObject.cpp
@@ -0,0 +1,450 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <iostream>
+
+#include <ZLInputStream.h>
+#include <ZLZDecompressor.h>
+
+#include "PdfObject.h"
+
+// Out-of-line definition of the virtual destructor.
+PdfObject::~PdfObject() {
+}
+
+// Factory for integer objects.
+// Values 0..255 are served from a lazily filled table of shared instances
+// (the table itself lives until program exit); every other value gets a
+// fresh object.
+shared_ptr<PdfObject> PdfIntegerObject::integerObject(int value) {
+	const bool cacheable = (value >= 0) && (value < 256);
+	if (!cacheable) {
+		return new PdfIntegerObject(value);
+	}
+
+	static shared_ptr<PdfObject>* cache = new shared_ptr<PdfObject>[256];
+	shared_ptr<PdfObject> &slot = cache[value];
+	if (slot.isNull()) {
+		slot = new PdfIntegerObject(value);
+	}
+	return slot;
+}
+
+// Stores value and writes a trace line to std::cerr.
+PdfIntegerObject::PdfIntegerObject(int value) : myValue(value) {
+ std::cerr << "PdfIntegerObject " << value << "\n";
+}
+
+// Returns the stored integer.
+int PdfIntegerObject::value() const {
+ return myValue;
+}
+
+// Type tag: INTEGER_NUMBER.
+PdfObject::Type PdfIntegerObject::type() const {
+ return INTEGER_NUMBER;
+}
+
+// Returns the shared "true" object, created on first use.
+shared_ptr<PdfObject> PdfBooleanObject::TRUE() {
+ static shared_ptr<PdfObject> value = new PdfBooleanObject(true);
+ return value;
+}
+
+// Returns the shared "false" object, created on first use.
+shared_ptr<PdfObject> PdfBooleanObject::FALSE() {
+ static shared_ptr<PdfObject> value = new PdfBooleanObject(false);
+ return value;
+}
+
+// Stores value and writes a trace line to std::cerr.
+PdfBooleanObject::PdfBooleanObject(bool value) : myValue(value) {
+ std::cerr << "PdfBooleanObject " << value << "\n";
+}
+
+// Returns the stored boolean.
+bool PdfBooleanObject::value() const {
+ return myValue;
+}
+
+// Type tag: BOOLEAN.
+PdfObject::Type PdfBooleanObject::type() const {
+ return BOOLEAN;
+}
+
+// Stores a copy of value and writes a trace line to std::cerr.
+PdfStringObject::PdfStringObject(const std::string &value) : myValue(value) {
+ std::cerr << "PdfStringObject " << value << "\n";
+}
+
+// Type tag: STRING.
+PdfObject::Type PdfStringObject::type() const {
+ return STRING;
+}
+
+// Intern table: name text -> the single object that represents that name.
+std::map<std::string,shared_ptr<PdfObject> > PdfNameObject::ourObjectMap;
+
+// Returns the unique object for the name id, creating and registering it in
+// ourObjectMap on first request. Because every name maps to exactly one
+// object, names can be compared by comparing the returned pointers.
+// A trace line is written to std::cerr whenever a new name is created.
+shared_ptr<PdfObject> PdfNameObject::nameObject(const std::string &id) {
+	// TODO: process escaped characters
+	typedef std::map<std::string,shared_ptr<PdfObject> > NameTable;
+
+	NameTable::const_iterator known = ourObjectMap.find(id);
+	if (known == ourObjectMap.end()) {
+		std::cerr << "PdfNameObject " << id << "\n";
+		shared_ptr<PdfObject> created = new PdfNameObject();
+		ourObjectMap.insert(std::make_pair(id, created));
+		return created;
+	}
+	return known->second;
+}
+
+// Name objects carry no data; identity is the key (see nameObject()).
+PdfNameObject::PdfNameObject() {
+}
+
+// Type tag: NAME.
+PdfObject::Type PdfNameObject::type() const {
+ return NAME;
+}
+
+// Creates an empty dictionary.
+PdfDictionaryObject::PdfDictionaryObject() {
+}
+
+// Associates object with the key id, replacing any previous value for that key.
+void PdfDictionaryObject::setObject(shared_ptr<PdfObject> id, shared_ptr<PdfObject> object) {
+ myMap[id] = object;
+}
+
+// Looks up the value stored under the key object id; null when absent.
+shared_ptr<PdfObject> PdfDictionaryObject::operator[](shared_ptr<PdfObject> id) const {
+	std::map<shared_ptr<PdfObject>,shared_ptr<PdfObject> >::const_iterator entry = myMap.find(id);
+	if (entry == myMap.end()) {
+		return 0;
+	}
+	return entry->second;
+}
+
+// Convenience lookup by name text; the text is first mapped to its interned
+// name object (which is created if it did not exist yet).
+shared_ptr<PdfObject> PdfDictionaryObject::operator[](const std::string &id) const {
+ return operator[](PdfNameObject::nameObject(id));
+}
+
+// Type tag: DICTIONARY.
+PdfObject::Type PdfDictionaryObject::type() const {
+ return DICTIONARY;
+}
+
+// Creates an empty array.
+PdfArrayObject::PdfArrayObject() {
+}
+
+// Appends object to the end of the array.
+void PdfArrayObject::addObject(shared_ptr<PdfObject> object) {
+ myVector.push_back(object);
+}
+
+// Removes the final element and returns it; returns null for an empty array.
+shared_ptr<PdfObject> PdfArrayObject::popLast() {
+	if (myVector.empty()) {
+		return 0;
+	}
+	shared_ptr<PdfObject> tail = myVector.back();
+	myVector.pop_back();
+	return tail;
+}
+
+// Number of elements (the vector size converted to int).
+int PdfArrayObject::size() const {
+ return myVector.size();
+}
+
+// Element access without bounds checking: index must be in [0, size()).
+shared_ptr<PdfObject> PdfArrayObject::operator[](int index) const {
+ return myVector[index];
+}
+
+// Type tag: ARRAY.
+PdfObject::Type PdfArrayObject::type() const {
+ return ARRAY;
+}
+
+// Indirect reference "number generation R".
+PdfObjectReference::PdfObjectReference(int number, int generation) : myNumber(number), myGeneration(generation) {
+}
+
+// Object number of the referenced object.
+int PdfObjectReference::number() const {
+ return myNumber;
+}
+
+// Generation number of the referenced object.
+int PdfObjectReference::generation() const {
+ return myGeneration;
+}
+
+// Type tag: REFERENCE.
+PdfObject::Type PdfObjectReference::type() const {
+ return REFERENCE;
+}
+
+// Reads the payload of a stream object into myData.
+// dataStream must be positioned just behind the "stream" keyword. The payload
+// size is taken from the dictionary's "Length" entry, which has to be a direct
+// integer; otherwise nothing is read. A "FlateDecode" payload is inflated
+// (and dumped to std::cerr), any other payload is stored as is.
+// NOTE(review): white-space bytes at the very start of the payload are skipped
+// together with the end-of-line that follows the keyword.
+PdfStreamObject::PdfStreamObject(const PdfDictionaryObject &dictionary, ZLInputStream &dataStream) {
+	// Start without a look-ahead character. 0 is classified as white space, so
+	// skipWhiteSpaces() reads until the first payload byte. The variable used
+	// to be uninitialised, which made the table lookup undefined behaviour.
+	char ch = 0;
+	skipWhiteSpaces(dataStream, ch);
+
+	shared_ptr<PdfObject> length = dictionary["Length"];
+	if (!length.isNull() && (length->type() == INTEGER_NUMBER)) {
+		int value = ((PdfIntegerObject&)*length).value();
+		if (value > 0) {
+			shared_ptr<PdfObject> filter = dictionary["Filter"];
+			if (filter == PdfNameObject::nameObject("FlateDecode")) {
+				// ch holds the first header byte; step over the second one
+				// and inflate the remaining value - 2 bytes
+				dataStream.seek(1, false);
+				ZLZDecompressor decompressor(value - 2);
+				char buffer[2048];
+				while (true) {
+					std::size_t size = decompressor.decompress(dataStream, buffer, 2048);
+					if (size == 0) {
+						break;
+					}
+					myData.append(buffer, size);
+				}
+				std::cerr << myData << "\n";
+			} else {
+				// first byte is already in ch, the rest comes from the stream
+				myData.append(value, '\0');
+				myData[0] = ch;
+				dataStream.read((char*)myData.data() + 1, value - 1);
+			}
+		}
+	}
+
+	/*
+	shared_ptr<PdfObject> filter = dictionary["Filter"];
+	if (!filter.isNull()) {
+		switch (filter->type()) {
+			default:
+				break;
+			case NAME:
+				myFilters.push_back(
+					(filter == PdfNameObject::nameObject("FlateDecode")) ?
+						FLATE : UNKNOWN
+				);
+				break;
+			case ARRAY:
+			{
+				// TODO: process filters array
+			}
+		}
+	}
+	*/
+}
+
+// Type tag: STREAM.
+PdfObject::Type PdfStreamObject::type() const {
+ return STREAM;
+}
+
+// Lexical class of a byte, used by the tokenizer functions below.
+enum PdfCharacterType {
+ PDF_CHAR_REGULAR,
+ PDF_CHAR_WHITESPACE,
+ PDF_CHAR_DELIMITER
+};
+
+// 256-entry lookup table (byte value -> class); allocated and filled on the
+// first call to PdfObject::skipWhiteSpaces().
+static PdfCharacterType *PdfCharacterTypeTable = 0;
+
+// Advances ch past white space.
+// ch is the current look-ahead character: while it is white space the next
+// byte is read from stream into it; reading stops at the first other
+// character or when the stream is exhausted.
+// The first call also builds the character classification table.
+void PdfObject::skipWhiteSpaces(ZLInputStream &stream, char &ch) {
+	if (PdfCharacterTypeTable == 0) {
+		// NUL, TAB, LF, FF, CR, SPACE
+		static const unsigned char whiteSpaceCodes[] = { 0, 9, 10, 12, 13, 32 };
+		static const char delimiterChars[] = "()<>[]{}/%";
+
+		PdfCharacterTypeTable = new PdfCharacterType[256];
+		for (int code = 0; code < 256; ++code) {
+			PdfCharacterTypeTable[code] = PDF_CHAR_REGULAR;
+		}
+		for (std::size_t k = 0; k < sizeof(whiteSpaceCodes); ++k) {
+			PdfCharacterTypeTable[whiteSpaceCodes[k]] = PDF_CHAR_WHITESPACE;
+		}
+		for (const char *delimiter = delimiterChars; *delimiter != '\0'; ++delimiter) {
+			PdfCharacterTypeTable[(unsigned char)*delimiter] = PDF_CHAR_DELIMITER;
+		}
+	}
+
+	for (;;) {
+		if (PdfCharacterTypeTable[(unsigned char)ch] != PDF_CHAR_WHITESPACE) {
+			return;
+		}
+		if (stream.read(&ch, 1) != 1) {
+			return;
+		}
+	}
+}
+
+// Reads one token: skips white space, then collects consecutive regular
+// characters into buffer. On return ch holds the character that ended the
+// token (or the last character read if the stream ran out).
+void PdfObject::readToken(ZLInputStream &stream, std::string &buffer, char &ch) {
+	buffer.clear();
+	skipWhiteSpaces(stream, ch);
+	for (;;) {
+		if (PdfCharacterTypeTable[(unsigned char)ch] != PDF_CHAR_REGULAR) {
+			return;
+		}
+		buffer += ch;
+		if (stream.read(&ch, 1) != 1) {
+			return;
+		}
+	}
+}
+
+// Reads the next byte into ch. At end of stream ch is reset to 0 (classified
+// as white space) and false is returned, so that no parsing loop can keep
+// spinning on a stale look-ahead character.
+static bool pdfReadChar(ZLInputStream &stream, char &ch) {
+	if (stream.read(&ch, 1) == 1) {
+		return true;
+	}
+	ch = 0;
+	return false;
+}
+
+// Appends the run of regular characters that starts at ch to token.
+// The classification table must already exist (skipWhiteSpaces() builds it).
+static void pdfReadRegularRun(ZLInputStream &stream, std::string &token, char &ch) {
+	while (PdfCharacterTypeTable[(unsigned char)ch] == PDF_CHAR_REGULAR) {
+		token += ch;
+		if (!pdfReadChar(stream, ch)) {
+			break;
+		}
+	}
+}
+
+// Parses one object from stream.
+//   stream - input positioned behind the look-ahead character
+//   ch     - look-ahead character; pass 0 when nothing has been read yet.
+//            Updated to the look-ahead after the object (0 if none / at EOF).
+// Returns the parsed object. Null is returned at the end of an array (']') or
+// dictionary ('>>'), at end of input, and for anything that is not supported
+// (such tokens are skipped and traced to std::cerr).
+// Supported: dictionaries (optionally followed by a stream), names, booleans,
+// integers, hexadecimal and literal strings, arrays, and "n g R" references
+// inside dictionaries and arrays.
+shared_ptr<PdfObject> PdfObject::readObject(ZLInputStream &stream, char &ch) {
+	skipWhiteSpaces(stream, ch);
+
+	PdfObject::Type type = PdfObject::NIL;
+	bool hexString = false;
+	switch (ch) {
+		case '(':
+			hexString = false;
+			type = PdfObject::STRING;
+			break;
+		case '<':
+			if (!pdfReadChar(stream, ch)) {
+				// lone '<' at end of input
+				return 0;
+			}
+			hexString = true;
+			type = (ch == '<') ? PdfObject::DICTIONARY : PdfObject::STRING;
+			break;
+		case '>': // end of dictionary
+			pdfReadChar(stream, ch);
+			if (ch == '>') {
+				pdfReadChar(stream, ch);
+			}
+			return 0;
+		case '/':
+			type = PdfObject::NAME;
+			break;
+		case '[':
+			type = PdfObject::ARRAY;
+			break;
+		case ']': // end of array
+			pdfReadChar(stream, ch);
+			return 0;
+		case '+':
+		case '-':
+		case '0':
+		case '1':
+		case '2':
+		case '3':
+		case '4':
+		case '5':
+		case '6':
+		case '7':
+		case '8':
+		case '9':
+			type = PdfObject::INTEGER_NUMBER;
+			break;
+		case 't':
+		case 'f':
+			type = PdfObject::BOOLEAN;
+			break;
+	}
+
+	switch (type) {
+		case PdfObject::DICTIONARY:
+		{
+			ch = 0;
+			shared_ptr<PdfObject> name;
+			shared_ptr<PdfObject> value;
+			shared_ptr<PdfObject> next;
+			PdfDictionaryObject *dictionary = new PdfDictionaryObject();
+			while (true) {
+				next = readObject(stream, ch);
+				if (next.isNull()) {
+					break;
+				}
+				PdfObject::Type oType = next->type();
+				if (oType == PdfObject::NAME) {
+					name = next;
+					value = readObject(stream, ch);
+					if (value.isNull()) {
+						break;
+					}
+					dictionary->setObject(name, value);
+				} else if (oType == PdfObject::INTEGER_NUMBER) {
+					// second integer after a key: only valid as "n g R"
+					if (value.isNull() || (value->type() != PdfObject::INTEGER_NUMBER)) {
+						break;
+					}
+					skipWhiteSpaces(stream, ch);
+					if (ch != 'R') {
+						break;
+					}
+					const int number = ((PdfIntegerObject&)*value).value();
+					const int generation = ((PdfIntegerObject&)*next).value();
+					dictionary->setObject(name, new PdfObjectReference(number, generation));
+					value = 0;
+					ch = 0;
+				} else {
+					break;
+				}
+			}
+			// NOTE(review): the token after the dictionary is consumed even
+			// when it is not "stream"; a caller loses that token.
+			std::string token;
+			skipWhiteSpaces(stream, ch);
+			pdfReadRegularRun(stream, token, ch);
+			if (token == "stream") {
+				// keeps the dictionary alive while the stream is built and
+				// releases it afterwards
+				shared_ptr<PdfObject> d = dictionary;
+				return new PdfStreamObject(*dictionary, stream);
+			} else {
+				return dictionary;
+			}
+		}
+		case PdfObject::NAME:
+		{
+			std::string name;
+			if (!pdfReadChar(stream, ch)) {
+				// '/' was the last byte: returning a name here would make
+				// enclosing loops see the same '/' again and again
+				return 0;
+			}
+			skipWhiteSpaces(stream, ch);
+			pdfReadRegularRun(stream, name, ch);
+			return PdfNameObject::nameObject(name);
+		}
+		case PdfObject::BOOLEAN:
+		{
+			std::string name;
+			pdfReadRegularRun(stream, name, ch);
+			return (name == "true") ? PdfBooleanObject::TRUE() : PdfBooleanObject::FALSE();
+		}
+		case PdfObject::INTEGER_NUMBER:
+		{
+			std::string str;
+			if ((ch == '+') || (ch == '-')) {
+				str += ch;
+				pdfReadChar(stream, ch);
+			}
+			while ((ch >= '0') && (ch <= '9')) {
+				str += ch;
+				if (!pdfReadChar(stream, ch)) {
+					break;
+				}
+			}
+			return PdfIntegerObject::integerObject(atoi(str.c_str()));
+		}
+		case PdfObject::STRING:
+		{
+			std::string value;
+			if (hexString) {
+				// ch holds the first character after '<'
+				char num[3];
+				num[2] = '\0';
+				int digits = 0;
+				while (ch != '>') {
+					// white space between digits is ignored
+					if (PdfCharacterTypeTable[(unsigned char)ch] != PDF_CHAR_WHITESPACE) {
+						num[digits++] = ch;
+						if (digits == 2) {
+							value += (char)strtol(num, 0, 16);
+							digits = 0;
+						}
+					}
+					if (!pdfReadChar(stream, ch)) {
+						break;
+					}
+				}
+				if (digits == 1) {
+					// odd number of digits: the missing one counts as 0
+					num[1] = '0';
+					value += (char)strtol(num, 0, 16);
+				}
+				ch = 0;
+			} else {
+				// literal string: ch is the opening '('
+				int depth = 1;
+				bool haveChar = pdfReadChar(stream, ch);
+				while (haveChar) {
+					if (ch == '\\') {
+						if (!pdfReadChar(stream, ch)) {
+							break;
+						}
+						if ((ch >= '0') && (ch <= '7')) {
+							// octal character code, one to three digits
+							int code = 0;
+							int count = 0;
+							do {
+								code = code * 8 + (ch - '0');
+								++count;
+								haveChar = pdfReadChar(stream, ch);
+							} while (haveChar && (count < 3) && (ch >= '0') && (ch <= '7'));
+							value += (char)code;
+							// ch already holds the next unprocessed character
+							continue;
+						}
+						if (ch == '\r') {
+							// escaped line break (CR or CR LF) is dropped
+							haveChar = pdfReadChar(stream, ch);
+							if (!haveChar || (ch != '\n')) {
+								continue;
+							}
+						} else if (ch == '\n') {
+							// escaped line break is dropped
+						} else if (ch == 'n') {
+							value += '\n';
+						} else if (ch == 'r') {
+							value += '\r';
+						} else if (ch == 't') {
+							value += '\t';
+						} else if (ch == 'b') {
+							value += '\b';
+						} else if (ch == 'f') {
+							value += '\f';
+						} else {
+							// "\(", "\)", "\\" and unknown escapes: keep the character
+							value += ch;
+						}
+					} else if (ch == '(') {
+						++depth;
+						value += ch;
+					} else if (ch == ')') {
+						if (--depth == 0) {
+							break;
+						}
+						value += ch;
+					} else {
+						value += ch;
+					}
+					haveChar = pdfReadChar(stream, ch);
+				}
+				// the closing ')' is consumed; no look-ahead is pending
+				ch = 0;
+			}
+			return new PdfStringObject(value);
+		}
+		case PdfObject::ARRAY:
+		{
+			PdfArrayObject *array = new PdfArrayObject();
+			ch = 0;
+			while (true) {
+				skipWhiteSpaces(stream, ch);
+				if (ch == 'R') {
+					// "n g R": fold the two preceding integers into a reference
+					const int size = array->size();
+					if ((size >= 2) &&
+							((*array)[size - 1]->type() == PdfObject::INTEGER_NUMBER) &&
+							((*array)[size - 2]->type() == PdfObject::INTEGER_NUMBER)) {
+						const int generation = ((PdfIntegerObject&)*array->popLast()).value();
+						const int number = ((PdfIntegerObject&)*array->popLast()).value();
+						array->addObject(new PdfObjectReference(number, generation));
+						ch = 0;
+					}
+				}
+				shared_ptr<PdfObject> object = readObject(stream, ch);
+				if (object.isNull()) {
+					break;
+				}
+				array->addObject(object);
+			}
+			std::cerr << "PdfArrayObject " << array->size() << "\n";
+			return array;
+		}
+		default:
+			break;
+	}
+
+	// Unsupported token: skip it (the current character is dropped, the
+	// following regular characters are collected for the trace only).
+	std::string buffer;
+	while (pdfReadChar(stream, ch) &&
+			(PdfCharacterTypeTable[(unsigned char)ch] == PDF_CHAR_REGULAR)) {
+		buffer += ch;
+	}
+	std::cerr << "buffer = " << buffer << "\n";
+
+	return 0;
+}
diff --git a/reader/src/formats/pdf/PdfObject.h b/reader/src/formats/pdf/PdfObject.h
new file mode 100644
index 0000000..76b8528
--- /dev/null
+++ b/reader/src/formats/pdf/PdfObject.h
@@ -0,0 +1,201 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __PDFOBJECT_H__
+#define __PDFOBJECT_H__
+
+#include <string>
+#include <vector>
+#include <map>
+
+#include <shared_ptr.h>
+
+class ZLInputStream;
+
+// Base class of all parsed PDF objects; also hosts the tokenizer/parser entry points.
+class PdfObject {
+
+public:
+ // Parses one object. ch is the look-ahead character (0 = none read yet) and
+ // is updated while parsing. Returns null when no object could be read.
+ static shared_ptr<PdfObject> readObject(ZLInputStream &stream, char &ch);
+ // Skips white space, then reads consecutive regular characters into buffer.
+ static void readToken(ZLInputStream &stream, std::string &buffer, char &ch);
+
+protected:
+ // Advances ch past white-space characters.
+ static void skipWhiteSpaces(ZLInputStream &stream, char &ch);
+
+public:
+ // Kind of object, as reported by type().
+ enum Type {
+ BOOLEAN,
+ INTEGER_NUMBER,
+ REAL_NUMBER,
+ STRING,
+ NAME,
+ ARRAY,
+ DICTIONARY,
+ STREAM,
+ NIL,
+ REFERENCE
+ };
+
+ virtual ~PdfObject();
+
+ virtual Type type() const = 0;
+};
+
+// Boolean value. Only two shared instances exist, obtained via TRUE()/FALSE().
+// NOTE(review): TRUE and FALSE are macros in some platform headers — confirm
+// no such header is included before this one.
+class PdfBooleanObject : public PdfObject {
+
+public:
+ static shared_ptr<PdfObject> TRUE();
+ static shared_ptr<PdfObject> FALSE();
+
+private:
+ PdfBooleanObject(bool value);
+
+public:
+ bool value() const;
+
+private:
+ Type type() const;
+
+private:
+ const bool myValue;
+};
+
+// Integer value; instances are created through integerObject() only.
+class PdfIntegerObject : public PdfObject {
+
+public:
+ // Factory; small non-negative values are served from a shared cache.
+ static shared_ptr<PdfObject> integerObject(int value);
+
+private:
+ PdfIntegerObject(int value);
+
+public:
+ int value() const;
+
+private:
+ Type type() const;
+
+private:
+ const int myValue;
+};
+
+// String value. Constructible only by PdfObject::readObject(); the text has
+// no public accessor.
+class PdfStringObject : public PdfObject {
+
+private:
+ PdfStringObject(const std::string &value);
+
+private:
+ Type type() const;
+
+private:
+ std::string myValue;
+
+friend shared_ptr<PdfObject> PdfObject::readObject(ZLInputStream &stream, char &ch);
+};
+
+// Interned name: nameObject() hands out exactly one object per distinct name,
+// so names are compared by pointer.
+class PdfNameObject : public PdfObject {
+
+public:
+ static shared_ptr<PdfObject> nameObject(const std::string &id);
+
+private:
+ // name text -> its unique object
+ static std::map<std::string,shared_ptr<PdfObject> > ourObjectMap;
+
+private:
+ PdfNameObject();
+
+private:
+ Type type() const;
+};
+
+// Dictionary keyed by (interned) name objects. Built by PdfObject::readObject();
+// read-only for everybody else.
+class PdfDictionaryObject : public PdfObject {
+
+private:
+ PdfDictionaryObject();
+ void setObject(shared_ptr<PdfObject> id, shared_ptr<PdfObject> object);
+
+public:
+ // Both lookups return null when the key is absent.
+ shared_ptr<PdfObject> operator [] (shared_ptr<PdfObject> id) const;
+ shared_ptr<PdfObject> operator [] (const std::string &id) const;
+
+private:
+ Type type() const;
+
+private:
+ std::map<shared_ptr<PdfObject>,shared_ptr<PdfObject> > myMap;
+
+friend shared_ptr<PdfObject> PdfObject::readObject(ZLInputStream &stream, char &ch);
+};
+
+// Stream object: the constructor reads the payload described by the
+// dictionary from dataStream into myData. Created only by PdfObject::readObject().
+class PdfStreamObject : public PdfObject {
+
+private:
+ PdfStreamObject(const PdfDictionaryObject &dictionary, ZLInputStream &dataStream);
+
+private:
+ Type type() const;
+
+private:
+ // Payload bytes (inflated when the filter is FlateDecode).
+ std::string myData;
+ /*
+ enum EncodingType {
+ UNKNOWN,
+ FLATE,
+ };
+ std::vector<EncodingType> myFilters;
+ */
+
+friend shared_ptr<PdfObject> PdfObject::readObject(ZLInputStream &stream, char &ch);
+};
+
+// Ordered list of objects. Built by PdfObject::readObject(); read-only for
+// everybody else.
+class PdfArrayObject : public PdfObject {
+
+private:
+ PdfArrayObject();
+ void addObject(shared_ptr<PdfObject> object);
+ // Removes and returns the last element; null if the array is empty.
+ shared_ptr<PdfObject> popLast();
+
+public:
+ int size() const;
+ // No bounds check: index must be in [0, size()).
+ shared_ptr<PdfObject> operator [] (int index) const;
+
+private:
+ Type type() const;
+
+private:
+ std::vector<shared_ptr<PdfObject> > myVector;
+
+friend shared_ptr<PdfObject> PdfObject::readObject(ZLInputStream &stream, char &ch);
+};
+
+// Indirect reference to another object, identified by object number and
+// generation number.
+class PdfObjectReference : public PdfObject {
+
+public:
+ PdfObjectReference(int number, int generation);
+
+ int number() const;
+ int generation() const;
+
+private:
+ Type type() const;
+
+private:
+ const int myNumber;
+ const int myGeneration;
+};
+
+#endif /* __PDFOBJECT_H__ */
diff --git a/reader/src/formats/pdf/PdfPlugin.cpp b/reader/src/formats/pdf/PdfPlugin.cpp
new file mode 100644
index 0000000..06325d4
--- /dev/null
+++ b/reader/src/formats/pdf/PdfPlugin.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLFile.h>
+#include <ZLInputStream.h>
+
+#include "PdfPlugin.h"
+#include "PdfDescriptionReader.h"
+#include "PdfBookReader.h"
+#include "../../library/Book.h"
+
+// A file is accepted purely by its extension, which must equal "pdf".
+bool PdfPlugin::acceptsFile(const ZLFile &file) const {
+ return file.extension() == "pdf";
+}
+
+// Fills book with the metadata found in its PDF file.
+// The body used to open ZLFile(path), but no `path` is declared in this scope.
+// NOTE(review): assumes Book provides file() returning the book's ZLFile —
+// confirm against library/Book.h.
+bool PdfPlugin::readMetaInfo(Book &book) const {
+	return PdfDescriptionReader(book).readMetaInfo(book.file().inputStream());
+}
+
+// No detection is performed; always reports success and leaves book untouched.
+bool PdfPlugin::readLanguageAndEncoding(Book &book) const {
+ return true;
+}
+
+// Parses the PDF file behind model into the model.
+// The body used to refer to `book`, which is not declared in this scope.
+// NOTE(review): assumes BookModel::book() yields the Book and Book::file()
+// its ZLFile — confirm against bookmodel/BookModel.h and library/Book.h.
+bool PdfPlugin::readModel(BookModel &model) const {
+	return PdfBookReader(model).readBook(model.book()->file().inputStream());
+}
diff --git a/reader/src/formats/pdf/PdfPlugin.h b/reader/src/formats/pdf/PdfPlugin.h
new file mode 100644
index 0000000..9c330f6
--- /dev/null
+++ b/reader/src/formats/pdf/PdfPlugin.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __PdfPLUGIN_H__
+#define __PdfPLUGIN_H__
+
+#include "../FormatPlugin.h"
+
+// FormatPlugin implementation for files with the "pdf" extension.
+class PdfPlugin : public FormatPlugin {
+
+public:
+ PdfPlugin();
+ ~PdfPlugin();
+ bool providesMetaInfo() const;
+ bool acceptsFile(const ZLFile &file) const;
+ bool readMetaInfo(Book &book) const;
+ bool readLanguageAndEncoding(Book &book) const;
+ bool readModel(BookModel &model) const;
+};
+
+// Trivial inline members: empty constructor/destructor, and the plugin
+// always claims to provide meta information.
+inline PdfPlugin::PdfPlugin() {}
+inline PdfPlugin::~PdfPlugin() {}
+inline bool PdfPlugin::providesMetaInfo() const { return true; }
+
+#endif /* __PdfPLUGIN_H__ */
diff --git a/reader/src/formats/pdf/StringStream.cpp b/reader/src/formats/pdf/StringStream.cpp
new file mode 100644
index 0000000..b2369df
--- /dev/null
+++ b/reader/src/formats/pdf/StringStream.cpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2008-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <algorithm>
+#include <cstring>
+
+#include "StringStream.h"
+
+// Creates a stream over `data`, positioned at offset 0.
+// myData is declared as a const reference in StringStream.h, so the string
+// is not copied here: `data` must stay alive for as long as the stream is used.
+StringStream::StringStream(const std::string &data) : myData(data), myOffset(0) {
+}
+
+// Rewinds the stream to the beginning of the data; always succeeds.
+bool StringStream::open() {
+ myOffset = 0;
+ return true;
+}
+
+// Copies up to maxSize bytes from the current position into `buffer`,
+// advances the position and returns the number of bytes consumed
+// (0 once the end of the data has been reached).
+// NOTE(review): a null `buffer` now skips the bytes instead of crashing;
+// presumably this matches how other ZLInputStream implementations treat a
+// null buffer -- confirm.
+std::size_t StringStream::read(char *buffer, std::size_t maxSize) {
+	// seek() and read() keep myOffset <= myData.length(), so this cannot wrap.
+	const std::size_t available = myData.length() - myOffset;
+	const std::size_t size = std::min(maxSize, available);
+	// memcpy must not receive a null pointer, even for a zero-length copy.
+	if (buffer != 0 && size != 0) {
+		std::memcpy(buffer, myData.data() + myOffset, size);
+	}
+	myOffset += size;
+	return size;
+}
+
+// Nothing to release: the stream only refers to a string it does not own.
+void StringStream::close() {
+}
+
+// Moves the read position.
+//   offset         - target position (absoluteOffset == true) or displacement
+//                    from the current position (absoluteOffset == false)
+//   absoluteOffset - selects how `offset` is interpreted
+// The resulting position is clamped to [0, data length].
+void StringStream::seek(int offset, bool absoluteOffset) {
+	// Work in a wide signed type so that adding the current position cannot
+	// be truncated back to int for large positions.
+	long long target = offset;
+	if (!absoluteOffset) {
+		target += static_cast<long long>(myOffset);
+	}
+	if (target < 0) {
+		target = 0;
+	}
+	const std::size_t length = myData.length();
+	if (static_cast<unsigned long long>(target) < length) {
+		myOffset = static_cast<std::size_t>(target);
+	} else {
+		myOffset = length;
+	}
+}
+
+// Returns the current read position, counted in bytes from the start of the data.
+std::size_t StringStream::offset() const {
+ return myOffset;
+}
+
+// Returns the total length of the underlying string in bytes.
+std::size_t StringStream::sizeOfOpened() {
+ return myData.length();
+}
diff --git a/reader/src/formats/pdf/StringStream.h b/reader/src/formats/pdf/StringStream.h
new file mode 100644
index 0000000..f46c038
--- /dev/null
+++ b/reader/src/formats/pdf/StringStream.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2008-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __STRINGSTREAM_H__
+#define __STRINGSTREAM_H__
+
+#include <ZLInputStream.h>
+
+// ZLInputStream implementation that reads from an in-memory std::string.
+class StringStream : public ZLInputStream {
+
+public:
+ // Does not copy `data` (see myData below); the caller keeps ownership.
+ StringStream(const std::string &data);
+
+public:
+ bool open();
+ std::size_t read(char *buffer, std::size_t maxSize);
+ void close();
+
+ void seek(int offset, bool absoluteOffset);
+ std::size_t offset() const;
+ std::size_t sizeOfOpened();
+
+private:
+ // Reference to the caller's string: it must outlive this stream, and a
+ // stream built from a temporary string is left dangling.
+ const std::string &myData;
+ // Current read position within myData.
+ std::size_t myOffset;
+};
+
+#endif /* __STRINGSTREAM_H__ */
diff --git a/reader/src/formats/rtf/RtfBookReader.cpp b/reader/src/formats/rtf/RtfBookReader.cpp
new file mode 100644
index 0000000..cf16bc7
--- /dev/null
+++ b/reader/src/formats/rtf/RtfBookReader.cpp
@@ -0,0 +1,232 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cctype>
+
+#include <ZLStringUtil.h>
+#include <ZLFileImage.h>
+#include <ZLTextStyleEntry.h>
+
+#include "RtfBookReader.h"
+#include "../../bookmodel/BookModel.h"
+
+// Binds the reader to `model` and passes `encoding` to the RtfReader base.
+// myImageIndex, myFootnoteIndex and myCurrentState.ReadText are not set here;
+// readDocument() assigns them before parsing starts.
+RtfBookReader::RtfBookReader(BookModel &model, const std::string &encoding) : RtfReader(encoding), myBookReader(model) {
+}
+
+// addCharData() flushes the pending output buffer once it holds at least this many bytes.
+static const std::size_t maxBufferSize = 1024;
+
+// Receives a run of character data from the parser.
+// Data is ignored while the current state does not read text. Otherwise:
+//  - data to be converted (or any data when no converter exists) is collected
+//    in myOutputBuffer, which is flushed once it reaches maxBufferSize;
+//  - data that must not be converted is emitted right away, after the
+//    pending buffer has been flushed so that ordering is preserved.
+void RtfBookReader::addCharData(const char *data, std::size_t len, bool convert) {
+	if (!myCurrentState.ReadText) {
+		return;
+	}
+
+	const bool buffered = convert || myConverter.isNull();
+	if (!buffered) {
+		flushBuffer();
+		std::string text(data, len);
+		characterDataHandler(text);
+		return;
+	}
+
+	myOutputBuffer.append(data, len);
+	if (myOutputBuffer.size() >= maxBufferSize) {
+		flushBuffer();
+	}
+}
+
+// Emits the pending contents of myOutputBuffer and empties it.
+// When text is being read, the bytes are passed through myConverter first
+// (if one exists) and handed to characterDataHandler(); when text is not
+// being read, the buffered bytes are simply discarded.
+void RtfBookReader::flushBuffer() {
+	if (myOutputBuffer.empty()) {
+		return;
+	}
+
+	if (myCurrentState.ReadText) {
+		if (myConverter.isNull()) {
+			characterDataHandler(myOutputBuffer);
+		} else {
+			// scratch string kept between calls; always left empty on exit
+			static std::string converted;
+			const char *first = myOutputBuffer.data();
+			myConverter->convert(converted, first, first + myOutputBuffer.length());
+			characterDataHandler(converted);
+			converted.erase();
+		}
+	}
+
+	myOutputBuffer.erase();
+}
+
+// Reacts to the parser entering (on == true) or leaving (on == false) an RTF
+// destination.
+void RtfBookReader::switchDestination(DestinationType destination, bool on) {
+ switch (destination) {
+ case DESTINATION_NONE:
+ break;
+ // Destinations whose content is not body text: text reading is switched
+ // off while inside and back on when the destination is left.
+ case DESTINATION_SKIP:
+ case DESTINATION_INFO:
+ case DESTINATION_TITLE:
+ case DESTINATION_AUTHOR:
+ case DESTINATION_STYLESHEET:
+ myCurrentState.ReadText = !on;
+ break;
+ case DESTINATION_PICTURE:
+ if (on) {
+ // emit pending text and close the paragraph before the picture starts
+ flushBuffer();
+ if (myBookReader.paragraphIsOpen()) {
+ myBookReader.endParagraph();
+ }
+ }
+ myCurrentState.ReadText = !on;
+ break;
+ case DESTINATION_FOOTNOTE:
+ flushBuffer();
+ if (on) {
+ // footnote ids are consecutive numbers rendered as text
+ std::string id;
+ ZLStringUtil::appendNumber(id, myFootnoteIndex++);
+
+ // remember the enclosing state so it can be restored on exit
+ myStateStack.push(myCurrentState);
+ myCurrentState.Id = id;
+ myCurrentState.ReadText = true;
+
+ // in the current text model: a hyperlink whose visible text is the id
+ myBookReader.addHyperlinkControl(FOOTNOTE, id);
+ myBookReader.addData(id);
+ myBookReader.addControl(FOOTNOTE, false);
+
+ // following text goes to the footnote model labelled with the same id
+ myBookReader.setFootnoteTextModel(id);
+ myBookReader.addHyperlinkLabel(id);
+ myBookReader.pushKind(REGULAR);
+ myBookReader.beginParagraph();
+ } else {
+ myBookReader.endParagraph();
+ myBookReader.popKind();
+
+ if (!myStateStack.empty()) {
+ myCurrentState = myStateStack.top();
+ myStateStack.pop();
+ }
+
+ // an empty stack means we are back in the main text; otherwise we
+ // return to the enclosing footnote's model
+ if (myStateStack.empty()) {
+ myBookReader.setMainTextModel();
+ } else {
+ myBookReader.setFootnoteTextModel(myCurrentState.Id);
+ }
+ }
+ break;
+ }
+}
+
+// Registers an image embedded in the RTF file.
+//   mimeType    - image type reported by the parser
+//   fileName    - path of the RTF file that contains the image data
+//   startOffset - offset of the image data inside that file
+//   size        - length of the image data
+// The image gets the next sequential numeric id; a reference to it is added
+// to the text before the image itself is stored (hex-encoded source data).
+void RtfBookReader::insertImage(shared_ptr<ZLMimeType> mimeType, const std::string &fileName, std::size_t startOffset, std::size_t size) {
+	std::string imageId;
+	ZLStringUtil::appendNumber(imageId, myImageIndex);
+	++myImageIndex;
+
+	myBookReader.addImageReference(imageId);
+
+	const ZLFile imageFile(fileName, mimeType);
+	myBookReader.addImage(imageId, new ZLFileImage(imageFile, startOffset, size, ZLFileImage::ENCODING_HEX));
+}
+
+// Appends `str` to the book text, opening a paragraph first if none is open.
+// Does nothing while text reading is switched off. Always returns true.
+bool RtfBookReader::characterDataHandler(std::string &str) {
+	if (!myCurrentState.ReadText) {
+		return true;
+	}
+	if (!myBookReader.paragraphIsOpen()) {
+		myBookReader.beginParagraph();
+	}
+	myBookReader.addData(str);
+	return true;
+}
+
+// Parses `file` into the book model and returns the result of
+// RtfReader::readDocument().
+// Counters and reading state are reset first, and a regular paragraph is
+// opened in the main text model. After parsing, pending text is flushed, the
+// last paragraph is closed and leftover footnote states are dropped.
+bool RtfBookReader::readDocument(const ZLFile &file) {
+	// per-document state
+	myImageIndex = 0;
+	myFootnoteIndex = 1;
+	myCurrentState.ReadText = true;
+
+	myBookReader.setMainTextModel();
+	myBookReader.pushKind(REGULAR);
+	myBookReader.beginParagraph();
+
+	const bool parsed = RtfReader::readDocument(file);
+
+	flushBuffer();
+	myBookReader.endParagraph();
+	for (; !myStateStack.empty(); myStateStack.pop()) {
+	}
+
+	return parsed;
+}
+
+// Called after the bold/italic/underline flag named by `property` has changed
+// in myState (the command objects update the flag before calling). Translates
+// the new flag value into kind push/pop and control entries of the book reader.
+// Ignored while text reading is switched off.
+void RtfBookReader::setFontProperty(FontProperty property) {
+ if (!myCurrentState.ReadText) {
+ //DPRINT("change style not in text.\n");
+ return;
+ }
+ // text collected so far belongs to the previous style
+ flushBuffer();
+
+ switch (property) {
+ case FONT_BOLD:
+ if (myState.Bold) {
+ myBookReader.pushKind(STRONG);
+ } else {
+ myBookReader.popKind();
+ }
+ myBookReader.addControl(STRONG, myState.Bold);
+ break;
+ case FONT_ITALIC:
+ if (myState.Italic) {
+ if (!myState.Bold) {
+ //DPRINT("add style emphasis.\n");
+ myBookReader.pushKind(EMPHASIS);
+ myBookReader.addControl(EMPHASIS, true);
+ } else {
+ //DPRINT("add style emphasis and strong.\n");
+ // bold is active: close STRONG, open EMPHASIS, then reopen STRONG
+ // so that STRONG ends up on top of EMPHASIS
+ myBookReader.popKind();
+ myBookReader.addControl(STRONG, false);
+
+ myBookReader.pushKind(EMPHASIS);
+ myBookReader.addControl(EMPHASIS, true);
+ myBookReader.pushKind(STRONG);
+ myBookReader.addControl(STRONG, true);
+ }
+ } else {
+ if (!myState.Bold) {
+ //DPRINT("remove style emphasis.\n");
+ myBookReader.addControl(EMPHASIS, false);
+ myBookReader.popKind();
+ } else {
+ //DPRINT("remove style strong n emphasis, add strong.\n");
+ // bold is active: close both kinds, then reopen STRONG alone
+ myBookReader.addControl(STRONG, false);
+ myBookReader.popKind();
+ myBookReader.addControl(EMPHASIS, false);
+ myBookReader.popKind();
+
+ myBookReader.pushKind(STRONG);
+ myBookReader.addControl(STRONG, true);
+ }
+ }
+ break;
+ case FONT_UNDERLINED:
+ // underlining produces no output
+ break;
+ }
+}
+
+// Ends the current paragraph (after flushing pending text) and starts a new
+// one, re-applying the current alignment when one is set.
+void RtfBookReader::newParagraph() {
+ flushBuffer();
+ myBookReader.endParagraph();
+ myBookReader.beginParagraph();
+ if (myState.Alignment != ALIGN_UNDEFINED) {
+ setAlignment();
+ }
+}
+
+// Code page announcements are ignored by this reader; the encoding passed to
+// the constructor stays in effect.
+void RtfBookReader::setEncoding(int) {
+}
+
+// Adds a style entry carrying the current alignment (myState.Alignment) to
+// the book text.
+void RtfBookReader::setAlignment() {
+ ZLTextStyleEntry entry(ZLTextStyleEntry::STYLE_OTHER_ENTRY);
+ entry.setAlignmentType(myState.Alignment);
+ myBookReader.addStyleEntry(entry);
+ // TODO: call addStyleCloseEntry somewhere (?)
+}
diff --git a/reader/src/formats/rtf/RtfBookReader.h b/reader/src/formats/rtf/RtfBookReader.h
new file mode 100644
index 0000000..a977cbd
--- /dev/null
+++ b/reader/src/formats/rtf/RtfBookReader.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __RTFBOOKREADER_H__
+#define __RTFBOOKREADER_H__
+
+#include <vector>
+
+#include "RtfReader.h"
+#include "../../bookmodel/BookReader.h"
+
+class ZLFile;
+
+class BookModel;
+
+// RtfReader subclass that turns the parser callbacks into book model content
+// (text, footnotes, images and style entries) through a BookReader.
+class RtfBookReader : public RtfReader {
+
+public:
+ RtfBookReader(BookModel &model, const std::string &encoding);
+ ~RtfBookReader();
+
+ // Parses `file` into the model given to the constructor.
+ bool readDocument(const ZLFile &file);
+
+ // Appends text to the current paragraph; always returns true.
+ bool characterDataHandler(std::string &str);
+ // Emits and clears myOutputBuffer.
+ void flushBuffer();
+
+ // Parser callbacks.
+ void setEncoding(int code);
+ void setAlignment();
+ void switchDestination(DestinationType destination, bool on);
+ void addCharData(const char *data, std::size_t len, bool convert);
+ void insertImage(shared_ptr<ZLMimeType> mimeType, const std::string &fileName, std::size_t startOffset, std::size_t size);
+
+ void setFontProperty(FontProperty property);
+ void newParagraph();
+
+private:
+ BookReader myBookReader;
+
+ // Text collected by addCharData() and not yet emitted.
+ std::string myOutputBuffer;
+
+ // Counters used to generate image and footnote ids; reset by readDocument().
+ int myImageIndex;
+ int myFootnoteIndex;
+
+ struct RtfBookReaderState {
+ // Id of the footnote being read (assigned when a footnote is entered).
+ std::string Id;
+ // Whether incoming character data is added to the book text.
+ bool ReadText;
+ };
+
+ RtfBookReaderState myCurrentState;
+ // States saved while footnotes are being read.
+ // NOTE(review): RtfReader::parseDocument() also uses a member named
+ // myStateStack; presumably that one is private to the base class and this
+ // is a separate stack -- confirm.
+ std::stack<RtfBookReaderState> myStateStack;
+};
+
+// Nothing to release explicitly; members clean up after themselves.
+inline RtfBookReader::~RtfBookReader() {}
+
+#endif /* __RTFBOOKREADER_H__ */
diff --git a/reader/src/formats/rtf/RtfDescriptionReader.cpp b/reader/src/formats/rtf/RtfDescriptionReader.cpp
new file mode 100644
index 0000000..571e66b
--- /dev/null
+++ b/reader/src/formats/rtf/RtfDescriptionReader.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLInputStream.h>
+
+#include "RtfDescriptionReader.h"
+
+#include "../FormatPlugin.h"
+#include "../../library/Book.h"
+#include "../../library/Author.h"
+
+// Creates a reader that stores the meta information it finds in `book`; the
+// book's current encoding is passed to the RtfReader base.
+// myDoRead is not set here; readDocument() assigns it before parsing.
+RtfDescriptionReader::RtfDescriptionReader(Book &book) : RtfReader(book.encoding()), myBook(book) {
+}
+
+// Handles a code page announcement from the document.
+// If the encoding collection knows `code`, its converter is installed and the
+// encoding name is stored in the book; otherwise the collection's default
+// converter is installed and the book's encoding is left unchanged.
+void RtfDescriptionReader::setEncoding(int code) {
+	ZLEncodingCollection &encodings = ZLEncodingCollection::Instance();
+	ZLEncodingConverterInfoPtr info = encodings.info(code);
+	if (info.isNull()) {
+		myConverter = encodings.defaultConverter();
+		return;
+	}
+	myConverter = info->createConverter();
+	myBook.setEncoding(info->name());
+}
+
+// Scans `file` for meta information and returns the result of
+// RtfReader::readDocument(). If the book still has no encoding afterwards,
+// the plugin collection's default encoding option is stored in it.
+bool RtfDescriptionReader::readDocument(const ZLFile &file) {
+	// collect character data only inside title/author destinations
+	myDoRead = false;
+
+	const bool parsed = RtfReader::readDocument(file);
+
+	const bool encodingMissing = myBook.encoding().empty();
+	if (encodingMissing) {
+		myBook.setEncoding(PluginCollection::Instance().DefaultEncodingOption.value());
+	}
+	return parsed;
+}
+
+// Collects character data into myBuffer while a title or author destination
+// is being read (myDoRead); all other data is ignored.
+//   data, len - the bytes delivered by the parser
+//   convert   - whether the bytes should go through the encoding converter
+// Without a converter the bytes are appended unconverted, the same fallback
+// RtfBookReader uses, instead of dereferencing a null converter.
+void RtfDescriptionReader::addCharData(const char *data, std::size_t len, bool convert) {
+	if (!myDoRead || len == 0) {
+		return;
+	}
+	if (convert && !myConverter.isNull()) {
+		myConverter->convert(myBuffer, data, data + len);
+	} else {
+		myBuffer.append(data, len);
+	}
+}
+
+// Tracks the destinations that carry meta information.
+//  - leaving the info group interrupts parsing;
+//  - entering title/author starts collecting text, and leaving one stores the
+//    collected text in the book and clears the buffer;
+//  - all other destinations are ignored.
+// Parsing is also interrupted as soon as title, authors and encoding are all
+// known.
+void RtfDescriptionReader::switchDestination(DestinationType destination, bool on) {
+	if (destination == DESTINATION_INFO) {
+		if (!on) {
+			interrupt();
+		}
+	} else if (destination == DESTINATION_TITLE) {
+		myDoRead = on;
+		if (!on) {
+			myBook.setTitle(myBuffer);
+			myBuffer.erase();
+		}
+	} else if (destination == DESTINATION_AUTHOR) {
+		myDoRead = on;
+		if (!on) {
+			myBook.addAuthor(myBuffer);
+			myBuffer.erase();
+		}
+	}
+
+	// nothing left to look for: stop early
+	if (!myBook.title().empty() && !myBook.authors().empty() && !myBook.encoding().empty()) {
+		interrupt();
+	}
+}
+
+// The remaining parser callbacks are intentionally empty: images, font
+// properties, paragraph breaks and alignment play no part in meta information.
+void RtfDescriptionReader::insertImage(shared_ptr<ZLMimeType>, const std::string&, std::size_t, std::size_t) {
+}
+
+void RtfDescriptionReader::setFontProperty(FontProperty) {
+}
+
+void RtfDescriptionReader::newParagraph() {
+}
+
+void RtfDescriptionReader::setAlignment() {
+}
diff --git a/reader/src/formats/rtf/RtfDescriptionReader.h b/reader/src/formats/rtf/RtfDescriptionReader.h
new file mode 100644
index 0000000..ff4ffa1
--- /dev/null
+++ b/reader/src/formats/rtf/RtfDescriptionReader.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __RTFDESCRIPTIONREADER_H__
+#define __RTFDESCRIPTIONREADER_H__
+
+#include <string>
+
+#include "RtfReader.h"
+
+class Book;
+
+// RtfReader subclass that extracts title, authors and encoding from an RTF
+// file into a Book and stops parsing as early as possible.
+class RtfDescriptionReader : public RtfReader {
+
+public:
+ RtfDescriptionReader(Book &book);
+ ~RtfDescriptionReader();
+
+ bool readDocument(const ZLFile &file);
+
+ // Parser callbacks; only setEncoding, switchDestination and addCharData
+ // do any work, the others are empty.
+ void setEncoding(int code);
+ void setAlignment();
+ void switchDestination(DestinationType destination, bool on);
+ void addCharData(const char *data, std::size_t len, bool convert);
+ void insertImage(shared_ptr<ZLMimeType> mimeType, const std::string &fileName, std::size_t startOffset, std::size_t size);
+
+ void setFontProperty(FontProperty property);
+ void newParagraph();
+
+private:
+ // Book receiving the extracted meta information.
+ Book &myBook;
+
+ // True while inside a title or author destination.
+ bool myDoRead;
+ // Text collected for the destination currently being read.
+ std::string myBuffer;
+};
+
+// Nothing to release: the book is only referenced, not owned.
+inline RtfDescriptionReader::~RtfDescriptionReader() {}
+
+#endif /* __RTFDESCRIPTIONREADER_H__ */
diff --git a/reader/src/formats/rtf/RtfPlugin.cpp b/reader/src/formats/rtf/RtfPlugin.cpp
new file mode 100644
index 0000000..42ce39b
--- /dev/null
+++ b/reader/src/formats/rtf/RtfPlugin.cpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLStringUtil.h>
+#include <ZLFile.h>
+#include <ZLInputStream.h>
+
+#include "RtfPlugin.h"
+#include "RtfDescriptionReader.h"
+#include "RtfBookReader.h"
+#include "RtfReaderStream.h"
+
+#include "../../bookmodel/BookModel.h"
+#include "../../library/Book.h"
+
+// Reports that this plugin does not provide meta information.
+bool RtfPlugin::providesMetaInfo() const {
+ return false;
+}
+
+// Accepts files whose extension, as reported by ZLFile, is exactly "rtf".
+bool RtfPlugin::acceptsFile(const ZLFile &file) const {
+ return file.extension() == "rtf";
+}
+
+// Fills `book` with meta information read from its RTF file.
+// Encoding and language are detected first from an RtfReaderStream created
+// over the file (constructed with the value 50000); the document is then
+// scanned by RtfDescriptionReader, whose result is returned.
+bool RtfPlugin::readMetaInfo(Book &book) const {
+	shared_ptr<ZLInputStream> stream = new RtfReaderStream(book.file(), 50000);
+	if (stream.isNull()) {
+		return false;
+	}
+
+	detectEncodingAndLanguage(book, *stream);
+
+	return RtfDescriptionReader(book).readDocument(book.file());
+}
+
+// Builds the book model by parsing the book's file with RtfBookReader, using
+// the encoding stored in the book. Returns the reader's result.
+bool RtfPlugin::readModel(BookModel &model) const {
+	const Book &book = *model.book();
+	RtfBookReader reader(model, book.encoding());
+	return reader.readDocument(book.file());
+}
+// No-op hook: `book` is deliberately unused and success is always reported.
+bool RtfPlugin::readLanguageAndEncoding(Book &book) const {
+ (void)book;
+ return true;
+}
diff --git a/reader/src/formats/rtf/RtfPlugin.h b/reader/src/formats/rtf/RtfPlugin.h
new file mode 100644
index 0000000..cb3ef9d
--- /dev/null
+++ b/reader/src/formats/rtf/RtfPlugin.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __RTFPLUGIN_H__
+#define __RTFPLUGIN_H__
+
+#include "../FormatPlugin.h"
+
+// FormatPlugin implementation for RTF files.
+class RtfPlugin : public FormatPlugin {
+
+public:
+ // Always returns false.
+ bool providesMetaInfo() const;
+ // True for files with the "rtf" extension.
+ bool acceptsFile(const ZLFile &file) const;
+ // Detects encoding/language and reads title and authors.
+ bool readMetaInfo(Book &book) const;
+ // No-op that always returns true.
+ bool readLanguageAndEncoding(Book &book) const;
+ // Builds the book model via RtfBookReader.
+ bool readModel(BookModel &model) const;
+};
+
+#endif /* __RTFPLUGIN_H__ */
diff --git a/reader/src/formats/rtf/RtfReader.cpp b/reader/src/formats/rtf/RtfReader.cpp
new file mode 100644
index 0000000..91fea0c
--- /dev/null
+++ b/reader/src/formats/rtf/RtfReader.cpp
@@ -0,0 +1,470 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cstdlib>
+#include <cctype>
+
+#include <ZLFile.h>
+#include <ZLInputStream.h>
+
+#include "RtfReader.h"
+
+// Keyword -> command table shared by all readers; filled on first use by
+// fillKeywordMap().
+std::map<std::string, RtfCommand*> RtfReader::ourKeywordMap;
+
+// Size in bytes of the buffer used to read the input stream chunk by chunk.
+static const int rtfStreamBufferSize = 4096;
+
+// Passes `encoding` to EncodedTextReader and starts with no pending image type.
+RtfReader::RtfReader(const std::string &encoding) : EncodedTextReader(encoding) {
+ myNextImageMimeType = ZLMimeType::EMPTY;
+}
+
+// Out-of-line destructors with empty bodies.
+RtfReader::~RtfReader() {
+}
+
+RtfCommand::~RtfCommand() {
+}
+
+// Recognised keyword that requires no action.
+void RtfDummyCommand::run(RtfReader&, int*) const {
+}
+
+// Paragraph break: forwarded to the reader's newParagraph() callback.
+void RtfNewParagraphCommand::run(RtfReader &reader, int*) const {
+ reader.newParagraph();
+}
+
+// Remembers which font property (bold, italic or underlined) this command toggles.
+RtfFontPropertyCommand::RtfFontPropertyCommand(RtfReader::FontProperty property) : myProperty(property) {
+}
+
+// Switches the font property of this command on or off.
+// A keyword without parameter or with a non-zero parameter switches the
+// property on; a zero parameter switches it off. The reader is notified via
+// setFontProperty() only when the stored flag actually changes.
+void RtfFontPropertyCommand::run(RtfReader &reader, int *parameter) const {
+	const bool enable = (parameter != 0) ? (*parameter != 0) : true;
+
+	if (myProperty == RtfReader::FONT_BOLD) {
+		if (reader.myState.Bold != enable) {
+			reader.myState.Bold = enable;
+			reader.setFontProperty(RtfReader::FONT_BOLD);
+		}
+	} else if (myProperty == RtfReader::FONT_ITALIC) {
+		if (reader.myState.Italic != enable) {
+			reader.myState.Italic = enable;
+			reader.setFontProperty(RtfReader::FONT_ITALIC);
+		}
+	} else if (myProperty == RtfReader::FONT_UNDERLINED) {
+		if (reader.myState.Underlined != enable) {
+			reader.myState.Underlined = enable;
+			reader.setFontProperty(RtfReader::FONT_UNDERLINED);
+		}
+	}
+}
+
+// Remembers the alignment this command selects.
+RtfAlignmentCommand::RtfAlignmentCommand(ZLTextAlignmentType alignment) : myAlignment(alignment) {
+}
+
+// Stores the alignment in the reader state and notifies the reader, but only
+// when it differs from the current alignment.
+void RtfAlignmentCommand::run(RtfReader &reader, int*) const {
+ if (reader.myState.Alignment != myAlignment) {
+ reader.myState.Alignment = myAlignment;
+ reader.setAlignment();
+ }
+}
+
+// Remembers the replacement text emitted for the keyword.
+RtfCharCommand::RtfCharCommand(const std::string &chr) : myChar(chr) {
+}
+
+// Emits the replacement text as character data with conversion switched off
+// (the third argument is `false`).
+void RtfCharCommand::run(RtfReader &reader, int*) const {
+ reader.processCharData(myChar.data(), myChar.length(), false);
+}
+
+// Remembers the destination this command switches to.
+RtfDestinationCommand::RtfDestinationCommand(RtfReader::DestinationType destination) : myDestination(destination) {
+}
+
+// Switches the reader to this command's destination.
+// Nothing happens if the reader is already in that destination. Entering a
+// picture destination additionally turns on hex data reading and clears the
+// pending image type. The reader is notified through switchDestination().
+void RtfDestinationCommand::run(RtfReader &reader, int*) const {
+	if (reader.myState.Destination != myDestination) {
+		reader.myState.Destination = myDestination;
+		const bool isPicture = (myDestination == RtfReader::DESTINATION_PICTURE);
+		if (isPicture) {
+			reader.myState.ReadDataAsHex = true;
+			reader.myNextImageMimeType = ZLMimeType::EMPTY;
+		}
+		reader.switchDestination(myDestination, true);
+	}
+}
+
+// Placeholder for style sheet support: both branches contain only
+// commented-out code, so the command currently has no effect.
+void RtfStyleCommand::run(RtfReader &reader, int*) const {
+ if (reader.myState.Destination == RtfReader::DESTINATION_STYLESHEET) {
+ //std::cerr << "Add style index: " << val << "\n";
+
+ //sprintf(style_attributes[0], "%i", val);
+ } else /*if (myState.Destination == rdsContent)*/ {
+ //std::cerr << "Set style index: " << val << "\n";
+
+ //sprintf(style_attributes[0], "%i", val);
+ }
+}
+
+// Forwards the code page number to the reader; a keyword without a parameter
+// is ignored.
+void RtfCodepageCommand::run(RtfReader &reader, int *parameter) const {
+ if (parameter != 0) {
+ reader.setEncoding(*parameter);
+ }
+}
+
+// Registered for the "*" keyword: flags the reader so that processKeyword()
+// skips the destination if the keyword that follows is unknown.
+void RtfSpecialCommand::run(RtfReader &reader, int*) const {
+ reader.mySpecialMode = true;
+}
+
+// Remembers the image type announced by the keyword.
+RtfPictureCommand::RtfPictureCommand(shared_ptr<ZLMimeType> mimeType) : myMimeType(mimeType) {
+}
+
+// Records the type of the image whose data follows.
+void RtfPictureCommand::run(RtfReader &reader, int*) const {
+ reader.myNextImageMimeType = myMimeType;
+}
+
+// Switches off every font property that is currently on, notifying the reader
+// once per property that changes.
+void RtfFontResetCommand::run(RtfReader &reader, int*) const {
+ if (reader.myState.Bold) {
+ reader.myState.Bold = false;
+ reader.setFontProperty(RtfReader::FONT_BOLD);
+ }
+ if (reader.myState.Italic) {
+ reader.myState.Italic = false;
+ reader.setFontProperty(RtfReader::FONT_ITALIC);
+ }
+ if (reader.myState.Underlined) {
+ reader.myState.Underlined = false;
+ reader.setFontProperty(RtfReader::FONT_UNDERLINED);
+ }
+}
+
+// Registers `command` for keyword `tag`. std::map::insert leaves an already
+// existing entry for the same tag unchanged.
+void RtfReader::addAction(const std::string &tag, RtfCommand *command) {
+ ourKeywordMap.insert(std::make_pair(tag, command));
+}
+
+// Populates the shared keyword table on first use; later calls do nothing.
+// The command objects are allocated here and stay in the static map; one
+// command object may be registered under several keywords.
+void RtfReader::fillKeywordMap() {
+	if (!ourKeywordMap.empty()) {
+		return;
+	}
+
+	addAction("*", new RtfSpecialCommand());
+	addAction("ansicpg", new RtfCodepageCommand());
+
+	// destinations whose whole content is ignored
+	static const char *keywordsToSkip[] = {
+		"buptim", "colortbl", "comment", "creatim", "doccomm", "fonttbl",
+		"footer", "footerf", "footerl", "footerr", "ftncn", "ftnsep",
+		"ftnsepc", "header", "headerf", "headerl", "headerr", "keywords",
+		"operator", "printim", "private1", "revtim", "rxe", "subject",
+		"tc", "txe", "xe", 0
+	};
+	RtfCommand *skipCommand = new RtfDestinationCommand(RtfReader::DESTINATION_SKIP);
+	for (const char **i = keywordsToSkip; *i != 0; ++i) {
+		addAction(*i, skipCommand);
+	}
+	addAction("shppict", new RtfDummyCommand());
+	addAction("info", new RtfDestinationCommand(RtfReader::DESTINATION_INFO));
+	addAction("title", new RtfDestinationCommand(RtfReader::DESTINATION_TITLE));
+	addAction("author", new RtfDestinationCommand(RtfReader::DESTINATION_AUTHOR));
+	addAction("pict", new RtfDestinationCommand(RtfReader::DESTINATION_PICTURE));
+	addAction("stylesheet", new RtfDestinationCommand(RtfReader::DESTINATION_STYLESHEET));
+	addAction("footnote", new RtfDestinationCommand(RtfReader::DESTINATION_FOOTNOTE));
+
+	RtfCommand *newParagraphCommand = new RtfNewParagraphCommand();
+	addAction("\n", newParagraphCommand);
+	addAction("\r", newParagraphCommand);
+	addAction("par", newParagraphCommand);
+
+	// keywords replaced by literal text (UTF-8 encoded)
+	addAction("\x09", new RtfCharCommand("\x09"));
+	addAction("_", new RtfCharCommand("-"));
+	addAction("\\", new RtfCharCommand("\\"));
+	addAction("{", new RtfCharCommand("{"));
+	addAction("}", new RtfCharCommand("}"));
+	addAction("bullet", new RtfCharCommand("\xE2\x80\xA2")); // &bullet;
+	addAction("endash", new RtfCharCommand("\xE2\x80\x93")); // &ndash;
+	addAction("emdash", new RtfCharCommand("\xE2\x80\x94")); // &mdash;
+	// U+00A0 is C2 A0 in UTF-8; C0 A0 is an invalid overlong sequence
+	addAction("~", new RtfCharCommand("\xC2\xA0")); // &nbsp;
+	addAction("enspace", new RtfCharCommand("\xE2\x80\x82")); // &ensp;
+	addAction("emspace", new RtfCharCommand("\xE2\x80\x83")); // &emsp;
+	addAction("lquote", new RtfCharCommand("\xE2\x80\x98")); // &lsquo;
+	addAction("rquote", new RtfCharCommand("\xE2\x80\x99")); // &rsquo;
+	addAction("ldblquote", new RtfCharCommand("\xE2\x80\x9C")); // &ldquo;
+	addAction("rdblquote", new RtfCharCommand("\xE2\x80\x9D")); // &rdquo;
+
+	addAction("jpegblip", new RtfPictureCommand(ZLMimeType::IMAGE_JPEG));
+	addAction("pngblip", new RtfPictureCommand(ZLMimeType::IMAGE_PNG));
+
+	addAction("s", new RtfStyleCommand());
+
+	addAction("qc", new RtfAlignmentCommand(ALIGN_CENTER));
+	addAction("ql", new RtfAlignmentCommand(ALIGN_LEFT));
+	addAction("qr", new RtfAlignmentCommand(ALIGN_RIGHT));
+	addAction("qj", new RtfAlignmentCommand(ALIGN_JUSTIFY));
+	addAction("pard", new RtfAlignmentCommand(ALIGN_UNDEFINED));
+
+	addAction("b", new RtfFontPropertyCommand(RtfReader::FONT_BOLD));
+	addAction("i", new RtfFontPropertyCommand(RtfReader::FONT_ITALIC));
+	addAction("u", new RtfFontPropertyCommand(RtfReader::FONT_UNDERLINED));
+	addAction("plain", new RtfFontResetCommand());
+}
+
+// Tokenizes the RTF stream and drives the reader callbacks.
+//
+// The stream is consumed in chunks of rtfStreamBufferSize bytes; keywords and
+// parameters that straddle a chunk boundary are carried over in `keyword` and
+// `parameterString`. `dataStart` marks the first byte of the current chunk
+// that has not been handed to a callback yet.
+//
+// Returns false if anything other than '}' or whitespace follows the closing
+// brace of the document. Otherwise returns true when parsing was interrupted
+// or when every opened group has been closed.
+bool RtfReader::parseDocument() {
+	enum {
+		READ_NORMAL_DATA,
+		READ_BINARY_DATA,
+		READ_HEX_SYMBOL,
+		READ_KEYWORD,
+		READ_KEYWORD_PARAMETER,
+		READ_END_OF_FILE
+	} parserState = READ_NORMAL_DATA;
+
+	std::string keyword;
+	std::string parameterString;
+	std::string hexString;
+	int imageStartOffset = -1;
+
+	while (!myIsInterrupted) {
+		const char *ptr = myStreamBuffer;
+		const char *end = myStreamBuffer + myStream->read(myStreamBuffer, rtfStreamBufferSize);
+		if (ptr == end) {
+			break;
+		}
+		const char *dataStart = ptr;
+		bool readNextChar = true;
+		while (ptr != end) {
+			// The <cctype> classifiers are undefined for negative values, so
+			// they are always given the byte as unsigned char.
+			const unsigned char current = static_cast<unsigned char>(*ptr);
+			switch (parserState) {
+				case READ_END_OF_FILE:
+					if (*ptr != '}' && !std::isspace(current)) {
+						return false;
+					}
+					break;
+				case READ_BINARY_DATA:
+					// TODO: optimize
+					processCharData(ptr, 1);
+					// The byte has been delivered; keep dataStart past it so
+					// it is not delivered again as normal data later on.
+					dataStart = ptr + 1;
+					--myBinaryDataSize;
+					if (myBinaryDataSize == 0) {
+						parserState = READ_NORMAL_DATA;
+					}
+					break;
+				case READ_NORMAL_DATA:
+					switch (*ptr) {
+						case '{':
+							if (ptr > dataStart) {
+								processCharData(dataStart, ptr - dataStart);
+							}
+							dataStart = ptr + 1;
+							// a group starts: save the state to restore on '}'
+							myStateStack.push(myState);
+							myState.ReadDataAsHex = false;
+							break;
+						case '}':
+						{
+							if (ptr > dataStart) {
+								processCharData(dataStart, ptr - dataStart);
+							}
+							dataStart = ptr + 1;
+
+							if (imageStartOffset >= 0) {
+								if (ZLMimeType::EMPTY != myNextImageMimeType) {
+									// stream offset of the '}' minus image start
+									const int imageSize = myStream->offset() + (ptr - end) - imageStartOffset;
+									insertImage(myNextImageMimeType, myFileName, imageStartOffset, imageSize);
+								}
+								imageStartOffset = -1;
+							}
+
+							if (myStateStack.empty()) {
+								parserState = READ_END_OF_FILE;
+								break;
+							}
+
+							if (myState.Destination != myStateStack.top().Destination) {
+								switchDestination(myState.Destination, false);
+								switchDestination(myStateStack.top().Destination, true);
+							}
+
+							bool oldItalic = myState.Italic;
+							bool oldBold = myState.Bold;
+							bool oldUnderlined = myState.Underlined;
+							ZLTextAlignmentType oldAlignment = myState.Alignment;
+							myState = myStateStack.top();
+							myStateStack.pop();
+
+							// report every property the restored state changed
+							if (myState.Italic != oldItalic) {
+								setFontProperty(RtfReader::FONT_ITALIC);
+							}
+							if (myState.Bold != oldBold) {
+								setFontProperty(RtfReader::FONT_BOLD);
+							}
+							if (myState.Underlined != oldUnderlined) {
+								setFontProperty(RtfReader::FONT_UNDERLINED);
+							}
+							if (myState.Alignment != oldAlignment) {
+								setAlignment();
+							}
+
+							break;
+						}
+						case '\\':
+							if (ptr > dataStart) {
+								processCharData(dataStart, ptr - dataStart);
+							}
+							dataStart = ptr + 1;
+							keyword.erase();
+							parserState = READ_KEYWORD;
+							break;
+						case 0x0d:
+						case 0x0a: // cr and lf are noise characters...
+							if (ptr > dataStart) {
+								processCharData(dataStart, ptr - dataStart);
+							}
+							dataStart = ptr + 1;
+							break;
+						default:
+							if (myState.ReadDataAsHex) {
+								// remember where the hex image data begins
+								if (imageStartOffset == -1) {
+									imageStartOffset = myStream->offset() + (ptr - end);
+								}
+							}
+							break;
+					}
+					break;
+				case READ_HEX_SYMBOL:
+					hexString += *ptr;
+					if (hexString.size() == 2) {
+						char ch = std::strtol(hexString.c_str(), 0, 16);
+						hexString.erase();
+						processCharData(&ch, 1);
+						parserState = READ_NORMAL_DATA;
+						dataStart = ptr + 1;
+					}
+					break;
+				case READ_KEYWORD:
+					if (!std::isalpha(current)) {
+						if ((ptr == dataStart) && (keyword.empty())) {
+							// control symbol: a single non-letter after '\'
+							if (*ptr == '\'') {
+								parserState = READ_HEX_SYMBOL;
+							} else {
+								keyword = *ptr;
+								processKeyword(keyword);
+								parserState = READ_NORMAL_DATA;
+							}
+							dataStart = ptr + 1;
+						} else {
+							keyword.append(dataStart, ptr - dataStart);
+							if (*ptr == '-' || std::isdigit(current)) {
+								dataStart = ptr;
+								parserState = READ_KEYWORD_PARAMETER;
+							} else {
+								// a space delimiter belongs to the keyword and
+								// is consumed; any other byte is re-examined
+								readNextChar = *ptr == ' ';
+								processKeyword(keyword);
+								parserState = READ_NORMAL_DATA;
+								dataStart = readNextChar ? ptr + 1 : ptr;
+							}
+						}
+					}
+					break;
+				case READ_KEYWORD_PARAMETER:
+					if (!std::isdigit(current)) {
+						parameterString.append(dataStart, ptr - dataStart);
+						int parameter = std::atoi(parameterString.c_str());
+						parameterString.erase();
+						readNextChar = *ptr == ' ';
+						if ((keyword == "bin") && (parameter > 0)) {
+							myBinaryDataSize = parameter;
+							parserState = READ_BINARY_DATA;
+						} else {
+							processKeyword(keyword, &parameter);
+							parserState = READ_NORMAL_DATA;
+						}
+						dataStart = readNextChar ? ptr + 1 : ptr;
+					}
+					break;
+			}
+			if (readNextChar) {
+				++ptr;
+			} else {
+				readNextChar = true;
+			}
+		}
+		// end of chunk: hand over or carry over whatever is still pending
+		if (dataStart < end) {
+			switch (parserState) {
+				case READ_NORMAL_DATA:
+					processCharData(dataStart, end - dataStart);
+					break;
+				case READ_KEYWORD:
+					keyword.append(dataStart, end - dataStart);
+					break;
+				case READ_KEYWORD_PARAMETER:
+					parameterString.append(dataStart, end - dataStart);
+					break;
+				default:
+					break;
+			}
+		}
+	}
+
+	return myIsInterrupted || myStateStack.empty();
+}
+
+// Dispatches a parsed keyword to its command.
+//   keyword   - keyword text without the leading backslash
+//   parameter - numeric parameter, or null when the keyword had none
+// The special-mode flag (set by the "*" keyword) is consumed on every call.
+// Keywords are ignored inside a skipped destination. An unknown keyword that
+// follows "*" turns the current destination into a skipped one.
+void RtfReader::processKeyword(const std::string &keyword, int *parameter) {
+	const bool followsStar = mySpecialMode;
+	mySpecialMode = false;
+
+	if (myState.Destination == RtfReader::DESTINATION_SKIP) {
+		return;
+	}
+
+	std::map<std::string, RtfCommand*>::const_iterator entry = ourKeywordMap.find(keyword);
+	if (entry != ourKeywordMap.end()) {
+		entry->second->run(*this, parameter);
+		return;
+	}
+
+	if (followsStar) {
+		myState.Destination = RtfReader::DESTINATION_SKIP;
+	}
+}
+
+// Forwards character data to addCharData() unless the current destination is
+// being skipped.
+void RtfReader::processCharData(const char *data, std::size_t len, bool convert) {
+ if (myState.Destination != RtfReader::DESTINATION_SKIP) {
+ addCharData(data, len, convert);
+ }
+}
+
+// Asks parseDocument() to stop; the flag is checked before each new chunk is
+// read from the stream.
+void RtfReader::interrupt() {
+ myIsInterrupted = true;
+}
+
+// Opens `file`, resets the parser state, parses the whole document and
+// releases the stream again.
+// Returns false if the stream cannot be obtained or opened; otherwise returns
+// the result of parseDocument().
+bool RtfReader::readDocument(const ZLFile &file) {
+ myFileName = file.path();
+ myStream = file.inputStream();
+ if (myStream.isNull() || !myStream->open()) {
+ return false;
+ }
+
+ // make sure the shared keyword table exists
+ fillKeywordMap();
+
+ // NOTE(review): the buffer is released only on the normal path below; if a
+ // callback invoked by parseDocument() threw, it would leak -- confirm
+ // whether exceptions can occur here.
+ myStreamBuffer = new char[rtfStreamBufferSize];
+
+ myIsInterrupted = false;
+
+ mySpecialMode = false;
+
+ // initial formatting state: no alignment, no font properties, no destination
+ myState.Alignment = ALIGN_UNDEFINED;
+ myState.Italic = false;
+ myState.Bold = false;
+ myState.Underlined = false;
+ myState.Destination = RtfReader::DESTINATION_NONE;
+ myState.ReadDataAsHex = false;
+
+ bool code = parseDocument();
+
+ // drop states of groups that were still open when parsing stopped
+ while (!myStateStack.empty()) {
+ myStateStack.pop();
+ }
+
+ delete[] myStreamBuffer;
+ myStream->close();
+
+ return code;
+}
diff --git a/reader/src/formats/rtf/RtfReader.h b/reader/src/formats/rtf/RtfReader.h
new file mode 100644
index 0000000..10b037a
--- /dev/null
+++ b/reader/src/formats/rtf/RtfReader.h
@@ -0,0 +1,209 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __RTFREADER_H__
+#define __RTFREADER_H__
+
+#include <string>
+#include <map>
+#include <stack>
+#include <ZLMimeType.h>
+
+#include <ZLEncodingConverter.h>
+
+#include <ZLTextAlignmentType.h>
+
+#include "../EncodedTextReader.h"
+
+class ZLFile;
+class ZLInputStream;
+class RtfCommand;
+
+// Base class for RTF readers. readDocument() opens the file and runs the parser;
+// keywords are dispatched through a static keyword -> RtfCommand table, and the
+// results are reported to subclasses through the pure virtual callbacks below.
+class RtfReader : public EncodedTextReader {
+
+private:
+	static void fillKeywordMap();
+	static void addAction(const std::string &tag, RtfCommand *command);
+
+private:
+	// Lookup table used by processKeyword().
+	static std::map<std::string, RtfCommand*> ourKeywordMap;
+
+protected:
+	RtfReader(const std::string &encoding);
+	virtual ~RtfReader();
+
+public:
+	// Returns false when the file's stream cannot be opened; otherwise the
+	// parser's result.
+	virtual bool readDocument(const ZLFile &file);
+
+protected:
+	// Destination of the text currently being read. While the current state is
+	// DESTINATION_SKIP, keywords and character data are ignored.
+	enum DestinationType {
+		DESTINATION_NONE,
+		DESTINATION_SKIP,
+		DESTINATION_INFO,
+		DESTINATION_TITLE,
+		DESTINATION_AUTHOR,
+		DESTINATION_PICTURE,
+		DESTINATION_STYLESHEET,
+		DESTINATION_FOOTNOTE,
+	};
+
+	enum FontProperty {
+		FONT_BOLD,
+		FONT_ITALIC,
+		FONT_UNDERLINED
+	};
+
+	// Callbacks implemented by concrete readers.
+	virtual void addCharData(const char *data, std::size_t len, bool convert) = 0;
+	virtual void insertImage(shared_ptr<ZLMimeType> mimeType, const std::string &fileName, std::size_t startOffset, std::size_t size) = 0;
+	virtual void setEncoding(int code) = 0;
+	virtual void switchDestination(DestinationType destination, bool on) = 0;
+	virtual void setAlignment() = 0;
+	virtual void setFontProperty(FontProperty property) = 0;
+	virtual void newParagraph() = 0;
+
+	// Non-virtual: sets the interruption flag. A subclass that declares its own
+	// interrupt() hides this one instead of overriding it.
+	void interrupt();
+
+private:
+	bool parseDocument();
+	void processKeyword(const std::string &keyword, int *parameter = 0);
+	void processCharData(const char *data, std::size_t len, bool convert = true);
+
+protected:
+	// Formatting/destination state; copies are kept on myStateStack.
+	struct RtfReaderState {
+		bool Bold;
+		bool Italic;
+		bool Underlined;
+		ZLTextAlignmentType Alignment;
+		DestinationType Destination;
+
+		bool ReadDataAsHex;
+	};
+
+	RtfReaderState myState;
+
+private:
+	// Consumed (cleared) by every processKeyword() call.
+	bool mySpecialMode;
+
+	std::string myFileName;
+	shared_ptr<ZLInputStream> myStream;
+	// Allocated and released inside readDocument().
+	char *myStreamBuffer;
+
+	std::stack<RtfReaderState> myStateStack;
+
+	int myBinaryDataSize;
+	shared_ptr<ZLMimeType> myNextImageMimeType;
+
+	// Only ever assigned true/false and tested as a condition, so it is a bool.
+	bool myIsInterrupted;
+
+friend class RtfNewParagraphCommand;
+friend class RtfFontPropertyCommand;
+friend class RtfAlignmentCommand;
+friend class RtfCharCommand;
+friend class RtfDestinationCommand;
+friend class RtfStyleCommand;
+friend class RtfSpecialCommand;
+friend class RtfPictureCommand;
+friend class RtfFontResetCommand;
+friend class RtfCodepageCommand;
+};
+
+// Abstract action bound to a keyword. RtfReader::processKeyword() looks the
+// command up in its keyword table and calls run() on it.
+class RtfCommand {
+protected:
+ // Protected: commands cannot be deleted through an RtfCommand pointer by outside code.
+ virtual ~RtfCommand();
+
+public:
+ // reader: the reader that met the keyword. parameter: pointer passed through
+ // from processKeyword(), whose default argument is 0, so it may be null.
+ virtual void run(RtfReader &reader, int *parameter) const = 0;
+};
+
+// Stateless command; run() is defined outside this header.
+// NOTE(review): presumably a no-op for recognised-but-ignored keywords — confirm in the implementation.
+class RtfDummyCommand : public RtfCommand {
+public:
+ void run(RtfReader &reader, int *parameter) const;
+};
+
+// Stateless command and friend of RtfReader; run() is defined outside this header.
+// NOTE(review): presumably triggers RtfReader::newParagraph() — confirm in the implementation.
+class RtfNewParagraphCommand : public RtfCommand {
+public:
+ void run(RtfReader &reader, int *parameter) const;
+};
+
+// Command parameterised by one RtfReader::FontProperty, fixed at construction.
+// run() is defined outside this header.
+class RtfFontPropertyCommand : public RtfCommand {
+
+public:
+ RtfFontPropertyCommand(RtfReader::FontProperty property);
+ void run(RtfReader &reader, int *parameter) const;
+
+private:
+ // The font property this command instance refers to.
+ RtfReader::FontProperty myProperty;
+};
+
+// Command parameterised by one ZLTextAlignmentType, fixed at construction.
+// run() is defined outside this header.
+class RtfAlignmentCommand : public RtfCommand {
+public:
+ RtfAlignmentCommand(ZLTextAlignmentType alignment);
+ void run(RtfReader &reader, int *parameter) const;
+
+private:
+ // The alignment this command instance refers to.
+ ZLTextAlignmentType myAlignment;
+};
+
+// Command that stores a string given at construction.
+// NOTE(review): presumably run() emits that string as character data — confirm in the implementation.
+class RtfCharCommand : public RtfCommand {
+public:
+ RtfCharCommand(const std::string &chr);
+ void run(RtfReader &reader, int *parameter) const;
+
+private:
+ // Copy of the string passed to the constructor.
+ std::string myChar;
+};
+
+// Command parameterised by one RtfReader::DestinationType, fixed at construction.
+// run() is defined outside this header.
+class RtfDestinationCommand : public RtfCommand {
+public:
+ RtfDestinationCommand(RtfReader::DestinationType dest);
+ void run(RtfReader &reader, int *parameter) const;
+
+private:
+ // The destination this command instance refers to.
+ RtfReader::DestinationType myDestination;
+};
+
+// Stateless command and friend of RtfReader; run() is defined outside this header.
+class RtfStyleCommand : public RtfCommand {
+public:
+ void run(RtfReader &reader, int *parameter) const;
+};
+
+// Stateless command and friend of RtfReader; run() is defined outside this header.
+// run() is public like in every other command class, so it can also be called
+// on an RtfSpecialCommand directly and not only through an RtfCommand reference.
+class RtfSpecialCommand : public RtfCommand {
+public:
+	void run(RtfReader &reader, int *parameter) const;
+};
+
+// Command that carries the MIME type given at construction.
+// run() is defined outside this header.
+class RtfPictureCommand : public RtfCommand {
+public:
+ RtfPictureCommand(shared_ptr<ZLMimeType> mimeType);
+ void run(RtfReader &reader, int *parameter) const;
+
+private:
+ // MIME type associated with this command; never reassigned.
+ const shared_ptr<ZLMimeType> myMimeType;
+};
+
+// Stateless command and friend of RtfReader; run() is defined outside this header.
+class RtfFontResetCommand : public RtfCommand {
+public:
+ void run(RtfReader &reader, int *parameter) const;
+};
+
+// Stateless command and friend of RtfReader; run() is defined outside this header.
+// NOTE(review): presumably forwards the keyword parameter to RtfReader::setEncoding() — confirm.
+class RtfCodepageCommand : public RtfCommand {
+public:
+ void run(RtfReader &reader, int *parameter) const;
+};
+
+#endif /* __RTFREADER_H__ */
diff --git a/reader/src/formats/rtf/RtfReaderStream.cpp b/reader/src/formats/rtf/RtfReaderStream.cpp
new file mode 100644
index 0000000..f4537f7
--- /dev/null
+++ b/reader/src/formats/rtf/RtfReaderStream.cpp
@@ -0,0 +1,175 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <algorithm>
+#include <cstdlib>
+#include <cstring>
+#include <string>
+
+#include "RtfReader.h"
+#include "RtfReaderStream.h"
+
+// RtfReader that copies character data into a caller-supplied buffer of at most
+// maxSize bytes and ignores images, encoding and formatting callbacks.
+class RtfTextOnlyReader : public RtfReader {
+
+public:
+ // buffer may be null, in which case no text is stored.
+ RtfTextOnlyReader(char *buffer, std::size_t maxSize);
+ ~RtfTextOnlyReader();
+ // Number of bytes written into the buffer so far.
+ std::size_t readSize() const;
+
+protected:
+ void addCharData(const char *data, std::size_t len, bool convert);
+ void insertImage(shared_ptr<ZLMimeType> mimeType, const std::string &fileName, std::size_t startOffset, std::size_t size);
+ void setEncoding(int code);
+ void switchDestination(DestinationType destination, bool on);
+ void setAlignment();
+ void setFontProperty(FontProperty property);
+ void newParagraph();
+
+ // Declares a new interrupt() that hides the non-virtual RtfReader::interrupt().
+ void interrupt();
+
+private:
+ struct RtfTextOnlyReaderState {
+ // When false, addCharData() discards incoming text.
+ bool ReadText;
+ };
+
+ RtfTextOnlyReaderState myCurrentState;
+
+private:
+ // Destination buffer; not owned (never deleted by this class).
+ char* myBuffer;
+ const std::size_t myMaxSize;
+ std::size_t myFilledSize;
+};
+
+// Starts with an empty encoding name, nothing written yet, and text collection enabled.
+RtfTextOnlyReader::RtfTextOnlyReader(char *buffer, std::size_t maxSize) : RtfReader(std::string()), myBuffer(buffer), myMaxSize(maxSize), myFilledSize(0) {
+ myCurrentState.ReadText = true;
+}
+
+// Nothing to release: the text buffer belongs to the caller.
+RtfTextOnlyReader::~RtfTextOnlyReader() {
+}
+
+// Appends up to len bytes of data to the output buffer, followed by one space
+// separator when there is still room. Data is ignored when there is no buffer
+// or text collection is switched off. Once the buffer is full, interrupt() is
+// called instead of writing the separator.
+void RtfTextOnlyReader::addCharData(const char *data, std::size_t len, bool) {
+	if (myBuffer == 0 || !myCurrentState.ReadText) {
+		return;
+	}
+
+	if (myFilledSize < myMaxSize) {
+		// Never copy more than what is left in the buffer.
+		const std::size_t room = myMaxSize - myFilledSize;
+		const std::size_t copied = (len < room) ? len : room;
+		std::memcpy(myBuffer + myFilledSize, data, copied);
+		myFilledSize += copied;
+	}
+
+	if (myFilledSize >= myMaxSize) {
+		interrupt();
+		return;
+	}
+	myBuffer[myFilledSize++] = ' ';
+}
+
+// Returns how many bytes addCharData() has written into the buffer.
+std::size_t RtfTextOnlyReader::readSize() const {
+ return myFilledSize;
+}
+
+// Images are ignored by the text-only reader.
+void RtfTextOnlyReader::insertImage(shared_ptr<ZLMimeType>, const std::string&, std::size_t, std::size_t) {
+}
+
+// Encoding changes are ignored by the text-only reader.
+void RtfTextOnlyReader::setEncoding(int) {
+}
+
+// Updates the "collect text" flag when the parser enters (on == true) or
+// leaves (on == false) a destination:
+//   - skip, info, title, author, stylesheet and picture destinations switch
+//     collection off on entry and back on when left;
+//   - entering a footnote switches collection on; leaving it changes nothing;
+//   - DESTINATION_NONE has no effect.
+void RtfTextOnlyReader::switchDestination(DestinationType destination, bool on) {
+	switch (destination) {
+		case DESTINATION_FOOTNOTE:
+			if (on) {
+				myCurrentState.ReadText = true;
+			}
+			break;
+		case DESTINATION_SKIP:
+		case DESTINATION_INFO:
+		case DESTINATION_TITLE:
+		case DESTINATION_AUTHOR:
+		case DESTINATION_STYLESHEET:
+		case DESTINATION_PICTURE:
+			myCurrentState.ReadText = !on;
+			break;
+		case DESTINATION_NONE:
+			break;
+	}
+}
+
+// Alignment changes are ignored by the text-only reader.
+void RtfTextOnlyReader::setAlignment() {
+}
+
+// Font property changes are ignored by the text-only reader.
+void RtfTextOnlyReader::setFontProperty(FontProperty) {
+}
+
+// Paragraph boundaries are ignored by the text-only reader.
+void RtfTextOnlyReader::newParagraph() {
+}
+
+// Stops parsing once the output buffer is full.
+// This declaration hides the non-virtual RtfReader::interrupt(), so the base
+// implementation has to be invoked explicitly; with an empty body the call made
+// from addCharData() had no effect and the rest of the document was still parsed.
+void RtfTextOnlyReader::interrupt() {
+	RtfReader::interrupt();
+}
+
+// Creates a stream over the text of the given RTF file, limited to maxSize bytes.
+// No buffer is allocated until open(). The read position starts at 0 so that
+// offset(), read() and seek() are well defined even before open() is called.
+RtfReaderStream::RtfReaderStream(const ZLFile& file, std::size_t maxSize) : myFile(file), myBuffer(0), mySize(maxSize), myOffset(0) {
+}
+
+// Releases the text buffer through close().
+RtfReaderStream::~RtfReaderStream() {
+ close();
+}
+
+// Extracts the text of the RTF file into a freshly allocated buffer and rewinds
+// the read position. Always returns true: the result of readDocument() is not
+// checked, so a file that fails to open or parse yields whatever text (possibly
+// none) was collected.
+// After the call mySize holds the number of bytes actually extracted, so a later
+// re-open is limited to that size rather than the original maximum.
+bool RtfReaderStream::open() {
+	// Release the buffer of a previous open(); otherwise re-opening leaks it.
+	close();
+	if (mySize != 0) {
+		myBuffer = new char[mySize];
+	}
+	RtfTextOnlyReader reader(myBuffer, mySize);
+	reader.readDocument(myFile);
+	mySize = reader.readSize();
+	myOffset = 0;
+	return true;
+}
+
+// Copies up to maxSize bytes of extracted text into buffer and advances the
+// read position. The position still advances when buffer (or the internal
+// buffer) is null, in which case nothing is copied.
+// Returns the number of bytes consumed.
+std::size_t RtfReaderStream::read(char *buffer, std::size_t maxSize) {
+	const std::size_t remaining = mySize - myOffset;
+	const std::size_t count = (remaining < maxSize) ? remaining : maxSize;
+	const bool canCopy = (buffer != 0) && (myBuffer != 0);
+	if (canCopy) {
+		std::memcpy(buffer, myBuffer + myOffset, count);
+	}
+	myOffset += count;
+	return count;
+}
+
+// Frees the text buffer, if any. Safe to call repeatedly.
+void RtfReaderStream::close() {
+	// delete[] on a null pointer does nothing, so no explicit check is needed.
+	delete[] myBuffer;
+	myBuffer = 0;
+}
+
+// Moves the read position to offset (absolute) or by offset (relative to the
+// current position). The result is clamped to the range [0, mySize].
+void RtfReaderStream::seek(int offset, bool absoluteOffset) {
+	int target = offset;
+	if (!absoluteOffset) {
+		target += myOffset;
+	}
+	const std::size_t nonNegative = (target > 0) ? (std::size_t)target : 0;
+	myOffset = (nonNegative < mySize) ? nonNegative : mySize;
+}
+
+// Current read position within the extracted text.
+std::size_t RtfReaderStream::offset() const {
+ return myOffset;
+}
+
+// Returns mySize: the requested maximum before open(), the number of extracted bytes after it.
+std::size_t RtfReaderStream::sizeOfOpened() {
+ return mySize;
+}
+
diff --git a/reader/src/formats/rtf/RtfReaderStream.h b/reader/src/formats/rtf/RtfReaderStream.h
new file mode 100644
index 0000000..71555b4
--- /dev/null
+++ b/reader/src/formats/rtf/RtfReaderStream.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2008-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __RTFREADERSTREAM_H__
+#define __RTFREADERSTREAM_H__
+
+#include <string>
+
+#include <ZLFile.h>
+#include <ZLInputStream.h>
+
+// Input stream that exposes the text of an RTF file. open() extracts at most
+// maxSize bytes of text into a memory buffer; read()/seek() then work on that buffer.
+class RtfReaderStream : public ZLInputStream {
+
+public:
+ RtfReaderStream(const ZLFile& file, std::size_t maxSize);
+ ~RtfReaderStream();
+
+private:
+ // Declared private here, so they are reachable only through the ZLInputStream interface.
+ bool open();
+ std::size_t read(char *buffer, std::size_t maxSize);
+ void close();
+
+ void seek(int offset, bool absoluteOffset);
+ std::size_t offset() const;
+ std::size_t sizeOfOpened();
+
+private:
+ const ZLFile myFile;
+ // Owned buffer: allocated in open(), freed in close().
+ char *myBuffer;
+ // Maximum size before open(); number of extracted bytes afterwards.
+ std::size_t mySize;
+ std::size_t myOffset;
+};
+
+#endif /* __RTFREADERSTREAM_H__ */
diff --git a/reader/src/formats/tcr/PPLBookReader.cpp b/reader/src/formats/tcr/PPLBookReader.cpp
new file mode 100644
index 0000000..9b7d271
--- /dev/null
+++ b/reader/src/formats/tcr/PPLBookReader.cpp
@@ -0,0 +1,129 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cstring>
+#include <cctype>
+
+#include "PPLBookReader.h"
+#include <ZLInputStream.h>
+
+// Number of bytes requested per stream read in PPLBookReader::readDocument().
+static const std::size_t BUFFER_SIZE = 2048;
+
+// Allocates the read buffer with one extra byte for the terminator that
+// readDocument() writes after each chunk.
+PPLBookReader::PPLBookReader(BookModel &model, const std::string &encoding) : EncodedTextReader(encoding), myModelReader(model) {
+ myBuffer = new char[BUFFER_SIZE + 1];
+}
+
+// Frees the read buffer.
+PPLBookReader::~PPLBookReader() {
+ delete[] myBuffer;
+}
+
+// Returns true when the accumulated paragraph consists only of whitespace
+// characters (an empty paragraph also counts).
+bool PPLBookReader::currentParagraphIsEmpty() const {
+	const std::size_t length = myCurrentParagraph.length();
+	for (std::size_t i = 0; i < length; ++i) {
+		// Cast avoids passing a negative value to isspace().
+		const unsigned char ch = (unsigned char)myCurrentParagraph[i];
+		if (!std::isspace(ch)) {
+			return false;
+		}
+	}
+	return true;
+}
+
+// Emits the accumulated line (myCurrentParagraph) into the model and clears it.
+// Nothing happens when the accumulated text is empty. Otherwise the first
+// matching rule applies:
+//   1. whitespace-only text: counts as an empty line; from the second one on,
+//      an EMPTY_LINE_PARAGRAPH is added;
+//   2. fewer than two empty lines counted so far: paragraph with TITLE control;
+//   3. text starting with a tab (character 9): plain paragraph;
+//   4. text starting with "* ": the prefix is removed, a section break and a
+//      contents entry are added, then a SECTION_TITLE paragraph;
+//   5. anything not starting with "<* >": SUBTITLE paragraph;
+//   6. text starting with "<* >" is dropped.
+// NOTE(review): myEmptyLineCounter is only ever incremented here — confirm that
+// it is intentionally never reset after a non-empty paragraph.
+void PPLBookReader::addParagraph() {
+ static const std::string END_OF_TEXT = "<* >";
+ if (!myCurrentParagraph.empty()) {
+ if (currentParagraphIsEmpty()) {
+ ++myEmptyLineCounter;
+ if (myEmptyLineCounter >= 2) {
+ myModelReader.beginParagraph(ZLTextParagraph::EMPTY_LINE_PARAGRAPH);
+ myModelReader.endParagraph();
+ }
+ } else if (myEmptyLineCounter < 2) {
+ myModelReader.beginParagraph();
+ myModelReader.addControl(TITLE, true);
+ myModelReader.addData(myCurrentParagraph);
+ myModelReader.endParagraph();
+ } else if (myCurrentParagraph[0] == 9) {
+ myModelReader.beginParagraph();
+ myModelReader.addData(myCurrentParagraph);
+ myModelReader.endParagraph();
+ } else if ((myCurrentParagraph.length() >= 2) &&
+ (myCurrentParagraph[0] == '*') &&
+ (myCurrentParagraph[1] == ' ')) {
+ myCurrentParagraph.erase(0, 2);
+ myModelReader.insertEndOfSectionParagraph();
+ myModelReader.beginContentsParagraph();
+ myModelReader.addContentsData(myCurrentParagraph);
+ myModelReader.endContentsParagraph();
+ myModelReader.beginParagraph();
+ myModelReader.addControl(SECTION_TITLE, true);
+ myModelReader.addData(myCurrentParagraph);
+ myModelReader.endParagraph();
+ } else if (myCurrentParagraph.substr(0, 4) != END_OF_TEXT) {
+ myModelReader.beginParagraph();
+ myModelReader.addControl(SUBTITLE, true);
+ myModelReader.addData(myCurrentParagraph);
+ myModelReader.endParagraph();
+ }
+ myCurrentParagraph.erase();
+ }
+}
+
+// Reads a PPL document from stream into the book model.
+// Returns false if the stream cannot be opened, true otherwise.
+// The stream is read in BUFFER_SIZE chunks; each '\n' ends a line, which is
+// converted with myConverter and handed to addParagraph(). A line may span
+// several chunks: its pieces accumulate in myCurrentParagraph.
+bool PPLBookReader::readDocument(ZLInputStream &stream) {
+	if (!stream.open()) {
+		return false;
+	}
+
+	myModelReader.setMainTextModel();
+	myModelReader.pushKind(REGULAR);
+	myCurrentParagraph.erase();
+	myEmptyLineCounter = 0;
+
+	// "PPL\r\n"
+	stream.seek(5, false);
+
+	std::size_t size;
+	do {
+		size = stream.read(myBuffer, BUFFER_SIZE);
+		myBuffer[size] = '\0';
+
+		const char *start = myBuffer;
+		const char *end = myBuffer + size;
+		const char *eol;
+		do {
+			// Length-bounded search: unlike strchr() it does not stop at a NUL
+			// byte inside the chunk, so newlines after such a byte still split lines.
+			eol = static_cast<const char*>(std::memchr(start, '\n', end - start));
+			if (eol != 0) {
+				if (start < eol) {
+					myConverter->convert(myCurrentParagraph, start, eol);
+				}
+				addParagraph();
+				start = eol + 1;
+			} else {
+				// No newline left in this chunk: keep the tail for the next one.
+				if (start < end) {
+					myConverter->convert(myCurrentParagraph, start, end);
+				}
+			}
+		} while (eol != 0);
+	} while (size == BUFFER_SIZE);	// a short read is taken as end of stream
+
+	// Flush a final line that has no trailing newline.
+	addParagraph();
+
+	stream.close();
+
+	return true;
+}
diff --git a/reader/src/formats/tcr/PPLBookReader.h b/reader/src/formats/tcr/PPLBookReader.h
new file mode 100644
index 0000000..98c7f9d
--- /dev/null
+++ b/reader/src/formats/tcr/PPLBookReader.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __PPLBOOKREADER_H__
+#define __PPLBOOKREADER_H__
+
+#include <shared_ptr.h>
+#include <ZLEncodingConverter.h>
+#include "../../bookmodel/BookReader.h"
+#include "../EncodedTextReader.h"
+
+class ZLInputStream;
+class BookModel;
+
+// Reads a PPL text document line by line and fills a BookModel with titles,
+// sections and paragraphs.
+class PPLBookReader : public EncodedTextReader {
+
+public:
+ PPLBookReader(BookModel &model, const std::string &encoding);
+ ~PPLBookReader();
+
+ // Returns false if the stream cannot be opened.
+ bool readDocument(ZLInputStream &stream);
+
+private:
+ bool currentParagraphIsEmpty() const;
+ void addParagraph();
+
+private:
+ BookReader myModelReader;
+
+ // Owned read buffer: allocated in the constructor, freed in the destructor.
+ char *myBuffer;
+ // Converted text of the line currently being assembled.
+ std::string myCurrentParagraph;
+ // Number of whitespace-only lines seen; reset at the start of readDocument().
+ int myEmptyLineCounter;
+};
+
+#endif /* __PPLBOOKREADER_H__ */
diff --git a/reader/src/formats/tcr/TcrPlugin.cpp b/reader/src/formats/tcr/TcrPlugin.cpp
new file mode 100644
index 0000000..8ee0f14
--- /dev/null
+++ b/reader/src/formats/tcr/TcrPlugin.cpp
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLFile.h>
+#include <ZLInputStream.h>
+
+#include "TcrPlugin.h"
+#include "TcrStream.h"
+#include "PPLBookReader.h"
+#include "../util/TextFormatDetector.h"
+#include "../txt/TxtBookReader.h"
+#include "../html/HtmlBookReader.h"
+#include "../txt/PlainTextFormat.h"
+
+#include "../../bookmodel/BookModel.h"
+#include "../../library/Book.h"
+
+// Accepts files whose extension is exactly "tcr".
+bool TcrPlugin::acceptsFile(const ZLFile &file) const {
+ return file.extension() == "tcr";
+}
+
+// Runs encoding/language detection on the decoded TCR stream.
+// Returns true when the book has a non-empty encoding afterwards.
+bool TcrPlugin::readMetaInfo(Book &book) const {
+	shared_ptr<ZLInputStream> tcrStream = new TcrStream(book.file());
+	detectEncodingAndLanguage(book, *tcrStream);
+	const bool encodingKnown = !book.encoding().empty();
+	return encodingKnown;
+}
+
+// Does nothing with the book and always reports success.
+bool TcrPlugin::readLanguageAndEncoding(Book &book) const {
+ (void)book;
+ return true;
+}
+
+// Fills the model from the book's TCR file. The decoded content is read with
+// the PPL reader, the HTML reader or the plain-text reader, checked in that order.
+// Always returns true: the readers' readDocument() results are not checked.
+bool TcrPlugin::readModel(BookModel &model) const {
+ const Book &book = *model.book();
+ const ZLFile &file = book.file();
+
+ shared_ptr<ZLInputStream> stream = new TcrStream(file);
+
+ // Detect the plain-text layout options only if none are stored for this file yet.
+ PlainTextFormat format(file);
+ if (!format.initialized()) {
+ PlainTextFormatDetector detector;
+ detector.detect(*stream, format);
+ }
+
+ const std::string &encoding = book.encoding();
+ if (TextFormatDetector().isPPL(*stream)) {
+ PPLBookReader(model, encoding).readDocument(*stream);
+ } else if (TextFormatDetector().isHtml(*stream)) {
+ HtmlBookReader("", model, format, encoding).readDocument(*stream);
+ } else {
+ TxtBookReader(model, format, encoding).readDocument(*stream);
+ }
+ return true;
+}
+
+// Returns 0 for PPL content (no options page); otherwise a newly allocated
+// PlainTextInfoPage whose contents-table entries are shown only when the
+// content is not HTML.
+FormatInfoPage *TcrPlugin::createInfoPage(ZLOptionsDialog &dialog, const ZLFile &file) {
+ shared_ptr<ZLInputStream> stream = new TcrStream(file);
+ if (TextFormatDetector().isPPL(*stream)) {
+ return 0;
+ }
+ return new PlainTextInfoPage(dialog, file, ZLResourceKey("Text"), !TextFormatDetector().isHtml(*stream));
+}
diff --git a/reader/src/formats/tcr/TcrPlugin.h b/reader/src/formats/tcr/TcrPlugin.h
new file mode 100644
index 0000000..9655892
--- /dev/null
+++ b/reader/src/formats/tcr/TcrPlugin.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __TCRPLUGIN_H__
+#define __TCRPLUGIN_H__
+
+#include "../FormatPlugin.h"
+
+// Format plugin for files with the "tcr" extension.
+class TcrPlugin : public FormatPlugin {
+
+public:
+ TcrPlugin();
+ ~TcrPlugin();
+
+ bool providesMetaInfo() const;
+ bool acceptsFile(const ZLFile &file) const;
+ bool readMetaInfo(Book &book) const;
+ bool readLanguageAndEncoding(Book &book) const;
+ bool readModel(BookModel &model) const;
+ // May return 0 when no options page applies to the file.
+ FormatInfoPage *createInfoPage(ZLOptionsDialog &dialog, const ZLFile &file);
+};
+
+// Trivial constructor/destructor; the plugin reports that it provides no meta info.
+inline TcrPlugin::TcrPlugin() {}
+inline TcrPlugin::~TcrPlugin() {}
+inline bool TcrPlugin::providesMetaInfo() const { return false; }
+
+#endif /* __TCRPLUGIN_H__ */
diff --git a/reader/src/formats/tcr/TcrStream.cpp b/reader/src/formats/tcr/TcrStream.cpp
new file mode 100644
index 0000000..cf4e540
--- /dev/null
+++ b/reader/src/formats/tcr/TcrStream.cpp
@@ -0,0 +1,125 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cstring>
+#include <algorithm>
+
+#include <ZLFile.h>
+#include <ZLZDecompressor.h>
+
+#include "TcrStream.h"
+
+// Wraps the raw input stream of the given file. Nothing is read until open().
+// The decoded-byte counter starts at 0 so that offset() and seek() are well
+// defined from the start.
+TcrStream::TcrStream(const ZLFile &file) : myBase(file.inputStream()), myOffset(0) {
+}
+
+// Closes the underlying stream and clears the dictionary through close().
+TcrStream::~TcrStream() {
+ close();
+}
+
+// (Re)opens the stream: checks the 9-byte "!!8-Bit!!" signature and loads the
+// 256 dictionary entries, each stored as a one-byte length followed by that
+// many bytes. Returns false if the underlying stream is missing, cannot be
+// opened, or is too short or not a TCR file; in that case the stream is left
+// closed with an empty dictionary.
+bool TcrStream::open() {
+	close();
+	// Decoding restarts from the beginning, so the decoded position does too;
+	// seek() depends on this when it re-opens to move backwards.
+	myOffset = 0;
+	if (myBase.isNull() || !myBase->open()) {
+		return false;
+	}
+
+	char header[9];
+	if (myBase->read(header, 9) != 9 || std::strncmp(header, "!!8-Bit!!", 9) != 0) {
+		close();
+		return false;
+	}
+
+	unsigned char entryLength;
+	char entryBuffer[255];	// an entry length fits in one byte, so 255 is the maximum
+	for (int i = 0; i < 256; ++i) {
+		if (myBase->read((char*)&entryLength, 1) != 1 ||
+				(entryLength > 0 && myBase->read(entryBuffer, entryLength) != entryLength)) {
+			// close() also drops the entries loaded so far.
+			close();
+			return false;
+		}
+		if (entryLength > 0) {
+			myDictionary[i].append(entryBuffer, entryLength);
+		}
+	}
+
+	return true;
+}
+
+// Closes the underlying stream (if there is one) and discards the dictionary
+// and any decoded bytes still waiting to be delivered.
+void TcrStream::close() {
+	if (!myBase.isNull()) {
+		myBase->close();
+	}
+	for (std::string *entry = myDictionary; entry != myDictionary + 256; ++entry) {
+		entry->erase();
+	}
+	myBuffer.erase();
+}
+
+// Decodes up to maxSize bytes. Each input byte is an index into the dictionary
+// and expands to that entry's bytes. When buffer is null the data is decoded
+// and counted but not stored (used by seek() to skip forward).
+// Returns the number of decoded bytes, which is less than maxSize only at the
+// end of the input (or when there is no underlying stream).
+std::size_t TcrStream::read(char *buffer, std::size_t maxSize) {
+	std::size_t size = 0;
+
+	// First deliver what was left over from the entry split by the previous call.
+	if (!myBuffer.empty()) {
+		size = std::min(maxSize, myBuffer.length());
+		if (buffer != 0) {
+			// memcpy, not strncpy: entries are raw bytes and may contain NULs.
+			std::memcpy(buffer, myBuffer.data(), size);
+		}
+		myBuffer.erase(0, size);
+	}
+
+	while (size < maxSize && !myBase.isNull()) {
+		unsigned char index;
+		if (myBase->read((char*)&index, 1) != 1) {
+			break;
+		}
+		const std::string &entry = myDictionary[index];
+		const std::size_t len = entry.length();
+		if (len > 0) {
+			const std::size_t freeSize = maxSize - size;
+			const std::size_t chunk = std::min(len, freeSize);
+			if (buffer != 0) {
+				std::memcpy(buffer + size, entry.data(), chunk);
+			}
+			size += chunk;
+			if (len > freeSize) {
+				// Keep the part that did not fit for the next call.
+				myBuffer = entry.substr(freeSize);
+			}
+		}
+	}
+	myOffset += size;
+	return size;
+}
+
+// Moves the decoded position. Forward moves decode and discard data; backward
+// moves re-open the stream and decode again up to the target position.
+// NOTE(review): the backward branch relies on the position count starting from
+// zero again after open() — confirm that open() resets myOffset. The result of
+// open() is not checked here.
+void TcrStream::seek(int offset, bool absoluteOffset) {
+ // Normalise to a relative offset.
+ if (absoluteOffset) {
+ offset -= this->offset();
+ }
+ if (offset > 0) {
+ read(0, offset);
+ } else if (offset < 0) {
+ // Convert back to an absolute target before the stream is restarted.
+ offset += this->offset();
+ open();
+ if (offset >= 0) {
+ read(0, offset);
+ }
+ }
+}
+
+// Number of decoded bytes consumed so far, as accumulated by read().
+std::size_t TcrStream::offset() const {
+ return myOffset;
+}
+
+// Not implemented: always reports 0.
+std::size_t TcrStream::sizeOfOpened() {
+ // TODO: implement
+ return 0;
+}
diff --git a/reader/src/formats/tcr/TcrStream.h b/reader/src/formats/tcr/TcrStream.h
new file mode 100644
index 0000000..0a9d212
--- /dev/null
+++ b/reader/src/formats/tcr/TcrStream.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __TCRSTREAM_H__
+#define __TCRSTREAM_H__
+
+#include <ZLInputStream.h>
+
+class ZLFile;
+
+// Input stream that decodes a TCR file: after a signature and a 256-entry
+// dictionary, every input byte expands to the dictionary entry it indexes.
+class TcrStream : public ZLInputStream {
+
+public:
+ TcrStream(const ZLFile &file);
+ virtual ~TcrStream();
+ // Returns false if the file cannot be opened or is not a valid TCR file.
+ bool open();
+ virtual void close();
+
+ std::size_t read(char *buffer, std::size_t maxSize);
+ void seek(int offset, bool absoluteOffset);
+ std::size_t offset() const;
+ // Currently always returns 0.
+ std::size_t sizeOfOpened();
+
+protected:
+ // Expansion for each possible input byte; filled by open(), cleared by close().
+ std::string myDictionary[256];
+ // Decoded bytes of a partially delivered entry, carried over between read() calls.
+ std::string myBuffer;
+ // The raw (encoded) stream of the file.
+ shared_ptr<ZLInputStream> myBase;
+ // Decoded bytes consumed so far.
+ std::size_t myOffset;
+};
+
+#endif /* __TCRSTREAM_H__ */
diff --git a/reader/src/formats/txt/PlainTextFormat.cpp b/reader/src/formats/txt/PlainTextFormat.cpp
new file mode 100644
index 0000000..7c9360f
--- /dev/null
+++ b/reader/src/formats/txt/PlainTextFormat.cpp
@@ -0,0 +1,253 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cctype>
+#include <algorithm>
+
+#include <ZLOptions.h>
+#include <ZLOptionsDialog.h>
+#include <ZLOptionEntry.h>
+#include <ZLFile.h>
+
+#include "PlainTextFormat.h"
+
+#include "../../options/FBCategoryKey.h"
+
+// Names of the per-file options used by PlainTextFormat.
+const std::string OPTION_Initialized = "Initialized";
+const std::string OPTION_BreakType = "BreakType";
+const std::string OPTION_IgnoredIndent = "IgnoredIndent";
+const std::string OPTION_EmptyLinesBeforeNewSection = "EmptyLinesBeforeNewSection";
+const std::string OPTION_CreateContentsTable = "CreateContentsTable";
+
+// Binds every option to the BOOKS category with the file's path as the group
+// name. The values passed here are: not initialized, break type 1, ignored
+// indent 1 and empty-lines-before-section 1 (both constructed with 1, 100, 1),
+// and no contents table.
+PlainTextFormat::PlainTextFormat(const ZLFile &file) :
+ InitializedOption(FBCategoryKey::BOOKS, file.path(), OPTION_Initialized, false),
+ BreakTypeOption(FBCategoryKey::BOOKS, file.path(), OPTION_BreakType, 1),
+ IgnoredIndentOption(FBCategoryKey::BOOKS, file.path(), OPTION_IgnoredIndent, 1, 100, 1),
+ EmptyLinesBeforeNewSectionOption(FBCategoryKey::BOOKS, file.path(), OPTION_EmptyLinesBeforeNewSection, 1, 100, 1),
+ CreateContentsTableOption(FBCategoryKey::BOOKS, file.path(), OPTION_CreateContentsTable, false) {
+}
+
+// Builds the plain-text options tab. If no format options are stored for the
+// file yet, they are detected from the file's content first.
+// showContentsEntry controls whether the contents-table entries are added.
+PlainTextInfoPage::PlainTextInfoPage(ZLOptionsDialog &dialog, const ZLFile &file, const ZLResourceKey &key, bool showContentsEntry) : myFormat(file) {
+ if (!myFormat.initialized()) {
+ PlainTextFormatDetector detector;
+ shared_ptr<ZLInputStream> stream = file.inputStream();
+ if (!stream.isNull()) {
+ detector.detect(*stream, myFormat);
+ }
+ }
+
+ ZLDialogContent &tab = dialog.createTab(key);
+
+ BreakTypeOptionEntry *breakEntry = new BreakTypeOptionEntry(*this, myFormat.BreakTypeOption);
+ myIgnoredIndentEntry = new ZLSimpleSpinOptionEntry(myFormat.IgnoredIndentOption, 1);
+ tab.addOption(ZLResourceKey("breakType"), breakEntry);
+ tab.addOption(ZLResourceKey("ignoreIndent"), myIgnoredIndentEntry);
+ // Apply the initial selection so the indent entry's visibility matches it.
+ breakEntry->onValueSelected(breakEntry->initialIndex());
+
+ // myEmptyLinesBeforeNewSectionEntry is assigned only inside this branch.
+ if (showContentsEntry) {
+ CreateContentsTableOptionEntry *contentsTableEntry = new CreateContentsTableOptionEntry(*this, myFormat.CreateContentsTableOption);
+ myEmptyLinesBeforeNewSectionEntry = new ZLSimpleSpinOptionEntry(myFormat.EmptyLinesBeforeNewSectionOption, 1);
+ tab.addOption(ZLResourceKey("buildTOC"), contentsTableEntry);
+ tab.addOption(ZLResourceKey("emptyLines"), myEmptyLinesBeforeNewSectionEntry);
+ contentsTableEntry->onStateChanged(contentsTableEntry->initialState());
+ }
+}
+
+// Deletes nothing itself.
+// NOTE(review): the option entries are presumably owned by the dialog tab after addOption() — confirm.
+PlainTextInfoPage::~PlainTextInfoPage() {
+}
+
+// Chunk size, in bytes, used by PlainTextFormatDetector::detect() when reading the stream.
+const int BUFFER_SIZE = 4096;
+
+// Scans the whole stream and derives plain-text layout options from line
+// statistics, storing them in format:
+//   - IgnoredIndentOption: one more than the smallest indent reached by more
+//     than 10% of the non-empty lines;
+//   - BreakTypeOption: always "empty line", plus "new line" when fewer than 30%
+//     of the lines are shorter than 81 characters, otherwise "line with indent";
+//   - EmptyLinesBeforeNewSectionOption / CreateContentsTableOption: derived from
+//     how often a run of empty lines is followed by a short line;
+//   - InitializedOption: set to true.
+// Does nothing if the stream cannot be opened. The stream is closed again
+// before the options are computed.
+void PlainTextFormatDetector::detect(ZLInputStream &stream, PlainTextFormat &format) {
+	if (!stream.open()) {
+		return;
+	}
+
+	const unsigned int tableSize = 10;
+
+	unsigned int lineCounter = 0;
+	int emptyLineCounter = -1;
+	unsigned int stringsWithLengthLessThan81Counter = 0;
+	// Histograms; the last slot collects every value >= tableSize - 1.
+	unsigned int stringIndentTable[tableSize] = { 0 };
+	unsigned int emptyLinesTable[tableSize] = { 0 };
+	unsigned int emptyLinesBeforeShortStringTable[tableSize] = { 0 };
+
+	bool currentLineIsEmpty = true;
+	unsigned int currentLineLength = 0;
+	unsigned int currentLineIndent = 0;
+	int currentNumberOfEmptyLines = -1;
+
+	char *buffer = new char[BUFFER_SIZE];
+	int length;
+	do {
+		length = stream.read(buffer, BUFFER_SIZE);
+		const char *end = buffer + length;
+		for (const char *ptr = buffer; ptr != end; ++ptr) {
+			// Counts every character, including the line terminator itself.
+			++currentLineLength;
+			if (*ptr == '\n') {
+				++lineCounter;
+				if (currentLineIsEmpty) {
+					++emptyLineCounter;
+					++currentNumberOfEmptyLines;
+				} else {
+					if (currentNumberOfEmptyLines >= 0) {
+						int index = std::min(currentNumberOfEmptyLines, (int)tableSize - 1);
+						emptyLinesTable[index]++;
+						if (currentLineLength < 51) {
+							emptyLinesBeforeShortStringTable[index]++;
+						}
+					}
+					currentNumberOfEmptyLines = -1;
+				}
+				if (currentLineLength < 81) {
+					++stringsWithLengthLessThan81Counter;
+				}
+				if (!currentLineIsEmpty) {
+					stringIndentTable[std::min(currentLineIndent, tableSize - 1)]++;
+				}
+
+				currentLineIsEmpty = true;
+				currentLineLength = 0;
+				currentLineIndent = 0;
+			} else if (*ptr == '\r') {
+				continue;
+			} else if (std::isspace((unsigned char)*ptr)) {
+				// Leading whitespace only: stops counting at the first visible character.
+				if (currentLineIsEmpty) {
+					++currentLineIndent;
+				}
+			} else {
+				currentLineIsEmpty = false;
+			}
+		}
+	} while (length == BUFFER_SIZE);	// a short read is taken as end of stream
+	delete[] buffer;
+	// Release the stream like the other readers do once they are done with it.
+	stream.close();
+
+	unsigned int nonEmptyLineCounter = lineCounter - emptyLineCounter;
+
+	{
+		unsigned int indent = 0;
+		unsigned int lineWithIndent = 0;
+		for (; indent < tableSize; ++indent) {
+			lineWithIndent += stringIndentTable[indent];
+			if (lineWithIndent > 0.1 * nonEmptyLineCounter) {
+				break;
+			}
+		}
+		format.IgnoredIndentOption.setValue(indent + 1);
+	}
+
+	{
+		int breakType = 0;
+		breakType |= PlainTextFormat::BREAK_PARAGRAPH_AT_EMPTY_LINE;
+		if (stringsWithLengthLessThan81Counter < 0.3 * nonEmptyLineCounter) {
+			breakType |= PlainTextFormat::BREAK_PARAGRAPH_AT_NEW_LINE;
+		} else {
+			breakType |= PlainTextFormat::BREAK_PARAGRAPH_AT_LINE_WITH_INDENT;
+		}
+		format.BreakTypeOption.setValue(breakType);
+	}
+
+	{
+		unsigned int max = 0;
+		unsigned index;
+		int emptyLinesBeforeNewSection = -1;
+		// Most frequent empty-line run (of slot 2 or more) that precedes a short line.
+		for (index = 2; index < tableSize; ++index) {
+			if (max < emptyLinesBeforeShortStringTable[index]) {
+				max = emptyLinesBeforeShortStringTable[index];
+				emptyLinesBeforeNewSection = index;
+			}
+		}
+		if (emptyLinesBeforeNewSection > 0) {
+			// Turn both histograms into "at least this many empty lines" counts.
+			for (index = tableSize - 1; index > 0; --index) {
+				emptyLinesTable[index - 1] += emptyLinesTable[index];
+				emptyLinesBeforeShortStringTable[index - 1] += emptyLinesBeforeShortStringTable[index];
+			}
+			for (index = emptyLinesBeforeNewSection; index < tableSize; ++index) {
+				if ((emptyLinesBeforeShortStringTable[index] > 2) &&
+						(emptyLinesBeforeShortStringTable[index] > 0.7 * emptyLinesTable[index])) {
+					break;
+				}
+			}
+			emptyLinesBeforeNewSection = (index == tableSize) ? -1 : (int)index;
+		}
+		format.EmptyLinesBeforeNewSectionOption.setValue(emptyLinesBeforeNewSection);
+		format.CreateContentsTableOption.setValue(emptyLinesBeforeNewSection > 0);
+	}
+
+	format.InitializedOption.setValue(true);
+}
+
+// Keeps references to the owning page and to the option being edited.
+BreakTypeOptionEntry::BreakTypeOptionEntry(PlainTextInfoPage &page, ZLIntegerOption &breakTypeOption) : myPage(page), myBreakTypeOption(breakTypeOption) {
+}
+
+// Nothing to release.
+BreakTypeOptionEntry::~BreakTypeOptionEntry() {
+}
+
+// Labels of the break-type choices; filled on first use by BreakTypeOptionEntry::values().
+static std::vector<std::string> BREAK_TYPE_VALUES_VECTOR;
+
+// Maps the stored break-type flags to the index of the matching choice:
+//   0 - "New Line", 1 - "Empty Line", 2 - "Line With Indent".
+// Any value with the new-line flag set maps to 0. This includes the
+// NEW_LINE | EMPTY_LINE combination written by PlainTextFormatDetector, which
+// used to fall into the default branch and show up as "Line With Indent", so
+// accepting the dialog unchanged replaced it with a different break type.
+int BreakTypeOptionEntry::initialIndex() const {
+	const int breakType = myBreakTypeOption.value();
+	if (breakType & PlainTextFormat::BREAK_PARAGRAPH_AT_NEW_LINE) {
+		return 0;
+	}
+	if (breakType == PlainTextFormat::BREAK_PARAGRAPH_AT_EMPTY_LINE) {
+		return 1;
+	}
+	// EMPTY_LINE | LINE_WITH_INDENT and every other combination.
+	return 2;
+}
+
+// Label of the choice selected by initialIndex().
+const std::string &BreakTypeOptionEntry::initialValue() const {
+ return values()[initialIndex()];
+}
+
+// Returns the three choice labels, building the shared list on the first call.
+// The order defines the indices used by initialIndex(), onAccept() and onValueSelected().
+const std::vector<std::string> &BreakTypeOptionEntry::values() const {
+ if (BREAK_TYPE_VALUES_VECTOR.empty()) {
+ BREAK_TYPE_VALUES_VECTOR.push_back("New Line");
+ BREAK_TYPE_VALUES_VECTOR.push_back("Empty Line");
+ BREAK_TYPE_VALUES_VECTOR.push_back("Line With Indent");
+ }
+ return BREAK_TYPE_VALUES_VECTOR;
+}
+
+// Stores the break-type flags that correspond to the accepted label.
+// A label that matches none of the choices leaves the option untouched.
+void BreakTypeOptionEntry::onAccept(const std::string &value) {
+	const std::vector<std::string> &labels = values();
+	int breakType;
+	if (value == labels[0]) {
+		breakType = PlainTextFormat::BREAK_PARAGRAPH_AT_NEW_LINE;
+	} else if (value == labels[1]) {
+		breakType = PlainTextFormat::BREAK_PARAGRAPH_AT_EMPTY_LINE;
+	} else if (value == labels[2]) {
+		breakType = PlainTextFormat::BREAK_PARAGRAPH_AT_EMPTY_LINE | PlainTextFormat::BREAK_PARAGRAPH_AT_LINE_WITH_INDENT;
+	} else {
+		return;
+	}
+	myBreakTypeOption.setValue(breakType);
+}
+
+// Shows the ignored-indent entry only while choice 2 ("Line With Indent") is selected.
+void BreakTypeOptionEntry::onValueSelected(int index) {
+ myPage.myIgnoredIndentEntry->setVisible(index == 2);
+}
+
+// Boolean entry for the given option that also keeps a reference to the owning page.
+CreateContentsTableOptionEntry::CreateContentsTableOptionEntry(PlainTextInfoPage &page, ZLBooleanOption &option) : ZLSimpleBooleanOptionEntry(option), myPage(page) {
+}
+
+// Nothing to release.
+CreateContentsTableOptionEntry::~CreateContentsTableOptionEntry() {
+}
+
+// Shows the empty-lines entry only while the contents-table box is checked.
+// Dereferences the page's myEmptyLinesBeforeNewSectionEntry, which the page
+// constructor assigns in the same branch that creates this entry.
+void CreateContentsTableOptionEntry::onStateChanged(bool state) {
+ myPage.myEmptyLinesBeforeNewSectionEntry->setVisible(state);
+}
diff --git a/reader/src/formats/txt/PlainTextFormat.h b/reader/src/formats/txt/PlainTextFormat.h
new file mode 100644
index 0000000..32ca258
--- /dev/null
+++ b/reader/src/formats/txt/PlainTextFormat.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __PLAINTEXTFORMAT_H__
+#define __PLAINTEXTFORMAT_H__
+
+#include <ZLInputStream.h>
+#include <ZLOptions.h>
+#include <ZLSimpleOptionEntry.h>
+#include <ZLResource.h>
+
+#include "../FormatPlugin.h"
+
+// Plain-text layout settings for one file, backed by ZL option objects.
+// Outside code reads the settings through the accessors; the option members
+// themselves are reachable only by the two friend classes.
+class PlainTextFormat {
+
+public:
+ // Paragraph break rules. The values are distinct bits: the reader code
+ // combines them with | and tests them with &.
+ enum ParagraphBreakType {
+ BREAK_PARAGRAPH_AT_NEW_LINE = 1,
+ BREAK_PARAGRAPH_AT_EMPTY_LINE = 2,
+ BREAK_PARAGRAPH_AT_LINE_WITH_INDENT = 4,
+ };
+
+ PlainTextFormat(const ZLFile &file);
+ ~PlainTextFormat() {}
+
+ // Read-only views of the current option values.
+ bool initialized() const { return InitializedOption.value(); }
+ int breakType() const { return BreakTypeOption.value(); }
+ int ignoredIndent() const { return IgnoredIndentOption.value(); }
+ int emptyLinesBeforeNewSection() const { return EmptyLinesBeforeNewSectionOption.value(); }
+ bool createContentsTable() const { return CreateContentsTableOption.value(); }
+
+private:
+ ZLBooleanOption InitializedOption;
+ ZLIntegerOption BreakTypeOption;
+ ZLIntegerRangeOption IgnoredIndentOption;
+ ZLIntegerRangeOption EmptyLinesBeforeNewSectionOption;
+ ZLBooleanOption CreateContentsTableOption;
+
+friend class PlainTextInfoPage;
+friend class PlainTextFormatDetector;
+};
+
+// Options dialog page for the plain-text settings of one file.
+class PlainTextInfoPage : public FormatInfoPage {
+
+public:
+ PlainTextInfoPage(ZLOptionsDialog &dialog, const ZLFile &file, const ZLResourceKey &key, bool showContentsEntry);
+ ~PlainTextInfoPage();
+
+private:
+ PlainTextFormat myFormat;
+
+ // Spin entries whose visibility is toggled by the friend option entries below.
+ ZLSimpleSpinOptionEntry *myIgnoredIndentEntry;
+ ZLSimpleSpinOptionEntry *myEmptyLinesBeforeNewSectionEntry;
+
+friend class BreakTypeOptionEntry;
+friend class CreateContentsTableOptionEntry;
+};
+
+// Helper that inspects a stream and fills in a PlainTextFormat
+// (it is a friend of PlainTextFormat, so it can write the options directly).
+class PlainTextFormatDetector {
+
+public:
+ PlainTextFormatDetector() {}
+ ~PlainTextFormatDetector() {}
+
+ void detect(ZLInputStream &stream, PlainTextFormat &format);
+};
+
+// Combo box entry that edits the paragraph break type option and toggles
+// the visibility of the page's "ignored indent" entry.
+class BreakTypeOptionEntry : public ZLComboOptionEntry {
+
+public:
+ BreakTypeOptionEntry(PlainTextInfoPage &page, ZLIntegerOption &breakTypeOption);
+ ~BreakTypeOptionEntry();
+
+ // Row index (0..2) that corresponds to the stored option value.
+ int initialIndex() const;
+ const std::string &initialValue() const;
+ const std::vector<std::string> &values() const;
+ void onAccept(const std::string &value);
+ void onValueSelected(int index);
+
+private:
+ PlainTextInfoPage &myPage;
+ ZLIntegerOption &myBreakTypeOption;
+};
+
+// Checkbox entry for the "create contents table" option; on state change it
+// shows or hides the page's "empty lines before new section" entry.
+class CreateContentsTableOptionEntry : public ZLSimpleBooleanOptionEntry {
+
+public:
+ CreateContentsTableOptionEntry(PlainTextInfoPage &page, ZLBooleanOption &option);
+ ~CreateContentsTableOptionEntry();
+ void onStateChanged(bool state);
+
+private:
+ PlainTextInfoPage &myPage;
+};
+
+#endif /* __PLAINTEXTFORMAT_H__ */
diff --git a/reader/src/formats/txt/TxtBookReader.cpp b/reader/src/formats/txt/TxtBookReader.cpp
new file mode 100644
index 0000000..c68ea2c
--- /dev/null
+++ b/reader/src/formats/txt/TxtBookReader.cpp
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cctype>
+
+#include "TxtBookReader.h"
+#include "../../bookmodel/BookModel.h"
+
+// Wires the text reader (for the given encoding) to the book model and keeps a
+// reference to the format settings. The counters and flags are initialised later,
+// in startDocumentHandler().
+TxtBookReader::TxtBookReader(BookModel &model, const PlainTextFormat &format, const std::string &encoding) : TxtReader(encoding), BookReader(model), myFormat(format) {
+}
+
+// Ends the current paragraph and marks the reader as being on an empty line.
+void TxtBookReader::internalEndParagraph() {
+ if (!myLastLineIsEmpty) {
+ // The counter is reset to -1 rather than 0 (fix by Hatred): with 0 the
+ // "line with indent" formatter treated the second line of a paragraph
+ // as the start of a new, indented paragraph.
+ myLineFeedCounter = -1;
+ }
+ myLastLineIsEmpty = true;
+ endParagraph();
+}
+
+// Receives a run of decoded text. Leading whitespace is only counted (a tab
+// counts as ignoredIndent() + 1 columns); if the run contains any non-space
+// character, the whole run is appended to the model, preceded by a paragraph
+// break when the "line with indent" rule applies. Always returns true.
+bool TxtBookReader::characterDataHandler(std::string &str) {
+	const char *cursor = str.data();
+	const char *const last = cursor + str.length();
+	while (cursor != last && std::isspace((unsigned char)*cursor)) {
+		if (*cursor == '\t') {
+			mySpaceCounter += myFormat.ignoredIndent() + 1; // TODO: implement single option in PlainTextFormat
+		} else {
+			++mySpaceCounter;
+		}
+		++cursor;
+	}
+	if (cursor == last) {
+		// Whitespace only: nothing to add.
+		return true;
+	}
+	myLastLineIsEmpty = false;
+
+	const bool breakAtIndent =
+		(myFormat.breakType() & PlainTextFormat::BREAK_PARAGRAPH_AT_LINE_WITH_INDENT) &&
+		myNewLine && (mySpaceCounter > myFormat.ignoredIndent());
+	if (breakAtIndent) {
+		internalEndParagraph();
+		beginParagraph();
+	}
+	addData(str);
+	if (myInsideContentsParagraph) {
+		addContentsData(str);
+	}
+	myNewLine = false;
+	return true;
+}
+
+// Called for every line break. Updates the empty-line counter, decides whether
+// the break ends a paragraph, and (when a contents table is requested) opens
+// or closes section title paragraphs. Always returns true.
+bool TxtBookReader::newLineHandler() {
+ // After a line with text the counter restarts, so that following the
+ // increment below it equals the number of consecutive empty lines seen.
+ if (!myLastLineIsEmpty) {
+ myLineFeedCounter = -1;
+ }
+ myLastLineIsEmpty = true;
+ ++myLineFeedCounter;
+ myNewLine = true;
+ mySpaceCounter = 0;
+ bool paragraphBreak =
+ (myFormat.breakType() & PlainTextFormat::BREAK_PARAGRAPH_AT_NEW_LINE) ||
+ ((myFormat.breakType() & PlainTextFormat::BREAK_PARAGRAPH_AT_EMPTY_LINE) && (myLineFeedCounter > 0));
+
+ if (myFormat.createContentsTable()) {
+ // Fix by Hatred: the comparison used to be against
+ // emptyLinesBeforeNewSection() + 1; the '+ 1' was removed because it was
+ // confusing to enter the number of empty lines decreased by one in the
+ // settings dialog.
+ if (!myInsideContentsParagraph && (myLineFeedCounter == myFormat.emptyLinesBeforeNewSection())) {
+ myInsideContentsParagraph = true;
+ internalEndParagraph();
+ insertEndOfSectionParagraph();
+ beginContentsParagraph();
+ enterTitle();
+ pushKind(SECTION_TITLE);
+ beginParagraph();
+ paragraphBreak = false;
+ }
+ // The title ends when the counter equals 1 while a title is open.
+ if (myInsideContentsParagraph && (myLineFeedCounter == 1)) {
+ exitTitle();
+ endContentsParagraph();
+ popKind();
+ myInsideContentsParagraph = false;
+ paragraphBreak = true;
+ }
+ }
+
+ if (paragraphBreak) {
+ internalEndParagraph();
+ beginParagraph();
+ }
+ return true;
+}
+
+// Prepares the model for a new document: selects the main text model, opens
+// the first regular paragraph, enters title mode and resets the line-tracking state.
+void TxtBookReader::startDocumentHandler() {
+	// Model set-up (calls kept in their original order).
+	setMainTextModel();
+	pushKind(REGULAR);
+	beginParagraph();
+	enterTitle();
+
+	// Line-tracking state private to this reader.
+	myLineFeedCounter = 0;
+	mySpaceCounter = 0;
+	myInsideContentsParagraph = false;
+	myLastLineIsEmpty = true;
+	myNewLine = true;
+}
+
+// Closes the paragraph that is still open when the input ends.
+void TxtBookReader::endDocumentHandler() {
+ internalEndParagraph();
+}
diff --git a/reader/src/formats/txt/TxtBookReader.h b/reader/src/formats/txt/TxtBookReader.h
new file mode 100644
index 0000000..e02ad2a
--- /dev/null
+++ b/reader/src/formats/txt/TxtBookReader.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __TXTBOOKREADER_H__
+#define __TXTBOOKREADER_H__
+
+#include <stack>
+
+#include "TxtReader.h"
+#include "PlainTextFormat.h"
+#include "../../bookmodel/BookReader.h"
+
+class BookModel;
+
+// Feeds the text and line-break events produced by TxtReader into a BookModel,
+// splitting paragraphs according to a PlainTextFormat.
+class TxtBookReader : public TxtReader, public BookReader {
+
+public:
+ TxtBookReader(BookModel &model, const PlainTextFormat &format, const std::string &encoding);
+ ~TxtBookReader();
+
+protected:
+ // TxtReader callbacks.
+ void startDocumentHandler();
+ void endDocumentHandler();
+
+ bool characterDataHandler(std::string &str);
+ bool newLineHandler();
+
+private:
+ void internalEndParagraph();
+
+private:
+ const PlainTextFormat &myFormat;
+
+ // Incremented on every line break; reset to -1 just before the increment
+ // when the preceding line contained text.
+ int myLineFeedCounter;
+ // True while a section title (contents entry) paragraph is open.
+ bool myInsideContentsParagraph;
+ // True until a non-space character is seen on the current line.
+ bool myLastLineIsEmpty;
+ // True from a line break until the first text of the next line is added.
+ bool myNewLine;
+ // Width of the leading whitespace counted on the current line.
+ int mySpaceCounter;
+};
+
+// Empty destructor: the reader only holds a reference to the format and plain flags/counters.
+inline TxtBookReader::~TxtBookReader() {}
+
+#endif /* __TXTBOOKREADER_H__ */
diff --git a/reader/src/formats/txt/TxtPlugin.cpp b/reader/src/formats/txt/TxtPlugin.cpp
new file mode 100644
index 0000000..b155c2f
--- /dev/null
+++ b/reader/src/formats/txt/TxtPlugin.cpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLFile.h>
+#include <ZLInputStream.h>
+
+#include "TxtPlugin.h"
+#include "TxtBookReader.h"
+#include "PlainTextFormat.h"
+
+#include "../../bookmodel/BookModel.h"
+#include "../../library/Book.h"
+
+// Empty destructor.
+TxtPlugin::~TxtPlugin() {
+}
+
+// Reports that this plugin does not provide meta information.
+bool TxtPlugin::providesMetaInfo() const {
+ return false;
+}
+
+// Accepts a file when its extension is exactly "txt".
+bool TxtPlugin::acceptsFile(const ZLFile &file) const {
+	const std::string &extension = file.extension();
+	return extension == "txt";
+}
+
+// Detects encoding and language for the book.
+// Returns false when the file has no input stream or no encoding could be determined.
+bool TxtPlugin::readMetaInfo(Book &book) const {
+	shared_ptr<ZLInputStream> input = book.file().inputStream();
+	if (input.isNull()) {
+		return false;
+	}
+	detectEncodingAndLanguage(book, *input);
+	return !book.encoding().empty();
+}
+
+// Does nothing and reports success; the parameter is deliberately unused.
+bool TxtPlugin::readLanguageAndEncoding(Book &book) const {
+ (void)book;
+ return true;
+}
+
+// Fills the model from the book's text file. When the stored format settings
+// are not initialised yet, they are detected from the stream first.
+// Returns false only when the file has no input stream.
+bool TxtPlugin::readModel(BookModel &model) const {
+	const Book &book = *model.book();
+	const ZLFile &file = book.file();
+
+	shared_ptr<ZLInputStream> input = file.inputStream();
+	if (input.isNull()) {
+		return false;
+	}
+
+	PlainTextFormat format(file);
+	if (!format.initialized()) {
+		PlainTextFormatDetector().detect(*input, format);
+	}
+
+	TxtBookReader reader(model, format, book.encoding());
+	reader.readDocument(*input);
+	return true;
+}
+
+// Creates the "Text" options page for the file, with the contents-table entry enabled.
+// The page is allocated with new and returned to the caller.
+FormatInfoPage *TxtPlugin::createInfoPage(ZLOptionsDialog &dialog, const ZLFile &file) {
+	const ZLResourceKey pageKey("Text");
+	return new PlainTextInfoPage(dialog, file, pageKey, true);
+}
diff --git a/reader/src/formats/txt/TxtPlugin.h b/reader/src/formats/txt/TxtPlugin.h
new file mode 100644
index 0000000..e3e6e50
--- /dev/null
+++ b/reader/src/formats/txt/TxtPlugin.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __TXTPLUGIN_H__
+#define __TXTPLUGIN_H__
+
+#include "../FormatPlugin.h"
+
+// Format plugin for plain text (*.txt) books.
+class TxtPlugin : public FormatPlugin {
+
+public:
+ ~TxtPlugin();
+ bool providesMetaInfo() const;
+ bool acceptsFile(const ZLFile &file) const;
+ bool readMetaInfo(Book &book) const;
+ bool readLanguageAndEncoding(Book &book) const;
+ bool readModel(BookModel &model) const;
+ FormatInfoPage *createInfoPage(ZLOptionsDialog &dialog, const ZLFile &file);
+};
+
+#endif /* __TXTPLUGIN_H__ */
diff --git a/reader/src/formats/txt/TxtReader.cpp b/reader/src/formats/txt/TxtReader.cpp
new file mode 100644
index 0000000..d2f5659
--- /dev/null
+++ b/reader/src/formats/txt/TxtReader.cpp
@@ -0,0 +1,200 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cctype>
+
+#include <ZLInputStream.h>
+
+#include "TxtReader.h"
+
+// Splits a byte stream into text runs and line breaks and reports them to the
+// owning TxtReader. This base implementation handles single-byte line
+// terminators; subclasses handle UTF-16.
+class TxtReaderCore {
+
+public:
+	TxtReaderCore(TxtReader &reader);
+	// Virtual because TxtReader owns the core through a pointer to this base
+	// class while the object may be one of the derived UTF-16 cores.
+	virtual ~TxtReaderCore() {}
+	virtual void readDocument(ZLInputStream &stream);
+
+protected:
+	TxtReader &myReader;
+};
+
+// Core for UTF-16 input: walks the buffer two bytes at a time. Byte order is
+// delegated to the subclasses through getAscii()/setAscii().
+class TxtReaderCoreUtf16 : public TxtReaderCore {
+
+public:
+ TxtReaderCoreUtf16(TxtReader &reader);
+ void readDocument(ZLInputStream &stream);
+
+protected:
+ // Returns the low byte of the 2-byte unit at ptr when its other byte is zero, otherwise '\0'.
+ virtual char getAscii(const char *ptr) = 0;
+ // Overwrites the low byte of the 2-byte unit at ptr.
+ virtual void setAscii(char *ptr, char ascii) = 0;
+};
+
+// Little-endian variant: the low byte is the first byte of each unit.
+class TxtReaderCoreUtf16LE : public TxtReaderCoreUtf16 {
+
+public:
+ TxtReaderCoreUtf16LE(TxtReader &reader);
+
+protected:
+ char getAscii(const char *ptr);
+ void setAscii(char *ptr, char ascii);
+};
+
+// Big-endian variant: the low byte is the second byte of each unit.
+class TxtReaderCoreUtf16BE : public TxtReaderCoreUtf16 {
+
+public:
+ TxtReaderCoreUtf16BE(TxtReader &reader);
+
+protected:
+ char getAscii(const char *ptr);
+ void setAscii(char *ptr, char ascii);
+};
+
+// Picks the reader core that matches the encoding name: little-endian UTF-16,
+// big-endian UTF-16, or the byte-oriented core for everything else.
+TxtReader::TxtReader(const std::string &encoding) : EncodedTextReader(encoding) {
+	TxtReaderCore *core;
+	if (encoding == ZLEncodingConverter::UTF16) {
+		core = new TxtReaderCoreUtf16LE(*this);
+	} else if (encoding == ZLEncodingConverter::UTF16BE) {
+		core = new TxtReaderCoreUtf16BE(*this);
+	} else {
+		core = new TxtReaderCore(*this);
+	}
+	myCore = core;
+}
+
+// Empty destructor; myCore is held in a shared_ptr.
+TxtReader::~TxtReader() {
+}
+
+// Reads the whole stream: opens it, runs the core between the start/end
+// document callbacks, then closes it. Does nothing when the stream cannot be opened.
+void TxtReader::readDocument(ZLInputStream &stream) {
+	if (stream.open()) {
+		startDocumentHandler();
+		myCore->readDocument(stream);
+		endDocumentHandler();
+		stream.close();
+	}
+}
+
+// Stores the reader that will receive the text and line-break callbacks.
+TxtReaderCore::TxtReaderCore(TxtReader &reader) : myReader(reader) {
+}
+
+// Forwards the reader to the base core.
+TxtReaderCoreUtf16::TxtReaderCoreUtf16(TxtReader &reader) : TxtReaderCore(reader) {
+}
+
+// Reads the stream in 2048-byte chunks. Each line (including its terminator)
+// is converted and passed to characterDataHandler(), followed by a
+// newLineHandler() call. "\n", "\r" and "\r\n" each count as one line break;
+// other ASCII whitespace except tab is replaced by a space before conversion.
+void TxtReaderCore::readDocument(ZLInputStream &stream) {
+	const std::size_t BUFSIZE = 2048;
+	char *buffer = new char[BUFSIZE];
+	std::string str;
+	std::size_t length;
+	// True when the previous chunk ended with '\r': a '\n' at the start of the
+	// next chunk then belongs to the same "\r\n" pair and must not be reported
+	// as a second line break.
+	bool pendingCR = false;
+	do {
+		length = stream.read(buffer, BUFSIZE);
+		char *start = buffer;
+		const char *end = buffer + length;
+		if (pendingCR && start != end && *start == '\n') {
+			++start;
+		}
+		pendingCR = (length > 0) && (*(end - 1) == '\r');
+		for (char *ptr = start; ptr != end; ++ptr) {
+			if (*ptr == '\n' || *ptr == '\r') {
+				bool skipNewLine = false;
+				if (*ptr == '\r' && (ptr + 1) != end && *(ptr + 1) == '\n') {
+					// Collapse "\r\n" into a single '\n' terminator.
+					skipNewLine = true;
+					*ptr = '\n';
+				}
+				if (start != ptr) {
+					str.erase();
+					myReader.myConverter->convert(str, start, ptr + 1);
+					myReader.characterDataHandler(str);
+				}
+				if (skipNewLine) {
+					++ptr;
+				}
+				start = ptr + 1;
+				myReader.newLineHandler();
+			} else if (((*ptr) & 0x80) == 0 && std::isspace((unsigned char)*ptr)) {
+				if (*ptr != '\t') {
+					*ptr = ' ';
+				}
+			}
+		}
+		// Text after the last line break of this chunk.
+		if (start != end) {
+			str.erase();
+			myReader.myConverter->convert(str, start, end);
+			myReader.characterDataHandler(str);
+		}
+	} while (length == BUFSIZE);
+	delete[] buffer;
+}
+
+// UTF-16 counterpart of TxtReaderCore::readDocument(): walks the chunk in
+// 2-byte units and uses getAscii()/setAscii() to recognise and rewrite line
+// terminators and ASCII whitespace. Only complete units that lie inside the
+// bytes actually read are inspected; a trailing odd byte is passed to the
+// converter untouched.
+void TxtReaderCoreUtf16::readDocument(ZLInputStream &stream) {
+	const std::size_t BUFSIZE = 2048;
+	char *buffer = new char[BUFSIZE];
+	std::string str;
+	std::size_t length;
+	// True when the previous (full) chunk ended with a '\r' unit, so that a
+	// leading '\n' unit of the next chunk is not reported as a second break.
+	bool pendingCR = false;
+	do {
+		length = stream.read(buffer, BUFSIZE);
+		char *start = buffer;
+		const char *end = buffer + length;
+		if (pendingCR && (end - start >= 2) && getAscii(start) == '\n') {
+			start += 2;
+		}
+		// BUFSIZE is even, so end - 2 is unit-aligned for a full chunk.
+		pendingCR = (length == BUFSIZE) && (getAscii(end - 2) == '\r');
+		for (char *ptr = start; end - ptr >= 2; ptr += 2) {
+			const char chr = getAscii(ptr);
+			if (chr == '\n' || chr == '\r') {
+				bool skipNewLine = false;
+				// Look ahead only when a complete following unit is available.
+				if (chr == '\r' && (end - ptr >= 4) && getAscii(ptr + 2) == '\n') {
+					skipNewLine = true;
+					setAscii(ptr, '\n');
+				}
+				if (start != ptr) {
+					str.erase();
+					myReader.myConverter->convert(str, start, ptr + 2);
+					myReader.characterDataHandler(str);
+				}
+				if (skipNewLine) {
+					ptr += 2;
+				}
+				start = ptr + 2;
+				myReader.newLineHandler();
+			} else if (chr != 0 && (chr & 0x80) == 0 && std::isspace((unsigned char)chr)) {
+				// The high-bit test is done on the decoded character so that it
+				// is correct for both byte orders.
+				if (chr != '\t') {
+					setAscii(ptr, ' ');
+				}
+			}
+		}
+		if (start != end) {
+			str.erase();
+			myReader.myConverter->convert(str, start, end);
+			myReader.characterDataHandler(str);
+		}
+	} while (length == BUFSIZE);
+	delete[] buffer;
+}
+
+// Forwards the reader to the UTF-16 base core.
+TxtReaderCoreUtf16LE::TxtReaderCoreUtf16LE(TxtReader &reader) : TxtReaderCoreUtf16(reader) {
+}
+
+// Returns the first byte of the unit when the second byte is zero, otherwise '\0'.
+char TxtReaderCoreUtf16LE::getAscii(const char *ptr) {
+	if (ptr[1] != '\0') {
+		return '\0';
+	}
+	return ptr[0];
+}
+
+// Overwrites the first (low) byte of the unit.
+void TxtReaderCoreUtf16LE::setAscii(char *ptr, char ascii) {
+	ptr[0] = ascii;
+}
+
+// Forwards the reader to the UTF-16 base core.
+TxtReaderCoreUtf16BE::TxtReaderCoreUtf16BE(TxtReader &reader) : TxtReaderCoreUtf16(reader) {
+}
+
+// Returns the second byte of the unit when the first byte is zero, otherwise '\0'.
+char TxtReaderCoreUtf16BE::getAscii(const char *ptr) {
+	if (ptr[0] != '\0') {
+		return '\0';
+	}
+	return ptr[1];
+}
+
+// Overwrites the second (low) byte of the unit.
+void TxtReaderCoreUtf16BE::setAscii(char *ptr, char ascii) {
+	ptr[1] = ascii;
+}
diff --git a/reader/src/formats/txt/TxtReader.h b/reader/src/formats/txt/TxtReader.h
new file mode 100644
index 0000000..518ba8e
--- /dev/null
+++ b/reader/src/formats/txt/TxtReader.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __TXTREADER_H__
+#define __TXTREADER_H__
+
+#include <string>
+
+#include <ZLEncodingConverter.h>
+
+#include "../EncodedTextReader.h"
+
+class ZLInputStream;
+class TxtReaderCore;
+
+// Base class for plain-text readers. readDocument() drives a TxtReaderCore,
+// which reports text runs and line breaks through the pure virtual handlers.
+class TxtReader : public EncodedTextReader {
+
+public:
+ void readDocument(ZLInputStream &stream);
+
+protected:
+ TxtReader(const std::string &encoding);
+ virtual ~TxtReader();
+
+protected:
+ // Called once before and once after the stream is processed.
+ virtual void startDocumentHandler() = 0;
+ virtual void endDocumentHandler() = 0;
+
+ // Called with converted text and at every line break, respectively.
+ virtual bool characterDataHandler(std::string &str) = 0;
+ virtual bool newLineHandler() = 0;
+
+private:
+ // Encoding-specific core, chosen in the constructor.
+ shared_ptr<TxtReaderCore> myCore;
+
+friend class TxtReaderCore;
+friend class TxtReaderCoreUtf16;
+friend class TxtReaderCoreUtf16BE;
+};
+
+#endif /* __TXTREADER_H__ */
diff --git a/reader/src/formats/util/EntityFilesCollector.cpp b/reader/src/formats/util/EntityFilesCollector.cpp
new file mode 100644
index 0000000..075bd29
--- /dev/null
+++ b/reader/src/formats/util/EntityFilesCollector.cpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLStringUtil.h>
+#include <ZLibrary.h>
+#include <ZLFile.h>
+#include <ZLDir.h>
+
+#include "EntityFilesCollector.h"
+
+// Singleton instance; created on the first call to Instance().
+EntityFilesCollector *EntityFilesCollector::ourInstance = 0;
+
+// Returns the shared collector, creating it on first use.
+EntityFilesCollector &EntityFilesCollector::Instance() {
+	if (ourInstance != 0) {
+		return *ourInstance;
+	}
+	ourInstance = new EntityFilesCollector();
+	return *ourInstance;
+}
+
+// Returns the paths of the "*.ent" files found in
+// <application dir>/formats/<format>. The result is computed once per format
+// and cached; an unreadable directory yields an empty (cached) list.
+const std::vector<std::string> &EntityFilesCollector::externalDTDs(const std::string &format) {
+	typedef std::map<std::string,std::vector<std::string> > CollectionMap;
+
+	// insert() leaves an existing entry untouched and tells us whether one was there.
+	const std::pair<CollectionMap::iterator,bool> slot =
+		myCollections.insert(std::make_pair(format, std::vector<std::string>()));
+	std::vector<std::string> &entityFiles = slot.first->second;
+	if (!slot.second) {
+		return entityFiles;
+	}
+
+	std::string directoryName =
+		ZLibrary::ApplicationDirectory() + ZLibrary::FileNameDelimiter +
+		"formats" + ZLibrary::FileNameDelimiter + format;
+	shared_ptr<ZLDir> dtdDirectory = ZLFile(directoryName).directory();
+	if (dtdDirectory.isNull()) {
+		return entityFiles;
+	}
+
+	std::vector<std::string> names;
+	dtdDirectory->collectFiles(names, false);
+	for (std::vector<std::string>::const_iterator name = names.begin(); name != names.end(); ++name) {
+		if (ZLStringUtil::stringEndsWith(*name, ".ent")) {
+			entityFiles.push_back(dtdDirectory->itemPath(*name));
+		}
+	}
+	return entityFiles;
+}
+
+// Private constructor; instances are obtained through Instance().
+EntityFilesCollector::EntityFilesCollector() {
+}
diff --git a/reader/src/formats/util/EntityFilesCollector.h b/reader/src/formats/util/EntityFilesCollector.h
new file mode 100644
index 0000000..9967b3d
--- /dev/null
+++ b/reader/src/formats/util/EntityFilesCollector.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __ENTITYFILESCOLLECTOR_H__
+#define __ENTITYFILESCOLLECTOR_H__
+
+#include <map>
+#include <vector>
+#include <string>
+
+// Singleton that looks up and caches, per format name, the list of external
+// entity (".ent") files shipped with the application.
+class EntityFilesCollector {
+
+public:
+ static EntityFilesCollector &Instance();
+
+ const std::vector<std::string> &externalDTDs(const std::string &format);
+
+private:
+ EntityFilesCollector();
+
+private:
+ static EntityFilesCollector *ourInstance;
+ // Format name -> cached list of entity file paths.
+ std::map<std::string,std::vector<std::string> > myCollections;
+};
+
+#endif /* __ENTITYFILESCOLLECTOR_H__ */
diff --git a/reader/src/formats/util/MergedStream.cpp b/reader/src/formats/util/MergedStream.cpp
new file mode 100644
index 0000000..1a26a33
--- /dev/null
+++ b/reader/src/formats/util/MergedStream.cpp
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2008-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include "MergedStream.h"
+
+// (Re)starts reading from the first sub-stream.
+// Returns false when there is no first stream or it cannot be opened.
+bool MergedStream::open() {
+	close();
+	resetToStart();
+	myOffset = 0;
+	myCurrentStream = nextStream();
+	if (myCurrentStream.isNull()) {
+		return false;
+	}
+	return myCurrentStream->open();
+}
+
+// Reads up to maxSize bytes across consecutive sub-streams. Whenever a
+// sub-stream delivers fewer bytes than requested, one '\n' is emitted (and
+// counted) and reading moves on to the next sub-stream. A null buffer means
+// "skip": bytes are counted but not stored. Returns the number of bytes produced.
+std::size_t MergedStream::read(char *buffer, std::size_t maxSize) {
+	std::size_t remaining = maxSize;
+	while ((remaining > 0) && !myCurrentStream.isNull()) {
+		const std::size_t received = myCurrentStream->read(buffer, remaining);
+		remaining -= received;
+		if (buffer != 0) {
+			buffer += received;
+		}
+		if (remaining == 0) {
+			break;
+		}
+		// Current sub-stream gave less than requested: separate it from the next one.
+		if (buffer != 0) {
+			*buffer = '\n';
+			++buffer;
+		}
+		--remaining;
+		myCurrentStream = nextStream();
+		if (myCurrentStream.isNull() || !myCurrentStream->open()) {
+			break;
+		}
+	}
+	const std::size_t produced = maxSize - remaining;
+	myOffset += produced;
+	return produced;
+}
+
+// Drops the reference to the current sub-stream.
+void MergedStream::close() {
+ myCurrentStream.reset();
+}
+
+// Moves forward in the merged stream by reading and discarding bytes.
+// With absoluteOffset the target is measured from the start of the merged
+// stream, otherwise from the current position. Only forward movement is
+// supported: a request that would move backwards (or not at all) is ignored,
+// instead of the negative distance being turned into a huge unsigned read
+// that would consume every remaining sub-stream.
+void MergedStream::seek(int offset, bool absoluteOffset) {
+	if (absoluteOffset) {
+		offset -= myOffset;
+	}
+	if (offset > 0) {
+		read(0, offset);
+	}
+}
+
+// Number of bytes produced by read() since the last open().
+std::size_t MergedStream::offset() const {
+ return myOffset;
+}
+
+// The total size is not available for a merged stream; always reports 0.
+std::size_t MergedStream::sizeOfOpened() {
+ // couldn't be implemented
+ return 0;
+}
diff --git a/reader/src/formats/util/MergedStream.h b/reader/src/formats/util/MergedStream.h
new file mode 100644
index 0000000..3f982ee
--- /dev/null
+++ b/reader/src/formats/util/MergedStream.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2008-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __MERGEDSTREAM_H__
+#define __MERGEDSTREAM_H__
+
+#include <shared_ptr.h>
+#include <ZLInputStream.h>
+
+// Input stream that presents a sequence of sub-streams as one stream, with a
+// '\n' inserted between consecutive sub-streams. Subclasses supply the sequence.
+class MergedStream : public ZLInputStream {
+
+protected:
+ // Returns the next sub-stream of the sequence; a null pointer ends reading.
+ virtual shared_ptr<ZLInputStream> nextStream() = 0;
+ // Rewinds the sequence so that nextStream() starts from the first sub-stream again.
+ virtual void resetToStart() = 0;
+
+private:
+ bool open();
+ std::size_t read(char *buffer, std::size_t maxSize);
+ void close();
+ void seek(int offset, bool absoluteOffset);
+ std::size_t offset() const;
+ std::size_t sizeOfOpened();
+
+private:
+ shared_ptr<ZLInputStream> myCurrentStream;
+ // Bytes produced since the last open().
+ std::size_t myOffset;
+};
+
+#endif /* __MERGEDSTREAM_H__ */
diff --git a/reader/src/formats/util/MiscUtil.cpp b/reader/src/formats/util/MiscUtil.cpp
new file mode 100644
index 0000000..1a91406
--- /dev/null
+++ b/reader/src/formats/util/MiscUtil.cpp
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cstdlib>
+
+#include <ZLApplication.h>
+#include <ZLFile.h>
+#include <ZLStringUtil.h>
+
+#include "MiscUtil.h"
+
+// Classifies a hyperlink target:
+//  - no http://, https:// or ftp:// scheme: EXTERNAL_HYPERLINK for "mailto:",
+//    otherwise INTERNAL_HYPERLINK;
+//  - with such a scheme: BOOK_HYPERLINK for FeedBooks "stanza" URLs and for
+//    links ending in .epub/.mobi/.chm/.fb2, otherwise EXTERNAL_HYPERLINK.
+// Matching is case-insensitive for ASCII letters.
+FBTextKind MiscUtil::referenceType(const std::string &link) {
+	// Lower-case ASCII letters only; other bytes (e.g. UTF-8 sequences) are
+	// left untouched, which is enough for the scheme and extension checks below.
+	std::string lowerCasedLink = link;
+	for (std::size_t i = 0; i < lowerCasedLink.length(); ++i) {
+		const char c = lowerCasedLink[i];
+		if (c >= 'A' && c <= 'Z') {
+			lowerCasedLink[i] = (char)(c - 'A' + 'a');
+		}
+	}
+	bool isFileReference =
+		ZLStringUtil::stringStartsWith(lowerCasedLink, "http://") ||
+		ZLStringUtil::stringStartsWith(lowerCasedLink, "https://") ||
+		ZLStringUtil::stringStartsWith(lowerCasedLink, "ftp://");
+	if (!isFileReference) {
+		return ZLStringUtil::stringStartsWith(lowerCasedLink, "mailto:") ? EXTERNAL_HYPERLINK : INTERNAL_HYPERLINK;
+	}
+	static const std::string FeedBooksPrefix0 = "http://feedbooks.com/book/stanza/";
+	static const std::string FeedBooksPrefix1 = "http://www.feedbooks.com/book/stanza/";
+	bool isBookHyperlink =
+		ZLStringUtil::stringStartsWith(lowerCasedLink, FeedBooksPrefix0) ||
+		ZLStringUtil::stringStartsWith(lowerCasedLink, FeedBooksPrefix1) ||
+		ZLStringUtil::stringEndsWith(lowerCasedLink, ".epub") ||
+		ZLStringUtil::stringEndsWith(lowerCasedLink, ".mobi") ||
+		ZLStringUtil::stringEndsWith(lowerCasedLink, ".chm") ||
+		ZLStringUtil::stringEndsWith(lowerCasedLink, ".fb2");
+	return isBookHyperlink ? BOOK_HYPERLINK : EXTERNAL_HYPERLINK;
+}
+
+// Returns the part of the file's path that precedes the HTML file name.
+// When the short name directly follows a ':' in the path, the directory part
+// of the short name (up to and including its last '/') is kept in the prefix.
+std::string MiscUtil::htmlDirectoryPrefix(const std::string &fileName) {
+	ZLFile file(fileName);
+	const std::string shortName = file.name(false);
+	const std::string path = file.path();
+	const std::size_t pathLength = path.length();
+	const std::size_t nameLength = shortName.length();
+
+	std::size_t prefixLength = pathLength - nameLength;
+	if ((pathLength > nameLength) && (path[prefixLength - 1] == ':')) {
+		// rfind() yields npos when there is no '/', and npos + 1 wraps to 0.
+		prefixLength += shortName.rfind('/') + 1;
+	}
+	return path.substr(0, prefixLength);
+}
+
+// Returns the HTML file name part of the file's path, i.e. everything after
+// the prefix computed by the same rule as in htmlDirectoryPrefix().
+std::string MiscUtil::htmlFileName(const std::string &fileName) {
+	ZLFile file(fileName);
+	const std::string shortName = file.name(false);
+	const std::string path = file.path();
+	const std::size_t pathLength = path.length();
+	const std::size_t nameLength = shortName.length();
+
+	std::size_t nameStart = pathLength - nameLength;
+	if ((pathLength > nameLength) && (path[nameStart - 1] == ':')) {
+		// rfind() yields npos when there is no '/', and npos + 1 wraps to 0.
+		nameStart += shortName.rfind('/') + 1;
+	}
+	return path.substr(nameStart);
+}
+
+// Returns the value (0..15) of a hexadecimal digit, or -1 for any other character.
+static int decodeHtmlURLHexDigit(char c) {
+	if (c >= '0' && c <= '9') {
+		return c - '0';
+	}
+	if (c >= 'a' && c <= 'f') {
+		return c - 'a' + 10;
+	}
+	if (c >= 'A' && c <= 'F') {
+		return c - 'A' + 10;
+	}
+	return -1;
+}
+
+// Decodes %XX escapes in a URL. A '%' that is not followed by exactly two
+// hexadecimal digits is copied through unchanged (previously such input was
+// handed to strtol, which accepts signs and whitespace and turned e.g. "%zz"
+// into a NUL character).
+std::string MiscUtil::decodeHtmlURL(const std::string &encoded) {
+	std::string decoded;
+	const int len = encoded.length();
+	decoded.reserve(len);
+	for (int i = 0; i < len; i++) {
+		if ((encoded[i] == '%') && (i < len - 2)) {
+			const int high = decodeHtmlURLHexDigit(encoded[i + 1]);
+			const int low = decodeHtmlURLHexDigit(encoded[i + 2]);
+			if (high >= 0 && low >= 0) {
+				decoded += (char)(high * 16 + low);
+				i += 2;
+				continue;
+			}
+		}
+		decoded += encoded[i];
+	}
+	return decoded;
+}
diff --git a/reader/src/formats/util/MiscUtil.h b/reader/src/formats/util/MiscUtil.h
new file mode 100644
index 0000000..c47d84a
--- /dev/null
+++ b/reader/src/formats/util/MiscUtil.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __MISCUTIL_H__
+#define __MISCUTIL_H__
+
+#include <string>
+
+#include "../../bookmodel/FBTextKind.h"
+/*
+ * Collection of static helper functions for classifying links and for
+ * splitting/decoding file names used by the format readers.
+ * The constructor is private; only the static functions are public.
+ */
+class MiscUtil {
+
+private:
+ MiscUtil();
+
+public:
+ static FBTextKind referenceType(const std::string &link); // hyperlink kind for the given link
+ static std::string htmlDirectoryPrefix(const std::string &fileName); // path part in front of the file name
+ static std::string htmlFileName(const std::string &fileName); // path part htmlDirectoryPrefix() leaves out
+ static std::string decodeHtmlURL(const std::string &encodedURL); // replaces "%XX" escapes by the encoded byte
+};
+
+#endif /* __MISCUTIL_H__ */
diff --git a/reader/src/formats/util/TextFormatDetector.cpp b/reader/src/formats/util/TextFormatDetector.cpp
new file mode 100644
index 0000000..4a3ef67
--- /dev/null
+++ b/reader/src/formats/util/TextFormatDetector.cpp
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cstring>
+#include <cctype>
+#include <algorithm>
+
+#include <ZLInputStream.h>
+#include <ZLUnicodeUtil.h>
+
+#include "TextFormatDetector.h"
+
+// The detector keeps no state, so there is nothing to set up.
+TextFormatDetector::TextFormatDetector() {}
+
+// Nothing to release.
+TextFormatDetector::~TextFormatDetector() {}
+
+/*
+ * Reports whether the content of stream starts, after optional leading
+ * whitespace, with "<html>" (compared after lower-casing).
+ *
+ * The stream is opened and closed by this function; false is returned if
+ * it cannot be opened.
+ *
+ * The six significant bytes are accumulated across reads. The previous
+ * implementation overwrote the collected bytes on every read, so a prefix
+ * split between two chunks was never recognised.
+ */
+bool TextFormatDetector::isHtml(ZLInputStream &stream) const {
+    if (!stream.open()) {
+        return false;
+    }
+
+    const std::size_t bufferSize = 1024;
+    const std::size_t prefixLength = 6; // length of "<html>"
+    char buffer[bufferSize];
+    std::string prefix;
+    bool skippingSpaces = true;
+    while (prefix.length() < prefixLength) {
+        const std::size_t size = stream.read(buffer, bufferSize);
+        if (size == 0) {
+            break;
+        }
+        std::size_t index = 0;
+        if (skippingSpaces) {
+            while ((index < size) && std::isspace((unsigned char)buffer[index])) {
+                ++index;
+            }
+            if (index == size) {
+                // The whole chunk was whitespace; keep skipping in the next one.
+                continue;
+            }
+            skippingSpaces = false;
+        }
+        prefix.append(buffer + index, std::min(prefixLength - prefix.length(), size - index));
+    }
+    stream.close();
+    return ZLUnicodeUtil::toLower(prefix) == "<html>";
+}
+
+/*
+ * Reports whether the first five bytes of stream are "PPL\r\n".
+ * The stream is opened and closed by this function; false is returned if
+ * it cannot be opened or delivers fewer than five bytes.
+ */
+bool TextFormatDetector::isPPL(ZLInputStream &stream) const {
+    static const char SIGNATURE[] = "PPL\r\n";
+    const std::size_t signatureLength = sizeof(SIGNATURE) - 1;
+
+    if (!stream.open()) {
+        return false;
+    }
+
+    char head[signatureLength];
+    bool matches = false;
+    if (stream.read(head, signatureLength) == signatureLength) {
+        matches = std::strncmp(head, SIGNATURE, signatureLength) == 0;
+    }
+    stream.close();
+    return matches;
+}
diff --git a/reader/src/formats/util/TextFormatDetector.h b/reader/src/formats/util/TextFormatDetector.h
new file mode 100644
index 0000000..c86b90b
--- /dev/null
+++ b/reader/src/formats/util/TextFormatDetector.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __TEXTFORMATDETECTOR_H__
+#define __TEXTFORMATDETECTOR_H__
+
+class ZLInputStream;
+/*
+ * Detects the format of a text stream by looking at its first bytes.
+ * Both checks open the given stream themselves and close it again.
+ */
+class TextFormatDetector {
+
+public:
+ TextFormatDetector();
+ ~TextFormatDetector();
+
+ bool isHtml(ZLInputStream &stream) const; // true if the first non-whitespace bytes are "<html>" (any case)
+ bool isPPL(ZLInputStream &stream) const; // true if the stream starts with "PPL\r\n"
+};
+
+#endif /* __TEXTFORMATDETECTOR_H__ */
diff --git a/reader/src/formats/util/XMLTextStream.cpp b/reader/src/formats/util/XMLTextStream.cpp
new file mode 100644
index 0000000..19343a1
--- /dev/null
+++ b/reader/src/formats/util/XMLTextStream.cpp
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2008-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cstring>
+
+#include <ZLXMLReader.h>
+#include <ZLUnicodeUtil.h>
+
+#include <ZLPlainAsynchronousInputStream.h>
+
+#include "XMLTextStream.h"
+/*
+ * XML reader that appends the character data it receives to an external
+ * string. Collection starts once an element whose lower-cased name equals
+ * the lower-cased start tag has been opened, or right away when the start
+ * tag is empty.
+ */
+class XMLTextReader : public ZLXMLReader {
+
+public:
+ XMLTextReader(std::string &buffer, const std::string &startTag);
+
+private:
+ void startElementHandler(const char *tag, const char **attributes);
+ void characterDataHandler(const char *text, std::size_t len);
+
+private:
+ const std::string myStartTag; // lower-cased name of the element that starts collection
+ std::string &myBuffer; // destination of the collected text; referenced, not owned
+ bool myStarted; // true once collection has started
+};
+
+/*
+ * Binds the reader to the output buffer and remembers the lower-cased
+ * start tag. With an empty start tag, collection is active from the
+ * beginning (myStarted relies on myStartTag being initialised first).
+ */
+XMLTextReader::XMLTextReader(std::string &buffer, const std::string &startTag) :
+    myStartTag(ZLUnicodeUtil::toLower(startTag)),
+    myBuffer(buffer),
+    myStarted(myStartTag.empty()) {
+}
+
+/*
+ * Switches text collection on when the start tag is seen for the first
+ * time. Tag names are compared after lower-casing.
+ */
+void XMLTextReader::startElementHandler(const char *tag, const char**) {
+    if (myStarted) {
+        return;
+    }
+    if (ZLUnicodeUtil::toLower(tag) == myStartTag) {
+        myStarted = true;
+    }
+}
+
+/*
+ * Appends the given character data to the output buffer; data arriving
+ * before collection has started is dropped.
+ */
+void XMLTextReader::characterDataHandler(const char *text, std::size_t len) {
+    if (!myStarted) {
+        return;
+    }
+    myBuffer.append(text, len);
+}
+
+/*
+ * Wraps the given base stream. The 2048-byte transfer buffer is allocated
+ * here; the XML reader is created writing into myDataBuffer.
+ */
+XMLTextStream::XMLTextStream(shared_ptr<ZLInputStream> base, const std::string &startTag) :
+    myBase(base),
+    myStreamBuffer(2048, '\0') {
+    myReader = new XMLTextReader(myDataBuffer, startTag);
+}
+
+// No explicit cleanup is performed here.
+XMLTextStream::~XMLTextStream() {}
+
+/*
+ * (Re)opens the stream: closes any previous session, opens the base
+ * stream, creates a fresh asynchronous input stream for the XML reader
+ * and resets the offset. Returns false if there is no base stream or it
+ * cannot be opened.
+ */
+bool XMLTextStream::open() {
+    close();
+
+    const bool baseOpened = !myBase.isNull() && myBase->open();
+    if (!baseOpened) {
+        return false;
+    }
+
+    myOffset = 0;
+    myStream = new ZLPlainAsynchronousInputStream();
+    return true;
+}
+
+/*
+ * Delivers up to maxSize bytes of extracted text.
+ *
+ * The base stream is read in chunks of 2048 bytes which are pushed through
+ * the XML reader until enough text has accumulated, the base stream is
+ * exhausted or the reader reports failure. If buffer is 0 the text is
+ * consumed without being copied (used by seek()).
+ * Returns the number of bytes consumed.
+ */
+std::size_t XMLTextStream::read(char *buffer, std::size_t maxSize) {
+    for (;;) {
+        if (myDataBuffer.size() >= maxSize) {
+            break;
+        }
+        const std::size_t chunk = myBase->read((char*)myStreamBuffer.data(), 2048);
+        if (chunk == 0) {
+            break;
+        }
+        myStream->setBuffer(myStreamBuffer.data(), chunk);
+        if (!myReader->readDocument(myStream)) {
+            break;
+        }
+    }
+
+    const std::size_t delivered = std::min(myDataBuffer.size(), maxSize);
+    if (buffer != 0) {
+        std::memcpy(buffer, myDataBuffer.data(), delivered);
+    }
+    myDataBuffer.erase(0, delivered);
+    myOffset += delivered;
+    return delivered;
+}
+
+/*
+ * Ends the current session: signals end-of-input to the XML reader so it
+ * can finish, drops the asynchronous stream, closes the base stream and
+ * discards any text that was not consumed.
+ *
+ * open() calls close() before it checks whether a base stream exists, so
+ * the base stream must be null-checked here as well.
+ */
+void XMLTextStream::close() {
+    if (!myStream.isNull()) {
+        myStream->setEof();
+        myReader->readDocument(myStream);
+        myStream.reset();
+    }
+    if (!myBase.isNull()) {
+        myBase->close();
+    }
+    myDataBuffer.erase();
+}
+
+/*
+ * Moves the read position forward by reading and discarding text.
+ *
+ * Only forward movement is possible. A request that resolves to a
+ * non-positive distance is ignored; previously a negative distance was
+ * converted to a huge unsigned size and consumed the rest of the stream.
+ */
+void XMLTextStream::seek(int offset, bool absoluteOffset) {
+    if (absoluteOffset) {
+        offset -= myOffset;
+    }
+    if (offset > 0) {
+        read(0, offset);
+    }
+}
+
+// Number of text bytes consumed through read() since the last open().
+std::size_t XMLTextStream::offset() const { return myOffset; }
+
+/*
+ * The size of the extracted text cannot be determined here,
+ * so 0 is always reported.
+ */
+std::size_t XMLTextStream::sizeOfOpened() {
+    const std::size_t unknownSize = 0;
+    return unknownSize;
+}
diff --git a/reader/src/formats/util/XMLTextStream.h b/reader/src/formats/util/XMLTextStream.h
new file mode 100644
index 0000000..f3151c6
--- /dev/null
+++ b/reader/src/formats/util/XMLTextStream.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2008-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __XMLTEXTSTREAM_H__
+#define __XMLTEXTSTREAM_H__
+
+#include <shared_ptr.h>
+#include <ZLInputStream.h>
+#include <ZLAsynchronousInputStream.h>
+
+class XMLTextReader;
+/*
+ * Input stream that delivers the character data of an XML document read
+ * from a base stream. Data from the base stream is passed through an
+ * XMLTextReader; the text it collects is what read() hands out.
+ */
+class XMLTextStream : public ZLInputStream {
+
+public:
+ XMLTextStream(shared_ptr<ZLInputStream> base, const std::string &startTag);
+ ~XMLTextStream();
+
+private:
+ bool open();
+ std::size_t read(char *buffer, std::size_t maxSize);
+ void close();
+ void seek(int offset, bool absoluteOffset); // forward movement only
+ std::size_t offset() const;
+ std::size_t sizeOfOpened(); // always 0
+
+private:
+ shared_ptr<ZLInputStream> myBase; // source of the raw XML
+ shared_ptr<XMLTextReader> myReader; // parser that fills myDataBuffer
+ shared_ptr<ZLAsynchronousInputStream> myStream; // feeds chunks to myReader; non-null while open
+ std::string myStreamBuffer; // transfer buffer for chunks read from myBase
+ std::string myDataBuffer; // extracted text not yet consumed
+ std::size_t myOffset; // text bytes consumed since open()
+};
+
+#endif /* __XMLTEXTSTREAM_H__ */
diff --git a/reader/src/formats/xhtml/XHTMLReader.cpp b/reader/src/formats/xhtml/XHTMLReader.cpp
new file mode 100644
index 0000000..6e4ba59
--- /dev/null
+++ b/reader/src/formats/xhtml/XHTMLReader.cpp
@@ -0,0 +1,715 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cstring>
+#include <cctype>
+
+#include <ZLFile.h>
+#include <ZLFileUtil.h>
+#include <ZLFileImage.h>
+#include <ZLUnicodeUtil.h>
+#include <ZLStringUtil.h>
+#include <ZLXMLNamespace.h>
+#include <ZLInputStream.h>
+#include <ZLLogger.h>
+
+#include "XHTMLReader.h"
+#include "../util/EntityFilesCollector.h"
+#include "../util/MiscUtil.h"
+#include "../css/StyleSheetParser.h"
+
+#include "../../bookmodel/BookReader.h"
+#include "../../bookmodel/BookModel.h"
+// Lower-cased tag name -> handler; filled by fillTagTable()/addAction() and shared by all readers.
+std::map<std::string,XHTMLTagAction*> XHTMLReader::ourTagActions;
+
+// Out-of-line definition of the virtual destructor; nothing to release.
+XHTMLTagAction::~XHTMLTagAction() {}
+
+// Gives tag actions access to the book model writer of the given reader.
+BookReader &XHTMLTagAction::bookReader(XHTMLReader &reader) {
+    BookReader &modelReader = reader.myModelReader;
+    return modelReader;
+}
+
+// Gives tag actions access to the path prefix of the file being read.
+const std::string &XHTMLTagAction::pathPrefix(XHTMLReader &reader) {
+    const std::string &prefix = reader.myPathPrefix;
+    return prefix;
+}
+
+// Forwards to XHTMLReader::beginParagraph() on behalf of a tag action.
+void XHTMLTagAction::beginParagraph(XHTMLReader &reader) { reader.beginParagraph(); }
+
+// Forwards to XHTMLReader::endParagraph() on behalf of a tag action.
+void XHTMLTagAction::endParagraph(XHTMLReader &reader) { reader.endParagraph(); }
+/*
+ * Action registered for the "style" tag: while a text/css style element
+ * is open, its character data is fed to a style sheet table parser.
+ */
+class XHTMLTagStyleAction : public XHTMLTagAction {
+
+public:
+ void doAtStart(XHTMLReader &reader, const char **xmlattributes);
+ void doAtEnd(XHTMLReader &reader);
+};
+/*
+ * Action registered for the "link" tag: parses an external text/css
+ * style sheet referenced with rel="stylesheet" into the reader's table.
+ */
+class XHTMLTagLinkAction : public XHTMLTagAction {
+
+public:
+ void doAtStart(XHTMLReader &reader, const char **xmlattributes);
+ void doAtEnd(XHTMLReader &reader);
+};
+/*
+ * Action registered for "p", "div", "dt", "td" and "th": begins a
+ * paragraph at the start tag and ends it at the end tag.
+ */
+class XHTMLTagParagraphAction : public XHTMLTagAction {
+
+public:
+ void doAtStart(XHTMLReader &reader, const char **xmlattributes);
+ void doAtEnd(XHTMLReader &reader);
+};
+/*
+ * Action registered for "body": switches the reader's read state to
+ * READ_BODY at the start tag and back to READ_NOTHING at the end tag.
+ */
+class XHTMLTagBodyAction : public XHTMLTagAction {
+
+public:
+ void doAtStart(XHTMLReader &reader, const char **xmlattributes);
+ void doAtEnd(XHTMLReader &reader);
+};
+/*
+ * Action registered for "br": ends the current paragraph and begins a
+ * new one at the start tag; the end tag does nothing.
+ */
+class XHTMLTagRestartParagraphAction : public XHTMLTagAction {
+
+public:
+ void doAtStart(XHTMLReader &reader, const char **xmlattributes);
+ void doAtEnd(XHTMLReader &reader);
+};
+/*
+ * Action registered for "img", "object" and "image": adds the image file
+ * named by the attribute that myPredicate selects to the book model.
+ */
+class XHTMLTagImageAction : public XHTMLTagAction {
+
+public:
+ XHTMLTagImageAction(shared_ptr<ZLXMLReader::AttributeNamePredicate> predicate);
+ XHTMLTagImageAction(const std::string &attributeName); // selects the attribute by fixed name
+
+ void doAtStart(XHTMLReader &reader, const char **xmlattributes);
+ void doAtEnd(XHTMLReader &reader);
+
+private:
+ shared_ptr<ZLXMLReader::AttributeNamePredicate> myPredicate; // picks the attribute holding the file name
+};
+/*
+ * Predicate for the XLink "href" attribute that only accepts while it is
+ * enabled; XHTMLTagSvgAction enables it between an "svg" start and end tag.
+ */
+class XHTMLSvgImageAttributeNamePredicate : public ZLXMLReader::NamespaceAttributeNamePredicate {
+
+public:
+ XHTMLSvgImageAttributeNamePredicate();
+ bool accepts(const ZLXMLReader &reader, const char *name) const;
+
+private:
+ bool myIsEnabled; // false initially; toggled by XHTMLTagSvgAction
+
+friend class XHTMLTagSvgAction;
+};
+/*
+ * Action registered for "svg": enables the referenced predicate at the
+ * start tag and disables it again at the end tag.
+ */
+class XHTMLTagSvgAction : public XHTMLTagAction {
+
+public:
+ XHTMLTagSvgAction(XHTMLSvgImageAttributeNamePredicate &predicate);
+ void doAtStart(XHTMLReader &reader, const char **xmlattributes);
+ void doAtEnd(XHTMLReader &reader);
+
+private:
+ XHTMLSvgImageAttributeNamePredicate &myPredicate; // referenced, not owned
+};
+/*
+ * Action registered for "li": puts every list item into a paragraph of
+ * its own that starts with a bullet sign.
+ */
+class XHTMLTagItemAction : public XHTMLTagAction {
+
+public:
+ void doAtStart(XHTMLReader &reader, const char **xmlattributes);
+ void doAtEnd(XHTMLReader &reader);
+};
+/*
+ * Action registered for "a": emits a hyperlink control for the "href"
+ * attribute and a hyperlink label for the "name" attribute.
+ */
+class XHTMLTagHyperlinkAction : public XHTMLTagAction {
+
+public:
+ void doAtStart(XHTMLReader &reader, const char **xmlattributes);
+ void doAtEnd(XHTMLReader &reader);
+
+private:
+ std::stack<FBTextKind> myHyperlinkStack; // kind pushed per open "a" element; REGULAR if it had no href
+};
+/*
+ * Action for inline formatting tags ("b", "i", "code", ...): switches the
+ * text kind given at construction on at the start tag and off at the end.
+ */
+class XHTMLTagControlAction : public XHTMLTagAction {
+
+public:
+ XHTMLTagControlAction(FBTextKind control);
+
+ void doAtStart(XHTMLReader &reader, const char **xmlattributes);
+ void doAtEnd(XHTMLReader &reader);
+
+private:
+ FBTextKind myControl; // text kind toggled by this action
+};
+/*
+ * Action registered for "h1".."h6": wraps the element into a paragraph of
+ * its own with the text kind given at construction pushed.
+ */
+class XHTMLTagParagraphWithControlAction : public XHTMLTagAction {
+
+public:
+ XHTMLTagParagraphWithControlAction(FBTextKind control);
+
+ void doAtStart(XHTMLReader &reader, const char **xmlattributes);
+ void doAtEnd(XHTMLReader &reader);
+
+private:
+ FBTextKind myControl; // text kind pushed for the paragraph
+};
+/*
+ * Action registered for "pre": turns the reader's preformatted mode on
+ * at the start tag and off at the end tag.
+ */
+class XHTMLTagPreAction : public XHTMLTagAction {
+
+public:
+ void doAtStart(XHTMLReader &reader, const char **xmlattributes);
+ void doAtEnd(XHTMLReader &reader);
+};
+
+/*
+ * Starts collecting an embedded style sheet. Only elements whose "type"
+ * attribute is exactly "text/css" are handled, and only when the reader
+ * is currently in the READ_NOTHING state.
+ */
+void XHTMLTagStyleAction::doAtStart(XHTMLReader &reader, const char **xmlattributes) {
+    static const std::string TYPE = "text/css";
+
+    const char *typeAttribute = reader.attributeValue(xmlattributes, "type");
+    const bool isCss = (typeAttribute != 0) && (TYPE == typeAttribute);
+    if (!isCss || (reader.myReadState != XHTMLReader::READ_NOTHING)) {
+        return;
+    }
+
+    reader.myReadState = XHTMLReader::READ_STYLE;
+    reader.myTableParser = new StyleSheetTableParser(reader.myStyleSheetTable);
+    ZLLogger::Instance().println("CSS", "parsing style tag content");
+}
+
+/*
+ * Stops collecting the embedded style sheet: leaves the READ_STYLE state
+ * and drops the table parser. Does nothing in any other state.
+ */
+void XHTMLTagStyleAction::doAtEnd(XHTMLReader &reader) {
+    if (reader.myReadState != XHTMLReader::READ_STYLE) {
+        return;
+    }
+    reader.myReadState = XHTMLReader::READ_NOTHING;
+    reader.myTableParser.reset();
+}
+
+/*
+ * Loads an external style sheet. The element must carry
+ * rel="stylesheet", type="text/css" and an "href" attribute; the decoded
+ * href is resolved against the reader's path prefix and the file is
+ * parsed into the reader's style sheet table.
+ */
+void XHTMLTagLinkAction::doAtStart(XHTMLReader &reader, const char **xmlattributes) {
+    static const std::string REL = "stylesheet";
+    static const std::string TYPE = "text/css";
+
+    const char *relAttribute = reader.attributeValue(xmlattributes, "rel");
+    if ((relAttribute == 0) || (REL != relAttribute)) {
+        return;
+    }
+
+    const char *typeAttribute = reader.attributeValue(xmlattributes, "type");
+    if ((typeAttribute == 0) || (TYPE != typeAttribute)) {
+        return;
+    }
+
+    const char *href = reader.attributeValue(xmlattributes, "href");
+    if (href == 0) {
+        return;
+    }
+
+    const std::string cssPath = reader.myPathPrefix + MiscUtil::decodeHtmlURL(href);
+    ZLLogger::Instance().println("CSS", "style file: " + cssPath);
+    shared_ptr<ZLInputStream> cssStream = ZLFile(cssPath).inputStream();
+    if (!cssStream.isNull()) {
+        ZLLogger::Instance().println("CSS", "parsing file");
+        StyleSheetTableParser parser(reader.myStyleSheetTable);
+        parser.parse(*cssStream);
+    }
+}
+
+// All work for "link" happens at the start tag.
+void XHTMLTagLinkAction::doAtEnd(XHTMLReader&) {}
+
+/*
+ * Begins a paragraph unless one has just been started and is still
+ * flagged as "new paragraph in progress".
+ */
+void XHTMLTagParagraphAction::doAtStart(XHTMLReader &reader, const char**) {
+    if (reader.myNewParagraphInProgress) {
+        return;
+    }
+    beginParagraph(reader);
+    reader.myNewParagraphInProgress = true;
+}
+
+// Ends the paragraph at the closing tag.
+void XHTMLTagParagraphAction::doAtEnd(XHTMLReader &reader) { endParagraph(reader); }
+
+// From here on character data is treated as book text.
+void XHTMLTagBodyAction::doAtStart(XHTMLReader &reader, const char**) { reader.myReadState = XHTMLReader::READ_BODY; }
+
+/*
+ * Ends the paragraph at the end of the body and stops treating
+ * character data as book text.
+ */
+void XHTMLTagBodyAction::doAtEnd(XHTMLReader &reader) {
+    endParagraph(reader);
+
+    reader.myReadState = XHTMLReader::READ_NOTHING;
+}
+
+/*
+ * Line break: ends the current paragraph and begins a new one. A single
+ * space is added first when the current paragraph has no content yet.
+ */
+void XHTMLTagRestartParagraphAction::doAtStart(XHTMLReader &reader, const char**) {
+    const bool needsFiller = reader.myCurrentParagraphIsEmpty;
+    if (needsFiller) {
+        bookReader(reader).addData(" ");
+    }
+
+    endParagraph(reader);
+    beginParagraph(reader);
+}
+
+// All work for "br" happens at the start tag.
+void XHTMLTagRestartParagraphAction::doAtEnd(XHTMLReader&) {}
+
+/*
+ * Starts a list item: ends the current paragraph, begins a new one and
+ * prefixes it with a bullet sign.
+ */
+void XHTMLTagItemAction::doAtStart(XHTMLReader &reader, const char**) {
+    endParagraph(reader);
+    // TODO: increase left indent
+    beginParagraph(reader);
+    // TODO: replace bullet sign by number inside OL tag
+    // U+2022 BULLET followed by U+00A0 NO-BREAK SPACE. The second character
+    // used to be written as "\xC0\xA0", an overlong and therefore invalid
+    // UTF-8 sequence; NBSP is assumed to be what was intended.
+    const std::string bullet = "\xE2\x80\xA2\xC2\xA0";
+    bookReader(reader).addData(bullet);
+}
+
+// Ends the paragraph that holds the list item.
+void XHTMLTagItemAction::doAtEnd(XHTMLReader &reader) { endParagraph(reader); }
+
+// Uses the given predicate to find the attribute that names the image file.
+XHTMLTagImageAction::XHTMLTagImageAction(shared_ptr<ZLXMLReader::AttributeNamePredicate> predicate) :
+    myPredicate(predicate) {
+}
+
+// Looks the image file name up in the attribute with the given fixed name.
+XHTMLTagImageAction::XHTMLTagImageAction(const std::string &attributeName) :
+    myPredicate(new ZLXMLReader::FixedAttributeNamePredicate(attributeName)) {
+}
+
+/*
+ * Adds the image referenced by the element to the book model.
+ *
+ * The file name comes from the attribute selected by myPredicate; it is
+ * URL-decoded and resolved against the reader's path prefix. Nothing
+ * happens if the attribute is missing or the file does not exist.
+ * An open paragraph is ended before the image is added and a new one is
+ * begun afterwards.
+ *
+ * Compared to the previous version the stripping of a leading "./" from
+ * fileName was removed (its result was never used), and the ZLFile
+ * object is reused instead of being constructed a second time.
+ */
+void XHTMLTagImageAction::doAtStart(XHTMLReader &reader, const char **xmlattributes) {
+    const char *fileName = reader.attributeValue(xmlattributes, *myPredicate);
+    if (fileName == 0) {
+        return;
+    }
+
+    const std::string fullfileName = pathPrefix(reader) + MiscUtil::decodeHtmlURL(fileName);
+    ZLFile imageFile(fullfileName);
+    if (!imageFile.exists()) {
+        return;
+    }
+
+    const bool paragraphWasOpen = bookReader(reader).paragraphIsOpen();
+    if (paragraphWasOpen) {
+        endParagraph(reader);
+    }
+    bookReader(reader).addImageReference(fullfileName);
+    bookReader(reader).addImage(fullfileName, new ZLFileImage(imageFile, 0));
+    if (paragraphWasOpen) {
+        beginParagraph(reader);
+    }
+}
+
+// Keeps a reference to the predicate that this action switches on and off.
+XHTMLTagSvgAction::XHTMLTagSvgAction(XHTMLSvgImageAttributeNamePredicate &predicate) :
+    myPredicate(predicate) {
+}
+
+// Inside "svg" the predicate accepts its attribute.
+void XHTMLTagSvgAction::doAtStart(XHTMLReader&, const char**) { myPredicate.myIsEnabled = true; }
+
+// Outside "svg" the predicate rejects everything again.
+void XHTMLTagSvgAction::doAtEnd(XHTMLReader&) { myPredicate.myIsEnabled = false; }
+
+// Matches the "href" attribute in the XLink namespace; starts out disabled.
+XHTMLSvgImageAttributeNamePredicate::XHTMLSvgImageAttributeNamePredicate() :
+    ZLXMLReader::NamespaceAttributeNamePredicate(ZLXMLNamespace::XLink, "href"),
+    myIsEnabled(false) {
+}
+
+/*
+ * Accepts an attribute name only while the predicate is enabled and the
+ * base class predicate accepts it.
+ */
+bool XHTMLSvgImageAttributeNamePredicate::accepts(const ZLXMLReader &reader, const char *name) const {
+    if (!myIsEnabled) {
+        return false;
+    }
+    return NamespaceAttributeNamePredicate::accepts(reader, name);
+}
+
+// All work for image elements happens at the start tag.
+void XHTMLTagImageAction::doAtEnd(XHTMLReader&) {}
+
+// Remembers the text kind this action switches on and off.
+XHTMLTagControlAction::XHTMLTagControlAction(FBTextKind control) :
+    myControl(control) {
+}
+
+// Pushes the text kind and emits the opening control.
+void XHTMLTagControlAction::doAtStart(XHTMLReader &reader, const char**) {
+    BookReader &modelReader = bookReader(reader);
+    modelReader.pushKind(myControl);
+    modelReader.addControl(myControl, true);
+}
+
+// Emits the closing control and pops the text kind.
+void XHTMLTagControlAction::doAtEnd(XHTMLReader &reader) {
+    BookReader &modelReader = bookReader(reader);
+    modelReader.addControl(myControl, false);
+    modelReader.popKind();
+}
+
+/*
+ * Handles the start of an "a" element.
+ *
+ * With a non-empty "href" a hyperlink control is emitted. For internal
+ * links the target is rewritten: a pure fragment ("#...") is prefixed with
+ * the alias of the current file, anything else is resolved against the
+ * reference directory and normalised. The link kind (or REGULAR when there
+ * is no usable href) is pushed so that doAtEnd() knows what to close.
+ * A "name" attribute additionally defines a hyperlink label.
+ */
+void XHTMLTagHyperlinkAction::doAtStart(XHTMLReader &reader, const char **xmlattributes) {
+    const char *href = reader.attributeValue(xmlattributes, "href");
+    const bool hasTarget = (href != 0) && (href[0] != '\0');
+    if (!hasTarget) {
+        myHyperlinkStack.push(REGULAR);
+    } else {
+        const FBTextKind hyperlinkType = MiscUtil::referenceType(href);
+        std::string link = MiscUtil::decodeHtmlURL(href);
+        if (hyperlinkType == INTERNAL_HYPERLINK) {
+            if (link[0] != '#') {
+                link = reader.normalizedReference(reader.myReferenceDirName + link);
+            } else {
+                link = reader.myReferenceAlias + link;
+            }
+        }
+        myHyperlinkStack.push(hyperlinkType);
+        bookReader(reader).addHyperlinkControl(hyperlinkType, link);
+    }
+
+    const char *anchorName = reader.attributeValue(xmlattributes, "name");
+    if (anchorName != 0) {
+        const std::string label =
+            reader.myReferenceAlias + "#" + MiscUtil::decodeHtmlURL(anchorName);
+        bookReader(reader).addHyperlinkLabel(label);
+    }
+}
+
+/*
+ * Handles the end of an "a" element: closes the hyperlink control that
+ * doAtStart() opened (none was opened for REGULAR) and pops its kind.
+ */
+void XHTMLTagHyperlinkAction::doAtEnd(XHTMLReader &reader) {
+    const FBTextKind openedKind = myHyperlinkStack.top();
+    const bool controlWasOpened = (openedKind != REGULAR);
+    if (controlWasOpened) {
+        bookReader(reader).addControl(openedKind, false);
+    }
+    myHyperlinkStack.pop();
+}
+
+// Remembers the text kind that is pushed for the paragraph.
+XHTMLTagParagraphWithControlAction::XHTMLTagParagraphWithControlAction(FBTextKind control) :
+    myControl(control) {
+}
+
+/*
+ * Pushes the text kind and begins a paragraph. For the TITLE kind an
+ * end-of-section paragraph is inserted first when the book text model
+ * already holds more than one paragraph.
+ */
+void XHTMLTagParagraphWithControlAction::doAtStart(XHTMLReader &reader, const char**) {
+    BookReader &modelReader = bookReader(reader);
+
+    const bool startsNewSection =
+        (myControl == TITLE) &&
+        (modelReader.model().bookTextModel()->paragraphsNumber() > 1);
+    if (startsNewSection) {
+        modelReader.insertEndOfSectionParagraph();
+    }
+
+    modelReader.pushKind(myControl);
+    beginParagraph(reader);
+}
+
+// Ends the paragraph and pops the text kind pushed by doAtStart().
+void XHTMLTagParagraphWithControlAction::doAtEnd(XHTMLReader &reader) {
+    endParagraph(reader);
+
+    BookReader &modelReader = bookReader(reader);
+    modelReader.popKind();
+}
+
+/*
+ * Enters preformatted mode: sets the reader's flag, begins a paragraph
+ * and emits the opening PREFORMATTED control.
+ */
+void XHTMLTagPreAction::doAtStart(XHTMLReader &reader, const char**) {
+    reader.myPreformatted = true;
+
+    beginParagraph(reader);
+    BookReader &modelReader = bookReader(reader);
+    modelReader.addControl(PREFORMATTED, true);
+}
+
+// Leaves preformatted mode after ending the current paragraph.
+void XHTMLTagPreAction::doAtEnd(XHTMLReader &reader) {
+    endParagraph(reader);
+
+    reader.myPreformatted = false;
+}
+
+/*
+ * Registers action as the handler for tag and returns the handler that
+ * was registered before (0 if there was none). The previous handler is
+ * not deleted.
+ */
+XHTMLTagAction *XHTMLReader::addAction(const std::string &tag, XHTMLTagAction *action) {
+    XHTMLTagAction *&slot = ourTagActions[tag];
+    XHTMLTagAction *previous = slot;
+    slot = action;
+    return previous;
+}
+
+/*
+ * Registers the built-in tag handlers. Does nothing when the table
+ * already contains entries.
+ *
+ * Tags without a handler are ignored by the reader; these include html,
+ * head, title, meta, script, base, font, ol, ul, dl, area, map,
+ * blockquote, center, hr, param, q, s, big, small, u, table, tr, caption
+ * and span.
+ */
+void XHTMLReader::fillTagTable() {
+    if (!ourTagActions.empty()) {
+        return;
+    }
+
+    // Document structure and style sheets.
+    addAction("body", new XHTMLTagBodyAction());
+    addAction("style", new XHTMLTagStyleAction());
+    addAction("link", new XHTMLTagLinkAction());
+
+    // Elements that simply form a paragraph.
+    static const char *const paragraphTags[] = { "p", "div", "dt", "td", "th" };
+    for (std::size_t i = 0; i < sizeof(paragraphTags) / sizeof(paragraphTags[0]); ++i) {
+        addAction(paragraphTags[i], new XHTMLTagParagraphAction());
+    }
+
+    struct KindForTag {
+        const char *tag;
+        FBTextKind kind;
+    };
+
+    // Headings: a paragraph with a text kind.
+    static const KindForTag headingTags[] = {
+        { "h1", H1 }, { "h2", H2 }, { "h3", H3 },
+        { "h4", H4 }, { "h5", H5 }, { "h6", H6 }
+    };
+    for (std::size_t i = 0; i < sizeof(headingTags) / sizeof(headingTags[0]); ++i) {
+        addAction(headingTags[i].tag, new XHTMLTagParagraphWithControlAction(headingTags[i].kind));
+    }
+
+    // Inline formatting.
+    static const KindForTag controlTags[] = {
+        { "strong", STRONG },
+        { "b", BOLD },
+        { "em", EMPHASIS },
+        { "i", ITALIC },
+        { "code", CODE },
+        { "tt", CODE },
+        { "kbd", CODE },
+        { "var", CODE },
+        { "samp", CODE },
+        { "cite", CITE },
+        { "sub", SUB },
+        { "sup", SUP },
+        { "dd", DEFINITION_DESCRIPTION },
+        { "dfn", DEFINITION },
+        { "strike", STRIKETHROUGH }
+    };
+    for (std::size_t i = 0; i < sizeof(controlTags) / sizeof(controlTags[0]); ++i) {
+        addAction(controlTags[i].tag, new XHTMLTagControlAction(controlTags[i].kind));
+    }
+
+    // Lists, line breaks, preformatted text and hyperlinks.
+    addAction("li", new XHTMLTagItemAction());
+    addAction("br", new XHTMLTagRestartParagraphAction());
+    addAction("pre", new XHTMLTagPreAction());
+    addAction("a", new XHTMLTagHyperlinkAction());
+
+    // Images. The svg action toggles the predicate used by the "image" action.
+    addAction("img", new XHTMLTagImageAction("src"));
+    addAction("object", new XHTMLTagImageAction("data"));
+    XHTMLSvgImageAttributeNamePredicate *svgPredicate = new XHTMLSvgImageAttributeNamePredicate();
+    addAction("image", new XHTMLTagImageAction(svgPredicate));
+    addAction("svg", new XHTMLTagSvgAction(*svgPredicate));
+}
+
+// Binds the reader to the book model writer it reports to.
+XHTMLReader::XHTMLReader(BookReader &modelReader) :
+    myModelReader(modelReader) {
+}
+
+/*
+ * Reads one XHTML file into the book model.
+ *
+ * referenceName is the name under which other files refer to this one:
+ * its alias becomes a hyperlink label and its directory part is used to
+ * resolve relative links. All per-file parsing state is reset before the
+ * document is parsed. Returns the result of readDocument().
+ */
+bool XHTMLReader::readFile(const ZLFile &file, const std::string &referenceName) {
+    fillTagTable();
+
+    myPathPrefix = MiscUtil::htmlDirectoryPrefix(file.path());
+    myReferenceAlias = fileAlias(referenceName);
+    myModelReader.addHyperlinkLabel(myReferenceAlias);
+
+    // Directory part of the reference name, including the trailing '/';
+    // empty when the name contains no '/'.
+    const std::size_t lastSlash = referenceName.rfind('/');
+    if (lastSlash == std::string::npos) {
+        myReferenceDirName = referenceName.substr(0, 0);
+    } else {
+        myReferenceDirName = referenceName.substr(0, lastSlash + 1);
+    }
+
+    // Text state.
+    myReadState = READ_NOTHING;
+    myPreformatted = false;
+    myNewParagraphInProgress = false;
+    myCurrentParagraphIsEmpty = true;
+
+    // Style state.
+    myStyleSheetTable.clear();
+    myCSSStack.clear();
+    myStyleEntryStack.clear();
+    myStylesToRemove = 0;
+    myDoPageBreakAfterStack.clear();
+
+    myStyleParser = new StyleSheetSingleStyleParser();
+    myTableParser.reset();
+
+    return readDocument(file);
+}
+
+/*
+ * Looks up the style entry for the tag/class pair in the style sheet
+ * table. If there is one, it is sent to the model and pushed on the
+ * style entry stack. Returns whether an entry was found.
+ */
+bool XHTMLReader::addStyleEntry(const std::string tag, const std::string aClass) {
+    shared_ptr<ZLTextStyleEntry> entry = myStyleSheetTable.control(tag, aClass);
+    if (entry.isNull()) {
+        return false;
+    }
+    myModelReader.addStyleEntry(*entry);
+    myStyleEntryStack.push_back(entry);
+    return true;
+}
+
+/*
+ * Handles an opening tag.
+ *
+ * In order: defines a hyperlink label for an "id" attribute, applies a
+ * page break requested by the style sheet, remembers whether a break is
+ * due after the element, runs the tag's action and finally applies the
+ * style entries matching the tag, the class, both, and the inline "style"
+ * attribute. The number of entries added is pushed on myCSSStack for
+ * endElementHandler().
+ *
+ * The action is looked up with find() so that unknown tag names do not
+ * add null entries to the shared tag table.
+ */
+void XHTMLReader::startElementHandler(const char *tag, const char **attributes) {
+    static const std::string HASH = "#";
+    const char *id = attributeValue(attributes, "id");
+    if (id != 0) {
+        myModelReader.addHyperlinkLabel(myReferenceAlias + HASH + id);
+    }
+
+    const std::string sTag = ZLUnicodeUtil::toLower(tag);
+
+    const char *aClass = attributeValue(attributes, "class");
+    const std::string sClass = (aClass != 0) ? aClass : "";
+
+    if (myStyleSheetTable.doBreakBefore(sTag, sClass)) {
+        myModelReader.insertEndOfSectionParagraph();
+    }
+    myDoPageBreakAfterStack.push_back(myStyleSheetTable.doBreakAfter(sTag, sClass));
+
+    const std::map<std::string,XHTMLTagAction*>::const_iterator actionIt = ourTagActions.find(sTag);
+    if ((actionIt != ourTagActions.end()) && (actionIt->second != 0)) {
+        actionIt->second->doAtStart(*this, attributes);
+    }
+
+    const int sizeBefore = myStyleEntryStack.size();
+    addStyleEntry(sTag, "");
+    addStyleEntry("", sClass);
+    addStyleEntry(sTag, sClass);
+    const char *style = attributeValue(attributes, "style");
+    if (style != 0) {
+        ZLLogger::Instance().println("CSS", std::string("parsing style attribute: ") + style);
+        shared_ptr<ZLTextStyleEntry> entry = myStyleParser->parseString(style);
+        myModelReader.addStyleEntry(*entry);
+        myStyleEntryStack.push_back(entry);
+    }
+    myCSSStack.push_back(myStyleEntryStack.size() - sizeBefore);
+}
+
+/*
+ * Handles a closing tag.
+ *
+ * Emits one style close entry per style entry the element added, runs the
+ * tag's end action, removes the element's entries from the style entry
+ * stack (unless endParagraph() already did so during the action) and
+ * inserts the page break requested for after the element.
+ *
+ * The action is looked up with find() so that unknown tag names do not
+ * add null entries to the shared tag table.
+ */
+void XHTMLReader::endElementHandler(const char *tag) {
+    for (int i = myCSSStack.back(); i > 0; --i) {
+        myModelReader.addStyleCloseEntry();
+    }
+    // Must be set before the action runs: endParagraph() consumes it.
+    myStylesToRemove = myCSSStack.back();
+    myCSSStack.pop_back();
+
+    const std::map<std::string,XHTMLTagAction*>::const_iterator actionIt =
+        ourTagActions.find(ZLUnicodeUtil::toLower(tag));
+    if ((actionIt != ourTagActions.end()) && (actionIt->second != 0)) {
+        actionIt->second->doAtEnd(*this);
+        myNewParagraphInProgress = false;
+    }
+
+    for (; myStylesToRemove > 0; --myStylesToRemove) {
+        myStyleEntryStack.pop_back();
+    }
+
+    if (myDoPageBreakAfterStack.back()) {
+        myModelReader.insertEndOfSectionParagraph();
+    }
+    myDoPageBreakAfterStack.pop_back();
+}
+
+/*
+ * Begins a paragraph in the model and re-applies every style entry that
+ * is currently on the stack. If any of them defines a "space before"
+ * length, an extra entry setting that length to 0 pixels is added after
+ * them.
+ */
+void XHTMLReader::beginParagraph() {
+    typedef std::vector<shared_ptr<ZLTextStyleEntry> >::const_iterator EntryIterator;
+
+    myCurrentParagraphIsEmpty = true;
+    myModelReader.beginParagraph();
+
+    bool spaceBeforeIsSet = false;
+    for (EntryIterator entry = myStyleEntryStack.begin(); entry != myStyleEntryStack.end(); ++entry) {
+        myModelReader.addStyleEntry(**entry);
+        if (!spaceBeforeIsSet &&
+                (*entry)->isFeatureSupported(ZLTextStyleEntry::LENGTH_SPACE_BEFORE)) {
+            spaceBeforeIsSet = true;
+        }
+    }
+    if (!spaceBeforeIsSet) {
+        return;
+    }
+
+    ZLTextStyleEntry blockingEntry(ZLTextStyleEntry::STYLE_OTHER_ENTRY);
+    blockingEntry.setLength(
+        ZLTextStyleEntry::LENGTH_SPACE_BEFORE,
+        0,
+        ZLTextStyleEntry::SIZE_UNIT_PIXEL
+    );
+    myModelReader.addStyleEntry(blockingEntry);
+}
+
+/*
+ * Ends the current paragraph in the model.
+ *
+ * If any style entry below the topmost myStylesToRemove entries defines a
+ * "space after" length, an entry setting that length to 0 pixels is added
+ * first. The topmost myStylesToRemove entries are then sent to the model
+ * once more and popped from the stack.
+ */
+void XHTMLReader::endParagraph() {
+    typedef std::vector<shared_ptr<ZLTextStyleEntry> >::const_iterator EntryIterator;
+
+    bool spaceAfterIsSet = false;
+    const EntryIterator stop = myStyleEntryStack.end() - myStylesToRemove;
+    for (EntryIterator entry = myStyleEntryStack.begin(); entry != stop; ++entry) {
+        if (!spaceAfterIsSet &&
+                (*entry)->isFeatureSupported(ZLTextStyleEntry::LENGTH_SPACE_AFTER)) {
+            spaceAfterIsSet = true;
+        }
+    }
+    if (spaceAfterIsSet) {
+        ZLTextStyleEntry blockingEntry(ZLTextStyleEntry::STYLE_OTHER_ENTRY);
+        blockingEntry.setLength(
+            ZLTextStyleEntry::LENGTH_SPACE_AFTER,
+            0,
+            ZLTextStyleEntry::SIZE_UNIT_PIXEL
+        );
+        myModelReader.addStyleEntry(blockingEntry);
+    }
+
+    while (myStylesToRemove > 0) {
+        myModelReader.addStyleEntry(*myStyleEntryStack.back());
+        myStyleEntryStack.pop_back();
+        --myStylesToRemove;
+    }
+    myModelReader.endParagraph();
+}
+
+/*
+ * Handles character data according to the current read state.
+ *
+ * READ_STYLE: the data is passed to the style sheet table parser.
+ * READ_BODY: the data becomes book text. In preformatted mode a leading
+ * line break starts a new paragraph and leading whitespace is turned into
+ * a fixed horizontal space; otherwise leading whitespace is dropped at the
+ * beginning of a paragraph. A paragraph is begun if none is open.
+ * READ_NOTHING: the data is ignored.
+ *
+ * All accesses to text are guarded by len, so an empty chunk neither
+ * reads text[0] nor lets the unsigned length wrap around.
+ */
+void XHTMLReader::characterDataHandler(const char *text, std::size_t len) {
+    switch (myReadState) {
+        case READ_NOTHING:
+            break;
+        case READ_STYLE:
+            if (!myTableParser.isNull()) {
+                myTableParser->parse(text, len);
+            }
+            break;
+        case READ_BODY:
+            if (myPreformatted) {
+                if ((len > 0) && (*text == '\r' || *text == '\n')) {
+                    endParagraph();
+                    text += 1;
+                    len -= 1;
+                    beginParagraph();
+                    myModelReader.addControl(PREFORMATTED, true);
+                }
+                std::size_t spaceCounter = 0;
+                while (spaceCounter < len && std::isspace((unsigned char)*(text + spaceCounter))) {
+                    ++spaceCounter;
+                }
+                myModelReader.addFixedHSpace(spaceCounter);
+                text += spaceCounter;
+                len -= spaceCounter;
+            } else if (myNewParagraphInProgress || !myModelReader.paragraphIsOpen()) {
+                while ((len > 0) && std::isspace((unsigned char)*text)) {
+                    ++text;
+                    --len;
+                }
+            }
+            if (len > 0) {
+                myCurrentParagraphIsEmpty = false;
+                if (!myModelReader.paragraphIsOpen()) {
+                    myModelReader.beginParagraph();
+                }
+                myModelReader.addData(std::string(text, len));
+                myNewParagraphInProgress = false;
+            }
+            break;
+    }
+}
+
+// External DTD files registered for the "xhtml" format.
+const std::vector<std::string> &XHTMLReader::externalDTDs() const {
+    EntityFilesCollector &collector = EntityFilesCollector::Instance();
+    return collector.externalDTDs("xhtml");
+}
+
+// Namespace processing is always requested for XHTML documents.
+bool XHTMLReader::processNamespaces() const { return true; }
+
+/*
+ * Replaces the file part of a reference by its alias. A fragment
+ * (everything from the first '#') is kept unchanged after the alias.
+ */
+const std::string XHTMLReader::normalizedReference(const std::string &reference) const {
+    const std::size_t hashPosition = reference.find('#');
+    if (hashPosition != std::string::npos) {
+        return fileAlias(reference.substr(0, hashPosition)) + reference.substr(hashPosition);
+    }
+    return fileAlias(reference);
+}
+
+/*
+ * Returns the alias of a file name, creating one if necessary.
+ *
+ * The name is looked up as given first, then in URL-decoded and
+ * path-normalised form. If neither is known, a new alias is stored under
+ * the normalised name: the number of aliases stored so far, as a string.
+ * The returned reference points into the alias map.
+ */
+const std::string &XHTMLReader::fileAlias(const std::string &fileName) const {
+    typedef std::map<std::string,std::string>::const_iterator AliasIterator;
+
+    AliasIterator known = myFileNumbers.find(fileName);
+    if (known != myFileNumbers.end()) {
+        return known->second;
+    }
+
+    const std::string correctedFileName =
+        ZLFileUtil::normalizeUnixPath(MiscUtil::decodeHtmlURL(fileName));
+    known = myFileNumbers.find(correctedFileName);
+    if (known != myFileNumbers.end()) {
+        return known->second;
+    }
+
+    std::string alias;
+    ZLStringUtil::appendNumber(alias, myFileNumbers.size());
+    return myFileNumbers.insert(std::make_pair(correctedFileName, alias)).first->second;
+}
diff --git a/reader/src/formats/xhtml/XHTMLReader.h b/reader/src/formats/xhtml/XHTMLReader.h
new file mode 100644
index 0000000..08d4c02
--- /dev/null
+++ b/reader/src/formats/xhtml/XHTMLReader.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __XHTMLREADER_H__
+#define __XHTMLREADER_H__
+
+#include <string>
+#include <map>
+#include <vector>
+
+#include <ZLXMLReader.h>
+
+#include "../css/StyleSheetTable.h"
+#include "../css/StyleSheetParser.h"
+
+class ZLFile;
+
+class BookReader;
+class XHTMLReader;
+
+class XHTMLTagAction { // Abstract per-tag handler: both hooks below are pure virtual.
+ // XHTMLReader keeps XHTMLTagAction pointers keyed by tag name (see XHTMLReader::ourTagActions).
+public:
+ virtual ~XHTMLTagAction();
+ // NOTE(review): presumably doAtStart runs for an opening tag and doAtEnd for its closing tag — confirm in XHTMLReader.cpp.
+ virtual void doAtStart(XHTMLReader &reader, const char **xmlattributes) = 0; // receives the reader and the raw attribute array
+ virtual void doAtEnd(XHTMLReader &reader) = 0; // receives only the reader
+ // Static helpers for subclasses; each takes the reader it operates on.
+protected:
+ static BookReader &bookReader(XHTMLReader &reader); // presumably exposes the reader's private myModelReader — confirm
+ static const std::string &pathPrefix(XHTMLReader &reader); // presumably exposes the reader's private myPathPrefix — confirm
+ static void beginParagraph(XHTMLReader &reader); // presumably forwards to the reader's private beginParagraph() — confirm
+ static void endParagraph(XHTMLReader &reader); // presumably forwards to the reader's private endParagraph() — confirm
+};
+
+class XHTMLReader : public ZLXMLReader { // ZLXMLReader subclass that writes XHTML content into a BookReader (kept as myModelReader).
+ // Static tag-action registry, shared by every XHTMLReader instance.
+public:
+ static XHTMLTagAction *addAction(const std::string &tag, XHTMLTagAction *action); // NOTE(review): presumably returns the action previously bound to `tag` — confirm
+ static void fillTagTable(); // NOTE(review): presumably populates ourTagActions with the built-in actions — confirm
+
+private:
+ static std::map<std::string,XHTMLTagAction*> ourTagActions; // tag name -> action pointer
+ // Per-instance public interface.
+public:
+ XHTMLReader(BookReader &modelReader); // the BookReader is held by reference, not owned
+ bool readFile(const ZLFile &file, const std::string &referenceName);
+ const std::string &fileAlias(const std::string &fileName) const; // alias for a file name; a new one is created and cached on first use
+ const std::string normalizedReference(const std::string &reference) const; // fileAlias() of the part before '#', with the "#..." tail kept as is
+ // NOTE(review): the three handlers below presumably override ZLXMLReader parsing callbacks — confirm.
+private:
+ void startElementHandler(const char *tag, const char **attributes);
+ void endElementHandler(const char *tag);
+ void characterDataHandler(const char *text, std::size_t len); // text is a pointer plus length, not a NUL-terminated string
+
+ const std::vector<std::string> &externalDTDs() const; // returns EntityFilesCollector's DTD list for "xhtml"
+
+ bool processNamespaces() const; // always returns true
+
+ void beginParagraph();
+ void endParagraph();
+ bool addStyleEntry(const std::string tag, const std::string aClass); // note: both strings are taken by value
+ // Data members.
+private:
+ mutable std::map<std::string,std::string> myFileNumbers; // file name -> alias; mutable because the const fileAlias() inserts into it
+
+ BookReader &myModelReader; // sink for paragraphs and text produced while parsing
+ std::string myPathPrefix;
+ std::string myReferenceAlias;
+ std::string myReferenceDirName;
+ bool myPreformatted; // NOTE(review): presumably set while inside preformatted content (see XHTMLTagPreAction) — confirm
+ bool myNewParagraphInProgress; // cleared by characterDataHandler() once text has been added
+ StyleSheetTable myStyleSheetTable;
+ std::vector<int> myCSSStack;
+ std::vector<shared_ptr<ZLTextStyleEntry> > myStyleEntryStack;
+ int myStylesToRemove;
+ std::vector<bool> myDoPageBreakAfterStack;
+ bool myCurrentParagraphIsEmpty; // cleared by characterDataHandler() when non-empty text arrives
+ shared_ptr<StyleSheetSingleStyleParser> myStyleParser;
+ shared_ptr<StyleSheetTableParser> myTableParser;
+ enum {
+ READ_NOTHING,
+ READ_STYLE,
+ READ_BODY
+ } myReadState; // current parsing phase, one of the three values above
+ // The tag action classes listed below are granted access to the private members above.
+ friend class XHTMLTagAction;
+ friend class XHTMLTagStyleAction;
+ friend class XHTMLTagLinkAction;
+ friend class XHTMLTagHyperlinkAction;
+ friend class XHTMLTagPreAction;
+ friend class XHTMLTagParagraphAction;
+ friend class XHTMLTagBodyAction;
+ friend class XHTMLTagRestartParagraphAction;
+};
+
+#endif /* __XHTMLREADER_H__ */