From 2462d03f322261bd616721c2b2065c4004b36c9c Mon Sep 17 00:00:00 2001 From: Michele Calgaro Date: Sat, 11 May 2024 21:28:48 +0900 Subject: Initial import (as is) from Debian Snapshot's 'fbreader' source code (https://snapshot.debian.org/package/fbreader/0.99.4%2Bdfsg-6). The Debian code is provided under GPL2 license. Signed-off-by: Michele Calgaro --- fbreader/src/formats/chm/CHMFile.cpp | 490 +++++++++++++++++++++++++++++++++++ 1 file changed, 490 insertions(+) create mode 100644 fbreader/src/formats/chm/CHMFile.cpp (limited to 'fbreader/src/formats/chm/CHMFile.cpp') diff --git a/fbreader/src/formats/chm/CHMFile.cpp b/fbreader/src/formats/chm/CHMFile.cpp new file mode 100644 index 0000000..8c62bca --- /dev/null +++ b/fbreader/src/formats/chm/CHMFile.cpp @@ -0,0 +1,490 @@ +/* + * Copyright (C) 2004-2012 Geometer Plus + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#include + +#include +#include +#include +#include + +#include "CHMFile.h" +#include "CHMReferenceCollection.h" + +#include "LZXDecompressor.h" + +static std::string readString(ZLInputStream &stream, std::size_t length) { + std::string string(length, ' '); + stream.read(const_cast(string.data()), length); + return string; +} + +static unsigned short readUnsignedWord(ZLInputStream &stream) { + unsigned char buffer[2]; + stream.read((char*)buffer, 2); + unsigned short result = buffer[1]; + result = result << 8; + result += buffer[0]; + return result; +} + +static unsigned long readUnsignedDWord(ZLInputStream &stream) { + unsigned long lowPart = readUnsignedWord(stream); + unsigned long highPart = readUnsignedWord(stream); + return (highPart << 16) + lowPart; +} + +static unsigned long long readUnsignedQWord(ZLInputStream &stream) { + unsigned long long lowPart = readUnsignedDWord(stream); + unsigned long long highPart = readUnsignedDWord(stream); + return (highPart << 32) + lowPart; +} + +static unsigned long long readEncodedInteger(ZLInputStream &stream) { + unsigned long long result = 0; + char part; + do { + result = result << 7; + stream.read(&part, 1); + result += part & 0x7F; + } while (part & -0x80); + return result; +} + +CHMInputStream::CHMInputStream(shared_ptr base, const CHMFileInfo::SectionInfo §ionInfo, std::size_t offset, std::size_t size) : myBase(base), mySectionInfo(sectionInfo), mySize(size) { + myBaseStartIndex = offset / 0x8000; + myBaseStartIndex -= myBaseStartIndex % sectionInfo.ResetInterval; + myBytesToSkip = offset - myBaseStartIndex * 0x8000; + myOutData = new unsigned char[0x8000]; +} + +CHMInputStream::~CHMInputStream() { + close(); + delete[] myOutData; +} + +bool CHMInputStream::open() { + myOffset = 0; + myDoSkip = true; + myBaseIndex = myBaseStartIndex; + if (myDecompressor.isNull()) { + myDecompressor = new LZXDecompressor(mySectionInfo.WindowSizeIndex); + } else { + myDecompressor->reset(); + } + myOutDataOffset = 0; + myOutDataLength = 0; + return true; +} + +std::size_t CHMInputStream::read(char *buffer, std::size_t maxSize) { + if (myDoSkip) { + do_read(0, myBytesToSkip); + myDoSkip = false; + } + std::size_t realSize = do_read(buffer, std::min(maxSize, mySize - myOffset)); + myOffset += realSize; + return realSize; +} + +std::size_t CHMInputStream::do_read(char *buffer, std::size_t maxSize) { + std::size_t realSize = 0; + do { + if (myOutDataLength == 0) { + if (myBaseIndex >= mySectionInfo.ResetTable.size()) { + break; + } + const bool isTail = myBaseIndex + 1 == mySectionInfo.ResetTable.size(); + const std::size_t start = mySectionInfo.ResetTable[myBaseIndex]; + const std::size_t end = isTail ? mySectionInfo.CompressedSize : mySectionInfo.ResetTable[myBaseIndex + 1]; + myOutDataLength = isTail ? mySectionInfo.UncompressedSize % 0x8000 : 0x8000; + myOutDataOffset = 0; + + myInData.erase(); + myInData.append(end - start, '\0'); + myBase->seek(mySectionInfo.Offset + start, true); + myBase->read((char*)myInData.data(), myInData.length()); + if (myBaseIndex % mySectionInfo.ResetInterval == 0) { + myDecompressor->reset(); + } + ++myBaseIndex; + + if (!myDecompressor->decompress(myInData, myOutData, myOutDataLength)) { + break; + } + } + const std::size_t partSize = std::min(myOutDataLength, maxSize); + if (buffer != 0) { + std::memcpy(buffer + realSize, myOutData + myOutDataOffset, partSize); + } + maxSize -= partSize; + realSize += partSize; + myOutDataLength -= partSize; + myOutDataOffset += partSize; + } while (maxSize != 0); + return realSize; +} + +void CHMInputStream::close() { + myDecompressor = 0; +} + +void CHMInputStream::seek(int offset, bool absoluteOffset) { + if (absoluteOffset) { + offset -= myOffset; + } + if (offset > 0) { + read(0, offset); + } else if (offset < 0) { + open(); + read(0, std::max(offset + (int)myOffset, 0)); + } +} + +std::size_t CHMInputStream::offset() const { + return myOffset; +} + +std::size_t CHMInputStream::sizeOfOpened() { + return mySize; +} + +shared_ptr CHMFileInfo::entryStream(shared_ptr base, const std::string &name) const { + RecordMap::const_iterator it = myRecords.find(ZLUnicodeUtil::toLower(name)); + if (it == myRecords.end()) { + return 0; + } + const RecordInfo &recordInfo = it->second; + if (recordInfo.Length == 0) { + return 0; + } + if (recordInfo.Section == 0) { + // TODO: implement + return 0; + } + if (recordInfo.Section > mySectionInfos.size()) { + return 0; + } + const SectionInfo §ionInfo = mySectionInfos[recordInfo.Section - 1]; + if (recordInfo.Offset + recordInfo.Length > sectionInfo.UncompressedSize) { + return 0; + } + + return new CHMInputStream(base, sectionInfo, recordInfo.Offset, recordInfo.Length); +} + +CHMFileInfo::CHMFileInfo(const ZLFile &file) : myFilePath(file.path()) { +} + +bool CHMFileInfo::moveToEntry(ZLInputStream &stream, const std::string &entryName) { + RecordMap::const_iterator it = myRecords.find(entryName); + if (it == myRecords.end()) { + return false; + } + RecordInfo recordInfo = it->second; + if (recordInfo.Section > mySectionInfos.size()) { + return false; + } + if (recordInfo.Section != 0) { + // TODO: ??? + return false; + } + + stream.seek(mySection0Offset + recordInfo.Offset, true); + return true; +} + +bool CHMFileInfo::init(ZLInputStream &stream) { + { + // header start + if (readString(stream, 4) != "ITSF") { + return false; + } + + unsigned long version = readUnsignedDWord(stream); + + // DWORD total length + // DWORD unknown + // DWORD timestamp + // DWORD language id + // 0x10 bytes 1st GUID + // 0x10 bytes 2nd GUID + // QWORD section 0 offset + // QWORD section 0 length + stream.seek(4 * 4 + 2 * 0x10 + 2 * 8, false); + + unsigned long long sectionOffset1 = readUnsignedQWord(stream); + unsigned long long sectionLength1 = readUnsignedQWord(stream); + mySection0Offset = sectionOffset1 + sectionLength1; + // header end + + // additional header data start + if (version > 2) { + mySection0Offset = readUnsignedQWord(stream); + } + // additional header data end + + stream.seek(sectionOffset1, true); + // header section 1 start + // directory header start + if (readString(stream, 4) != "ITSP") { + return false; + } + + // DWORD version + // DWORD length + // DWORD 0x000A + // DWORD chunk size + // DWORD density + // DWORD depth + // DWORD root chunk number + // DWORD first chunk number + // DWORD last chunk number + // DWORD -1 + stream.seek(10 * 4, false); + unsigned long dirChunkNumber = readUnsignedDWord(stream); + // ... + stream.seek(36, false); + // header section 1 end + + std::size_t nextOffset = stream.offset(); + for (unsigned long i = 0; i < dirChunkNumber; ++i) { + nextOffset += 4096; + std::string header = readString(stream, 4); + if (header == "PMGL") { + unsigned long quickRefAreaSize = readUnsignedDWord(stream) % 4096; + stream.seek(12, false); + std::size_t startOffset = stream.offset(); + std::size_t oldOffset = startOffset; + while (startOffset < nextOffset - quickRefAreaSize) { + int nameLength = readEncodedInteger(stream); + std::string name = readString(stream, nameLength); + int contentSection = readEncodedInteger(stream); + int offset = readEncodedInteger(stream); + int length = readEncodedInteger(stream); + if (name.substr(0, 2) != "::") { + name = ZLUnicodeUtil::toLower(name); + } + myRecords.insert( + std::make_pair( + name, + CHMFileInfo::RecordInfo(contentSection, offset, length) + ) + ); + startOffset = stream.offset(); + if (oldOffset == startOffset) { + break; + } + oldOffset = startOffset; + } + } else if (header == "PMGI") { + unsigned long quickRefAreaSize = readUnsignedDWord(stream); + std::size_t startOffset = stream.offset(); + std::size_t oldOffset = startOffset; + while (startOffset < nextOffset - quickRefAreaSize) { + int nameLength = readEncodedInteger(stream); + std::string name = readString(stream, nameLength); + // chunk number + readEncodedInteger(stream); + startOffset = stream.offset(); + if (oldOffset == startOffset) { + break; + } + oldOffset = startOffset; + } + } + stream.seek(nextOffset, true); + if (stream.offset() != nextOffset) { + break; + } + } + } + + { + if (!moveToEntry(stream, "::DataSpace/NameList")) { + return false; + } + stream.seek(2, false); + const int sectionNumber = readUnsignedWord(stream); + for (int i = 0; i < sectionNumber; ++i) { + const int length = readUnsignedWord(stream); + std::string sectionName; + sectionName.reserve(length); + for (int j = 0; j < length; ++j) { + sectionName += (char)readUnsignedWord(stream); + } + stream.seek(2, false); + mySectionNames.push_back(sectionName); + } + } + + { + for (unsigned int i = 1; i < mySectionNames.size(); ++i) { + RecordMap::const_iterator it = + myRecords.find("::DataSpace/Storage/" + mySectionNames[i] + "/Content"); + if (it == myRecords.end()) { + return false; + } + RecordInfo recordInfo = it->second; + if (recordInfo.Section != 0) { + return false; + } + mySectionInfos.push_back(SectionInfo()); + SectionInfo &info = mySectionInfos.back(); + info.Offset = mySection0Offset + recordInfo.Offset; + info.Length = recordInfo.Length; + + if (!moveToEntry(stream, "::DataSpace/Storage/" + mySectionNames[i] + "/ControlData")) { + return false; + } + stream.seek(4, false); + std::string lzxc = readString(stream, 4); + if (lzxc != "LZXC") { + return false; + } + const int version = readUnsignedDWord(stream); + if ((version <= 0) || (version > 2)) { + return false; + } + info.ResetInterval = readUnsignedDWord(stream); + if (version == 1) { + info.ResetInterval /= 0x8000; + } + info.WindowSizeIndex = (version == 1) ? 0 : 15; + { + int ws = readUnsignedDWord(stream); + if (ws > 0) { + while ((ws & 1) == 0) { + ws >>= 1; + info.WindowSizeIndex++; + } + } + } + + if (!moveToEntry(stream, "::DataSpace/Storage/" + mySectionNames[i] + "/Transform/{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}/InstanceData/ResetTable")) { + return false; + } + stream.seek(4, false); + const std::size_t entriesNumber = readUnsignedDWord(stream); + if (entriesNumber == 0) { + return false; + } + if (entriesNumber > 2048) { + // file size is greater than 60 Mb + return false; + } + info.ResetTable.reserve(entriesNumber); + stream.seek(8, false); + info.UncompressedSize = readUnsignedQWord(stream); + if ((info.UncompressedSize - 1) / 0x8000 != entriesNumber - 1) { + return false; + } + info.CompressedSize = readUnsignedQWord(stream); + stream.seek(8, false); + std::size_t previous = 0; + for (std::size_t j = 0; j < entriesNumber; ++j) { + std::size_t value = readUnsignedQWord(stream); + if ((j > 0) == (value <= previous)) { + return false; + } + info.ResetTable.push_back(value); + previous = value; + } + } + } + + return true; +} + +static std::string readNTString(ZLInputStream &stream) { + std::string s; + char c; + while (stream.read(&c, 1) == 1) { + if (c == '\0') { + break; + } else { + s += c; + } + } + return CHMReferenceCollection::fullReference("/", s); +} + +bool CHMFileInfo::FileNames::empty() const { + return Start.empty() && TOC.empty() && Home.empty() && Index.empty(); +} + +CHMFileInfo::FileNames CHMFileInfo::sectionNames(shared_ptr base) const { + FileNames names; + shared_ptr stringsStream = entryStream(base, "/#STRINGS"); + if (!stringsStream.isNull() && stringsStream->open()) { + std::vector fileNames; + int tocIndex = -1; + int indexIndex = -1; + for (int i = 0; i < 12; ++i) { + std::string argument = readNTString(*stringsStream); + if (argument.empty() || (argument[argument.length() - 1] == '/')) { + continue; + } + if (myRecords.find(argument) == myRecords.end()) { + continue; + } + if ((tocIndex == -1) && ZLStringUtil::stringEndsWith(argument, ".hhc")) { + tocIndex = fileNames.size(); + names.TOC = argument; + } else if ((indexIndex == -1) && ZLStringUtil::stringEndsWith(argument, ".hhk")) { + indexIndex = fileNames.size(); + names.Index = argument; + } + fileNames.push_back(argument); + } + std::size_t startIndex = std::max(3, std::max(tocIndex, indexIndex) + 1); + if (startIndex < 11) { + if (startIndex < fileNames.size()) { + names.Start = fileNames[startIndex]; + } + if (startIndex + 1 < fileNames.size()) { + names.Home = fileNames[startIndex + 1]; + } + } + stringsStream->close(); + } + if (names.TOC.empty()) { + for (RecordMap::const_iterator it = myRecords.begin(); it != myRecords.end(); ++it) { + if (ZLStringUtil::stringEndsWith(it->first, ".hhc")) { + names.TOC = it->first; + break; + } + } + } + if (names.empty()) { + for (RecordMap::const_iterator it = myRecords.begin(); it != myRecords.end(); ++it) { + if ((ZLStringUtil::stringEndsWith(it->first, ".htm")) || + (ZLStringUtil::stringEndsWith(it->first, ".html"))) { + names.Start = it->first; + break; + } + } + } + + return names; +} + +const std::string CHMFileInfo::filePath() const { + return myFilePath; +} -- cgit v1.2.1